Skip to content

Commit

Permalink
powerpc: Implement accurate task and CPU time accounting
Browse files Browse the repository at this point in the history
This implements accurate task and cpu time accounting for 64-bit
powerpc kernels.  Instead of accounting a whole jiffy of time to a
task on a timer interrupt because that task happened to be running at
the time, we now account time in units of timebase ticks according to
the actual time spent by the task in user mode and kernel mode.  We
also count the time spent processing hardware and software interrupts
accurately.  This is conditional on CONFIG_VIRT_CPU_ACCOUNTING.  If
that is not set, we do tick-based approximate accounting as before.

To get this accurate information, we read either the PURR (processor
utilization of resources register) on POWER5 machines, or the timebase
on other machines on

* each entry to the kernel from usermode
* each exit to usermode
* transitions between process context, hard irq context and soft irq
  context in kernel mode
* context switches.

On POWER5 systems with shared-processor logical partitioning we also
read both the PURR and the timebase at each timer interrupt and
context switch in order to determine how much time has been taken by
the hypervisor to run other partitions ("steal" time).  Unfortunately,
since we need values of the PURR on both threads at the same time to
accurately calculate the steal time, and since we can only calculate
steal time on a per-core basis, the apportioning of the steal time
between idle time (time which we ceded to the hypervisor in the idle
loop) and actual stolen time is somewhat approximate at the moment.

This is all based quite heavily on what s390 does, and it uses the
generic interfaces that were added by the s390 developers,
i.e. account_system_time(), account_user_time(), etc.

This patch doesn't add any new interfaces between the kernel and
userspace, and doesn't change the units in which time is reported to
userspace by things such as /proc/stat, /proc/<pid>/stat, getrusage(),
times(), etc.  Internally the various task and cpu times are stored in
timebase units, but they are converted to USER_HZ units (1/100th of a
second) when reported to userspace.  Some precision is therefore lost
but there should not be any accumulating error, since the internal
accumulation is at full precision.

Signed-off-by: Paul Mackerras <paulus@samba.org>
  • Loading branch information
Paul Mackerras committed Feb 24, 2006
1 parent a00428f commit c6622f6
Show file tree
Hide file tree
Showing 16 changed files with 577 additions and 17 deletions.
15 changes: 15 additions & 0 deletions arch/powerpc/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,21 @@ config PPC_STD_MMU_32
def_bool y
depends on PPC_STD_MMU && PPC32

config VIRT_CPU_ACCOUNTING
bool "Deterministic task and CPU time accounting"
depends on PPC64
default y
help
Select this option to enable more accurate task and CPU time
accounting. This is done by reading a CPU counter on each
kernel entry and exit and on transitions within the kernel
between system, softirq and hardirq state, so there is a
small performance impact. This also enables accounting of
stolen time on logically-partitioned systems running on
IBM POWER5-based machines.

If in doubt, say Y here.

config SMP
depends on PPC_STD_MMU
bool "Symmetric multi-processing support"
Expand Down
3 changes: 3 additions & 0 deletions arch/powerpc/kernel/asm-offsets.c
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,9 @@ int main(void)
DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp));
DEFINE(PACALPPACAPTR, offsetof(struct paca_struct, lppaca_ptr));
DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
DEFINE(PACA_STARTPURR, offsetof(struct paca_struct, startpurr));
DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));

DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0));
DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1));
Expand Down
7 changes: 5 additions & 2 deletions arch/powerpc/kernel/entry_64.S
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ system_call_common:
std r12,_MSR(r1)
std r0,GPR0(r1)
std r10,GPR1(r1)
ACCOUNT_CPU_USER_ENTRY(r10, r11)
std r2,GPR2(r1)
std r3,GPR3(r1)
std r4,GPR4(r1)
Expand Down Expand Up @@ -168,8 +169,9 @@ syscall_error_cont:
stdcx. r0,0,r1 /* to clear the reservation */
andi. r6,r8,MSR_PR
ld r4,_LINK(r1)
beq- 1f /* only restore r13 if */
ld r13,GPR13(r1) /* returning to usermode */
beq- 1f
ACCOUNT_CPU_USER_EXIT(r11, r12)
ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
1: ld r2,GPR2(r1)
li r12,MSR_RI
andc r11,r10,r12
Expand Down Expand Up @@ -536,6 +538,7 @@ restore:
* userspace
*/
beq 1f
ACCOUNT_CPU_USER_EXIT(r3, r4)
REST_GPR(13, r1)
1:
ld r3,_CTR(r1)
Expand Down
9 changes: 9 additions & 0 deletions arch/powerpc/kernel/head_64.S
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ exception_marker:
std r10,0(r1); /* make stack chain pointer */ \
std r0,GPR0(r1); /* save r0 in stackframe */ \
std r10,GPR1(r1); /* save r1 in stackframe */ \
ACCOUNT_CPU_USER_ENTRY(r9, r10); \
std r2,GPR2(r1); /* save r2 in stackframe */ \
SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
Expand Down Expand Up @@ -844,6 +845,14 @@ fast_exception_return:
ld r11,_NIP(r1)
andi. r3,r12,MSR_RI /* check if RI is set */
beq- unrecov_fer

#ifdef CONFIG_VIRT_CPU_ACCOUNTING
andi. r3,r12,MSR_PR
beq 2f
ACCOUNT_CPU_USER_EXIT(r3, r4)
2:
#endif

ld r3,_CCR(r1)
ld r4,_LINK(r1)
ld r5,_CTR(r1)
Expand Down
30 changes: 22 additions & 8 deletions arch/powerpc/kernel/irq.c
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,7 @@ unsigned int real_irq_to_virt_slowpath(unsigned int real_irq)
return NO_IRQ;

}
#endif /* CONFIG_PPC64 */

#ifdef CONFIG_IRQSTACKS
struct thread_info *softirq_ctx[NR_CPUS];
Expand All @@ -392,30 +393,43 @@ void irq_ctx_init(void)
}
}

static inline void do_softirq_onstack(void)
{
struct thread_info *curtp, *irqtp;

curtp = current_thread_info();
irqtp = softirq_ctx[smp_processor_id()];
irqtp->task = curtp->task;
call_do_softirq(irqtp);
irqtp->task = NULL;
}

#else
#define do_softirq_onstack() __do_softirq()
#endif /* CONFIG_IRQSTACKS */

void do_softirq(void)
{
unsigned long flags;
struct thread_info *curtp, *irqtp;

if (in_interrupt())
return;

local_irq_save(flags);

if (local_softirq_pending()) {
curtp = current_thread_info();
irqtp = softirq_ctx[smp_processor_id()];
irqtp->task = curtp->task;
call_do_softirq(irqtp);
irqtp->task = NULL;
account_system_vtime(current);
local_bh_disable();
do_softirq_onstack();
account_system_vtime(current);
__local_bh_enable();
}

local_irq_restore(flags);
}
EXPORT_SYMBOL(do_softirq);

#endif /* CONFIG_IRQSTACKS */

#ifdef CONFIG_PPC64
static int __init setup_noirqdistrib(char *str)
{
distribute_irqs = 0;
Expand Down
7 changes: 6 additions & 1 deletion arch/powerpc/kernel/process.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@
#include <asm/mmu.h>
#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/time.h>
#ifdef CONFIG_PPC64
#include <asm/firmware.h>
#include <asm/time.h>
#endif

extern unsigned long _get_SP(void);
Expand Down Expand Up @@ -328,6 +328,11 @@ struct task_struct *__switch_to(struct task_struct *prev,
#endif

local_irq_save(flags);

account_system_vtime(current);
account_process_vtime(current);
calculate_steal_time();

last = _switch(old_thread, new_thread);

local_irq_restore(flags);
Expand Down
4 changes: 3 additions & 1 deletion arch/powerpc/kernel/smp.c
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,7 @@ int __devinit start_secondary(void *unused)
smp_ops->take_timebase();

if (system_state > SYSTEM_BOOTING)
per_cpu(last_jiffy, cpu) = get_tb();
snapshot_timebase();

spin_lock(&call_lock);
cpu_set(cpu, cpu_online_map);
Expand Down Expand Up @@ -573,6 +573,8 @@ void __init smp_cpus_done(unsigned int max_cpus)

set_cpus_allowed(current, old_mask);

snapshot_timebases();

dump_numa_cpu_topology();
}

Expand Down
Loading

0 comments on commit c6622f6

Please sign in to comment.