Skip to content

Commit

Permalink
sched: adjust scheduler cpu power for stolen time
Browse files Browse the repository at this point in the history
This patch makes update_rq_clock() aware of steal time.
The mechanism of operation is not different from irq_time,
and follows the same principles. This lives in a CONFIG
option itself, and can be compiled out independently of
the rest of steal time reporting. The effect of disabling it
is that the scheduler will still report steal time (that cannot be
disabled), but won't use this information for cpu power adjustments.

Every time update_rq_clock_task() is invoked, we query information
about how much time was stolen since the last call, and feed it into
sched_rt_avg_update().

Although steal time reporting in account_process_tick() keeps
track of the last time we read the steal clock, in prev_steal_time,
this patch does it independently using another field,
prev_steal_time_rq. This is because otherwise, information about time
accounted in account_process_tick() would never reach us in update_rq_clock().

Signed-off-by: Glauber Costa <glommer@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric B Munson <emunson@mgebm.net>
CC: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
CC: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
  • Loading branch information
Glauber Costa authored and Avi Kivity committed Jul 14, 2011
1 parent e6e6685 commit 095c0aa
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 12 deletions.
12 changes: 12 additions & 0 deletions arch/x86/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,18 @@ menuconfig PARAVIRT_GUEST

if PARAVIRT_GUEST

config PARAVIRT_TIME_ACCOUNTING
bool "Paravirtual steal time accounting"
select PARAVIRT
default n
---help---
Select this option to enable fine granularity task steal time
accounting. Time spent executing other tasks in parallel with
the current vCPU is discounted from the vCPU power. To account for
that, there can be a small performance impact.

If in doubt, say N here.

source "arch/x86/xen/Kconfig"

config KVM_CLOCK
Expand Down
47 changes: 37 additions & 10 deletions kernel/sched.c
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,9 @@ struct rq {
#ifdef CONFIG_PARAVIRT
u64 prev_steal_time;
#endif
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
u64 prev_steal_time_rq;
#endif

/* calc_load related fields */
unsigned long calc_load_update;
Expand Down Expand Up @@ -1973,8 +1976,14 @@ static inline u64 steal_ticks(u64 steal)

static void update_rq_clock_task(struct rq *rq, s64 delta)
{
s64 irq_delta;

/*
* In theory, the compiler should just see 0 here, and optimize out the call
* to sched_rt_avg_update. But I don't trust it...
*/
#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
s64 steal = 0, irq_delta = 0;
#endif
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;

/*
Expand All @@ -1997,12 +2006,35 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)

rq->prev_irq_time += irq_delta;
delta -= irq_delta;
#endif
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
if (static_branch((&paravirt_steal_rq_enabled))) {
u64 st;

steal = paravirt_steal_clock(cpu_of(rq));
steal -= rq->prev_steal_time_rq;

if (unlikely(steal > delta))
steal = delta;

st = steal_ticks(steal);
steal = st * TICK_NSEC;

rq->prev_steal_time_rq += steal;

delta -= steal;
}
#endif

rq->clock_task += delta;

if (irq_delta && sched_feat(NONIRQ_POWER))
sched_rt_avg_update(rq, irq_delta);
#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
if ((irq_delta + steal) && sched_feat(NONTASK_POWER))
sched_rt_avg_update(rq, irq_delta + steal);
#endif
}

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
static int irqtime_account_hi_update(void)
{
struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
Expand Down Expand Up @@ -2037,12 +2069,7 @@ static int irqtime_account_si_update(void)

#define sched_clock_irqtime (0)

static void update_rq_clock_task(struct rq *rq, s64 delta)
{
rq->clock_task += delta;
}

#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
#endif

#include "sched_idletask.c"
#include "sched_fair.c"
Expand Down
4 changes: 2 additions & 2 deletions kernel/sched_features.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@ SCHED_FEAT(LB_BIAS, 1)
SCHED_FEAT(OWNER_SPIN, 1)

/*
* Decrement CPU power based on irq activity
* Decrement CPU power based on time not spent running tasks
*/
SCHED_FEAT(NONIRQ_POWER, 1)
SCHED_FEAT(NONTASK_POWER, 1)

/*
* Queue remote wakeups on the target CPU and process them
Expand Down

0 comments on commit 095c0aa

Please sign in to comment.