Skip to content

Commit

Permalink
sched: Maintain runnable averages across throttled periods
Browse files Browse the repository at this point in the history
With bandwidth control tracked entities may cease execution according to user
specified bandwidth limits.  Charging this time as either throttled or blocked
however, is incorrect and would falsely skew in either direction.

What we actually want is for any throttled periods to be "invisible" to
load-tracking as they are removed from the system for that interval and
contribute normally otherwise.

Do this by moderating the progression of time to omit any periods in which the
entity belonged to a throttled hierarchy.

Signed-off-by: Paul Turner <pjt@google.com>
Reviewed-by: Ben Segall <bsegall@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120823141506.998912151@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Paul Turner authored and Ingo Molnar committed Oct 24, 2012
1 parent bb17f65 commit f1b1728
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 11 deletions.
50 changes: 40 additions & 10 deletions kernel/sched/fair.c
Original file line number Diff line number Diff line change
Expand Up @@ -1222,15 +1222,26 @@ static inline void subtract_blocked_load_contrib(struct cfs_rq *cfs_rq,
cfs_rq->blocked_load_avg = 0;
}

static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);

/* Update a sched_entity's runnable average */
static inline void update_entity_load_avg(struct sched_entity *se,
int update_cfs_rq)
{
struct cfs_rq *cfs_rq = cfs_rq_of(se);
long contrib_delta;
u64 now;

if (!__update_entity_runnable_avg(rq_of(cfs_rq)->clock_task, &se->avg,
se->on_rq))
/*
* For a group entity we need to use their owned cfs_rq_clock_task() in
* case they are the parent of a throttled hierarchy.
*/
if (entity_is_task(se))
now = cfs_rq_clock_task(cfs_rq);
else
now = cfs_rq_clock_task(group_cfs_rq(se));

if (!__update_entity_runnable_avg(now, &se->avg, se->on_rq))
return;

contrib_delta = __update_entity_load_avg_contrib(se);
Expand All @@ -1250,7 +1261,7 @@ static inline void update_entity_load_avg(struct sched_entity *se,
*/
static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update)
{
u64 now = rq_of(cfs_rq)->clock_task >> 20;
u64 now = cfs_rq_clock_task(cfs_rq) >> 20;
u64 decays;

decays = now - cfs_rq->last_decay;
Expand Down Expand Up @@ -1841,6 +1852,15 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
return &tg->cfs_bandwidth;
}

/* rq->task_clock normalized against any time this cfs_rq has spent throttled */
static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
{
if (unlikely(cfs_rq->throttle_count))
return cfs_rq->throttled_clock_task;

return rq_of(cfs_rq)->clock_task - cfs_rq->throttled_clock_task_time;
}

/* returns 0 on failure to allocate runtime */
static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
{
Expand Down Expand Up @@ -1991,6 +2011,10 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
cfs_rq->load_stamp += delta;
cfs_rq->load_last += delta;

/* adjust cfs_rq_clock_task() */
cfs_rq->throttled_clock_task_time += rq->clock_task -
cfs_rq->throttled_clock_task;

/* update entity weight now that we are on_rq again */
update_cfs_shares(cfs_rq);
}
Expand All @@ -2005,8 +2029,10 @@ static int tg_throttle_down(struct task_group *tg, void *data)
struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];

/* group is entering throttled state, record last load */
if (!cfs_rq->throttle_count)
if (!cfs_rq->throttle_count) {
update_cfs_load(cfs_rq, 0);
cfs_rq->throttled_clock_task = rq->clock_task;
}
cfs_rq->throttle_count++;

return 0;
Expand All @@ -2021,7 +2047,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)

se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];

/* account load preceding throttle */
/* freeze hierarchy runnable averages while throttled */
rcu_read_lock();
walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq);
rcu_read_unlock();
Expand All @@ -2045,7 +2071,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
rq->nr_running -= task_delta;

cfs_rq->throttled = 1;
cfs_rq->throttled_timestamp = rq->clock;
cfs_rq->throttled_clock = rq->clock;
raw_spin_lock(&cfs_b->lock);
list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq);
raw_spin_unlock(&cfs_b->lock);
Expand All @@ -2063,10 +2089,9 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)

cfs_rq->throttled = 0;
raw_spin_lock(&cfs_b->lock);
cfs_b->throttled_time += rq->clock - cfs_rq->throttled_timestamp;
cfs_b->throttled_time += rq->clock - cfs_rq->throttled_clock;
list_del_rcu(&cfs_rq->throttled_list);
raw_spin_unlock(&cfs_b->lock);
cfs_rq->throttled_timestamp = 0;

update_rq_clock(rq);
/* update hierarchical throttle state */
Expand Down Expand Up @@ -2466,8 +2491,13 @@ static void unthrottle_offline_cfs_rqs(struct rq *rq)
}

#else /* CONFIG_CFS_BANDWIDTH */
static __always_inline
void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec) {}
static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
{
return rq_of(cfs_rq)->clock_task;
}

static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
unsigned long delta_exec) {}
static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
Expand Down
3 changes: 2 additions & 1 deletion kernel/sched/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,8 @@ struct cfs_rq {
u64 runtime_expires;
s64 runtime_remaining;

u64 throttled_timestamp;
u64 throttled_clock, throttled_clock_task;
u64 throttled_clock_task_time;
int throttled, throttle_count;
struct list_head throttled_list;
#endif /* CONFIG_CFS_BANDWIDTH */
Expand Down

0 comments on commit f1b1728

Please sign in to comment.