Skip to content

Commit

Permalink
sched/pelt: Sync util/runnable_sum with PELT window when propagating
Browse files Browse the repository at this point in the history
update_tg_cfs_*() propagate the impact of the attach/detach of an entity
down into the cfs_rq hierarchy and must stay in sync with the current PELT
window.

Even if we can't sync the child cfs_rq and its group se, we can sync the group
se and its parent cfs_rq with the current position in the PELT window. In fact,
we must keep them in sync in order to also stay synced with the other entities
and group entities that are already attached to the cfs_rq.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200506155301.14288-1-vincent.guittot@linaro.org
  • Loading branch information
Vincent Guittot authored and Peter Zijlstra committed May 19, 2020
1 parent 12aa258 commit 95d6859
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 22 deletions.
49 changes: 27 additions & 22 deletions kernel/sched/fair.c
Original file line number Diff line number Diff line change
Expand Up @@ -3441,52 +3441,46 @@ static inline void
/*
 * update_tg_cfs_util - propagate a group cfs_rq's utilization to its group
 * sched_entity and to the parent cfs_rq.
 * @cfs_rq:  parent cfs_rq the group entity @se is attached to
 * @se:      group sched_entity representing @gcfs_rq
 * @gcfs_rq: group cfs_rq whose util_avg is being propagated
 *
 * Copies @gcfs_rq's util_avg into @se, applies the resulting delta to
 * @cfs_rq's util_avg and recomputes both util_sum values from their
 * util_avg. Returns early when the two util_avg values already match.
 */
update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
{
	long delta = gcfs_rq->avg.util_avg - se->avg.util_avg;
	/*
	 * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
	 * See ___update_load_avg() for details.
	 */
	u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;

	/* Nothing to update */
	if (!delta)
		return;

	/*
	 * Set new sched_entity's utilization. The sum is derived from the avg
	 * with the window-aware divider rather than plain LOAD_AVG_MAX so that
	 * it matches the current position in the PELT window.
	 */
	se->avg.util_avg = gcfs_rq->avg.util_avg;
	se->avg.util_sum = se->avg.util_avg * divider;

	/* Update parent cfs_rq utilization, using the same divider */
	add_positive(&cfs_rq->avg.util_avg, delta);
	cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
}

/*
 * update_tg_cfs_runnable - propagate a group cfs_rq's runnable signal to its
 * group sched_entity and to the parent cfs_rq.
 * @cfs_rq:  parent cfs_rq the group entity @se is attached to
 * @se:      group sched_entity representing @gcfs_rq
 * @gcfs_rq: group cfs_rq whose runnable_avg is being propagated
 *
 * Copies @gcfs_rq's runnable_avg into @se, applies the resulting delta to
 * @cfs_rq's runnable_avg and recomputes both runnable_sum values from their
 * runnable_avg. Returns early when the two runnable_avg values already match.
 */
static inline void
update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
{
	long delta = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg;
	/*
	 * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
	 * See ___update_load_avg() for details.
	 */
	u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;

	/* Nothing to update */
	if (!delta)
		return;

	/*
	 * Set new sched_entity's runnable. The sum is derived from the avg
	 * with the window-aware divider rather than plain LOAD_AVG_MAX so that
	 * it matches the current position in the PELT window.
	 */
	se->avg.runnable_avg = gcfs_rq->avg.runnable_avg;
	se->avg.runnable_sum = se->avg.runnable_avg * divider;

	/* Update parent cfs_rq runnable, using the same divider */
	add_positive(&cfs_rq->avg.runnable_avg, delta);
	cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider;
}

static inline void
Expand All @@ -3496,19 +3490,26 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
unsigned long load_avg;
u64 load_sum = 0;
s64 delta_sum;
u32 divider;

if (!runnable_sum)
return;

gcfs_rq->prop_runnable_sum = 0;

/*
* cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
* See ___update_load_avg() for details.
*/
divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;

if (runnable_sum >= 0) {
/*
* Add runnable; clip at LOAD_AVG_MAX. Reflects that until
* the CPU is saturated running == runnable.
*/
runnable_sum += se->avg.load_sum;
runnable_sum = min(runnable_sum, (long)LOAD_AVG_MAX);
runnable_sum = min_t(long, runnable_sum, divider);
} else {
/*
* Estimate the new unweighted runnable_sum of the gcfs_rq by
Expand All @@ -3533,7 +3534,7 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
runnable_sum = max(runnable_sum, running_sum);

load_sum = (s64)se_weight(se) * runnable_sum;
load_avg = div_s64(load_sum, LOAD_AVG_MAX);
load_avg = div_s64(load_sum, divider);

delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum;
delta_avg = load_avg - se->avg.load_avg;
Expand Down Expand Up @@ -3697,6 +3698,10 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
*/
static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
/*
* cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
* See ___update_load_avg() for details.
*/
u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;

/*
Expand Down
24 changes: 24 additions & 0 deletions kernel/sched/pelt.c
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,30 @@ ___update_load_sum(u64 now, struct sched_avg *sa,
return 1;
}

/*
* When syncing *_avg with *_sum, we must take into account the current
* position in the PELT segment; otherwise the remaining part of the segment
* will be considered as idle time even though it has not yet elapsed, and
* this will generate unwanted oscillation in the range [1002..1024[.
*
* The max value of *_sum varies with the position in the time segment and is
* equal to:
*
* LOAD_AVG_MAX*y + sa->period_contrib
*
* which can be simplified into:
*
* LOAD_AVG_MAX - 1024 + sa->period_contrib
*
* because LOAD_AVG_MAX*y == LOAD_AVG_MAX-1024
*
* The same care must be taken when a sched entity is added, updated or
* removed from a cfs_rq and we need to update sched_avg. Scheduler entities
* and the cfs rq, to which they are attached, have the same position in the
* time segment because they use the same clock. This means that we can use
* the period_contrib of cfs_rq when updating the sched_avg of a sched_entity
* if it's more convenient.
*/
static __always_inline void
___update_load_avg(struct sched_avg *sa, unsigned long load)
{
Expand Down

0 comments on commit 95d6859

Please sign in to comment.