sched/fair: Remove update of blocked load from newidle_balance
newidle_balance runs with both preemption and IRQs disabled, which
prevents local IRQs from being serviced during this period. The time
spent updating the blocked load of CPUs grows with the number of CPU
cgroups that carry non-decayed load, stretching this critical section
to an uncontrolled length.

Remove the update from newidle_balance and trigger a normal ILB that
will take care of the update instead.

This reduces the IRQ latency from O(nr_cgroups * nr_nohz_cpus) to
O(nr_cgroups).

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Link: https://lkml.kernel.org/r/20210224133007.28644-2-vincent.guittot@linaro.org
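
To put the complexity claim above in concrete terms, here is a small
stand-alone C model (an illustrative sketch only, not kernel code;
NR_CGROUPS, NR_NOHZ_CPUS and decay_one() are made-up stand-ins) that
counts the units of work done with IRQs disabled before and after this
change: before, the newly idle CPU decays the blocked load of every
nohz-idle CPU's cgroup hierarchy inside the IRQ-off section; after,
only the local CPU's cgroups are touched there and the rest is
deferred to the kicked ILB.

/*
 * Illustrative model of the IRQ-off work in newidle_balance.
 * Not kernel code; the counts below are hypothetical example values.
 */
#include <stdio.h>

#define NR_CGROUPS	200	/* cgroups with non-decayed blocked load */
#define NR_NOHZ_CPUS	64	/* nohz-idle CPUs whose load must be decayed */

/* Stand-in for the cost of decaying one cgroup's blocked load on one CPU. */
static unsigned long decay_one(int cpu, int cgroup)
{
	(void)cpu;
	(void)cgroup;
	return 1;
}

int main(void)
{
	unsigned long before = 0, after = 0;
	int cpu, cg;

	/* Before: newidle_balance walks every nohz CPU with IRQs off. */
	for (cpu = 0; cpu < NR_NOHZ_CPUS; cpu++)
		for (cg = 0; cg < NR_CGROUPS; cg++)
			before += decay_one(cpu, cg);

	/*
	 * After: only the local CPU's cgroups are updated with IRQs off;
	 * the other idle CPUs are handled later by the kicked ILB.
	 */
	for (cg = 0; cg < NR_CGROUPS; cg++)
		after += decay_one(0, cg);

	printf("IRQ-off work before: %lu units (O(nr_cgroups * nr_nohz_cpus))\n", before);
	printf("IRQ-off work after:  %lu units (O(nr_cgroups))\n", after);
	return 0;
}

With these example values the model reports 12800 units of IRQ-off work
before the change versus 200 after it.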
Vincent Guittot authored and Ingo Molnar committed Mar 6, 2021
1 parent 183f47f commit 0826530
33 changes: 5 additions & 28 deletions kernel/sched/fair.c
@@ -7392,8 +7392,6 @@ enum migration_type {
 #define LBF_NEED_BREAK  0x02
 #define LBF_DST_PINNED  0x04
 #define LBF_SOME_PINNED 0x08
-#define LBF_NOHZ_STATS  0x10
-#define LBF_NOHZ_AGAIN  0x20
 
 struct lb_env {
         struct sched_domain     *sd;
@@ -8397,9 +8395,6 @@ static inline void update_sg_lb_stats(struct lb_env *env,
         for_each_cpu_and(i, sched_group_span(group), env->cpus) {
                 struct rq *rq = cpu_rq(i);
 
-                if ((env->flags & LBF_NOHZ_STATS) && update_nohz_stats(rq, false))
-                        env->flags |= LBF_NOHZ_AGAIN;
-
                 sgs->group_load += cpu_load(rq);
                 sgs->group_util += cpu_util(i);
                 sgs->group_runnable += cpu_runnable(rq);
@@ -8940,11 +8935,6 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
         struct sg_lb_stats tmp_sgs;
         int sg_status = 0;
 
-#ifdef CONFIG_NO_HZ_COMMON
-        if (env->idle == CPU_NEWLY_IDLE && READ_ONCE(nohz.has_blocked))
-                env->flags |= LBF_NOHZ_STATS;
-#endif
-
         do {
                 struct sg_lb_stats *sgs = &tmp_sgs;
                 int local_group;
@@ -8981,14 +8971,6 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
         /* Tag domain that child domain prefers tasks go to siblings first */
         sds->prefer_sibling = child && child->flags & SD_PREFER_SIBLING;
 
-#ifdef CONFIG_NO_HZ_COMMON
-        if ((env->flags & LBF_NOHZ_AGAIN) &&
-            cpumask_subset(nohz.idle_cpus_mask, sched_domain_span(env->sd))) {
-
-                WRITE_ONCE(nohz.next_blocked,
-                           jiffies + msecs_to_jiffies(LOAD_AVG_PERIOD));
-        }
-#endif
 
         if (env->sd->flags & SD_NUMA)
                 env->fbq_type = fbq_classify_group(&sds->busiest_stat);
@@ -10517,16 +10499,11 @@ static void nohz_newidle_balance(struct rq *this_rq)
             time_before(jiffies, READ_ONCE(nohz.next_blocked)))
                 return;
 
-        raw_spin_unlock(&this_rq->lock);
         /*
-         * This CPU is going to be idle and blocked load of idle CPUs
-         * need to be updated. Run the ilb locally as it is a good
-         * candidate for ilb instead of waking up another idle CPU.
-         * Kick an normal ilb if we failed to do the update.
+         * Blocked load of idle CPUs need to be updated.
+         * Kick an ILB to update statistics.
          */
-        if (!_nohz_idle_balance(this_rq, NOHZ_STATS_KICK, CPU_NEWLY_IDLE))
-                kick_ilb(NOHZ_STATS_KICK);
-        raw_spin_lock(&this_rq->lock);
+        kick_ilb(NOHZ_STATS_KICK);
 }
 
 #else /* !CONFIG_NO_HZ_COMMON */
@@ -10587,8 +10564,6 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
                         update_next_balance(sd, &next_balance);
                 rcu_read_unlock();
 
-                nohz_newidle_balance(this_rq);
-
                 goto out;
         }

@@ -10654,6 +10629,8 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
 
         if (pulled_task)
                 this_rq->idle_stamp = 0;
+        else
+                nohz_newidle_balance(this_rq);
 
         rq_repin_lock(this_rq, rf);

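For readability, this is roughly how nohz_newidle_balance() reads once
the patch is applied, reassembled from the hunk above. The early-return
guards before the shown context are paraphrased from the surrounding
kernel code of that era rather than from this diff, so treat this as an
approximate sketch, not an authoritative listing.

static void nohz_newidle_balance(struct rq *this_rq)
{
        int this_cpu = this_rq->cpu;

        /* This CPU doesn't want to be disturbed by scheduler housekeeping. */
        if (!housekeeping_cpu(this_cpu, HK_FLAG_SCHED))
                return;

        /* Will wake up very soon; no time for doing anything else. */
        if (this_rq->avg_idle < sysctl_sched_migration_cost)
                return;

        /* No blocked load to update, or it was refreshed recently. */
        if (!READ_ONCE(nohz.has_blocked) ||
            time_before(jiffies, READ_ONCE(nohz.next_blocked)))
                return;

        /*
         * Blocked load of idle CPUs need to be updated.
         * Kick an ILB to update statistics.
         */
        kick_ilb(NOHZ_STATS_KICK);
}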
