sched/fair: Revert sched-domain iteration breakage
Patches c22402a ("sched/fair: Let minimally loaded cpu balance the
group") and 0ce9047 ("sched/fair: Add some serialization to the
sched_domain load-balance walk") are horribly broken so revert them.

The problem is that while it sounds good to have the minimally loaded
cpu do the pulling of more load, the way we walk the domains gives
absolutely no guarantee this cpu will actually get to that domain. In
fact it's very likely it won't. Therefore, the higher up the tree we
get, the less likely it is we'll balance at all.

The first-of-mask approach always walks up; while sucky in that it
accumulates load on the first cpu and needs extra passes to spread it
out, it at least guarantees a cpu gets up that far and load-balancing
happens at all.
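
As a rough illustration (a standalone toy model, not kernel code; the
topology and the local_group_mask()/first_cpu() helpers are invented for
the example), the first-of-mask gate looks like this: at each domain
level only the first cpu of its local group keeps walking up, so that
cpu always reaches the top and load-balancing always happens somewhere.

/* Illustrative only: 4 cpus, two domain levels (pairs, then the whole machine). */
#include <stdio.h>

#define NR_CPUS		4
#define NR_LEVELS	2

/* Invented helper: bitmask of the cpu's local group at a given level. */
static unsigned int local_group_mask(int cpu, int level)
{
	if (level == 0)
		return 1u << cpu;	/* bottom level: groups are single cpus */
	return cpu < 2 ? 0x3 : 0xc;	/* top level: groups are the pairs {0,1},{2,3} */
}

/* Invented helper: lowest set bit, i.e. the "first cpu of the mask". */
static int first_cpu(unsigned int mask)
{
	return __builtin_ctz(mask);
}

int main(void)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		for (int level = 0; level < NR_LEVELS; level++) {
			/*
			 * The gate: only the first cpu of the local group continues
			 * upward, mirroring "*balance = 0" in update_sg_lb_stats()
			 * and "if (!balance) break;" in rebalance_domains().
			 */
			if (first_cpu(local_group_mask(cpu, level)) != cpu) {
				printf("cpu%d stops before level %d\n", cpu, level);
				break;
			}
			printf("cpu%d balances level %d\n", cpu, level);
		}
	}
	return 0;
}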

Since it's now always the first cpu that walks up, and idle cpus should
always be able to balance so they get a task as fast as possible, we
can also do away with the added serialization.
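
The gate we revert to is a purely local, deterministic test, roughly of
the following shape (a simplified sketch with invented parameter names,
not the actual update_sg_lb_stats() code shown in the fair.c hunk
below): every cpu in the group computes the same answer from the same
group state, so no cmpxchg() handshake is needed to serialize them.

#include <stdio.h>

/*
 * Simplified sketch, not the kernel function: pick the cpu that balances
 * on behalf of its local group.  Prefer the first idle cpu so an idle cpu
 * picks up a task as fast as possible, otherwise fall back to the first
 * cpu of the group mask.  Parameter names are invented for the example.
 */
static int should_balance_group(int this_cpu, int first_idle_cpu_in_group,
				int first_cpu_in_group)
{
	int balance_cpu = first_idle_cpu_in_group >= 0 ?
			  first_idle_cpu_in_group : first_cpu_in_group;

	return balance_cpu == this_cpu;	/* everyone else sets *balance = 0 */
}

int main(void)
{
	/* group {0,1,2,3}, cpu 2 idle: only cpu 2 proceeds with the balance */
	for (int cpu = 0; cpu < 4; cpu++)
		printf("cpu%d: %s\n", cpu,
		       should_balance_group(cpu, 2, 0) ? "balances" : "skips");
	return 0;
}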

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-rpuhs5s56aiv1aw7khv9zkw6@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Peter Zijlstra authored and Ingo Molnar committed May 14, 2012
1 parent 316ad24 commit 04f733b
Showing 3 changed files with 7 additions and 15 deletions.
1 change: 0 additions & 1 deletion include/linux/sched.h
@@ -927,7 +927,6 @@ struct sched_group_power {
 struct sched_group {
 	struct sched_group *next;	/* Must be a circular list */
 	atomic_t ref;
-	int balance_cpu;
 
 	unsigned int group_weight;
 	struct sched_group_power *sgp;
2 changes: 0 additions & 2 deletions kernel/sched/core.c
@@ -5976,7 +5976,6 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 
 		sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span));
 		atomic_inc(&sg->sgp->ref);
-		sg->balance_cpu = -1;
 
 		if (cpumask_test_cpu(cpu, sg_span))
			groups = sg;
@@ -6052,7 +6051,6 @@ build_sched_groups(struct sched_domain *sd, int cpu)
 
 		cpumask_clear(sched_group_cpus(sg));
 		sg->sgp->power = 0;
-		sg->balance_cpu = -1;
 
 		for_each_cpu(j, span) {
 			if (get_group(j, sdd, NULL) != group)
19 changes: 7 additions & 12 deletions kernel/sched/fair.c
@@ -3776,8 +3776,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 			int *balance, struct sg_lb_stats *sgs)
 {
 	unsigned long load, max_cpu_load, min_cpu_load, max_nr_running;
-	unsigned int balance_cpu = -1;
-	unsigned long balance_load = ~0UL;
+	unsigned int balance_cpu = -1, first_idle_cpu = 0;
 	unsigned long avg_load_per_task = 0;
 	int i;
 
@@ -3794,11 +3793,12 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 
 		/* Bias balancing toward cpus of our domain */
 		if (local_group) {
-			load = target_load(i, load_idx);
-			if (load < balance_load || idle_cpu(i)) {
-				balance_load = load;
+			if (idle_cpu(i) && !first_idle_cpu) {
+				first_idle_cpu = 1;
 				balance_cpu = i;
 			}
+
+			load = target_load(i, load_idx);
 		} else {
 			load = source_load(i, load_idx);
 			if (load > max_cpu_load) {
@@ -3824,8 +3824,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 	 */
 	if (local_group) {
 		if (env->idle != CPU_NEWLY_IDLE) {
-			if (balance_cpu != env->dst_cpu ||
-			    cmpxchg(&group->balance_cpu, -1, balance_cpu) != -1) {
+			if (balance_cpu != env->dst_cpu) {
 				*balance = 0;
 				return;
 			}
@@ -4919,7 +4918,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
 	int balance = 1;
 	struct rq *rq = cpu_rq(cpu);
 	unsigned long interval;
-	struct sched_domain *sd, *last = NULL;
+	struct sched_domain *sd;
 	/* Earliest time when we have to do rebalance again */
 	unsigned long next_balance = jiffies + 60*HZ;
 	int update_next_balance = 0;
@@ -4929,7 +4928,6 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
 
 	rcu_read_lock();
 	for_each_domain(cpu, sd) {
-		last = sd;
 		if (!(sd->flags & SD_LOAD_BALANCE))
 			continue;
 
@@ -4974,9 +4972,6 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
 		if (!balance)
 			break;
 	}
-	for (sd = last; sd; sd = sd->child)
-		(void)cmpxchg(&sd->groups->balance_cpu, cpu, -1);
-
 	rcu_read_unlock();
 
 	/*
