sched, cgroup: Optimize load_balance_fair()
Use for_each_leaf_cfs_rq() instead of list_for_each_entry_rcu(); this way
load_balance_fair() only iterates those task_groups that actually have
tasks on the busiest rq, and we iterate them bottom-up, trying to move the
light groups before the heavier ones.
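
For context, a sketch of how for_each_leaf_cfs_rq() was defined in
kernel/sched_fair.c around this time (shown for illustration, it is not
part of this patch):

    #define for_each_leaf_cfs_rq(rq, cfs_rq) \
            list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)

It still uses list_for_each_entry_rcu() underneath, but walks only the
per-rq leaf_cfs_rq_list maintained by list_add_leaf_cfs_rq(), i.e. the
cfs_rqs that currently have load on this runqueue, in bottom-up order.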

No idea if it will actually work out to be beneficial in practice; does
anybody have a cgroup workload that might show a difference one way or
the other?

[ Also move update_h_load to sched_fair.c, losing the #ifdef-ery ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reviewed-by: Paul Turner <pjt@google.com>
Link: http://lkml.kernel.org/r/1310557009.2586.28.camel@twins
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Peter Zijlstra authored and Ingo Molnar committed Jul 21, 2011
1 parent 9598c82 commit 9763b67
Showing 2 changed files with 35 additions and 37 deletions.
32 changes: 0 additions & 32 deletions kernel/sched.c
@@ -1568,38 +1568,6 @@ static unsigned long cpu_avg_load_per_task(int cpu)
         return rq->avg_load_per_task;
 }
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-
-/*
- * Compute the cpu's hierarchical load factor for each task group.
- * This needs to be done in a top-down fashion because the load of a child
- * group is a fraction of its parents load.
- */
-static int tg_load_down(struct task_group *tg, void *data)
-{
-        unsigned long load;
-        long cpu = (long)data;
-
-        if (!tg->parent) {
-                load = cpu_rq(cpu)->load.weight;
-        } else {
-                load = tg->parent->cfs_rq[cpu]->h_load;
-                load *= tg->se[cpu]->load.weight;
-                load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
-        }
-
-        tg->cfs_rq[cpu]->h_load = load;
-
-        return 0;
-}
-
-static void update_h_load(long cpu)
-{
-        walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
-}
-
-#endif
-
 #ifdef CONFIG_PREEMPT
 
 static void double_rq_lock(struct rq *rq1, struct rq *rq2);
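The update_h_load()/tg_load_down() pair removed here (and re-added in
sched_fair.c below) depends on walk_tg_tree() calling the "down" visitor
on a parent before any of its children, so that
tg->parent->cfs_rq[cpu]->h_load is already current when a child is
visited. A simplified, purely illustrative recursive equivalent of that
pre-order walk (the real walk_tg_tree() in kernel/sched.c iterates rather
than recurses, and tg_walk_down is a hypothetical name):

    /* Illustrative sketch only, not the in-tree implementation. */
    static void tg_walk_down(struct task_group *tg, long cpu)
    {
            struct task_group *child;

            tg_load_down(tg, (void *)cpu);          /* parent first ...          */
            list_for_each_entry_rcu(child, &tg->children, siblings)
                    tg_walk_down(child, cpu);       /* ... then each child group */
    }

Starting this from root_task_group yields every group's h_load before
load_balance_fair() consumes it.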
40 changes: 35 additions & 5 deletions kernel/sched_fair.c
@@ -2232,26 +2232,56 @@ static void update_shares(int cpu)
         struct rq *rq = cpu_rq(cpu);
 
         rcu_read_lock();
+        /*
+         * Iterates the task_group tree in a bottom up fashion, see
+         * list_add_leaf_cfs_rq() for details.
+         */
         for_each_leaf_cfs_rq(rq, cfs_rq)
                 update_shares_cpu(cfs_rq->tg, cpu);
         rcu_read_unlock();
 }
 
+/*
+ * Compute the cpu's hierarchical load factor for each task group.
+ * This needs to be done in a top-down fashion because the load of a child
+ * group is a fraction of its parents load.
+ */
+static int tg_load_down(struct task_group *tg, void *data)
+{
+        unsigned long load;
+        long cpu = (long)data;
+
+        if (!tg->parent) {
+                load = cpu_rq(cpu)->load.weight;
+        } else {
+                load = tg->parent->cfs_rq[cpu]->h_load;
+                load *= tg->se[cpu]->load.weight;
+                load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
+        }
+
+        tg->cfs_rq[cpu]->h_load = load;
+
+        return 0;
+}
+
+static void update_h_load(long cpu)
+{
+        walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
+}
+
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
                   unsigned long max_load_move,
                   struct sched_domain *sd, enum cpu_idle_type idle,
                   int *all_pinned)
 {
         long rem_load_move = max_load_move;
-        int busiest_cpu = cpu_of(busiest);
-        struct task_group *tg;
+        struct cfs_rq *busiest_cfs_rq;
 
         rcu_read_lock();
-        update_h_load(busiest_cpu);
+        update_h_load(cpu_of(busiest));
 
-        list_for_each_entry_rcu(tg, &task_groups, list) {
-                struct cfs_rq *busiest_cfs_rq = tg->cfs_rq[busiest_cpu];
+        for_each_leaf_cfs_rq(busiest, busiest_cfs_rq) {
                 unsigned long busiest_h_load = busiest_cfs_rq->h_load;
                 unsigned long busiest_weight = busiest_cfs_rq->load.weight;
                 u64 rem_load, moved_load;
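To make the h_load arithmetic in tg_load_down() concrete, a worked example
with made-up weights: assume the root cfs_rq on some cpu carries group A's
sched_entity (load.weight 1024) plus two nice-0 tasks (1024 each), so
cpu_rq(cpu)->load.weight is 3072, and A's own cfs_rq there carries child
group B's sched_entity (load.weight 1024) plus one task (1024), so its
load.weight is 2048. Walking top-down:

    root->h_load = 3072                              /* the !tg->parent case        */
    A->h_load    = 3072 * 1024 / (3072 + 1) ~= 1023  /* A holds ~1/3 of the cpu     */
    B->h_load    = 1023 * 1024 / (2048 + 1) ~=  511  /* ~1/2 of A, ~1/6 of the cpu  */

The +1 in the divisor only guards against dividing by a zero group weight;
h_load is then what lets load_balance_fair() translate a group-internal
task weight into its contribution to the cpu's total load.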
