sched: Create a helper function to calculate sched_group stats for fbg()
Impact: cleanup

Create a helper function named update_sg_lb_stats() which
can be invoked to calculate the individual group's statistics
in find_busiest_group().

This reduces the length of find_busiest_group() considerably.
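
To make the shape of the refactor concrete, here is a condensed, stand-alone
sketch of the same pattern (plain C, not kernel code: group_stats,
update_group_stats() and the per-CPU arrays are invented stand-ins for
sg_lb_stats and the runqueue data). The caller zeroes a per-group stats
structure and delegates the accounting loop to a helper, which is what
find_busiest_group() now does via update_sg_lb_stats():

	/* Illustration only -- simplified stand-in for the kernel's sg_lb_stats. */
	#include <stdio.h>
	#include <string.h>

	struct group_stats {
		unsigned long group_load;	/* sum of per-CPU load in the group */
		unsigned long sum_nr_running;	/* total runnable tasks in the group */
	};

	/* Tally one group's statistics, analogous to update_sg_lb_stats(). */
	static void update_group_stats(const unsigned long *cpu_load,
				       const unsigned int *cpu_nr_running,
				       int first_cpu, int last_cpu,
				       struct group_stats *gs)
	{
		int i;

		for (i = first_cpu; i <= last_cpu; i++) {
			gs->group_load += cpu_load[i];
			gs->sum_nr_running += cpu_nr_running[i];
		}
	}

	int main(void)
	{
		/* Two "groups": CPUs 0-1 and CPUs 2-3. */
		unsigned long load[4] = { 100, 300, 50, 75 };
		unsigned int nr_running[4] = { 1, 3, 0, 1 };
		struct group_stats gs;

		/* The caller (analogous to find_busiest_group()) only zeroes
		 * the stats struct and delegates the per-group accounting. */
		memset(&gs, 0, sizeof(gs));
		update_group_stats(load, nr_running, 0, 1, &gs);
		printf("group 0: load=%lu nr_running=%lu\n",
		       gs.group_load, gs.sum_nr_running);

		memset(&gs, 0, sizeof(gs));
		update_group_stats(load, nr_running, 2, 3, &gs);
		printf("group 1: load=%lu nr_running=%lu\n",
		       gs.group_load, gs.sum_nr_running);
		return 0;
	}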

Credit: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Gautham R Shenoy <ego@in.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: "Balbir Singh" <balbir@in.ibm.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: "Dhaval Giani" <dhaval@linux.vnet.ibm.com>
Cc: Bharata B Rao <bharata@linux.vnet.ibm.com>
LKML-Reference: <20090325091351.13992.43461.stgit@sofia.in.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Gautham R Shenoy authored and Ingo Molnar committed Mar 25, 2009
1 parent 381be78 commit 1f8c553
Showing 1 changed file with 100 additions and 75 deletions.
175 changes: 100 additions & 75 deletions kernel/sched.c
@@ -3237,6 +3237,103 @@ static inline int get_sd_load_idx(struct sched_domain *sd,
 
 	return load_idx;
 }
+
+
+/**
+ * update_sg_lb_stats - Update sched_group's statistics for load balancing.
+ * @group: sched_group whose statistics are to be updated.
+ * @this_cpu: Cpu for which load balance is currently performed.
+ * @idle: Idle status of this_cpu
+ * @load_idx: Load index of sched_domain of this_cpu for load calc.
+ * @sd_idle: Idle status of the sched_domain containing group.
+ * @local_group: Does group contain this_cpu.
+ * @cpus: Set of cpus considered for load balancing.
+ * @balance: Should we balance.
+ * @sgs: variable to hold the statistics for this group.
+ */
+static inline void update_sg_lb_stats(struct sched_group *group, int this_cpu,
+			enum cpu_idle_type idle, int load_idx, int *sd_idle,
+			int local_group, const struct cpumask *cpus,
+			int *balance, struct sg_lb_stats *sgs)
+{
+	unsigned long load, max_cpu_load, min_cpu_load;
+	int i;
+	unsigned int balance_cpu = -1, first_idle_cpu = 0;
+	unsigned long sum_avg_load_per_task;
+	unsigned long avg_load_per_task;
+
+	if (local_group)
+		balance_cpu = group_first_cpu(group);
+
+	/* Tally up the load of all CPUs in the group */
+	sum_avg_load_per_task = avg_load_per_task = 0;
+	max_cpu_load = 0;
+	min_cpu_load = ~0UL;
+
+	for_each_cpu_and(i, sched_group_cpus(group), cpus) {
+		struct rq *rq = cpu_rq(i);
+
+		if (*sd_idle && rq->nr_running)
+			*sd_idle = 0;
+
+		/* Bias balancing toward cpus of our domain */
+		if (local_group) {
+			if (idle_cpu(i) && !first_idle_cpu) {
+				first_idle_cpu = 1;
+				balance_cpu = i;
+			}
+
+			load = target_load(i, load_idx);
+		} else {
+			load = source_load(i, load_idx);
+			if (load > max_cpu_load)
+				max_cpu_load = load;
+			if (min_cpu_load > load)
+				min_cpu_load = load;
+		}
+
+		sgs->group_load += load;
+		sgs->sum_nr_running += rq->nr_running;
+		sgs->sum_weighted_load += weighted_cpuload(i);
+
+		sum_avg_load_per_task += cpu_avg_load_per_task(i);
+	}
+
+	/*
+	 * First idle cpu or the first cpu(busiest) in this sched group
+	 * is eligible for doing load balancing at this and above
+	 * domains. In the newly idle case, we will allow all the cpu's
+	 * to do the newly idle load balance.
+	 */
+	if (idle != CPU_NEWLY_IDLE && local_group &&
+	    balance_cpu != this_cpu && balance) {
+		*balance = 0;
+		return;
+	}
+
+	/* Adjust by relative CPU power of the group */
+	sgs->avg_load = sg_div_cpu_power(group,
+			sgs->group_load * SCHED_LOAD_SCALE);
+
+
+	/*
+	 * Consider the group unbalanced when the imbalance is larger
+	 * than the average weight of two tasks.
+	 *
+	 * APZ: with cgroup the avg task weight can vary wildly and
+	 * might not be a suitable number - should we keep a
+	 * normalized nr_running number somewhere that negates
+	 * the hierarchy?
+	 */
+	avg_load_per_task = sg_div_cpu_power(group,
+			sum_avg_load_per_task * SCHED_LOAD_SCALE);
+
+	if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
+		sgs->group_imb = 1;
+
+	sgs->group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
+
+}
 /******* find_busiest_group() helpers end here *********************/
 
 /*
@@ -3270,92 +3367,20 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 
 	do {
 		struct sg_lb_stats sgs;
-		unsigned long load, max_cpu_load, min_cpu_load;
 		int local_group;
-		int i;
-		unsigned int balance_cpu = -1, first_idle_cpu = 0;
-		unsigned long sum_avg_load_per_task;
-		unsigned long avg_load_per_task;
 
 		local_group = cpumask_test_cpu(this_cpu,
 					       sched_group_cpus(group));
 		memset(&sgs, 0, sizeof(sgs));
+		update_sg_lb_stats(group, this_cpu, idle, load_idx, sd_idle,
+					local_group, cpus, balance, &sgs);
 
-		if (local_group)
-			balance_cpu = group_first_cpu(group);
-
-		/* Tally up the load of all CPUs in the group */
-		sum_avg_load_per_task = avg_load_per_task = 0;
-
-		max_cpu_load = 0;
-		min_cpu_load = ~0UL;
-
-		for_each_cpu_and(i, sched_group_cpus(group), cpus) {
-			struct rq *rq = cpu_rq(i);
-
-			if (*sd_idle && rq->nr_running)
-				*sd_idle = 0;
-
-			/* Bias balancing toward cpus of our domain */
-			if (local_group) {
-				if (idle_cpu(i) && !first_idle_cpu) {
-					first_idle_cpu = 1;
-					balance_cpu = i;
-				}
-
-				load = target_load(i, load_idx);
-			} else {
-				load = source_load(i, load_idx);
-				if (load > max_cpu_load)
-					max_cpu_load = load;
-				if (min_cpu_load > load)
-					min_cpu_load = load;
-			}
-
-			sgs.group_load += load;
-			sgs.sum_nr_running += rq->nr_running;
-			sgs.sum_weighted_load += weighted_cpuload(i);
-
-			sum_avg_load_per_task += cpu_avg_load_per_task(i);
-		}
-
-		/*
-		 * First idle cpu or the first cpu(busiest) in this sched group
-		 * is eligible for doing load balancing at this and above
-		 * domains. In the newly idle case, we will allow all the cpu's
-		 * to do the newly idle load balance.
-		 */
-		if (idle != CPU_NEWLY_IDLE && local_group &&
-		    balance_cpu != this_cpu && balance) {
-			*balance = 0;
+		if (balance && !(*balance))
 			goto ret;
-		}
 
 		total_load += sgs.group_load;
 		total_pwr += group->__cpu_power;
 
-		/* Adjust by relative CPU power of the group */
-		sgs.avg_load = sg_div_cpu_power(group,
-				sgs.group_load * SCHED_LOAD_SCALE);
-
-
-		/*
-		 * Consider the group unbalanced when the imbalance is larger
-		 * than the average weight of two tasks.
-		 *
-		 * APZ: with cgroup the avg task weight can vary wildly and
-		 * might not be a suitable number - should we keep a
-		 * normalized nr_running number somewhere that negates
-		 * the hierarchy?
-		 */
-		avg_load_per_task = sg_div_cpu_power(group,
-				sum_avg_load_per_task * SCHED_LOAD_SCALE);
-
-		if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
-			sgs.group_imb = 1;
-
-		sgs.group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
-
 		if (local_group) {
 			this_load = sgs.avg_load;
 			this = group;
