Commit deffe13

---
r: 205422
b: refs/heads/master
c: 532cb4c
h: refs/heads/master
v: v3
Michael Neuling authored and Ingo Molnar committed Jun 9, 2010
1 parent 5b919f8 commit deffe13
Showing 4 changed files with 127 additions and 19 deletions.
2 changes: 1 addition & 1 deletion [refs]
@@ -1,2 +1,2 @@
---
refs/heads/master: 9d5efe05eb0c904545a28b19c18b949f23334de0
refs/heads/master: 532cb4c401e225b084c14d6bd6a2f8ee561de2f1
4 changes: 3 additions & 1 deletion trunk/include/linux/sched.h
@@ -801,7 +801,7 @@ enum cpu_idle_type {
#define SD_POWERSAVINGS_BALANCE 0x0100 /* Balance for power savings */
#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */
#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */

#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */
#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */

enum powersavings_balance_level {
@@ -836,6 +836,8 @@ static inline int sd_balance_for_package_power(void)
return SD_PREFER_SIBLING;
}

extern int __weak arch_sd_sibiling_asym_packing(void);

/*
* Optimise SD flags for power savings:
* SD_BALANCE_NEWIDLE helps aggressive task consolidation and power savings.
1 change: 1 addition & 0 deletions trunk/include/linux/topology.h
@@ -103,6 +103,7 @@ int arch_update_cpu_topology(void);
| 1*SD_SHARE_PKG_RESOURCES \
| 0*SD_SERIALIZE \
| 0*SD_PREFER_SIBLING \
| arch_sd_sibiling_asym_packing() \
, \
.last_balance = jiffies, \
.balance_interval = 1, \
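
For readers unfamiliar with the multiply-by-0/1 idiom in the SD_*_INIT initializers above, here is a small standalone sketch (userspace, flag values copied from the sched.h hunk, the weak arch hook replaced by a stand-in) of how the expression resolves once an architecture opts into packing:

#include <stdio.h>

#define SD_SHARE_PKG_RESOURCES	0x0200
#define SD_SERIALIZE		0x0400
#define SD_ASYM_PACKING		0x0800
#define SD_PREFER_SIBLING	0x1000

/* Stand-in for the weak arch hook; it returns 0 or SD_ASYM_PACKING. */
static int arch_hook(void) { return SD_ASYM_PACKING; }

int main(void)
{
	/* Flags multiplied by 1 are kept, flags multiplied by 0 drop out,
	 * and the arch hook ORs in SD_ASYM_PACKING only when it opts in. */
	unsigned int flags = 1 * SD_SHARE_PKG_RESOURCES
			   | 0 * SD_SERIALIZE
			   | 0 * SD_PREFER_SIBLING
			   | arch_hook();

	printf("flags = 0x%04x\n", flags);	/* prints 0x0a00 */
	return 0;
}
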
139 changes: 122 additions & 17 deletions trunk/kernel/sched_fair.c
@@ -2457,12 +2457,54 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
sgs->group_capacity = fix_small_capacity(sd, group);
}

/**
* update_sd_pick_busiest - return true if @sg is the busiest group
* @sd: sched_domain whose statistics are to be checked
* @sds: sched_domain statistics
* @sg: sched_group candidate to be checked for being the busiest
* @sgs: sched_group statistics
*
* Determine if @sg is a busier group than the previously selected
* busiest group.
*/
static bool update_sd_pick_busiest(struct sched_domain *sd,
struct sd_lb_stats *sds,
struct sched_group *sg,
struct sg_lb_stats *sgs,
int this_cpu)
{
if (sgs->avg_load <= sds->max_load)
return false;

if (sgs->sum_nr_running > sgs->group_capacity)
return true;

if (sgs->group_imb)
return true;

/*
* ASYM_PACKING needs to move all the work to the lowest
* numbered CPUs in the group, therefore mark all groups
* numbered higher than ourselves as busy.
*/
if ((sd->flags & SD_ASYM_PACKING) && sgs->sum_nr_running &&
this_cpu < group_first_cpu(sg)) {
if (!sds->busiest)
return true;

if (group_first_cpu(sds->busiest) > group_first_cpu(sg))
return true;
}

return false;
}
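
The ASYM_PACKING branch above reduces to an ordering test on first-CPU numbers. A simplified userspace sketch (made-up struct, not the kernel's types) of that selection rule:

#include <stdbool.h>

struct grp {
	int first_cpu;		/* lowest CPU number in the group */
	int nr_running;		/* runnable tasks in the group */
};

/*
 * Sketch of the SD_ASYM_PACKING preference in update_sd_pick_busiest():
 * only groups with work whose first CPU is numbered higher than this_cpu
 * are candidates, and among candidates the lowest-numbered one wins, so
 * load drains toward the low-numbered CPUs.
 */
static bool asym_prefer(const struct grp *busiest, const struct grp *sg,
			int this_cpu)
{
	if (!sg->nr_running || this_cpu >= sg->first_cpu)
		return false;			/* nothing to pull downward */
	if (!busiest)
		return true;			/* first candidate wins */
	return busiest->first_cpu > sg->first_cpu;	/* prefer the lower group */
}

With this_cpu = 0 and busy candidate groups starting at CPUs 2 and 4, the group at CPU 2 is reported busiest, so work cascades toward the lowest-numbered CPUs one step at a time.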

/**
* update_sd_lb_stats - Update sched_group's statistics for load balancing.
* @sd: sched_domain whose statistics are to be updated.
* @this_cpu: Cpu for which load balance is currently performed.
* @idle: Idle status of this_cpu
* @sd_idle: Idle status of the sched_domain containing group.
* @sd_idle: Idle status of the sched_domain containing sg.
* @cpus: Set of cpus considered for load balancing.
* @balance: Should we balance.
* @sds: variable to hold the statistics for this sched_domain.
@@ -2473,7 +2515,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
struct sd_lb_stats *sds)
{
struct sched_domain *child = sd->child;
struct sched_group *group = sd->groups;
struct sched_group *sg = sd->groups;
struct sg_lb_stats sgs;
int load_idx, prefer_sibling = 0;

@@ -2486,45 +2528,93 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
do {
int local_group;

local_group = cpumask_test_cpu(this_cpu,
sched_group_cpus(group));
local_group = cpumask_test_cpu(this_cpu, sched_group_cpus(sg));
memset(&sgs, 0, sizeof(sgs));
update_sg_lb_stats(sd, group, this_cpu, idle, load_idx, sd_idle,
update_sg_lb_stats(sd, sg, this_cpu, idle, load_idx, sd_idle,
local_group, cpus, balance, &sgs);

if (local_group && !(*balance))
return;

sds->total_load += sgs.group_load;
sds->total_pwr += group->cpu_power;
sds->total_pwr += sg->cpu_power;

/*
* In case the child domain prefers tasks go to siblings
* first, lower the group capacity to one so that we'll try
* first, lower the sg capacity to one so that we'll try
* and move all the excess tasks away.
*/
if (prefer_sibling)
sgs.group_capacity = min(sgs.group_capacity, 1UL);

if (local_group) {
sds->this_load = sgs.avg_load;
sds->this = group;
sds->this = sg;
sds->this_nr_running = sgs.sum_nr_running;
sds->this_load_per_task = sgs.sum_weighted_load;
} else if (sgs.avg_load > sds->max_load &&
(sgs.sum_nr_running > sgs.group_capacity ||
sgs.group_imb)) {
} else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) {
sds->max_load = sgs.avg_load;
sds->busiest = group;
sds->busiest = sg;
sds->busiest_nr_running = sgs.sum_nr_running;
sds->busiest_group_capacity = sgs.group_capacity;
sds->busiest_load_per_task = sgs.sum_weighted_load;
sds->group_imb = sgs.group_imb;
}

update_sd_power_savings_stats(group, sds, local_group, &sgs);
group = group->next;
} while (group != sd->groups);
update_sd_power_savings_stats(sg, sds, local_group, &sgs);
sg = sg->next;
} while (sg != sd->groups);
}

int __weak arch_sd_sibiling_asym_packing(void)
{
return 0*SD_ASYM_PACKING;
}
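
The weak default above keeps the flag off everywhere. An architecture that prefers low-numbered SMT threads would supply a strong definition to override it; a hedged sketch follows (the feature test, cpu_prefers_low_threads(), is hypothetical, and the identifier keeps the source's "sibiling" spelling):

/* Arch-side override sketch: a strong definition replaces the weak default,
 * so the topology initializer picks up SD_ASYM_PACKING only on hardware
 * that actually wants low-numbered threads kept busy. */
int arch_sd_sibiling_asym_packing(void)
{
	if (!cpu_prefers_low_threads())	/* hypothetical feature test */
		return 0;

	return SD_ASYM_PACKING;
}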

/**
* check_asym_packing - Check to see if the group is packed into the
* sched domain.
*
* This is primarily intended to be used at the sibling level. Some
* cores like POWER7 prefer to use lower numbered SMT threads. In the
* case of POWER7, it can move to lower SMT modes only when higher
* threads are idle. When in lower SMT modes, the threads will
* perform better since they share fewer core resources. Hence when we
* have idle threads, we want them to be the higher ones.
*
* This packing function is run on idle threads. It checks to see if
* the busiest CPU in this domain (core in the P7 case) has a higher
* CPU number than the packing function is being run on. Here we are
* assuming a lower CPU number will be equivalent to a lower SMT thread
* number.
*
* @sd: The sched_domain whose packing is to be checked.
* @sds: Statistics of the sched_domain which is to be packed
* @this_cpu: The cpu at whose sched_domain we're performing load-balance.
* @imbalance: returns amount of imbalance due to packing.
*
* Returns 1 when packing is required and a task should be moved to
* this CPU. The amount of the imbalance is returned in *imbalance.
*/
static int check_asym_packing(struct sched_domain *sd,
struct sd_lb_stats *sds,
int this_cpu, unsigned long *imbalance)
{
int busiest_cpu;

if (!(sd->flags & SD_ASYM_PACKING))
return 0;

if (!sds->busiest)
return 0;

busiest_cpu = group_first_cpu(sds->busiest);
if (this_cpu > busiest_cpu)
return 0;

*imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->cpu_power,
SCHED_LOAD_SCALE);
return 1;
}
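
The *imbalance value computed above scales the busiest group's average load by its cpu_power. A worked example with made-up numbers (SCHED_LOAD_SCALE is 1024 in kernels of this vintage; DIV_ROUND_CLOSEST is re-implemented here purely for illustration):

#include <stdio.h>

#define SCHED_LOAD_SCALE	1024UL
#define DIV_ROUND_CLOSEST(x, d)	(((x) + (d) / 2) / (d))

int main(void)
{
	unsigned long max_load  = 1536;	/* busiest group's avg_load */
	unsigned long cpu_power = 512;	/* busiest group's cpu_power */

	/* Same arithmetic as check_asym_packing(): turn the group's average
	 * load back into an amount of load to move toward this_cpu. */
	unsigned long imbalance =
		DIV_ROUND_CLOSEST(max_load * cpu_power, SCHED_LOAD_SCALE);

	printf("imbalance = %lu\n", imbalance);	/* prints 768 */
	return 0;
}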

/**
@@ -2719,6 +2809,10 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
if (!(*balance))
goto ret;

if ((idle == CPU_IDLE || idle == CPU_NEWLY_IDLE) &&
check_asym_packing(sd, &sds, this_cpu, imbalance))
return sds.busiest;

if (!sds.busiest || sds.busiest_nr_running == 0)
goto out_balanced;

@@ -2808,9 +2902,19 @@ find_busiest_queue(struct sched_domain *sd, struct sched_group *group,
/* Working cpumask for load_balance and load_balance_newidle. */
static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);

static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle)
static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle,
int busiest_cpu, int this_cpu)
{
if (idle == CPU_NEWLY_IDLE) {

/*
* ASYM_PACKING needs to force migrate tasks from busy but
* higher numbered CPUs in order to pack all tasks in the
* lowest numbered CPUs.
*/
if ((sd->flags & SD_ASYM_PACKING) && busiest_cpu > this_cpu)
return 1;

/*
* The only task running in a non-idle cpu can be moved to this
* cpu in an attempt to completely freeup the other CPU
@@ -2929,7 +3033,8 @@ static int load_balance(int this_cpu, struct rq *this_rq,
schedstat_inc(sd, lb_failed[idle]);
sd->nr_balance_failed++;

if (need_active_balance(sd, sd_idle, idle)) {
if (need_active_balance(sd, sd_idle, idle, cpu_of(busiest),
this_cpu)) {
raw_spin_lock_irqsave(&busiest->lock, flags);

/* don't kick the active_load_balance_cpu_stop,
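
To make the new need_active_balance() arguments concrete, here is a small standalone sketch (made-up CPU numbers, not the kernel function) of the ASYM_PACKING force-migration test added above:

#include <stdbool.h>
#include <stdio.h>

/* Sketch: with asymmetric packing enabled, a busiest CPU numbered higher
 * than the balancing CPU requests an active migration without waiting for
 * repeated balance failures. */
static bool asym_force_migrate(bool asym_packing, int busiest_cpu, int this_cpu)
{
	return asym_packing && busiest_cpu > this_cpu;
}

int main(void)
{
	/* An idle low-numbered thread (CPU 0) balancing while CPU 3 is busy. */
	printf("%d\n", asym_force_migrate(true, 3, 0));	/* prints 1 */
	return 0;
}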
