Skip to content

Commit

Permalink
sched: Merge select_task_rq_fair() and sched_balance_self()
Browse files Browse the repository at this point in the history
The problem with wake_idle() is that is doesn't respect things like
cpu_power, which means it doesn't deal well with SMT nor the recent
RT interaction.

To cure this, it needs to do what sched_balance_self() does, which
leads to the possibility of merging select_task_rq_fair() and
sched_balance_self().

Modify sched_balance_self() to:

  - update_shares() when walking up the domain tree,
    (it only called it for the top domain, but it should
     have done this anyway), which allows us to remove
    this ugly bit from try_to_wake_up().

  - do wake_affine() on the smallest domain that contains
    both this (the waking) and the prev (the wakee) cpu for
    WAKE invocations.

Then use the top-down balance steps it had to replace wake_idle().

This leads to the dissapearance of SD_WAKE_BALANCE and
SD_WAKE_IDLE_FAR, with SD_WAKE_IDLE replaced with SD_BALANCE_WAKE.

SD_WAKE_AFFINE needs SD_BALANCE_WAKE to be effective.

Touch all topology bits to replace the old with new SD flags --
platforms might need re-tuning, enabling SD_BALANCE_WAKE
conditionally on a NUMA distance seems like a good additional
feature, magny-core and small nehalem systems would want this
enabled, systems with slow interconnects would not.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
  • Loading branch information
Peter Zijlstra authored and Ingo Molnar committed Sep 15, 2009
1 parent e9c8431 commit c88d591
Show file tree
Hide file tree
Showing 10 changed files with 84 additions and 237 deletions.
5 changes: 3 additions & 2 deletions arch/ia64/include/asm/topology.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ void build_cpu_to_node_map(void);
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_NEWIDLE \
| SD_BALANCE_EXEC \
| SD_BALANCE_WAKE \
| SD_WAKE_AFFINE, \
.last_balance = jiffies, \
.balance_interval = 1, \
Expand All @@ -91,8 +92,8 @@ void build_cpu_to_node_map(void);
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_EXEC \
| SD_BALANCE_FORK \
| SD_SERIALIZE \
| SD_WAKE_BALANCE, \
| SD_BALANCE_WAKE \
| SD_SERIALIZE, \
.last_balance = jiffies, \
.balance_interval = 64, \
.nr_balance_failed = 0, \
Expand Down
2 changes: 1 addition & 1 deletion arch/mips/include/asm/mach-ip27/topology.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES];
.cache_nice_tries = 1, \
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_EXEC \
| SD_WAKE_BALANCE, \
| SD_BALANCE_WAKE, \
.last_balance = jiffies, \
.balance_interval = 1, \
.nr_balance_failed = 0, \
Expand Down
5 changes: 2 additions & 3 deletions arch/powerpc/include/asm/topology.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,8 @@ static inline int pcibus_to_node(struct pci_bus *bus)
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_EXEC \
| SD_BALANCE_NEWIDLE \
| SD_WAKE_IDLE \
| SD_SERIALIZE \
| SD_WAKE_BALANCE, \
| SD_BALANCE_WAKE \
| SD_SERIALIZE, \
.last_balance = jiffies, \
.balance_interval = 1, \
.nr_balance_failed = 0, \
Expand Down
4 changes: 2 additions & 2 deletions arch/sh/include/asm/topology.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_FORK \
| SD_BALANCE_EXEC \
| SD_SERIALIZE \
| SD_WAKE_BALANCE, \
| SD_BALANCE_WAKE \
| SD_SERIALIZE, \
.last_balance = jiffies, \
.balance_interval = 1, \
.nr_balance_failed = 0, \
Expand Down
4 changes: 2 additions & 2 deletions arch/sparc/include/asm/topology_64.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ static inline int pcibus_to_node(struct pci_bus *pbus)
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_FORK \
| SD_BALANCE_EXEC \
| SD_SERIALIZE \
| SD_WAKE_BALANCE, \
| SD_BALANCE_WAKE \
| SD_SERIALIZE, \
.last_balance = jiffies, \
.balance_interval = 1, \
}
Expand Down
4 changes: 1 addition & 3 deletions arch/x86/include/asm/topology.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,14 +145,12 @@ extern unsigned long node_remap_size[];
| 1*SD_BALANCE_NEWIDLE \
| 1*SD_BALANCE_EXEC \
| 1*SD_BALANCE_FORK \
| 0*SD_WAKE_IDLE \
| 1*SD_BALANCE_WAKE \
| 1*SD_WAKE_AFFINE \
| 1*SD_WAKE_BALANCE \
| 0*SD_SHARE_CPUPOWER \
| 0*SD_POWERSAVINGS_BALANCE \
| 0*SD_SHARE_PKG_RESOURCES \
| 1*SD_SERIALIZE \
| 1*SD_WAKE_IDLE_FAR \
| 0*SD_PREFER_SIBLING \
, \
.last_balance = jiffies, \
Expand Down
7 changes: 3 additions & 4 deletions include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -803,16 +803,15 @@ enum cpu_idle_type {
#define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */
#define SD_BALANCE_EXEC 0x0004 /* Balance on exec */
#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */
#define SD_WAKE_IDLE 0x0010 /* Wake to idle CPU on task wakeup */
#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */
#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */
#define SD_WAKE_BALANCE 0x0040 /* Perform balancing at task wakeup */

#define SD_SHARE_CPUPOWER 0x0080 /* Domain members share cpu power */
#define SD_POWERSAVINGS_BALANCE 0x0100 /* Balance for power savings */
#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */
#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */
#define SD_WAKE_IDLE_FAR 0x0800 /* Gain latency sacrificing cache hit */

#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */
#define SD_BALANCE_WAKE 0x2000 /* Balance on wakeup */

enum powersavings_balance_level {
POWERSAVINGS_BALANCE_NONE = 0, /* No power saving load balance */
Expand Down
16 changes: 4 additions & 12 deletions include/linux/topology.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,12 @@ int arch_update_cpu_topology(void);
| 1*SD_BALANCE_NEWIDLE \
| 1*SD_BALANCE_EXEC \
| 1*SD_BALANCE_FORK \
| 0*SD_WAKE_IDLE \
| 1*SD_BALANCE_WAKE \
| 1*SD_WAKE_AFFINE \
| 1*SD_WAKE_BALANCE \
| 1*SD_SHARE_CPUPOWER \
| 0*SD_POWERSAVINGS_BALANCE \
| 0*SD_SHARE_PKG_RESOURCES \
| 0*SD_SERIALIZE \
| 0*SD_WAKE_IDLE_FAR \
| 0*SD_PREFER_SIBLING \
, \
.last_balance = jiffies, \
Expand All @@ -129,13 +127,11 @@ int arch_update_cpu_topology(void);
| 1*SD_BALANCE_NEWIDLE \
| 1*SD_BALANCE_EXEC \
| 1*SD_BALANCE_FORK \
| 1*SD_WAKE_IDLE \
| 1*SD_BALANCE_WAKE \
| 1*SD_WAKE_AFFINE \
| 1*SD_WAKE_BALANCE \
| 0*SD_SHARE_CPUPOWER \
| 1*SD_SHARE_PKG_RESOURCES \
| 0*SD_SERIALIZE \
| 0*SD_WAKE_IDLE_FAR \
| sd_balance_for_mc_power() \
| sd_power_saving_flags() \
, \
Expand Down Expand Up @@ -163,13 +159,11 @@ int arch_update_cpu_topology(void);
| 1*SD_BALANCE_NEWIDLE \
| 1*SD_BALANCE_EXEC \
| 1*SD_BALANCE_FORK \
| 1*SD_WAKE_IDLE \
| 1*SD_BALANCE_WAKE \
| 0*SD_WAKE_AFFINE \
| 1*SD_WAKE_BALANCE \
| 0*SD_SHARE_CPUPOWER \
| 0*SD_SHARE_PKG_RESOURCES \
| 0*SD_SERIALIZE \
| 0*SD_WAKE_IDLE_FAR \
| sd_balance_for_package_power() \
| sd_power_saving_flags() \
, \
Expand All @@ -191,14 +185,12 @@ int arch_update_cpu_topology(void);
| 1*SD_BALANCE_NEWIDLE \
| 0*SD_BALANCE_EXEC \
| 0*SD_BALANCE_FORK \
| 0*SD_WAKE_IDLE \
| 0*SD_BALANCE_WAKE \
| 1*SD_WAKE_AFFINE \
| 0*SD_WAKE_BALANCE \
| 0*SD_SHARE_CPUPOWER \
| 0*SD_POWERSAVINGS_BALANCE \
| 0*SD_SHARE_PKG_RESOURCES \
| 1*SD_SERIALIZE \
| 1*SD_WAKE_IDLE_FAR \
| 0*SD_PREFER_SIBLING \
, \
.last_balance = jiffies, \
Expand Down
41 changes: 3 additions & 38 deletions kernel/sched.c
Original file line number Diff line number Diff line change
Expand Up @@ -512,14 +512,6 @@ struct root_domain {
#ifdef CONFIG_SMP
struct cpupri cpupri;
#endif
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
/*
* Preferred wake up cpu nominated by sched_mc balance that will be
* used when most cpus are idle in the system indicating overall very
* low system utilisation. Triggered at POWERSAVINGS_BALANCE_WAKEUP(2)
*/
unsigned int sched_mc_preferred_wakeup_cpu;
#endif
};

/*
Expand Down Expand Up @@ -2315,22 +2307,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
if (!sched_feat(SYNC_WAKEUPS))
sync = 0;

#ifdef CONFIG_SMP
if (sched_feat(LB_WAKEUP_UPDATE) && !root_task_group_empty()) {
struct sched_domain *sd;

this_cpu = raw_smp_processor_id();
cpu = task_cpu(p);

for_each_domain(this_cpu, sd) {
if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
update_shares(sd);
break;
}
}
}
#endif

this_cpu = get_cpu();

smp_wmb();
Expand Down Expand Up @@ -3533,11 +3509,6 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
*imbalance = sds->min_load_per_task;
sds->busiest = sds->group_min;

if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) {
cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu =
group_first_cpu(sds->group_leader);
}

return 1;

}
Expand Down Expand Up @@ -7850,9 +7821,7 @@ static int sd_degenerate(struct sched_domain *sd)
}

/* Following flags don't use groups */
if (sd->flags & (SD_WAKE_IDLE |
SD_WAKE_AFFINE |
SD_WAKE_BALANCE))
if (sd->flags & (SD_WAKE_AFFINE))
return 0;

return 1;
Expand All @@ -7869,10 +7838,6 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent)))
return 0;

/* Does parent contain flags not in child? */
/* WAKE_BALANCE is a subset of WAKE_AFFINE */
if (cflags & SD_WAKE_AFFINE)
pflags &= ~SD_WAKE_BALANCE;
/* Flags needing groups don't count if only 1 group in parent */
if (parent->groups == parent->groups->next) {
pflags &= ~(SD_LOAD_BALANCE |
Expand Down Expand Up @@ -8558,10 +8523,10 @@ static void set_domain_attribute(struct sched_domain *sd,
request = attr->relax_domain_level;
if (request < sd->level) {
/* turn off idle balance on this domain */
sd->flags &= ~(SD_WAKE_IDLE|SD_BALANCE_NEWIDLE);
sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
} else {
/* turn on idle balance on this domain */
sd->flags |= (SD_WAKE_IDLE_FAR|SD_BALANCE_NEWIDLE);
sd->flags |= (SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
}
}

Expand Down
Loading

0 comments on commit c88d591

Please sign in to comment.