From ed6fba7acb018c3badcb33dc3025b5a67916df64 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 22 Nov 2012 11:16:36 +0000 Subject: [PATCH] --- yaml --- r: 344812 b: refs/heads/master c: 1a687c2e9a99335c9e77392f050fe607fa18a652 h: refs/heads/master v: v3 --- [refs] | 2 +- trunk/Documentation/kernel-parameters.txt | 3 ++ trunk/include/linux/sched.h | 4 ++ trunk/init/Kconfig | 8 ++++ trunk/kernel/sched/core.c | 48 ++++++++++++++++------- trunk/kernel/sched/fair.c | 3 ++ trunk/kernel/sched/features.h | 6 ++- trunk/mm/mempolicy.c | 46 ++++++++++++++++++++++ 8 files changed, 102 insertions(+), 18 deletions(-) diff --git a/[refs] b/[refs] index 3a1605d3b4ca..6f018b03f6c0 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: b8593bfda1652755136333cdd362de125b283a9c +refs/heads/master: 1a687c2e9a99335c9e77392f050fe607fa18a652 diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt index 9776f068306b..2e8d2625b814 100644 --- a/trunk/Documentation/kernel-parameters.txt +++ b/trunk/Documentation/kernel-parameters.txt @@ -1996,6 +1996,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nr_uarts= [SERIAL] maximum number of UARTs to be registered. + numa_balancing= [KNL,X86] Enable or disable automatic NUMA balancing. + Allowed values are enable and disable + numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA. one of ['zone', 'node', 'default'] can be specified This can be set from sysctl after boot. diff --git a/trunk/include/linux/sched.h b/trunk/include/linux/sched.h index 0f4ff2bd03f6..b1e619f9ff1a 100644 --- a/trunk/include/linux/sched.h +++ b/trunk/include/linux/sched.h @@ -1563,10 +1563,14 @@ struct task_struct { #ifdef CONFIG_NUMA_BALANCING extern void task_numa_fault(int node, int pages, bool migrated); +extern void set_numabalancing_state(bool enabled); #else static inline void task_numa_fault(int node, int pages, bool migrated) { } +static inline void set_numabalancing_state(bool enabled) +{ +} #endif /* diff --git a/trunk/init/Kconfig b/trunk/init/Kconfig index 9f00f004796a..18e2a5920a34 100644 --- a/trunk/init/Kconfig +++ b/trunk/init/Kconfig @@ -720,6 +720,14 @@ config ARCH_USES_NUMA_PROT_NONE depends on ARCH_WANTS_PROT_NUMA_PROT_NONE depends on NUMA_BALANCING +config NUMA_BALANCING_DEFAULT_ENABLED + bool "Automatically enable NUMA aware memory/task placement" + default y + depends on NUMA_BALANCING + help + If set, autonumic NUMA balancing will be enabled if running on a NUMA + machine. + config NUMA_BALANCING bool "Memory placement aware NUMA scheduler" default y diff --git a/trunk/kernel/sched/core.c b/trunk/kernel/sched/core.c index 9d255bc0e278..7a45015274ab 100644 --- a/trunk/kernel/sched/core.c +++ b/trunk/kernel/sched/core.c @@ -192,23 +192,10 @@ static void sched_feat_disable(int i) { }; static void sched_feat_enable(int i) { }; #endif /* HAVE_JUMP_LABEL */ -static ssize_t -sched_feat_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) +static int sched_feat_set(char *cmp) { - char buf[64]; - char *cmp; - int neg = 0; int i; - - if (cnt > 63) - cnt = 63; - - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - - buf[cnt] = 0; - cmp = strstrip(buf); + int neg = 0; if (strncmp(cmp, "NO_", 3) == 0) { neg = 1; @@ -228,6 +215,27 @@ sched_feat_write(struct file *filp, const char __user *ubuf, } } + return i; +} + +static ssize_t +sched_feat_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; + char *cmp; + int i; + + if (cnt > 63) + cnt = 63; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + cmp = strstrip(buf); + + i = sched_feat_set(cmp); if (i == __SCHED_FEAT_NR) return -EINVAL; @@ -1549,6 +1557,16 @@ static void __sched_fork(struct task_struct *p) #endif /* CONFIG_NUMA_BALANCING */ } +#ifdef CONFIG_NUMA_BALANCING +void set_numabalancing_state(bool enabled) +{ + if (enabled) + sched_feat_set("NUMA"); + else + sched_feat_set("NO_NUMA"); +} +#endif /* CONFIG_NUMA_BALANCING */ + /* * fork()/clone()-time setup: */ diff --git a/trunk/kernel/sched/fair.c b/trunk/kernel/sched/fair.c index 4b577863933f..7a02a2082e95 100644 --- a/trunk/kernel/sched/fair.c +++ b/trunk/kernel/sched/fair.c @@ -811,6 +811,9 @@ void task_numa_fault(int node, int pages, bool migrated) { struct task_struct *p = current; + if (!sched_feat_numa(NUMA)) + return; + /* FIXME: Allocate task-specific structure for placement policy here */ /* diff --git a/trunk/kernel/sched/features.h b/trunk/kernel/sched/features.h index 5fb7aefbec80..d2373a3e3252 100644 --- a/trunk/kernel/sched/features.h +++ b/trunk/kernel/sched/features.h @@ -63,8 +63,10 @@ SCHED_FEAT(RT_RUNTIME_SHARE, true) SCHED_FEAT(LB_MIN, false) /* - * Apply the automatic NUMA scheduling policy + * Apply the automatic NUMA scheduling policy. Enabled automatically + * at runtime if running on a NUMA machine. Can be controlled via + * numa_balancing= */ #ifdef CONFIG_NUMA_BALANCING -SCHED_FEAT(NUMA, true) +SCHED_FEAT(NUMA, false) #endif diff --git a/trunk/mm/mempolicy.c b/trunk/mm/mempolicy.c index fd20e28fd2ad..046308e9b999 100644 --- a/trunk/mm/mempolicy.c +++ b/trunk/mm/mempolicy.c @@ -2521,6 +2521,50 @@ void mpol_free_shared_policy(struct shared_policy *p) mutex_unlock(&p->mutex); } +#ifdef CONFIG_NUMA_BALANCING +static bool __initdata numabalancing_override; + +static void __init check_numabalancing_enable(void) +{ + bool numabalancing_default = false; + + if (IS_ENABLED(CONFIG_NUMA_BALANCING_DEFAULT_ENABLED)) + numabalancing_default = true; + + if (nr_node_ids > 1 && !numabalancing_override) { + printk(KERN_INFO "Enabling automatic NUMA balancing. " + "Configure with numa_balancing= or sysctl"); + set_numabalancing_state(numabalancing_default); + } +} + +static int __init setup_numabalancing(char *str) +{ + int ret = 0; + if (!str) + goto out; + numabalancing_override = true; + + if (!strcmp(str, "enable")) { + set_numabalancing_state(true); + ret = 1; + } else if (!strcmp(str, "disable")) { + set_numabalancing_state(false); + ret = 1; + } +out: + if (!ret) + printk(KERN_WARNING "Unable to parse numa_balancing=\n"); + + return ret; +} +__setup("numa_balancing=", setup_numabalancing); +#else +static inline void __init check_numabalancing_enable(void) +{ +} +#endif /* CONFIG_NUMA_BALANCING */ + /* assumes fs == KERNEL_DS */ void __init numa_policy_init(void) { @@ -2571,6 +2615,8 @@ void __init numa_policy_init(void) if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes)) printk("numa_policy_init: interleaving failed\n"); + + check_numabalancing_enable(); } /* Reset policy of current process to default */