From 409cb0f14de7f44756da532e1d7d870f155dbd63 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 4 Oct 2012 01:50:47 +0200 Subject: [PATCH] --- yaml --- r: 344783 b: refs/heads/master c: be3a728427a605990a7a0b6dbf9e29b68e266146 h: refs/heads/master i: 344781: a19dcd620d0276a91c744e4059a59cf7f00e8d05 344779: fde54a181e0ad08b40855a155ce20a1f503cdcc4 344775: 468415ed05c5892c3ff8d5cb0aa5235073c9a251 344767: bcdf5b39cbcfcd68a9f4e8d163650141ddbbaf7a v: v3 --- [refs] | 2 +- trunk/arch/x86/include/asm/pgtable.h | 11 ++- trunk/include/asm-generic/pgtable.h | 106 +++++++++++++++++++++++++++ trunk/init/Kconfig | 37 ++++++++++ 4 files changed, 153 insertions(+), 3 deletions(-) diff --git a/[refs] b/[refs] index 91ff6619b8b4..0afd92c28264 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: dbe4d2035a5b273c910f8f7eb0b7189ee76f63ad +refs/heads/master: be3a728427a605990a7a0b6dbf9e29b68e266146 diff --git a/trunk/arch/x86/include/asm/pgtable.h b/trunk/arch/x86/include/asm/pgtable.h index 5fe03aaca92e..5199db2923d3 100644 --- a/trunk/arch/x86/include/asm/pgtable.h +++ b/trunk/arch/x86/include/asm/pgtable.h @@ -404,7 +404,8 @@ static inline int pte_same(pte_t a, pte_t b) static inline int pte_present(pte_t a) { - return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); + return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE | + _PAGE_NUMA); } #define pte_accessible pte_accessible @@ -426,7 +427,8 @@ static inline int pmd_present(pmd_t pmd) * the _PAGE_PSE flag will remain set at all times while the * _PAGE_PRESENT bit is clear). 
*/ - return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE); + return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE | + _PAGE_NUMA); } static inline int pmd_none(pmd_t pmd) @@ -485,6 +487,11 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) static inline int pmd_bad(pmd_t pmd) { +#ifdef CONFIG_NUMA_BALANCING + /* pmd_numa check */ + if ((pmd_flags(pmd) & (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA) + return 0; +#endif return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE; } diff --git a/trunk/include/asm-generic/pgtable.h b/trunk/include/asm-generic/pgtable.h index 48fc1dc1c74b..f27c83668d10 100644 --- a/trunk/include/asm-generic/pgtable.h +++ b/trunk/include/asm-generic/pgtable.h @@ -558,6 +558,112 @@ static inline int pmd_trans_unstable(pmd_t *pmd) #endif } +#ifdef CONFIG_NUMA_BALANCING +#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE +/* + * _PAGE_NUMA works identically to _PAGE_PROTNONE (it's actually the + * same bit too). It's set only when _PAGE_PRESENT is not set and it's + * never set if _PAGE_PRESENT is set. + * + * pte/pmd_present() returns true if pte/pmd_numa returns true. Page + * fault triggers on those regions if pte/pmd_numa returns true + * (because _PAGE_PRESENT is not set). + */ +#ifndef pte_numa +static inline int pte_numa(pte_t pte) +{ + return (pte_flags(pte) & + (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA; +} +#endif + +#ifndef pmd_numa +static inline int pmd_numa(pmd_t pmd) +{ + return (pmd_flags(pmd) & + (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA; +} +#endif + +/* + * pte/pmd_mknonnuma sets the _PAGE_ACCESSED bitflag automatically + * because they're called by the NUMA hinting minor page fault. If we + * didn't set the _PAGE_ACCESSED bitflag here, the TLB miss handler + * would be forced to set it later while filling the TLB after we + * return to userland. That would trigger a second write to memory + * that we optimize away by setting _PAGE_ACCESSED here. 
+ */ +#ifndef pte_mknonnuma +static inline pte_t pte_mknonnuma(pte_t pte) +{ + pte = pte_clear_flags(pte, _PAGE_NUMA); + return pte_set_flags(pte, _PAGE_PRESENT|_PAGE_ACCESSED); +} +#endif + +#ifndef pmd_mknonnuma +static inline pmd_t pmd_mknonnuma(pmd_t pmd) +{ + pmd = pmd_clear_flags(pmd, _PAGE_NUMA); + return pmd_set_flags(pmd, _PAGE_PRESENT|_PAGE_ACCESSED); +} +#endif + +#ifndef pte_mknuma +static inline pte_t pte_mknuma(pte_t pte) +{ + pte = pte_set_flags(pte, _PAGE_NUMA); + return pte_clear_flags(pte, _PAGE_PRESENT); +} +#endif + +#ifndef pmd_mknuma +static inline pmd_t pmd_mknuma(pmd_t pmd) +{ + pmd = pmd_set_flags(pmd, _PAGE_NUMA); + return pmd_clear_flags(pmd, _PAGE_PRESENT); +} +#endif +#else +extern int pte_numa(pte_t pte); +extern int pmd_numa(pmd_t pmd); +extern pte_t pte_mknonnuma(pte_t pte); +extern pmd_t pmd_mknonnuma(pmd_t pmd); +extern pte_t pte_mknuma(pte_t pte); +extern pmd_t pmd_mknuma(pmd_t pmd); +#endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */ +#else +static inline int pmd_numa(pmd_t pmd) +{ + return 0; +} + +static inline int pte_numa(pte_t pte) +{ + return 0; +} + +static inline pte_t pte_mknonnuma(pte_t pte) +{ + return pte; +} + +static inline pmd_t pmd_mknonnuma(pmd_t pmd) +{ + return pmd; +} + +static inline pte_t pte_mknuma(pte_t pte) +{ + return pte; +} + +static inline pmd_t pmd_mknuma(pmd_t pmd) +{ + return pmd; +} +#endif /* CONFIG_NUMA_BALANCING */ + #endif /* CONFIG_MMU */ #endif /* !__ASSEMBLY__ */ diff --git a/trunk/init/Kconfig b/trunk/init/Kconfig index 6fdd6e339326..9f00f004796a 100644 --- a/trunk/init/Kconfig +++ b/trunk/init/Kconfig @@ -696,6 +696,43 @@ config LOG_BUF_SHIFT config HAVE_UNSTABLE_SCHED_CLOCK bool +# +# For architectures that want to enable the support for NUMA-affine scheduler +# balancing logic: +# +config ARCH_SUPPORTS_NUMA_BALANCING + bool + +# For architectures that (ab)use NUMA to represent different memory regions +# all cpu-local but of different latencies, such as SuperH. 
+# +config ARCH_WANT_NUMA_VARIABLE_LOCALITY + bool + +# +# For architectures that are willing to define _PAGE_NUMA as _PAGE_PROTNONE +config ARCH_WANTS_PROT_NUMA_PROT_NONE + bool + +config ARCH_USES_NUMA_PROT_NONE + bool + default y + depends on ARCH_WANTS_PROT_NUMA_PROT_NONE + depends on NUMA_BALANCING + +config NUMA_BALANCING + bool "Memory placement aware NUMA scheduler" + default y + depends on ARCH_SUPPORTS_NUMA_BALANCING + depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY + depends on SMP && NUMA && MIGRATION + help + This option adds support for automatic NUMA aware memory/task placement. + The mechanism is quite primitive and is based on migrating memory when + it has references to the node the task is running on. + + This system will be inactive on UMA systems. + menuconfig CGROUPS boolean "Control Group support" depends on EVENTFD