From 6425fea32e460cf37b817fc9bb4e137a80b5b6bc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 26 Feb 2009 06:30:42 +0100 Subject: [PATCH] --- yaml --- r: 136980 b: refs/heads/master c: 13b2eda64d14d0a0c15c092664c7351ea58ea851 h: refs/heads/master v: v3 --- [refs] | 2 +- trunk/arch/alpha/mm/init.c | 20 +- trunk/arch/avr32/Kconfig | 2 +- trunk/arch/x86/Kconfig | 5 +- trunk/arch/x86/include/asm/cacheflush.h | 53 +- trunk/arch/x86/include/asm/iomap.h | 3 + trunk/arch/x86/include/asm/mmzone_32.h | 43 +- trunk/arch/x86/include/asm/percpu.h | 8 - trunk/arch/x86/include/asm/pgtable.h | 2 - trunk/arch/x86/include/asm/xen/page.h | 1 - .../x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 2 +- trunk/arch/x86/kernel/irq_32.c | 29 +- trunk/arch/x86/kernel/setup_percpu.c | 367 +------ trunk/arch/x86/mm/init_32.c | 17 - trunk/arch/x86/mm/init_64.c | 72 +- trunk/arch/x86/mm/iomap_32.c | 18 + trunk/arch/x86/xen/enlighten.c | 13 +- trunk/arch/x86/xen/mmu.c | 7 - trunk/arch/x86/xen/smp.c | 6 +- trunk/block/blktrace.c | 2 +- trunk/drivers/acpi/processor_perflib.c | 4 +- trunk/drivers/gpu/drm/i915/i915_dma.c | 9 +- trunk/include/linux/bootmem.h | 36 +- trunk/include/linux/io-mapping.h | 46 +- trunk/include/linux/percpu.h | 102 +- trunk/include/linux/vmalloc.h | 4 - trunk/kernel/module.c | 64 +- trunk/kernel/sched.c | 6 +- trunk/kernel/stop_machine.c | 2 +- trunk/mm/Makefile | 4 - trunk/mm/allocpercpu.c | 32 +- trunk/mm/bootmem.c | 35 +- trunk/mm/percpu.c | 979 ------------------ trunk/mm/vmalloc.c | 94 +- trunk/net/ipv4/af_inet.c | 4 +- trunk/net/ipv4/route.c | 2 +- 36 files changed, 293 insertions(+), 1802 deletions(-) delete mode 100644 trunk/mm/percpu.c diff --git a/[refs] b/[refs] index 5899df154b7c..8d93730c6891 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 9976b39b5031bbf76f715893cf080b6a17683881 +refs/heads/master: 13b2eda64d14d0a0c15c092664c7351ea58ea851 diff --git a/trunk/arch/alpha/mm/init.c b/trunk/arch/alpha/mm/init.c index af71d38c8e41..5d7a16eab312 100644 --- a/trunk/arch/alpha/mm/init.c +++ b/trunk/arch/alpha/mm/init.c @@ -189,21 +189,9 @@ callback_init(void * kernel_end) if (alpha_using_srm) { static struct vm_struct console_remap_vm; - unsigned long nr_pages = 0; - unsigned long vaddr; + unsigned long vaddr = VMALLOC_START; unsigned long i, j; - /* calculate needed size */ - for (i = 0; i < crb->map_entries; ++i) - nr_pages += crb->map[i].count; - - /* register the vm area */ - console_remap_vm.flags = VM_ALLOC; - console_remap_vm.size = nr_pages << PAGE_SHIFT; - vm_area_register_early(&console_remap_vm, PAGE_SIZE); - - vaddr = (unsigned long)console_remap_vm.addr; - /* Set up the third level PTEs and update the virtual addresses of the CRB entries. */ for (i = 0; i < crb->map_entries; ++i) { @@ -225,6 +213,12 @@ callback_init(void * kernel_end) vaddr += PAGE_SIZE; } } + + /* Let vmalloc know that we've allocated some space. 
*/ + console_remap_vm.flags = VM_ALLOC; + console_remap_vm.addr = (void *) VMALLOC_START; + console_remap_vm.size = vaddr - VMALLOC_START; + vmlist = &console_remap_vm; } callback_init_done = 1; diff --git a/trunk/arch/avr32/Kconfig b/trunk/arch/avr32/Kconfig index 05fe3053dcae..b189680d18b0 100644 --- a/trunk/arch/avr32/Kconfig +++ b/trunk/arch/avr32/Kconfig @@ -181,7 +181,7 @@ source "kernel/Kconfig.preempt" config QUICKLIST def_bool y -config HAVE_ARCH_BOOTMEM +config HAVE_ARCH_BOOTMEM_NODE def_bool n config ARCH_HAVE_MEMORY_PRESENT diff --git a/trunk/arch/x86/Kconfig b/trunk/arch/x86/Kconfig index 8015641478bd..5e2919c0ff92 100644 --- a/trunk/arch/x86/Kconfig +++ b/trunk/arch/x86/Kconfig @@ -135,9 +135,6 @@ config ARCH_HAS_CACHE_LINE_SIZE config HAVE_SETUP_PER_CPU_AREA def_bool y -config HAVE_DYNAMIC_PER_CPU_AREA - def_bool y - config HAVE_CPUMASK_OF_CPU_MAP def_bool X86_64_SMP @@ -1125,7 +1122,7 @@ config NODES_SHIFT Specify the maximum number of NUMA Nodes available on the target system. Increases memory reserved to accomodate various tables. -config HAVE_ARCH_BOOTMEM +config HAVE_ARCH_BOOTMEM_NODE def_bool y depends on X86_32 && NUMA diff --git a/trunk/arch/x86/include/asm/cacheflush.h b/trunk/arch/x86/include/asm/cacheflush.h index 5b301b7ff5f4..2f8466540fb5 100644 --- a/trunk/arch/x86/include/asm/cacheflush.h +++ b/trunk/arch/x86/include/asm/cacheflush.h @@ -5,43 +5,24 @@ #include /* Caches aren't brain-dead on the intel. */ -static inline void flush_cache_all(void) { } -static inline void flush_cache_mm(struct mm_struct *mm) { } -static inline void flush_cache_dup_mm(struct mm_struct *mm) { } -static inline void flush_cache_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) { } -static inline void flush_cache_page(struct vm_area_struct *vma, - unsigned long vmaddr, unsigned long pfn) { } -static inline void flush_dcache_page(struct page *page) { } -static inline void flush_dcache_mmap_lock(struct address_space *mapping) { } -static inline void flush_dcache_mmap_unlock(struct address_space *mapping) { } -static inline void flush_icache_range(unsigned long start, - unsigned long end) { } -static inline void flush_icache_page(struct vm_area_struct *vma, - struct page *page) { } -static inline void flush_icache_user_range(struct vm_area_struct *vma, - struct page *page, - unsigned long addr, - unsigned long len) { } -static inline void flush_cache_vmap(unsigned long start, unsigned long end) { } -static inline void flush_cache_vunmap(unsigned long start, - unsigned long end) { } +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_dup_mm(mm) do { } while (0) +#define flush_cache_range(vma, start, end) do { } while (0) +#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define flush_dcache_page(page) do { } while (0) +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) +#define flush_icache_range(start, end) do { } while (0) +#define flush_icache_page(vma, pg) do { } while (0) +#define flush_icache_user_range(vma, pg, adr, len) do { } while (0) +#define flush_cache_vmap(start, end) do { } while (0) +#define flush_cache_vunmap(start, end) do { } while (0) -static inline void copy_to_user_page(struct vm_area_struct *vma, - struct page *page, unsigned long vaddr, - void *dst, const void *src, - unsigned long len) -{ - memcpy(dst, src, len); -} - -static inline void copy_from_user_page(struct vm_area_struct *vma, - struct page *page, 
unsigned long vaddr, - void *dst, const void *src, - unsigned long len) -{ - memcpy(dst, src, len); -} +#define copy_to_user_page(vma, page, vaddr, dst, src, len) \ + memcpy((dst), (src), (len)) +#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ + memcpy((dst), (src), (len)) #define PG_non_WB PG_arch_1 PAGEFLAG(NonWB, non_WB) diff --git a/trunk/arch/x86/include/asm/iomap.h b/trunk/arch/x86/include/asm/iomap.h index c1f06289b14b..86af26091d6c 100644 --- a/trunk/arch/x86/include/asm/iomap.h +++ b/trunk/arch/x86/include/asm/iomap.h @@ -23,6 +23,9 @@ #include #include +int +is_io_mapping_possible(resource_size_t base, unsigned long size); + void * iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); diff --git a/trunk/arch/x86/include/asm/mmzone_32.h b/trunk/arch/x86/include/asm/mmzone_32.h index ede6998bd92c..105fb90a0635 100644 --- a/trunk/arch/x86/include/asm/mmzone_32.h +++ b/trunk/arch/x86/include/asm/mmzone_32.h @@ -91,9 +91,46 @@ static inline int pfn_valid(int pfn) #endif /* CONFIG_DISCONTIGMEM */ #ifdef CONFIG_NEED_MULTIPLE_NODES -/* always use node 0 for bootmem on this numa platform */ -#define bootmem_arch_preferred_node(__bdata, size, align, goal, limit) \ - (NODE_DATA(0)->bdata) + +/* + * Following are macros that are specific to this numa platform. + */ +#define reserve_bootmem(addr, size, flags) \ + reserve_bootmem_node(NODE_DATA(0), (addr), (size), (flags)) +#define alloc_bootmem(x) \ + __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_nopanic(x) \ + __alloc_bootmem_node_nopanic(NODE_DATA(0), (x), SMP_CACHE_BYTES, \ + __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_low(x) \ + __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, 0) +#define alloc_bootmem_pages(x) \ + __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_pages_nopanic(x) \ + __alloc_bootmem_node_nopanic(NODE_DATA(0), (x), PAGE_SIZE, \ + __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_low_pages(x) \ + __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) +#define alloc_bootmem_node(pgdat, x) \ +({ \ + struct pglist_data __maybe_unused \ + *__alloc_bootmem_node__pgdat = (pgdat); \ + __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, \ + __pa(MAX_DMA_ADDRESS)); \ +}) +#define alloc_bootmem_pages_node(pgdat, x) \ +({ \ + struct pglist_data __maybe_unused \ + *__alloc_bootmem_node__pgdat = (pgdat); \ + __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, \ + __pa(MAX_DMA_ADDRESS)); \ +}) +#define alloc_bootmem_low_pages_node(pgdat, x) \ +({ \ + struct pglist_data __maybe_unused \ + *__alloc_bootmem_node__pgdat = (pgdat); \ + __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0); \ +}) #endif /* CONFIG_NEED_MULTIPLE_NODES */ #endif /* _ASM_X86_MMZONE_32_H */ diff --git a/trunk/arch/x86/include/asm/percpu.h b/trunk/arch/x86/include/asm/percpu.h index 8f1d2fbec1d4..aee103b26d01 100644 --- a/trunk/arch/x86/include/asm/percpu.h +++ b/trunk/arch/x86/include/asm/percpu.h @@ -43,14 +43,6 @@ #else /* ...!ASSEMBLY */ #include -#include - -#define __addr_to_pcpu_ptr(addr) \ - (void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr \ - + (unsigned long)__per_cpu_start) -#define __pcpu_ptr_to_addr(ptr) \ - (void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr \ - - (unsigned long)__per_cpu_start) #ifdef CONFIG_SMP #define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x diff --git a/trunk/arch/x86/include/asm/pgtable.h b/trunk/arch/x86/include/asm/pgtable.h index 
d0812e155f1d..1c097a3a6669 100644 --- a/trunk/arch/x86/include/asm/pgtable.h +++ b/trunk/arch/x86/include/asm/pgtable.h @@ -288,8 +288,6 @@ static inline int is_new_memtype_allowed(unsigned long flags, return 1; } -pmd_t *populate_extra_pmd(unsigned long vaddr); -pte_t *populate_extra_pte(unsigned long vaddr); #endif /* __ASSEMBLY__ */ #ifdef CONFIG_X86_32 diff --git a/trunk/arch/x86/include/asm/xen/page.h b/trunk/arch/x86/include/asm/xen/page.h index 1a918dde46b5..4bd990ee43df 100644 --- a/trunk/arch/x86/include/asm/xen/page.h +++ b/trunk/arch/x86/include/asm/xen/page.h @@ -164,7 +164,6 @@ static inline pte_t __pte_ma(pteval_t x) xmaddr_t arbitrary_virt_to_machine(void *address); -unsigned long arbitrary_virt_to_mfn(void *vaddr); void make_lowmem_page_readonly(void *vaddr); void make_lowmem_page_readwrite(void *vaddr); diff --git a/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 22590cf688ae..4b1c319d30c3 100644 --- a/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -601,7 +601,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (!data) return -ENOMEM; - data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu); + data->acpi_data = percpu_ptr(acpi_perf_data, cpu); per_cpu(drv_data, cpu) = data; if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) diff --git a/trunk/arch/x86/kernel/irq_32.c b/trunk/arch/x86/kernel/irq_32.c index 3b09634a5153..9dc6b2b24275 100644 --- a/trunk/arch/x86/kernel/irq_32.c +++ b/trunk/arch/x86/kernel/irq_32.c @@ -16,7 +16,6 @@ #include #include #include -#include #include @@ -56,13 +55,13 @@ static inline void print_stack_overflow(void) { } union irq_ctx { struct thread_info tinfo; u32 stack[THREAD_SIZE/sizeof(u32)]; -} __attribute__((aligned(PAGE_SIZE))); +}; -static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx); -static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx); +static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; +static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; -static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, hardirq_stack); -static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, softirq_stack); +static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; +static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; static void call_on_stack(void *func, void *stack) { @@ -82,7 +81,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) u32 *isp, arg1, arg2; curctx = (union irq_ctx *) current_thread_info(); - irqctx = __get_cpu_var(hardirq_ctx); + irqctx = hardirq_ctx[smp_processor_id()]; /* * this is where we switch to the IRQ stack. 
However, if we are @@ -126,34 +125,34 @@ void __cpuinit irq_ctx_init(int cpu) { union irq_ctx *irqctx; - if (per_cpu(hardirq_ctx, cpu)) + if (hardirq_ctx[cpu]) return; - irqctx = &per_cpu(hardirq_stack, cpu); + irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE]; irqctx->tinfo.task = NULL; irqctx->tinfo.exec_domain = NULL; irqctx->tinfo.cpu = cpu; irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); - per_cpu(hardirq_ctx, cpu) = irqctx; + hardirq_ctx[cpu] = irqctx; - irqctx = &per_cpu(softirq_stack, cpu); + irqctx = (union irq_ctx *) &softirq_stack[cpu*THREAD_SIZE]; irqctx->tinfo.task = NULL; irqctx->tinfo.exec_domain = NULL; irqctx->tinfo.cpu = cpu; irqctx->tinfo.preempt_count = 0; irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); - per_cpu(softirq_ctx, cpu) = irqctx; + softirq_ctx[cpu] = irqctx; printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", - cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu)); + cpu, hardirq_ctx[cpu], softirq_ctx[cpu]); } void irq_ctx_exit(int cpu) { - per_cpu(hardirq_ctx, cpu) = NULL; + hardirq_ctx[cpu] = NULL; } asmlinkage void do_softirq(void) @@ -170,7 +169,7 @@ asmlinkage void do_softirq(void) if (local_softirq_pending()) { curctx = current_thread_info(); - irqctx = __get_cpu_var(softirq_ctx); + irqctx = softirq_ctx[smp_processor_id()]; irqctx->tinfo.task = curctx->task; irqctx->tinfo.previous_esp = current_stack_pointer; diff --git a/trunk/arch/x86/kernel/setup_percpu.c b/trunk/arch/x86/kernel/setup_percpu.c index c29f301d3885..d992e6cff730 100644 --- a/trunk/arch/x86/kernel/setup_percpu.c +++ b/trunk/arch/x86/kernel/setup_percpu.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -42,321 +41,6 @@ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { }; EXPORT_SYMBOL(__per_cpu_offset); -/** - * pcpu_need_numa - determine percpu allocation needs to consider NUMA - * - * If NUMA is not configured or there is only one NUMA node available, - * there is no reason to consider NUMA. This function determines - * whether percpu allocation should consider NUMA or not. - * - * RETURNS: - * true if NUMA should be considered; otherwise, false. - */ -static bool __init pcpu_need_numa(void) -{ -#ifdef CONFIG_NEED_MULTIPLE_NODES - pg_data_t *last = NULL; - unsigned int cpu; - - for_each_possible_cpu(cpu) { - int node = early_cpu_to_node(cpu); - - if (node_online(node) && NODE_DATA(node) && - last && last != NODE_DATA(node)) - return true; - - last = NODE_DATA(node); - } -#endif - return false; -} - -/** - * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu - * @cpu: cpu to allocate for - * @size: size allocation in bytes - * @align: alignment - * - * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper - * does the right thing for NUMA regardless of the current - * configuration. - * - * RETURNS: - * Pointer to the allocated area on success, NULL on failure. 
- */ -static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, - unsigned long align) -{ - const unsigned long goal = __pa(MAX_DMA_ADDRESS); -#ifdef CONFIG_NEED_MULTIPLE_NODES - int node = early_cpu_to_node(cpu); - void *ptr; - - if (!node_online(node) || !NODE_DATA(node)) { - ptr = __alloc_bootmem_nopanic(size, align, goal); - pr_info("cpu %d has no node %d or node-local memory\n", - cpu, node); - pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n", - cpu, size, __pa(ptr)); - } else { - ptr = __alloc_bootmem_node_nopanic(NODE_DATA(node), - size, align, goal); - pr_debug("per cpu data for cpu%d %lu bytes on node%d at " - "%016lx\n", cpu, size, node, __pa(ptr)); - } - return ptr; -#else - return __alloc_bootmem_nopanic(size, align, goal); -#endif -} - -/* - * Remap allocator - * - * This allocator uses PMD page as unit. A PMD page is allocated for - * each cpu and each is remapped into vmalloc area using PMD mapping. - * As PMD page is quite large, only part of it is used for the first - * chunk. Unused part is returned to the bootmem allocator. - * - * So, the PMD pages are mapped twice - once to the physical mapping - * and to the vmalloc area for the first percpu chunk. The double - * mapping does add one more PMD TLB entry pressure but still is much - * better than only using 4k mappings while still being NUMA friendly. - */ -#ifdef CONFIG_NEED_MULTIPLE_NODES -static size_t pcpur_size __initdata; -static void **pcpur_ptrs __initdata; - -static struct page * __init pcpur_get_page(unsigned int cpu, int pageno) -{ - size_t off = (size_t)pageno << PAGE_SHIFT; - - if (off >= pcpur_size) - return NULL; - - return virt_to_page(pcpur_ptrs[cpu] + off); -} - -static ssize_t __init setup_pcpu_remap(size_t static_size) -{ - static struct vm_struct vm; - pg_data_t *last; - size_t ptrs_size; - unsigned int cpu; - ssize_t ret; - - /* - * If large page isn't supported, there's no benefit in doing - * this. Also, on non-NUMA, embedding is better. - */ - if (!cpu_has_pse || pcpu_need_numa()) - return -EINVAL; - - last = NULL; - for_each_possible_cpu(cpu) { - int node = early_cpu_to_node(cpu); - - if (node_online(node) && NODE_DATA(node) && - last && last != NODE_DATA(node)) - goto proceed; - - last = NODE_DATA(node); - } - return -EINVAL; - -proceed: - /* - * Currently supports only single page. Supporting multiple - * pages won't be too difficult if it ever becomes necessary. - */ - pcpur_size = PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE); - if (pcpur_size > PMD_SIZE) { - pr_warning("PERCPU: static data is larger than large page, " - "can't use large page\n"); - return -EINVAL; - } - - /* allocate pointer array and alloc large pages */ - ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0])); - pcpur_ptrs = alloc_bootmem(ptrs_size); - - for_each_possible_cpu(cpu) { - pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PMD_SIZE, PMD_SIZE); - if (!pcpur_ptrs[cpu]) - goto enomem; - - /* - * Only use pcpur_size bytes and give back the rest. - * - * Ingo: The 2MB up-rounding bootmem is needed to make - * sure the partial 2MB page is still fully RAM - it's - * not well-specified to have a PAT-incompatible area - * (unmapped RAM, device memory, etc.) in that hole. 
- */ - free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size), - PMD_SIZE - pcpur_size); - - memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size); - } - - /* allocate address and map */ - vm.flags = VM_ALLOC; - vm.size = num_possible_cpus() * PMD_SIZE; - vm_area_register_early(&vm, PMD_SIZE); - - for_each_possible_cpu(cpu) { - pmd_t *pmd; - - pmd = populate_extra_pmd((unsigned long)vm.addr - + cpu * PMD_SIZE); - set_pmd(pmd, pfn_pmd(page_to_pfn(virt_to_page(pcpur_ptrs[cpu])), - PAGE_KERNEL_LARGE)); - } - - /* we're ready, commit */ - pr_info("PERCPU: Remapped at %p with large pages, static data " - "%zu bytes\n", vm.addr, static_size); - - ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, PMD_SIZE, - pcpur_size - static_size, vm.addr, NULL); - goto out_free_ar; - -enomem: - for_each_possible_cpu(cpu) - if (pcpur_ptrs[cpu]) - free_bootmem(__pa(pcpur_ptrs[cpu]), PMD_SIZE); - ret = -ENOMEM; -out_free_ar: - free_bootmem(__pa(pcpur_ptrs), ptrs_size); - return ret; -} -#else -static ssize_t __init setup_pcpu_remap(size_t static_size) -{ - return -EINVAL; -} -#endif - -/* - * Embedding allocator - * - * The first chunk is sized to just contain the static area plus - * PERCPU_DYNAMIC_RESERVE and allocated as a contiguous area using - * bootmem allocator and used as-is without being mapped into vmalloc - * area. This enables the first chunk to piggy back on the linear - * physical PMD mapping and doesn't add any additional pressure to - * TLB. - */ -static void *pcpue_ptr __initdata; -static size_t pcpue_unit_size __initdata; - -static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) -{ - return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size - + ((size_t)pageno << PAGE_SHIFT)); -} - -static ssize_t __init setup_pcpu_embed(size_t static_size) -{ - unsigned int cpu; - - /* - * If large page isn't supported, there's no benefit in doing - * this. Also, embedding allocation doesn't play well with - * NUMA. - */ - if (!cpu_has_pse || pcpu_need_numa()) - return -EINVAL; - - /* allocate and copy */ - pcpue_unit_size = PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE); - pcpue_unit_size = max_t(size_t, pcpue_unit_size, PCPU_MIN_UNIT_SIZE); - pcpue_ptr = pcpu_alloc_bootmem(0, num_possible_cpus() * pcpue_unit_size, - PAGE_SIZE); - if (!pcpue_ptr) - return -ENOMEM; - - for_each_possible_cpu(cpu) - memcpy(pcpue_ptr + cpu * pcpue_unit_size, __per_cpu_load, - static_size); - - /* we're ready, commit */ - pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", - pcpue_unit_size >> PAGE_SHIFT, pcpue_ptr, static_size); - - return pcpu_setup_first_chunk(pcpue_get_page, static_size, - pcpue_unit_size, - pcpue_unit_size - static_size, pcpue_ptr, - NULL); -} - -/* - * 4k page allocator - * - * This is the basic allocator. Static percpu area is allocated - * page-by-page and most of initialization is done by the generic - * setup function. 
- */ -static struct page **pcpu4k_pages __initdata; -static int pcpu4k_nr_static_pages __initdata; - -static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno) -{ - if (pageno < pcpu4k_nr_static_pages) - return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno]; - return NULL; -} - -static void __init pcpu4k_populate_pte(unsigned long addr) -{ - populate_extra_pte(addr); -} - -static ssize_t __init setup_pcpu_4k(size_t static_size) -{ - size_t pages_size; - unsigned int cpu; - int i, j; - ssize_t ret; - - pcpu4k_nr_static_pages = PFN_UP(static_size); - - /* unaligned allocations can't be freed, round up to page size */ - pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus() - * sizeof(pcpu4k_pages[0])); - pcpu4k_pages = alloc_bootmem(pages_size); - - /* allocate and copy */ - j = 0; - for_each_possible_cpu(cpu) - for (i = 0; i < pcpu4k_nr_static_pages; i++) { - void *ptr; - - ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE); - if (!ptr) - goto enomem; - - memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); - pcpu4k_pages[j++] = virt_to_page(ptr); - } - - /* we're ready, commit */ - pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", - pcpu4k_nr_static_pages, static_size); - - ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, 0, 0, NULL, - pcpu4k_populate_pte); - goto out_free_ar; - -enomem: - while (--j >= 0) - free_bootmem(__pa(page_address(pcpu4k_pages[j])), PAGE_SIZE); - ret = -ENOMEM; -out_free_ar: - free_bootmem(__pa(pcpu4k_pages), pages_size); - return ret; -} - static inline void setup_percpu_segment(int cpu) { #ifdef CONFIG_X86_32 @@ -377,35 +61,38 @@ static inline void setup_percpu_segment(int cpu) */ void __init setup_per_cpu_areas(void) { - size_t static_size = __per_cpu_end - __per_cpu_start; - unsigned int cpu; - unsigned long delta; - size_t pcpu_unit_size; - ssize_t ret; + ssize_t size; + char *ptr; + int cpu; + + /* Copy section for each CPU (we discard the original) */ + size = roundup(PERCPU_ENOUGH_ROOM, PAGE_SIZE); pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); - /* - * Allocate percpu area. If PSE is supported, try to make use - * of large page mappings. Please read comments on top of - * each allocator for details. 
- */ - ret = setup_pcpu_remap(static_size); - if (ret < 0) - ret = setup_pcpu_embed(static_size); - if (ret < 0) - ret = setup_pcpu_4k(static_size); - if (ret < 0) - panic("cannot allocate static percpu area (%zu bytes, err=%zd)", - static_size, ret); - - pcpu_unit_size = ret; + pr_info("PERCPU: Allocating %zd bytes of per cpu data\n", size); - /* alrighty, percpu areas up and running */ - delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { - per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size; +#ifndef CONFIG_NEED_MULTIPLE_NODES + ptr = alloc_bootmem_pages(size); +#else + int node = early_cpu_to_node(cpu); + if (!node_online(node) || !NODE_DATA(node)) { + ptr = alloc_bootmem_pages(size); + pr_info("cpu %d has no node %d or node-local memory\n", + cpu, node); + pr_debug("per cpu data for cpu%d at %016lx\n", + cpu, __pa(ptr)); + } else { + ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); + pr_debug("per cpu data for cpu%d on node%d at %016lx\n", + cpu, node, __pa(ptr)); + } +#endif + + memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); + per_cpu_offset(cpu) = ptr - __per_cpu_start; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; setup_percpu_segment(cpu); @@ -438,6 +125,8 @@ void __init setup_per_cpu_areas(void) */ if (cpu == boot_cpu_id) switch_to_new_gdt(cpu); + + DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } /* indicate the early static arrays will soon be gone */ diff --git a/trunk/arch/x86/mm/init_32.c b/trunk/arch/x86/mm/init_32.c index ef0bb941cdf5..06708ee94aa4 100644 --- a/trunk/arch/x86/mm/init_32.c +++ b/trunk/arch/x86/mm/init_32.c @@ -137,23 +137,6 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) return pte_offset_kernel(pmd, 0); } -pmd_t * __init populate_extra_pmd(unsigned long vaddr) -{ - int pgd_idx = pgd_index(vaddr); - int pmd_idx = pmd_index(vaddr); - - return one_md_table_init(swapper_pg_dir + pgd_idx) + pmd_idx; -} - -pte_t * __init populate_extra_pte(unsigned long vaddr) -{ - int pte_idx = pte_index(vaddr); - pmd_t *pmd; - - pmd = populate_extra_pmd(vaddr); - return one_page_table_init(pmd) + pte_idx; -} - static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, unsigned long vaddr, pte_t *lastpte) { diff --git a/trunk/arch/x86/mm/init_64.c b/trunk/arch/x86/mm/init_64.c index 7d4e76da3368..e6d36b490250 100644 --- a/trunk/arch/x86/mm/init_64.c +++ b/trunk/arch/x86/mm/init_64.c @@ -168,51 +168,34 @@ static __ref void *spp_getpage(void) return ptr; } -static pud_t * __init fill_pud(pgd_t *pgd, unsigned long vaddr) +void +set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte) { - if (pgd_none(*pgd)) { - pud_t *pud = (pud_t *)spp_getpage(); - pgd_populate(&init_mm, pgd, pud); - if (pud != pud_offset(pgd, 0)) - printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n", - pud, pud_offset(pgd, 0)); - } - return pud_offset(pgd, vaddr); -} + pud_t *pud; + pmd_t *pmd; + pte_t *pte; -static pmd_t * __init fill_pmd(pud_t *pud, unsigned long vaddr) -{ + pud = pud_page + pud_index(vaddr); if (pud_none(*pud)) { - pmd_t *pmd = (pmd_t *) spp_getpage(); + pmd = (pmd_t *) spp_getpage(); pud_populate(&init_mm, pud, pmd); - if (pmd != pmd_offset(pud, 0)) + if (pmd != pmd_offset(pud, 0)) { printk(KERN_ERR "PAGETABLE BUG #01! 
%p <-> %p\n", - pmd, pmd_offset(pud, 0)); + pmd, pmd_offset(pud, 0)); + return; + } } - return pmd_offset(pud, vaddr); -} - -static pte_t * __init fill_pte(pmd_t *pmd, unsigned long vaddr) -{ + pmd = pmd_offset(pud, vaddr); if (pmd_none(*pmd)) { - pte_t *pte = (pte_t *) spp_getpage(); + pte = (pte_t *) spp_getpage(); pmd_populate_kernel(&init_mm, pmd, pte); - if (pte != pte_offset_kernel(pmd, 0)) + if (pte != pte_offset_kernel(pmd, 0)) { printk(KERN_ERR "PAGETABLE BUG #02!\n"); + return; + } } - return pte_offset_kernel(pmd, vaddr); -} - -void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte) -{ - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - pud = pud_page + pud_index(vaddr); - pmd = fill_pmd(pud, vaddr); - pte = fill_pte(pmd, vaddr); + pte = pte_offset_kernel(pmd, vaddr); set_pte(pte, new_pte); /* @@ -222,7 +205,8 @@ void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte) __flush_tlb_one(vaddr); } -void set_pte_vaddr(unsigned long vaddr, pte_t pteval) +void +set_pte_vaddr(unsigned long vaddr, pte_t pteval) { pgd_t *pgd; pud_t *pud_page; @@ -239,24 +223,6 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval) set_pte_vaddr_pud(pud_page, vaddr, pteval); } -pmd_t * __init populate_extra_pmd(unsigned long vaddr) -{ - pgd_t *pgd; - pud_t *pud; - - pgd = pgd_offset_k(vaddr); - pud = fill_pud(pgd, vaddr); - return fill_pmd(pud, vaddr); -} - -pte_t * __init populate_extra_pte(unsigned long vaddr) -{ - pmd_t *pmd; - - pmd = populate_extra_pmd(vaddr); - return fill_pte(pmd, vaddr); -} - /* * Create large page table mappings for a range of physical addresses. */ diff --git a/trunk/arch/x86/mm/iomap_32.c b/trunk/arch/x86/mm/iomap_32.c index ca53224fc56c..6c2b1af16926 100644 --- a/trunk/arch/x86/mm/iomap_32.c +++ b/trunk/arch/x86/mm/iomap_32.c @@ -20,6 +20,24 @@ #include #include +#ifdef CONFIG_X86_PAE +int +is_io_mapping_possible(resource_size_t base, unsigned long size) +{ + return 1; +} +#else +int +is_io_mapping_possible(resource_size_t base, unsigned long size) +{ + /* There is no way to map greater than 1 << 32 address without PAE */ + if (base + size > 0x100000000ULL) + return 0; + + return 1; +} +#endif + /* Map 'pfn' using fixed map 'type' and protections 'prot' */ void * diff --git a/trunk/arch/x86/xen/enlighten.c b/trunk/arch/x86/xen/enlighten.c index 352ea6830659..c52f4034c7fd 100644 --- a/trunk/arch/x86/xen/enlighten.c +++ b/trunk/arch/x86/xen/enlighten.c @@ -103,7 +103,7 @@ static void xen_vcpu_setup(int cpu) vcpup = &per_cpu(xen_vcpu_info, cpu); - info.mfn = arbitrary_virt_to_mfn(vcpup); + info.mfn = virt_to_mfn(vcpup); info.offset = offset_in_page(vcpup); printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %llx, offset %d\n", @@ -301,10 +301,8 @@ static void xen_load_gdt(const struct desc_ptr *dtr) frames = mcs.args; for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { - frames[f] = arbitrary_virt_to_mfn((void *)va); - + frames[f] = virt_to_mfn(va); make_lowmem_page_readonly((void *)va); - make_lowmem_page_readonly(mfn_to_virt(frames[f])); } MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct)); @@ -316,7 +314,7 @@ static void load_TLS_descriptor(struct thread_struct *t, unsigned int cpu, unsigned int i) { struct desc_struct *gdt = get_cpu_gdt_table(cpu); - xmaddr_t maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); + xmaddr_t maddr = virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); struct multicall_space mc = __xen_mc_entry(0); MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]); @@ -490,7 +488,7 @@ 
static void xen_write_gdt_entry(struct desc_struct *dt, int entry, break; default: { - xmaddr_t maddr = arbitrary_virt_to_machine(&dt[entry]); + xmaddr_t maddr = virt_to_machine(&dt[entry]); xen_mc_flush(); if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc)) @@ -942,6 +940,9 @@ asmlinkage void __init xen_start_kernel(void) possible map and a non-dummy shared_info. */ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; + local_irq_disable(); + early_boot_irqs_off(); + xen_raw_console_write("mapping kernel into physical memory\n"); pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); diff --git a/trunk/arch/x86/xen/mmu.c b/trunk/arch/x86/xen/mmu.c index cb6afa4ec95c..319bd40a57c2 100644 --- a/trunk/arch/x86/xen/mmu.c +++ b/trunk/arch/x86/xen/mmu.c @@ -276,13 +276,6 @@ void set_phys_to_machine(unsigned long pfn, unsigned long mfn) p2m_top[topidx][idx] = mfn; } -unsigned long arbitrary_virt_to_mfn(void *vaddr) -{ - xmaddr_t maddr = arbitrary_virt_to_machine(vaddr); - - return PFN_DOWN(maddr.maddr); -} - xmaddr_t arbitrary_virt_to_machine(void *vaddr) { unsigned long address = (unsigned long)vaddr; diff --git a/trunk/arch/x86/xen/smp.c b/trunk/arch/x86/xen/smp.c index 8d470562ffc9..035582ae815d 100644 --- a/trunk/arch/x86/xen/smp.c +++ b/trunk/arch/x86/xen/smp.c @@ -219,7 +219,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) { struct vcpu_guest_context *ctxt; struct desc_struct *gdt; - unsigned long gdt_mfn; if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map)) return 0; @@ -249,12 +248,9 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) ctxt->ldt_ents = 0; BUG_ON((unsigned long)gdt & ~PAGE_MASK); - - gdt_mfn = arbitrary_virt_to_mfn(gdt); make_lowmem_page_readonly(gdt); - make_lowmem_page_readonly(mfn_to_virt(gdt_mfn)); - ctxt->gdt_frames[0] = gdt_mfn; + ctxt->gdt_frames[0] = virt_to_mfn(gdt); ctxt->gdt_ents = GDT_ENTRIES; ctxt->user_regs.cs = __KERNEL_CS; diff --git a/trunk/block/blktrace.c b/trunk/block/blktrace.c index 028120a0965a..7cf9d1ff45a0 100644 --- a/trunk/block/blktrace.c +++ b/trunk/block/blktrace.c @@ -363,7 +363,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (!bt->sequence) goto err; - bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char)); + bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG); if (!bt->msg_data) goto err; diff --git a/trunk/drivers/acpi/processor_perflib.c b/trunk/drivers/acpi/processor_perflib.c index 68fd3d292799..9cc769b587ff 100644 --- a/trunk/drivers/acpi/processor_perflib.c +++ b/trunk/drivers/acpi/processor_perflib.c @@ -516,12 +516,12 @@ int acpi_processor_preregister_performance( continue; } - if (!performance || !per_cpu_ptr(performance, i)) { + if (!performance || !percpu_ptr(performance, i)) { retval = -EINVAL; continue; } - pr->performance = per_cpu_ptr(performance, i); + pr->performance = percpu_ptr(performance, i); cpumask_set_cpu(i, pr->performance->shared_cpu_map); if (acpi_processor_get_psd(pr)) { retval = -EINVAL; diff --git a/trunk/drivers/gpu/drm/i915/i915_dma.c b/trunk/drivers/gpu/drm/i915/i915_dma.c index 2d797ffe8137..6949c2d58f1d 100644 --- a/trunk/drivers/gpu/drm/i915/i915_dma.c +++ b/trunk/drivers/gpu/drm/i915/i915_dma.c @@ -1090,6 +1090,11 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) dev_priv->mm.gtt_mapping = io_mapping_create_wc(dev->agp->base, dev->agp->agp_info.aper_size * 1024*1024); + if (dev_priv->mm.gtt_mapping == NULL) { + ret = -EIO; + goto out_rmmap; + } + /* Set up a WC MTRR for 
non-PAT systems. This is more common than * one would think, because the kernel disables PAT on first * generation Core chips because WC PAT gets overridden by a UC @@ -1122,7 +1127,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) if (!I915_NEED_GFX_HWS(dev)) { ret = i915_init_phys_hws(dev); if (ret != 0) - goto out_rmmap; + goto out_iomapfree; } /* On the 945G/GM, the chipset reports the MSI capability on the @@ -1161,6 +1166,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) return 0; +out_iomapfree: + io_mapping_free(dev_priv->mm.gtt_mapping); out_rmmap: iounmap(dev_priv->regs); free_priv: diff --git a/trunk/include/linux/bootmem.h b/trunk/include/linux/bootmem.h index 455d83219fae..95837bfb5256 100644 --- a/trunk/include/linux/bootmem.h +++ b/trunk/include/linux/bootmem.h @@ -65,20 +65,23 @@ extern void free_bootmem(unsigned long addr, unsigned long size); #define BOOTMEM_DEFAULT 0 #define BOOTMEM_EXCLUSIVE (1<<0) -extern int reserve_bootmem(unsigned long addr, - unsigned long size, - int flags); extern int reserve_bootmem_node(pg_data_t *pgdat, - unsigned long physaddr, - unsigned long size, - int flags); + unsigned long physaddr, + unsigned long size, + int flags); +#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE +extern int reserve_bootmem(unsigned long addr, unsigned long size, int flags); +#endif -extern void *__alloc_bootmem(unsigned long size, +extern void *__alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal); -extern void *__alloc_bootmem_nopanic(unsigned long size, +extern void *__alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal); +extern void *__alloc_bootmem_low(unsigned long size, + unsigned long align, + unsigned long goal); extern void *__alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align, @@ -87,35 +90,30 @@ extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); -extern void *__alloc_bootmem_low(unsigned long size, - unsigned long align, - unsigned long goal); extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); - +#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE #define alloc_bootmem(x) \ __alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_nopanic(x) \ __alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_low(x) \ + __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0) #define alloc_bootmem_pages(x) \ __alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_nopanic(x) \ __alloc_bootmem_nopanic(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_low_pages(x) \ + __alloc_bootmem_low(x, PAGE_SIZE, 0) #define alloc_bootmem_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) -#define alloc_bootmem_pages_node_nopanic(pgdat, x) \ - __alloc_bootmem_node_nopanic(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) - -#define alloc_bootmem_low(x) \ - __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0) -#define alloc_bootmem_low_pages(x) \ - __alloc_bootmem_low(x, PAGE_SIZE, 0) #define alloc_bootmem_low_pages_node(pgdat, x) \ __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0) +#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ extern int reserve_bootmem_generic(unsigned long addr, unsigned long size, int flags); diff --git 
a/trunk/include/linux/io-mapping.h b/trunk/include/linux/io-mapping.h index 82df31726a54..cbc2f0cd631b 100644 --- a/trunk/include/linux/io-mapping.h +++ b/trunk/include/linux/io-mapping.h @@ -30,11 +30,14 @@ * See Documentation/io_mapping.txt */ -/* this struct isn't actually defined anywhere */ -struct io_mapping; - #ifdef CONFIG_HAVE_ATOMIC_IOMAP +struct io_mapping { + resource_size_t base; + unsigned long size; + pgprot_t prot; +}; + /* * For small address space machines, mapping large objects * into the kernel virtual space isn't practical. Where @@ -43,23 +46,40 @@ struct io_mapping; */ static inline struct io_mapping * -io_mapping_create_wc(unsigned long base, unsigned long size) +io_mapping_create_wc(resource_size_t base, unsigned long size) { - return (struct io_mapping *) base; + struct io_mapping *iomap; + + if (!is_io_mapping_possible(base, size)) + return NULL; + + iomap = kmalloc(sizeof(*iomap), GFP_KERNEL); + if (!iomap) + return NULL; + + iomap->base = base; + iomap->size = size; + iomap->prot = pgprot_writecombine(__pgprot(__PAGE_KERNEL)); + return iomap; } static inline void io_mapping_free(struct io_mapping *mapping) { + kfree(mapping); } /* Atomic map/unmap */ static inline void * io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset) { - offset += (unsigned long) mapping; - return iomap_atomic_prot_pfn(offset >> PAGE_SHIFT, KM_USER0, - __pgprot(__PAGE_KERNEL_WC)); + resource_size_t phys_addr; + unsigned long pfn; + + BUG_ON(offset >= mapping->size); + phys_addr = mapping->base + offset; + pfn = (unsigned long) (phys_addr >> PAGE_SHIFT); + return iomap_atomic_prot_pfn(pfn, KM_USER0, mapping->prot); } static inline void @@ -71,8 +91,9 @@ io_mapping_unmap_atomic(void *vaddr) static inline void * io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) { - offset += (unsigned long) mapping; - return ioremap_wc(offset, PAGE_SIZE); + BUG_ON(offset >= mapping->size); + resource_size_t phys_addr = mapping->base + offset; + return ioremap_wc(phys_addr, PAGE_SIZE); } static inline void @@ -83,9 +104,12 @@ io_mapping_unmap(void *vaddr) #else +/* this struct isn't actually defined anywhere */ +struct io_mapping; + /* Create the io_mapping object*/ static inline struct io_mapping * -io_mapping_create_wc(unsigned long base, unsigned long size) +io_mapping_create_wc(resource_size_t base, unsigned long size) { return (struct io_mapping *) ioremap_wc(base, size); } diff --git a/trunk/include/linux/percpu.h b/trunk/include/linux/percpu.h index 545b068bcb70..3577ffd90d45 100644 --- a/trunk/include/linux/percpu.h +++ b/trunk/include/linux/percpu.h @@ -76,98 +76,52 @@ #ifdef CONFIG_SMP -#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA - -/* minimum unit size, also is the maximum supported allocation size */ -#define PCPU_MIN_UNIT_SIZE (16UL << PAGE_SHIFT) - -/* - * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy - * back on the first chunk if arch is manually allocating and mapping - * it for faster access (as a part of large page mapping for example). - * Note that dynamic percpu allocator covers both static and dynamic - * areas, so these values are bigger than PERCPU_MODULE_RESERVE. - * - * On typical configuration with modules, the following values leave - * about 8k of free space on the first chunk after boot on both x86_32 - * and 64 when module support is enabled. When module support is - * disabled, it's much tighter. 
- */ -#ifndef PERCPU_DYNAMIC_RESERVE -# if BITS_PER_LONG > 32 -# ifdef CONFIG_MODULES -# define PERCPU_DYNAMIC_RESERVE (6 << PAGE_SHIFT) -# else -# define PERCPU_DYNAMIC_RESERVE (4 << PAGE_SHIFT) -# endif -# else -# ifdef CONFIG_MODULES -# define PERCPU_DYNAMIC_RESERVE (4 << PAGE_SHIFT) -# else -# define PERCPU_DYNAMIC_RESERVE (2 << PAGE_SHIFT) -# endif -# endif -#endif /* PERCPU_DYNAMIC_RESERVE */ - -extern void *pcpu_base_addr; - -typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); -typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); - -extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, - size_t static_size, size_t unit_size, - size_t free_size, void *base_addr, - pcpu_populate_pte_fn_t populate_pte_fn); - -/* - * Use this to get to a cpu's version of the per-cpu object - * dynamically allocated. Non-atomic access to the current CPU's - * version should probably be combined with get_cpu()/put_cpu(). - */ -#define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) - -#else /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ - struct percpu_data { void *ptrs[1]; }; #define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) - -#define per_cpu_ptr(ptr, cpu) \ -({ \ - struct percpu_data *__p = __percpu_disguise(ptr); \ - (__typeof__(ptr))__p->ptrs[(cpu)]; \ +/* + * Use this to get to a cpu's version of the per-cpu object dynamically + * allocated. Non-atomic access to the current CPU's version should + * probably be combined with get_cpu()/put_cpu(). + */ +#define percpu_ptr(ptr, cpu) \ +({ \ + struct percpu_data *__p = __percpu_disguise(ptr); \ + (__typeof__(ptr))__p->ptrs[(cpu)]; \ }) -#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ - -extern void *__alloc_percpu(size_t size, size_t align); -extern void free_percpu(void *__pdata); +extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask); +extern void percpu_free(void *__pdata); #else /* CONFIG_SMP */ -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) +#define percpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) -static inline void *__alloc_percpu(size_t size, size_t align) +static __always_inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) { - /* - * Can't easily make larger alignment work with kmalloc. WARN - * on it. Larger alignment should only be used for module - * percpu sections on SMP for which this path isn't used. 
- */ - WARN_ON_ONCE(align > SMP_CACHE_BYTES); - return kzalloc(size, GFP_KERNEL); + return kzalloc(size, gfp); } -static inline void free_percpu(void *p) +static inline void percpu_free(void *__pdata) { - kfree(p); + kfree(__pdata); } #endif /* CONFIG_SMP */ -#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type), \ - __alignof__(type)) +#define percpu_alloc_mask(size, gfp, mask) \ + __percpu_alloc_mask((size), (gfp), &(mask)) + +#define percpu_alloc(size, gfp) percpu_alloc_mask((size), (gfp), cpu_online_map) + +/* (legacy) interface for use without CPU hotplug handling */ + +#define __alloc_percpu(size) percpu_alloc_mask((size), GFP_KERNEL, \ + cpu_possible_map) +#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type)) +#define free_percpu(ptr) percpu_free((ptr)) +#define per_cpu_ptr(ptr, cpu) percpu_ptr((ptr), (cpu)) #endif /* __LINUX_PERCPU_H */ diff --git a/trunk/include/linux/vmalloc.h b/trunk/include/linux/vmalloc.h index a43ebec3a7b9..9c0890c7a06a 100644 --- a/trunk/include/linux/vmalloc.h +++ b/trunk/include/linux/vmalloc.h @@ -95,9 +95,6 @@ extern struct vm_struct *remove_vm_area(const void *addr); extern int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages); -extern int map_kernel_range_noflush(unsigned long start, unsigned long size, - pgprot_t prot, struct page **pages); -extern void unmap_kernel_range_noflush(unsigned long addr, unsigned long size); extern void unmap_kernel_range(unsigned long addr, unsigned long size); /* Allocate/destroy a 'vmalloc' VM area. */ @@ -113,6 +110,5 @@ extern long vwrite(char *buf, char *addr, unsigned long count); */ extern rwlock_t vmlist_lock; extern struct vm_struct *vmlist; -extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); #endif /* _LINUX_VMALLOC_H */ diff --git a/trunk/kernel/module.c b/trunk/kernel/module.c index 1f0657ae555b..ba22484a987e 100644 --- a/trunk/kernel/module.c +++ b/trunk/kernel/module.c @@ -51,7 +51,6 @@ #include #include #include -#include #if 0 #define DEBUGP printk @@ -367,34 +366,6 @@ static struct module *find_module(const char *name) } #ifdef CONFIG_SMP - -#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA - -static void *percpu_modalloc(unsigned long size, unsigned long align, - const char *name) -{ - void *ptr; - - if (align > PAGE_SIZE) { - printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", - name, align, PAGE_SIZE); - align = PAGE_SIZE; - } - - ptr = __alloc_percpu(size, align); - if (!ptr) - printk(KERN_WARNING - "Could not allocate %lu bytes percpu data\n", size); - return ptr; -} - -static void percpu_modfree(void *freeme) -{ - free_percpu(freeme); -} - -#else /* ... !CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ - /* Number of blocks used and allocated. */ static unsigned int pcpu_num_used, pcpu_num_allocated; /* Size of each block. -ve means used. 
*/ @@ -509,6 +480,21 @@ static void percpu_modfree(void *freeme) } } +static unsigned int find_pcpusec(Elf_Ehdr *hdr, + Elf_Shdr *sechdrs, + const char *secstrings) +{ + return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); +} + +static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size) +{ + int cpu; + + for_each_possible_cpu(cpu) + memcpy(pcpudest + per_cpu_offset(cpu), from, size); +} + static int percpu_modinit(void) { pcpu_num_used = 2; @@ -527,26 +513,7 @@ static int percpu_modinit(void) return 0; } __initcall(percpu_modinit); - -#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ - -static unsigned int find_pcpusec(Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, - const char *secstrings) -{ - return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); -} - -static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size) -{ - int cpu; - - for_each_possible_cpu(cpu) - memcpy(pcpudest + per_cpu_offset(cpu), from, size); -} - #else /* ... !CONFIG_SMP */ - static inline void *percpu_modalloc(unsigned long size, unsigned long align, const char *name) { @@ -568,7 +535,6 @@ static inline void percpu_modcopy(void *pcpudst, const void *src, /* pcpusec should be 0, and size of that section should be 0. */ BUG_ON(size != 0); } - #endif /* CONFIG_SMP */ #define MODINFO_ATTR(field) \ diff --git a/trunk/kernel/sched.c b/trunk/kernel/sched.c index 0e5c38e1c8b5..7d97ff7c4478 100644 --- a/trunk/kernel/sched.c +++ b/trunk/kernel/sched.c @@ -9476,7 +9476,7 @@ cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu) { - u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); + u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu); u64 data; #ifndef CONFIG_64BIT @@ -9495,7 +9495,7 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu) static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) { - u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); + u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu); #ifndef CONFIG_64BIT /* @@ -9591,7 +9591,7 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime) ca = task_ca(tsk); for (; ca; ca = ca->parent) { - u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); + u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu); *cpuusage += cputime; } } diff --git a/trunk/kernel/stop_machine.c b/trunk/kernel/stop_machine.c index 74541ca49536..0cd415ee62a2 100644 --- a/trunk/kernel/stop_machine.c +++ b/trunk/kernel/stop_machine.c @@ -170,7 +170,7 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) * doesn't hit this CPU until we're ready. 
*/ get_cpu(); for_each_online_cpu(i) { - sm_work = per_cpu_ptr(stop_machine_work, i); + sm_work = percpu_ptr(stop_machine_work, i); INIT_WORK(sm_work, stop_cpu); queue_work_on(i, stop_machine_wq, sm_work); } diff --git a/trunk/mm/Makefile b/trunk/mm/Makefile index 818569b68f46..72255be57f89 100644 --- a/trunk/mm/Makefile +++ b/trunk/mm/Makefile @@ -30,10 +30,6 @@ obj-$(CONFIG_FAILSLAB) += failslab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o -ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA -obj-$(CONFIG_SMP) += percpu.o -else obj-$(CONFIG_SMP) += allocpercpu.o -endif obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o diff --git a/trunk/mm/allocpercpu.c b/trunk/mm/allocpercpu.c index 3653c570232b..4297bc41bfd2 100644 --- a/trunk/mm/allocpercpu.c +++ b/trunk/mm/allocpercpu.c @@ -99,51 +99,45 @@ static int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, __percpu_populate_mask((__pdata), (size), (gfp), &(mask)) /** - * alloc_percpu - initial setup of per-cpu data + * percpu_alloc_mask - initial setup of per-cpu data * @size: size of per-cpu object - * @align: alignment + * @gfp: may sleep or not etc. + * @mask: populate per-data for cpu's selected through mask bits * - * Allocate dynamic percpu area. Percpu objects are populated with - * zeroed buffers. + * Populating per-cpu data for all online cpu's would be a typical use case, + * which is simplified by the percpu_alloc() wrapper. + * Per-cpu objects are populated with zeroed buffers. */ -void *__alloc_percpu(size_t size, size_t align) +void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) { /* * We allocate whole cache lines to avoid false sharing */ size_t sz = roundup(nr_cpu_ids * sizeof(void *), cache_line_size()); - void *pdata = kzalloc(sz, GFP_KERNEL); + void *pdata = kzalloc(sz, gfp); void *__pdata = __percpu_disguise(pdata); - /* - * Can't easily make larger alignment work with kmalloc. WARN - * on it. Larger alignment should only be used for module - * percpu sections on SMP for which this path isn't used. - */ - WARN_ON_ONCE(align > __alignof__(unsigned long long)); - if (unlikely(!pdata)) return NULL; - if (likely(!__percpu_populate_mask(__pdata, size, GFP_KERNEL, - &cpu_possible_map))) + if (likely(!__percpu_populate_mask(__pdata, size, gfp, mask))) return __pdata; kfree(pdata); return NULL; } -EXPORT_SYMBOL_GPL(__alloc_percpu); +EXPORT_SYMBOL_GPL(__percpu_alloc_mask); /** - * free_percpu - final cleanup of per-cpu data + * percpu_free - final cleanup of per-cpu data * @__pdata: object to clean up * * We simply clean up any per-cpu object left. No need for the client to * track and specify through a bis mask which per-cpu objects are to free. 
*/ -void free_percpu(void *__pdata) +void percpu_free(void *__pdata) { if (unlikely(!__pdata)) return; __percpu_depopulate_mask(__pdata, &cpu_possible_map); kfree(__percpu_disguise(__pdata)); } -EXPORT_SYMBOL_GPL(free_percpu); +EXPORT_SYMBOL_GPL(percpu_free); diff --git a/trunk/mm/bootmem.c b/trunk/mm/bootmem.c index daf92713f7de..51a0ccf61e0e 100644 --- a/trunk/mm/bootmem.c +++ b/trunk/mm/bootmem.c @@ -382,6 +382,7 @@ int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, return mark_bootmem_node(pgdat->bdata, start, end, 1, flags); } +#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE /** * reserve_bootmem - mark a page range as usable * @addr: starting address of the range @@ -402,6 +403,7 @@ int __init reserve_bootmem(unsigned long addr, unsigned long size, return mark_bootmem(start, end, 1, flags); } +#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ static unsigned long align_idx(struct bootmem_data *bdata, unsigned long idx, unsigned long step) @@ -427,8 +429,8 @@ static unsigned long align_off(struct bootmem_data *bdata, unsigned long off, } static void * __init alloc_bootmem_core(struct bootmem_data *bdata, - unsigned long size, unsigned long align, - unsigned long goal, unsigned long limit) + unsigned long size, unsigned long align, + unsigned long goal, unsigned long limit) { unsigned long fallback = 0; unsigned long min, max, start, sidx, midx, step; @@ -528,34 +530,17 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata, return NULL; } -static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata, - unsigned long size, unsigned long align, - unsigned long goal, unsigned long limit) -{ -#ifdef CONFIG_HAVE_ARCH_BOOTMEM - bootmem_data_t *p_bdata; - - p_bdata = bootmem_arch_preferred_node(bdata, size, align, goal, limit); - if (p_bdata) - return alloc_bootmem_core(p_bdata, size, align, goal, limit); -#endif - return NULL; -} - static void * __init ___alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal, unsigned long limit) { bootmem_data_t *bdata; - void *region; restart: - region = alloc_arch_preferred_bootmem(NULL, size, align, goal, limit); - if (region) - return region; - list_for_each_entry(bdata, &bdata_list, list) { + void *region; + if (goal && bdata->node_low_pfn <= PFN_DOWN(goal)) continue; if (limit && bdata->node_min_pfn >= PFN_DOWN(limit)) @@ -633,10 +618,6 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, { void *ptr; - ptr = alloc_arch_preferred_bootmem(bdata, size, align, goal, limit); - if (ptr) - return ptr; - ptr = alloc_bootmem_core(bdata, size, align, goal, limit); if (ptr) return ptr; @@ -693,10 +674,6 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, { void *ptr; - ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0); - if (ptr) - return ptr; - ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); if (ptr) return ptr; diff --git a/trunk/mm/percpu.c b/trunk/mm/percpu.c deleted file mode 100644 index 3d0f5456827c..000000000000 --- a/trunk/mm/percpu.c +++ /dev/null @@ -1,979 +0,0 @@ -/* - * linux/mm/percpu.c - percpu memory allocator - * - * Copyright (C) 2009 SUSE Linux Products GmbH - * Copyright (C) 2009 Tejun Heo - * - * This file is released under the GPLv2. - * - * This is percpu allocator which can handle both static and dynamic - * areas. Percpu areas are allocated in chunks in vmalloc area. 
Each - * chunk is consisted of num_possible_cpus() units and the first chunk - * is used for static percpu variables in the kernel image (special - * boot time alloc/init handling necessary as these areas need to be - * brought up before allocation services are running). Unit grows as - * necessary and all units grow or shrink in unison. When a chunk is - * filled up, another chunk is allocated. ie. in vmalloc area - * - * c0 c1 c2 - * ------------------- ------------------- ------------ - * | u0 | u1 | u2 | u3 | | u0 | u1 | u2 | u3 | | u0 | u1 | u - * ------------------- ...... ------------------- .... ------------ - * - * Allocation is done in offset-size areas of single unit space. Ie, - * an area of 512 bytes at 6k in c1 occupies 512 bytes at 6k of c1:u0, - * c1:u1, c1:u2 and c1:u3. Percpu access can be done by configuring - * percpu base registers UNIT_SIZE apart. - * - * There are usually many small percpu allocations many of them as - * small as 4 bytes. The allocator organizes chunks into lists - * according to free size and tries to allocate from the fullest one. - * Each chunk keeps the maximum contiguous area size hint which is - * guaranteed to be eqaul to or larger than the maximum contiguous - * area in the chunk. This helps the allocator not to iterate the - * chunk maps unnecessarily. - * - * Allocation state in each chunk is kept using an array of integers - * on chunk->map. A positive value in the map represents a free - * region and negative allocated. Allocation inside a chunk is done - * by scanning this map sequentially and serving the first matching - * entry. This is mostly copied from the percpu_modalloc() allocator. - * Chunks are also linked into a rb tree to ease address to chunk - * mapping during free. - * - * To use this allocator, arch code should do the followings. - * - * - define CONFIG_HAVE_DYNAMIC_PER_CPU_AREA - * - * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate - * regular address to percpu pointer and back - * - * - use pcpu_setup_first_chunk() during percpu area initialization to - * setup the first chunk containing the kernel static percpu area - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ -#define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ - -struct pcpu_chunk { - struct list_head list; /* linked to pcpu_slot lists */ - struct rb_node rb_node; /* key is chunk->vm->addr */ - int free_size; /* free bytes in the chunk */ - int contig_hint; /* max contiguous size hint */ - struct vm_struct *vm; /* mapped vmalloc region */ - int map_used; /* # of map entries used */ - int map_alloc; /* # of map entries allocated */ - int *map; /* allocation map */ - bool immutable; /* no [de]population allowed */ - struct page *page[]; /* #cpus * UNIT_PAGES */ -}; - -static int pcpu_unit_pages __read_mostly; -static int pcpu_unit_size __read_mostly; -static int pcpu_chunk_size __read_mostly; -static int pcpu_nr_slots __read_mostly; -static size_t pcpu_chunk_struct_size __read_mostly; - -/* the address of the first chunk which starts with the kernel static area */ -void *pcpu_base_addr __read_mostly; -EXPORT_SYMBOL_GPL(pcpu_base_addr); - -/* the size of kernel static area */ -static int pcpu_static_size __read_mostly; - -/* - * One mutex to rule them all. 
- * - * The following mutex is grabbed in the outermost public alloc/free - * interface functions and released only when the operation is - * complete. As such, every function in this file other than the - * outermost functions are called under pcpu_mutex. - * - * It can easily be switched to use spinlock such that only the area - * allocation and page population commit are protected with it doing - * actual [de]allocation without holding any lock. However, given - * what this allocator does, I think it's better to let them run - * sequentially. - */ -static DEFINE_MUTEX(pcpu_mutex); - -static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ -static struct rb_root pcpu_addr_root = RB_ROOT; /* chunks by address */ - -static int __pcpu_size_to_slot(int size) -{ - int highbit = fls(size); /* size is in bytes */ - return max(highbit - PCPU_SLOT_BASE_SHIFT + 2, 1); -} - -static int pcpu_size_to_slot(int size) -{ - if (size == pcpu_unit_size) - return pcpu_nr_slots - 1; - return __pcpu_size_to_slot(size); -} - -static int pcpu_chunk_slot(const struct pcpu_chunk *chunk) -{ - if (chunk->free_size < sizeof(int) || chunk->contig_hint < sizeof(int)) - return 0; - - return pcpu_size_to_slot(chunk->free_size); -} - -static int pcpu_page_idx(unsigned int cpu, int page_idx) -{ - return cpu * pcpu_unit_pages + page_idx; -} - -static struct page **pcpu_chunk_pagep(struct pcpu_chunk *chunk, - unsigned int cpu, int page_idx) -{ - return &chunk->page[pcpu_page_idx(cpu, page_idx)]; -} - -static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk, - unsigned int cpu, int page_idx) -{ - return (unsigned long)chunk->vm->addr + - (pcpu_page_idx(cpu, page_idx) << PAGE_SHIFT); -} - -static bool pcpu_chunk_page_occupied(struct pcpu_chunk *chunk, - int page_idx) -{ - return *pcpu_chunk_pagep(chunk, 0, page_idx) != NULL; -} - -/** - * pcpu_realloc - versatile realloc - * @p: the current pointer (can be NULL for new allocations) - * @size: the current size in bytes (can be 0 for new allocations) - * @new_size: the wanted new size in bytes (can be 0 for free) - * - * More robust realloc which can be used to allocate, resize or free a - * memory area of arbitrary size. If the needed size goes over - * PAGE_SIZE, kernel VM is used. - * - * RETURNS: - * The new pointer on success, NULL on failure. - */ -static void *pcpu_realloc(void *p, size_t size, size_t new_size) -{ - void *new; - - if (new_size <= PAGE_SIZE) - new = kmalloc(new_size, GFP_KERNEL); - else - new = vmalloc(new_size); - if (new_size && !new) - return NULL; - - memcpy(new, p, min(size, new_size)); - if (new_size > size) - memset(new + size, 0, new_size - size); - - if (size <= PAGE_SIZE) - kfree(p); - else - vfree(p); - - return new; -} - -/** - * pcpu_chunk_relocate - put chunk in the appropriate chunk slot - * @chunk: chunk of interest - * @oslot: the previous slot it was on - * - * This function is called after an allocation or free changed @chunk. - * New slot according to the changed state is determined and @chunk is - * moved to the slot. 
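(Aside on the slot helpers above: chunks are binned by free size using __pcpu_size_to_slot(), which is a log2 bucketing with the bottom buckets clamped to slot 1. The standalone program below reproduces only that arithmetic for a few example sizes; fls_portable() is a portable stand-in for the kernel's fls() and the sizes are arbitrary.)

#include <stdio.h>

#define PCPU_SLOT_BASE_SHIFT 5		/* same constant as in the deleted code */

/* portable stand-in for the kernel's fls(): index of the highest set bit, 1-based */
static int fls_portable(unsigned int x)
{
	int bit = 0;

	while (x) {
		bit++;
		x >>= 1;
	}
	return bit;
}

/* mirrors __pcpu_size_to_slot(): log2-bucketed, clamped to slot 1 at the bottom */
static int size_to_slot(int size)
{
	int slot = fls_portable((unsigned int)size) - PCPU_SLOT_BASE_SHIFT + 2;

	return slot > 1 ? slot : 1;
}

int main(void)
{
	int sizes[] = { 4, 16, 64, 512, 4096, 65536 };
	unsigned int i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("size %6d bytes -> slot %d\n", sizes[i], size_to_slot(sizes[i]));
	return 0;
}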
- */ -static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot) -{ - int nslot = pcpu_chunk_slot(chunk); - - if (oslot != nslot) { - if (oslot < nslot) - list_move(&chunk->list, &pcpu_slot[nslot]); - else - list_move_tail(&chunk->list, &pcpu_slot[nslot]); - } -} - -static struct rb_node **pcpu_chunk_rb_search(void *addr, - struct rb_node **parentp) -{ - struct rb_node **p = &pcpu_addr_root.rb_node; - struct rb_node *parent = NULL; - struct pcpu_chunk *chunk; - - while (*p) { - parent = *p; - chunk = rb_entry(parent, struct pcpu_chunk, rb_node); - - if (addr < chunk->vm->addr) - p = &(*p)->rb_left; - else if (addr > chunk->vm->addr) - p = &(*p)->rb_right; - else - break; - } - - if (parentp) - *parentp = parent; - return p; -} - -/** - * pcpu_chunk_addr_search - search for chunk containing specified address - * @addr: address to search for - * - * Look for chunk which might contain @addr. More specifically, it - * searchs for the chunk with the highest start address which isn't - * beyond @addr. - * - * RETURNS: - * The address of the found chunk. - */ -static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) -{ - struct rb_node *n, *parent; - struct pcpu_chunk *chunk; - - n = *pcpu_chunk_rb_search(addr, &parent); - if (!n) { - /* no exactly matching chunk, the parent is the closest */ - n = parent; - BUG_ON(!n); - } - chunk = rb_entry(n, struct pcpu_chunk, rb_node); - - if (addr < chunk->vm->addr) { - /* the parent was the next one, look for the previous one */ - n = rb_prev(n); - BUG_ON(!n); - chunk = rb_entry(n, struct pcpu_chunk, rb_node); - } - - return chunk; -} - -/** - * pcpu_chunk_addr_insert - insert chunk into address rb tree - * @new: chunk to insert - * - * Insert @new into address rb tree. - */ -static void pcpu_chunk_addr_insert(struct pcpu_chunk *new) -{ - struct rb_node **p, *parent; - - p = pcpu_chunk_rb_search(new->vm->addr, &parent); - BUG_ON(*p); - rb_link_node(&new->rb_node, parent, p); - rb_insert_color(&new->rb_node, &pcpu_addr_root); -} - -/** - * pcpu_split_block - split a map block - * @chunk: chunk of interest - * @i: index of map block to split - * @head: head size in bytes (can be 0) - * @tail: tail size in bytes (can be 0) - * - * Split the @i'th map block into two or three blocks. If @head is - * non-zero, @head bytes block is inserted before block @i moving it - * to @i+1 and reducing its size by @head bytes. - * - * If @tail is non-zero, the target block, which can be @i or @i+1 - * depending on @head, is reduced by @tail bytes and @tail byte block - * is inserted after the target block. - * - * RETURNS: - * 0 on success, -errno on failure. - */ -static int pcpu_split_block(struct pcpu_chunk *chunk, int i, int head, int tail) -{ - int nr_extra = !!head + !!tail; - int target = chunk->map_used + nr_extra; - - /* reallocation required? 
*/ - if (chunk->map_alloc < target) { - int new_alloc = chunk->map_alloc; - int *new; - - while (new_alloc < target) - new_alloc *= 2; - - new = pcpu_realloc(chunk->map, - chunk->map_alloc * sizeof(new[0]), - new_alloc * sizeof(new[0])); - if (!new) - return -ENOMEM; - - chunk->map_alloc = new_alloc; - chunk->map = new; - } - - /* insert a new subblock */ - memmove(&chunk->map[i + nr_extra], &chunk->map[i], - sizeof(chunk->map[0]) * (chunk->map_used - i)); - chunk->map_used += nr_extra; - - if (head) { - chunk->map[i + 1] = chunk->map[i] - head; - chunk->map[i++] = head; - } - if (tail) { - chunk->map[i++] -= tail; - chunk->map[i] = tail; - } - return 0; -} - -/** - * pcpu_alloc_area - allocate area from a pcpu_chunk - * @chunk: chunk of interest - * @size: wanted size in bytes - * @align: wanted align - * - * Try to allocate @size bytes area aligned at @align from @chunk. - * Note that this function only allocates the offset. It doesn't - * populate or map the area. - * - * RETURNS: - * Allocated offset in @chunk on success, -errno on failure. - */ -static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) -{ - int oslot = pcpu_chunk_slot(chunk); - int max_contig = 0; - int i, off; - - /* - * The static chunk initially doesn't have map attached - * because kmalloc wasn't available during init. Give it one. - */ - if (unlikely(!chunk->map)) { - chunk->map = pcpu_realloc(NULL, 0, - PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); - if (!chunk->map) - return -ENOMEM; - - chunk->map_alloc = PCPU_DFL_MAP_ALLOC; - chunk->map[chunk->map_used++] = -pcpu_static_size; - if (chunk->free_size) - chunk->map[chunk->map_used++] = chunk->free_size; - } - - for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) { - bool is_last = i + 1 == chunk->map_used; - int head, tail; - - /* extra for alignment requirement */ - head = ALIGN(off, align) - off; - BUG_ON(i == 0 && head != 0); - - if (chunk->map[i] < 0) - continue; - if (chunk->map[i] < head + size) { - max_contig = max(chunk->map[i], max_contig); - continue; - } - - /* - * If head is small or the previous block is free, - * merge'em. Note that 'small' is defined as smaller - * than sizeof(int), which is very small but isn't too - * uncommon for percpu allocations. - */ - if (head && (head < sizeof(int) || chunk->map[i - 1] > 0)) { - if (chunk->map[i - 1] > 0) - chunk->map[i - 1] += head; - else { - chunk->map[i - 1] -= head; - chunk->free_size -= head; - } - chunk->map[i] -= head; - off += head; - head = 0; - } - - /* if tail is small, just keep it around */ - tail = chunk->map[i] - head - size; - if (tail < sizeof(int)) - tail = 0; - - /* split if warranted */ - if (head || tail) { - if (pcpu_split_block(chunk, i, head, tail)) - return -ENOMEM; - if (head) { - i++; - off += head; - max_contig = max(chunk->map[i - 1], max_contig); - } - if (tail) - max_contig = max(chunk->map[i + 1], max_contig); - } - - /* update hint and mark allocated */ - if (is_last) - chunk->contig_hint = max_contig; /* fully scanned */ - else - chunk->contig_hint = max(chunk->contig_hint, - max_contig); - - chunk->free_size -= chunk->map[i]; - chunk->map[i] = -chunk->map[i]; - - pcpu_chunk_relocate(chunk, oslot); - return off; - } - - chunk->contig_hint = max_contig; /* fully scanned */ - pcpu_chunk_relocate(chunk, oslot); - - /* - * Tell the upper layer that this chunk has no area left. - * Note that this is not an error condition but a notification - * to upper layer that it needs to look at other chunks. 
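(Aside on pcpu_alloc_area() above: the allocation map is an array of signed sizes, positive entries are free regions and negative entries allocated ones, and allocation is a first-fit walk that may split a free entry. The toy program below shows just that walk and the front split; it deliberately ignores alignment and the small-head/tail merging that the real code performs.)

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Toy model of the chunk->map convention: signed sizes, positive = free,
 * negative = allocated.  Simplified first-fit, no alignment handling. */
static int map[16] = { -1024, 512, -256, 2048 };	/* example chunk state */
static int map_used = 4;

static int toy_alloc(int size)
{
	int off = 0;
	int i;

	for (i = 0; i < map_used; off += abs(map[i++])) {
		if (map[i] < size)	/* allocated, or free but too small */
			continue;
		if (map[i] > size) {	/* split: carve the request off the front */
			memmove(&map[i + 1], &map[i],
				(map_used - i) * sizeof(map[0]));
			map_used++;
			map[i + 1] -= size;
		}
		map[i] = -size;		/* mark allocated */
		return off;		/* offset inside every cpu's unit */
	}
	return -1;			/* -ENOSPC in the real allocator */
}

int main(void)
{
	printf("alloc 512  -> offset %d\n", toy_alloc(512));	/* 1024 */
	printf("alloc 1024 -> offset %d\n", toy_alloc(1024));	/* 1792 */
	return 0;
}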
- * -ENOSPC is chosen as it isn't used in memory subsystem and - * matches the meaning in a way. - */ - return -ENOSPC; -} - -/** - * pcpu_free_area - free area to a pcpu_chunk - * @chunk: chunk of interest - * @freeme: offset of area to free - * - * Free area starting from @freeme to @chunk. Note that this function - * only modifies the allocation map. It doesn't depopulate or unmap - * the area. - */ -static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) -{ - int oslot = pcpu_chunk_slot(chunk); - int i, off; - - for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) - if (off == freeme) - break; - BUG_ON(off != freeme); - BUG_ON(chunk->map[i] > 0); - - chunk->map[i] = -chunk->map[i]; - chunk->free_size += chunk->map[i]; - - /* merge with previous? */ - if (i > 0 && chunk->map[i - 1] >= 0) { - chunk->map[i - 1] += chunk->map[i]; - chunk->map_used--; - memmove(&chunk->map[i], &chunk->map[i + 1], - (chunk->map_used - i) * sizeof(chunk->map[0])); - i--; - } - /* merge with next? */ - if (i + 1 < chunk->map_used && chunk->map[i + 1] >= 0) { - chunk->map[i] += chunk->map[i + 1]; - chunk->map_used--; - memmove(&chunk->map[i + 1], &chunk->map[i + 2], - (chunk->map_used - (i + 1)) * sizeof(chunk->map[0])); - } - - chunk->contig_hint = max(chunk->map[i], chunk->contig_hint); - pcpu_chunk_relocate(chunk, oslot); -} - -/** - * pcpu_unmap - unmap pages out of a pcpu_chunk - * @chunk: chunk of interest - * @page_start: page index of the first page to unmap - * @page_end: page index of the last page to unmap + 1 - * @flush: whether to flush cache and tlb or not - * - * For each cpu, unmap pages [@page_start,@page_end) out of @chunk. - * If @flush is true, vcache is flushed before unmapping and tlb - * after. - */ -static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end, - bool flush) -{ - unsigned int last = num_possible_cpus() - 1; - unsigned int cpu; - - /* unmap must not be done on immutable chunk */ - WARN_ON(chunk->immutable); - - /* - * Each flushing trial can be very expensive, issue flush on - * the whole region at once rather than doing it for each cpu. - * This could be an overkill but is more scalable. - */ - if (flush) - flush_cache_vunmap(pcpu_chunk_addr(chunk, 0, page_start), - pcpu_chunk_addr(chunk, last, page_end)); - - for_each_possible_cpu(cpu) - unmap_kernel_range_noflush( - pcpu_chunk_addr(chunk, cpu, page_start), - (page_end - page_start) << PAGE_SHIFT); - - /* ditto as flush_cache_vunmap() */ - if (flush) - flush_tlb_kernel_range(pcpu_chunk_addr(chunk, 0, page_start), - pcpu_chunk_addr(chunk, last, page_end)); -} - -/** - * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk - * @chunk: chunk to depopulate - * @off: offset to the area to depopulate - * @size: size of the area to depopulate in bytes - * @flush: whether to flush cache and tlb or not - * - * For each cpu, depopulate and unmap pages [@page_start,@page_end) - * from @chunk. If @flush is true, vcache is flushed before unmapping - * and tlb after. 
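(Companion aside to the sketch above, this time for pcpu_free_area(): the freed entry flips back to positive and is coalesced with free neighbours so the map stays compact. Again a self-contained toy; the real code additionally BUG_ONs on an offset that does not match a map entry and updates free_size and the contig hint.)

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int map[8] = { -1024, -512, 256, -2048 };	/* example chunk state */
static int map_used = 4;

static void toy_free(int freeme)
{
	int off = 0, i;

	/* walk to the entry that starts at the freed offset */
	for (i = 0; i < map_used; off += abs(map[i++]))
		if (off == freeme)
			break;

	map[i] = -map[i];			/* mark free again */

	/* merge with a free previous neighbour */
	if (i > 0 && map[i - 1] >= 0) {
		map[i - 1] += map[i];
		memmove(&map[i], &map[i + 1], (map_used - i - 1) * sizeof(map[0]));
		map_used--;
		i--;
	}
	/* merge with a free next neighbour */
	if (i + 1 < map_used && map[i + 1] >= 0) {
		map[i] += map[i + 1];
		memmove(&map[i + 1], &map[i + 2], (map_used - i - 2) * sizeof(map[0]));
		map_used--;
	}
}

int main(void)
{
	int i;

	toy_free(1024);		/* frees the 512-byte area, merges with the free 256 after it */
	for (i = 0; i < map_used; i++)
		printf("map[%d] = %d\n", i, map[i]);
	/* expected: -1024, 768, -2048 */
	return 0;
}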
- */ -static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size, - bool flush) -{ - int page_start = PFN_DOWN(off); - int page_end = PFN_UP(off + size); - int unmap_start = -1; - int uninitialized_var(unmap_end); - unsigned int cpu; - int i; - - for (i = page_start; i < page_end; i++) { - for_each_possible_cpu(cpu) { - struct page **pagep = pcpu_chunk_pagep(chunk, cpu, i); - - if (!*pagep) - continue; - - __free_page(*pagep); - - /* - * If it's partial depopulation, it might get - * populated or depopulated again. Mark the - * page gone. - */ - *pagep = NULL; - - unmap_start = unmap_start < 0 ? i : unmap_start; - unmap_end = i + 1; - } - } - - if (unmap_start >= 0) - pcpu_unmap(chunk, unmap_start, unmap_end, flush); -} - -/** - * pcpu_map - map pages into a pcpu_chunk - * @chunk: chunk of interest - * @page_start: page index of the first page to map - * @page_end: page index of the last page to map + 1 - * - * For each cpu, map pages [@page_start,@page_end) into @chunk. - * vcache is flushed afterwards. - */ -static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end) -{ - unsigned int last = num_possible_cpus() - 1; - unsigned int cpu; - int err; - - /* map must not be done on immutable chunk */ - WARN_ON(chunk->immutable); - - for_each_possible_cpu(cpu) { - err = map_kernel_range_noflush( - pcpu_chunk_addr(chunk, cpu, page_start), - (page_end - page_start) << PAGE_SHIFT, - PAGE_KERNEL, - pcpu_chunk_pagep(chunk, cpu, page_start)); - if (err < 0) - return err; - } - - /* flush at once, please read comments in pcpu_unmap() */ - flush_cache_vmap(pcpu_chunk_addr(chunk, 0, page_start), - pcpu_chunk_addr(chunk, last, page_end)); - return 0; -} - -/** - * pcpu_populate_chunk - populate and map an area of a pcpu_chunk - * @chunk: chunk of interest - * @off: offset to the area to populate - * @size: size of the area to populate in bytes - * - * For each cpu, populate and map pages [@page_start,@page_end) into - * @chunk. The area is cleared on return. - */ -static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) -{ - const gfp_t alloc_mask = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD; - int page_start = PFN_DOWN(off); - int page_end = PFN_UP(off + size); - int map_start = -1; - int uninitialized_var(map_end); - unsigned int cpu; - int i; - - for (i = page_start; i < page_end; i++) { - if (pcpu_chunk_page_occupied(chunk, i)) { - if (map_start >= 0) { - if (pcpu_map(chunk, map_start, map_end)) - goto err; - map_start = -1; - } - continue; - } - - map_start = map_start < 0 ? 
i : map_start; - map_end = i + 1; - - for_each_possible_cpu(cpu) { - struct page **pagep = pcpu_chunk_pagep(chunk, cpu, i); - - *pagep = alloc_pages_node(cpu_to_node(cpu), - alloc_mask, 0); - if (!*pagep) - goto err; - } - } - - if (map_start >= 0 && pcpu_map(chunk, map_start, map_end)) - goto err; - - for_each_possible_cpu(cpu) - memset(chunk->vm->addr + cpu * pcpu_unit_size + off, 0, - size); - - return 0; -err: - /* likely under heavy memory pressure, give memory back */ - pcpu_depopulate_chunk(chunk, off, size, true); - return -ENOMEM; -} - -static void free_pcpu_chunk(struct pcpu_chunk *chunk) -{ - if (!chunk) - return; - if (chunk->vm) - free_vm_area(chunk->vm); - pcpu_realloc(chunk->map, chunk->map_alloc * sizeof(chunk->map[0]), 0); - kfree(chunk); -} - -static struct pcpu_chunk *alloc_pcpu_chunk(void) -{ - struct pcpu_chunk *chunk; - - chunk = kzalloc(pcpu_chunk_struct_size, GFP_KERNEL); - if (!chunk) - return NULL; - - chunk->map = pcpu_realloc(NULL, 0, - PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); - chunk->map_alloc = PCPU_DFL_MAP_ALLOC; - chunk->map[chunk->map_used++] = pcpu_unit_size; - - chunk->vm = get_vm_area(pcpu_chunk_size, GFP_KERNEL); - if (!chunk->vm) { - free_pcpu_chunk(chunk); - return NULL; - } - - INIT_LIST_HEAD(&chunk->list); - chunk->free_size = pcpu_unit_size; - chunk->contig_hint = pcpu_unit_size; - - return chunk; -} - -/** - * __alloc_percpu - allocate percpu area - * @size: size of area to allocate in bytes - * @align: alignment of area (max PAGE_SIZE) - * - * Allocate percpu area of @size bytes aligned at @align. Might - * sleep. Might trigger writeouts. - * - * RETURNS: - * Percpu pointer to the allocated area on success, NULL on failure. - */ -void *__alloc_percpu(size_t size, size_t align) -{ - void *ptr = NULL; - struct pcpu_chunk *chunk; - int slot, off; - - if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { - WARN(true, "illegal size (%zu) or align (%zu) for " - "percpu allocation\n", size, align); - return NULL; - } - - mutex_lock(&pcpu_mutex); - - /* allocate area */ - for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) { - list_for_each_entry(chunk, &pcpu_slot[slot], list) { - if (size > chunk->contig_hint) - continue; - off = pcpu_alloc_area(chunk, size, align); - if (off >= 0) - goto area_found; - if (off != -ENOSPC) - goto out_unlock; - } - } - - /* hmmm... no space left, create a new chunk */ - chunk = alloc_pcpu_chunk(); - if (!chunk) - goto out_unlock; - pcpu_chunk_relocate(chunk, -1); - pcpu_chunk_addr_insert(chunk); - - off = pcpu_alloc_area(chunk, size, align); - if (off < 0) - goto out_unlock; - -area_found: - /* populate, map and clear the area */ - if (pcpu_populate_chunk(chunk, off, size)) { - pcpu_free_area(chunk, off); - goto out_unlock; - } - - ptr = __addr_to_pcpu_ptr(chunk->vm->addr + off); -out_unlock: - mutex_unlock(&pcpu_mutex); - return ptr; -} -EXPORT_SYMBOL_GPL(__alloc_percpu); - -static void pcpu_kill_chunk(struct pcpu_chunk *chunk) -{ - WARN_ON(chunk->immutable); - pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size, false); - list_del(&chunk->list); - rb_erase(&chunk->rb_node, &pcpu_addr_root); - free_pcpu_chunk(chunk); -} - -/** - * free_percpu - free percpu area - * @ptr: pointer to area to free - * - * Free percpu area @ptr. Might sleep. 
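(Aside on the caller-facing interface documented above: the usual pattern is to allocate once with __alloc_percpu(), touch the current cpu's copy through per_cpu_ptr(), and release with free_percpu(). A kernel-context usage sketch follows; the foo_* names are illustrative only and not from this patch, while __alloc_percpu, free_percpu, per_cpu_ptr, get_cpu and put_cpu are the existing interfaces referenced here.)

#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/errno.h>

struct foo_stats {
	unsigned long events;
};

static struct foo_stats *foo_stats;		/* percpu pointer, one copy per cpu */

static int foo_init(void)
{
	foo_stats = __alloc_percpu(sizeof(struct foo_stats),
				   __alignof__(struct foo_stats));
	return foo_stats ? 0 : -ENOMEM;
}

static void foo_count_event(void)
{
	/* every cpu touches the same offset inside its own unit */
	struct foo_stats *s = per_cpu_ptr(foo_stats, get_cpu());

	s->events++;
	put_cpu();
}

static void foo_exit(void)
{
	free_percpu(foo_stats);			/* may sleep, per the comment above */
}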
- */ -void free_percpu(void *ptr) -{ - void *addr = __pcpu_ptr_to_addr(ptr); - struct pcpu_chunk *chunk; - int off; - - if (!ptr) - return; - - mutex_lock(&pcpu_mutex); - - chunk = pcpu_chunk_addr_search(addr); - off = addr - chunk->vm->addr; - - pcpu_free_area(chunk, off); - - /* the chunk became fully free, kill one if there are other free ones */ - if (chunk->free_size == pcpu_unit_size) { - struct pcpu_chunk *pos; - - list_for_each_entry(pos, - &pcpu_slot[pcpu_chunk_slot(chunk)], list) - if (pos != chunk) { - pcpu_kill_chunk(pos); - break; - } - } - - mutex_unlock(&pcpu_mutex); -} -EXPORT_SYMBOL_GPL(free_percpu); - -/** - * pcpu_setup_first_chunk - initialize the first percpu chunk - * @get_page_fn: callback to fetch page pointer - * @static_size: the size of static percpu area in bytes - * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, 0 for auto - * @free_size: free size in bytes, 0 for auto - * @base_addr: mapped address, NULL for auto - * @populate_pte_fn: callback to allocate pagetable, NULL if unnecessary - * - * Initialize the first percpu chunk which contains the kernel static - * perpcu area. This function is to be called from arch percpu area - * setup path. The first two parameters are mandatory. The rest are - * optional. - * - * @get_page_fn() should return pointer to percpu page given cpu - * number and page number. It should at least return enough pages to - * cover the static area. The returned pages for static area should - * have been initialized with valid data. If @unit_size is specified, - * it can also return pages after the static area. NULL return - * indicates end of pages for the cpu. Note that @get_page_fn() must - * return the same number of pages for all cpus. - * - * @unit_size, if non-zero, determines unit size and must be aligned - * to PAGE_SIZE and equal to or larger than @static_size + @free_size. - * - * @free_size determines the number of free bytes after the static - * area in the first chunk. If zero, whatever left is available. - * Specifying non-zero value make percpu leave the area after - * @static_size + @free_size alone. - * - * Non-null @base_addr means that the caller already allocated virtual - * region for the first chunk and mapped it. percpu must not mess - * with the chunk. Note that @base_addr with 0 @unit_size or non-NULL - * @populate_pte_fn doesn't make any sense. - * - * @populate_pte_fn is used to populate the pagetable. NULL means the - * caller already populated the pagetable. - * - * RETURNS: - * The determined pcpu_unit_size which can be used to initialize - * percpu access. 
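(Aside on the setup interface documented above: an architecture hands pcpu_setup_first_chunk() a page-lookup callback covering at least the static area and, optionally, a pagetable-populate callback, then programs its percpu base registers from pcpu_base_addr and the returned unit size. The sketch below only illustrates that wiring under assumptions — the example_* helpers and the page array are invented for illustration and are not taken from this patch or from any architecture's code.)

#include <linux/percpu.h>
#include <asm/sections.h>		/* __per_cpu_start / __per_cpu_end */

static struct page **example_pages;	/* [cpu * nr_pages + pageno], filled earlier at boot */
static int example_nr_pages;

static struct page * __init example_get_page(unsigned int cpu, int pageno)
{
	if (pageno >= example_nr_pages)
		return NULL;			/* end of pages for this cpu */
	return example_pages[cpu * example_nr_pages + pageno];
}

static void __init example_populate_pte(unsigned long addr)
{
	/* arch code would make sure a pte exists for addr here */
}

void __init example_setup_per_cpu_areas(void)
{
	size_t static_size = __per_cpu_end - __per_cpu_start;
	size_t unit_size;

	/* 0/0/NULL: let percpu pick unit size, free size and the vm area */
	unit_size = pcpu_setup_first_chunk(example_get_page, static_size,
					   0, 0, NULL, example_populate_pte);

	/* percpu base for each cpu is then pcpu_base_addr + cpu * unit_size */
	(void)unit_size;
}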
- */ -size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, - size_t static_size, size_t unit_size, - size_t free_size, void *base_addr, - pcpu_populate_pte_fn_t populate_pte_fn) -{ - static struct vm_struct static_vm; - struct pcpu_chunk *static_chunk; - unsigned int cpu; - int nr_pages; - int err, i; - - /* santiy checks */ - BUG_ON(!static_size); - BUG_ON(!unit_size && free_size); - BUG_ON(unit_size && unit_size < static_size + free_size); - BUG_ON(unit_size & ~PAGE_MASK); - BUG_ON(base_addr && !unit_size); - BUG_ON(base_addr && populate_pte_fn); - - if (unit_size) - pcpu_unit_pages = unit_size >> PAGE_SHIFT; - else - pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT, - PFN_UP(static_size)); - - pcpu_static_size = static_size; - pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; - pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; - pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) - + num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *); - - /* - * Allocate chunk slots. The additional last slot is for - * empty chunks. - */ - pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2; - pcpu_slot = alloc_bootmem(pcpu_nr_slots * sizeof(pcpu_slot[0])); - for (i = 0; i < pcpu_nr_slots; i++) - INIT_LIST_HEAD(&pcpu_slot[i]); - - /* init static_chunk */ - static_chunk = alloc_bootmem(pcpu_chunk_struct_size); - INIT_LIST_HEAD(&static_chunk->list); - static_chunk->vm = &static_vm; - - if (free_size) - static_chunk->free_size = free_size; - else - static_chunk->free_size = pcpu_unit_size - pcpu_static_size; - - static_chunk->contig_hint = static_chunk->free_size; - - /* allocate vm address */ - static_vm.flags = VM_ALLOC; - static_vm.size = pcpu_chunk_size; - - if (!base_addr) - vm_area_register_early(&static_vm, PAGE_SIZE); - else { - /* - * Pages already mapped. No need to remap into - * vmalloc area. In this case the static chunk can't - * be mapped or unmapped by percpu and is marked - * immutable. - */ - static_vm.addr = base_addr; - static_chunk->immutable = true; - } - - /* assign pages */ - nr_pages = -1; - for_each_possible_cpu(cpu) { - for (i = 0; i < pcpu_unit_pages; i++) { - struct page *page = get_page_fn(cpu, i); - - if (!page) - break; - *pcpu_chunk_pagep(static_chunk, cpu, i) = page; - } - - BUG_ON(i < PFN_UP(pcpu_static_size)); - - if (nr_pages < 0) - nr_pages = i; - else - BUG_ON(nr_pages != i); - } - - /* map them */ - if (populate_pte_fn) { - for_each_possible_cpu(cpu) - for (i = 0; i < nr_pages; i++) - populate_pte_fn(pcpu_chunk_addr(static_chunk, - cpu, i)); - - err = pcpu_map(static_chunk, 0, nr_pages); - if (err) - panic("failed to setup static percpu area, err=%d\n", - err); - } - - /* link static_chunk in */ - pcpu_chunk_relocate(static_chunk, -1); - pcpu_chunk_addr_insert(static_chunk); - - /* we're done */ - pcpu_base_addr = (void *)pcpu_chunk_addr(static_chunk, 0, 0); - return pcpu_unit_size; -} diff --git a/trunk/mm/vmalloc.c b/trunk/mm/vmalloc.c index fb6f59935fb2..903cad46e796 100644 --- a/trunk/mm/vmalloc.c +++ b/trunk/mm/vmalloc.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include @@ -153,8 +152,8 @@ static int vmap_pud_range(pgd_t *pgd, unsigned long addr, * * Ie. 
pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N] */ -static int vmap_page_range_noflush(unsigned long start, unsigned long end, - pgprot_t prot, struct page **pages) +static int vmap_page_range(unsigned long start, unsigned long end, + pgprot_t prot, struct page **pages) { pgd_t *pgd; unsigned long next; @@ -170,22 +169,13 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end, if (err) break; } while (pgd++, addr = next, addr != end); + flush_cache_vmap(start, end); if (unlikely(err)) return err; return nr; } -static int vmap_page_range(unsigned long start, unsigned long end, - pgprot_t prot, struct page **pages) -{ - int ret; - - ret = vmap_page_range_noflush(start, end, prot, pages); - flush_cache_vmap(start, end); - return ret; -} - static inline int is_vmalloc_or_module_addr(const void *x) { /* @@ -992,32 +982,6 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro } EXPORT_SYMBOL(vm_map_ram); -/** - * vm_area_register_early - register vmap area early during boot - * @vm: vm_struct to register - * @align: requested alignment - * - * This function is used to register kernel vm area before - * vmalloc_init() is called. @vm->size and @vm->flags should contain - * proper values on entry and other fields should be zero. On return, - * vm->addr contains the allocated address. - * - * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING. - */ -void __init vm_area_register_early(struct vm_struct *vm, size_t align) -{ - static size_t vm_init_off __initdata; - unsigned long addr; - - addr = ALIGN(VMALLOC_START + vm_init_off, align); - vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START; - - vm->addr = (void *)addr; - - vm->next = vmlist; - vmlist = vm; -} - void __init vmalloc_init(void) { struct vmap_area *va; @@ -1045,58 +1009,6 @@ void __init vmalloc_init(void) vmap_initialized = true; } -/** - * map_kernel_range_noflush - map kernel VM area with the specified pages - * @addr: start of the VM area to map - * @size: size of the VM area to map - * @prot: page protection flags to use - * @pages: pages to map - * - * Map PFN_UP(@size) pages at @addr. The VM area @addr and @size - * specify should have been allocated using get_vm_area() and its - * friends. - * - * NOTE: - * This function does NOT do any cache flushing. The caller is - * responsible for calling flush_cache_vmap() on to-be-mapped areas - * before calling this function. - * - * RETURNS: - * The number of pages mapped on success, -errno on failure. - */ -int map_kernel_range_noflush(unsigned long addr, unsigned long size, - pgprot_t prot, struct page **pages) -{ - return vmap_page_range_noflush(addr, addr + size, prot, pages); -} - -/** - * unmap_kernel_range_noflush - unmap kernel VM area - * @addr: start of the VM area to unmap - * @size: size of the VM area to unmap - * - * Unmap PFN_UP(@size) pages at @addr. The VM area @addr and @size - * specify should have been allocated using get_vm_area() and its - * friends. - * - * NOTE: - * This function does NOT do any cache flushing. The caller is - * responsible for calling flush_cache_vunmap() on to-be-mapped areas - * before calling this function and flush_tlb_kernel_range() after. 
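(Aside on the *_noflush helpers whose removal is shown above: their documented contract pushes cache and TLB flushing to the caller so a batch of mappings can be flushed once, which is exactly how pcpu_unmap() earlier in this patch used them. A kernel-context sketch of that calling convention follows; the function name and the [base, base + nr_units * unit_size) range are illustrative, while flush_cache_vunmap, unmap_kernel_range_noflush and flush_tlb_kernel_range are the interfaces quoted in the removed code.)

static void example_unmap_all_units(unsigned long base, unsigned long unit_size,
				    unsigned long size, unsigned int nr_units)
{
	unsigned int u;

	/* vcache flush over the whole region, once, before unmapping */
	flush_cache_vunmap(base, base + nr_units * unit_size);

	for (u = 0; u < nr_units; u++)
		unmap_kernel_range_noflush(base + u * unit_size, size);

	/* TLB flush over the whole region, once, afterwards */
	flush_tlb_kernel_range(base, base + nr_units * unit_size);
}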
- */ -void unmap_kernel_range_noflush(unsigned long addr, unsigned long size) -{ - vunmap_page_range(addr, addr + size); -} - -/** - * unmap_kernel_range - unmap kernel VM area and flush cache and TLB - * @addr: start of the VM area to unmap - * @size: size of the VM area to unmap - * - * Similar to unmap_kernel_range_noflush() but flushes vcache before - * the unmapping and tlb after. - */ void unmap_kernel_range(unsigned long addr, unsigned long size) { unsigned long end = addr + size; diff --git a/trunk/net/ipv4/af_inet.c b/trunk/net/ipv4/af_inet.c index 3a3dad801354..743f5542d65a 100644 --- a/trunk/net/ipv4/af_inet.c +++ b/trunk/net/ipv4/af_inet.c @@ -1375,10 +1375,10 @@ EXPORT_SYMBOL_GPL(snmp_fold_field); int snmp_mib_init(void *ptr[2], size_t mibsize) { BUG_ON(ptr == NULL); - ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); + ptr[0] = __alloc_percpu(mibsize); if (!ptr[0]) goto err0; - ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); + ptr[1] = __alloc_percpu(mibsize); if (!ptr[1]) goto err1; return 0; diff --git a/trunk/net/ipv4/route.c b/trunk/net/ipv4/route.c index bf895401218f..97f71153584f 100644 --- a/trunk/net/ipv4/route.c +++ b/trunk/net/ipv4/route.c @@ -3376,7 +3376,7 @@ int __init ip_rt_init(void) int rc = 0; #ifdef CONFIG_NET_CLS_ROUTE - ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); + ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct)); if (!ip_rt_acct) panic("IP: failed to allocate ip_rt_acct\n"); #endif
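(Aside on the net/ipv4 hunks above: they are the caller-side half of the revert — the dynamic allocator's __alloc_percpu() took a size and an explicit alignment, the restored allocator takes only a size. The sketch below shows the same allocation written against both call conventions; the #ifdef symbol and function name are hypothetical and exist only to put the two forms side by side, while struct ip_rt_acct and the 256-entry sizing come from the route.c hunk itself.)

#include <linux/percpu.h>
#include <linux/errno.h>
#include <net/route.h>			/* struct ip_rt_acct */

static struct ip_rt_acct *ip_rt_acct_example;

static int __init example_ip_rt_init(void)
{
#ifdef EXAMPLE_DYNAMIC_PERCPU		/* hypothetical switch, for illustration only */
	/* dynamic allocator (removed by this patch): size and alignment */
	ip_rt_acct_example = __alloc_percpu(256 * sizeof(struct ip_rt_acct),
					    __alignof__(struct ip_rt_acct));
#else
	/* classic allocator (restored by this patch): size only */
	ip_rt_acct_example = __alloc_percpu(256 * sizeof(struct ip_rt_acct));
#endif
	return ip_rt_acct_example ? 0 : -ENOMEM;
}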