diff --git a/[refs] b/[refs] index 37b15e1d1266..3f14e43b5bf3 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 61f6c7a47a2f84b7ba4b65240ffe9247df772b06 +refs/heads/master: bca0fa5f12a6744a2b2e53154af65a51402b3426 diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt index 41996c68a5cd..c1601e5a8b71 100644 --- a/trunk/Documentation/kernel-parameters.txt +++ b/trunk/Documentation/kernel-parameters.txt @@ -508,11 +508,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Also note the kernel might malfunction if you disable some critical bits. - cma=nn[MG] [ARM,KNL] - Sets the size of kernel global memory area for contiguous - memory allocations. For more information, see - include/linux/dma-contiguous.h - cmo_free_hint= [PPC] Format: { yes | no } Specify whether pages are marked as being inactive when they are freed. This is used in CMO environments @@ -520,10 +515,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. a hypervisor. Default: yes - coherent_pool=nn[KMG] [ARM,KNL] - Sets the size of memory pool for coherent, atomic dma - allocations if Contiguous Memory Allocator (CMA) is used. - code_bytes [X86] How many bytes of object code to print in an oops report. Range: 0 - 8192 diff --git a/trunk/arch/Kconfig b/trunk/arch/Kconfig index 0a3ffe46e567..684eb5af439d 100644 --- a/trunk/arch/Kconfig +++ b/trunk/arch/Kconfig @@ -142,9 +142,6 @@ config HAVE_ARCH_TRACEHOOK config HAVE_DMA_ATTRS bool -config HAVE_DMA_CONTIGUOUS - bool - config USE_GENERIC_SMP_HELPERS bool diff --git a/trunk/arch/arm/Kconfig b/trunk/arch/arm/Kconfig index cbbbc45f6b67..36586dba6fa6 100644 --- a/trunk/arch/arm/Kconfig +++ b/trunk/arch/arm/Kconfig @@ -4,8 +4,6 @@ config ARM select HAVE_AOUT select HAVE_DMA_API_DEBUG select HAVE_IDE if PCI || ISA || PCMCIA - select HAVE_DMA_CONTIGUOUS if (CPU_V6 || CPU_V6K || CPU_V7) - select CMA if (CPU_V6 || CPU_V6K || CPU_V7) select HAVE_MEMBLOCK select RTC_LIB select SYS_SUPPORTS_APM_EMULATION diff --git a/trunk/arch/arm/include/asm/dma-contiguous.h b/trunk/arch/arm/include/asm/dma-contiguous.h deleted file mode 100644 index 3ed37b4d93da..000000000000 --- a/trunk/arch/arm/include/asm/dma-contiguous.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef ASMARM_DMA_CONTIGUOUS_H -#define ASMARM_DMA_CONTIGUOUS_H - -#ifdef __KERNEL__ -#ifdef CONFIG_CMA - -#include -#include - -void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size); - -#endif -#endif - -#endif diff --git a/trunk/arch/arm/include/asm/mach/map.h b/trunk/arch/arm/include/asm/mach/map.h index a6efcdd6fd25..b36f3654bf54 100644 --- a/trunk/arch/arm/include/asm/mach/map.h +++ b/trunk/arch/arm/include/asm/mach/map.h @@ -30,7 +30,6 @@ struct map_desc { #define MT_MEMORY_DTCM 12 #define MT_MEMORY_ITCM 13 #define MT_MEMORY_SO 14 -#define MT_MEMORY_DMA_READY 15 #ifdef CONFIG_MMU extern void iotable_init(struct map_desc *, int); diff --git a/trunk/arch/arm/kernel/setup.c b/trunk/arch/arm/kernel/setup.c index 1b3096dfb964..ebfac782593f 100644 --- a/trunk/arch/arm/kernel/setup.c +++ b/trunk/arch/arm/kernel/setup.c @@ -81,7 +81,6 @@ __setup("fpe=", fpe_setup); extern void paging_init(struct machine_desc *desc); extern void sanity_check_meminfo(void); extern void reboot_setup(char *str); -extern void setup_dma_zone(struct machine_desc *desc); unsigned int processor_id; EXPORT_SYMBOL(processor_id); @@ -940,8 +939,12 @@ void __init setup_arch(char **cmdline_p) machine_desc = mdesc; machine_name = mdesc->name; - 
setup_dma_zone(mdesc); - +#ifdef CONFIG_ZONE_DMA + if (mdesc->dma_zone_size) { + extern unsigned long arm_dma_zone_size; + arm_dma_zone_size = mdesc->dma_zone_size; + } +#endif if (mdesc->restart_mode) reboot_setup(&mdesc->restart_mode); diff --git a/trunk/arch/arm/mm/dma-mapping.c b/trunk/arch/arm/mm/dma-mapping.c index 153f5559406a..db23ae4aaaab 100644 --- a/trunk/arch/arm/mm/dma-mapping.c +++ b/trunk/arch/arm/mm/dma-mapping.c @@ -17,9 +17,7 @@ #include #include #include -#include #include -#include #include #include @@ -28,9 +26,6 @@ #include #include #include -#include -#include -#include #include "mm.h" @@ -61,19 +56,6 @@ static u64 get_coherent_dma_mask(struct device *dev) return mask; } -static void __dma_clear_buffer(struct page *page, size_t size) -{ - void *ptr; - /* - * Ensure that the allocated pages are zeroed, and that any data - * lurking in the kernel direct-mapped region is invalidated. - */ - ptr = page_address(page); - memset(ptr, 0, size); - dmac_flush_range(ptr, ptr + size); - outer_flush_range(__pa(ptr), __pa(ptr) + size); -} - /* * Allocate a DMA buffer for 'dev' of size 'size' using the * specified gfp mask. Note that 'size' must be page aligned. @@ -82,6 +64,23 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf { unsigned long order = get_order(size); struct page *page, *p, *e; + void *ptr; + u64 mask = get_coherent_dma_mask(dev); + +#ifdef CONFIG_DMA_API_DEBUG + u64 limit = (mask + 1) & ~mask; + if (limit && size >= limit) { + dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n", + size, mask); + return NULL; + } +#endif + + if (!mask) + return NULL; + + if (mask < 0xffffffffULL) + gfp |= GFP_DMA; page = alloc_pages(gfp, order); if (!page) @@ -94,7 +93,14 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++) __free_page(p); - __dma_clear_buffer(page, size); + /* + * Ensure that the allocated pages are zeroed, and that any data + * lurking in the kernel direct-mapped region is invalidated. + */ + ptr = page_address(page); + memset(ptr, 0, size); + dmac_flush_range(ptr, ptr + size); + outer_flush_range(__pa(ptr), __pa(ptr) + size); return page; } @@ -164,9 +170,6 @@ static int __init consistent_init(void) unsigned long base = consistent_base; unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT; - if (cpu_architecture() >= CPU_ARCH_ARMv6) - return 0; - consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL); if (!consistent_pte) { pr_err("%s: no memory\n", __func__); @@ -207,100 +210,8 @@ static int __init consistent_init(void) return ret; } -core_initcall(consistent_init); - -static void *__alloc_from_contiguous(struct device *dev, size_t size, - pgprot_t prot, struct page **ret_page); - -static struct arm_vmregion_head coherent_head = { - .vm_lock = __SPIN_LOCK_UNLOCKED(&coherent_head.vm_lock), - .vm_list = LIST_HEAD_INIT(coherent_head.vm_list), -}; - -size_t coherent_pool_size = DEFAULT_CONSISTENT_DMA_SIZE / 8; - -static int __init early_coherent_pool(char *p) -{ - coherent_pool_size = memparse(p, &p); - return 0; -} -early_param("coherent_pool", early_coherent_pool); - -/* - * Initialise the coherent pool for atomic allocations. 
- */ -static int __init coherent_init(void) -{ - pgprot_t prot = pgprot_dmacoherent(pgprot_kernel); - size_t size = coherent_pool_size; - struct page *page; - void *ptr; - - if (cpu_architecture() < CPU_ARCH_ARMv6) - return 0; - - ptr = __alloc_from_contiguous(NULL, size, prot, &page); - if (ptr) { - coherent_head.vm_start = (unsigned long) ptr; - coherent_head.vm_end = (unsigned long) ptr + size; - printk(KERN_INFO "DMA: preallocated %u KiB pool for atomic coherent allocations\n", - (unsigned)size / 1024); - return 0; - } - printk(KERN_ERR "DMA: failed to allocate %u KiB pool for atomic coherent allocation\n", - (unsigned)size / 1024); - return -ENOMEM; -} -/* - * CMA is activated by core_initcall, so we must be called after it. - */ -postcore_initcall(coherent_init); -struct dma_contig_early_reserve { - phys_addr_t base; - unsigned long size; -}; - -static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata; - -static int dma_mmu_remap_num __initdata; - -void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) -{ - dma_mmu_remap[dma_mmu_remap_num].base = base; - dma_mmu_remap[dma_mmu_remap_num].size = size; - dma_mmu_remap_num++; -} - -void __init dma_contiguous_remap(void) -{ - int i; - for (i = 0; i < dma_mmu_remap_num; i++) { - phys_addr_t start = dma_mmu_remap[i].base; - phys_addr_t end = start + dma_mmu_remap[i].size; - struct map_desc map; - unsigned long addr; - - if (end > arm_lowmem_limit) - end = arm_lowmem_limit; - if (start >= end) - return; - - map.pfn = __phys_to_pfn(start); - map.virtual = __phys_to_virt(start); - map.length = end - start; - map.type = MT_MEMORY_DMA_READY; - - /* - * Clear previous low-memory mapping - */ - for (addr = __phys_to_virt(start); addr < __phys_to_virt(end); - addr += PMD_SIZE) - pmd_clear(pmd_off_k(addr)); - - iotable_init(&map, 1); - } -} +core_initcall(consistent_init); static void * __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, @@ -408,173 +319,20 @@ static void __dma_free_remap(void *cpu_addr, size_t size) arm_vmregion_free(&consistent_head, c); } -static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr, - void *data) -{ - struct page *page = virt_to_page(addr); - pgprot_t prot = *(pgprot_t *)data; - - set_pte_ext(pte, mk_pte(page, prot), 0); - return 0; -} - -static void __dma_remap(struct page *page, size_t size, pgprot_t prot) -{ - unsigned long start = (unsigned long) page_address(page); - unsigned end = start + size; - - apply_to_page_range(&init_mm, start, size, __dma_update_pte, &prot); - dsb(); - flush_tlb_kernel_range(start, end); -} - -static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, - pgprot_t prot, struct page **ret_page, - const void *caller) -{ - struct page *page; - void *ptr; - page = __dma_alloc_buffer(dev, size, gfp); - if (!page) - return NULL; - - ptr = __dma_alloc_remap(page, size, gfp, prot, caller); - if (!ptr) { - __dma_free_buffer(page, size); - return NULL; - } - - *ret_page = page; - return ptr; -} - -static void *__alloc_from_pool(struct device *dev, size_t size, - struct page **ret_page, const void *caller) -{ - struct arm_vmregion *c; - size_t align; - - if (!coherent_head.vm_start) { - printk(KERN_ERR "%s: coherent pool not initialised!\n", - __func__); - dump_stack(); - return NULL; - } - - /* - * Align the region allocation - allocations from pool are rather - * small, so align them to their order in pages, minimum is a page - * size. This helps reduce fragmentation of the DMA space. 
- */ - align = PAGE_SIZE << get_order(size); - c = arm_vmregion_alloc(&coherent_head, align, size, 0, caller); - if (c) { - void *ptr = (void *)c->vm_start; - struct page *page = virt_to_page(ptr); - *ret_page = page; - return ptr; - } - return NULL; -} - -static int __free_from_pool(void *cpu_addr, size_t size) -{ - unsigned long start = (unsigned long)cpu_addr; - unsigned long end = start + size; - struct arm_vmregion *c; - - if (start < coherent_head.vm_start || end > coherent_head.vm_end) - return 0; - - c = arm_vmregion_find_remove(&coherent_head, (unsigned long)start); - - if ((c->vm_end - c->vm_start) != size) { - printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", - __func__, c->vm_end - c->vm_start, size); - dump_stack(); - size = c->vm_end - c->vm_start; - } - - arm_vmregion_free(&coherent_head, c); - return 1; -} - -static void *__alloc_from_contiguous(struct device *dev, size_t size, - pgprot_t prot, struct page **ret_page) -{ - unsigned long order = get_order(size); - size_t count = size >> PAGE_SHIFT; - struct page *page; - - page = dma_alloc_from_contiguous(dev, count, order); - if (!page) - return NULL; - - __dma_clear_buffer(page, size); - __dma_remap(page, size, prot); - - *ret_page = page; - return page_address(page); -} - -static void __free_from_contiguous(struct device *dev, struct page *page, - size_t size) -{ - __dma_remap(page, size, pgprot_kernel); - dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); -} - -#define nommu() 0 - #else /* !CONFIG_MMU */ -#define nommu() 1 - -#define __alloc_remap_buffer(dev, size, gfp, prot, ret, c) NULL -#define __alloc_from_pool(dev, size, ret_page, c) NULL -#define __alloc_from_contiguous(dev, size, prot, ret) NULL -#define __free_from_pool(cpu_addr, size) 0 -#define __free_from_contiguous(dev, page, size) do { } while (0) -#define __dma_free_remap(cpu_addr, size) do { } while (0) +#define __dma_alloc_remap(page, size, gfp, prot, c) page_address(page) +#define __dma_free_remap(addr, size) do { } while (0) #endif /* CONFIG_MMU */ -static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp, - struct page **ret_page) -{ - struct page *page; - page = __dma_alloc_buffer(dev, size, gfp); - if (!page) - return NULL; - - *ret_page = page; - return page_address(page); -} - - - -static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, - gfp_t gfp, pgprot_t prot, const void *caller) +static void * +__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, + pgprot_t prot, const void *caller) { - u64 mask = get_coherent_dma_mask(dev); struct page *page; void *addr; -#ifdef CONFIG_DMA_API_DEBUG - u64 limit = (mask + 1) & ~mask; - if (limit && size >= limit) { - dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n", - size, mask); - return NULL; - } -#endif - - if (!mask) - return NULL; - - if (mask < 0xffffffffULL) - gfp |= GFP_DMA; - /* * Following is a work-around (a.k.a. 
hack) to prevent pages * with __GFP_COMP being passed to split_page() which cannot @@ -587,17 +345,19 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, *handle = ~0; size = PAGE_ALIGN(size); - if (arch_is_coherent() || nommu()) - addr = __alloc_simple_buffer(dev, size, gfp, &page); - else if (cpu_architecture() < CPU_ARCH_ARMv6) - addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller); - else if (gfp & GFP_ATOMIC) - addr = __alloc_from_pool(dev, size, &page, caller); + page = __dma_alloc_buffer(dev, size, gfp); + if (!page) + return NULL; + + if (!arch_is_coherent()) + addr = __dma_alloc_remap(page, size, gfp, prot, caller); else - addr = __alloc_from_contiguous(dev, size, prot, &page); + addr = page_address(page); if (addr) *handle = pfn_to_dma(dev, page_to_pfn(page)); + else + __dma_free_buffer(page, size); return addr; } @@ -606,8 +366,8 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, * Allocate DMA-coherent memory space and return both the kernel remapped * virtual and bus address for that space. */ -void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, - gfp_t gfp) +void * +dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) { void *memory; @@ -638,11 +398,25 @@ static int dma_mmap(struct device *dev, struct vm_area_struct *vma, { int ret = -ENXIO; #ifdef CONFIG_MMU - unsigned long pfn = dma_to_pfn(dev, dma_addr); - ret = remap_pfn_range(vma, vma->vm_start, - pfn + vma->vm_pgoff, - vma->vm_end - vma->vm_start, - vma->vm_page_prot); + unsigned long user_size, kern_size; + struct arm_vmregion *c; + + user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + + c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); + if (c) { + unsigned long off = vma->vm_pgoff; + + kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT; + + if (off < kern_size && + user_size <= (kern_size - off)) { + ret = remap_pfn_range(vma, vma->vm_start, + page_to_pfn(c->vm_pages) + off, + user_size << PAGE_SHIFT, + vma->vm_page_prot); + } + } #endif /* CONFIG_MMU */ return ret; @@ -664,33 +438,23 @@ int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma, } EXPORT_SYMBOL(dma_mmap_writecombine); - /* - * Free a buffer as defined by the above mapping. + * free a page as defined by the above mapping. + * Must not be called with IRQs disabled. 
*/ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) { - struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); + WARN_ON(irqs_disabled()); if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) return; size = PAGE_ALIGN(size); - if (arch_is_coherent() || nommu()) { - __dma_free_buffer(page, size); - } else if (cpu_architecture() < CPU_ARCH_ARMv6) { + if (!arch_is_coherent()) __dma_free_remap(cpu_addr, size); - __dma_free_buffer(page, size); - } else { - if (__free_from_pool(cpu_addr, size)) - return; - /* - * Non-atomic allocations cannot be freed with IRQs disabled - */ - WARN_ON(irqs_disabled()); - __free_from_contiguous(dev, page, size); - } + + __dma_free_buffer(pfn_to_page(dma_to_pfn(dev, handle)), size); } EXPORT_SYMBOL(dma_free_coherent); diff --git a/trunk/arch/arm/mm/init.c b/trunk/arch/arm/mm/init.c index c21d06c7dd7e..8f5813bbffb5 100644 --- a/trunk/arch/arm/mm/init.c +++ b/trunk/arch/arm/mm/init.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -227,17 +226,6 @@ static void __init arm_adjust_dma_zone(unsigned long *size, unsigned long *hole, } #endif -void __init setup_dma_zone(struct machine_desc *mdesc) -{ -#ifdef CONFIG_ZONE_DMA - if (mdesc->dma_zone_size) { - arm_dma_zone_size = mdesc->dma_zone_size; - arm_dma_limit = PHYS_OFFSET + arm_dma_zone_size - 1; - } else - arm_dma_limit = 0xffffffff; -#endif -} - static void __init arm_bootmem_free(unsigned long min, unsigned long max_low, unsigned long max_high) { @@ -285,9 +273,12 @@ static void __init arm_bootmem_free(unsigned long min, unsigned long max_low, * Adjust the sizes according to any special requirements for * this machine type. */ - if (arm_dma_zone_size) + if (arm_dma_zone_size) { arm_adjust_dma_zone(zone_size, zhole_size, arm_dma_zone_size >> PAGE_SHIFT); + arm_dma_limit = PHYS_OFFSET + arm_dma_zone_size - 1; + } else + arm_dma_limit = 0xffffffff; #endif free_area_init_node(0, zone_size, min, zhole_size); @@ -373,12 +364,6 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc) if (mdesc->reserve) mdesc->reserve(); - /* - * reserve memory for DMA contigouos allocations, - * must come from DMA area inside low memory - */ - dma_contiguous_reserve(min(arm_dma_limit, arm_lowmem_limit)); - arm_memblock_steal_permitted = false; memblock_allow_resize(); memblock_dump_all(); diff --git a/trunk/arch/arm/mm/mm.h b/trunk/arch/arm/mm/mm.h index 93dc0c17cdcb..27f4a619b35d 100644 --- a/trunk/arch/arm/mm/mm.h +++ b/trunk/arch/arm/mm/mm.h @@ -67,8 +67,5 @@ extern u32 arm_dma_limit; #define arm_dma_limit ((u32)~0) #endif -extern phys_addr_t arm_lowmem_limit; - void __init bootmem_init(void); void arm_mm_memblock_reserve(void); -void dma_contiguous_remap(void); diff --git a/trunk/arch/arm/mm/mmu.c b/trunk/arch/arm/mm/mmu.c index e5dad60b558b..aa78de8bfdd3 100644 --- a/trunk/arch/arm/mm/mmu.c +++ b/trunk/arch/arm/mm/mmu.c @@ -288,11 +288,6 @@ static struct mem_type mem_types[] = { PMD_SECT_UNCACHED | PMD_SECT_XN, .domain = DOMAIN_KERNEL, }, - [MT_MEMORY_DMA_READY] = { - .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, - .prot_l1 = PMD_TYPE_TABLE, - .domain = DOMAIN_KERNEL, - }, }; const struct mem_type *get_mem_type(unsigned int type) @@ -434,7 +429,6 @@ static void __init build_mem_type_table(void) if (arch_is_coherent() && cpu_is_xsc3()) { mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; - mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; 
mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; } @@ -466,7 +460,6 @@ static void __init build_mem_type_table(void) mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; - mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; } @@ -519,7 +512,6 @@ static void __init build_mem_type_table(void) mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask; mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd; mem_types[MT_MEMORY].prot_pte |= kern_pgprot; - mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask; mem_types[MT_ROM].prot_sect |= cp->pmd; @@ -604,7 +596,7 @@ static void __init alloc_init_section(pud_t *pud, unsigned long addr, * L1 entries, whereas PGDs refer to a group of L1 entries making * up one logical pointer to an L2 table. */ - if (type->prot_sect && ((addr | end | phys) & ~SECTION_MASK) == 0) { + if (((addr | end | phys) & ~SECTION_MASK) == 0) { pmd_t *p = pmd; #ifndef CONFIG_ARM_LPAE @@ -822,7 +814,7 @@ static int __init early_vmalloc(char *arg) } early_param("vmalloc", early_vmalloc); -phys_addr_t arm_lowmem_limit __initdata = 0; +static phys_addr_t lowmem_limit __initdata = 0; void __init sanity_check_meminfo(void) { @@ -905,8 +897,8 @@ void __init sanity_check_meminfo(void) bank->size = newsize; } #endif - if (!bank->highmem && bank->start + bank->size > arm_lowmem_limit) - arm_lowmem_limit = bank->start + bank->size; + if (!bank->highmem && bank->start + bank->size > lowmem_limit) + lowmem_limit = bank->start + bank->size; j++; } @@ -931,8 +923,8 @@ void __init sanity_check_meminfo(void) } #endif meminfo.nr_banks = j; - high_memory = __va(arm_lowmem_limit - 1) + 1; - memblock_set_current_limit(arm_lowmem_limit); + high_memory = __va(lowmem_limit - 1) + 1; + memblock_set_current_limit(lowmem_limit); } static inline void prepare_page_table(void) @@ -957,8 +949,8 @@ static inline void prepare_page_table(void) * Find the end of the first block of lowmem. 
*/ end = memblock.memory.regions[0].base + memblock.memory.regions[0].size; - if (end >= arm_lowmem_limit) - end = arm_lowmem_limit; + if (end >= lowmem_limit) + end = lowmem_limit; /* * Clear out all the kernel space mappings, except for the first @@ -1101,8 +1093,8 @@ static void __init map_lowmem(void) phys_addr_t end = start + reg->size; struct map_desc map; - if (end > arm_lowmem_limit) - end = arm_lowmem_limit; + if (end > lowmem_limit) + end = lowmem_limit; if (start >= end) break; @@ -1123,12 +1115,11 @@ void __init paging_init(struct machine_desc *mdesc) { void *zero_page; - memblock_set_current_limit(arm_lowmem_limit); + memblock_set_current_limit(lowmem_limit); build_mem_type_table(); prepare_page_table(); map_lowmem(); - dma_contiguous_remap(); devicemaps_init(mdesc); kmap_init(); diff --git a/trunk/arch/x86/Kconfig b/trunk/arch/x86/Kconfig index 7cbdfdac3c7c..c9866b0b77d8 100644 --- a/trunk/arch/x86/Kconfig +++ b/trunk/arch/x86/Kconfig @@ -31,7 +31,6 @@ config X86 select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_FRAME_POINTERS select HAVE_DMA_ATTRS - select HAVE_DMA_CONTIGUOUS if !SWIOTLB select HAVE_KRETPROBES select HAVE_OPTPROBES select HAVE_FTRACE_MCOUNT_RECORD diff --git a/trunk/arch/x86/include/asm/dma-contiguous.h b/trunk/arch/x86/include/asm/dma-contiguous.h deleted file mode 100644 index c09241659971..000000000000 --- a/trunk/arch/x86/include/asm/dma-contiguous.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef ASMX86_DMA_CONTIGUOUS_H -#define ASMX86_DMA_CONTIGUOUS_H - -#ifdef __KERNEL__ - -#include -#include - -static inline void -dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { } - -#endif -#endif diff --git a/trunk/arch/x86/include/asm/dma-mapping.h b/trunk/arch/x86/include/asm/dma-mapping.h index 7b9227b44b9b..4b4331d71935 100644 --- a/trunk/arch/x86/include/asm/dma-mapping.h +++ b/trunk/arch/x86/include/asm/dma-mapping.h @@ -13,7 +13,6 @@ #include #include #include -#include #ifdef CONFIG_ISA # define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) @@ -63,10 +62,6 @@ extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag, struct dma_attrs *attrs); -extern void dma_generic_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_addr, - struct dma_attrs *attrs); - static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) { if (!dev->dma_mask) diff --git a/trunk/arch/x86/kernel/pci-dma.c b/trunk/arch/x86/kernel/pci-dma.c index 62c9457ccd2f..3003250ac51d 100644 --- a/trunk/arch/x86/kernel/pci-dma.c +++ b/trunk/arch/x86/kernel/pci-dma.c @@ -100,18 +100,14 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size, struct dma_attrs *attrs) { unsigned long dma_mask; - struct page *page = NULL; - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + struct page *page; dma_addr_t addr; dma_mask = dma_alloc_coherent_mask(dev, flag); flag |= __GFP_ZERO; again: - if (!(flag & GFP_ATOMIC)) - page = dma_alloc_from_contiguous(dev, count, get_order(size)); - if (!page) - page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); + page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); if (!page) return NULL; @@ -131,16 +127,6 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size, return page_address(page); } -void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr, struct dma_attrs *attrs) -{ - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - struct page *page = virt_to_page(vaddr); - - if 
(!dma_release_from_contiguous(dev, page, count)) - free_pages((unsigned long)vaddr, get_order(size)); -} - /* * See for the iommu kernel * parameter documentation. diff --git a/trunk/arch/x86/kernel/pci-nommu.c b/trunk/arch/x86/kernel/pci-nommu.c index 871be4a84c7d..f96050685b46 100644 --- a/trunk/arch/x86/kernel/pci-nommu.c +++ b/trunk/arch/x86/kernel/pci-nommu.c @@ -74,6 +74,12 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, return nents; } +static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_addr, struct dma_attrs *attrs) +{ + free_pages((unsigned long)vaddr, get_order(size)); +} + static void nommu_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) @@ -91,7 +97,7 @@ static void nommu_sync_sg_for_device(struct device *dev, struct dma_map_ops nommu_dma_ops = { .alloc = dma_generic_alloc_coherent, - .free = dma_generic_free_coherent, + .free = nommu_free_coherent, .map_sg = nommu_map_sg, .map_page = nommu_map_page, .sync_single_for_device = nommu_sync_single_for_device, diff --git a/trunk/arch/x86/kernel/setup.c b/trunk/arch/x86/kernel/setup.c index d6c956e674cc..1a2901562059 100644 --- a/trunk/arch/x86/kernel/setup.c +++ b/trunk/arch/x86/kernel/setup.c @@ -50,7 +50,6 @@ #include #include #include -#include #include #include @@ -935,7 +934,6 @@ void __init setup_arch(char **cmdline_p) } #endif memblock.current_limit = get_max_mapped(); - dma_contiguous_reserve(0); /* * NOTE: On x86-32, only from this point on, fixmaps are ready for use. diff --git a/trunk/drivers/base/Kconfig b/trunk/drivers/base/Kconfig index 9b21469482ae..9aa618acfe97 100644 --- a/trunk/drivers/base/Kconfig +++ b/trunk/drivers/base/Kconfig @@ -192,93 +192,4 @@ config DMA_SHARED_BUFFER APIs extension; the file's descriptor can then be passed on to other driver. -config CMA - bool "Contiguous Memory Allocator (EXPERIMENTAL)" - depends on HAVE_DMA_CONTIGUOUS && HAVE_MEMBLOCK && EXPERIMENTAL - select MIGRATION - help - This enables the Contiguous Memory Allocator which allows drivers - to allocate big physically-contiguous blocks of memory for use with - hardware components that do not support I/O map nor scatter-gather. - - For more information see . - If unsure, say "n". - -if CMA - -config CMA_DEBUG - bool "CMA debug messages (DEVELOPMENT)" - depends on DEBUG_KERNEL - help - Turns on debug messages in CMA. This produces KERN_DEBUG - messages for every CMA call as well as various messages while - processing calls such as dma_alloc_from_contiguous(). - This option does not affect warning and error messages. - -comment "Default contiguous memory area size:" - -config CMA_SIZE_MBYTES - int "Size in Mega Bytes" - depends on !CMA_SIZE_SEL_PERCENTAGE - default 16 - help - Defines the size (in MiB) of the default memory area for Contiguous - Memory Allocator. - -config CMA_SIZE_PERCENTAGE - int "Percentage of total memory" - depends on !CMA_SIZE_SEL_MBYTES - default 10 - help - Defines the size of the default memory area for Contiguous Memory - Allocator as a percentage of the total memory in the system. 
- -choice - prompt "Selected region size" - default CMA_SIZE_SEL_ABSOLUTE - -config CMA_SIZE_SEL_MBYTES - bool "Use mega bytes value only" - -config CMA_SIZE_SEL_PERCENTAGE - bool "Use percentage value only" - -config CMA_SIZE_SEL_MIN - bool "Use lower value (minimum)" - -config CMA_SIZE_SEL_MAX - bool "Use higher value (maximum)" - -endchoice - -config CMA_ALIGNMENT - int "Maximum PAGE_SIZE order of alignment for contiguous buffers" - range 4 9 - default 8 - help - DMA mapping framework by default aligns all buffers to the smallest - PAGE_SIZE order which is greater than or equal to the requested buffer - size. This works well for buffers up to a few hundreds kilobytes, but - for larger buffers it just a memory waste. With this parameter you can - specify the maximum PAGE_SIZE order for contiguous buffers. Larger - buffers will be aligned only to this specified order. The order is - expressed as a power of two multiplied by the PAGE_SIZE. - - For example, if your system defaults to 4KiB pages, the order value - of 8 means that the buffers will be aligned up to 1MiB only. - - If unsure, leave the default value "8". - -config CMA_AREAS - int "Maximum count of the CMA device-private areas" - default 7 - help - CMA allows to create CMA areas for particular devices. This parameter - sets the maximum number of such device private CMA areas in the - system. - - If unsure, leave the default value "7". - -endif - endmenu diff --git a/trunk/drivers/base/Makefile b/trunk/drivers/base/Makefile index 5aa2d703d19f..b6d1b9c4200c 100644 --- a/trunk/drivers/base/Makefile +++ b/trunk/drivers/base/Makefile @@ -6,7 +6,6 @@ obj-y := core.o bus.o dd.o syscore.o \ attribute_container.o transport_class.o \ topology.o obj-$(CONFIG_DEVTMPFS) += devtmpfs.o -obj-$(CONFIG_CMA) += dma-contiguous.o obj-y += power/ obj-$(CONFIG_HAS_DMA) += dma-mapping.o obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o diff --git a/trunk/drivers/base/dma-coherent.c b/trunk/drivers/base/dma-coherent.c index bb0025c510b3..1b85949e3d2f 100644 --- a/trunk/drivers/base/dma-coherent.c +++ b/trunk/drivers/base/dma-coherent.c @@ -10,6 +10,7 @@ struct dma_coherent_mem { void *virt_base; dma_addr_t device_base; + phys_addr_t pfn_base; int size; int flags; unsigned long *bitmap; @@ -44,6 +45,7 @@ int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, dev->dma_mem->virt_base = mem_base; dev->dma_mem->device_base = device_addr; + dev->dma_mem->pfn_base = PFN_DOWN(bus_addr); dev->dma_mem->size = pages; dev->dma_mem->flags = flags; @@ -176,3 +178,43 @@ int dma_release_from_coherent(struct device *dev, int order, void *vaddr) return 0; } EXPORT_SYMBOL(dma_release_from_coherent); + +/** + * dma_mmap_from_coherent() - try to mmap the memory allocated from + * per-device coherent memory pool to userspace + * @dev: device from which the memory was allocated + * @vma: vm_area for the userspace memory + * @vaddr: cpu address returned by dma_alloc_from_coherent + * @size: size of the memory buffer allocated by dma_alloc_from_coherent + * + * This checks whether the memory was allocated from the per-device + * coherent memory pool and if so, maps that memory to the provided vma. + * + * Returns 1 if we correctly mapped the memory, or 0 if + * dma_release_coherent() should proceed with mapping memory from + * generic pools. + */ +int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, + void *vaddr, size_t size, int *ret) +{ + struct dma_coherent_mem *mem = dev ? 
dev->dma_mem : NULL; + + if (mem && vaddr >= mem->virt_base && vaddr + size <= + (mem->virt_base + (mem->size << PAGE_SHIFT))) { + unsigned long off = vma->vm_pgoff; + int start = (vaddr - mem->virt_base) >> PAGE_SHIFT; + int user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + int count = size >> PAGE_SHIFT; + + *ret = -ENXIO; + if (off < count && user_count <= count - off) { + unsigned pfn = mem->pfn_base + start + off; + *ret = remap_pfn_range(vma, vma->vm_start, pfn, + user_count << PAGE_SHIFT, + vma->vm_page_prot); + } + return 1; + } + return 0; +} +EXPORT_SYMBOL(dma_mmap_from_coherent); diff --git a/trunk/drivers/base/dma-contiguous.c b/trunk/drivers/base/dma-contiguous.c deleted file mode 100644 index 78efb0306a44..000000000000 --- a/trunk/drivers/base/dma-contiguous.c +++ /dev/null @@ -1,401 +0,0 @@ -/* - * Contiguous Memory Allocator for DMA mapping framework - * Copyright (c) 2010-2011 by Samsung Electronics. - * Written by: - * Marek Szyprowski - * Michal Nazarewicz - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License or (at your optional) any later version of the license. - */ - -#define pr_fmt(fmt) "cma: " fmt - -#ifdef CONFIG_CMA_DEBUG -#ifndef DEBUG -# define DEBUG -#endif -#endif - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifndef SZ_1M -#define SZ_1M (1 << 20) -#endif - -struct cma { - unsigned long base_pfn; - unsigned long count; - unsigned long *bitmap; -}; - -struct cma *dma_contiguous_default_area; - -#ifdef CONFIG_CMA_SIZE_MBYTES -#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES -#else -#define CMA_SIZE_MBYTES 0 -#endif - -/* - * Default global CMA area size can be defined in kernel's .config. - * This is usefull mainly for distro maintainers to create a kernel - * that works correctly for most supported systems. - * The size can be set in bytes or as a percentage of the total memory - * in the system. - * - * Users, who want to set the size of global CMA area for their system - * should use cma= kernel parameter. - */ -static const unsigned long size_bytes = CMA_SIZE_MBYTES * SZ_1M; -static long size_cmdline = -1; - -static int __init early_cma(char *p) -{ - pr_debug("%s(%s)\n", __func__, p); - size_cmdline = memparse(p, &p); - return 0; -} -early_param("cma", early_cma); - -#ifdef CONFIG_CMA_SIZE_PERCENTAGE - -static unsigned long __init __maybe_unused cma_early_percent_memory(void) -{ - struct memblock_region *reg; - unsigned long total_pages = 0; - - /* - * We cannot use memblock_phys_mem_size() here, because - * memblock_analyze() has not been called yet. - */ - for_each_memblock(memory, reg) - total_pages += memblock_region_memory_end_pfn(reg) - - memblock_region_memory_base_pfn(reg); - - return (total_pages * CONFIG_CMA_SIZE_PERCENTAGE / 100) << PAGE_SHIFT; -} - -#else - -static inline __maybe_unused unsigned long cma_early_percent_memory(void) -{ - return 0; -} - -#endif - -/** - * dma_contiguous_reserve() - reserve area for contiguous memory handling - * @limit: End address of the reserved memory (optional, 0 for any). - * - * This function reserves memory from early allocator. It should be - * called by arch specific code once the early allocator (memblock or bootmem) - * has been activated and all other subsystems have already allocated/reserved - * memory. 
- */ -void __init dma_contiguous_reserve(phys_addr_t limit) -{ - unsigned long selected_size = 0; - - pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit); - - if (size_cmdline != -1) { - selected_size = size_cmdline; - } else { -#ifdef CONFIG_CMA_SIZE_SEL_MBYTES - selected_size = size_bytes; -#elif defined(CONFIG_CMA_SIZE_SEL_PERCENTAGE) - selected_size = cma_early_percent_memory(); -#elif defined(CONFIG_CMA_SIZE_SEL_MIN) - selected_size = min(size_bytes, cma_early_percent_memory()); -#elif defined(CONFIG_CMA_SIZE_SEL_MAX) - selected_size = max(size_bytes, cma_early_percent_memory()); -#endif - } - - if (selected_size) { - pr_debug("%s: reserving %ld MiB for global area\n", __func__, - selected_size / SZ_1M); - - dma_declare_contiguous(NULL, selected_size, 0, limit); - } -}; - -static DEFINE_MUTEX(cma_mutex); - -static __init int cma_activate_area(unsigned long base_pfn, unsigned long count) -{ - unsigned long pfn = base_pfn; - unsigned i = count >> pageblock_order; - struct zone *zone; - - WARN_ON_ONCE(!pfn_valid(pfn)); - zone = page_zone(pfn_to_page(pfn)); - - do { - unsigned j; - base_pfn = pfn; - for (j = pageblock_nr_pages; j; --j, pfn++) { - WARN_ON_ONCE(!pfn_valid(pfn)); - if (page_zone(pfn_to_page(pfn)) != zone) - return -EINVAL; - } - init_cma_reserved_pageblock(pfn_to_page(base_pfn)); - } while (--i); - return 0; -} - -static __init struct cma *cma_create_area(unsigned long base_pfn, - unsigned long count) -{ - int bitmap_size = BITS_TO_LONGS(count) * sizeof(long); - struct cma *cma; - int ret = -ENOMEM; - - pr_debug("%s(base %08lx, count %lx)\n", __func__, base_pfn, count); - - cma = kmalloc(sizeof *cma, GFP_KERNEL); - if (!cma) - return ERR_PTR(-ENOMEM); - - cma->base_pfn = base_pfn; - cma->count = count; - cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - - if (!cma->bitmap) - goto no_mem; - - ret = cma_activate_area(base_pfn, count); - if (ret) - goto error; - - pr_debug("%s: returned %p\n", __func__, (void *)cma); - return cma; - -error: - kfree(cma->bitmap); -no_mem: - kfree(cma); - return ERR_PTR(ret); -} - -static struct cma_reserved { - phys_addr_t start; - unsigned long size; - struct device *dev; -} cma_reserved[MAX_CMA_AREAS] __initdata; -static unsigned cma_reserved_count __initdata; - -static int __init cma_init_reserved_areas(void) -{ - struct cma_reserved *r = cma_reserved; - unsigned i = cma_reserved_count; - - pr_debug("%s()\n", __func__); - - for (; i; --i, ++r) { - struct cma *cma; - cma = cma_create_area(PFN_DOWN(r->start), - r->size >> PAGE_SHIFT); - if (!IS_ERR(cma)) - dev_set_cma_area(r->dev, cma); - } - return 0; -} -core_initcall(cma_init_reserved_areas); - -/** - * dma_declare_contiguous() - reserve area for contiguous memory handling - * for particular device - * @dev: Pointer to device structure. - * @size: Size of the reserved memory. - * @base: Start address of the reserved memory (optional, 0 for any). - * @limit: End address of the reserved memory (optional, 0 for any). - * - * This function reserves memory for specified device. It should be - * called by board specific code when early allocator (memblock or bootmem) - * is still activate. 
- */ -int __init dma_declare_contiguous(struct device *dev, unsigned long size, - phys_addr_t base, phys_addr_t limit) -{ - struct cma_reserved *r = &cma_reserved[cma_reserved_count]; - unsigned long alignment; - - pr_debug("%s(size %lx, base %08lx, limit %08lx)\n", __func__, - (unsigned long)size, (unsigned long)base, - (unsigned long)limit); - - /* Sanity checks */ - if (cma_reserved_count == ARRAY_SIZE(cma_reserved)) { - pr_err("Not enough slots for CMA reserved regions!\n"); - return -ENOSPC; - } - - if (!size) - return -EINVAL; - - /* Sanitise input arguments */ - alignment = PAGE_SIZE << max(MAX_ORDER, pageblock_order); - base = ALIGN(base, alignment); - size = ALIGN(size, alignment); - limit &= ~(alignment - 1); - - /* Reserve memory */ - if (base) { - if (memblock_is_region_reserved(base, size) || - memblock_reserve(base, size) < 0) { - base = -EBUSY; - goto err; - } - } else { - /* - * Use __memblock_alloc_base() since - * memblock_alloc_base() panic()s. - */ - phys_addr_t addr = __memblock_alloc_base(size, alignment, limit); - if (!addr) { - base = -ENOMEM; - goto err; - } else if (addr + size > ~(unsigned long)0) { - memblock_free(addr, size); - base = -EINVAL; - goto err; - } else { - base = addr; - } - } - - /* - * Each reserved area must be initialised later, when more kernel - * subsystems (like slab allocator) are available. - */ - r->start = base; - r->size = size; - r->dev = dev; - cma_reserved_count++; - pr_info("CMA: reserved %ld MiB at %08lx\n", size / SZ_1M, - (unsigned long)base); - - /* Architecture specific contiguous memory fixup. */ - dma_contiguous_early_fixup(base, size); - return 0; -err: - pr_err("CMA: failed to reserve %ld MiB\n", size / SZ_1M); - return base; -} - -/** - * dma_alloc_from_contiguous() - allocate pages from contiguous area - * @dev: Pointer to device for which the allocation is performed. - * @count: Requested number of pages. - * @align: Requested alignment of pages (in PAGE_SIZE order). - * - * This function allocates memory buffer for specified device. It uses - * device specific contiguous memory area if available or the default - * global one. Requires architecture specific get_dev_cma_area() helper - * function. - */ -struct page *dma_alloc_from_contiguous(struct device *dev, int count, - unsigned int align) -{ - unsigned long mask, pfn, pageno, start = 0; - struct cma *cma = dev_get_cma_area(dev); - int ret; - - if (!cma || !cma->count) - return NULL; - - if (align > CONFIG_CMA_ALIGNMENT) - align = CONFIG_CMA_ALIGNMENT; - - pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma, - count, align); - - if (!count) - return NULL; - - mask = (1 << align) - 1; - - mutex_lock(&cma_mutex); - - for (;;) { - pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count, - start, count, mask); - if (pageno >= cma->count) { - ret = -ENOMEM; - goto error; - } - - pfn = cma->base_pfn + pageno; - ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA); - if (ret == 0) { - bitmap_set(cma->bitmap, pageno, count); - break; - } else if (ret != -EBUSY) { - goto error; - } - pr_debug("%s(): memory range at %p is busy, retrying\n", - __func__, pfn_to_page(pfn)); - /* try again with a bit different memory target */ - start = pageno + mask + 1; - } - - mutex_unlock(&cma_mutex); - - pr_debug("%s(): returned %p\n", __func__, pfn_to_page(pfn)); - return pfn_to_page(pfn); -error: - mutex_unlock(&cma_mutex); - return NULL; -} - -/** - * dma_release_from_contiguous() - release allocated pages - * @dev: Pointer to device for which the pages were allocated. 
- * @pages: Allocated pages. - * @count: Number of allocated pages. - * - * This function releases memory allocated by dma_alloc_from_contiguous(). - * It returns false when provided pages do not belong to contiguous area and - * true otherwise. - */ -bool dma_release_from_contiguous(struct device *dev, struct page *pages, - int count) -{ - struct cma *cma = dev_get_cma_area(dev); - unsigned long pfn; - - if (!cma || !pages) - return false; - - pr_debug("%s(page %p)\n", __func__, (void *)pages); - - pfn = page_to_pfn(pages); - - if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) - return false; - - VM_BUG_ON(pfn + count > cma->base_pfn + cma->count); - - mutex_lock(&cma_mutex); - bitmap_clear(cma->bitmap, pfn - cma->base_pfn, count); - free_contig_range(pfn, count); - mutex_unlock(&cma_mutex); - - return true; -} diff --git a/trunk/include/asm-generic/dma-coherent.h b/trunk/include/asm-generic/dma-coherent.h index 85a3ffaa0242..abfb2682de7f 100644 --- a/trunk/include/asm-generic/dma-coherent.h +++ b/trunk/include/asm-generic/dma-coherent.h @@ -3,13 +3,15 @@ #ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT /* - * These two functions are only for dma allocator. + * These three functions are only for dma allocator. * Don't use them in device drivers. */ int dma_alloc_from_coherent(struct device *dev, ssize_t size, dma_addr_t *dma_handle, void **ret); int dma_release_from_coherent(struct device *dev, int order, void *vaddr); +int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, size_t size, int *ret); /* * Standard interface */ diff --git a/trunk/include/asm-generic/dma-contiguous.h b/trunk/include/asm-generic/dma-contiguous.h deleted file mode 100644 index c544356b374b..000000000000 --- a/trunk/include/asm-generic/dma-contiguous.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef ASM_DMA_CONTIGUOUS_H -#define ASM_DMA_CONTIGUOUS_H - -#ifdef __KERNEL__ -#ifdef CONFIG_CMA - -#include -#include - -static inline struct cma *dev_get_cma_area(struct device *dev) -{ - if (dev && dev->cma_area) - return dev->cma_area; - return dma_contiguous_default_area; -} - -static inline void dev_set_cma_area(struct device *dev, struct cma *cma) -{ - if (dev) - dev->cma_area = cma; - if (!dev || !dma_contiguous_default_area) - dma_contiguous_default_area = cma; -} - -#endif -#endif - -#endif diff --git a/trunk/include/linux/device.h b/trunk/include/linux/device.h index e3399290436e..5ad17cccdd71 100644 --- a/trunk/include/linux/device.h +++ b/trunk/include/linux/device.h @@ -661,10 +661,6 @@ struct device { struct dma_coherent_mem *dma_mem; /* internal for coherent mem override */ -#ifdef CONFIG_CMA - struct cma *cma_area; /* contiguous memory area for dma - allocations */ -#endif /* arch specific additions */ struct dev_archdata archdata; diff --git a/trunk/include/linux/dma-contiguous.h b/trunk/include/linux/dma-contiguous.h deleted file mode 100644 index 2f303e4b7ed3..000000000000 --- a/trunk/include/linux/dma-contiguous.h +++ /dev/null @@ -1,110 +0,0 @@ -#ifndef __LINUX_CMA_H -#define __LINUX_CMA_H - -/* - * Contiguous Memory Allocator for DMA mapping framework - * Copyright (c) 2010-2011 by Samsung Electronics. - * Written by: - * Marek Szyprowski - * Michal Nazarewicz - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License or (at your optional) any later version of the license. 
- */ - -/* - * Contiguous Memory Allocator - * - * The Contiguous Memory Allocator (CMA) makes it possible to - * allocate big contiguous chunks of memory after the system has - * booted. - * - * Why is it needed? - * - * Various devices on embedded systems have no scatter-getter and/or - * IO map support and require contiguous blocks of memory to - * operate. They include devices such as cameras, hardware video - * coders, etc. - * - * Such devices often require big memory buffers (a full HD frame - * is, for instance, more then 2 mega pixels large, i.e. more than 6 - * MB of memory), which makes mechanisms such as kmalloc() or - * alloc_page() ineffective. - * - * At the same time, a solution where a big memory region is - * reserved for a device is suboptimal since often more memory is - * reserved then strictly required and, moreover, the memory is - * inaccessible to page system even if device drivers don't use it. - * - * CMA tries to solve this issue by operating on memory regions - * where only movable pages can be allocated from. This way, kernel - * can use the memory for pagecache and when device driver requests - * it, allocated pages can be migrated. - * - * Driver usage - * - * CMA should not be used by the device drivers directly. It is - * only a helper framework for dma-mapping subsystem. - * - * For more information, see kernel-docs in drivers/base/dma-contiguous.c - */ - -#ifdef __KERNEL__ - -struct cma; -struct page; -struct device; - -#ifdef CONFIG_CMA - -/* - * There is always at least global CMA area and a few optional device - * private areas configured in kernel .config. - */ -#define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS) - -extern struct cma *dma_contiguous_default_area; - -void dma_contiguous_reserve(phys_addr_t addr_limit); -int dma_declare_contiguous(struct device *dev, unsigned long size, - phys_addr_t base, phys_addr_t limit); - -struct page *dma_alloc_from_contiguous(struct device *dev, int count, - unsigned int order); -bool dma_release_from_contiguous(struct device *dev, struct page *pages, - int count); - -#else - -#define MAX_CMA_AREAS (0) - -static inline void dma_contiguous_reserve(phys_addr_t limit) { } - -static inline -int dma_declare_contiguous(struct device *dev, unsigned long size, - phys_addr_t base, phys_addr_t limit) -{ - return -ENOSYS; -} - -static inline -struct page *dma_alloc_from_contiguous(struct device *dev, int count, - unsigned int order) -{ - return NULL; -} - -static inline -bool dma_release_from_contiguous(struct device *dev, struct page *pages, - int count) -{ - return false; -} - -#endif - -#endif - -#endif diff --git a/trunk/include/linux/gfp.h b/trunk/include/linux/gfp.h index 1e49be49d324..581e74b7df95 100644 --- a/trunk/include/linux/gfp.h +++ b/trunk/include/linux/gfp.h @@ -391,16 +391,4 @@ static inline bool pm_suspended_storage(void) } #endif /* CONFIG_PM_SLEEP */ -#ifdef CONFIG_CMA - -/* The below functions must be run on a range from a single zone. 
*/ -extern int alloc_contig_range(unsigned long start, unsigned long end, - unsigned migratetype); -extern void free_contig_range(unsigned long pfn, unsigned nr_pages); - -/* CMA stuff */ -extern void init_cma_reserved_pageblock(struct page *page); - -#endif - #endif /* __LINUX_GFP_H */ diff --git a/trunk/include/linux/mmzone.h b/trunk/include/linux/mmzone.h index 26f2040b8b04..dff711509661 100644 --- a/trunk/include/linux/mmzone.h +++ b/trunk/include/linux/mmzone.h @@ -35,39 +35,13 @@ */ #define PAGE_ALLOC_COSTLY_ORDER 3 -enum { - MIGRATE_UNMOVABLE, - MIGRATE_RECLAIMABLE, - MIGRATE_MOVABLE, - MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ - MIGRATE_RESERVE = MIGRATE_PCPTYPES, -#ifdef CONFIG_CMA - /* - * MIGRATE_CMA migration type is designed to mimic the way - * ZONE_MOVABLE works. Only movable pages can be allocated - * from MIGRATE_CMA pageblocks and page allocator never - * implicitly change migration type of MIGRATE_CMA pageblock. - * - * The way to use it is to change migratetype of a range of - * pageblocks to MIGRATE_CMA which can be done by - * __free_pageblock_cma() function. What is important though - * is that a range of pageblocks must be aligned to - * MAX_ORDER_NR_PAGES should biggest page be bigger then - * a single pageblock. - */ - MIGRATE_CMA, -#endif - MIGRATE_ISOLATE, /* can't allocate from here */ - MIGRATE_TYPES -}; - -#ifdef CONFIG_CMA -# define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) -# define cma_wmark_pages(zone) zone->min_cma_pages -#else -# define is_migrate_cma(migratetype) false -# define cma_wmark_pages(zone) 0 -#endif +#define MIGRATE_UNMOVABLE 0 +#define MIGRATE_RECLAIMABLE 1 +#define MIGRATE_MOVABLE 2 +#define MIGRATE_PCPTYPES 3 /* the number of types on the pcp lists */ +#define MIGRATE_RESERVE 3 +#define MIGRATE_ISOLATE 4 /* can't allocate from here */ +#define MIGRATE_TYPES 5 #define for_each_migratetype_order(order, type) \ for (order = 0; order < MAX_ORDER; order++) \ @@ -372,13 +346,6 @@ struct zone { #ifdef CONFIG_MEMORY_HOTPLUG /* see spanned/present_pages for more description */ seqlock_t span_seqlock; -#endif -#ifdef CONFIG_CMA - /* - * CMA needs to increase watermark levels during the allocation - * process to make sure that the system is not starved. - */ - unsigned long min_cma_pages; #endif struct free_area free_area[MAX_ORDER]; diff --git a/trunk/include/linux/page-isolation.h b/trunk/include/linux/page-isolation.h index 3bdcab30ca41..051c1b1ede4e 100644 --- a/trunk/include/linux/page-isolation.h +++ b/trunk/include/linux/page-isolation.h @@ -3,7 +3,7 @@ /* * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE. - * If specified range includes migrate types other than MOVABLE or CMA, + * If specified range includes migrate types other than MOVABLE, * this will fail with -EBUSY. * * For isolating all pages in the range finally, the caller have to @@ -11,27 +11,27 @@ * test it. */ extern int -start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, - unsigned migratetype); +start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn); /* * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE. * target range is [start_pfn, end_pfn) */ extern int -undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, - unsigned migratetype); +undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn); /* - * Test all pages in [start_pfn, end_pfn) are isolated or not. + * test all pages in [start_pfn, end_pfn)are isolated or not. 
*/ -int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn); +extern int +test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn); /* - * Internal functions. Changes pageblock's migrate type. + * Internal funcs.Changes pageblock's migrate type. + * Please use make_pagetype_isolated()/make_pagetype_movable(). */ extern int set_migratetype_isolate(struct page *page); -extern void unset_migratetype_isolate(struct page *page, unsigned migratetype); +extern void unset_migratetype_isolate(struct page *page); #endif diff --git a/trunk/mm/Kconfig b/trunk/mm/Kconfig index 39220026c797..e338407f1225 100644 --- a/trunk/mm/Kconfig +++ b/trunk/mm/Kconfig @@ -198,7 +198,7 @@ config COMPACTION config MIGRATION bool "Page migration" def_bool y - depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION || CMA + depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION help Allows the migration of the physical location of pages of processes while the virtual addresses are not changed. This is useful in diff --git a/trunk/mm/Makefile b/trunk/mm/Makefile index 8aada89efbbb..50ec00ef2a0e 100644 --- a/trunk/mm/Makefile +++ b/trunk/mm/Makefile @@ -13,7 +13,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ page_isolation.o mm_init.o mmu_context.o percpu.o \ - compaction.o $(mmu-y) + $(mmu-y) obj-y += init-mm.o ifdef CONFIG_NO_BOOTMEM @@ -32,6 +32,7 @@ obj-$(CONFIG_NUMA) += mempolicy.o obj-$(CONFIG_SPARSEMEM) += sparse.o obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o obj-$(CONFIG_SLOB) += slob.o +obj-$(CONFIG_COMPACTION) += compaction.o obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o obj-$(CONFIG_KSM) += ksm.o obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o diff --git a/trunk/mm/compaction.c b/trunk/mm/compaction.c index da7d35ea5103..74a8c825ff28 100644 --- a/trunk/mm/compaction.c +++ b/trunk/mm/compaction.c @@ -16,11 +16,30 @@ #include #include "internal.h" -#if defined CONFIG_COMPACTION || defined CONFIG_CMA - #define CREATE_TRACE_POINTS #include +/* + * compact_control is used to track pages being migrated and the free pages + * they are being migrated to during memory compaction. The free_pfn starts + * at the end of a zone and migrate_pfn begins at the start. 
Movable pages + * are moved to the end of a zone during a compaction run and the run + * completes when free_pfn <= migrate_pfn + */ +struct compact_control { + struct list_head freepages; /* List of free pages to migrate to */ + struct list_head migratepages; /* List of pages being migrated */ + unsigned long nr_freepages; /* Number of isolated free pages */ + unsigned long nr_migratepages; /* Number of pages to migrate */ + unsigned long free_pfn; /* isolate_freepages search base */ + unsigned long migrate_pfn; /* isolate_migratepages search base */ + bool sync; /* Synchronous migration */ + + int order; /* order a direct compactor needs */ + int migratetype; /* MOVABLE, RECLAIMABLE etc */ + struct zone *zone; +}; + static unsigned long release_freepages(struct list_head *freelist) { struct page *page, *next; @@ -35,35 +54,24 @@ static unsigned long release_freepages(struct list_head *freelist) return count; } -static void map_pages(struct list_head *list) -{ - struct page *page; - - list_for_each_entry(page, list, lru) { - arch_alloc_page(page, 0); - kernel_map_pages(page, 1, 1); - } -} - -static inline bool migrate_async_suitable(int migratetype) -{ - return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE; -} - -/* - * Isolate free pages onto a private freelist. Caller must hold zone->lock. - * If @strict is true, will abort returning 0 on any invalid PFNs or non-free - * pages inside of the pageblock (even though it may still end up isolating - * some pages). - */ -static unsigned long isolate_freepages_block(unsigned long blockpfn, - unsigned long end_pfn, - struct list_head *freelist, - bool strict) +/* Isolate free pages onto a private freelist. Must hold zone->lock */ +static unsigned long isolate_freepages_block(struct zone *zone, + unsigned long blockpfn, + struct list_head *freelist) { + unsigned long zone_end_pfn, end_pfn; int nr_scanned = 0, total_isolated = 0; struct page *cursor; + /* Get the last PFN we should scan for free pages at */ + zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; + end_pfn = min(blockpfn + pageblock_nr_pages, zone_end_pfn); + + /* Find the first usable PFN in the block to initialse page cursor */ + for (; blockpfn < end_pfn; blockpfn++) { + if (pfn_valid_within(blockpfn)) + break; + } cursor = pfn_to_page(blockpfn); /* Isolate free pages. This assumes the block is valid */ @@ -71,23 +79,15 @@ static unsigned long isolate_freepages_block(unsigned long blockpfn, int isolated, i; struct page *page = cursor; - if (!pfn_valid_within(blockpfn)) { - if (strict) - return 0; + if (!pfn_valid_within(blockpfn)) continue; - } nr_scanned++; - if (!PageBuddy(page)) { - if (strict) - return 0; + if (!PageBuddy(page)) continue; - } /* Found a free page, break it into order-0 pages */ isolated = split_free_page(page); - if (!isolated && strict) - return 0; total_isolated += isolated; for (i = 0; i < isolated; i++) { list_add(&page->lru, freelist); @@ -105,71 +105,114 @@ static unsigned long isolate_freepages_block(unsigned long blockpfn, return total_isolated; } -/** - * isolate_freepages_range() - isolate free pages. - * @start_pfn: The first PFN to start isolating. - * @end_pfn: The one-past-last PFN. - * - * Non-free pages, invalid PFNs, or zone boundaries within the - * [start_pfn, end_pfn) range are considered errors, cause function to - * undo its actions and return zero. - * - * Otherwise, function returns one-past-the-last PFN of isolated page - * (which may be greater then end_pfn if end fell in a middle of - * a free page). 
+/* Returns true if the page is within a block suitable for migration to */ +static bool suitable_migration_target(struct page *page) +{ + + int migratetype = get_pageblock_migratetype(page); + + /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ + if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE) + return false; + + /* If the page is a large free page, then allow migration */ + if (PageBuddy(page) && page_order(page) >= pageblock_order) + return true; + + /* If the block is MIGRATE_MOVABLE, allow migration */ + if (migratetype == MIGRATE_MOVABLE) + return true; + + /* Otherwise skip the block */ + return false; +} + +/* + * Based on information in the current compact_control, find blocks + * suitable for isolating free pages from and then isolate them. */ -unsigned long -isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn) +static void isolate_freepages(struct zone *zone, + struct compact_control *cc) { - unsigned long isolated, pfn, block_end_pfn, flags; - struct zone *zone = NULL; - LIST_HEAD(freelist); + struct page *page; + unsigned long high_pfn, low_pfn, pfn; + unsigned long flags; + int nr_freepages = cc->nr_freepages; + struct list_head *freelist = &cc->freepages; + + /* + * Initialise the free scanner. The starting point is where we last + * scanned from (or the end of the zone if starting). The low point + * is the end of the pageblock the migration scanner is using. + */ + pfn = cc->free_pfn; + low_pfn = cc->migrate_pfn + pageblock_nr_pages; - if (pfn_valid(start_pfn)) - zone = page_zone(pfn_to_page(start_pfn)); + /* + * Take care that if the migration scanner is at the end of the zone + * that the free scanner does not accidentally move to the next zone + * in the next isolation cycle. + */ + high_pfn = min(low_pfn, pfn); - for (pfn = start_pfn; pfn < end_pfn; pfn += isolated) { - if (!pfn_valid(pfn) || zone != page_zone(pfn_to_page(pfn))) - break; + /* + * Isolate free pages until enough are available to migrate the + * pages on cc->migratepages. We stop searching if the migrate + * and free page scanners meet or enough free pages are isolated. + */ + for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages; + pfn -= pageblock_nr_pages) { + unsigned long isolated; + + if (!pfn_valid(pfn)) + continue; /* - * On subsequent iterations ALIGN() is actually not needed, - * but we keep it that we not to complicate the code. + * Check for overlapping nodes/zones. It's possible on some + * configurations to have a setup like + * node0 node1 node0 + * i.e. it's possible that all pages within a zones range of + * pages do not belong to a single zone. */ - block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); - block_end_pfn = min(block_end_pfn, end_pfn); + page = pfn_to_page(pfn); + if (page_zone(page) != zone) + continue; - spin_lock_irqsave(&zone->lock, flags); - isolated = isolate_freepages_block(pfn, block_end_pfn, - &freelist, true); - spin_unlock_irqrestore(&zone->lock, flags); + /* Check the block is suitable for migration */ + if (!suitable_migration_target(page)) + continue; /* - * In strict mode, isolate_freepages_block() returns 0 if - * there are any holes in the block (ie. invalid PFNs or - * non-free pages). + * Found a block suitable for isolating free pages from. Now + * we disabled interrupts, double check things are ok and + * isolate the pages. 
This is to minimise the time IRQs + * are disabled */ - if (!isolated) - break; + isolated = 0; + spin_lock_irqsave(&zone->lock, flags); + if (suitable_migration_target(page)) { + isolated = isolate_freepages_block(zone, pfn, freelist); + nr_freepages += isolated; + } + spin_unlock_irqrestore(&zone->lock, flags); /* - * If we managed to isolate pages, it is always (1 << n) * - * pageblock_nr_pages for some non-negative n. (Max order - * page may span two pageblocks). + * Record the highest PFN we isolated pages from. When next + * looking for free pages, the search will restart here as + * page migration may have returned some pages to the allocator */ + if (isolated) + high_pfn = max(high_pfn, pfn); } /* split_free_page does not map the pages */ - map_pages(&freelist); - - if (pfn < end_pfn) { - /* Loop terminated early, cleanup. */ - release_freepages(&freelist); - return 0; + list_for_each_entry(page, freelist, lru) { + arch_alloc_page(page, 0); + kernel_map_pages(page, 1, 1); } - /* We don't use freelists for anything. */ - return pfn; + cc->free_pfn = high_pfn; + cc->nr_freepages = nr_freepages; } /* Update the number of anon and file isolated pages in the zone */ @@ -200,34 +243,38 @@ static bool too_many_isolated(struct zone *zone) return isolated > (inactive + active) / 2; } -/** - * isolate_migratepages_range() - isolate all migrate-able pages in range. - * @zone: Zone pages are in. - * @cc: Compaction control structure. - * @low_pfn: The first PFN of the range. - * @end_pfn: The one-past-the-last PFN of the range. - * - * Isolate all pages that can be migrated from the range specified by - * [low_pfn, end_pfn). Returns zero if there is a fatal signal - * pending), otherwise PFN of the first page that was not scanned - * (which may be both less, equal to or more then end_pfn). - * - * Assumes that cc->migratepages is empty and cc->nr_migratepages is - * zero. - * - * Apart from cc->migratepages and cc->nr_migratetypes this function - * does not modify any cc's fields, in particular it does not modify - * (or read for that matter) cc->migrate_pfn. +/* possible outcome of isolate_migratepages */ +typedef enum { + ISOLATE_ABORT, /* Abort compaction now */ + ISOLATE_NONE, /* No pages isolated, continue scanning */ + ISOLATE_SUCCESS, /* Pages isolated, migrate */ +} isolate_migrate_t; + +/* + * Isolate all pages that can be migrated from the block pointed to by + * the migrate scanner within compact_control. */ -unsigned long -isolate_migratepages_range(struct zone *zone, struct compact_control *cc, - unsigned long low_pfn, unsigned long end_pfn) +static isolate_migrate_t isolate_migratepages(struct zone *zone, + struct compact_control *cc) { + unsigned long low_pfn, end_pfn; unsigned long last_pageblock_nr = 0, pageblock_nr; unsigned long nr_scanned = 0, nr_isolated = 0; struct list_head *migratelist = &cc->migratepages; isolate_mode_t mode = ISOLATE_ACTIVE|ISOLATE_INACTIVE; + /* Do not scan outside zone boundaries */ + low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn); + + /* Only scan within a pageblock boundary */ + end_pfn = ALIGN(low_pfn + pageblock_nr_pages, pageblock_nr_pages); + + /* Do not cross the free scanner or scan within a memory hole */ + if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) { + cc->migrate_pfn = end_pfn; + return ISOLATE_NONE; + } + /* * Ensure that there are not too many pages isolated from the LRU * list by either parallel reclaimers or compaction. 
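too_many_isolated() above throttles the migration scanner once the isolated pages outnumber half of what is still on the zone's inactive and active LRU lists. A standalone sketch of that ratio check; the three counters are plain parameters here, not the kernel's per-zone statistics (which are further split into anon and file components):

#include <stdbool.h>

/*
 * Sketch of the throttle used above: isolation is considered excessive
 * once more pages sit isolated than half of what is still on the
 * inactive and active lists combined.
 */
static bool toy_too_many_isolated(unsigned long inactive,
				  unsigned long active,
				  unsigned long isolated)
{
	return isolated > (inactive + active) / 2;
}

When the check fires, the caller above either aborts (asynchronous compaction) or waits in congestion_wait() and retries.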
If there are, @@ -236,12 +283,12 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, while (unlikely(too_many_isolated(zone))) { /* async migration should just abort */ if (!cc->sync) - return 0; + return ISOLATE_ABORT; congestion_wait(BLK_RW_ASYNC, HZ/10); if (fatal_signal_pending(current)) - return 0; + return ISOLATE_ABORT; } /* Time to isolate some pages for migration */ @@ -304,7 +351,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, */ pageblock_nr = low_pfn >> pageblock_order; if (!cc->sync && last_pageblock_nr != pageblock_nr && - !migrate_async_suitable(get_pageblock_migratetype(page))) { + get_pageblock_migratetype(page) != MIGRATE_MOVABLE) { low_pfn += pageblock_nr_pages; low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1; last_pageblock_nr = pageblock_nr; @@ -349,124 +396,11 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, acct_isolated(zone, cc); spin_unlock_irq(&zone->lru_lock); + cc->migrate_pfn = low_pfn; trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated); - return low_pfn; -} - -#endif /* CONFIG_COMPACTION || CONFIG_CMA */ -#ifdef CONFIG_COMPACTION - -/* Returns true if the page is within a block suitable for migration to */ -static bool suitable_migration_target(struct page *page) -{ - - int migratetype = get_pageblock_migratetype(page); - - /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ - if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE) - return false; - - /* If the page is a large free page, then allow migration */ - if (PageBuddy(page) && page_order(page) >= pageblock_order) - return true; - - /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */ - if (migrate_async_suitable(migratetype)) - return true; - - /* Otherwise skip the block */ - return false; -} - -/* - * Based on information in the current compact_control, find blocks - * suitable for isolating free pages from and then isolate them. - */ -static void isolate_freepages(struct zone *zone, - struct compact_control *cc) -{ - struct page *page; - unsigned long high_pfn, low_pfn, pfn, zone_end_pfn, end_pfn; - unsigned long flags; - int nr_freepages = cc->nr_freepages; - struct list_head *freelist = &cc->freepages; - - /* - * Initialise the free scanner. The starting point is where we last - * scanned from (or the end of the zone if starting). The low point - * is the end of the pageblock the migration scanner is using. - */ - pfn = cc->free_pfn; - low_pfn = cc->migrate_pfn + pageblock_nr_pages; - - /* - * Take care that if the migration scanner is at the end of the zone - * that the free scanner does not accidentally move to the next zone - * in the next isolation cycle. - */ - high_pfn = min(low_pfn, pfn); - - zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; - - /* - * Isolate free pages until enough are available to migrate the - * pages on cc->migratepages. We stop searching if the migrate - * and free page scanners meet or enough free pages are isolated. - */ - for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages; - pfn -= pageblock_nr_pages) { - unsigned long isolated; - - if (!pfn_valid(pfn)) - continue; - - /* - * Check for overlapping nodes/zones. It's possible on some - * configurations to have a setup like - * node0 node1 node0 - * i.e. it's possible that all pages within a zones range of - * pages do not belong to a single zone. 
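When asynchronous compaction meets a pageblock that is not MIGRATE_MOVABLE, the code above skips the rest of that block by advancing low_pfn and re-aligning it to the pageblock boundary. A minimal, standalone sketch of that alignment arithmetic; TOY_PAGEBLOCK_NR_PAGES, TOY_ALIGN and the example pfn are assumptions made for the sketch, since the real pageblock size is configuration dependent:

#include <stdio.h>

#define TOY_PAGEBLOCK_NR_PAGES 512UL                 /* assumed, config-dependent */
#define TOY_ALIGN(x, a)        (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long low_pfn = 1000;

	/*
	 * Mirror the skip in the hunk above: advance by one pageblock, then
	 * snap to the pageblock boundary minus one, so the scan loop's own
	 * increment lands exactly on the next block's first pfn.
	 */
	low_pfn += TOY_PAGEBLOCK_NR_PAGES;
	low_pfn = TOY_ALIGN(low_pfn, TOY_PAGEBLOCK_NR_PAGES) - 1;

	printf("scan resumes after pfn %lu\n", low_pfn);  /* prints 1535 */
	return 0;
}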
- */ - page = pfn_to_page(pfn); - if (page_zone(page) != zone) - continue; - - /* Check the block is suitable for migration */ - if (!suitable_migration_target(page)) - continue; - - /* - * Found a block suitable for isolating free pages from. Now - * we disabled interrupts, double check things are ok and - * isolate the pages. This is to minimise the time IRQs - * are disabled - */ - isolated = 0; - spin_lock_irqsave(&zone->lock, flags); - if (suitable_migration_target(page)) { - end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn); - isolated = isolate_freepages_block(pfn, end_pfn, - freelist, false); - nr_freepages += isolated; - } - spin_unlock_irqrestore(&zone->lock, flags); - - /* - * Record the highest PFN we isolated pages from. When next - * looking for free pages, the search will restart here as - * page migration may have returned some pages to the allocator - */ - if (isolated) - high_pfn = max(high_pfn, pfn); - } - - /* split_free_page does not map the pages */ - map_pages(freelist); - - cc->free_pfn = high_pfn; - cc->nr_freepages = nr_freepages; + return ISOLATE_SUCCESS; } /* @@ -515,44 +449,6 @@ static void update_nr_listpages(struct compact_control *cc) cc->nr_freepages = nr_freepages; } -/* possible outcome of isolate_migratepages */ -typedef enum { - ISOLATE_ABORT, /* Abort compaction now */ - ISOLATE_NONE, /* No pages isolated, continue scanning */ - ISOLATE_SUCCESS, /* Pages isolated, migrate */ -} isolate_migrate_t; - -/* - * Isolate all pages that can be migrated from the block pointed to by - * the migrate scanner within compact_control. - */ -static isolate_migrate_t isolate_migratepages(struct zone *zone, - struct compact_control *cc) -{ - unsigned long low_pfn, end_pfn; - - /* Do not scan outside zone boundaries */ - low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn); - - /* Only scan within a pageblock boundary */ - end_pfn = ALIGN(low_pfn + pageblock_nr_pages, pageblock_nr_pages); - - /* Do not cross the free scanner or scan within a memory hole */ - if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) { - cc->migrate_pfn = end_pfn; - return ISOLATE_NONE; - } - - /* Perform the isolation */ - low_pfn = isolate_migratepages_range(zone, cc, low_pfn, end_pfn); - if (!low_pfn) - return ISOLATE_ABORT; - - cc->migrate_pfn = low_pfn; - - return ISOLATE_SUCCESS; -} - static int compact_finished(struct zone *zone, struct compact_control *cc) { @@ -899,5 +795,3 @@ void compaction_unregister_node(struct node *node) return device_remove_file(&node->dev, &dev_attr_compact); } #endif /* CONFIG_SYSFS && CONFIG_NUMA */ - -#endif /* CONFIG_COMPACTION */ diff --git a/trunk/mm/internal.h b/trunk/mm/internal.h index aee4761cf9a9..2189af491783 100644 --- a/trunk/mm/internal.h +++ b/trunk/mm/internal.h @@ -100,39 +100,6 @@ extern void prep_compound_page(struct page *page, unsigned long order); extern bool is_free_buddy_page(struct page *page); #endif -#if defined CONFIG_COMPACTION || defined CONFIG_CMA - -/* - * in mm/compaction.c - */ -/* - * compact_control is used to track pages being migrated and the free pages - * they are being migrated to during memory compaction. The free_pfn starts - * at the end of a zone and migrate_pfn begins at the start. 
Movable pages - * are moved to the end of a zone during a compaction run and the run - * completes when free_pfn <= migrate_pfn - */ -struct compact_control { - struct list_head freepages; /* List of free pages to migrate to */ - struct list_head migratepages; /* List of pages being migrated */ - unsigned long nr_freepages; /* Number of isolated free pages */ - unsigned long nr_migratepages; /* Number of pages to migrate */ - unsigned long free_pfn; /* isolate_freepages search base */ - unsigned long migrate_pfn; /* isolate_migratepages search base */ - bool sync; /* Synchronous migration */ - - int order; /* order a direct compactor needs */ - int migratetype; /* MOVABLE, RECLAIMABLE etc */ - struct zone *zone; -}; - -unsigned long -isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn); -unsigned long -isolate_migratepages_range(struct zone *zone, struct compact_control *cc, - unsigned long low_pfn, unsigned long end_pfn); - -#endif /* * function for dealing with page's order in buddy system. diff --git a/trunk/mm/memory-failure.c b/trunk/mm/memory-failure.c index c99ad4e6b88c..97cc2733551a 100644 --- a/trunk/mm/memory-failure.c +++ b/trunk/mm/memory-failure.c @@ -1404,7 +1404,7 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) /* Not a free page */ ret = 1; } - unset_migratetype_isolate(p, MIGRATE_MOVABLE); + unset_migratetype_isolate(p); unlock_memory_hotplug(); return ret; } diff --git a/trunk/mm/memory_hotplug.c b/trunk/mm/memory_hotplug.c index fc898cb4fe8f..6629fafd6ce4 100644 --- a/trunk/mm/memory_hotplug.c +++ b/trunk/mm/memory_hotplug.c @@ -891,7 +891,7 @@ static int __ref offline_pages(unsigned long start_pfn, nr_pages = end_pfn - start_pfn; /* set above range as isolated */ - ret = start_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); + ret = start_isolate_page_range(start_pfn, end_pfn); if (ret) goto out; @@ -956,7 +956,7 @@ static int __ref offline_pages(unsigned long start_pfn, We cannot do rollback at this point. */ offline_isolated_pages(start_pfn, end_pfn); /* reset pagetype flags and makes migrate type to be MOVABLE */ - undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); + undo_isolate_page_range(start_pfn, end_pfn); /* removal success */ zone->present_pages -= offlined_pages; zone->zone_pgdat->node_present_pages -= offlined_pages; @@ -981,7 +981,7 @@ static int __ref offline_pages(unsigned long start_pfn, start_pfn, end_pfn); memory_notify(MEM_CANCEL_OFFLINE, &arg); /* pushback to free area */ - undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); + undo_isolate_page_range(start_pfn, end_pfn); out: unlock_memory_hotplug(); diff --git a/trunk/mm/page_alloc.c b/trunk/mm/page_alloc.c index ed85c02c1f6e..918330f71dba 100644 --- a/trunk/mm/page_alloc.c +++ b/trunk/mm/page_alloc.c @@ -57,7 +57,6 @@ #include #include #include -#include #include #include @@ -514,10 +513,10 @@ static inline int page_is_buddy(struct page *page, struct page *buddy, * free pages of length of (1 << order) and marked with _mapcount -2. Page's * order is recorded in page_private(page) field. * So when we are allocating or freeing one, we can derive the state of the - * other. That is, if we allocate a small block, and both were - * free, the remainder of the region must be split into blocks. + * other. That is, if we allocate a small block, and both were + * free, the remainder of the region must be split into blocks. * If a block is freed, and its buddy is also free, then this - * triggers coalescing into a block of larger size. 
+ * triggers coalescing into a block of larger size. * * -- wli */ @@ -750,24 +749,6 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order) __free_pages(page, order); } -#ifdef CONFIG_CMA -/* Free whole pageblock and set it's migration type to MIGRATE_CMA. */ -void __init init_cma_reserved_pageblock(struct page *page) -{ - unsigned i = pageblock_nr_pages; - struct page *p = page; - - do { - __ClearPageReserved(p); - set_page_count(p, 0); - } while (++p, --i); - - set_page_refcounted(page); - set_pageblock_migratetype(page, MIGRATE_CMA); - __free_pages(page, pageblock_order); - totalram_pages += pageblock_nr_pages; -} -#endif /* * The order of subdivision here is critical for the IO subsystem. @@ -893,17 +874,11 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, * This array describes the order lists are fallen back to when * the free lists for the desirable migrate type are depleted */ -static int fallbacks[MIGRATE_TYPES][4] = { - [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, - [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, -#ifdef CONFIG_CMA - [MIGRATE_MOVABLE] = { MIGRATE_CMA, MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, - [MIGRATE_CMA] = { MIGRATE_RESERVE }, /* Never used */ -#else - [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, -#endif - [MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */ - [MIGRATE_ISOLATE] = { MIGRATE_RESERVE }, /* Never used */ +static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = { + [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, + [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, + [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, + [MIGRATE_RESERVE] = { MIGRATE_RESERVE, MIGRATE_RESERVE, MIGRATE_RESERVE }, /* Never used */ }; /* @@ -998,12 +973,12 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) /* Find the largest possible block of pages in the other list */ for (current_order = MAX_ORDER-1; current_order >= order; --current_order) { - for (i = 0;; i++) { + for (i = 0; i < MIGRATE_TYPES - 1; i++) { migratetype = fallbacks[start_migratetype][i]; /* MIGRATE_RESERVE handled later if necessary */ if (migratetype == MIGRATE_RESERVE) - break; + continue; area = &(zone->free_area[current_order]); if (list_empty(&area->free_list[migratetype])) @@ -1018,18 +993,11 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) * pages to the preferred allocation list. If falling * back for a reclaimable kernel allocation, be more * aggressive about taking ownership of free pages - * - * On the other hand, never change migration - * type of MIGRATE_CMA pageblocks nor move CMA - * pages on different free lists. We don't - * want unmovable pages to be allocated from - * MIGRATE_CMA areas. 
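The fallbacks[] table restored above gives, for each requested migratetype, the order in which the other free lists are raided when the preferred list is empty; MIGRATE_RESERVE entries are skipped here and handled later. A self-contained sketch of that table-driven lookup — the toy_* enum, table and has_free[] flags are stand-ins invented for the sketch, not the kernel's zone free-area state:

#include <stdbool.h>

enum toy_mt { TOY_UNMOVABLE, TOY_RECLAIMABLE, TOY_MOVABLE, TOY_RESERVE, TOY_NR_MT };

/* Same shape as the restored table: each row lists fallbacks in preference order. */
static const enum toy_mt toy_fallbacks[TOY_NR_MT][TOY_NR_MT - 1] = {
	[TOY_UNMOVABLE]   = { TOY_RECLAIMABLE, TOY_MOVABLE,    TOY_RESERVE },
	[TOY_RECLAIMABLE] = { TOY_UNMOVABLE,   TOY_MOVABLE,    TOY_RESERVE },
	[TOY_MOVABLE]     = { TOY_RECLAIMABLE, TOY_UNMOVABLE,  TOY_RESERVE },
	[TOY_RESERVE]     = { TOY_RESERVE,     TOY_RESERVE,    TOY_RESERVE },
};

/* Pick the first fallback type that still has free pages; -1 if none. */
static int toy_pick_fallback(enum toy_mt want, const bool has_free[TOY_NR_MT])
{
	for (int i = 0; i < TOY_NR_MT - 1; i++) {
		enum toy_mt mt = toy_fallbacks[want][i];

		if (mt == TOY_RESERVE)   /* handled separately in the real allocator */
			continue;
		if (has_free[mt])
			return mt;
	}
	return -1;
}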
*/ - if (!is_migrate_cma(migratetype) && - (unlikely(current_order >= pageblock_order / 2) || - start_migratetype == MIGRATE_RECLAIMABLE || - page_group_by_mobility_disabled)) { - int pages; + if (unlikely(current_order >= (pageblock_order >> 1)) || + start_migratetype == MIGRATE_RECLAIMABLE || + page_group_by_mobility_disabled) { + unsigned long pages; pages = move_freepages_block(zone, page, start_migratetype); @@ -1047,14 +1015,11 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) rmv_page_order(page); /* Take ownership for orders >= pageblock_order */ - if (current_order >= pageblock_order && - !is_migrate_cma(migratetype)) + if (current_order >= pageblock_order) change_pageblock_range(page, current_order, start_migratetype); - expand(zone, page, order, current_order, area, - is_migrate_cma(migratetype) - ? migratetype : start_migratetype); + expand(zone, page, order, current_order, area, migratetype); trace_mm_page_alloc_extfrag(page, order, current_order, start_migratetype, migratetype); @@ -1096,17 +1061,17 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order, return page; } -/* +/* * Obtain a specified number of elements from the buddy allocator, all under * a single hold of the lock, for efficiency. Add them to the supplied list. * Returns the number of new pages which were placed at *list. */ -static int rmqueue_bulk(struct zone *zone, unsigned int order, +static int rmqueue_bulk(struct zone *zone, unsigned int order, unsigned long count, struct list_head *list, int migratetype, int cold) { - int mt = migratetype, i; - + int i; + spin_lock(&zone->lock); for (i = 0; i < count; ++i) { struct page *page = __rmqueue(zone, order, migratetype); @@ -1126,12 +1091,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, list_add(&page->lru, list); else list_add_tail(&page->lru, list); - if (IS_ENABLED(CONFIG_CMA)) { - mt = get_pageblock_migratetype(page); - if (!is_migrate_cma(mt) && mt != MIGRATE_ISOLATE) - mt = migratetype; - } - set_page_private(page, mt); + set_page_private(page, migratetype); list = &page->lru; } __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); @@ -1411,12 +1371,8 @@ int split_free_page(struct page *page) if (order >= pageblock_order - 1) { struct page *endpage = page + (1 << order) - 1; - for (; page < endpage; page += pageblock_nr_pages) { - int mt = get_pageblock_migratetype(page); - if (mt != MIGRATE_ISOLATE && !is_migrate_cma(mt)) - set_pageblock_migratetype(page, - MIGRATE_MOVABLE); - } + for (; page < endpage; page += pageblock_nr_pages) + set_pageblock_migratetype(page, MIGRATE_MOVABLE); } return 1 << order; @@ -2130,13 +2086,16 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, } #endif /* CONFIG_COMPACTION */ -/* Perform direct synchronous page reclaim */ -static int -__perform_reclaim(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, - nodemask_t *nodemask) +/* The really slow allocator path where we enter direct reclaim */ +static inline struct page * +__alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, + struct zonelist *zonelist, enum zone_type high_zoneidx, + nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, + int migratetype, unsigned long *did_some_progress) { + struct page *page = NULL; struct reclaim_state reclaim_state; - int progress; + bool drained = false; cond_resched(); @@ -2147,7 +2106,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, reclaim_state.reclaimed_slab = 0; 
current->reclaim_state = &reclaim_state; - progress = try_to_free_pages(zonelist, order, gfp_mask, nodemask); + *did_some_progress = try_to_free_pages(zonelist, order, gfp_mask, nodemask); current->reclaim_state = NULL; lockdep_clear_current_reclaim_state(); @@ -2155,21 +2114,6 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, cond_resched(); - return progress; -} - -/* The really slow allocator path where we enter direct reclaim */ -static inline struct page * -__alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, - struct zonelist *zonelist, enum zone_type high_zoneidx, - nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, - int migratetype, unsigned long *did_some_progress) -{ - struct page *page = NULL; - bool drained = false; - - *did_some_progress = __perform_reclaim(gfp_mask, order, zonelist, - nodemask); if (unlikely(!(*did_some_progress))) return NULL; @@ -4357,7 +4301,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, init_waitqueue_head(&pgdat->kswapd_wait); pgdat->kswapd_max_order = 0; pgdat_page_cgroup_init(pgdat); - + for (j = 0; j < MAX_NR_ZONES; j++) { struct zone *zone = pgdat->node_zones + j; unsigned long size, realsize, memmap_pages; @@ -5032,7 +4976,14 @@ static void setup_per_zone_lowmem_reserve(void) calculate_totalreserve_pages(); } -static void __setup_per_zone_wmarks(void) +/** + * setup_per_zone_wmarks - called when min_free_kbytes changes + * or when memory is hot-{added|removed} + * + * Ensures that the watermark[min,low,high] values for each zone are set + * correctly with respect to min_free_kbytes. + */ +void setup_per_zone_wmarks(void) { unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10); unsigned long lowmem_pages = 0; @@ -5079,11 +5030,6 @@ static void __setup_per_zone_wmarks(void) zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2); zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1); - - zone->watermark[WMARK_MIN] += cma_wmark_pages(zone); - zone->watermark[WMARK_LOW] += cma_wmark_pages(zone); - zone->watermark[WMARK_HIGH] += cma_wmark_pages(zone); - setup_zone_migrate_reserve(zone); spin_unlock_irqrestore(&zone->lock, flags); } @@ -5092,20 +5038,6 @@ static void __setup_per_zone_wmarks(void) calculate_totalreserve_pages(); } -/** - * setup_per_zone_wmarks - called when min_free_kbytes changes - * or when memory is hot-{added|removed} - * - * Ensures that the watermark[min,low,high] values for each zone are set - * correctly with respect to min_free_kbytes. 
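setup_per_zone_wmarks() above derives the low and high watermarks from the min watermark by adding a quarter and a half of the zone's share of min_free_kbytes (tmp in the code). A small worked example of just that arithmetic; the conversion to pages assumes 4 KiB pages, the zone's share is taken to be all of pages_min (i.e. a single-zone system), and the highmem special case is ignored:

#include <stdio.h>

int main(void)
{
	unsigned long min_free_kbytes = 65536;          /* example tunable value */
	unsigned long page_shift = 12;                  /* assumed 4 KiB pages */

	/* pages_min = min_free_kbytes >> (PAGE_SHIFT - 10), as in the hunk above */
	unsigned long pages_min = min_free_kbytes >> (page_shift - 10);

	/* single-zone simplification: the zone's share (tmp) is all of pages_min */
	unsigned long tmp = pages_min;

	unsigned long wmark_min  = tmp;
	unsigned long wmark_low  = wmark_min + (tmp >> 2);  /* min + 25% */
	unsigned long wmark_high = wmark_min + (tmp >> 1);  /* min + 50% */

	printf("min=%lu low=%lu high=%lu pages\n", wmark_min, wmark_low, wmark_high);
	/* 65536 KiB -> 16384 pages: min=16384 low=20480 high=24576 */
	return 0;
}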
- */ -void setup_per_zone_wmarks(void) -{ - mutex_lock(&zonelists_mutex); - __setup_per_zone_wmarks(); - mutex_unlock(&zonelists_mutex); -} - /* * The inactive anon list should be small enough that the VM never has to * do too much work, but large enough that each inactive page has a chance @@ -5480,16 +5412,14 @@ static int __count_immobile_pages(struct zone *zone, struct page *page, int count) { unsigned long pfn, iter, found; - int mt; - /* * For avoiding noise data, lru_add_drain_all() should be called * If ZONE_MOVABLE, the zone never contains immobile pages */ if (zone_idx(zone) == ZONE_MOVABLE) return true; - mt = get_pageblock_migratetype(page); - if (mt == MIGRATE_MOVABLE || is_migrate_cma(mt)) + + if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE) return true; pfn = page_to_pfn(page); @@ -5606,7 +5536,7 @@ int set_migratetype_isolate(struct page *page) return ret; } -void unset_migratetype_isolate(struct page *page, unsigned migratetype) +void unset_migratetype_isolate(struct page *page) { struct zone *zone; unsigned long flags; @@ -5614,259 +5544,12 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype) spin_lock_irqsave(&zone->lock, flags); if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE) goto out; - set_pageblock_migratetype(page, migratetype); - move_freepages_block(zone, page, migratetype); + set_pageblock_migratetype(page, MIGRATE_MOVABLE); + move_freepages_block(zone, page, MIGRATE_MOVABLE); out: spin_unlock_irqrestore(&zone->lock, flags); } -#ifdef CONFIG_CMA - -static unsigned long pfn_max_align_down(unsigned long pfn) -{ - return pfn & ~(max_t(unsigned long, MAX_ORDER_NR_PAGES, - pageblock_nr_pages) - 1); -} - -static unsigned long pfn_max_align_up(unsigned long pfn) -{ - return ALIGN(pfn, max_t(unsigned long, MAX_ORDER_NR_PAGES, - pageblock_nr_pages)); -} - -static struct page * -__alloc_contig_migrate_alloc(struct page *page, unsigned long private, - int **resultp) -{ - return alloc_page(GFP_HIGHUSER_MOVABLE); -} - -/* [start, end) must belong to a single zone. */ -static int __alloc_contig_migrate_range(unsigned long start, unsigned long end) -{ - /* This function is based on compact_zone() from compaction.c. */ - - unsigned long pfn = start; - unsigned int tries = 0; - int ret = 0; - - struct compact_control cc = { - .nr_migratepages = 0, - .order = -1, - .zone = page_zone(pfn_to_page(start)), - .sync = true, - }; - INIT_LIST_HEAD(&cc.migratepages); - - migrate_prep_local(); - - while (pfn < end || !list_empty(&cc.migratepages)) { - if (fatal_signal_pending(current)) { - ret = -EINTR; - break; - } - - if (list_empty(&cc.migratepages)) { - cc.nr_migratepages = 0; - pfn = isolate_migratepages_range(cc.zone, &cc, - pfn, end); - if (!pfn) { - ret = -EINTR; - break; - } - tries = 0; - } else if (++tries == 5) { - ret = ret < 0 ? ret : -EBUSY; - break; - } - - ret = migrate_pages(&cc.migratepages, - __alloc_contig_migrate_alloc, - 0, false, MIGRATE_SYNC); - } - - putback_lru_pages(&cc.migratepages); - return ret > 0 ? 0 : ret; -} - -/* - * Update zone's cma pages counter used for watermark level calculation. - */ -static inline void __update_cma_watermarks(struct zone *zone, int count) -{ - unsigned long flags; - spin_lock_irqsave(&zone->lock, flags); - zone->min_cma_pages += count; - spin_unlock_irqrestore(&zone->lock, flags); - setup_per_zone_wmarks(); -} - -/* - * Trigger memory pressure bump to reclaim some pages in order to be able to - * allocate 'count' pages in single page units. 
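The removed pfn_max_align_down()/pfn_max_align_up() helpers shown above round a pfn to the larger of the MAX_ORDER block size and the pageblock size, so that isolation never covers only part of a block. A detached sketch of the same bit arithmetic; the two sizes (1024 and 512 pages) and the TOY_* names are assumptions for the example, as both values are configuration dependent:

#include <stdio.h>

#define TOY_MAX_ORDER_NR_PAGES (1UL << 10)   /* assumed: 1024-page MAX_ORDER block */
#define TOY_PAGEBLOCK_NR_PAGES (1UL << 9)    /* assumed: 512-page pageblock */
#define TOY_MAX(a, b)          ((a) > (b) ? (a) : (b))
#define TOY_ALIGN(x, a)        (((x) + (a) - 1) & ~((a) - 1))

/* Round a pfn down/up to the larger of the two block sizes, as the removed
 * helpers did with max_t(unsigned long, MAX_ORDER_NR_PAGES, pageblock_nr_pages). */
static unsigned long toy_pfn_max_align_down(unsigned long pfn)
{
	return pfn & ~(TOY_MAX(TOY_MAX_ORDER_NR_PAGES, TOY_PAGEBLOCK_NR_PAGES) - 1);
}

static unsigned long toy_pfn_max_align_up(unsigned long pfn)
{
	return TOY_ALIGN(pfn, TOY_MAX(TOY_MAX_ORDER_NR_PAGES, TOY_PAGEBLOCK_NR_PAGES));
}

int main(void)
{
	printf("%lu %lu\n", toy_pfn_max_align_down(1500), toy_pfn_max_align_up(1500));
	/* with the assumed sizes: 1024 2048 */
	return 0;
}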
Does similar work as - *__alloc_pages_slowpath() function. - */ -static int __reclaim_pages(struct zone *zone, gfp_t gfp_mask, int count) -{ - enum zone_type high_zoneidx = gfp_zone(gfp_mask); - struct zonelist *zonelist = node_zonelist(0, gfp_mask); - int did_some_progress = 0; - int order = 1; - - /* - * Increase level of watermarks to force kswapd do his job - * to stabilise at new watermark level. - */ - __update_cma_watermarks(zone, count); - - /* Obey watermarks as if the page was being allocated */ - while (!zone_watermark_ok(zone, 0, low_wmark_pages(zone), 0, 0)) { - wake_all_kswapd(order, zonelist, high_zoneidx, zone_idx(zone)); - - did_some_progress = __perform_reclaim(gfp_mask, order, zonelist, - NULL); - if (!did_some_progress) { - /* Exhausted what can be done so it's blamo time */ - out_of_memory(zonelist, gfp_mask, order, NULL, false); - } - } - - /* Restore original watermark levels. */ - __update_cma_watermarks(zone, -count); - - return count; -} - -/** - * alloc_contig_range() -- tries to allocate given range of pages - * @start: start PFN to allocate - * @end: one-past-the-last PFN to allocate - * @migratetype: migratetype of the underlaying pageblocks (either - * #MIGRATE_MOVABLE or #MIGRATE_CMA). All pageblocks - * in range must have the same migratetype and it must - * be either of the two. - * - * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES - * aligned, however it's the caller's responsibility to guarantee that - * we are the only thread that changes migrate type of pageblocks the - * pages fall in. - * - * The PFN range must belong to a single zone. - * - * Returns zero on success or negative error code. On success all - * pages which PFN is in [start, end) are allocated for the caller and - * need to be freed with free_contig_range(). - */ -int alloc_contig_range(unsigned long start, unsigned long end, - unsigned migratetype) -{ - struct zone *zone = page_zone(pfn_to_page(start)); - unsigned long outer_start, outer_end; - int ret = 0, order; - - /* - * What we do here is we mark all pageblocks in range as - * MIGRATE_ISOLATE. Because pageblock and max order pages may - * have different sizes, and due to the way page allocator - * work, we align the range to biggest of the two pages so - * that page allocator won't try to merge buddies from - * different pageblocks and change MIGRATE_ISOLATE to some - * other migration type. - * - * Once the pageblocks are marked as MIGRATE_ISOLATE, we - * migrate the pages from an unaligned range (ie. pages that - * we are interested in). This will put all the pages in - * range back to page allocator as MIGRATE_ISOLATE. - * - * When this is done, we take the pages in range from page - * allocator removing them from the buddy system. This way - * page allocator will never consider using them. - * - * This lets us mark the pageblocks back as - * MIGRATE_CMA/MIGRATE_MOVABLE so that free pages in the - * aligned range but not in the unaligned, original range are - * put back to page allocator so that buddy can use them. - */ - - ret = start_isolate_page_range(pfn_max_align_down(start), - pfn_max_align_up(end), migratetype); - if (ret) - goto done; - - ret = __alloc_contig_migrate_range(start, end); - if (ret) - goto done; - - /* - * Pages from [start, end) are within a MAX_ORDER_NR_PAGES - * aligned blocks that are marked as MIGRATE_ISOLATE. What's - * more, all pages in [start, end) are free in page allocator. 
- * What we are going to do is to allocate all pages from - * [start, end) (that is remove them from page allocator). - * - * The only problem is that pages at the beginning and at the - * end of interesting range may be not aligned with pages that - * page allocator holds, ie. they can be part of higher order - * pages. Because of this, we reserve the bigger range and - * once this is done free the pages we are not interested in. - * - * We don't have to hold zone->lock here because the pages are - * isolated thus they won't get removed from buddy. - */ - - lru_add_drain_all(); - drain_all_pages(); - - order = 0; - outer_start = start; - while (!PageBuddy(pfn_to_page(outer_start))) { - if (++order >= MAX_ORDER) { - ret = -EBUSY; - goto done; - } - outer_start &= ~0UL << order; - } - - /* Make sure the range is really isolated. */ - if (test_pages_isolated(outer_start, end)) { - pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n", - outer_start, end); - ret = -EBUSY; - goto done; - } - - /* - * Reclaim enough pages to make sure that contiguous allocation - * will not starve the system. - */ - __reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start); - - /* Grab isolated pages from freelists. */ - outer_end = isolate_freepages_range(outer_start, end); - if (!outer_end) { - ret = -EBUSY; - goto done; - } - - /* Free head and tail (if any) */ - if (start != outer_start) - free_contig_range(outer_start, start - outer_start); - if (end != outer_end) - free_contig_range(end, outer_end - end); - -done: - undo_isolate_page_range(pfn_max_align_down(start), - pfn_max_align_up(end), migratetype); - return ret; -} - -void free_contig_range(unsigned long pfn, unsigned nr_pages) -{ - for (; nr_pages--; ++pfn) - __free_page(pfn_to_page(pfn)); -} -#endif - #ifdef CONFIG_MEMORY_HOTREMOVE /* * All pages in the range must be isolated before calling this. diff --git a/trunk/mm/page_isolation.c b/trunk/mm/page_isolation.c index c9f04774f2b8..4ae42bb40892 100644 --- a/trunk/mm/page_isolation.c +++ b/trunk/mm/page_isolation.c @@ -24,7 +24,6 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) * to be MIGRATE_ISOLATE. * @start_pfn: The lower PFN of the range to be isolated. * @end_pfn: The upper PFN of the range to be isolated. - * @migratetype: migrate type to set in error recovery. * * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in * the range will never be allocated. Any free pages and pages freed in the @@ -33,8 +32,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) * start_pfn/end_pfn must be aligned to pageblock_order. * Returns 0 on success and -EBUSY if any part of range cannot be isolated. */ -int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, - unsigned migratetype) +int +start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) { unsigned long pfn; unsigned long undo_pfn; @@ -57,7 +56,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, for (pfn = start_pfn; pfn < undo_pfn; pfn += pageblock_nr_pages) - unset_migratetype_isolate(pfn_to_page(pfn), migratetype); + unset_migratetype_isolate(pfn_to_page(pfn)); return -EBUSY; } @@ -65,8 +64,8 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, /* * Make isolated pages available again. 
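Before grabbing the isolated pages, the removed alloc_contig_range() above widens the start pfn to the head of the buddy block containing it by raising the order and clearing low bits (outer_start &= ~0UL << order), giving up with -EBUSY once MAX_ORDER is exceeded. A detached sketch of that search; the buddy_head_fn callback and the TOY_MAX_ORDER value are assumptions standing in for PageBuddy() and the kernel's MAX_ORDER:

#include <stdbool.h>

#define TOY_MAX_ORDER 11   /* assumed, matches a common kernel default */

typedef bool (*buddy_head_fn)(unsigned long pfn);

/*
 * Round 'start' down, order by order, until the supplied predicate says it
 * is the head of a free buddy block, or give up once the largest order is
 * exceeded.  outer_start &= ~0UL << order clears the low 'order' bits of
 * the pfn, mirroring the loop in the removed alloc_contig_range().
 */
static long toy_find_buddy_head(unsigned long start, buddy_head_fn is_buddy_head)
{
	unsigned long outer_start = start;
	int order = 0;

	while (!is_buddy_head(outer_start)) {
		if (++order >= TOY_MAX_ORDER)
			return -1;                   /* -EBUSY in the original */
		outer_start &= ~0UL << order;
	}
	return (long)outer_start;
}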
*/ -int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, - unsigned migratetype) +int +undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) { unsigned long pfn; struct page *page; @@ -78,7 +77,7 @@ int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, page = __first_valid_page(pfn, pageblock_nr_pages); if (!page || get_pageblock_migratetype(page) != MIGRATE_ISOLATE) continue; - unset_migratetype_isolate(page, migratetype); + unset_migratetype_isolate(page); } return 0; } @@ -87,7 +86,7 @@ int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, * all pages in [start_pfn...end_pfn) must be in the same zone. * zone->lock must be held before call this. * - * Returns 1 if all pages in the range are isolated. + * Returns 1 if all pages in the range is isolated. */ static int __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn) diff --git a/trunk/mm/vmstat.c b/trunk/mm/vmstat.c index 0dad31dc1618..7db1b9bab492 100644 --- a/trunk/mm/vmstat.c +++ b/trunk/mm/vmstat.c @@ -613,9 +613,6 @@ static char * const migratetype_names[MIGRATE_TYPES] = { "Reclaimable", "Movable", "Reserve", -#ifdef CONFIG_CMA - "CMA", -#endif "Isolate", };
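start_isolate_page_range() and undo_isolate_page_range() above walk the requested range one pageblock at a time, and the former unwinds the blocks it already isolated if any block is rejected. The toy model below reproduces that walk-and-rollback shape over an array of per-block states; the block granularity, the state names and the "pinned" failure case are invented for the sketch:

#include <errno.h>
#include <stdbool.h>
#include <stddef.h>

enum toy_block_state { TOY_MOVABLE_BLK, TOY_ISOLATED_BLK, TOY_PINNED_BLK };

/* A "pinned" block stands in for one that set_migratetype_isolate() rejects. */
static bool toy_isolate_block(enum toy_block_state *blk)
{
	if (*blk == TOY_PINNED_BLK)
		return false;
	*blk = TOY_ISOLATED_BLK;
	return true;
}

/*
 * Mark every block in [0, nr) isolated; on the first failure, roll the
 * already-isolated blocks back to movable and report -EBUSY, mirroring
 * the undo loop in start_isolate_page_range().
 */
static int toy_isolate_range(enum toy_block_state blocks[], size_t nr)
{
	for (size_t i = 0; i < nr; i++) {
		if (toy_isolate_block(&blocks[i]))
			continue;
		while (i--)                     /* rollback: undo what we did */
			blocks[i] = TOY_MOVABLE_BLK;
		return -EBUSY;
	}
	return 0;
}

In the real code the unwind calls unset_migratetype_isolate() on each pageblock between start_pfn and the failing pfn, which is the same rollback over the blocks processed so far.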