diff --git a/[refs] b/[refs] index 914785d4fb15..6ecef899b2f6 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 05f96ef1189ecbf2e8548056a0ca016e4f630cef +refs/heads/master: fa4adc614922c24601320e55bc5a1f837abad6e9 diff --git a/trunk/arch/arm/kernel/setup.c b/trunk/arch/arm/kernel/setup.c index 75f1764933ab..29efc9f82057 100644 --- a/trunk/arch/arm/kernel/setup.c +++ b/trunk/arch/arm/kernel/setup.c @@ -441,19 +441,16 @@ __early_param("initrd=", early_initrd); static void __init arm_add_memory(unsigned long start, unsigned long size) { - struct membank *bank; - /* * Ensure that start/size are aligned to a page boundary. * Size is appropriately rounded down, start is rounded up. */ size -= start & ~PAGE_MASK; - bank = &meminfo.bank[meminfo.nr_banks++]; - - bank->start = PAGE_ALIGN(start); - bank->size = size & PAGE_MASK; - bank->node = PHYS_TO_NID(start); + meminfo.bank[meminfo.nr_banks].start = PAGE_ALIGN(start); + meminfo.bank[meminfo.nr_banks].size = size & PAGE_MASK; + meminfo.bank[meminfo.nr_banks].node = PHYS_TO_NID(start); + meminfo.nr_banks += 1; } /* diff --git a/trunk/include/asm-arm/arch-aaec2000/memory.h b/trunk/include/asm-arm/arch-aaec2000/memory.h index 9eceb4148922..24b51cccde8f 100644 --- a/trunk/include/asm-arm/arch-aaec2000/memory.h +++ b/trunk/include/asm-arm/arch-aaec2000/memory.h @@ -17,6 +17,8 @@ #define __virt_to_bus(x) __virt_to_phys(x) #define __bus_to_virt(x) __phys_to_virt(x) +#ifdef CONFIG_DISCONTIGMEM + /* * The nodes are the followings: * @@ -25,6 +27,42 @@ * node 2: 0xf800.0000 - 0xfbff.ffff * node 3: 0xfc00.0000 - 0xffff.ffff */ -#define NODE_MEM_SIZE_BITS 26 + +/* + * Given a kernel address, find the home node of the underlying memory. + */ +#define KVADDR_TO_NID(addr) \ + (((unsigned long)(addr) - PAGE_OFFSET) >> NODE_MAX_MEM_SHIFT) + +/* + * Given a page frame number, convert it to a node id. + */ +#define PFN_TO_NID(pfn) \ + (((pfn) - PHYS_PFN_OFFSET) >> (NODE_MAX_MEM_SHIFT - PAGE_SHIFT)) + +/* + * Given a kaddr, ADDR_TO_MAPBASE finds the owning node of the memory + * and return the mem_map of that node. + */ +#define ADDR_TO_MAPBASE(kaddr) NODE_MEM_MAP(KVADDR_TO_NID(kaddr)) + +/* + * Given a page frame number, find the owning node of the memory + * and return the mem_map of that node. + */ +#define PFN_TO_MAPBASE(pfn) NODE_MEM_MAP(PFN_TO_NID(pfn)) + +/* + * Given a kaddr, LOCAL_MEM_MAP finds the owning node of the memory + * and returns the index corresponding to the appropriate page in the + * node's mem_map. + */ +#define LOCAL_MAP_NR(addr) \ + (((unsigned long)(addr) & (NODE_MAX_MEM_SIZE - 1)) >> PAGE_SHIFT) + +#define NODE_MAX_MEM_SHIFT 26 +#define NODE_MAX_MEM_SIZE (1 << NODE_MAX_MEM_SHIFT) + +#endif /* CONFIG_DISCONTIGMEM */ #endif /* __ASM_ARCH_MEMORY_H */ diff --git a/trunk/include/asm-arm/arch-clps711x/memory.h b/trunk/include/asm-arm/arch-clps711x/memory.h index 42768cc8bfb4..c6e8dcf674de 100644 --- a/trunk/include/asm-arm/arch-clps711x/memory.h +++ b/trunk/include/asm-arm/arch-clps711x/memory.h @@ -62,15 +62,7 @@ * memory bank. For those systems, simply undefine CONFIG_DISCONTIGMEM. */ -/* - * The PS7211 allows up to 256MB max per DRAM bank, but the EDB7211 - * uses only one of the two banks (bank #1). However, even within - * bank #1, memory is discontiguous. - * - * The EDB7211 has two 8MB DRAM areas with 8MB of empty space between - * them, so we use 24 for the node max shift to get 16MB node sizes. - */ - +#ifdef CONFIG_DISCONTIGMEM /* * Because of the wide memory address space between physical RAM banks on the * SA1100, it's much more convenient to use Linux's NUMA support to implement @@ -88,7 +80,48 @@ * node 2: 0xd0000000 - 0xd7ffffff * node 3: 0xd8000000 - 0xdfffffff */ -#define NODE_MEM_SIZE_BITS 24 + +/* + * Given a kernel address, find the home node of the underlying memory. + */ +#define KVADDR_TO_NID(addr) \ + (((unsigned long)(addr) - PAGE_OFFSET) >> NODE_MAX_MEM_SHIFT) + +/* + * Given a page frame number, convert it to a node id. + */ +#define PFN_TO_NID(pfn) \ + (((pfn) - PHYS_PFN_OFFSET) >> (NODE_MAX_MEM_SHIFT - PAGE_SHIFT)) + +/* + * Given a kaddr, ADDR_TO_MAPBASE finds the owning node of the memory + * and returns the mem_map of that node. + */ +#define ADDR_TO_MAPBASE(kaddr) \ + NODE_MEM_MAP(KVADDR_TO_NID((unsigned long)(kaddr))) + +#define PFN_TO_MAPBASE(pfn) NODE_MEM_MAP(PFN_TO_NID(pfn)) + +/* + * Given a kaddr, LOCAL_MAR_NR finds the owning node of the memory + * and returns the index corresponding to the appropriate page in the + * node's mem_map. + */ +#define LOCAL_MAP_NR(addr) \ + (((unsigned long)(addr) & (NODE_MAX_MEM_SIZE - 1)) >> PAGE_SHIFT) + +/* + * The PS7211 allows up to 256MB max per DRAM bank, but the EDB7211 + * uses only one of the two banks (bank #1). However, even within + * bank #1, memory is discontiguous. + * + * The EDB7211 has two 8MB DRAM areas with 8MB of empty space between + * them, so we use 24 for the node max shift to get 16MB node sizes. + */ +#define NODE_MAX_MEM_SHIFT 24 +#define NODE_MAX_MEM_SIZE (1<> (26 - PAGE_SHIFT)) #endif +/* + * Given a kaddr, ADDR_TO_MAPBASE finds the owning node of the memory + * and return the mem_map of that node. + */ +# define ADDR_TO_MAPBASE(kaddr) NODE_MEM_MAP(KVADDR_TO_NID(kaddr)) + +/* + * Given a page frame number, find the owning node of the memory + * and return the mem_map of that node. + */ +# define PFN_TO_MAPBASE(pfn) NODE_MEM_MAP(PFN_TO_NID(pfn)) + /* * Given a kaddr, LOCAL_MEM_MAP finds the owning node of the memory * and returns the index corresponding to the appropriate page in the diff --git a/trunk/include/asm-arm/arch-pxa/memory.h b/trunk/include/asm-arm/arch-pxa/memory.h index e17f9881faf0..eaf6d43939e9 100644 --- a/trunk/include/asm-arm/arch-pxa/memory.h +++ b/trunk/include/asm-arm/arch-pxa/memory.h @@ -27,6 +27,7 @@ #define __virt_to_bus(x) __virt_to_phys(x) #define __bus_to_virt(x) __phys_to_virt(x) +#ifdef CONFIG_DISCONTIGMEM /* * The nodes are matched with the physical SDRAM banks as follows: * @@ -34,9 +35,38 @@ * node 1: 0xa4000000-0xa7ffffff --> 0xc4000000-0xc7ffffff * node 2: 0xa8000000-0xabffffff --> 0xc8000000-0xcbffffff * node 3: 0xac000000-0xafffffff --> 0xcc000000-0xcfffffff - * - * This needs a node mem size of 26 bits. */ -#define NODE_MEM_SIZE_BITS 26 + +/* + * Given a kernel address, find the home node of the underlying memory. + */ +#define KVADDR_TO_NID(addr) (((unsigned long)(addr) - PAGE_OFFSET) >> 26) + +/* + * Given a page frame number, convert it to a node id. + */ +#define PFN_TO_NID(pfn) (((pfn) - PHYS_PFN_OFFSET) >> (26 - PAGE_SHIFT)) + +/* + * Given a kaddr, ADDR_TO_MAPBASE finds the owning node of the memory + * and returns the mem_map of that node. + */ +#define ADDR_TO_MAPBASE(kaddr) NODE_MEM_MAP(KVADDR_TO_NID(kaddr)) + +/* + * Given a page frame number, find the owning node of the memory + * and returns the mem_map of that node. + */ +#define PFN_TO_MAPBASE(pfn) NODE_MEM_MAP(PFN_TO_NID(pfn)) + +/* + * Given a kaddr, LOCAL_MEM_MAP finds the owning node of the memory + * and returns the index corresponding to the appropriate page in the + * node's mem_map. + */ +#define LOCAL_MAP_NR(addr) \ + (((unsigned long)(addr) & 0x03ffffff) >> PAGE_SHIFT) + +#endif #endif diff --git a/trunk/include/asm-arm/arch-sa1100/memory.h b/trunk/include/asm-arm/arch-sa1100/memory.h index 0e907fc6d42a..1ff172dc8e33 100644 --- a/trunk/include/asm-arm/arch-sa1100/memory.h +++ b/trunk/include/asm-arm/arch-sa1100/memory.h @@ -39,6 +39,7 @@ void sa1111_adjust_zones(int node, unsigned long *size, unsigned long *holes); #define __virt_to_bus(x) __virt_to_phys(x) #define __bus_to_virt(x) __phys_to_virt(x) +#ifdef CONFIG_DISCONTIGMEM /* * Because of the wide memory address space between physical RAM banks on the * SA1100, it's much convenient to use Linux's NUMA support to implement our @@ -56,7 +57,38 @@ void sa1111_adjust_zones(int node, unsigned long *size, unsigned long *holes); * node 2: 0xd0000000 - 0xd7ffffff * node 3: 0xd8000000 - 0xdfffffff */ -#define NODE_MEM_SIZE_BITS 27 + +/* + * Given a kernel address, find the home node of the underlying memory. + */ +#define KVADDR_TO_NID(addr) (((unsigned long)(addr) - PAGE_OFFSET) >> 27) + +/* + * Given a page frame number, convert it to a node id. + */ +#define PFN_TO_NID(pfn) (((pfn) - PHYS_PFN_OFFSET) >> (27 - PAGE_SHIFT)) + +/* + * Given a kaddr, ADDR_TO_MAPBASE finds the owning node of the memory + * and return the mem_map of that node. + */ +#define ADDR_TO_MAPBASE(kaddr) NODE_MEM_MAP(KVADDR_TO_NID(kaddr)) + +/* + * Given a page frame number, find the owning node of the memory + * and return the mem_map of that node. + */ +#define PFN_TO_MAPBASE(pfn) NODE_MEM_MAP(PFN_TO_NID(pfn)) + +/* + * Given a kaddr, LOCAL_MEM_MAP finds the owning node of the memory + * and returns the index corresponding to the appropriate page in the + * node's mem_map. + */ +#define LOCAL_MAP_NR(addr) \ + (((unsigned long)(addr) & 0x07ffffff) >> PAGE_SHIFT) + +#endif /* * Cache flushing area - SA1100 zero bank diff --git a/trunk/include/asm-arm/div64.h b/trunk/include/asm-arm/div64.h index 3682616804ca..37e0a96e8789 100644 --- a/trunk/include/asm-arm/div64.h +++ b/trunk/include/asm-arm/div64.h @@ -27,7 +27,7 @@ #define __xh "r1" #endif -#define do_div(n,base) \ +#define __do_div_asm(n, base) \ ({ \ register unsigned int __base asm("r4") = base; \ register unsigned long long __n asm("r0") = n; \ @@ -45,4 +45,182 @@ __rem; \ }) +#if __GNUC__ < 4 + +/* + * gcc versions earlier than 4.0 are simply too problematic for the + * optimized implementation below. First there is gcc PR 15089 that + * tend to trig on more complex constructs, spurious .global __udivsi3 + * are inserted even if none of those symbols are referenced in the + * generated code, and those gcc versions are not able to do constant + * propagation on long long values anyway. + */ +#define do_div(n, base) __do_div_asm(n, base) + +#elif __GNUC__ >= 4 + +#include + +/* + * If the divisor happens to be constant, we determine the appropriate + * inverse at compile time to turn the division into a few inline + * multiplications instead which is much faster. And yet only if compiling + * for ARMv4 or higher (we need umull/umlal) and if the gcc version is + * sufficiently recent to perform proper long long constant propagation. + * (It is unfortunate that gcc doesn't perform all this internally.) + */ +#define do_div(n, base) \ +({ \ + unsigned int __r, __b = (base); \ + if (!__builtin_constant_p(__b) || __b == 0 || \ + (__LINUX_ARM_ARCH__ < 4 && (__b & (__b - 1)) != 0)) { \ + /* non-constant divisor (or zero): slow path */ \ + __r = __do_div_asm(n, __b); \ + } else if ((__b & (__b - 1)) == 0) { \ + /* Trivial: __b is constant and a power of 2 */ \ + /* gcc does the right thing with this code. */ \ + __r = n; \ + __r &= (__b - 1); \ + n /= __b; \ + } else { \ + /* Multiply by inverse of __b: n/b = n*(p/b)/p */ \ + /* We rely on the fact that most of this code gets */ \ + /* optimized away at compile time due to constant */ \ + /* propagation and only a couple inline assembly */ \ + /* instructions should remain. Better avoid any */ \ + /* code construct that might prevent that. */ \ + unsigned long long __res, __x, __t, __m, __n = n; \ + unsigned int __c, __p, __z = 0; \ + /* preserve low part of n for reminder computation */ \ + __r = __n; \ + /* determine number of bits to represent __b */ \ + __p = 1 << __div64_fls(__b); \ + /* compute __m = ((__p << 64) + __b - 1) / __b */ \ + __m = (~0ULL / __b) * __p; \ + __m += (((~0ULL % __b + 1) * __p) + __b - 1) / __b; \ + /* compute __res = __m*(~0ULL/__b*__b-1)/(__p << 64) */ \ + __x = ~0ULL / __b * __b - 1; \ + __res = (__m & 0xffffffff) * (__x & 0xffffffff); \ + __res >>= 32; \ + __res += (__m & 0xffffffff) * (__x >> 32); \ + __t = __res; \ + __res += (__x & 0xffffffff) * (__m >> 32); \ + __t = (__res < __t) ? (1ULL << 32) : 0; \ + __res = (__res >> 32) + __t; \ + __res += (__m >> 32) * (__x >> 32); \ + __res /= __p; \ + /* Now sanitize and optimize what we've got. */ \ + if (~0ULL % (__b / (__b & -__b)) == 0) { \ + /* those cases can be simplified with: */ \ + __n /= (__b & -__b); \ + __m = ~0ULL / (__b / (__b & -__b)); \ + __p = 1; \ + __c = 1; \ + } else if (__res != __x / __b) { \ + /* We can't get away without a correction */ \ + /* to compensate for bit truncation errors. */ \ + /* To avoid it we'd need an additional bit */ \ + /* to represent __m which would overflow it. */ \ + /* Instead we do m=p/b and n/b=(n*m+m)/p. */ \ + __c = 1; \ + /* Compute __m = (__p << 64) / __b */ \ + __m = (~0ULL / __b) * __p; \ + __m += ((~0ULL % __b + 1) * __p) / __b; \ + } else { \ + /* Reduce __m/__p, and try to clear bit 31 */ \ + /* of __m when possible otherwise that'll */ \ + /* need extra overflow handling later. */ \ + unsigned int __bits = -(__m & -__m); \ + __bits |= __m >> 32; \ + __bits = (~__bits) << 1; \ + /* If __bits == 0 then setting bit 31 is */ \ + /* unavoidable. Simply apply the maximum */ \ + /* possible reduction in that case. */ \ + /* Otherwise the MSB of __bits indicates the */ \ + /* best reduction we should apply. */ \ + if (!__bits) { \ + __p /= (__m & -__m); \ + __m /= (__m & -__m); \ + } else { \ + __p >>= __div64_fls(__bits); \ + __m >>= __div64_fls(__bits); \ + } \ + /* No correction needed. */ \ + __c = 0; \ + } \ + /* Now we have a combination of 2 conditions: */ \ + /* 1) whether or not we need a correction (__c), and */ \ + /* 2) whether or not there might be an overflow in */ \ + /* the cross product (__m & ((1<<63) | (1<<31))) */ \ + /* Select the best insn combination to perform the */ \ + /* actual __m * __n / (__p << 64) operation. */ \ + if (!__c) { \ + asm ( "umull %Q0, %R0, %1, %Q2\n\t" \ + "mov %Q0, #0" \ + : "=&r" (__res) \ + : "r" (__m), "r" (__n) \ + : "cc" ); \ + } else if (!(__m & ((1ULL << 63) | (1ULL << 31)))) { \ + __res = __m; \ + asm ( "umlal %Q0, %R0, %Q1, %Q2\n\t" \ + "mov %Q0, #0" \ + : "+r" (__res) \ + : "r" (__m), "r" (__n) \ + : "cc" ); \ + } else { \ + asm ( "umull %Q0, %R0, %Q1, %Q2\n\t" \ + "cmn %Q0, %Q1\n\t" \ + "adcs %R0, %R0, %R1\n\t" \ + "adc %Q0, %3, #0" \ + : "=&r" (__res) \ + : "r" (__m), "r" (__n), "r" (__z) \ + : "cc" ); \ + } \ + if (!(__m & ((1ULL << 63) | (1ULL << 31)))) { \ + asm ( "umlal %R0, %Q0, %R1, %Q2\n\t" \ + "umlal %R0, %Q0, %Q1, %R2\n\t" \ + "mov %R0, #0\n\t" \ + "umlal %Q0, %R0, %R1, %R2" \ + : "+r" (__res) \ + : "r" (__m), "r" (__n) \ + : "cc" ); \ + } else { \ + asm ( "umlal %R0, %Q0, %R2, %Q3\n\t" \ + "umlal %R0, %1, %Q2, %R3\n\t" \ + "mov %R0, #0\n\t" \ + "adds %Q0, %1, %Q0\n\t" \ + "adc %R0, %R0, #0\n\t" \ + "umlal %Q0, %R0, %R2, %R3" \ + : "+r" (__res), "+r" (__z) \ + : "r" (__m), "r" (__n) \ + : "cc" ); \ + } \ + __res /= __p; \ + /* The reminder can be computed with 32-bit regs */ \ + /* only, and gcc is good at that. */ \ + { \ + unsigned int __res0 = __res; \ + unsigned int __b0 = __b; \ + __r -= __res0 * __b0; \ + } \ + /* BUG_ON(__r >= __b || __res * __b + __r != n); */ \ + n = __res; \ + } \ + __r; \ +}) + +/* our own fls implementation to make sure constant propagation is fine */ +#define __div64_fls(bits) \ +({ \ + unsigned int __left = (bits), __nr = 0; \ + if (__left & 0xffff0000) __nr += 16, __left >>= 16; \ + if (__left & 0x0000ff00) __nr += 8, __left >>= 8; \ + if (__left & 0x000000f0) __nr += 4, __left >>= 4; \ + if (__left & 0x0000000c) __nr += 2, __left >>= 2; \ + if (__left & 0x00000002) __nr += 1; \ + __nr; \ +}) + +#endif + #endif diff --git a/trunk/include/asm-arm/memory.h b/trunk/include/asm-arm/memory.h index d9bfb39adabf..91d536c215d7 100644 --- a/trunk/include/asm-arm/memory.h +++ b/trunk/include/asm-arm/memory.h @@ -215,7 +215,6 @@ static inline __deprecated void *bus_to_virt(unsigned long x) * virt_addr_valid(k) indicates whether a virtual address is valid */ #ifndef CONFIG_DISCONTIGMEM - #define ARCH_PFN_OFFSET PHYS_PFN_OFFSET #define pfn_valid(pfn) ((pfn) >= PHYS_PFN_OFFSET && (pfn) < (PHYS_PFN_OFFSET + max_mapnr)) @@ -231,7 +230,6 @@ static inline __deprecated void *bus_to_virt(unsigned long x) * around in memory. */ #include - #define arch_pfn_to_nid(pfn) PFN_TO_NID(pfn) #define arch_local_page_offset(pfn, nid) LOCAL_MAP_NR((pfn) << PAGE_SHIFT) @@ -258,43 +256,6 @@ static inline __deprecated void *bus_to_virt(unsigned long x) */ #define PHYS_TO_NID(addr) PFN_TO_NID((addr) >> PAGE_SHIFT) -/* - * Given a kaddr, ADDR_TO_MAPBASE finds the owning node of the memory - * and returns the mem_map of that node. - */ -#define ADDR_TO_MAPBASE(kaddr) NODE_MEM_MAP(KVADDR_TO_NID(kaddr)) - -/* - * Given a page frame number, find the owning node of the memory - * and returns the mem_map of that node. - */ -#define PFN_TO_MAPBASE(pfn) NODE_MEM_MAP(PFN_TO_NID(pfn)) - -#ifdef NODE_MEM_SIZE_BITS -#define NODE_MEM_SIZE_MASK ((1 << NODE_MEM_SIZE_BITS) - 1) - -/* - * Given a kernel address, find the home node of the underlying memory. - */ -#define KVADDR_TO_NID(addr) \ - (((unsigned long)(addr) - PAGE_OFFSET) >> NODE_MEM_SIZE_BITS) - -/* - * Given a page frame number, convert it to a node id. - */ -#define PFN_TO_NID(pfn) \ - (((pfn) - PHYS_PFN_OFFSET) >> (NODE_MEM_SIZE_BITS - PAGE_SHIFT)) - -/* - * Given a kaddr, LOCAL_MEM_MAP finds the owning node of the memory - * and returns the index corresponding to the appropriate page in the - * node's mem_map. - */ -#define LOCAL_MAP_NR(addr) \ - (((unsigned long)(addr) & NODE_MEM_SIZE_MASK) >> PAGE_SHIFT) - -#endif /* NODE_MEM_SIZE_BITS */ - #endif /* !CONFIG_DISCONTIGMEM */ /*