Skip to content

Commit

Permalink
x86,percpu: generalize lpage first chunk allocator
Browse files Browse the repository at this point in the history
Generalize and move x86 setup_pcpu_lpage() into
pcpu_lpage_first_chunk().  setup_pcpu_lpage() now is a simple wrapper
around the generalized version.  Other than taking size parameters and
using arch supplied callbacks to allocate/free/map memory,
pcpu_lpage_first_chunk() is identical to the original implementation.

This simplifies arch code and will help convert more archs to the
dynamic percpu allocator.

While at it, factor out pcpu_calc_fc_sizes() which is common to
pcpu_embed_first_chunk() and pcpu_lpage_first_chunk().

[ Impact: code reorganization and generalization ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
  • Loading branch information
Tejun Heo committed Jul 3, 2009
1 parent 8f05a6a commit 8c4bfc6
Show file tree
Hide file tree
Showing 5 changed files with 244 additions and 171 deletions.
9 changes: 0 additions & 9 deletions arch/x86/include/asm/percpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,15 +156,6 @@ do { \
/* We can use this directly for local CPU (faster). */
DECLARE_PER_CPU(unsigned long, this_cpu_off);

/*
 * pcpu_lpage_remapped() takes a kernel address and returns the
 * corresponding remapped percpu address, or NULL when there is no match.
 * With CONFIG_NEED_MULTIPLE_NODES only the prototype is provided here and
 * the definition lives elsewhere; without it the inline stub below is
 * used, which never reports a match.
 */
#ifdef CONFIG_NEED_MULTIPLE_NODES
void *pcpu_lpage_remapped(void *kaddr);
#else
/* Stub for !CONFIG_NEED_MULTIPLE_NODES builds: always returns NULL. */
static inline void *pcpu_lpage_remapped(void *kaddr)
{
return NULL;
}
#endif

#endif /* !__ASSEMBLY__ */

#ifdef CONFIG_SMP
Expand Down
169 changes: 11 additions & 158 deletions arch/x86/kernel/setup_percpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -137,44 +137,21 @@ static void __init pcpu_fc_free(void *ptr, size_t size)
}

/*
* Large page remap allocator
*
* This allocator uses PMD page as unit. A PMD page is allocated for
* each cpu and each is remapped into vmalloc area using PMD mapping.
* As PMD page is quite large, only part of it is used for the first
* chunk. Unused part is returned to the bootmem allocator.
*
* So, the PMD pages are mapped twice - once to the physical mapping
* and to the vmalloc area for the first percpu chunk. The double
* mapping does add one more PMD TLB entry pressure but still is much
* better than only using 4k mappings while still being NUMA friendly.
* Large page remapping allocator
*/
#ifdef CONFIG_NEED_MULTIPLE_NODES
struct pcpul_ent {
unsigned int cpu;
void *ptr;
};

static size_t pcpul_size;
static struct pcpul_ent *pcpul_map;
static struct vm_struct pcpul_vm;

static struct page * __init pcpul_get_page(unsigned int cpu, int pageno)
static void __init pcpul_map(void *ptr, size_t size, void *addr)
{
size_t off = (size_t)pageno << PAGE_SHIFT;
pmd_t *pmd, pmd_v;

if (off >= pcpul_size)
return NULL;

return virt_to_page(pcpul_map[cpu].ptr + off);
pmd = populate_extra_pmd((unsigned long)addr);
pmd_v = pfn_pmd(page_to_pfn(virt_to_page(ptr)), PAGE_KERNEL_LARGE);
set_pmd(pmd, pmd_v);
}

static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
{
size_t map_size, dyn_size;
unsigned int cpu;
int i, j;
ssize_t ret;
size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;

if (!chosen) {
size_t vm_size = VMALLOC_END - VMALLOC_START;
Expand All @@ -198,134 +175,10 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
return -EINVAL;
}

/*
* Currently supports only single page. Supporting multiple
* pages won't be too difficult if it ever becomes necessary.
*/
pcpul_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
PERCPU_DYNAMIC_RESERVE);
if (pcpul_size > PMD_SIZE) {
pr_warning("PERCPU: static data is larger than large page, "
"can't use large page\n");
return -EINVAL;
}
dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;

/* allocate pointer array and alloc large pages */
map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0]));
pcpul_map = alloc_bootmem(map_size);

for_each_possible_cpu(cpu) {
pcpul_map[cpu].cpu = cpu;
pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE,
PMD_SIZE);
if (!pcpul_map[cpu].ptr) {
pr_warning("PERCPU: failed to allocate large page "
"for cpu%u\n", cpu);
goto enomem;
}

/*
* Only use pcpul_size bytes and give back the rest.
*
* Ingo: The 2MB up-rounding bootmem is needed to make
* sure the partial 2MB page is still fully RAM - it's
* not well-specified to have a PAT-incompatible area
* (unmapped RAM, device memory, etc.) in that hole.
*/
free_bootmem(__pa(pcpul_map[cpu].ptr + pcpul_size),
PMD_SIZE - pcpul_size);

memcpy(pcpul_map[cpu].ptr, __per_cpu_load, static_size);
}

/* allocate address and map */
pcpul_vm.flags = VM_ALLOC;
pcpul_vm.size = num_possible_cpus() * PMD_SIZE;
vm_area_register_early(&pcpul_vm, PMD_SIZE);

for_each_possible_cpu(cpu) {
pmd_t *pmd, pmd_v;

pmd = populate_extra_pmd((unsigned long)pcpul_vm.addr +
cpu * PMD_SIZE);
pmd_v = pfn_pmd(page_to_pfn(virt_to_page(pcpul_map[cpu].ptr)),
PAGE_KERNEL_LARGE);
set_pmd(pmd, pmd_v);
}

/* we're ready, commit */
pr_info("PERCPU: Remapped at %p with large pages, static data "
"%zu bytes\n", pcpul_vm.addr, static_size);

ret = pcpu_setup_first_chunk(pcpul_get_page, static_size,
PERCPU_FIRST_CHUNK_RESERVE, dyn_size,
PMD_SIZE, pcpul_vm.addr, NULL);

/* sort pcpul_map array for pcpu_lpage_remapped() */
for (i = 0; i < num_possible_cpus() - 1; i++)
for (j = i + 1; j < num_possible_cpus(); j++)
if (pcpul_map[i].ptr > pcpul_map[j].ptr) {
struct pcpul_ent tmp = pcpul_map[i];
pcpul_map[i] = pcpul_map[j];
pcpul_map[j] = tmp;
}

return ret;

enomem:
for_each_possible_cpu(cpu)
if (pcpul_map[cpu].ptr)
free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size);
free_bootmem(__pa(pcpul_map), map_size);
return -ENOMEM;
}

/**
* pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area
* @kaddr: the kernel address in question
*
* Determine whether @kaddr falls in the pcpul recycled area. This is
* used by pageattr to detect VM aliases and break up the pcpu PMD
* mapping such that the same physical page is not mapped under
* different attributes.
*
* The recycled area is always at the tail of a partially used PMD
* page.
*
* RETURNS:
* Address of corresponding remapped pcpu address if match is found;
* otherwise, NULL.
*/
void *pcpu_lpage_remapped(void *kaddr)
{
void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK);
unsigned long offset = (unsigned long)kaddr & ~PMD_MASK;
int left = 0, right = num_possible_cpus() - 1;
int pos;

/* pcpul in use at all? */
if (!pcpul_map)
return NULL;

/* okay, perform binary search */
while (left <= right) {
pos = (left + right) / 2;

if (pcpul_map[pos].ptr < pmd_addr)
left = pos + 1;
else if (pcpul_map[pos].ptr > pmd_addr)
right = pos - 1;
else {
/* it shouldn't be in the area for the first chunk */
WARN_ON(offset < pcpul_size);

return pcpul_vm.addr +
pcpul_map[pos].cpu * PMD_SIZE + offset;
}
}

return NULL;
return pcpu_lpage_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE,
reserve - PERCPU_FIRST_CHUNK_RESERVE,
PMD_SIZE,
pcpu_fc_alloc, pcpu_fc_free, pcpul_map);
}
#else
static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
Expand Down
1 change: 1 addition & 0 deletions arch/x86/mm/pageattr.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/pfn.h>
#include <linux/percpu.h>

#include <asm/e820.h>
#include <asm/processor.h>
Expand Down
27 changes: 27 additions & 0 deletions include/linux/percpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno);
typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size);
typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr);

extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
size_t static_size, size_t reserved_size,
Expand All @@ -79,6 +80,32 @@ extern ssize_t __init pcpu_4k_first_chunk(
pcpu_fc_free_fn_t free_fn,
pcpu_fc_populate_pte_fn_t populate_pte_fn);

/*
 * Large page first chunk allocator interface.
 *
 * The real implementations are only declared when
 * CONFIG_NEED_MULTIPLE_NODES is set; otherwise the inline stubs in the
 * #else branch are used so that callers need no #ifdefs of their own.
 */
#ifdef CONFIG_NEED_MULTIPLE_NODES
/*
 * pcpu_lpage_first_chunk - set up the first percpu chunk using large pages
 *
 * Takes the sizes of the static, reserved and dynamic areas plus the large
 * page size, and uses the arch supplied @alloc_fn, @free_fn and @map_fn
 * callbacks to allocate, free and map memory.  The x86 caller passes
 * PMD_SIZE as @lpage_size.
 *
 * NOTE(review): @dyn_size is signed (ssize_t) unlike the other sizes;
 * presumably a negative value has a special meaning - confirm against the
 * implementation.
 *
 * Returns an ssize_t; the !CONFIG_NEED_MULTIPLE_NODES stub returns -EINVAL.
 */
extern ssize_t __init pcpu_lpage_first_chunk(
size_t static_size, size_t reserved_size,
ssize_t dyn_size, size_t lpage_size,
pcpu_fc_alloc_fn_t alloc_fn,
pcpu_fc_free_fn_t free_fn,
pcpu_fc_map_fn_t map_fn);

/*
 * pcpu_lpage_remapped - look up the remapped percpu address for @kaddr
 *
 * Returns a pointer, or NULL; the stub variant always returns NULL.
 */
extern void *pcpu_lpage_remapped(void *kaddr);
#else
/*
 * Stub used when CONFIG_NEED_MULTIPLE_NODES is not set: the large page
 * allocator is unavailable, so every request fails with -EINVAL and all
 * arguments are ignored.
 */
static inline ssize_t __init pcpu_lpage_first_chunk(
size_t static_size, size_t reserved_size,
ssize_t dyn_size, size_t lpage_size,
pcpu_fc_alloc_fn_t alloc_fn,
pcpu_fc_free_fn_t free_fn,
pcpu_fc_map_fn_t map_fn)
{
return -EINVAL;
}

/*
 * Stub used when CONFIG_NEED_MULTIPLE_NODES is not set: no address is ever
 * reported as remapped, so this always returns NULL.
 */
static inline void *pcpu_lpage_remapped(void *kaddr)
{
return NULL;
}
#endif

/*
* Use this to get to a cpu's version of the per-cpu object
* dynamically allocated. Non-atomic access to the current CPU's
Expand Down
Loading

0 comments on commit 8c4bfc6

Please sign in to comment.