---
r: 185017
b: refs/heads/master
c: 99dcc3e
h: refs/heads/master
i:
  185015: 81eabd4
v: v3
Christoph Lameter authored and Tejun Heo committed Jan 5, 2010
1 parent 55c2b95 commit 6e9e3bb
Showing 5 changed files with 82 additions and 152 deletions.
2 changes: 1 addition & 1 deletion [refs]
@@ -1,2 +1,2 @@
---
refs/heads/master: 5917dae83cb02dfe74c9167b79e86e6d65183fa3
refs/heads/master: 99dcc3e5a94ed491fbef402831d8c0bbb267f995
4 changes: 0 additions & 4 deletions trunk/include/linux/mm.h
@@ -1079,11 +1079,7 @@ extern void si_meminfo(struct sysinfo * val);
extern void si_meminfo_node(struct sysinfo *val, int nid);
extern int after_bootmem;

#ifdef CONFIG_NUMA
extern void setup_per_cpu_pageset(void);
#else
static inline void setup_per_cpu_pageset(void) {}
#endif

extern void zone_pcp_update(struct zone *zone);

12 changes: 2 additions & 10 deletions trunk/include/linux/mmzone.h
@@ -184,13 +184,7 @@ struct per_cpu_pageset {
s8 stat_threshold;
s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
#endif
} ____cacheline_aligned_in_smp;

#ifdef CONFIG_NUMA
#define zone_pcp(__z, __cpu) ((__z)->pageset[(__cpu)])
#else
#define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)])
#endif
};

#endif /* !__GENERATING_BOUNDS.H */

@@ -306,10 +300,8 @@ struct zone {
*/
unsigned long min_unmapped_pages;
unsigned long min_slab_pages;
struct per_cpu_pageset *pageset[NR_CPUS];
#else
struct per_cpu_pageset pageset[NR_CPUS];
#endif
struct per_cpu_pageset *pageset;
/*
* free areas of different sizes
*/
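The two mmzone.h hunks above collapse the NR_CPUS-sized pageset arrays into a single per-zone pointer into the percpu area; the page_alloc.c hunks below then reach a given CPU's copy with per_cpu_ptr() and the local CPU's copy with this_cpu_ptr(). A minimal sketch of that accessor pattern, using a hypothetical structure rather than struct per_cpu_pageset:

#include <linux/percpu.h>
#include <linux/irqflags.h>
#include <linux/errno.h>

struct counter {
	long value;
};

static struct counter *counters;	/* one copy per possible CPU */

static int counters_init(void)
{
	counters = alloc_percpu(struct counter);	/* replaces a [NR_CPUS] array */
	if (!counters)
		return -ENOMEM;
	return 0;
}

static void counter_add(long n)
{
	unsigned long flags;

	local_irq_save(flags);			/* keep the task on this CPU */
	this_cpu_ptr(counters)->value += n;	/* the local CPU's copy */
	local_irq_restore(flags);
}

static long counter_read(int cpu)
{
	return per_cpu_ptr(counters, cpu)->value;	/* a specific CPU's copy */
}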
202 changes: 71 additions & 131 deletions trunk/mm/page_alloc.c
@@ -1008,10 +1008,10 @@ static void drain_pages(unsigned int cpu)
struct per_cpu_pageset *pset;
struct per_cpu_pages *pcp;

pset = zone_pcp(zone, cpu);
local_irq_save(flags);
pset = per_cpu_ptr(zone->pageset, cpu);

pcp = &pset->pcp;
local_irq_save(flags);
free_pcppages_bulk(zone, pcp->count, pcp);
pcp->count = 0;
local_irq_restore(flags);
@@ -1095,7 +1095,6 @@ static void free_hot_cold_page(struct page *page, int cold)
arch_free_page(page, 0);
kernel_map_pages(page, 1, 0);

pcp = &zone_pcp(zone, get_cpu())->pcp;
migratetype = get_pageblock_migratetype(page);
set_page_private(page, migratetype);
local_irq_save(flags);
@@ -1118,6 +1117,7 @@ static void free_hot_cold_page(struct page *page, int cold)
migratetype = MIGRATE_MOVABLE;
}

pcp = &this_cpu_ptr(zone->pageset)->pcp;
if (cold)
list_add_tail(&page->lru, &pcp->lists[migratetype]);
else
@@ -1130,7 +1130,6 @@ static void free_hot_cold_page(struct page *page, int cold)

out:
local_irq_restore(flags);
put_cpu();
}

void free_hot_page(struct page *page)
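In the free_hot_cold_page() hunks above, the pageset pointer is no longer taken via zone_pcp(zone, get_cpu()) before the critical section; it is read with this_cpu_ptr() only after local_irq_save(), so the explicit get_cpu()/put_cpu() preemption pinning becomes unnecessary. A small sketch of the resulting pattern (names other than the percpu and IRQ helpers are hypothetical):

#include <linux/percpu.h>
#include <linux/irqflags.h>

struct pc_data {
	unsigned long count;
};

static struct pc_data *pc;		/* assume set up earlier with alloc_percpu() */

static void touch_local_copy(void)
{
	unsigned long flags;
	struct pc_data *d;

	local_irq_save(flags);		/* no migration while interrupts are off */
	d = this_cpu_ptr(pc);		/* so no get_cpu()/put_cpu() pair is needed */
	d->count++;
	local_irq_restore(flags);
}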
@@ -1180,17 +1179,15 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
unsigned long flags;
struct page *page;
int cold = !!(gfp_flags & __GFP_COLD);
int cpu;

again:
cpu = get_cpu();
if (likely(order == 0)) {
struct per_cpu_pages *pcp;
struct list_head *list;

pcp = &zone_pcp(zone, cpu)->pcp;
list = &pcp->lists[migratetype];
local_irq_save(flags);
pcp = &this_cpu_ptr(zone->pageset)->pcp;
list = &pcp->lists[migratetype];
if (list_empty(list)) {
pcp->count += rmqueue_bulk(zone, 0,
pcp->batch, list,
@@ -1231,7 +1228,6 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
__count_zone_vm_events(PGALLOC, zone, 1 << order);
zone_statistics(preferred_zone, zone);
local_irq_restore(flags);
put_cpu();

VM_BUG_ON(bad_range(zone, page));
if (prep_new_page(page, order, gfp_flags))
@@ -1240,7 +1236,6 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,

failed:
local_irq_restore(flags);
put_cpu();
return NULL;
}

@@ -2179,7 +2174,7 @@ void show_free_areas(void)
for_each_online_cpu(cpu) {
struct per_cpu_pageset *pageset;

pageset = zone_pcp(zone, cpu);
pageset = per_cpu_ptr(zone->pageset, cpu);

printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n",
cpu, pageset->pcp.high,
@@ -2744,10 +2739,29 @@ static void build_zonelist_cache(pg_data_t *pgdat)

#endif /* CONFIG_NUMA */

/*
* Boot pageset table. One per cpu which is going to be used for all
* zones and all nodes. The parameters will be set in such a way
* that an item put on a list will immediately be handed over to
* the buddy list. This is safe since pageset manipulation is done
* with interrupts disabled.
*
* The boot_pagesets must be kept even after bootup is complete for
* unused processors and/or zones. They do play a role for bootstrapping
* hotplugged processors.
*
* zoneinfo_show() and maybe other functions do
* not check if the processor is online before following the pageset pointer.
* Other parts of the kernel may not check if the zone is available.
*/
static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch);
static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset);

/* return values int ....just for stop_machine() */
static int __build_all_zonelists(void *dummy)
{
int nid;
int cpu;

#ifdef CONFIG_NUMA
memset(node_load, 0, sizeof(node_load));
@@ -2758,6 +2772,23 @@ static int __build_all_zonelists(void *dummy)
build_zonelists(pgdat);
build_zonelist_cache(pgdat);
}

/*
* Initialize the boot_pagesets that are going to be used
* for bootstrapping processors. The real pagesets for
* each zone will be allocated later when the per cpu
* allocator is available.
*
* boot_pagesets are used also for bootstrapping offline
* cpus if the system is already booted because the pagesets
* are needed to initialize allocators on a specific cpu too.
* F.e. the percpu allocator needs the page allocator which
* needs the percpu allocator in order to allocate its pagesets
* (a chicken-egg dilemma).
*/
for_each_possible_cpu(cpu)
setup_pageset(&per_cpu(boot_pageset, cpu), 0);

return 0;
}
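setup_pageset() itself lies outside the hunks shown, but the boot_pageset comment above spells out its intent for the boot case: called with a batch of 0, the per-cpu lists stay effectively empty, so a page freed through a boot pageset is handed to the buddy list almost immediately. A sketch of that idea, reusing the struct per_cpu_pages fields visible in the hunks; the function body is an illustration, not the commit's code:

#include <linux/mmzone.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/string.h>

static void setup_pageset_sketch(struct per_cpu_pageset *p, unsigned long batch)
{
	struct per_cpu_pages *pcp = &p->pcp;
	int t;

	memset(p, 0, sizeof(*p));
	for (t = 0; t < MIGRATE_PCPTYPES; t++)
		INIT_LIST_HEAD(&pcp->lists[t]);

	pcp->count = 0;
	pcp->high  = 6 * batch;		/* 0 for the boot pagesets */
	pcp->batch = max(1UL, batch);	/* refills still move at least one page */
}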

@@ -3095,121 +3126,33 @@ static void setup_pagelist_highmark(struct per_cpu_pageset *p,
pcp->batch = PAGE_SHIFT * 8;
}


#ifdef CONFIG_NUMA
/*
* Boot pageset table. One per cpu which is going to be used for all
* zones and all nodes. The parameters will be set in such a way
* that an item put on a list will immediately be handed over to
* the buddy list. This is safe since pageset manipulation is done
* with interrupts disabled.
*
* Some NUMA counter updates may also be caught by the boot pagesets.
*
* The boot_pagesets must be kept even after bootup is complete for
* unused processors and/or zones. They do play a role for bootstrapping
* hotplugged processors.
*
* zoneinfo_show() and maybe other functions do
* not check if the processor is online before following the pageset pointer.
* Other parts of the kernel may not check if the zone is available.
*/
static struct per_cpu_pageset boot_pageset[NR_CPUS];

/*
* Dynamically allocate memory for the
* per cpu pageset array in struct zone.
* Allocate per cpu pagesets and initialize them.
* Before this call only boot pagesets were available.
Boot pagesets will no longer be used by this processor
* after setup_per_cpu_pageset().
*/
static int __cpuinit process_zones(int cpu)
void __init setup_per_cpu_pageset(void)
{
struct zone *zone, *dzone;
int node = cpu_to_node(cpu);

node_set_state(node, N_CPU); /* this node has a cpu */
struct zone *zone;
int cpu;

for_each_populated_zone(zone) {
zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
GFP_KERNEL, node);
if (!zone_pcp(zone, cpu))
goto bad;

setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone));

if (percpu_pagelist_fraction)
setup_pagelist_highmark(zone_pcp(zone, cpu),
(zone->present_pages / percpu_pagelist_fraction));
}

return 0;
bad:
for_each_zone(dzone) {
if (!populated_zone(dzone))
continue;
if (dzone == zone)
break;
kfree(zone_pcp(dzone, cpu));
zone_pcp(dzone, cpu) = &boot_pageset[cpu];
}
return -ENOMEM;
}
zone->pageset = alloc_percpu(struct per_cpu_pageset);

static inline void free_zone_pagesets(int cpu)
{
struct zone *zone;

for_each_zone(zone) {
struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
for_each_possible_cpu(cpu) {
struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu);

/* Free per_cpu_pageset if it is slab allocated */
if (pset != &boot_pageset[cpu])
kfree(pset);
zone_pcp(zone, cpu) = &boot_pageset[cpu];
}
}
setup_pageset(pcp, zone_batchsize(zone));

static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
int cpu = (long)hcpu;
int ret = NOTIFY_OK;

switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
if (process_zones(cpu))
ret = NOTIFY_BAD;
break;
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
case CPU_DEAD:
case CPU_DEAD_FROZEN:
free_zone_pagesets(cpu);
break;
default:
break;
if (percpu_pagelist_fraction)
setup_pagelist_highmark(pcp,
(zone->present_pages /
percpu_pagelist_fraction));
}
}
return ret;
}

static struct notifier_block __cpuinitdata pageset_notifier =
{ &pageset_cpuup_callback, NULL, 0 };

void __init setup_per_cpu_pageset(void)
{
int err;

/* Initialize per_cpu_pageset for cpu 0.
* A cpuup callback will do this for every cpu
* as it comes online
*/
err = process_zones(smp_processor_id());
BUG_ON(err);
register_cpu_notifier(&pageset_notifier);
}

#endif

static noinline __init_refok
int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
{
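The large hunk above drops process_zones(), free_zone_pagesets() and the CPU-hotplug notifier in favour of a single boot-time setup_per_cpu_pageset(): alloc_percpu() already reserves a copy for every possible CPU, so there is nothing to allocate or free as CPUs come and go. A hedged sketch of that allocate-once, initialize-every-possible-CPU pattern (structure and helper names are hypothetical):

#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/errno.h>

struct tuning {
	unsigned long high;
	unsigned long batch;
};

static void tuning_setup(struct tuning *t, unsigned long batch)
{
	t->batch = batch;
	t->high  = 6 * batch;	/* same shape as the pageset high/batch relation */
}

static int tuning_init(struct tuning **out, unsigned long batch)
{
	struct tuning *set;
	int cpu;

	set = alloc_percpu(struct tuning);	/* covers every possible CPU */
	if (!set)
		return -ENOMEM;

	for_each_possible_cpu(cpu)
		tuning_setup(per_cpu_ptr(set, cpu), batch);

	*out = set;
	return 0;
}

static void tuning_exit(struct tuning *set)
{
	free_percpu(set);	/* one call releases all per-CPU copies */
}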
@@ -3263,7 +3206,7 @@ static int __zone_pcp_update(void *data)
struct per_cpu_pageset *pset;
struct per_cpu_pages *pcp;

pset = zone_pcp(zone, cpu);
pset = per_cpu_ptr(zone->pageset, cpu);
pcp = &pset->pcp;

local_irq_save(flags);
@@ -3281,21 +3224,17 @@ void zone_pcp_update(struct zone *zone)

static __meminit void zone_pcp_init(struct zone *zone)
{
int cpu;
unsigned long batch = zone_batchsize(zone);
/*
* per cpu subsystem is not up at this point. The following code
* relies on the ability of the linker to provide the
* offset of a (static) per cpu variable into the per cpu area.
*/
zone->pageset = &boot_pageset;

for (cpu = 0; cpu < NR_CPUS; cpu++) {
#ifdef CONFIG_NUMA
/* Early boot. Slab allocator not functional yet */
zone_pcp(zone, cpu) = &boot_pageset[cpu];
setup_pageset(&boot_pageset[cpu],0);
#else
setup_pageset(zone_pcp(zone,cpu), batch);
#endif
}
if (zone->present_pages)
printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n",
zone->name, zone->present_pages, batch);
printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%u\n",
zone->name, zone->present_pages,
zone_batchsize(zone));
}

__meminit int init_currently_empty_zone(struct zone *zone,
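The zone_pcp_init() comment above explains why pointing zone->pageset at boot_pageset is legal before the dynamic percpu allocator runs: a DEFINE_PER_CPU variable is placed in the percpu section by the linker, so its address is already a valid percpu offset that per_cpu_ptr()/this_cpu_ptr() can translate. A minimal illustration of the same bootstrap pattern, with hypothetical names:

#include <linux/percpu.h>

struct stats {
	unsigned long events;
};

static DEFINE_PER_CPU(struct stats, boot_stats);	/* laid out by the linker */
static struct stats *stats;				/* what the rest of the code uses */

static void stats_early_init(void)
{
	stats = &boot_stats;			/* usable before alloc_percpu() exists */
}

static void stats_late_init(void)
{
	stats = alloc_percpu(struct stats);	/* switch to dynamic percpu storage */
}

static void stats_record(int cpu)
{
	per_cpu_ptr(stats, cpu)->events++;	/* same accessor for either backing store */
}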
@@ -4809,10 +4748,11 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
if (!write || (ret == -EINVAL))
return ret;
for_each_populated_zone(zone) {
for_each_online_cpu(cpu) {
for_each_possible_cpu(cpu) {
unsigned long high;
high = zone->present_pages / percpu_pagelist_fraction;
setup_pagelist_highmark(zone_pcp(zone, cpu), high);
setup_pagelist_highmark(
per_cpu_ptr(zone->pageset, cpu), high);
}
}
return 0;