Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 16830
b: refs/heads/master
c: 8ad4b1f
h: refs/heads/master
v: v3
  • Loading branch information
Rohit Seth authored and Linus Torvalds committed Jan 9, 2006
1 parent 2d5e18e commit 88cbca1
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 1 deletion.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: 9d0243bca345d5ce25d3f4b74b7facb3a6df1232
refs/heads/master: 8ad4b1fb8205340dba16b63467bb23efc27264d6
17 changes: 17 additions & 0 deletions trunk/Documentation/sysctl/vm.txt
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,20 @@ This is used to force the Linux VM to keep a minimum number
of kilobytes free. The VM uses this number to compute a pages_min
value for each lowmem zone in the system. Each lowmem zone gets
a number of reserved free pages based proportionally on its size.

==============================================================

percpu_pagelist_fraction

This is the fraction of pages at most (high mark pcp->high) in each zone that
are allocated for each per cpu page list. The min value for this is 8. It
means that we don't allow more than 1/8th of pages in each zone to be
allocated in any single per_cpu_pagelist. This entry only changes the value
of hot per cpu pagelists. User can specify a number like 100 to allocate
1/100th of each zone to each per cpu page list.

The batch value of each per cpu pagelist is also updated as a result. It is
set to pcp->high/4. The upper limit of batch is (PAGE_SHIFT * 8)

The initial value is zero. Kernel does not use this value at boot time to set
the high water marks for each per cpu page list.
2 changes: 2 additions & 0 deletions trunk/include/linux/mmzone.h
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,8 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *, int, struct file *,
extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1];
int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
void __user *, size_t *, loff_t *);
int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *,
void __user *, size_t *, loff_t *);

#include <linux/topology.h>
/* Returns the number of the current Node. */
Expand Down
1 change: 1 addition & 0 deletions trunk/include/linux/sysctl.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ enum
VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */
VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */
VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */
VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */
};


Expand Down
12 changes: 12 additions & 0 deletions trunk/kernel/sysctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ extern int printk_ratelimit_jiffies;
extern int printk_ratelimit_burst;
extern int pid_max_min, pid_max_max;
extern int sysctl_drop_caches;
extern int percpu_pagelist_fraction;

#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
int unknown_nmi_panic;
Expand All @@ -79,6 +80,7 @@ extern int proc_unknown_nmi_panic(ctl_table *, int, struct file *,
/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
static int maxolduid = 65535;
static int minolduid;
static int min_percpu_pagelist_fract = 8;

static int ngroups_max = NGROUPS_MAX;

Expand Down Expand Up @@ -794,6 +796,16 @@ static ctl_table vm_table[] = {
.strategy = &sysctl_intvec,
.extra1 = &zero,
},
{
.ctl_name = VM_PERCPU_PAGELIST_FRACTION,
.procname = "percpu_pagelist_fraction",
.data = &percpu_pagelist_fraction,
.maxlen = sizeof(percpu_pagelist_fraction),
.mode = 0644,
.proc_handler = &percpu_pagelist_fraction_sysctl_handler,
.strategy = &sysctl_intvec,
.extra1 = &min_percpu_pagelist_fract,
},
#ifdef CONFIG_MMU
{
.ctl_name = VM_MAX_MAP_COUNT,
Expand Down
49 changes: 49 additions & 0 deletions trunk/mm/page_alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ struct pglist_data *pgdat_list __read_mostly;
unsigned long totalram_pages __read_mostly;
unsigned long totalhigh_pages __read_mostly;
long nr_swap_pages;
int percpu_pagelist_fraction;

static void fastcall free_hot_cold_page(struct page *page, int cold);

Expand Down Expand Up @@ -1831,6 +1832,24 @@ inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
INIT_LIST_HEAD(&pcp->list);
}

/*
* setup_pagelist_highmark() sets the high water mark for hot per_cpu_pagelist
* to the value high for the pageset p.
*/

static void setup_pagelist_highmark(struct per_cpu_pageset *p,
unsigned long high)
{
struct per_cpu_pages *pcp;

pcp = &p->pcp[0]; /* hot list */
pcp->high = high;
pcp->batch = max(1UL, high/4);
if ((high/4) > (PAGE_SHIFT * 8))
pcp->batch = PAGE_SHIFT * 8;
}


#ifdef CONFIG_NUMA
/*
* Boot pageset table. One per cpu which is going to be used for all
Expand Down Expand Up @@ -1868,6 +1887,10 @@ static int __devinit process_zones(int cpu)
goto bad;

setup_pageset(zone->pageset[cpu], zone_batchsize(zone));

if (percpu_pagelist_fraction)
setup_pagelist_highmark(zone_pcp(zone, cpu),
(zone->present_pages / percpu_pagelist_fraction));
}

return 0;
Expand Down Expand Up @@ -2567,6 +2590,32 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
return 0;
}

/*
* percpu_pagelist_fraction - changes the pcp->high for each zone on each
* cpu. It is the fraction of total pages in each zone that a hot per cpu pagelist
* can have before it gets flushed back to buddy allocator.
*/

int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
{
struct zone *zone;
unsigned int cpu;
int ret;

ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
if (!write || (ret == -EINVAL))
return ret;
for_each_zone(zone) {
for_each_online_cpu(cpu) {
unsigned long high;
high = zone->present_pages / percpu_pagelist_fraction;
setup_pagelist_highmark(zone_pcp(zone, cpu), high);
}
}
return 0;
}

__initdata int hashdist = HASHDIST_DEFAULT;

#ifdef CONFIG_NUMA
Expand Down

0 comments on commit 88cbca1

Please sign in to comment.