Commit 5f455bd

Christoph Lameter authored and Linus Torvalds committed Sep 1, 2006
1 parent 2d9abd9 commit 5f455bd
Showing 3 changed files with 121 additions and 21 deletions.
2 changes: 1 addition & 1 deletion [refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: a302eb4e4602d6444ae75a0e516fb2f2c62d6642
+refs/heads/master: df9ecaba3f152d1ea79f2a5e0b87505e03f47590
1 change: 1 addition & 0 deletions trunk/include/linux/mmzone.h
@@ -77,6 +77,7 @@ struct per_cpu_pages {
 struct per_cpu_pageset {
         struct per_cpu_pages pcp[2];    /* 0: hot.  1: cold */
 #ifdef CONFIG_SMP
+        s8 stat_threshold;
         s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
 #endif
 } ____cacheline_aligned_in_smp;
139 changes: 119 additions & 20 deletions trunk/mm/vmstat.c
@@ -12,6 +12,7 @@
 #include <linux/config.h>
 #include <linux/mm.h>
 #include <linux/module.h>
+#include <linux/cpu.h>
 
 void __get_zone_counts(unsigned long *active, unsigned long *inactive,
                         unsigned long *free, struct pglist_data *pgdat)
@@ -114,17 +115,72 @@ EXPORT_SYMBOL(vm_stat);
 
 #ifdef CONFIG_SMP
 
-#define STAT_THRESHOLD 32
+static int calculate_threshold(struct zone *zone)
+{
+        int threshold;
+        int mem;        /* memory in 128 MB units */
+
+        /*
+         * The threshold scales with the number of processors and the amount
+         * of memory per zone. More memory means that we can defer updates for
+         * longer, more processors could lead to more contention.
+         * fls() is used to have a cheap way of logarithmic scaling.
+         *
+         * Some sample thresholds:
+         *
+         * Threshold    Processors      (fls)   Zonesize        fls(mem+1)
+         * ------------------------------------------------------------------
+         * 8            1               1       0.9-1 GB        4
+         * 16           2               2       0.9-1 GB        4
+         * 20           2               2       1-2 GB          5
+         * 24           2               2       2-4 GB          6
+         * 28           2               2       4-8 GB          7
+         * 32           2               2       8-16 GB         8
+         * 4            2               2       <128M           1
+         * 30           4               3       2-4 GB          5
+         * 48           4               3       8-16 GB         8
+         * 32           8               4       1-2 GB          4
+         * 32           8               4       0.9-1GB         4
+         * 10           16              5       <128M           1
+         * 40           16              5       900M            4
+         * 70           64              7       2-4 GB          5
+         * 84           64              7       4-8 GB          6
+         * 108          512             9       4-8 GB          6
+         * 125          1024            10      8-16 GB         8
+         * 125          1024            10      16-32 GB        9
+         */
+
+        mem = zone->present_pages >> (27 - PAGE_SHIFT);
+
+        threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));
+
+        /*
+         * Maximum threshold is 125
+         */
+        threshold = min(125, threshold);
+
+        return threshold;
+}
 
 /*
- * Determine pointer to currently valid differential byte given a zone and
- * the item number.
- *
- * Preemption must be off
+ * Refresh the thresholds for each zone.
  */
-static inline s8 *diff_pointer(struct zone *zone, enum zone_stat_item item)
+static void refresh_zone_stat_thresholds(void)
 {
-        return &zone_pcp(zone, smp_processor_id())->vm_stat_diff[item];
+        struct zone *zone;
+        int cpu;
+        int threshold;
+
+        for_each_zone(zone) {
+
+                if (!zone->present_pages)
+                        continue;
+
+                threshold = calculate_threshold(zone);
+
+                for_each_online_cpu(cpu)
+                        zone_pcp(zone, cpu)->stat_threshold = threshold;
+        }
 }
 
 /*
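
A quick way to check the sample table in the hunk above is to replay the formula in userspace. This is a minimal sketch, not kernel code: fls() is re-implemented locally (the kernel's fls() returns the 1-based index of the highest set bit, with fls(0) == 0), and the configuration (2 online CPUs, a 2 GB zone, PAGE_SHIFT of 12, i.e. 4 KB pages) is assumed for illustration:

#include <stdio.h>

/* Local stand-in for the kernel's fls(): 1-based index of the highest
 * set bit; fls(0) == 0. */
static int fls(unsigned int x)
{
        int r = 0;

        while (x) {
                x >>= 1;
                r++;
        }
        return r;
}

int main(void)
{
        /* Assumed configuration: 2 online CPUs, a 2 GB zone, 4 KB pages. */
        const int page_shift = 12;
        const unsigned long present_pages = (2UL << 30) >> page_shift;
        const int cpus = 2;

        int mem = present_pages >> (27 - page_shift);   /* 128 MB units */
        int threshold = 2 * fls(cpus) * (1 + fls(mem));

        if (threshold > 125)    /* same cap as the patch applies */
                threshold = 125;

        /* Prints 24, matching the "2 processors, 2-4 GB" table row. */
        printf("threshold = %d\n", threshold);
        return 0;
}

With 2 CPUs and a 2 GB zone, mem comes out to 16 (128 MB units), fls(2) is 2, 1 + fls(16) is 6, and the result is 2 * 2 * 6 = 24.
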
@@ -133,17 +189,16 @@ static inline s8 *diff_pointer(struct zone *zone, enum zone_stat_item item)
 void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
                                 int delta)
 {
-        s8 *p;
+        struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
+        s8 *p = pcp->vm_stat_diff + item;
         long x;
 
-        p = diff_pointer(zone, item);
         x = delta + *p;
 
-        if (unlikely(x > STAT_THRESHOLD || x < -STAT_THRESHOLD)) {
+        if (unlikely(x > pcp->stat_threshold || x < -pcp->stat_threshold)) {
                 zone_page_state_add(x, zone, item);
                 x = 0;
         }
-
         *p = x;
 }
 EXPORT_SYMBOL(__mod_zone_page_state);
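
The rewritten __mod_zone_page_state() keeps the same shape as before: deltas accumulate in a per-cpu s8 differential and spill into the global counter only once they exceed the threshold, which is now per-pageset rather than the fixed STAT_THRESHOLD. A minimal userspace sketch of that fold behavior, with one hypothetical counter and an assumed threshold of 24:

#include <stdio.h>

/* One hypothetical counter: a global value plus a per-cpu s8
 * differential, with an assumed threshold of 24. */
static long global_count;               /* plays zone->vm_stat[item] */
static signed char diff;                /* plays pcp->vm_stat_diff[item] */
static const int stat_threshold = 24;

static void mod_state(int delta)
{
        long x = delta + diff;

        if (x > stat_threshold || x < -stat_threshold) {
                global_count += x;      /* fold into the global counter */
                x = 0;
        }
        diff = x;
}

int main(void)
{
        int i;

        for (i = 0; i < 30; i++)
                mod_state(1);

        /* Only the 25th update touched the global counter; the other 29
         * stayed cpu-local. Prints: global=25 diff=5 */
        printf("global=%ld diff=%d\n", global_count, (int)diff);
        return 0;
}
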
@@ -172,10 +227,12 @@ EXPORT_SYMBOL(mod_zone_page_state);
  * No overflow check is necessary and therefore the differential can be
  * incremented or decremented in place which may allow the compilers to
  * generate better code.
+ *
+ * The increment or decrement is known and therefore one boundary check can
+ * be omitted.
  *
  * NOTE: These functions are very performance sensitive. Change only
  * with care.
- *
  * Some processors have inc/dec instructions that are atomic vs an interrupt.
  * However, the code must first determine the differential location in a zone
  * based on the processor number and then inc/dec the counter. There is no
@@ -185,13 +242,16 @@ EXPORT_SYMBOL(mod_zone_page_state);
  */
 static void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
 {
-        s8 *p = diff_pointer(zone, item);
+        struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
+        s8 *p = pcp->vm_stat_diff + item;
 
         (*p)++;
 
-        if (unlikely(*p > STAT_THRESHOLD)) {
-                zone_page_state_add(*p + STAT_THRESHOLD / 2, zone, item);
-                *p = -STAT_THRESHOLD / 2;
+        if (unlikely(*p > pcp->stat_threshold)) {
+                int overstep = pcp->stat_threshold / 2;
+
+                zone_page_state_add(*p + overstep, zone, item);
+                *p = -overstep;
         }
 }
 
@@ -204,13 +264,16 @@ EXPORT_SYMBOL(__inc_zone_page_state);
 void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
 {
         struct zone *zone = page_zone(page);
-        s8 *p = diff_pointer(zone, item);
+        struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
+        s8 *p = pcp->vm_stat_diff + item;
 
         (*p)--;
 
-        if (unlikely(*p < -STAT_THRESHOLD)) {
-                zone_page_state_add(*p - STAT_THRESHOLD / 2, zone, item);
-                *p = STAT_THRESHOLD /2;
+        if (unlikely(*p < - pcp->stat_threshold)) {
+                int overstep = pcp->stat_threshold / 2;
+
+                zone_page_state_add(*p - overstep, zone, item);
+                *p = overstep;
         }
 }
 EXPORT_SYMBOL(__dec_zone_page_state);
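
__inc_zone_state() and __dec_zone_page_state() differ from __mod_zone_page_state() in one detail: after a fold they reset the differential to -overstep (respectively +overstep) rather than zero, which leaves extra headroom before the next fold when the counter keeps moving in the same direction. A sketch of the increment side, under the same assumed state and threshold of 24 as the previous example:

#include <stdio.h>

/* Same assumed per-cpu state as above; increment side only. */
static long global_count;
static signed char diff;
static const int stat_threshold = 24;

static void inc_state(void)
{
        diff++;
        if (diff > stat_threshold) {
                int overstep = stat_threshold / 2;

                global_count += diff + overstep;        /* credit the bias too */
                diff = -overstep;                       /* restart half-way down */
        }
}

int main(void)
{
        int i;

        for (i = 0; i < 25; i++)
                inc_state();

        /* The fold credited 25 + 12 and left diff at -12: global + diff
         * still equals the true count of 25, but the next fold is now 37
         * increments away instead of 25. Prints: global=37 diff=-12 true=25 */
        printf("global=%ld diff=%d true=%ld\n",
                global_count, (int)diff, global_count + diff);
        return 0;
}
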
@@ -515,6 +578,10 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
                            pageset->pcp[j].high,
                            pageset->pcp[j].batch);
                 }
+#ifdef CONFIG_SMP
+                seq_printf(m, "\n  vm stats threshold: %d",
+                                pageset->stat_threshold);
+#endif
         }
         seq_printf(m,
                    "\n  all_unreclaimable: %u"
@@ -603,3 +670,35 @@ struct seq_operations vmstat_op = {
 
 #endif /* CONFIG_PROC_FS */
 
+#ifdef CONFIG_SMP
+/*
+ * Use the cpu notifier to insure that the thresholds are recalculated
+ * when necessary.
+ */
+static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
+                unsigned long action,
+                void *hcpu)
+{
+        switch (action) {
+        case CPU_UP_PREPARE:
+        case CPU_UP_CANCELED:
+        case CPU_DEAD:
+                refresh_zone_stat_thresholds();
+                break;
+        default:
+                break;
+        }
+        return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata vmstat_notifier =
+        { &vmstat_cpuup_callback, NULL, 0 };
+
+int __init setup_vmstat(void)
+{
+        refresh_zone_stat_thresholds();
+        register_cpu_notifier(&vmstat_notifier);
+        return 0;
+}
+module_init(setup_vmstat)
+#endif
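
The notifier re-runs refresh_zone_stat_thresholds() on hotplug because num_online_cpus() feeds calculate_threshold(), so the thresholds track the actual CPU count. One back-of-envelope consequence of the scheme (my numbers, derived from the patch rather than stated in it): each cpu's differential stays within plus or minus the threshold, so a reader of the global counter lags the true value by at most cpus * threshold pages per counter. Using the sample "2 processors, 2-4 GB" row from the table above:

#include <stdio.h>

int main(void)
{
        /* Assumed: 2 CPUs, a 2-4 GB zone (threshold 24), 4 KB pages. */
        const int cpus = 2, threshold = 24, page_kb = 4;

        /* Worst-case lag of the global counter behind the true value.
         * Prints: max drift: 48 pages (192 KB) */
        printf("max drift: %d pages (%d KB)\n",
                cpus * threshold, cpus * threshold * page_kb);
        return 0;
}
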
