---
yaml
---
r: 230697
b: refs/heads/master
c: 88f5acf
h: refs/heads/master
i:
  230695: 21a61bc
v: v3
Mel Gorman authored and Linus Torvalds committed Jan 14, 2011
1 parent f569630 commit 439b67a
Showing 7 changed files with 116 additions and 48 deletions.
2 changes: 1 addition & 1 deletion [refs]
@@ -1,2 +1,2 @@
---
refs/heads/master: 43bb40c9e3aa51a3b038c9df2c9afb4d4685614d
refs/heads/master: 88f5acf88ae6a9778f6d25d0d5d7ec2d57764a97
10 changes: 3 additions & 7 deletions trunk/include/linux/mmzone.h
@@ -458,12 +458,6 @@ static inline int zone_is_oom_locked(const struct zone *zone)
return test_bit(ZONE_OOM_LOCKED, &zone->flags);
}

#ifdef CONFIG_SMP
unsigned long zone_nr_free_pages(struct zone *zone);
#else
#define zone_nr_free_pages(zone) zone_page_state(zone, NR_FREE_PAGES)
#endif /* CONFIG_SMP */

/*
* The "priority" of VM scanning is how much of the queues we will scan in one
* go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
@@ -661,7 +655,9 @@ typedef struct pglist_data {
extern struct mutex zonelists_mutex;
void build_all_zonelists(void *data);
void wakeup_kswapd(struct zone *zone, int order);
int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
int classzone_idx, int alloc_flags);
bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
int classzone_idx, int alloc_flags);
enum memmap_context {
MEMMAP_EARLY,
5 changes: 5 additions & 0 deletions trunk/include/linux/vmstat.h
@@ -254,6 +254,8 @@ extern void dec_zone_state(struct zone *, enum zone_stat_item);
extern void __dec_zone_state(struct zone *, enum zone_stat_item);

void refresh_cpu_vm_stats(int);
void reduce_pgdat_percpu_threshold(pg_data_t *pgdat);
void restore_pgdat_percpu_threshold(pg_data_t *pgdat);
#else /* CONFIG_SMP */

/*
@@ -298,6 +300,9 @@ static inline void __dec_zone_page_state(struct page *page,
#define dec_zone_page_state __dec_zone_page_state
#define mod_zone_page_state __mod_zone_page_state

static inline void reduce_pgdat_percpu_threshold(pg_data_t *pgdat) { }
static inline void restore_pgdat_percpu_threshold(pg_data_t *pgdat) { }

static inline void refresh_cpu_vm_stats(int cpu) { }
#endif

21 changes: 0 additions & 21 deletions trunk/mm/mmzone.c
@@ -87,24 +87,3 @@ int memmap_valid_within(unsigned long pfn,
return 1;
}
#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */

#ifdef CONFIG_SMP
/* Called when a more accurate view of NR_FREE_PAGES is needed */
unsigned long zone_nr_free_pages(struct zone *zone)
{
unsigned long nr_free_pages = zone_page_state(zone, NR_FREE_PAGES);

/*
* While kswapd is awake, it is considered the zone is under some
* memory pressure. Under pressure, there is a risk that
* per-cpu-counter-drift will allow the min watermark to be breached
* potentially causing a live-lock. While kswapd is awake and
* free pages are low, get a better estimate for free pages
*/
if (nr_free_pages < zone->percpu_drift_mark &&
!waitqueue_active(&zone->zone_pgdat->kswapd_wait))
return zone_page_state_snapshot(zone, NR_FREE_PAGES);

return nr_free_pages;
}
#endif /* CONFIG_SMP */
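
As background for the removal above: zone_nr_free_pages() existed because per-CPU vmstat deltas are only folded into the global zone counter once they exceed a per-CPU stat_threshold, so a cheap read of NR_FREE_PAGES can lag reality by roughly num_online_cpus * threshold pages. The following is a minimal userspace sketch of that batching behaviour; the counter names, NR_CPUS and threshold values are invented for illustration and this is not kernel code.

/*
 * Illustrative sketch of per-CPU vmstat drift; not kernel code.
 * Each CPU batches counter updates locally and only folds them into
 * the global value once the pending delta exceeds a threshold.
 */
#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS        4
#define STAT_THRESHOLD 32      /* hypothetical per-CPU stat_threshold */

static long global_free_pages;         /* the cheap, possibly stale counter */
static int  cpu_delta[NR_CPUS];        /* per-CPU pending contributions */

/* Roughly mimics __mod_zone_page_state(): batch small updates per CPU. */
static void mod_free_pages(int cpu, int delta)
{
        cpu_delta[cpu] += delta;
        if (abs(cpu_delta[cpu]) > STAT_THRESHOLD) {
                global_free_pages += cpu_delta[cpu];
                cpu_delta[cpu] = 0;
        }
}

/* Roughly mimics zone_page_state_snapshot(): fold in pending deltas. */
static long free_pages_snapshot(void)
{
        long value = global_free_pages;
        int cpu;

        for (cpu = 0; cpu < NR_CPUS; cpu++)
                value += cpu_delta[cpu];
        return value;
}

int main(void)
{
        int cpu;

        global_free_pages = 1000;

        /* Every CPU allocates pages, but none crosses the fold threshold... */
        for (cpu = 0; cpu < NR_CPUS; cpu++)
                mod_free_pages(cpu, -STAT_THRESHOLD);

        /* ...so the cheap read over-reports free memory by NR_CPUS * threshold. */
        printf("cheap read: %ld\n", global_free_pages);     /* 1000 */
        printf("snapshot:   %ld\n", free_pages_snapshot()); /* 872  */
        return 0;
}

With large thresholds and many CPUs this gap can exceed the distance between the low and min watermarks, which is the situation the safe watermark check and the reduced pressure threshold introduced below are meant to cover.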
35 changes: 27 additions & 8 deletions trunk/mm/page_alloc.c
@@ -1460,24 +1460,24 @@ static inline int should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
#endif /* CONFIG_FAIL_PAGE_ALLOC */

/*
* Return 1 if free pages are above 'mark'. This takes into account the order
* Return true if free pages are above 'mark'. This takes into account the order
* of the allocation.
*/
int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
int classzone_idx, int alloc_flags)
static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
int classzone_idx, int alloc_flags, long free_pages)
{
/* free_pages my go negative - that's OK */
long min = mark;
long free_pages = zone_nr_free_pages(z) - (1 << order) + 1;
int o;

free_pages -= (1 << order) + 1;
if (alloc_flags & ALLOC_HIGH)
min -= min / 2;
if (alloc_flags & ALLOC_HARDER)
min -= min / 4;

if (free_pages <= min + z->lowmem_reserve[classzone_idx])
return 0;
return false;
for (o = 0; o < order; o++) {
/* At the next order, this order's pages become unavailable */
free_pages -= z->free_area[o].nr_free << o;
@@ -1486,9 +1486,28 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
min >>= 1;

if (free_pages <= min)
return 0;
return false;
}
return 1;
return true;
}

bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
int classzone_idx, int alloc_flags)
{
return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
zone_page_state(z, NR_FREE_PAGES));
}

bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
int classzone_idx, int alloc_flags)
{
long free_pages = zone_page_state(z, NR_FREE_PAGES);

if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES);

return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
free_pages);
}

#ifdef CONFIG_NUMA
@@ -2442,7 +2461,7 @@ void show_free_areas(void)
" all_unreclaimable? %s"
"\n",
zone->name,
K(zone_nr_free_pages(zone)),
K(zone_page_state(zone, NR_FREE_PAGES)),
K(min_wmark_pages(zone)),
K(low_wmark_pages(zone)),
K(high_wmark_pages(zone)),
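
To make the order-aware part of __zone_watermark_ok() above concrete, here is a hedged standalone sketch of the same arithmetic: blocks already fragmented into lower orders cannot satisfy an order-N request, so they are subtracted as the loop climbs while the required margin is halved at each order. The free-area counts, the mark, and the watermark_ok() name are invented, and the lowmem_reserve/alloc_flags handling is omitted.

#include <stdbool.h>
#include <stdio.h>

#define MAX_ORDER 11

/* Simplified order-aware watermark check; mirrors the loop structure above. */
static bool watermark_ok(int order, long mark, long free_pages,
                         const unsigned long *nr_free)
{
        long min = mark;
        int o;

        /* Pages consumed by this allocation are no longer available. */
        free_pages -= 1L << order;

        if (free_pages <= min)
                return false;

        for (o = 0; o < order; o++) {
                /* Blocks of this order cannot satisfy a larger request. */
                free_pages -= nr_free[o] << o;

                /* Demand a smaller margin as we climb to higher orders. */
                min >>= 1;

                if (free_pages <= min)
                        return false;
        }
        return true;
}

int main(void)
{
        /* Hypothetical per-order free block counts for orders 0..10. */
        unsigned long nr_free[MAX_ORDER] = { 400, 30, 4, 1 };
        long free_pages = 0;
        int o;

        for (o = 0; o < MAX_ORDER; o++)
                free_pages += nr_free[o] << o;

        printf("total free pages: %ld\n", free_pages);                          /* 484 */
        printf("order-0 vs mark 128: %d\n", watermark_ok(0, 128, free_pages, nr_free)); /* 1 */
        printf("order-3 vs mark 128: %d\n", watermark_ok(3, 128, free_pages, nr_free)); /* 0 */
        return 0;
}

The wrapper pair above then decides which free-page figure feeds this check: zone_watermark_ok() uses the cheap NR_FREE_PAGES value, while zone_watermark_ok_safe() falls back to zone_page_state_snapshot() once that value drops below percpu_drift_mark.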
23 changes: 13 additions & 10 deletions trunk/mm/vmscan.c
@@ -2143,7 +2143,7 @@ static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
if (zone->all_unreclaimable)
continue;

if (!zone_watermark_ok(zone, order, high_wmark_pages(zone),
if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
0, 0))
return 1;
}
@@ -2230,7 +2230,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
shrink_active_list(SWAP_CLUSTER_MAX, zone,
&sc, priority, 0);

if (!zone_watermark_ok(zone, order,
if (!zone_watermark_ok_safe(zone, order,
high_wmark_pages(zone), 0, 0)) {
end_zone = i;
break;
@@ -2276,7 +2276,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
* We put equal pressure on every zone, unless one
* zone has way too many pages free already.
*/
if (!zone_watermark_ok(zone, order,
if (!zone_watermark_ok_safe(zone, order,
8*high_wmark_pages(zone), end_zone, 0))
shrink_zone(priority, zone, &sc);
reclaim_state->reclaimed_slab = 0;
@@ -2297,15 +2297,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
sc.may_writepage = 1;

if (!zone_watermark_ok(zone, order,
if (!zone_watermark_ok_safe(zone, order,
high_wmark_pages(zone), end_zone, 0)) {
all_zones_ok = 0;
/*
* We are still under min water mark. This
* means that we have a GFP_ATOMIC allocation
* failure risk. Hurry up!
*/
if (!zone_watermark_ok(zone, order,
if (!zone_watermark_ok_safe(zone, order,
min_wmark_pages(zone), end_zone, 0))
has_under_min_watermark_zone = 1;
} else {
@@ -2448,7 +2448,9 @@ static int kswapd(void *p)
*/
if (!sleeping_prematurely(pgdat, order, remaining)) {
trace_mm_vmscan_kswapd_sleep(pgdat->node_id);
restore_pgdat_percpu_threshold(pgdat);
schedule();
reduce_pgdat_percpu_threshold(pgdat);
} else {
if (remaining)
count_vm_event(KSWAPD_LOW_WMARK_HIT_QUICKLY);
@@ -2487,16 +2489,17 @@ void wakeup_kswapd(struct zone *zone, int order)
if (!populated_zone(zone))
return;

pgdat = zone->zone_pgdat;
if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
return;
pgdat = zone->zone_pgdat;
if (pgdat->kswapd_max_order < order)
pgdat->kswapd_max_order = order;
trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order);
if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
return;
if (!waitqueue_active(&pgdat->kswapd_wait))
return;
if (zone_watermark_ok_safe(zone, order, low_wmark_pages(zone), 0, 0))
return;

trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order);
wake_up_interruptible(&pgdat->kswapd_wait);
}

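
The restore-before-sleep / reduce-after-wake bracketing around schedule() above confines the small, accurate (but more expensive to maintain) thresholds to the window in which kswapd is actually awake and memory is presumed tight. A toy single-threaded sketch of that toggling follows; the names, threshold values and the balanced flag are invented and this is not kernel code.

#include <stdbool.h>
#include <stdio.h>

#define NORMAL_THRESHOLD   125  /* large batch: cheap updates, more drift */
#define PRESSURE_THRESHOLD   8  /* small batch: costlier updates, little drift */

/* Starts reduced, as if kswapd had just been woken under pressure. */
static int stat_threshold = PRESSURE_THRESHOLD;

static void reduce_percpu_threshold(void)  { stat_threshold = PRESSURE_THRESHOLD; }
static void restore_percpu_threshold(void) { stat_threshold = NORMAL_THRESHOLD; }

/* One pass of a kswapd-like loop, driven by a fake "node is balanced" flag. */
static void kswapd_iteration(bool balanced)
{
        if (balanced) {
                /* About to sleep: drift is harmless again, favour cheap counters. */
                restore_percpu_threshold();
                printf("sleeping with threshold %d\n", stat_threshold);
                /* ... schedule() here until an allocator wakes us ... */

                /* Woken under memory pressure: favour accurate counters. */
                reduce_percpu_threshold();
        }
        printf("reclaiming with threshold %d\n", stat_threshold);
}

int main(void)
{
        kswapd_iteration(false);   /* still unbalanced: keep reclaiming */
        kswapd_iteration(true);    /* balanced: sleep, then wake under pressure */
        return 0;
}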
68 changes: 67 additions & 1 deletion trunk/mm/vmstat.c
@@ -83,6 +83,30 @@ EXPORT_SYMBOL(vm_stat);

#ifdef CONFIG_SMP

static int calculate_pressure_threshold(struct zone *zone)
{
int threshold;
int watermark_distance;

/*
* As vmstats are not up to date, there is drift between the estimated
* and real values. For high thresholds and a high number of CPUs, it
* is possible for the min watermark to be breached while the estimated
* value looks fine. The pressure threshold is a reduced value such
* that even the maximum amount of drift will not accidentally breach
* the min watermark
*/
watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
threshold = max(1, (int)(watermark_distance / num_online_cpus()));

/*
* Maximum threshold is 125
*/
threshold = min(125, threshold);

return threshold;
}

static int calculate_threshold(struct zone *zone)
{
int threshold;
@@ -161,6 +185,48 @@ static void refresh_zone_stat_thresholds(void)
}
}

void reduce_pgdat_percpu_threshold(pg_data_t *pgdat)
{
struct zone *zone;
int cpu;
int threshold;
int i;

get_online_cpus();
for (i = 0; i < pgdat->nr_zones; i++) {
zone = &pgdat->node_zones[i];
if (!zone->percpu_drift_mark)
continue;

threshold = calculate_pressure_threshold(zone);
for_each_online_cpu(cpu)
per_cpu_ptr(zone->pageset, cpu)->stat_threshold
= threshold;
}
put_online_cpus();
}

void restore_pgdat_percpu_threshold(pg_data_t *pgdat)
{
struct zone *zone;
int cpu;
int threshold;
int i;

get_online_cpus();
for (i = 0; i < pgdat->nr_zones; i++) {
zone = &pgdat->node_zones[i];
if (!zone->percpu_drift_mark)
continue;

threshold = calculate_threshold(zone);
for_each_online_cpu(cpu)
per_cpu_ptr(zone->pageset, cpu)->stat_threshold
= threshold;
}
put_online_cpus();
}

/*
* For use when we know that interrupts are disabled.
*/
@@ -911,7 +977,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
"\n scanned %lu"
"\n spanned %lu"
"\n present %lu",
zone_nr_free_pages(zone),
zone_page_state(zone, NR_FREE_PAGES),
min_wmark_pages(zone),
low_wmark_pages(zone),
high_wmark_pages(zone),
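
Finally, the sizing in calculate_pressure_threshold() above can be sanity-checked with a worked example: the threshold is the low-to-min watermark gap divided across the online CPUs, floored at 1 and capped at 125, so even if every CPU holds a just-under-threshold delta the combined drift cannot overshoot the gap. The watermark values below are invented for illustration; this is a sketch, not the kernel function.

#include <stdio.h>

#define MAX_THRESHOLD 125

/* Mirrors the sizing logic above; watermark inputs are invented. */
static int pressure_threshold(long low_wmark, long min_wmark, int online_cpus)
{
        long distance = low_wmark - min_wmark;
        int threshold = (int)(distance / online_cpus);

        if (threshold < 1)
                threshold = 1;
        if (threshold > MAX_THRESHOLD)
                threshold = MAX_THRESHOLD;
        return threshold;
}

int main(void)
{
        /* Hypothetical zone: low watermark 1280 pages, min watermark 1024. */
        long low = 1280, min = 1024;
        int cpus;

        for (cpus = 1; cpus <= 64; cpus *= 4) {
                int t = pressure_threshold(low, min, cpus);

                /* Worst-case drift: every CPU sits just under the threshold. */
                printf("%2d cpus: threshold %3d, worst-case drift ~%4d (gap %ld)\n",
                       cpus, t, cpus * t, low - min);
        }
        return 0;
}

With the ordinary calculate_threshold() values (up to 125 per CPU regardless of the gap), the same product could easily dwarf the low-to-min distance on a large machine, which is why the reduced threshold is applied only while kswapd is awake.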
