---
yaml
---
r: 217462
b: refs/heads/master
c: 0e093d9
h: refs/heads/master
v: v3
Mel Gorman authored and Linus Torvalds committed Oct 26, 2010
1 parent a7061d0 commit edcdb1c
Showing 7 changed files with 113 additions and 13 deletions.
2 changes: 1 addition & 1 deletion [refs]
@@ -1,2 +1,2 @@
---
-refs/heads/master: 08fc468f4eaf6683bae5bdb94743a09d8630cb80
+refs/heads/master: 0e093d99763eb4cea09f8ca4f1d01f34e121d10b
2 changes: 1 addition & 1 deletion trunk/include/linux/backing-dev.h
@@ -285,7 +285,7 @@ enum {
void clear_bdi_congested(struct backing_dev_info *bdi, int sync);
void set_bdi_congested(struct backing_dev_info *bdi, int sync);
long congestion_wait(int sync, long timeout);

long wait_iff_congested(struct zone *zone, int sync, long timeout);

static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi)
{
8 changes: 8 additions & 0 deletions trunk/include/linux/mmzone.h
@@ -423,6 +423,9 @@ struct zone {
typedef enum {
ZONE_RECLAIM_LOCKED, /* prevents concurrent reclaim */
ZONE_OOM_LOCKED, /* zone is in OOM killer zonelist */
ZONE_CONGESTED, /* zone has many dirty pages backed by
* a congested BDI
*/
} zone_flags_t;

static inline void zone_set_flag(struct zone *zone, zone_flags_t flag)
@@ -440,6 +443,11 @@ static inline void zone_clear_flag(struct zone *zone, zone_flags_t flag)
clear_bit(flag, &zone->flags);
}

static inline int zone_is_reclaim_congested(const struct zone *zone)
{
return test_bit(ZONE_CONGESTED, &zone->flags);
}

static inline int zone_is_reclaim_locked(const struct zone *zone)
{
return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags);
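Taken together, the hunks above give reclaim a per-zone congestion tag: ZONE_CONGESTED is set, tested and cleared through the same zone_set_flag()/zone_is_reclaim_congested()/zone_clear_flag() helpers as the other zone flags. Below is a minimal sketch of the intended lifecycle, mirroring the call sites added later in this commit; the wrapper function and its name are illustrative only and not part of the patch.

/*
 * Illustrative only: mirrors the shrink_page_list() and balance_pgdat()
 * call sites in the mm/vmscan.c hunks further down.
 */
#include <linux/types.h>
#include <linux/mmzone.h>

static bool zone_should_back_off(struct zone *zone,
				 unsigned long nr_dirty,
				 unsigned long nr_congested)
{
	/* Every dirty page reclaim saw was backed by a congested BDI */
	if (nr_dirty == nr_congested)
		zone_set_flag(zone, ZONE_CONGESTED);

	/* wait_iff_congested() checks this before deciding to sleep */
	return zone_is_reclaim_congested(zone);
}

Once a zone is back above its high watermark, the tag is dropped again with zone_clear_flag(zone, ZONE_CONGESTED), as the balance_pgdat() hunk below shows.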
7 changes: 7 additions & 0 deletions trunk/include/trace/events/writeback.h
@@ -179,6 +179,13 @@ DEFINE_EVENT(writeback_congest_waited_template, writeback_congestion_wait,
TP_ARGS(usec_timeout, usec_delayed)
);

DEFINE_EVENT(writeback_congest_waited_template, writeback_wait_iff_congested,

TP_PROTO(unsigned int usec_timeout, unsigned int usec_delayed),

TP_ARGS(usec_timeout, usec_delayed)
);

#endif /* _TRACE_WRITEBACK_H */

/* This part must be outside protection */
61 changes: 59 additions & 2 deletions trunk/mm/backing-dev.c
@@ -729,14 +729,16 @@ static wait_queue_head_t congestion_wqh[2] = {
__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
};
static atomic_t nr_bdi_congested[2];

void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
{
enum bdi_state bit;
wait_queue_head_t *wqh = &congestion_wqh[sync];

bit = sync ? BDI_sync_congested : BDI_async_congested;
-clear_bit(bit, &bdi->state);
+if (test_and_clear_bit(bit, &bdi->state))
+atomic_dec(&nr_bdi_congested[sync]);
smp_mb__after_clear_bit();
if (waitqueue_active(wqh))
wake_up(wqh);
@@ -748,7 +750,8 @@ void set_bdi_congested(struct backing_dev_info *bdi, int sync)
enum bdi_state bit;

bit = sync ? BDI_sync_congested : BDI_async_congested;
-set_bit(bit, &bdi->state);
+if (!test_and_set_bit(bit, &bdi->state))
+atomic_inc(&nr_bdi_congested[sync]);
}
EXPORT_SYMBOL(set_bdi_congested);

@@ -779,3 +782,57 @@ long congestion_wait(int sync, long timeout)
}
EXPORT_SYMBOL(congestion_wait);

/**
* wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a zone to complete writes
* @zone: A zone to check if it is heavily congested
* @sync: SYNC or ASYNC IO
* @timeout: timeout in jiffies
*
* If any backing_dev is congested and the given @zone has experienced
* recent congestion, this waits for up to @timeout jiffies for either
* a BDI to exit congestion of the given @sync queue or a write to
* complete.
*
* In the absence of zone congestion, cond_resched() is called to yield
* the processor if necessary, but the function otherwise does not sleep.
*
* The return value is 0 if the sleep is for the full timeout. Otherwise,
* it is the number of jiffies that were still remaining when the function
* returned. return_value == timeout implies the function did not sleep.
*/
long wait_iff_congested(struct zone *zone, int sync, long timeout)
{
long ret;
unsigned long start = jiffies;
DEFINE_WAIT(wait);
wait_queue_head_t *wqh = &congestion_wqh[sync];

/*
* If there is no congestion, or heavy congestion is not being
* encountered in the current zone, yield if necessary instead
* of sleeping on the congestion queue
*/
if (atomic_read(&nr_bdi_congested[sync]) == 0 ||
!zone_is_reclaim_congested(zone)) {
cond_resched();

/* In case we scheduled, work out time remaining */
ret = timeout - (jiffies - start);
if (ret < 0)
ret = 0;

goto out;
}

/* Sleep until uncongested or a write happens */
prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
ret = io_schedule_timeout(timeout);
finish_wait(wqh, &wait);

out:
trace_writeback_wait_iff_congested(jiffies_to_usecs(timeout),
jiffies_to_usecs(jiffies - start));

return ret;
}
EXPORT_SYMBOL(wait_iff_congested);
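The return value follows the same convention as congestion_wait(): the caller gets back the number of jiffies that were left, so a result equal to the timeout means the call did not sleep at all and 0 means the full timeout was consumed. A hedged caller sketch follows; the wrapper and its name are hypothetical, and the real callers are the mm/page_alloc.c and mm/vmscan.c hunks below.

/* Hypothetical caller; the in-tree users are in mm/page_alloc.c and mm/vmscan.c below. */
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/mmzone.h>
#include <linux/backing-dev.h>

static void throttle_on_congestion(struct zone *preferred_zone)
{
	long timeout = HZ/50;
	long remaining;

	/*
	 * Sleeps only if some BDI is congested *and* preferred_zone is
	 * tagged ZONE_CONGESTED; otherwise it merely calls cond_resched().
	 */
	remaining = wait_iff_congested(preferred_zone, BLK_RW_ASYNC, timeout);

	if (remaining == timeout)
		pr_debug("did not sleep: no relevant congestion\n");
	else if (remaining == 0)
		pr_debug("slept for the full timeout\n");
	else
		pr_debug("woken early, %ld jiffies remaining\n", remaining);
}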
4 changes: 2 additions & 2 deletions trunk/mm/page_alloc.c
@@ -1907,7 +1907,7 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
preferred_zone, migratetype);

if (!page && gfp_mask & __GFP_NOFAIL)
-congestion_wait(BLK_RW_ASYNC, HZ/50);
+wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
} while (!page && (gfp_mask & __GFP_NOFAIL));

return page;
@@ -2095,7 +2095,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
pages_reclaimed += did_some_progress;
if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) {
/* Wait for some write requests to complete then retry */
-congestion_wait(BLK_RW_ASYNC, HZ/50);
+wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
goto rebalance;
}

42 changes: 35 additions & 7 deletions trunk/mm/vmscan.c
@@ -401,10 +401,8 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
}
if (mapping->a_ops->writepage == NULL)
return PAGE_ACTIVATE;
-if (!may_write_to_queue(mapping->backing_dev_info, sc)) {
-disable_lumpy_reclaim_mode(sc);
+if (!may_write_to_queue(mapping->backing_dev_info, sc))
return PAGE_KEEP;
-}

if (clear_page_dirty_for_io(page)) {
int res;
@@ -681,11 +679,14 @@ static noinline_for_stack void free_page_list(struct list_head *free_pages)
* shrink_page_list() returns the number of reclaimed pages
*/
static unsigned long shrink_page_list(struct list_head *page_list,
struct zone *zone,
struct scan_control *sc)
{
LIST_HEAD(ret_pages);
LIST_HEAD(free_pages);
int pgactivate = 0;
unsigned long nr_dirty = 0;
unsigned long nr_congested = 0;
unsigned long nr_reclaimed = 0;

cond_resched();
@@ -705,6 +706,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
goto keep;

VM_BUG_ON(PageActive(page));
VM_BUG_ON(page_zone(page) != zone);

sc->nr_scanned++;

@@ -782,6 +784,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
}

if (PageDirty(page)) {
nr_dirty++;

if (references == PAGEREF_RECLAIM_CLEAN)
goto keep_locked;
if (!may_enter_fs)
@@ -792,6 +796,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
/* Page is dirty, try to write it out here */
switch (pageout(page, mapping, sc)) {
case PAGE_KEEP:
nr_congested++;
goto keep_locked;
case PAGE_ACTIVATE:
goto activate_locked;
@@ -902,6 +907,15 @@ static unsigned long shrink_page_list(struct list_head *page_list,
VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
}

/*
* Tag a zone as congested if all the dirty pages encountered were
* backed by a congested BDI. In this case, reclaimers should just
* back off and wait for congestion to clear because further reclaim
* will encounter the same problem
*/
if (nr_dirty == nr_congested)
zone_set_flag(zone, ZONE_CONGESTED);

free_page_list(&free_pages);

list_splice(&ret_pages, page_list);
@@ -1386,12 +1400,12 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,

spin_unlock_irq(&zone->lru_lock);

-nr_reclaimed = shrink_page_list(&page_list, sc);
+nr_reclaimed = shrink_page_list(&page_list, zone, sc);

/* Check if we should synchronously wait for writeback */
if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
set_lumpy_reclaim_mode(priority, sc, true);
-nr_reclaimed += shrink_page_list(&page_list, sc);
+nr_reclaimed += shrink_page_list(&page_list, zone, sc);
}

local_irq_disable();
@@ -1982,8 +1996,13 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,

/* Take a nap, wait for some writeback to complete */
if (!sc->hibernation_mode && sc->nr_scanned &&
-priority < DEF_PRIORITY - 2)
-congestion_wait(BLK_RW_ASYNC, HZ/10);
+priority < DEF_PRIORITY - 2) {
+struct zone *preferred_zone;
+
+first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask),
+NULL, &preferred_zone);
+wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10);
+}
}

out:
@@ -2282,6 +2301,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
if (!zone_watermark_ok(zone, order,
min_wmark_pages(zone), end_zone, 0))
has_under_min_watermark_zone = 1;
} else {
/*
* If a zone reaches its high watermark,
* consider it to be no longer congested. It's
* possible there are dirty pages backed by
* congested BDIs but as pressure is relieved,
* speculatively avoid congestion waits
*/
zone_clear_flag(zone, ZONE_CONGESTED);
}

}
