---
r: 252041
b: refs/heads/master
c: 889976d
h: refs/heads/master
i:
  252039: 9116d87
v: v3
Ying Han authored and Linus Torvalds committed May 27, 2011
1 parent 0aa299f commit a985140
Showing 4 changed files with 107 additions and 8 deletions.
2 changes: 1 addition & 1 deletion [refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 4e4c941c108eff10844d2b441d96dab44f32f424
+refs/heads/master: 889976dbcb1218119fdd950fb7819084e37d7d37
1 change: 1 addition & 0 deletions trunk/include/linux/memcontrol.h
@@ -106,6 +106,7 @@ extern void mem_cgroup_end_migration(struct mem_cgroup *mem,
  */
 int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg);
 int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg);
+int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
 unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg,
                                        struct zone *zone,
                                        enum lru_list lru);
102 changes: 96 additions & 6 deletions trunk/mm/memcontrol.c
@@ -231,6 +231,11 @@ struct mem_cgroup {
          * reclaimed from.
          */
         int last_scanned_child;
+        int last_scanned_node;
+#if MAX_NUMNODES > 1
+        nodemask_t scan_nodes;
+        unsigned long next_scan_node_update;
+#endif
         /*
          * Should the accounting and control be hierarchical, per subtree?
          */
@@ -624,18 +629,27 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
         preempt_enable();
 }
 
+static unsigned long
+mem_cgroup_get_zonestat_node(struct mem_cgroup *mem, int nid, enum lru_list idx)
+{
+        struct mem_cgroup_per_zone *mz;
+        u64 total = 0;
+        int zid;
+
+        for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+                mz = mem_cgroup_zoneinfo(mem, nid, zid);
+                total += MEM_CGROUP_ZSTAT(mz, idx);
+        }
+        return total;
+}
 static unsigned long mem_cgroup_get_local_zonestat(struct mem_cgroup *mem,
                                         enum lru_list idx)
 {
-        int nid, zid;
-        struct mem_cgroup_per_zone *mz;
+        int nid;
         u64 total = 0;
 
         for_each_online_node(nid)
-                for (zid = 0; zid < MAX_NR_ZONES; zid++) {
-                        mz = mem_cgroup_zoneinfo(mem, nid, zid);
-                        total += MEM_CGROUP_ZSTAT(mz, idx);
-                }
+                total += mem_cgroup_get_zonestat_node(mem, nid, idx);
         return total;
 }

@@ -1418,6 +1432,81 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem)
         return ret;
 }
 
+#if MAX_NUMNODES > 1
+
+/*
+ * Always updating the nodemask is not very good - even if we have an empty
+ * list or the wrong list here, we can start from some node and traverse all
+ * nodes based on the zonelist. So update the list loosely once per 10 secs.
+ *
+ */
+static void mem_cgroup_may_update_nodemask(struct mem_cgroup *mem)
+{
+        int nid;
+
+        if (time_after(mem->next_scan_node_update, jiffies))
+                return;
+
+        mem->next_scan_node_update = jiffies + 10*HZ;
+        /* make a nodemask where this memcg uses memory from */
+        mem->scan_nodes = node_states[N_HIGH_MEMORY];
+
+        for_each_node_mask(nid, node_states[N_HIGH_MEMORY]) {
+
+                if (mem_cgroup_get_zonestat_node(mem, nid, LRU_INACTIVE_FILE) ||
+                    mem_cgroup_get_zonestat_node(mem, nid, LRU_ACTIVE_FILE))
+                        continue;
+
+                if (total_swap_pages &&
+                    (mem_cgroup_get_zonestat_node(mem, nid, LRU_INACTIVE_ANON) ||
+                    mem_cgroup_get_zonestat_node(mem, nid, LRU_ACTIVE_ANON)))
+                        continue;
+                node_clear(nid, mem->scan_nodes);
+        }
+}
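
For illustration only, here is a compilable userspace sketch of the same throttled rebuild: a bitmask of "nodes worth scanning" is recomputed at most once per 10 seconds, and nodes with nothing reclaimable are dropped. The names (MODEL_NODES, model_memcg, has_reclaimable) and the even-nodes-only predicate are invented stand-ins for the kernel's nodemask, jiffies and per-node LRU statistics; they are not part of this patch.

/*
 * Userspace model of mem_cgroup_may_update_nodemask() (illustrative only).
 */
#include <stdbool.h>
#include <stdint.h>
#include <time.h>

#define MODEL_NODES 8

struct model_memcg {
        uint32_t scan_nodes;            /* bit n set => node n has something to reclaim */
        time_t next_scan_update;        /* earliest time the mask may be rebuilt */
};

/* Stand-in for the LRU_*_FILE / LRU_*_ANON checks in the patch. */
static bool has_reclaimable(const struct model_memcg *mem, int nid)
{
        (void)mem;
        return nid % 2 == 0;            /* pretend only even-numbered nodes hold pages */
}

static void model_may_update_nodemask(struct model_memcg *mem)
{
        time_t now = time(NULL);
        int nid;

        if (now < mem->next_scan_update)        /* throttle: rebuild at most once per 10s */
                return;
        mem->next_scan_update = now + 10;

        mem->scan_nodes = (1u << MODEL_NODES) - 1;      /* start from "every node" */
        for (nid = 0; nid < MODEL_NODES; nid++)
                if (!has_reclaimable(mem, nid))
                        mem->scan_nodes &= ~(1u << nid);        /* drop empty nodes */
}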

+/*
+ * Selecting a node where we start reclaim from. Because what we need is just
+ * reducing the usage counter, starting from anywhere is OK. Considering
+ * memory reclaim from the current node, there are pros and cons.
+ *
+ * Freeing memory from the current node means freeing memory from a node which
+ * we'll use or we've used. So, it may make the LRU bad. And if several threads
+ * hit limits, they will see contention on a node. But freeing from a remote
+ * node means more costs for memory reclaim because of memory latency.
+ *
+ * For now, we use round-robin. A better algorithm is welcome.
+ */
+int mem_cgroup_select_victim_node(struct mem_cgroup *mem)
+{
+        int node;
+
+        mem_cgroup_may_update_nodemask(mem);
+        node = mem->last_scanned_node;
+
+        node = next_node(node, mem->scan_nodes);
+        if (node == MAX_NUMNODES)
+                node = first_node(mem->scan_nodes);
+        /*
+         * We call this when we hit the limit, not when pages are added to the
+         * LRU. No LRU may hold pages because all pages are UNEVICTABLE or the
+         * memcg is too small and all pages are not on the LRU. In that case,
+         * we use the current node.
+         */
+        if (unlikely(node == MAX_NUMNODES))
+                node = numa_node_id();
+
+        mem->last_scanned_node = node;
+        return node;
+}
+
+#else
+int mem_cgroup_select_victim_node(struct mem_cgroup *mem)
+{
+        return 0;
+}
+#endif
+
 /*
  * Scan the hierarchy if needed to reclaim memory. We remember the last child
  * we reclaimed from, so that we don't end up penalizing one child extensively
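
Continuing the userspace sketch above, the round-robin walk and its two fallbacks (wrap around to the first eligible node, then fall back to a "current" node when the mask is empty) might be modelled as follows. next_set_bit() and current_node() are simplified stand-ins for next_node()/first_node() and numa_node_id(), and the last-picked node is passed in by the caller rather than stored in the memcg as the patch does.

/* Find the first set bit strictly after 'after'; returning MODEL_NODES
 * means "no node", playing the role of MAX_NUMNODES in the patch. */
static int next_set_bit(uint32_t mask, int after)
{
        int nid;

        for (nid = after + 1; nid < MODEL_NODES; nid++)
                if (mask & (1u << nid))
                        return nid;
        return MODEL_NODES;
}

static int current_node(void)
{
        return 0;                       /* stand-in for numa_node_id() */
}

static int model_select_victim_node(struct model_memcg *mem, int last)
{
        int node;

        model_may_update_nodemask(mem);

        node = next_set_bit(mem->scan_nodes, last);     /* continue after the last pick */
        if (node == MODEL_NODES)                        /* ran off the end: wrap around */
                node = next_set_bit(mem->scan_nodes, -1);
        if (node == MODEL_NODES)                        /* nothing eligible: use the local node */
                node = current_node();
        return node;
}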
@@ -4606,6 +4695,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
                 res_counter_init(&mem->memsw, NULL);
         }
         mem->last_scanned_child = 0;
+        mem->last_scanned_node = MAX_NUMNODES;
         INIT_LIST_HEAD(&mem->oom_notify);
 
         if (parent)
10 changes: 9 additions & 1 deletion trunk/mm/vmscan.c
@@ -2226,6 +2226,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 {
         struct zonelist *zonelist;
         unsigned long nr_reclaimed;
+        int nid;
         struct scan_control sc = {
                 .may_writepage = !laptop_mode,
                 .may_unmap = 1,
@@ -2242,7 +2243,14 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
                 .gfp_mask = sc.gfp_mask,
         };
 
-        zonelist = NODE_DATA(numa_node_id())->node_zonelists;
+        /*
+         * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't
+         * care from which node we get pages. So the node where we start the
+         * scan does not need to be the current node.
+         */
+        nid = mem_cgroup_select_victim_node(mem_cont);
+
+        zonelist = NODE_DATA(nid)->node_zonelists;
 
         trace_mm_vmscan_memcg_reclaim_begin(0,
                                             sc.may_writepage,
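
To close the illustrative sketch from the memcontrol.c hunks above, a small driver shows the effect this vmscan.c change aims for: each simulated limit hit starts reclaim on the next eligible node instead of always on the allocating CPU's node. The six-iteration loop and the printed node numbers come from the toy model, not from the kernel.

#include <stdio.h>

int main(void)
{
        struct model_memcg memcg = { 0, 0 };
        int last = MODEL_NODES;         /* "never scanned yet", like MAX_NUMNODES */
        int hit;

        for (hit = 0; hit < 6; hit++) {
                last = model_select_victim_node(&memcg, last);
                /* In the kernel, this node would seed NODE_DATA(nid)->node_zonelists. */
                printf("limit hit %d: start reclaim from node %d\n", hit, last);
        }
        return 0;
}

With the toy predicate above (only even-numbered nodes populated), this prints the round-robin sequence 0, 2, 4, 6, 0, 2.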
