Skip to content

Commit

Permalink
memcg: swappiness
Browse files Browse the repository at this point in the history
Currently, /proc/sys/vm/swappiness can change the swappiness ratio for global
reclaim.  However, memcg reclaim doesn't have a tuning parameter of its own.

In general, the optimal swappiness depends on the workload.  (e.g.  an HPC
workload needs a lower swappiness than others.)

Per-cgroup swappiness therefore improves tunability for administrators.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
KOSAKI Motohiro authored and Linus Torvalds committed Jan 8, 2009
1 parent 2733c06 commit a7885eb
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 11 deletions.
9 changes: 9 additions & 0 deletions Documentation/controllers/memory.txt
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,15 @@ will be charged as a new owner of it.
showing for better debug please see the code for meanings.


5.3 swappiness
Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only.

The swappiness of the following cgroups cannot be changed:
- the root cgroup (it uses /proc/sys/vm/swappiness).
- a cgroup which uses hierarchy and has child cgroups.
- a cgroup which uses hierarchy and is not the root of the hierarchy.


6. Hierarchy support

The memory controller supports a deep hierarchy and hierarchical accounting.
Expand Down
3 changes: 2 additions & 1 deletion include/linux/swap.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,8 @@ static inline void lru_cache_add_active_file(struct page *page)
extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
gfp_t gfp_mask);
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap);
gfp_t gfp_mask, bool noswap,
unsigned int swappiness);
extern int __isolate_lru_page(struct page *page, int mode, int file);
extern unsigned long shrink_all_memory(unsigned long nr_pages);
extern int vm_swappiness;
Expand Down
78 changes: 71 additions & 7 deletions mm/memcontrol.c
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,9 @@ struct mem_cgroup {
int obsolete;
atomic_t refcnt;

unsigned int swappiness;


unsigned int inactive_ratio;

/*
Expand Down Expand Up @@ -636,6 +639,22 @@ static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem)
return false;
}

static unsigned int get_swappiness(struct mem_cgroup *memcg)
{
struct cgroup *cgrp = memcg->css.cgroup;
unsigned int swappiness;

/* root ? */
if (cgrp->parent == NULL)
return vm_swappiness;

spin_lock(&memcg->reclaim_param_lock);
swappiness = memcg->swappiness;
spin_unlock(&memcg->reclaim_param_lock);

return swappiness;
}

/*
* Dance down the hierarchy if needed to reclaim memory. We remember the
* last child we reclaimed from, so that we don't end up penalizing
Expand All @@ -656,7 +675,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
* but there might be left over accounting, even after children
* have left.
*/
ret = try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap);
ret = try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap,
get_swappiness(root_mem));
if (mem_cgroup_check_under_limit(root_mem))
return 0;
if (!root_mem->use_hierarchy)
Expand All @@ -672,7 +692,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
cgroup_unlock();
continue;
}
ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap);
ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap,
get_swappiness(next_mem));
if (mem_cgroup_check_under_limit(root_mem))
return 0;
cgroup_lock();
Expand Down Expand Up @@ -1400,7 +1421,8 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
rcu_read_unlock();

do {
progress = try_to_free_mem_cgroup_pages(mem, gfp_mask, true);
progress = try_to_free_mem_cgroup_pages(mem, gfp_mask, true,
get_swappiness(mem));
progress += mem_cgroup_check_under_limit(mem);
} while (!progress && --retry);

Expand Down Expand Up @@ -1468,7 +1490,9 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
break;

progress = try_to_free_mem_cgroup_pages(memcg,
GFP_KERNEL, false);
GFP_KERNEL,
false,
get_swappiness(memcg));
if (!progress) retry_count--;
}

Expand Down Expand Up @@ -1512,7 +1536,8 @@ int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
break;

oldusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL, true);
try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL, true,
get_swappiness(memcg));
curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
if (curusage >= oldusage)
retry_count--;
Expand Down Expand Up @@ -1643,8 +1668,8 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all)
ret = -EINTR;
goto out;
}
progress = try_to_free_mem_cgroup_pages(mem,
GFP_KERNEL, false);
progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL,
false, get_swappiness(mem));
if (!progress) {
nr_retries--;
/* maybe some writeback is necessary */
Expand Down Expand Up @@ -1864,6 +1889,37 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
return 0;
}

/*
 * read_u64 handler for the "swappiness" control file: report the value
 * get_swappiness() yields for the memory cgroup attached to @cgrp.
 * @cft is unused.
 */
static u64 mem_cgroup_swappiness_read(struct cgroup *cgrp, struct cftype *cft)
{
	return get_swappiness(mem_cgroup_from_cont(cgrp));
}

/*
 * write_u64 handler for the "swappiness" control file.
 *
 * Stores @val as the swappiness of the memory cgroup attached to @cgrp.
 * Returns 0 on success, or -EINVAL when:
 *   - @val is greater than 100;
 *   - @cgrp has no parent (the root cgroup);
 *   - the parent cgroup has use_hierarchy set;
 *   - this cgroup has use_hierarchy set and already has children.
 * @cft is unused.
 *
 * NOTE(review): the use_hierarchy/children checks are not made under any
 * lock visible in this function - confirm whether callers serialise against
 * concurrent hierarchy changes.
 */
static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft,
				       u64 val)
{
	struct mem_cgroup *self = mem_cgroup_from_cont(cgrp);
	struct mem_cgroup *above;

	if (val > 100)
		return -EINVAL;

	/* The root cgroup cannot be tuned through this file. */
	if (!cgrp->parent)
		return -EINVAL;

	above = mem_cgroup_from_cont(cgrp->parent);

	/* Under a hierarchy, only a childless hierarchy root may be changed. */
	if (above->use_hierarchy)
		return -EINVAL;
	if (self->use_hierarchy && !list_empty(&cgrp->children))
		return -EINVAL;

	spin_lock(&self->reclaim_param_lock);
	self->swappiness = val;
	spin_unlock(&self->reclaim_param_lock);

	return 0;
}


static struct cftype mem_cgroup_files[] = {
{
Expand Down Expand Up @@ -1902,6 +1958,11 @@ static struct cftype mem_cgroup_files[] = {
.write_u64 = mem_cgroup_hierarchy_write,
.read_u64 = mem_cgroup_hierarchy_read,
},
{
.name = "swappiness",
.read_u64 = mem_cgroup_swappiness_read,
.write_u64 = mem_cgroup_swappiness_write,
},
};

#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
Expand Down Expand Up @@ -2093,6 +2154,9 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
mem->last_scanned_child = NULL;
spin_lock_init(&mem->reclaim_param_lock);

if (parent)
mem->swappiness = get_swappiness(parent);

return &mem->css;
free_out:
for_each_node_state(node, N_POSSIBLE)
Expand Down
7 changes: 4 additions & 3 deletions mm/vmscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -1707,14 +1707,15 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
#ifdef CONFIG_CGROUP_MEM_RES_CTLR

unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
gfp_t gfp_mask,
bool noswap)
gfp_t gfp_mask,
bool noswap,
unsigned int swappiness)
{
struct scan_control sc = {
.may_writepage = !laptop_mode,
.may_swap = 1,
.swap_cluster_max = SWAP_CLUSTER_MAX,
.swappiness = vm_swappiness,
.swappiness = swappiness,
.order = 0,
.mem_cgroup = mem_cont,
.isolate_pages = mem_cgroup_isolate_pages,
Expand Down

0 comments on commit a7885eb

Please sign in to comment.