memcg: use for_each_mem_cgroup
In memory cgroup management, we sometimes have to walk through a
subhierarchy of cgroups to gather information, lock something, etc.

Now, to do that, the mem_cgroup_walk_tree() function is provided.  It calls
a given callback function for each cgroup found.  The bad thing is that the
callback must have a fixed signature taking a "void *" argument, which adds
a lot of type casting to memcontrol.c.
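
For example, counting children with the old interface looks roughly like
this; it is a sketch mirroring the callbacks this patch removes, and the
names count_cb/count_children are illustrative only:

  static int count_cb(struct mem_cgroup *mem, void *data)
  {
          int *val = data;        /* cast the cookie back from "void *" */

          (*val)++;
          return 0;               /* a non-zero return would stop the walk */
  }

  static int count_children(struct mem_cgroup *mem)
  {
          int num = 0;

          mem_cgroup_walk_tree(mem, &num, count_cb);
          return num;
  }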

To make the code cleaner, this patch replaces mem_cgroup_walk_tree() with

  for_each_mem_cgroup_tree(iter, root)

This is an iterator-style call.  The good point is that the iterator doesn't
have to assume anything about the code that runs under it.  A bad point is
that it may cause a reference-count leak if a caller uses "break" to exit
the loop by mistake.
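
The intended way to stop the walk early is to flip the condition passed to
the _cond variant and "continue", so the iterator can drop the reference it
holds; roughly (should_stop() is a hypothetical predicate, shown only to
illustrate the pattern):

  struct mem_cgroup *iter;
  bool cond = true;

  for_each_mem_cgroup_tree_cond(iter, root, cond) {
          if (should_stop(iter)) {
                  cond = false;   /* the next step drops iter's reference */
                  continue;       /* never "break" out of the loop */
          }
          /* ... per-memcg work ... */
  }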

I think the benefit is larger.  The modified code seems straightforward and
easy to read because we no longer have mysterious callbacks and pointer casts.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
KAMEZAWA Hiroyuki authored and Linus Torvalds committed Oct 28, 2010
1 parent 32047e2 commit 7d74b06
Showing 1 changed file with 83 additions and 87 deletions.
170 changes: 83 additions & 87 deletions mm/memcontrol.c
@@ -660,40 +660,57 @@ static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
return mem;
}

/*
* Call callback function against all cgroup under hierarchy tree.
*/
static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data,
int (*func)(struct mem_cgroup *, void *))
/* The caller has to guarantee "mem" exists before calling this */
static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *mem)
{
int found, ret, nextid;
if (mem && css_tryget(&mem->css))
return mem;
return NULL;
}

static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter,
struct mem_cgroup *root,
bool cond)
{
int nextid = css_id(&iter->css) + 1;
int found;
int hierarchy_used;
struct cgroup_subsys_state *css;
struct mem_cgroup *mem;

if (!root->use_hierarchy)
return (*func)(root, data);
hierarchy_used = iter->use_hierarchy;

nextid = 1;
do {
ret = 0;
mem = NULL;
css_put(&iter->css);
if (!cond || !hierarchy_used)
return NULL;

do {
iter = NULL;
rcu_read_lock();
css = css_get_next(&mem_cgroup_subsys, nextid, &root->css,
&found);

css = css_get_next(&mem_cgroup_subsys, nextid,
&root->css, &found);
if (css && css_tryget(css))
mem = container_of(css, struct mem_cgroup, css);
iter = container_of(css, struct mem_cgroup, css);
rcu_read_unlock();

if (mem) {
ret = (*func)(mem, data);
css_put(&mem->css);
}
/* If css is NULL, no more cgroups will be found */
nextid = found + 1;
} while (!ret && css);
} while (css && !iter);

return ret;
return iter;
}
/*
* for_each_mem_cgroup_tree() for visiting all cgroup under tree. Please
* be careful that "break" loop is not allowed. We have reference count.
* Instead of that modify "cond" to be false and "continue" to exit the loop.
*/
#define for_each_mem_cgroup_tree_cond(iter, root, cond) \
for (iter = mem_cgroup_start_loop(root);\
iter != NULL;\
iter = mem_cgroup_get_next(iter, root, cond))

#define for_each_mem_cgroup_tree(iter, root) \
for_each_mem_cgroup_tree_cond(iter, root, true)


static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
{
@@ -1132,13 +1149,6 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *mem)
return false;
}

static int mem_cgroup_count_children_cb(struct mem_cgroup *mem, void *data)
{
int *val = data;
(*val)++;
return 0;
}

/**
* mem_cgroup_print_oom_info: Called from OOM with tasklist_lock held in read mode.
* @memcg: The memory cgroup that went over limit
@@ -1213,7 +1223,10 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
static int mem_cgroup_count_children(struct mem_cgroup *mem)
{
int num = 0;
mem_cgroup_walk_tree(mem, &num, mem_cgroup_count_children_cb);
struct mem_cgroup *iter;

for_each_mem_cgroup_tree(iter, mem)
num++;
return num;
}

@@ -1362,49 +1375,39 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
return total;
}

static int mem_cgroup_oom_lock_cb(struct mem_cgroup *mem, void *data)
{
int *val = (int *)data;
int x;
/*
* Logically, we can stop scanning immediately when we find
* a memcg is already locked. But condidering unlock ops and
* creation/removal of memcg, scan-all is simple operation.
*/
x = atomic_inc_return(&mem->oom_lock);
*val = max(x, *val);
return 0;
}
/*
* Check OOM-Killer is already running under our hierarchy.
* If someone is running, return false.
*/
static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
{
int lock_count = 0;
int x, lock_count = 0;
struct mem_cgroup *iter;

mem_cgroup_walk_tree(mem, &lock_count, mem_cgroup_oom_lock_cb);
for_each_mem_cgroup_tree(iter, mem) {
x = atomic_inc_return(&iter->oom_lock);
lock_count = max(x, lock_count);
}

if (lock_count == 1)
return true;
return false;
}

static int mem_cgroup_oom_unlock_cb(struct mem_cgroup *mem, void *data)
static int mem_cgroup_oom_unlock(struct mem_cgroup *mem)
{
struct mem_cgroup *iter;

/*
* When a new child is created while the hierarchy is under oom,
* mem_cgroup_oom_lock() may not be called. We have to use
* atomic_add_unless() here.
*/
atomic_add_unless(&mem->oom_lock, -1, 0);
for_each_mem_cgroup_tree(iter, mem)
atomic_add_unless(&iter->oom_lock, -1, 0);
return 0;
}

static void mem_cgroup_oom_unlock(struct mem_cgroup *mem)
{
mem_cgroup_walk_tree(mem, NULL, mem_cgroup_oom_unlock_cb);
}

static DEFINE_MUTEX(memcg_oom_mutex);
static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq);
@@ -3207,33 +3210,25 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
return retval;
}

struct mem_cgroup_idx_data {
s64 val;
enum mem_cgroup_stat_index idx;
};

static int
mem_cgroup_get_idx_stat(struct mem_cgroup *mem, void *data)
static u64 mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem,
enum mem_cgroup_stat_index idx)
{
struct mem_cgroup_idx_data *d = data;
d->val += mem_cgroup_read_stat(mem, d->idx);
return 0;
}
struct mem_cgroup *iter;
s64 val = 0;

static void
mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem,
enum mem_cgroup_stat_index idx, s64 *val)
{
struct mem_cgroup_idx_data d;
d.idx = idx;
d.val = 0;
mem_cgroup_walk_tree(mem, &d, mem_cgroup_get_idx_stat);
*val = d.val;
/* each per cpu's value can be minus. Then, use s64 */
for_each_mem_cgroup_tree(iter, mem)
val += mem_cgroup_read_stat(iter, idx);

if (val < 0) /* race ? */
val = 0;
return val;
}

static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap)
{
u64 idx_val, val;
u64 val;

if (!mem_cgroup_is_root(mem)) {
if (!swap)
@@ -3242,16 +3237,12 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap)
return res_counter_read_u64(&mem->memsw, RES_USAGE);
}

mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_CACHE, &idx_val);
val = idx_val;
mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_RSS, &idx_val);
val += idx_val;
val = mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_CACHE);
val += mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_RSS);

if (swap) {
mem_cgroup_get_recursive_idx_stat(mem,
MEM_CGROUP_STAT_SWAPOUT, &idx_val);
val += idx_val;
}
if (swap)
val += mem_cgroup_get_recursive_idx_stat(mem,
MEM_CGROUP_STAT_SWAPOUT);

return val << PAGE_SHIFT;
}
@@ -3459,9 +3450,9 @@ struct {
};


static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data)
static void
mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s)
{
struct mcs_total_stat *s = data;
s64 val;

/* per cpu stat */
@@ -3491,13 +3482,15 @@ static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data)
s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE;
val = mem_cgroup_get_local_zonestat(mem, LRU_UNEVICTABLE);
s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE;
return 0;
}

static void
mem_cgroup_get_total_stat(struct mem_cgroup *mem, struct mcs_total_stat *s)
{
mem_cgroup_walk_tree(mem, s, mem_cgroup_get_local_stat);
struct mem_cgroup *iter;

for_each_mem_cgroup_tree(iter, mem)
mem_cgroup_get_local_stat(iter, s);
}

static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
@@ -3674,7 +3667,7 @@ static int compare_thresholds(const void *a, const void *b)
return _a->threshold - _b->threshold;
}

static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem, void *data)
static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem)
{
struct mem_cgroup_eventfd_list *ev;

@@ -3685,7 +3678,10 @@ static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem, void *data)

static void mem_cgroup_oom_notify(struct mem_cgroup *mem)
{
mem_cgroup_walk_tree(mem, NULL, mem_cgroup_oom_notify_cb);
struct mem_cgroup *iter;

for_each_mem_cgroup_tree(iter, mem)
mem_cgroup_oom_notify_cb(iter);
}

static int mem_cgroup_usage_register_event(struct cgroup *cgrp,
