Skip to content

Commit

Permalink
memcg: relocate charge moving from ->attach to ->post_attach
Browse files Browse the repository at this point in the history
Hello,

So, this ended up a lot simpler than I originally expected.  I tested
it lightly and it seems to work fine.  Petr, can you please test these
two patches w/o the lru drain drop patch and see whether the problem
is gone?

Thanks.
------ 8< ------
If charge moving is used, memcg performs relabeling of the affected
pages from its ->attach callback which is called under both
cgroup_threadgroup_rwsem and thus can't create new kthreads.  This is
fragile as various operations may depend on workqueues making forward
progress which relies on the ability to create new kthreads.

There's no reason to perform charge moving from ->attach which is deep
in the task migration path.  Move it to ->post_attach which is called
after the actual migration is finished and cgroup_threadgroup_rwsem is
dropped.

* move_charge_struct->mm is added and ->can_attach is now responsible
  for pinning and recording the target mm.  mem_cgroup_clear_mc() is
  updated accordingly.  This also simplifies mem_cgroup_move_task().

* mem_cgroup_move_task() is now called from ->post_attach instead of
  ->attach.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@kernel.org>
Debugged-and-tested-by: Petr Mladek <pmladek@suse.com>
Reported-by: Cyril Hrubis <chrubis@suse.cz>
Reported-by: Johannes Weiner <hannes@cmpxchg.org>
Fixes: 1ed1328 ("sched, cgroup: replace signal_struct->group_rwsem with a global percpu_rwsem")
Cc: <stable@vger.kernel.org> # 4.4+
  • Loading branch information
Tejun Heo committed Apr 25, 2016
1 parent 5cf1cac commit 264a0ae
Showing 1 changed file with 19 additions and 18 deletions.
37 changes: 19 additions & 18 deletions mm/memcontrol.c
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
/* "mc" and its members are protected by cgroup_mutex */
static struct move_charge_struct {
spinlock_t lock; /* for from, to */
struct mm_struct *mm;
struct mem_cgroup *from;
struct mem_cgroup *to;
unsigned long flags;
Expand Down Expand Up @@ -4667,6 +4668,8 @@ static void __mem_cgroup_clear_mc(void)

static void mem_cgroup_clear_mc(void)
{
struct mm_struct *mm = mc.mm;

/*
* we must clear moving_task before waking up waiters at the end of
* task migration.
Expand All @@ -4676,7 +4679,10 @@ static void mem_cgroup_clear_mc(void)
spin_lock(&mc.lock);
mc.from = NULL;
mc.to = NULL;
mc.mm = NULL;
spin_unlock(&mc.lock);

mmput(mm);
}

static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
Expand Down Expand Up @@ -4733,6 +4739,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
VM_BUG_ON(mc.moved_swap);

spin_lock(&mc.lock);
mc.mm = mm;
mc.from = from;
mc.to = memcg;
mc.flags = move_flags;
Expand All @@ -4742,8 +4749,9 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
ret = mem_cgroup_precharge_mc(mm);
if (ret)
mem_cgroup_clear_mc();
} else {
mmput(mm);
}
mmput(mm);
return ret;
}

Expand Down Expand Up @@ -4852,11 +4860,11 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
return ret;
}

static void mem_cgroup_move_charge(struct mm_struct *mm)
static void mem_cgroup_move_charge(void)
{
struct mm_walk mem_cgroup_move_charge_walk = {
.pmd_entry = mem_cgroup_move_charge_pte_range,
.mm = mm,
.mm = mc.mm,
};

lru_add_drain_all();
Expand All @@ -4868,7 +4876,7 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
atomic_inc(&mc.from->moving_account);
synchronize_rcu();
retry:
if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
if (unlikely(!down_read_trylock(&mc.mm->mmap_sem))) {
/*
* Someone who are holding the mmap_sem might be waiting in
* waitq. So we cancel all extra charges, wake up all waiters,
Expand All @@ -4885,23 +4893,16 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
* additional charge, the page walk just aborts.
*/
walk_page_range(0, ~0UL, &mem_cgroup_move_charge_walk);
up_read(&mm->mmap_sem);
up_read(&mc.mm->mmap_sem);
atomic_dec(&mc.from->moving_account);
}

static void mem_cgroup_move_task(struct cgroup_taskset *tset)
static void mem_cgroup_move_task(void)
{
struct cgroup_subsys_state *css;
struct task_struct *p = cgroup_taskset_first(tset, &css);
struct mm_struct *mm = get_task_mm(p);

if (mm) {
if (mc.to)
mem_cgroup_move_charge(mm);
mmput(mm);
}
if (mc.to)
if (mc.to) {
mem_cgroup_move_charge();
mem_cgroup_clear_mc();
}
}
#else /* !CONFIG_MMU */
static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
Expand All @@ -4911,7 +4912,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
{
}
static void mem_cgroup_move_task(struct cgroup_taskset *tset)
static void mem_cgroup_move_task(void)
{
}
#endif
Expand Down Expand Up @@ -5195,7 +5196,7 @@ struct cgroup_subsys memory_cgrp_subsys = {
.css_reset = mem_cgroup_css_reset,
.can_attach = mem_cgroup_can_attach,
.cancel_attach = mem_cgroup_cancel_attach,
.attach = mem_cgroup_move_task,
.post_attach = mem_cgroup_move_task,
.bind = mem_cgroup_bind,
.dfl_cftypes = memory_files,
.legacy_cftypes = mem_cgroup_legacy_files,
Expand Down

0 comments on commit 264a0ae

Please sign in to comment.