Skip to content

Commit

Permalink
cgroup: Optimize single thread migration
Browse files Browse the repository at this point in the history
There are reports of users who use thread migrations between cgroups and
they report performance drop after d59cfc0 ("sched, cgroup: replace
signal_struct->group_rwsem with a global percpu_rwsem"). The effect is
pronounced on machines with more CPUs.

The migration is affected by forking noise happening in the background,
after the mentioned commit a migrating thread must wait for all
(forking) processes on the system, not only of its threadgroup.

There are several places that need to synchronize with migration:
	a) do_exit,
	b) de_thread,
	c) copy_process,
	d) cgroup_update_dfl_csses,
	e) parallel migration (cgroup_{proc,thread}s_write).

In the case of self-migrating thread, we relax the synchronization on
cgroup_threadgroup_rwsem to avoid the cost of waiting. d) and e) are
excluded with cgroup_mutex, c) does not matter in case of single thread
migration and the executing thread cannot exec(2) or exit(2) while it is
writing into cgroup.threads. In case of do_exit because of signal
delivery, we either exit before the migration or finish the migration
(of not yet PF_EXITING thread) and die afterwards.

This patch handles only the case of self-migration by writing "0" into
cgroup.threads. For simplicity, we always take cgroup_threadgroup_rwsem
with numeric PIDs.

This change improves migration dependent workload performance similar
to per-signal_struct state.

Signed-off-by: Michal Koutný <mkoutny@suse.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
  • Loading branch information
Michal Koutný authored and Tejun Heo committed Oct 7, 2019
1 parent e7c7b1d commit 9a3284f
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 13 deletions.
5 changes: 3 additions & 2 deletions kernel/cgroup/cgroup-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -231,9 +231,10 @@ int cgroup_migrate(struct task_struct *leader, bool threadgroup,

int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
bool threadgroup);
struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup)
struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
bool *locked)
__acquires(&cgroup_threadgroup_rwsem);
void cgroup_procs_write_finish(struct task_struct *task)
void cgroup_procs_write_finish(struct task_struct *task, bool locked)
__releases(&cgroup_threadgroup_rwsem);

void cgroup_lock_and_drain_offline(struct cgroup *cgrp);
Expand Down
5 changes: 3 additions & 2 deletions kernel/cgroup/cgroup-v1.c
Original file line number Diff line number Diff line change
Expand Up @@ -495,12 +495,13 @@ static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of,
struct task_struct *task;
const struct cred *cred, *tcred;
ssize_t ret;
bool locked;

cgrp = cgroup_kn_lock_live(of->kn, false);
if (!cgrp)
return -ENODEV;

task = cgroup_procs_write_start(buf, threadgroup);
task = cgroup_procs_write_start(buf, threadgroup, &locked);
ret = PTR_ERR_OR_ZERO(task);
if (ret)
goto out_unlock;
Expand All @@ -522,7 +523,7 @@ static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of,
ret = cgroup_attach_task(cgrp, task, threadgroup);

out_finish:
cgroup_procs_write_finish(task);
cgroup_procs_write_finish(task, locked);
out_unlock:
cgroup_kn_unlock(of->kn);

Expand Down
39 changes: 30 additions & 9 deletions kernel/cgroup/cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -2824,7 +2824,8 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
return ret;
}

struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup)
struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
bool *locked)
__acquires(&cgroup_threadgroup_rwsem)
{
struct task_struct *tsk;
Expand All @@ -2833,7 +2834,21 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup)
if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
return ERR_PTR(-EINVAL);

percpu_down_write(&cgroup_threadgroup_rwsem);
/*
* If we migrate a single thread, we don't care about threadgroup
* stability. If the thread is `current`, it won't exit(2) under our
* hands or change PID through exec(2). We exclude
* cgroup_update_dfl_csses and other cgroup_{proc,thread}s_write
* callers by cgroup_mutex.
* Therefore, we can skip the global lock.
*/
lockdep_assert_held(&cgroup_mutex);
if (pid || threadgroup) {
percpu_down_write(&cgroup_threadgroup_rwsem);
*locked = true;
} else {
*locked = false;
}

rcu_read_lock();
if (pid) {
Expand Down Expand Up @@ -2864,13 +2879,16 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup)
goto out_unlock_rcu;

out_unlock_threadgroup:
percpu_up_write(&cgroup_threadgroup_rwsem);
if (*locked) {
percpu_up_write(&cgroup_threadgroup_rwsem);
*locked = false;
}
out_unlock_rcu:
rcu_read_unlock();
return tsk;
}

void cgroup_procs_write_finish(struct task_struct *task)
void cgroup_procs_write_finish(struct task_struct *task, bool locked)
__releases(&cgroup_threadgroup_rwsem)
{
struct cgroup_subsys *ss;
Expand All @@ -2879,7 +2897,8 @@ void cgroup_procs_write_finish(struct task_struct *task)
/* release reference from cgroup_procs_write_start() */
put_task_struct(task);

percpu_up_write(&cgroup_threadgroup_rwsem);
if (locked)
percpu_up_write(&cgroup_threadgroup_rwsem);
for_each_subsys(ss, ssid)
if (ss->post_attach)
ss->post_attach();
Expand Down Expand Up @@ -4754,12 +4773,13 @@ static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
struct cgroup *src_cgrp, *dst_cgrp;
struct task_struct *task;
ssize_t ret;
bool locked;

dst_cgrp = cgroup_kn_lock_live(of->kn, false);
if (!dst_cgrp)
return -ENODEV;

task = cgroup_procs_write_start(buf, true);
task = cgroup_procs_write_start(buf, true, &locked);
ret = PTR_ERR_OR_ZERO(task);
if (ret)
goto out_unlock;
Expand All @@ -4777,7 +4797,7 @@ static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
ret = cgroup_attach_task(dst_cgrp, task, true);

out_finish:
cgroup_procs_write_finish(task);
cgroup_procs_write_finish(task, locked);
out_unlock:
cgroup_kn_unlock(of->kn);

Expand All @@ -4795,14 +4815,15 @@ static ssize_t cgroup_threads_write(struct kernfs_open_file *of,
struct cgroup *src_cgrp, *dst_cgrp;
struct task_struct *task;
ssize_t ret;
bool locked;

buf = strstrip(buf);

dst_cgrp = cgroup_kn_lock_live(of->kn, false);
if (!dst_cgrp)
return -ENODEV;

task = cgroup_procs_write_start(buf, false);
task = cgroup_procs_write_start(buf, false, &locked);
ret = PTR_ERR_OR_ZERO(task);
if (ret)
goto out_unlock;
Expand All @@ -4826,7 +4847,7 @@ static ssize_t cgroup_threads_write(struct kernfs_open_file *of,
ret = cgroup_attach_task(dst_cgrp, task, false);

out_finish:
cgroup_procs_write_finish(task);
cgroup_procs_write_finish(task, locked);
out_unlock:
cgroup_kn_unlock(of->kn);

Expand Down

0 comments on commit 9a3284f

Please sign in to comment.