Skip to content

Commit

Permalink
Merge branch 'for-4.4-fixes' of git://git.kernel.org/pub/scm/linux/ke…
Browse files Browse the repository at this point in the history
…rnel/git/tj/cgroup

Pull cgroup fixes from Tejun Heo:
 "More change than I'd have liked at this stage.  The pids controller
  and the changes made to cgroup core to support it introduced and
  revealed several important issues.

   - Assigning membership to a newly created task and migrating it can
     race leading to incorrect accounting.  Oleg fixed it by widening
     threadgroup synchronization.  It looks like we'll be able to merge
     it with a different percpu rwsem which is used in fork path making
     things simpler and cheaper.

   - The recent change to extend cgroup membership to zombies (so that
     pid accounting can extend till the pid is actually released) missed
     pinning the underlying data structures leading to use-after-free.
     Fixed.

   - v2 hierarchy was calling subsystem callbacks with the wrong target
     cgroup_subsys_state based on the incorrect assumption that they
     share the same target.  pids is the first controller affected by
     this.  Subsys callbacks updated so that they can deal with
     multi-target migrations"

* 'for-4.4-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup_pids: don't account for the root cgroup
  cgroup: fix handling of multi-destination migration from subtree_control enabling
  cgroup_freezer: simplify propagation of CGROUP_FROZEN clearing in freezer_attach()
  cgroup: pids: kill pids_fork(), simplify pids_can_fork() and pids_cancel_fork()
  cgroup: pids: fix race between cgroup_post_fork() and cgroup_migrate()
  cgroup: make css_set pin its css's to avoid use-afer-free
  cgroup: fix cftype->file_offset handling
  • Loading branch information
Linus Torvalds committed Dec 8, 2015
2 parents 633bb73 + 0b98f0c commit 5406812
Show file tree
Hide file tree
Showing 13 changed files with 202 additions and 183 deletions.
6 changes: 3 additions & 3 deletions block/blk-cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -1127,15 +1127,15 @@ void blkcg_exit_queue(struct request_queue *q)
* of the main cic data structures. For now we allow a task to change
* its cgroup only if it's the only owner of its ioc.
*/
static int blkcg_can_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
static int blkcg_can_attach(struct cgroup_taskset *tset)
{
struct task_struct *task;
struct cgroup_subsys_state *dst_css;
struct io_context *ioc;
int ret = 0;

/* task_lock() is needed to avoid races with exit_io_context() */
cgroup_taskset_for_each(task, tset) {
cgroup_taskset_for_each(task, dst_css, tset) {
task_lock(task);
ioc = task->io_context;
if (ioc && atomic_read(&ioc->nr_tasks) > 1)
Expand Down
13 changes: 3 additions & 10 deletions include/linux/cgroup-defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ enum {
*/
struct cgroup_file {
/* do not access any fields from outside cgroup core */
struct list_head node; /* anchored at css->files */
struct kernfs_node *kn;
};

Expand Down Expand Up @@ -134,9 +133,6 @@ struct cgroup_subsys_state {
*/
u64 serial_nr;

/* all cgroup_files associated with this css */
struct list_head files;

/* percpu_ref killing and RCU release */
struct rcu_head rcu_head;
struct work_struct destroy_work;
Expand Down Expand Up @@ -426,12 +422,9 @@ struct cgroup_subsys {
void (*css_reset)(struct cgroup_subsys_state *css);
void (*css_e_css_changed)(struct cgroup_subsys_state *css);

int (*can_attach)(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset);
void (*cancel_attach)(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset);
void (*attach)(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset);
int (*can_attach)(struct cgroup_taskset *tset);
void (*cancel_attach)(struct cgroup_taskset *tset);
void (*attach)(struct cgroup_taskset *tset);
int (*can_fork)(struct task_struct *task, void **priv_p);
void (*cancel_fork)(struct task_struct *task, void *priv);
void (*fork)(struct task_struct *task, void *priv);
Expand Down
47 changes: 23 additions & 24 deletions include/linux/cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_rm_cftypes(struct cftype *cfts);
void cgroup_file_notify(struct cgroup_file *cfile);

char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry);
Expand Down Expand Up @@ -119,8 +120,10 @@ struct cgroup_subsys_state *css_rightmost_descendant(struct cgroup_subsys_state
struct cgroup_subsys_state *css_next_descendant_post(struct cgroup_subsys_state *pos,
struct cgroup_subsys_state *css);

struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);
struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
struct cgroup_subsys_state **dst_cssp);
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
struct cgroup_subsys_state **dst_cssp);

void css_task_iter_start(struct cgroup_subsys_state *css,
struct css_task_iter *it);
Expand Down Expand Up @@ -235,30 +238,39 @@ void css_task_iter_end(struct css_task_iter *it);
/**
* cgroup_taskset_for_each - iterate cgroup_taskset
* @task: the loop cursor
* @dst_css: the destination css
* @tset: taskset to iterate
*
* @tset may contain multiple tasks and they may belong to multiple
* processes. When there are multiple tasks in @tset, if a task of a
* process is in @tset, all tasks of the process are in @tset. Also, all
* are guaranteed to share the same source and destination csses.
* processes.
*
* On the v2 hierarchy, there may be tasks from multiple processes and they
* may not share the source or destination csses.
*
* On traditional hierarchies, when there are multiple tasks in @tset, if a
* task of a process is in @tset, all tasks of the process are in @tset.
* Also, all are guaranteed to share the same source and destination csses.
*
* Iteration is not in any specific order.
*/
#define cgroup_taskset_for_each(task, tset) \
for ((task) = cgroup_taskset_first((tset)); (task); \
(task) = cgroup_taskset_next((tset)))
#define cgroup_taskset_for_each(task, dst_css, tset) \
for ((task) = cgroup_taskset_first((tset), &(dst_css)); \
(task); \
(task) = cgroup_taskset_next((tset), &(dst_css)))

/**
* cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset
* @leader: the loop cursor
* @dst_css: the destination css
* @tset: takset to iterate
*
* Iterate threadgroup leaders of @tset. For single-task migrations, @tset
* may not contain any.
*/
#define cgroup_taskset_for_each_leader(leader, tset) \
for ((leader) = cgroup_taskset_first((tset)); (leader); \
(leader) = cgroup_taskset_next((tset))) \
#define cgroup_taskset_for_each_leader(leader, dst_css, tset) \
for ((leader) = cgroup_taskset_first((tset), &(dst_css)); \
(leader); \
(leader) = cgroup_taskset_next((tset), &(dst_css))) \
if ((leader) != (leader)->group_leader) \
; \
else
Expand Down Expand Up @@ -516,19 +528,6 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
pr_cont_kernfs_path(cgrp->kn);
}

/**
* cgroup_file_notify - generate a file modified event for a cgroup_file
* @cfile: target cgroup_file
*
* @cfile must have been obtained by setting cftype->file_offset.
*/
static inline void cgroup_file_notify(struct cgroup_file *cfile)
{
/* might not have been created due to one of the CFTYPE selector flags */
if (cfile->kn)
kernfs_notify(cfile->kn);
}

#else /* !CONFIG_CGROUPS */

struct cgroup_subsys_state;
Expand Down
99 changes: 78 additions & 21 deletions kernel/cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,12 @@ static DEFINE_SPINLOCK(css_set_lock);
*/
static DEFINE_SPINLOCK(cgroup_idr_lock);

/*
* Protects cgroup_file->kn for !self csses. It synchronizes notifications
* against file removal/re-creation across css hiding.
*/
static DEFINE_SPINLOCK(cgroup_file_kn_lock);

/*
* Protects cgroup_subsys->release_agent_path. Modifying it also requires
* cgroup_mutex. Reading requires either cgroup_mutex or this spinlock.
Expand Down Expand Up @@ -754,9 +760,11 @@ static void put_css_set_locked(struct css_set *cset)
if (!atomic_dec_and_test(&cset->refcount))
return;

/* This css_set is dead. unlink it and release cgroup refcounts */
for_each_subsys(ss, ssid)
/* This css_set is dead. unlink it and release cgroup and css refs */
for_each_subsys(ss, ssid) {
list_del(&cset->e_cset_node[ssid]);
css_put(cset->subsys[ssid]);
}
hash_del(&cset->hlist);
css_set_count--;

Expand Down Expand Up @@ -1056,9 +1064,13 @@ static struct css_set *find_css_set(struct css_set *old_cset,
key = css_set_hash(cset->subsys);
hash_add(css_set_table, &cset->hlist, key);

for_each_subsys(ss, ssid)
for_each_subsys(ss, ssid) {
struct cgroup_subsys_state *css = cset->subsys[ssid];

list_add_tail(&cset->e_cset_node[ssid],
&cset->subsys[ssid]->cgroup->e_csets[ssid]);
&css->cgroup->e_csets[ssid]);
css_get(css);
}

spin_unlock_bh(&css_set_lock);

Expand Down Expand Up @@ -1393,6 +1405,16 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
char name[CGROUP_FILE_NAME_MAX];

lockdep_assert_held(&cgroup_mutex);

if (cft->file_offset) {
struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss);
struct cgroup_file *cfile = (void *)css + cft->file_offset;

spin_lock_irq(&cgroup_file_kn_lock);
cfile->kn = NULL;
spin_unlock_irq(&cgroup_file_kn_lock);
}

kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
}

Expand Down Expand Up @@ -1856,7 +1878,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)

INIT_LIST_HEAD(&cgrp->self.sibling);
INIT_LIST_HEAD(&cgrp->self.children);
INIT_LIST_HEAD(&cgrp->self.files);
INIT_LIST_HEAD(&cgrp->cset_links);
INIT_LIST_HEAD(&cgrp->pidlists);
mutex_init(&cgrp->pidlist_mutex);
Expand Down Expand Up @@ -2216,6 +2237,9 @@ struct cgroup_taskset {
struct list_head src_csets;
struct list_head dst_csets;

/* the subsys currently being processed */
int ssid;

/*
* Fields for cgroup_taskset_*() iteration.
*
Expand Down Expand Up @@ -2278,25 +2302,29 @@ static void cgroup_taskset_add(struct task_struct *task,
/**
* cgroup_taskset_first - reset taskset and return the first task
* @tset: taskset of interest
* @dst_cssp: output variable for the destination css
*
* @tset iteration is initialized and the first task is returned.
*/
struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset)
struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
struct cgroup_subsys_state **dst_cssp)
{
tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node);
tset->cur_task = NULL;

return cgroup_taskset_next(tset);
return cgroup_taskset_next(tset, dst_cssp);
}

/**
* cgroup_taskset_next - iterate to the next task in taskset
* @tset: taskset of interest
* @dst_cssp: output variable for the destination css
*
* Return the next task in @tset. Iteration must have been initialized
* with cgroup_taskset_first().
*/
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
struct cgroup_subsys_state **dst_cssp)
{
struct css_set *cset = tset->cur_cset;
struct task_struct *task = tset->cur_task;
Expand All @@ -2311,6 +2339,18 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
if (&task->cg_list != &cset->mg_tasks) {
tset->cur_cset = cset;
tset->cur_task = task;

/*
* This function may be called both before and
* after cgroup_taskset_migrate(). The two cases
* can be distinguished by looking at whether @cset
* has its ->mg_dst_cset set.
*/
if (cset->mg_dst_cset)
*dst_cssp = cset->mg_dst_cset->subsys[tset->ssid];
else
*dst_cssp = cset->subsys[tset->ssid];

return task;
}

Expand Down Expand Up @@ -2346,7 +2386,8 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
/* check that we can legitimately attach to the cgroup */
for_each_e_css(css, i, dst_cgrp) {
if (css->ss->can_attach) {
ret = css->ss->can_attach(css, tset);
tset->ssid = i;
ret = css->ss->can_attach(tset);
if (ret) {
failed_css = css;
goto out_cancel_attach;
Expand Down Expand Up @@ -2379,9 +2420,12 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
*/
tset->csets = &tset->dst_csets;

for_each_e_css(css, i, dst_cgrp)
if (css->ss->attach)
css->ss->attach(css, tset);
for_each_e_css(css, i, dst_cgrp) {
if (css->ss->attach) {
tset->ssid = i;
css->ss->attach(tset);
}
}

ret = 0;
goto out_release_tset;
Expand All @@ -2390,8 +2434,10 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
for_each_e_css(css, i, dst_cgrp) {
if (css == failed_css)
break;
if (css->ss->cancel_attach)
css->ss->cancel_attach(css, tset);
if (css->ss->cancel_attach) {
tset->ssid = i;
css->ss->cancel_attach(tset);
}
}
out_release_tset:
spin_lock_bh(&css_set_lock);
Expand Down Expand Up @@ -3313,9 +3359,9 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
if (cft->file_offset) {
struct cgroup_file *cfile = (void *)css + cft->file_offset;

kernfs_get(kn);
spin_lock_irq(&cgroup_file_kn_lock);
cfile->kn = kn;
list_add(&cfile->node, &css->files);
spin_unlock_irq(&cgroup_file_kn_lock);
}

return 0;
Expand Down Expand Up @@ -3552,6 +3598,22 @@ int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
return cgroup_add_cftypes(ss, cfts);
}

/**
* cgroup_file_notify - generate a file modified event for a cgroup_file
* @cfile: target cgroup_file
*
* @cfile must have been obtained by setting cftype->file_offset.
*/
void cgroup_file_notify(struct cgroup_file *cfile)
{
unsigned long flags;

spin_lock_irqsave(&cgroup_file_kn_lock, flags);
if (cfile->kn)
kernfs_notify(cfile->kn);
spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
}

/**
* cgroup_task_count - count the number of tasks in a cgroup.
* @cgrp: the cgroup in question
Expand Down Expand Up @@ -4613,13 +4675,9 @@ static void css_free_work_fn(struct work_struct *work)
container_of(work, struct cgroup_subsys_state, destroy_work);
struct cgroup_subsys *ss = css->ss;
struct cgroup *cgrp = css->cgroup;
struct cgroup_file *cfile;

percpu_ref_exit(&css->refcnt);

list_for_each_entry(cfile, &css->files, node)
kernfs_put(cfile->kn);

if (ss) {
/* css free path */
int id = css->id;
Expand Down Expand Up @@ -4724,7 +4782,6 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
css->ss = ss;
INIT_LIST_HEAD(&css->sibling);
INIT_LIST_HEAD(&css->children);
INIT_LIST_HEAD(&css->files);
css->serial_nr = css_serial_nr_next++;

if (cgroup_parent(cgrp)) {
Expand Down
Loading

0 comments on commit 5406812

Please sign in to comment.