Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 139653
b: refs/heads/master
c: ec64f51
h: refs/heads/master
i:
  139651: f8febc9
v: v3
  • Loading branch information
KAMEZAWA Hiroyuki authored and Linus Torvalds committed Apr 3, 2009
1 parent 4c71949 commit e683106
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 20 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: 38460b48d06440de46b34cb778bd6c4855030754
refs/heads/master: ec64f51545fffbc4cb968f0cea56341a4b07e85a
6 changes: 4 additions & 2 deletions trunk/Documentation/cgroups/cgroups.txt
Original file line number Diff line number Diff line change
Expand Up @@ -476,11 +476,13 @@ cgroup->parent is still valid. (Note - can also be called for a
newly-created cgroup if an error occurs after this subsystem's
create() method has been called for the new cgroup).

void pre_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp);
int pre_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp);

Called before checking the reference count on each subsystem. This may
be useful for subsystems which have some extra references even if
there are not tasks in the cgroup.
there are no tasks in the cgroup. If pre_destroy() returns an error code,
rmdir() will fail with it. Because of this behavior, pre_destroy() can be
called multiple times against a cgroup.

int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
struct task_struct *task)
Expand Down
6 changes: 5 additions & 1 deletion trunk/include/linux/cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,10 @@ enum {
CGRP_RELEASABLE,
/* Control Group requires release notifications to userspace */
CGRP_NOTIFY_ON_RELEASE,
/*
* A thread in rmdir() is waiting for this cgroup.
*/
CGRP_WAIT_ON_RMDIR,
};

struct cgroup {
Expand Down Expand Up @@ -360,7 +364,7 @@ int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task);
struct cgroup_subsys {
struct cgroup_subsys_state *(*create)(struct cgroup_subsys *ss,
struct cgroup *cgrp);
void (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
int (*can_attach)(struct cgroup_subsys *ss,
struct cgroup *cgrp, struct task_struct *tsk);
Expand Down
81 changes: 67 additions & 14 deletions trunk/kernel/cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -622,13 +622,18 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
* Call subsys's pre_destroy handler.
* This is called before css refcnt check.
*/
static void cgroup_call_pre_destroy(struct cgroup *cgrp)
static int cgroup_call_pre_destroy(struct cgroup *cgrp)
{
struct cgroup_subsys *ss;
int ret = 0;

for_each_subsys(cgrp->root, ss)
if (ss->pre_destroy)
ss->pre_destroy(ss, cgrp);
return;
if (ss->pre_destroy) {
ret = ss->pre_destroy(ss, cgrp);
if (ret)
break;
}
return ret;
}

static void free_cgroup_rcu(struct rcu_head *obj)
Expand Down Expand Up @@ -722,6 +727,22 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
remove_dir(dentry);
}

/*
* A queue for waiters to do rmdir() cgroup. A task will sleep when
* cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some
* reference to css->refcnt. In general, this refcnt is expected to go down
* to zero, soon.
*
* CGRP_WAIT_ON_RMDIR flag is modified under cgroup's inode->i_mutex;
*/
DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);

/*
 * Wake every task sleeping on cgroup_rmdir_waitq, but only when @cgrp has
 * the CGRP_WAIT_ON_RMDIR bit set in its flags. The bit is tested with an
 * unlikely() hint, so the no-waiter case is the expected fast path.
 */
static void cgroup_wakeup_rmdir_waiters(const struct cgroup *cgrp)
{
	int rmdir_waiting = test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);

	if (unlikely(rmdir_waiting))
		wake_up_all(&cgroup_rmdir_waitq);
}

static int rebind_subsystems(struct cgroupfs_root *root,
unsigned long final_bits)
{
Expand Down Expand Up @@ -1317,6 +1338,12 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
synchronize_rcu();
put_css_set(cg);

/*
* wake up rmdir() waiter. the rmdir should fail since the cgroup
* is no longer empty.
*/
cgroup_wakeup_rmdir_waiters(cgrp);
return 0;
}

Expand Down Expand Up @@ -2608,9 +2635,11 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
struct cgroup *cgrp = dentry->d_fsdata;
struct dentry *d;
struct cgroup *parent;
DEFINE_WAIT(wait);
int ret;

/* the vfs holds both inode->i_mutex already */

again:
mutex_lock(&cgroup_mutex);
if (atomic_read(&cgrp->count) != 0) {
mutex_unlock(&cgroup_mutex);
Expand All @@ -2626,17 +2655,39 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
* Call pre_destroy handlers of subsys. Notify subsystems
* that rmdir() request comes.
*/
cgroup_call_pre_destroy(cgrp);
ret = cgroup_call_pre_destroy(cgrp);
if (ret)
return ret;

mutex_lock(&cgroup_mutex);
parent = cgrp->parent;

if (atomic_read(&cgrp->count)
|| !list_empty(&cgrp->children)
|| !cgroup_clear_css_refs(cgrp)) {
if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
mutex_unlock(&cgroup_mutex);
return -EBUSY;
}
/*
* css_put/get is provided for subsys to grab refcnt to css. In the typical
* case, a subsystem has no reference after pre_destroy(). But, under
* hierarchy management, some *temporary* refcnt can be held.
* To avoid returning -EBUSY to a user, a waitqueue is used. If the subsys
* is really busy, it should return -EBUSY at pre_destroy(). wake_up
* is called when css_put() is called and refcnt goes down to 0.
*/
set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);

if (!cgroup_clear_css_refs(cgrp)) {
mutex_unlock(&cgroup_mutex);
schedule();
finish_wait(&cgroup_rmdir_waitq, &wait);
clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
if (signal_pending(current))
return -EINTR;
goto again;
}
/* No css_tryget() can succeed after this point. */
finish_wait(&cgroup_rmdir_waitq, &wait);
clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);

spin_lock(&release_list_lock);
set_bit(CGRP_REMOVED, &cgrp->flags);
Expand Down Expand Up @@ -3194,10 +3245,12 @@ void __css_put(struct cgroup_subsys_state *css)
{
struct cgroup *cgrp = css->cgroup;
rcu_read_lock();
if ((atomic_dec_return(&css->refcnt) == 1) &&
notify_on_release(cgrp)) {
set_bit(CGRP_RELEASABLE, &cgrp->flags);
check_for_release(cgrp);
if (atomic_dec_return(&css->refcnt) == 1) {
if (notify_on_release(cgrp)) {
set_bit(CGRP_RELEASABLE, &cgrp->flags);
check_for_release(cgrp);
}
cgroup_wakeup_rmdir_waiters(cgrp);
}
rcu_read_unlock();
}
Expand Down
5 changes: 3 additions & 2 deletions trunk/mm/memcontrol.c
Original file line number Diff line number Diff line change
Expand Up @@ -2272,11 +2272,12 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
return ERR_PTR(-ENOMEM);
}

static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
static int mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
struct cgroup *cont)
{
struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
mem_cgroup_force_empty(mem, false);

return mem_cgroup_force_empty(mem, false);
}

static void mem_cgroup_destroy(struct cgroup_subsys *ss,
Expand Down

0 comments on commit e683106

Please sign in to comment.