Skip to content

Commit

Permalink
Merge branch 'for-5.16-fixes' of git://git.kernel.org/pub/scm/linux/k…
Browse files Browse the repository at this point in the history
…ernel/git/tj/cgroup

Pull cgroup fixes from Tejun Heo:
 "This contains the cgroup.procs permission check fixes so that they use
  the credentials at the time of open rather than write, which also
  fixes the cgroup namespace lifetime bug"

* 'for-5.16-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  selftests: cgroup: Test open-time cgroup namespace usage for migration checks
  selftests: cgroup: Test open-time credential usage for migration checks
  selftests: cgroup: Make cg_create() use 0755 for permission instead of 0644
  cgroup: Use open-time cgroup namespace for process migration perm checks
  cgroup: Allocate cgroup_file_ctx for kernfs_open_file->priv
  cgroup: Use open-time credentials for process migraton perm checks
  • Loading branch information
Linus Torvalds committed Jan 7, 2022
2 parents 35632d9 + bf35a78 commit d1587f7
Show file tree
Hide file tree
Showing 5 changed files with 263 additions and 44 deletions.
19 changes: 19 additions & 0 deletions kernel/cgroup/cgroup-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,25 @@ static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc)
return container_of(kfc, struct cgroup_fs_context, kfc);
}

struct cgroup_pidlist;

struct cgroup_file_ctx {
struct cgroup_namespace *ns;

struct {
void *trigger;
} psi;

struct {
bool started;
struct css_task_iter iter;
} procs;

struct {
struct cgroup_pidlist *pidlist;
} procs1;
};

/*
* A cgroup can be associated with multiple css_sets as different tasks may
* belong to different cgroups on different hierarchies. In the other
Expand Down
33 changes: 18 additions & 15 deletions kernel/cgroup/cgroup-v1.c
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
* next pid to display, if any
*/
struct kernfs_open_file *of = s->private;
struct cgroup_file_ctx *ctx = of->priv;
struct cgroup *cgrp = seq_css(s)->cgroup;
struct cgroup_pidlist *l;
enum cgroup_filetype type = seq_cft(s)->private;
Expand All @@ -403,25 +404,24 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
mutex_lock(&cgrp->pidlist_mutex);

/*
* !NULL @of->priv indicates that this isn't the first start()
* after open. If the matching pidlist is around, we can use that.
* Look for it. Note that @of->priv can't be used directly. It
* could already have been destroyed.
* !NULL @ctx->procs1.pidlist indicates that this isn't the first
* start() after open. If the matching pidlist is around, we can use
* that. Look for it. Note that @ctx->procs1.pidlist can't be used
* directly. It could already have been destroyed.
*/
if (of->priv)
of->priv = cgroup_pidlist_find(cgrp, type);
if (ctx->procs1.pidlist)
ctx->procs1.pidlist = cgroup_pidlist_find(cgrp, type);

/*
* Either this is the first start() after open or the matching
* pidlist has been destroyed inbetween. Create a new one.
*/
if (!of->priv) {
ret = pidlist_array_load(cgrp, type,
(struct cgroup_pidlist **)&of->priv);
if (!ctx->procs1.pidlist) {
ret = pidlist_array_load(cgrp, type, &ctx->procs1.pidlist);
if (ret)
return ERR_PTR(ret);
}
l = of->priv;
l = ctx->procs1.pidlist;

if (pid) {
int end = l->length;
Expand Down Expand Up @@ -449,7 +449,8 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
static void cgroup_pidlist_stop(struct seq_file *s, void *v)
{
struct kernfs_open_file *of = s->private;
struct cgroup_pidlist *l = of->priv;
struct cgroup_file_ctx *ctx = of->priv;
struct cgroup_pidlist *l = ctx->procs1.pidlist;

if (l)
mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork,
Expand All @@ -460,7 +461,8 @@ static void cgroup_pidlist_stop(struct seq_file *s, void *v)
static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
{
struct kernfs_open_file *of = s->private;
struct cgroup_pidlist *l = of->priv;
struct cgroup_file_ctx *ctx = of->priv;
struct cgroup_pidlist *l = ctx->procs1.pidlist;
pid_t *p = v;
pid_t *end = l->list + l->length;
/*
Expand Down Expand Up @@ -504,10 +506,11 @@ static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of,
goto out_unlock;

/*
* Even if we're attaching all tasks in the thread group, we only
* need to check permissions on one of them.
* Even if we're attaching all tasks in the thread group, we only need
* to check permissions on one of them. Check permissions using the
* credentials from file open to protect against inherited fd attacks.
*/
cred = current_cred();
cred = of->file->f_cred;
tcred = get_task_cred(task);
if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
!uid_eq(cred->euid, tcred->uid) &&
Expand Down
88 changes: 60 additions & 28 deletions kernel/cgroup/cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -3630,6 +3630,7 @@ static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
size_t nbytes, enum psi_res res)
{
struct cgroup_file_ctx *ctx = of->priv;
struct psi_trigger *new;
struct cgroup *cgrp;
struct psi_group *psi;
Expand All @@ -3648,7 +3649,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
return PTR_ERR(new);
}

psi_trigger_replace(&of->priv, new);
psi_trigger_replace(&ctx->psi.trigger, new);

cgroup_put(cgrp);

Expand Down Expand Up @@ -3679,12 +3680,16 @@ static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of,
static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
poll_table *pt)
{
return psi_trigger_poll(&of->priv, of->file, pt);
struct cgroup_file_ctx *ctx = of->priv;

return psi_trigger_poll(&ctx->psi.trigger, of->file, pt);
}

static void cgroup_pressure_release(struct kernfs_open_file *of)
{
psi_trigger_replace(&of->priv, NULL);
struct cgroup_file_ctx *ctx = of->priv;

psi_trigger_replace(&ctx->psi.trigger, NULL);
}

bool cgroup_psi_enabled(void)
Expand Down Expand Up @@ -3811,24 +3816,43 @@ static ssize_t cgroup_kill_write(struct kernfs_open_file *of, char *buf,
static int cgroup_file_open(struct kernfs_open_file *of)
{
struct cftype *cft = of_cft(of);
struct cgroup_file_ctx *ctx;
int ret;

if (cft->open)
return cft->open(of);
return 0;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;

ctx->ns = current->nsproxy->cgroup_ns;
get_cgroup_ns(ctx->ns);
of->priv = ctx;

if (!cft->open)
return 0;

ret = cft->open(of);
if (ret) {
put_cgroup_ns(ctx->ns);
kfree(ctx);
}
return ret;
}

static void cgroup_file_release(struct kernfs_open_file *of)
{
struct cftype *cft = of_cft(of);
struct cgroup_file_ctx *ctx = of->priv;

if (cft->release)
cft->release(of);
put_cgroup_ns(ctx->ns);
kfree(ctx);
}

static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off)
{
struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
struct cgroup_file_ctx *ctx = of->priv;
struct cgroup *cgrp = of->kn->parent->priv;
struct cftype *cft = of_cft(of);
struct cgroup_subsys_state *css;
Expand All @@ -3845,7 +3869,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
*/
if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) &&
!(cft->flags & CFTYPE_NS_DELEGATABLE) &&
ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp)
ctx->ns != &init_cgroup_ns && ctx->ns->root_cset->dfl_cgrp == cgrp)
return -EPERM;

if (cft->write)
Expand Down Expand Up @@ -4751,43 +4775,40 @@ void css_task_iter_end(struct css_task_iter *it)

static void cgroup_procs_release(struct kernfs_open_file *of)
{
if (of->priv) {
css_task_iter_end(of->priv);
kfree(of->priv);
}
struct cgroup_file_ctx *ctx = of->priv;

if (ctx->procs.started)
css_task_iter_end(&ctx->procs.iter);
}

static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
{
struct kernfs_open_file *of = s->private;
struct css_task_iter *it = of->priv;
struct cgroup_file_ctx *ctx = of->priv;

if (pos)
(*pos)++;

return css_task_iter_next(it);
return css_task_iter_next(&ctx->procs.iter);
}

static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
unsigned int iter_flags)
{
struct kernfs_open_file *of = s->private;
struct cgroup *cgrp = seq_css(s)->cgroup;
struct css_task_iter *it = of->priv;
struct cgroup_file_ctx *ctx = of->priv;
struct css_task_iter *it = &ctx->procs.iter;

/*
* When a seq_file is seeked, it's always traversed sequentially
* from position 0, so we can simply keep iterating on !0 *pos.
*/
if (!it) {
if (!ctx->procs.started) {
if (WARN_ON_ONCE((*pos)))
return ERR_PTR(-EINVAL);

it = kzalloc(sizeof(*it), GFP_KERNEL);
if (!it)
return ERR_PTR(-ENOMEM);
of->priv = it;
css_task_iter_start(&cgrp->self, iter_flags, it);
ctx->procs.started = true;
} else if (!(*pos)) {
css_task_iter_end(it);
css_task_iter_start(&cgrp->self, iter_flags, it);
Expand Down Expand Up @@ -4838,9 +4859,9 @@ static int cgroup_may_write(const struct cgroup *cgrp, struct super_block *sb)

static int cgroup_procs_write_permission(struct cgroup *src_cgrp,
struct cgroup *dst_cgrp,
struct super_block *sb)
struct super_block *sb,
struct cgroup_namespace *ns)
{
struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
struct cgroup *com_cgrp = src_cgrp;
int ret;

Expand Down Expand Up @@ -4869,11 +4890,12 @@ static int cgroup_procs_write_permission(struct cgroup *src_cgrp,

static int cgroup_attach_permissions(struct cgroup *src_cgrp,
struct cgroup *dst_cgrp,
struct super_block *sb, bool threadgroup)
struct super_block *sb, bool threadgroup,
struct cgroup_namespace *ns)
{
int ret = 0;

ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb);
ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb, ns);
if (ret)
return ret;

Expand All @@ -4890,8 +4912,10 @@ static int cgroup_attach_permissions(struct cgroup *src_cgrp,
static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
bool threadgroup)
{
struct cgroup_file_ctx *ctx = of->priv;
struct cgroup *src_cgrp, *dst_cgrp;
struct task_struct *task;
const struct cred *saved_cred;
ssize_t ret;
bool locked;

Expand All @@ -4909,9 +4933,16 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
spin_unlock_irq(&css_set_lock);

/* process and thread migrations follow same delegation rule */
/*
* Process and thread migrations follow same delegation rule. Check
* permissions using the credentials from file open to protect against
* inherited fd attacks.
*/
saved_cred = override_creds(of->file->f_cred);
ret = cgroup_attach_permissions(src_cgrp, dst_cgrp,
of->file->f_path.dentry->d_sb, threadgroup);
of->file->f_path.dentry->d_sb,
threadgroup, ctx->ns);
revert_creds(saved_cred);
if (ret)
goto out_finish;

Expand Down Expand Up @@ -6130,7 +6161,8 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs)
goto err;

ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb,
!(kargs->flags & CLONE_THREAD));
!(kargs->flags & CLONE_THREAD),
current->nsproxy->cgroup_ns);
if (ret)
goto err;

Expand Down
2 changes: 1 addition & 1 deletion tools/testing/selftests/cgroup/cgroup_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ int cg_find_unified_root(char *root, size_t len)

int cg_create(const char *cgroup)
{
return mkdir(cgroup, 0644);
return mkdir(cgroup, 0755);
}

int cg_wait_for_proc_count(const char *cgroup, int count)
Expand Down
Loading

0 comments on commit d1587f7

Please sign in to comment.