Skip to content

Commit

Permalink
Merge tag 'cgroup-for-6.3-rc6-fixes' of git://git.kernel.org/pub/scm/…
Browse files Browse the repository at this point in the history
…linux/kernel/git/tj/cgroup

Pull cgroup fixes from Tejun Heo:
 "This is a relatively big pull request this late in the cycle, but the
  major contributor is the set of cpuset fixes, which are rather significant:

   - Fix several cpuset bugs including one where it wasn't applying the
     target cgroup when tasks are created with CLONE_INTO_CGROUP

  With a few smaller fixes:

   - Fix inverted locking order in cgroup1 freezer implementation

   - Fix garbage cpu.stat::core_sched.forceidle_usec reporting in the
     root cgroup"

* tag 'cgroup-for-6.3-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup/cpuset: Make cpuset_attach_task() skip subpartitions CPUs for top_cpuset
  cgroup/cpuset: Add cpuset_can_fork() and cpuset_cancel_fork() methods
  cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP properly
  cgroup/cpuset: Wake up cpuset_attach_wq tasks in cpuset_cancel_attach()
  cgroup,freezer: hold cpu_hotplug_lock before freezer_mutex
  cgroup/cpuset: Fix partition root's cpuset.cpus update bug
  cgroup: fix display of forceidle time at root
  • Loading branch information
Linus Torvalds committed Apr 13, 2023
2 parents e44f45f + 7e27cb6 commit 4414975
Show file tree
Hide file tree
Showing 3 changed files with 150 additions and 39 deletions.
178 changes: 144 additions & 34 deletions kernel/cgroup/cpuset.c
Original file line number Diff line number Diff line change
Expand Up @@ -1513,7 +1513,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
spin_unlock_irq(&callback_lock);

if (adding || deleting)
update_tasks_cpumask(parent, tmp->new_cpus);
update_tasks_cpumask(parent, tmp->addmask);

/*
* Set or clear CS_SCHED_LOAD_BALANCE when partcmd_update, if necessary.
Expand Down Expand Up @@ -1770,10 +1770,13 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
/*
* Use the cpumasks in trialcs for tmpmasks when they are pointers
* to allocated cpumasks.
*
* Note that update_parent_subparts_cpumask() uses only addmask &
* delmask, but not new_cpus.
*/
tmp.addmask = trialcs->subparts_cpus;
tmp.delmask = trialcs->effective_cpus;
tmp.new_cpus = trialcs->cpus_allowed;
tmp.new_cpus = NULL;
#endif

retval = validate_change(cs, trialcs);
Expand Down Expand Up @@ -1838,6 +1841,11 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
}
spin_unlock_irq(&callback_lock);

#ifdef CONFIG_CPUMASK_OFFSTACK
/* Now trialcs->cpus_allowed is available */
tmp.new_cpus = trialcs->cpus_allowed;
#endif

/* effective_cpus will be updated here */
update_cpumasks_hier(cs, &tmp, false);

Expand Down Expand Up @@ -2445,6 +2453,20 @@ static int fmeter_getrate(struct fmeter *fmp)

static struct cpuset *cpuset_attach_old_cs;

/*
 * Check to see if a cpuset can accept a new task.
 * For v1, cpus_allowed and mems_allowed can't be empty.
 * For v2, effective_cpus can't be empty.
 * Note that in v1, effective_cpus = cpus_allowed.
 *
 * Returns 0 if the cpuset can accept a task, -ENOSPC otherwise.
 */
static int cpuset_can_attach_check(struct cpuset *cs)
{
	/* No CPUs to run on (v1 and v2 alike, since v1 mirrors cpus_allowed). */
	if (cpumask_empty(cs->effective_cpus))
		return -ENOSPC;
	/* v1 additionally requires a non-empty memory node mask. */
	if (!is_in_v2_mode() && nodes_empty(cs->mems_allowed))
		return -ENOSPC;
	return 0;
}

/* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */
static int cpuset_can_attach(struct cgroup_taskset *tset)
{
Expand All @@ -2459,16 +2481,9 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)

percpu_down_write(&cpuset_rwsem);

/* allow moving tasks into an empty cpuset if on default hierarchy */
ret = -ENOSPC;
if (!is_in_v2_mode() &&
(cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
goto out_unlock;

/*
* Task cannot be moved to a cpuset with empty effective cpus.
*/
if (cpumask_empty(cs->effective_cpus))
/* Check to see if task is allowed in the cpuset */
ret = cpuset_can_attach_check(cs);
if (ret)
goto out_unlock;

cgroup_taskset_for_each(task, css, tset) {
Expand All @@ -2485,7 +2500,6 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
* changes which zero cpus/mems_allowed.
*/
cs->attach_in_progress++;
ret = 0;
out_unlock:
percpu_up_write(&cpuset_rwsem);
return ret;
Expand All @@ -2494,25 +2508,47 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
static void cpuset_cancel_attach(struct cgroup_taskset *tset)
{
struct cgroup_subsys_state *css;
struct cpuset *cs;

cgroup_taskset_first(tset, &css);
cs = css_cs(css);

percpu_down_write(&cpuset_rwsem);
css_cs(css)->attach_in_progress--;
cs->attach_in_progress--;
if (!cs->attach_in_progress)
wake_up(&cpuset_attach_wq);
percpu_up_write(&cpuset_rwsem);
}

/*
* Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach()
* Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach_task()
* but we can't allocate it dynamically there. Define it global and
* allocate from cpuset_init().
*/
static cpumask_var_t cpus_attach;
static nodemask_t cpuset_attach_nodemask_to;

/*
 * Apply a cpuset's CPU affinity, memory nodemask and spread flags to one
 * task.  Must be called with cpuset_rwsem held: it uses the global
 * cpus_attach scratch mask and cpuset_attach_nodemask_to, both of which
 * are protected by that lock.
 */
static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task)
{
	percpu_rwsem_assert_held(&cpuset_rwsem);

	if (cs == &top_cpuset)
		/*
		 * Tasks in the top cpuset may use any possible CPU except
		 * those dedicated to subpartitions.
		 */
		cpumask_andnot(cpus_attach, task_cpu_possible_mask(task),
			       cs->subparts_cpus);
	else
		guarantee_online_cpus(task, cpus_attach);

	/*
	 * can_attach beforehand should guarantee that this doesn't
	 * fail. TODO: have a better way to handle failure here
	 */
	WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));

	cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
	cpuset_update_task_spread_flags(cs, task);
}

static void cpuset_attach(struct cgroup_taskset *tset)
{
/* static buf protected by cpuset_rwsem */
static nodemask_t cpuset_attach_nodemask_to;
struct task_struct *task;
struct task_struct *leader;
struct cgroup_subsys_state *css;
Expand Down Expand Up @@ -2543,20 +2579,8 @@ static void cpuset_attach(struct cgroup_taskset *tset)

guarantee_online_mems(cs, &cpuset_attach_nodemask_to);

cgroup_taskset_for_each(task, css, tset) {
if (cs != &top_cpuset)
guarantee_online_cpus(task, cpus_attach);
else
cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
/*
* can_attach beforehand should guarantee that this doesn't
* fail. TODO: have a better way to handle failure here
*/
WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));

cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
cpuset_update_task_spread_flags(cs, task);
}
cgroup_taskset_for_each(task, css, tset)
cpuset_attach_task(cs, task);

/*
* Change mm for all threadgroup leaders. This is expensive and may
Expand Down Expand Up @@ -3247,18 +3271,102 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
percpu_up_write(&cpuset_rwsem);
}

/*
 * In case the child is cloned into a cpuset different from its parent
 * (CLONE_INTO_CGROUP), additional checks are done to see if the move is
 * allowed.  Returns 0 on success or a negative errno on failure.  On
 * success, cs->attach_in_progress is incremented; it is dropped again by
 * cpuset_cancel_fork() or cpuset_fork().
 */
static int cpuset_can_fork(struct task_struct *task, struct css_set *cset)
{
struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
bool same_cs;
int ret;

/* Snapshot whether the child lands in the same cpuset as current. */
rcu_read_lock();
same_cs = (cs == task_cs(current));
rcu_read_unlock();

/* Plain fork into the same cpuset needs no extra validation. */
if (same_cs)
return 0;

lockdep_assert_held(&cgroup_mutex);
percpu_down_write(&cpuset_rwsem);

/* Check to see if task is allowed in the cpuset */
ret = cpuset_can_attach_check(cs);
if (ret)
goto out_unlock;

/* Scheduler-level affinity check against the target's effective CPUs. */
ret = task_can_attach(task, cs->effective_cpus);
if (ret)
goto out_unlock;

/* LSM hook: may deny the implied scheduling-parameter change. */
ret = security_task_setscheduler(task);
if (ret)
goto out_unlock;

/*
 * Mark attach is in progress. This makes validate_change() fail
 * changes which zero cpus/mems_allowed.
 */
cs->attach_in_progress++;
out_unlock:
percpu_up_write(&cpuset_rwsem);
return ret;
}

/*
 * Undo the attach_in_progress count taken by cpuset_can_fork() when a
 * CLONE_INTO_CGROUP fork is aborted before completion.
 */
static void cpuset_cancel_fork(struct task_struct *task, struct css_set *cset)
{
struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
bool same_cs;

/* Same check as cpuset_can_fork(): nothing was taken in the same-cs case. */
rcu_read_lock();
same_cs = (cs == task_cs(current));
rcu_read_unlock();

if (same_cs)
return;

percpu_down_write(&cpuset_rwsem);
cs->attach_in_progress--;
/* Wake waiters (e.g. cpuset_attach_wq sleepers) once no attach is pending. */
if (!cs->attach_in_progress)
wake_up(&cpuset_attach_wq);
percpu_up_write(&cpuset_rwsem);
}

/*
* Make sure the new task conform to the current state of its parent,
* which could have been changed by cpuset just after it inherits the
* state from the parent and before it sits on the cgroup's task list.
*/
static void cpuset_fork(struct task_struct *task)
{
if (task_css_is_root(task, cpuset_cgrp_id))
struct cpuset *cs;
bool same_cs;

rcu_read_lock();
cs = task_cs(task);
same_cs = (cs == task_cs(current));
rcu_read_unlock();

if (same_cs) {
if (cs == &top_cpuset)
return;

set_cpus_allowed_ptr(task, current->cpus_ptr);
task->mems_allowed = current->mems_allowed;
return;
}

/* CLONE_INTO_CGROUP */
percpu_down_write(&cpuset_rwsem);
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
cpuset_attach_task(cs, task);

cs->attach_in_progress--;
if (!cs->attach_in_progress)
wake_up(&cpuset_attach_wq);

set_cpus_allowed_ptr(task, current->cpus_ptr);
task->mems_allowed = current->mems_allowed;
percpu_up_write(&cpuset_rwsem);
}

struct cgroup_subsys cpuset_cgrp_subsys = {
Expand All @@ -3271,6 +3379,8 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
.attach = cpuset_attach,
.post_attach = cpuset_post_attach,
.bind = cpuset_bind,
.can_fork = cpuset_can_fork,
.cancel_fork = cpuset_cancel_fork,
.fork = cpuset_fork,
.legacy_cftypes = legacy_files,
.dfl_cftypes = dfl_files,
Expand Down
7 changes: 5 additions & 2 deletions kernel/cgroup/legacy_freezer.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <linux/freezer.h>
#include <linux/seq_file.h>
#include <linux/mutex.h>
#include <linux/cpu.h>

/*
* A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is
Expand Down Expand Up @@ -350,7 +351,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,

if (freeze) {
if (!(freezer->state & CGROUP_FREEZING))
static_branch_inc(&freezer_active);
static_branch_inc_cpuslocked(&freezer_active);
freezer->state |= state;
freeze_cgroup(freezer);
} else {
Expand All @@ -361,7 +362,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
if (!(freezer->state & CGROUP_FREEZING)) {
freezer->state &= ~CGROUP_FROZEN;
if (was_freezing)
static_branch_dec(&freezer_active);
static_branch_dec_cpuslocked(&freezer_active);
unfreeze_cgroup(freezer);
}
}
Expand All @@ -379,6 +380,7 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
{
struct cgroup_subsys_state *pos;

cpus_read_lock();
/*
* Update all its descendants in pre-order traversal. Each
* descendant will try to inherit its parent's FREEZING state as
Expand Down Expand Up @@ -407,6 +409,7 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
}
rcu_read_unlock();
mutex_unlock(&freezer_mutex);
cpus_read_unlock();
}

static ssize_t freezer_write(struct kernfs_open_file *of,
Expand Down
4 changes: 1 addition & 3 deletions kernel/cgroup/rstat.c
Original file line number Diff line number Diff line change
Expand Up @@ -457,9 +457,7 @@ static void root_cgroup_cputime(struct cgroup_base_stat *bstat)
struct task_cputime *cputime = &bstat->cputime;
int i;

cputime->stime = 0;
cputime->utime = 0;
cputime->sum_exec_runtime = 0;
memset(bstat, 0, sizeof(*bstat));
for_each_possible_cpu(i) {
struct kernel_cpustat kcpustat;
u64 *cpustat = kcpustat.cpustat;
Expand Down

0 comments on commit 4414975

Please sign in to comment.