cpuset: drop async_rebuild_sched_domains()

In general, we want to make cgroup_mutex one of the outermost locks
and be able to use get_online_cpus() and friends from cgroup methods.
With cpuset hotplug made async, get_online_cpus() can now be nested
inside cgroup_mutex.

Currently, cpuset avoids nesting get_online_cpus() inside cgroup_mutex
by bouncing sched_domain rebuilding to a work item.  As such nesting
is allowed now, remove the workqueue bouncing code and always rebuild
sched_domains synchronously.  This also nests sched_domains_mutex
inside cgroup_mutex, which is intended and should be okay.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Tejun Heo committed Jan 7, 2013
1 parent 3a5a6d0 commit 699140b
Showing 1 changed file with 16 additions and 60 deletions.
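To make the locking change concrete, here is a minimal, illustrative sketch of the nesting order the patch establishes. It is not part of the commit, the function name is made up, and it is not meant to compile on its own; it uses only APIs that appear in the diff below, plus sched_domains_mutex, which partition_sched_domains() takes internally.

/* Illustrative only -- the real code is in the diff below. */
static void sketch_new_nesting(void)	/* hypothetical name */
{
	cgroup_lock();		/* cgroup_mutex: now the outermost lock */
	get_online_cpus();	/* CPU hotplug read-lock nests inside it */

	/*
	 * generate_sched_domains(&doms, &attr);
	 * partition_sched_domains(ndoms, doms, attr);
	 *   partition_sched_domains() takes sched_domains_mutex, which now
	 *   also nests inside cgroup_mutex, as the commit message notes.
	 */

	put_online_cpus();
	cgroup_unlock();
}

Before the change, do_rebuild_sched_domains() took cgroup_lock() inside get_online_cpus(), the opposite order from the cpuset file handlers that already hold cgroup_mutex; avoiding that ABBA deadlock was the reason for the workqueue bounce this patch removes (see the comment deleted in the second hunk).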
kernel/cpuset.c (16 additions, 60 deletions)
@@ -60,14 +60,6 @@
 #include <linux/workqueue.h>
 #include <linux/cgroup.h>
 
-/*
- * Workqueue for cpuset related tasks.
- *
- * Using kevent workqueue may cause deadlock when memory_migrate
- * is set. So we create a separate workqueue thread for cpuset.
- */
-static struct workqueue_struct *cpuset_wq;
-
 /*
  * Tracks how many cpusets are currently defined in system.
  * When there is only one cpuset (the root cpuset) we can
@@ -753,33 +745,33 @@ static int generate_sched_domains(cpumask_var_t **domains,
 /*
  * Rebuild scheduler domains.
  *
- * Call with neither cgroup_mutex held nor within get_online_cpus().
- * Takes both cgroup_mutex and get_online_cpus().
+ * If the flag 'sched_load_balance' of any cpuset with non-empty
+ * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
+ * which has that flag enabled, or if any cpuset with a non-empty
+ * 'cpus' is removed, then call this routine to rebuild the
+ * scheduler's dynamic sched domains.
  *
- * Cannot be directly called from cpuset code handling changes
- * to the cpuset pseudo-filesystem, because it cannot be called
- * from code that already holds cgroup_mutex.
+ * Call with cgroup_mutex held.  Takes get_online_cpus().
  */
-static void do_rebuild_sched_domains(struct work_struct *unused)
+static void rebuild_sched_domains_locked(void)
 {
 	struct sched_domain_attr *attr;
 	cpumask_var_t *doms;
 	int ndoms;
 
+	WARN_ON_ONCE(!cgroup_lock_is_held());
 	get_online_cpus();
 
 	/* Generate domain masks and attrs */
-	cgroup_lock();
 	ndoms = generate_sched_domains(&doms, &attr);
-	cgroup_unlock();
 
 	/* Have scheduler rebuild the domains */
 	partition_sched_domains(ndoms, doms, attr);
 
 	put_online_cpus();
 }
 #else /* !CONFIG_SMP */
-static void do_rebuild_sched_domains(struct work_struct *unused)
+static void rebuild_sched_domains_locked(void)
 {
 }
 
@@ -791,44 +783,11 @@ static int generate_sched_domains(cpumask_var_t **domains,
 }
 #endif /* CONFIG_SMP */
 
-static DECLARE_WORK(rebuild_sched_domains_work, do_rebuild_sched_domains);
-
-/*
- * Rebuild scheduler domains, asynchronously via workqueue.
- *
- * If the flag 'sched_load_balance' of any cpuset with non-empty
- * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
- * which has that flag enabled, or if any cpuset with a non-empty
- * 'cpus' is removed, then call this routine to rebuild the
- * scheduler's dynamic sched domains.
- *
- * The rebuild_sched_domains() and partition_sched_domains()
- * routines must nest cgroup_lock() inside get_online_cpus(),
- * but such cpuset changes as these must nest that locking the
- * other way, holding cgroup_lock() for much of the code.
- *
- * So in order to avoid an ABBA deadlock, the cpuset code handling
- * these user changes delegates the actual sched domain rebuilding
- * to a separate workqueue thread, which ends up processing the
- * above do_rebuild_sched_domains() function.
- */
-static void async_rebuild_sched_domains(void)
-{
-	queue_work(cpuset_wq, &rebuild_sched_domains_work);
-}
-
-/*
- * Accomplishes the same scheduler domain rebuild as the above
- * async_rebuild_sched_domains(), however it directly calls the
- * rebuild routine synchronously rather than calling it via an
- * asynchronous work thread.
- *
- * This can only be called from code that is not holding
- * cgroup_mutex (not nested in a cgroup_lock() call.)
- */
 void rebuild_sched_domains(void)
 {
-	do_rebuild_sched_domains(NULL);
+	cgroup_lock();
+	rebuild_sched_domains_locked();
+	cgroup_unlock();
 }
 
 /**
@@ -948,7 +907,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 	heap_free(&heap);
 
 	if (is_load_balanced)
-		async_rebuild_sched_domains();
+		rebuild_sched_domains_locked();
 	return 0;
 }
 
@@ -1196,7 +1155,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
 		cs->relax_domain_level = val;
 		if (!cpumask_empty(cs->cpus_allowed) &&
 		    is_sched_load_balance(cs))
-			async_rebuild_sched_domains();
+			rebuild_sched_domains_locked();
 	}
 
 	return 0;
@@ -1288,7 +1247,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 	mutex_unlock(&callback_mutex);
 
 	if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
-		async_rebuild_sched_domains();
+		rebuild_sched_domains_locked();
 
 	if (spread_flag_changed)
 		update_tasks_flags(cs, &heap);
@@ -1925,7 +1884,7 @@ static void cpuset_css_offline(struct cgroup *cgrp)
 /*
  * If the cpuset being removed has its flag 'sched_load_balance'
  * enabled, then simulate turning sched_load_balance off, which
- * will call async_rebuild_sched_domains().
+ * will call rebuild_sched_domains_locked().
  */
 
 static void cpuset_css_free(struct cgroup *cont)
@@ -2237,9 +2196,6 @@ void __init cpuset_init_smp(void)
 	top_cpuset.mems_allowed = node_states[N_MEMORY];
 
 	hotplug_memory_notifier(cpuset_track_online_nodes, 10);
-
-	cpuset_wq = create_singlethread_workqueue("cpuset");
-	BUG_ON(!cpuset_wq);
 }
 
 /**