From f44dfc440863562141d4c14e214472a9a5abafa2 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 7 Jan 2013 08:51:07 -0800
Subject: [PATCH]

--- yaml ---
r: 350343
b: refs/heads/master
c: 3a5a6d0c2b0391e159fa5bf1dddb9bf1f35178a0
h: refs/heads/master
i:
  350341: 3239239548d6061ef600761bdb2054d9bb662876
  350339: c428a0d4ec8e96e716f637137e56ee6240c560af
  350335: 1f00c4633bfeb95197737053cf2539a061ad5d8e
v: v3
---
 [refs]                |  2 +-
 trunk/kernel/cpuset.c | 39 +++++++++++++++++++++++++++++++++++----
 2 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/[refs] b/[refs]
index 09d49c7d8d51..2d75cda7da3e 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: deb7aa308ea264b374d1db970174f5728a2faa27
+refs/heads/master: 3a5a6d0c2b0391e159fa5bf1dddb9bf1f35178a0
diff --git a/trunk/kernel/cpuset.c b/trunk/kernel/cpuset.c
index 3d448e646a4a..658eb1a32084 100644
--- a/trunk/kernel/cpuset.c
+++ b/trunk/kernel/cpuset.c
@@ -259,6 +259,13 @@ static char cpuset_name[CPUSET_NAME_LEN];
 static char cpuset_nodelist[CPUSET_NODELIST_LEN];
 static DEFINE_SPINLOCK(cpuset_buffer_lock);
 
+/*
+ * CPU / memory hotplug is handled asynchronously.
+ */
+static void cpuset_hotplug_workfn(struct work_struct *work);
+
+static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn);
+
 /*
  * This is ugly, but preserves the userspace API for existing cpuset
  * users. If someone tries to mount the "cpuset" filesystem, we
@@ -1565,6 +1572,19 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
 	struct cpuset *cs = cgroup_cs(cgrp);
 	struct cpuset *trialcs;
 
+	/*
+	 * CPU or memory hotunplug may leave @cs w/o any execution
+	 * resources, in which case the hotplug code asynchronously updates
+	 * configuration and transfers all tasks to the nearest ancestor
+	 * which can execute.
+	 *
+	 * As writes to "cpus" or "mems" may restore @cs's execution
+	 * resources, wait for the previously scheduled operations before
+	 * proceeding, so that we don't end up repeatedly removing tasks
+	 * added after execution capability is restored.
+	 */
+	flush_work(&cpuset_hotplug_work);
+
 	if (!cgroup_lock_live_group(cgrp))
 		return -ENODEV;
 
@@ -2095,7 +2115,7 @@ static void cpuset_propagate_hotplug(struct cpuset *cs)
 }
 
 /**
- * cpuset_handle_hotplug - handle CPU/memory hotunplug for a cpuset
+ * cpuset_hotplug_workfn - handle CPU/memory hotunplug for a cpuset
  *
  * This function is called after either CPU or memory configuration has
  * changed and updates cpuset accordingly. The top_cpuset is always
@@ -2110,7 +2130,7 @@ static void cpuset_propagate_hotplug(struct cpuset *cs)
  * Note that CPU offlining during suspend is ignored. We don't modify
  * cpusets across suspend/resume cycles at all.
  */
-static void cpuset_handle_hotplug(void)
+static void cpuset_hotplug_workfn(struct work_struct *work)
 {
 	static cpumask_t new_cpus, tmp_cpus;
 	static nodemask_t new_mems, tmp_mems;
@@ -2177,7 +2197,18 @@ static void cpuset_handle_hotplug(void)
 
 void cpuset_update_active_cpus(bool cpu_online)
 {
-	cpuset_handle_hotplug();
+	/*
+	 * We're inside cpu hotplug critical region which usually nests
+	 * inside cgroup synchronization. Bounce actual hotplug processing
+	 * to a work item to avoid reverse locking order.
+	 *
+	 * We still need to do partition_sched_domains() synchronously;
+	 * otherwise, the scheduler will get confused and put tasks on the
+	 * dead CPU. Fall back to the default single domain.
+	 * cpuset_hotplug_workfn() will rebuild it as necessary.
+	 */
+	partition_sched_domains(1, NULL, NULL);
+	schedule_work(&cpuset_hotplug_work);
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
@@ -2189,7 +2220,7 @@ void cpuset_update_active_cpus(bool cpu_online)
 static int cpuset_track_online_nodes(struct notifier_block *self,
 				     unsigned long action, void *arg)
 {
-	cpuset_handle_hotplug();
+	schedule_work(&cpuset_hotplug_work);
 	return NOTIFY_OK;
 }
 #endif
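
A note on the pattern the patch introduces, with a sketch below. The
hotplug callback runs inside a CPU-hotplug critical region where taking
cgroup-side locks would invert the locking order, so it only schedules a
work item; the userspace write path then waits for any pending work with
flush_work() before acting, because the deferred update may still move
tasks or restore execution resources. The sketch is a minimal,
self-contained module illustrating that shape, not part of the patch; all
demo_* names are hypothetical, while DECLARE_WORK(), schedule_work(),
flush_work() and the mutex API are real kernel interfaces.

	#include <linux/module.h>
	#include <linux/workqueue.h>
	#include <linux/mutex.h>

	static DEFINE_MUTEX(demo_config_mutex);	/* hypothetical config lock */
	static int demo_config_generation;

	/* Deferred processing runs in plain process context, so taking
	 * the config mutex here cannot invert any hotplug lock order. */
	static void demo_hotplug_workfn(struct work_struct *work)
	{
		mutex_lock(&demo_config_mutex);
		demo_config_generation++;
		mutex_unlock(&demo_config_mutex);
	}

	static DECLARE_WORK(demo_hotplug_work, demo_hotplug_workfn);

	/* Called from the hotplug critical region: defer, don't process. */
	static void demo_hotplug_event(void)
	{
		schedule_work(&demo_hotplug_work);
	}

	/* Called from a userspace-triggered path: wait for pending
	 * deferred updates first, mirroring the flush_work() the patch
	 * adds to cpuset_write_resmask(). */
	static int demo_write_config(void)
	{
		flush_work(&demo_hotplug_work);

		mutex_lock(&demo_config_mutex);
		/* ... apply the new configuration here ... */
		mutex_unlock(&demo_config_mutex);
		return 0;
	}

	static int __init demo_init(void)
	{
		demo_hotplug_event();	/* pretend a CPU just went down */
		return demo_write_config();
	}

	static void __exit demo_exit(void)
	{
		flush_work(&demo_hotplug_work);
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");

Note that the write path uses flush_work() rather than cancel_work_sync():
the pending update must complete and be observed, not be discarded, which
is why the patch flushes cpuset_hotplug_work in cpuset_write_resmask()
before a write to "cpus" or "mems" proceeds.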