From 97b116b02275a5ca0e2fa9c5735abb3ba63ca31b Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Thu, 23 Aug 2012 14:11:25 +0200
Subject: [PATCH] --- yaml ---

r: 323556
b: refs/heads/master
c: 6a6c0272f17cc80a8286d915f2ddf31557c2d559
h: refs/heads/master
v: v3
---
 [refs]                            |  2 +-
 trunk/arch/alpha/kernel/process.c |  3 +-
 trunk/arch/alpha/kernel/smp.c     |  1 +
 trunk/arch/x86/kernel/cpuid.c     |  5 --
 trunk/arch/x86/kernel/msr.c       |  5 --
 trunk/kernel/rcutree.c            | 93 ++++++++++++++++++++++++-------
 trunk/kernel/rcutree.h            |  3 +
 trunk/kernel/rcutree_trace.c      |  4 +-
 trunk/kernel/sched/core.c         | 41 +++++++-------
 9 files changed, 102 insertions(+), 55 deletions(-)

diff --git a/[refs] b/[refs]
index ae5ef0d1370c..7adf5c9f74b9 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 429227bbe55647aa42f8f63cac61e4544e248629
+refs/heads/master: 6a6c0272f17cc80a8286d915f2ddf31557c2d559
diff --git a/trunk/arch/alpha/kernel/process.c b/trunk/arch/alpha/kernel/process.c
index d6fde98b74b3..db56f31dbf70 100644
--- a/trunk/arch/alpha/kernel/process.c
+++ b/trunk/arch/alpha/kernel/process.c
@@ -56,7 +56,8 @@ cpu_idle(void)
 
 		while (!need_resched())
 			cpu_relax();
-		schedule();
+
+		schedule_preempt_disabled();
 	}
 }
 
diff --git a/trunk/arch/alpha/kernel/smp.c b/trunk/arch/alpha/kernel/smp.c
index 35ddc02bfa4a..a41ad90a97a6 100644
--- a/trunk/arch/alpha/kernel/smp.c
+++ b/trunk/arch/alpha/kernel/smp.c
@@ -166,6 +166,7 @@ smp_callin(void)
 	DBGS(("smp_callin: commencing CPU %d current %p active_mm %p\n",
 	      cpuid, current, current->active_mm));
 
+	preempt_disable();
 	/* Do nothing. */
 	cpu_idle();
 }
diff --git a/trunk/arch/x86/kernel/cpuid.c b/trunk/arch/x86/kernel/cpuid.c
index 60c78917190c..39472dd2323f 100644
--- a/trunk/arch/x86/kernel/cpuid.c
+++ b/trunk/arch/x86/kernel/cpuid.c
@@ -199,14 +199,12 @@ static int __init cpuid_init(void)
 		goto out_chrdev;
 	}
 	cpuid_class->devnode = cpuid_devnode;
-	get_online_cpus();
 	for_each_online_cpu(i) {
 		err = cpuid_device_create(i);
 		if (err != 0)
 			goto out_class;
 	}
 	register_hotcpu_notifier(&cpuid_class_cpu_notifier);
-	put_online_cpus();
 
 	err = 0;
 	goto out;
@@ -216,7 +214,6 @@ static int __init cpuid_init(void)
 	for_each_online_cpu(i) {
 		cpuid_device_destroy(i);
 	}
-	put_online_cpus();
 	class_destroy(cpuid_class);
 out_chrdev:
 	__unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
@@ -228,13 +225,11 @@ static void __exit cpuid_exit(void)
 {
 	int cpu = 0;
 
-	get_online_cpus();
 	for_each_online_cpu(cpu)
 		cpuid_device_destroy(cpu);
 	class_destroy(cpuid_class);
 	__unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
 	unregister_hotcpu_notifier(&cpuid_class_cpu_notifier);
-	put_online_cpus();
 }
 
 module_init(cpuid_init);
diff --git a/trunk/arch/x86/kernel/msr.c b/trunk/arch/x86/kernel/msr.c
index a7c5661f8496..eb113693f043 100644
--- a/trunk/arch/x86/kernel/msr.c
+++ b/trunk/arch/x86/kernel/msr.c
@@ -257,14 +257,12 @@ static int __init msr_init(void)
 		goto out_chrdev;
 	}
 	msr_class->devnode = msr_devnode;
-	get_online_cpus();
 	for_each_online_cpu(i) {
 		err = msr_device_create(i);
 		if (err != 0)
 			goto out_class;
 	}
 	register_hotcpu_notifier(&msr_class_cpu_notifier);
-	put_online_cpus();
 
 	err = 0;
 	goto out;
@@ -273,7 +271,6 @@ static int __init msr_init(void)
 	i = 0;
 	for_each_online_cpu(i)
 		msr_device_destroy(i);
-	put_online_cpus();
 	class_destroy(msr_class);
 out_chrdev:
 	__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
@@ -284,13 +281,11 @@ static int __init msr_init(void)
 static void __exit msr_exit(void)
 {
 	int cpu = 0;
-	get_online_cpus();
 	for_each_online_cpu(cpu)
 		msr_device_destroy(cpu);
 	class_destroy(msr_class);
 	__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
 	unregister_hotcpu_notifier(&msr_class_cpu_notifier);
-	put_online_cpus();
 }
 
 module_init(msr_init);
diff --git a/trunk/kernel/rcutree.c b/trunk/kernel/rcutree.c
index be76c80a14d1..f7bcd9e6c054 100644
--- a/trunk/kernel/rcutree.c
+++ b/trunk/kernel/rcutree.c
@@ -1392,6 +1392,17 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
 	int i;
 	struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
 
+	/*
+	 * If there is an rcu_barrier() operation in progress, then
+	 * only the task doing that operation is permitted to adopt
+	 * callbacks.  To do otherwise breaks rcu_barrier() and friends
+	 * by causing them to fail to wait for the callbacks in the
+	 * orphanage.
+	 */
+	if (rsp->rcu_barrier_in_progress &&
+	    rsp->rcu_barrier_in_progress != current)
+		return;
+
 	/* Do the accounting first. */
 	rdp->qlen_lazy += rsp->qlen_lazy;
 	rdp->qlen += rsp->qlen;
@@ -1446,8 +1457,9 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
  * The CPU has been completely removed, and some other CPU is reporting
  * this fact from process context.  Do the remainder of the cleanup,
  * including orphaning the outgoing CPU's RCU callbacks, and also
- * adopting them.  There can only be one CPU hotplug operation at a time,
- * so no other CPU can be attempting to update rcu_cpu_kthread_task.
+ * adopting them, if there is no _rcu_barrier() instance running.
+ * There can only be one CPU hotplug operation at a time, so no other
+ * CPU can be attempting to update rcu_cpu_kthread_task.
  */
 static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 {
@@ -1505,13 +1517,14 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 	WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
 		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
 		  cpu, rdp->qlen, rdp->nxtlist);
-	init_callback_list(rdp);
-	/* Disallow further callbacks on this CPU. */
-	rdp->nxttail[RCU_NEXT_TAIL] = NULL;
 }
 
 #else /* #ifdef CONFIG_HOTPLUG_CPU */
 
+static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
+{
+}
+
 static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
 {
 }
@@ -1930,12 +1943,6 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 	rdp = this_cpu_ptr(rsp->rda);
 
 	/* Add the callback to our list. */
-	if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL)) {
-		/* _call_rcu() is illegal on offline CPU; leak the callback. */
-		WARN_ON_ONCE(1);
-		local_irq_restore(flags);
-		return;
-	}
 	ACCESS_ONCE(rdp->qlen)++;
 	if (lazy)
 		rdp->qlen_lazy++;
@@ -2321,10 +2328,13 @@ static void rcu_barrier_func(void *type)
 static void _rcu_barrier(struct rcu_state *rsp)
 {
 	int cpu;
+	unsigned long flags;
 	struct rcu_data *rdp;
+	struct rcu_data rd;
 	unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done);
 	unsigned long snap_done;
 
+	init_rcu_head_on_stack(&rd.barrier_head);
 	_rcu_barrier_trace(rsp, "Begin", -1, snap);
 
 	/* Take mutex to serialize concurrent rcu_barrier() requests. */
@@ -2364,30 +2374,70 @@ static void _rcu_barrier(struct rcu_state *rsp)
 	/*
 	 * Initialize the count to one rather than to zero in order to
 	 * avoid a too-soon return to zero in case of a short grace period
-	 * (or preemption of this task).  Exclude CPU-hotplug operations
-	 * to ensure that no offline CPU has callbacks queued.
+	 * (or preemption of this task).  Also flag this task as doing
+	 * an rcu_barrier().  This will prevent anyone else from adopting
+	 * orphaned callbacks, which could otherwise cause failure if a
+	 * CPU went offline and quickly came back online.  To see this,
+	 * consider the following sequence of events:
+	 *
+	 * 1.	We cause CPU 0 to post an rcu_barrier_callback() callback.
+	 * 2.	CPU 1 goes offline, orphaning its callbacks.
+	 * 3.	CPU 0 adopts CPU 1's orphaned callbacks.
+	 * 4.	CPU 1 comes back online.
+	 * 5.	We cause CPU 1 to post an rcu_barrier_callback() callback.
+	 * 6.	Both rcu_barrier_callback() callbacks are invoked, awakening
+	 *	us -- but before CPU 1's orphaned callbacks are invoked!!!
 	 */
 	init_completion(&rsp->barrier_completion);
 	atomic_set(&rsp->barrier_cpu_count, 1);
-	get_online_cpus();
+	raw_spin_lock_irqsave(&rsp->onofflock, flags);
+	rsp->rcu_barrier_in_progress = current;
+	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
 
 	/*
-	 * Force each CPU with callbacks to register a new callback.
-	 * When that callback is invoked, we will know that all of the
-	 * corresponding CPU's preceding callbacks have been invoked.
+	 * Force every CPU with callbacks to register a new callback
+	 * that will tell us when all the preceding callbacks have
+	 * been invoked.  If an offline CPU has callbacks, wait for
+	 * it to either come back online or to finish orphaning those
+	 * callbacks.
 	 */
-	for_each_online_cpu(cpu) {
+	for_each_possible_cpu(cpu) {
+		preempt_disable();
 		rdp = per_cpu_ptr(rsp->rda, cpu);
-		if (ACCESS_ONCE(rdp->qlen)) {
+		if (cpu_is_offline(cpu)) {
+			_rcu_barrier_trace(rsp, "Offline", cpu,
+					   rsp->n_barrier_done);
+			preempt_enable();
+			while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen))
+				schedule_timeout_interruptible(1);
+		} else if (ACCESS_ONCE(rdp->qlen)) {
 			_rcu_barrier_trace(rsp, "OnlineQ", cpu,
 					   rsp->n_barrier_done);
 			smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
+			preempt_enable();
 		} else {
 			_rcu_barrier_trace(rsp, "OnlineNQ", cpu,
 					   rsp->n_barrier_done);
+			preempt_enable();
 		}
 	}
-	put_online_cpus();
+
+	/*
+	 * Now that all online CPUs have rcu_barrier_callback() callbacks
+	 * posted, we can adopt all of the orphaned callbacks and place
+	 * an rcu_barrier_callback() callback after them.  When that is done,
+	 * we are guaranteed to have an rcu_barrier_callback() callback
+	 * following every callback that could possibly have been
+	 * registered before _rcu_barrier() was called.
+	 */
+	raw_spin_lock_irqsave(&rsp->onofflock, flags);
+	rcu_adopt_orphan_cbs(rsp);
+	rsp->rcu_barrier_in_progress = NULL;
+	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
+	atomic_inc(&rsp->barrier_cpu_count);
+	smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */
+	rd.rsp = rsp;
+	rsp->call(&rd.barrier_head, rcu_barrier_callback);
 
 	/*
 	 * Now that we have an rcu_barrier_callback() callback on each
@@ -2408,6 +2458,8 @@ static void _rcu_barrier(struct rcu_state *rsp)
 
 	/* Other rcu_barrier() invocations can now safely proceed. */
 	mutex_unlock(&rsp->barrier_mutex);
+
+	destroy_rcu_head_on_stack(&rd.barrier_head);
 }
 
 /**
@@ -2473,7 +2525,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
 	rdp->qlen_last_fqs_check = 0;
 	rdp->n_force_qs_snap = rsp->n_force_qs;
 	rdp->blimit = blimit;
-	init_callback_list(rdp);  /* Re-enable callbacks on this CPU. */
 	rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
 	atomic_set(&rdp->dynticks->dynticks,
 		   (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
diff --git a/trunk/kernel/rcutree.h b/trunk/kernel/rcutree.h
index 94dfdf1f31f5..4d29169f2124 100644
--- a/trunk/kernel/rcutree.h
+++ b/trunk/kernel/rcutree.h
@@ -398,6 +398,9 @@ struct rcu_state {
 	struct rcu_head **orphan_donetail;	/* Tail of above. */
 	long qlen_lazy;				/* Number of lazy callbacks. */
 	long qlen;				/* Total number of callbacks. */
+	struct task_struct *rcu_barrier_in_progress;
+						/* Task doing rcu_barrier(), */
+						/* or NULL if no barrier. */
 	struct mutex barrier_mutex;		/* Guards barrier fields. */
 	atomic_t barrier_cpu_count;		/* # CPUs waiting on. */
 	struct completion barrier_completion;	/* Wake at barrier end. */
diff --git a/trunk/kernel/rcutree_trace.c b/trunk/kernel/rcutree_trace.c
index 6a2e52a85d77..abffb486e94e 100644
--- a/trunk/kernel/rcutree_trace.c
+++ b/trunk/kernel/rcutree_trace.c
@@ -51,8 +51,8 @@ static int show_rcubarrier(struct seq_file *m, void *unused)
 	struct rcu_state *rsp;
 
 	for_each_rcu_flavor(rsp)
-		seq_printf(m, "%s: bcc: %d nbd: %lu\n",
-			   rsp->name,
+		seq_printf(m, "%s: %c bcc: %d nbd: %lu\n",
+			   rsp->name, rsp->rcu_barrier_in_progress ? 'B' : '.',
 			   atomic_read(&rsp->barrier_cpu_count),
 			   rsp->n_barrier_done);
 	return 0;
diff --git a/trunk/kernel/sched/core.c b/trunk/kernel/sched/core.c
index 8c38b5e7ce47..fbf1fd098dc6 100644
--- a/trunk/kernel/sched/core.c
+++ b/trunk/kernel/sched/core.c
@@ -5304,17 +5304,27 @@ void idle_task_exit(void)
 }
 
 /*
- * Since this CPU is going 'away' for a while, fold any nr_active delta
- * we might have.  Assumes we're called after migrate_tasks() so that the
- * nr_active count is stable.
- *
- * Also see the comment "Global load-average calculations".
+ * While a dead CPU has no uninterruptible tasks queued at this point,
+ * it might still have a nonzero ->nr_uninterruptible counter, because
+ * for performance reasons the counter is not strictly tracking tasks to
+ * their home CPUs.  So we just add the counter to another CPU's counter,
+ * to keep the global sum constant after CPU-down:
  */
-static void calc_load_migrate(struct rq *rq)
+static void migrate_nr_uninterruptible(struct rq *rq_src)
 {
-	long delta = calc_load_fold_active(rq);
-	if (delta)
-		atomic_long_add(delta, &calc_load_tasks);
+	struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask));
+
+	rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible;
+	rq_src->nr_uninterruptible = 0;
+}
+
+/*
+ * Remove the tasks which were accounted by rq from calc_load_tasks.
+ */
+static void calc_global_load_remove(struct rq *rq)
+{
+	atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
+	rq->calc_load_active = 0;
 }
 
 /*
@@ -5607,18 +5617,9 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		migrate_tasks(cpu);
 		BUG_ON(rq->nr_running != 1); /* the migration thread */
 		raw_spin_unlock_irqrestore(&rq->lock, flags);
-		break;
 
-	case CPU_DEAD:
-		{
-			struct rq *dest_rq;
-
-			local_irq_save(flags);
-			dest_rq = cpu_rq(smp_processor_id());
-			raw_spin_lock(&dest_rq->lock);
-			calc_load_migrate(rq);
-			raw_spin_unlock_irqrestore(&dest_rq->lock, flags);
-		}
+		migrate_nr_uninterruptible(rq);
+		calc_global_load_remove(rq);
 		break;
 #endif
 	}
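
For reviewers unfamiliar with the counting scheme in _rcu_barrier() above:
the count is initialized to one rather than zero, one reference is added per
posted rcu_barrier_callback(), and the initial "bias" reference is dropped
last, so the count cannot hit zero while callbacks are still being posted.
The same scheme can be modeled in plain userspace C.  The sketch below is
illustrative only; every name in it (barrier_count, barrier_callback, worker,
NWORKERS) is invented for the example and none of it is kernel API.

/*
 * Userspace model of the _rcu_barrier() counting scheme (illustrative
 * only; nothing here is kernel code).  The count starts at 1 (the bias)
 * so that a worker that finishes immediately cannot drive it to zero
 * before all workers have been posted; the poster drops the bias last,
 * mirroring the atomic_set(..., 1) / atomic_dec_and_test() pattern.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

#define NWORKERS 4

static atomic_int barrier_count;
static pthread_mutex_t done_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t done_cv = PTHREAD_COND_INITIALIZER;
static int done;	/* protected by done_lock */

/* Analogue of rcu_barrier_callback(): the last decrement signals. */
static void barrier_callback(void)
{
	if (atomic_fetch_sub(&barrier_count, 1) == 1) {
		pthread_mutex_lock(&done_lock);
		done = 1;
		pthread_cond_broadcast(&done_cv);
		pthread_mutex_unlock(&done_lock);
	}
}

/* Each worker "invokes" its earlier callbacks, then the barrier one. */
static void *worker(void *arg)
{
	(void)arg;
	usleep(1000);	/* stand-in for invoking preceding callbacks */
	barrier_callback();
	return NULL;
}

int main(void)
{
	pthread_t tids[NWORKERS];
	int i;

	atomic_store(&barrier_count, 1);	/* the initial bias */

	for (i = 0; i < NWORKERS; i++) {
		atomic_fetch_add(&barrier_count, 1);	/* one per "CPU" */
		pthread_create(&tids[i], NULL, worker, NULL);
	}

	barrier_callback();	/* drop the bias; may be the last reference */

	pthread_mutex_lock(&done_lock);
	while (!done)
		pthread_cond_wait(&done_cv, &done_lock);
	pthread_mutex_unlock(&done_lock);

	for (i = 0; i < NWORKERS; i++)
		pthread_join(tids[i], NULL);
	printf("all callbacks completed\n");
	return 0;
}

Build with "cc -pthread"; the final line prints only after every worker's
callback has run, which is the guarantee rcu_barrier() provides for RCU
callbacks.  Dropping the bias before posting all workers would reintroduce
exactly the too-soon-return-to-zero problem the comment above describes.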