From a25d96ea7fdfa4370a523109322d0c1bb06ee7f5 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 14 Mar 2012 22:17:39 -0400 Subject: [PATCH] --- yaml --- r: 302744 b: refs/heads/master c: 559f9badd11ddf399f88b18b4c0f110fd511ae53 h: refs/heads/master v: v3 --- [refs] | 2 +- trunk/include/linux/rculist.h | 7 +- trunk/include/trace/events/rcu.h | 2 - trunk/kernel/rcutree.c | 297 +++++++++---------------------- trunk/kernel/rcutree.h | 12 -- trunk/kernel/rcutree_plugin.h | 124 +++---------- trunk/kernel/rcutree_trace.c | 4 +- trunk/lib/list_debug.c | 22 +++ 8 files changed, 145 insertions(+), 325 deletions(-) diff --git a/[refs] b/[refs] index 91bd8f76173c..26f4606b4175 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: b1420f1c8bfc30ecf6380a31d0f686884834b599 +refs/heads/master: 559f9badd11ddf399f88b18b4c0f110fd511ae53 diff --git a/trunk/include/linux/rculist.h b/trunk/include/linux/rculist.h index d079290843a9..a20c05096231 100644 --- a/trunk/include/linux/rculist.h +++ b/trunk/include/linux/rculist.h @@ -30,6 +30,7 @@ * This is only for internal list manipulation where we know * the prev/next entries already! */ +#ifndef CONFIG_DEBUG_LIST static inline void __list_add_rcu(struct list_head *new, struct list_head *prev, struct list_head *next) { @@ -38,6 +39,10 @@ static inline void __list_add_rcu(struct list_head *new, rcu_assign_pointer(list_next_rcu(prev), new); next->prev = new; } +#else +extern void __list_add_rcu(struct list_head *new, + struct list_head *prev, struct list_head *next); +#endif /** * list_add_rcu - add a new entry to rcu-protected list @@ -108,7 +113,7 @@ static inline void list_add_tail_rcu(struct list_head *new, */ static inline void list_del_rcu(struct list_head *entry) { - __list_del(entry->prev, entry->next); + __list_del_entry(entry); entry->prev = LIST_POISON2; } diff --git a/trunk/include/trace/events/rcu.h b/trunk/include/trace/events/rcu.h index 1480900c511c..337099783f37 100644 --- a/trunk/include/trace/events/rcu.h +++ b/trunk/include/trace/events/rcu.h @@ -292,8 +292,6 @@ TRACE_EVENT(rcu_dyntick, * "More callbacks": Still more callbacks, try again to clear them out. * "Callbacks drained": All callbacks processed, off to dyntick idle! * "Timer": Timer fired to cause CPU to continue processing callbacks. - * "Demigrate": Timer fired on wrong CPU, woke up correct CPU. - * "Cleanup after idle": Idle exited, timer canceled. */ TRACE_EVENT(rcu_prep_idle, diff --git a/trunk/kernel/rcutree.c b/trunk/kernel/rcutree.c index e578dd327c64..1050d6d3922c 100644 --- a/trunk/kernel/rcutree.c +++ b/trunk/kernel/rcutree.c @@ -75,8 +75,6 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; .gpnum = -300, \ .completed = -300, \ .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ - .orphan_nxttail = &structname##_state.orphan_nxtlist, \ - .orphan_donetail = &structname##_state.orphan_donelist, \ .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \ .n_force_qs = 0, \ .n_force_qs_ngp = 0, \ @@ -147,13 +145,6 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); unsigned long rcutorture_testseq; unsigned long rcutorture_vernum; -/* State information for rcu_barrier() and friends. */ - -static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; -static atomic_t rcu_barrier_cpu_count; -static DEFINE_MUTEX(rcu_barrier_mutex); -static struct completion rcu_barrier_completion; - /* * Return true if an RCU grace period is in progress. 
The ACCESS_ONCE()s * permit this function to be invoked without holding the root rcu_node @@ -1320,133 +1311,95 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) #ifdef CONFIG_HOTPLUG_CPU /* - * Send the specified CPU's RCU callbacks to the orphanage. The - * specified CPU must be offline, and the caller must hold the - * ->onofflock. + * Move a dying CPU's RCU callbacks to online CPU's callback list. + * Also record a quiescent state for this CPU for the current grace period. + * Synchronization and interrupt disabling are not required because + * this function executes in stop_machine() context. Therefore, cleanup + * operations that might block must be done later from the CPU_DEAD + * notifier. + * + * Note that the outgoing CPU's bit has already been cleared in the + * cpu_online_mask. This allows us to randomly pick a callback + * destination from the bits set in that mask. */ -static void -rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, - struct rcu_node *rnp, struct rcu_data *rdp) +static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) { int i; + unsigned long mask; + int receive_cpu = cpumask_any(cpu_online_mask); + struct rcu_data *rdp = this_cpu_ptr(rsp->rda); + struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu); + RCU_TRACE(struct rcu_node *rnp = rdp->mynode); /* For dying CPU. */ - /* - * Orphan the callbacks. First adjust the counts. This is safe - * because ->onofflock excludes _rcu_barrier()'s adoption of - * the callbacks, thus no memory barrier is required. - */ + /* First, adjust the counts. */ if (rdp->nxtlist != NULL) { - rsp->qlen_lazy += rdp->qlen_lazy; - rsp->qlen += rdp->qlen; - rdp->n_cbs_orphaned += rdp->qlen; + receive_rdp->qlen_lazy += rdp->qlen_lazy; + receive_rdp->qlen += rdp->qlen; rdp->qlen_lazy = 0; rdp->qlen = 0; } /* - * Next, move those callbacks still needing a grace period to - * the orphanage, where some other CPU will pick them up. - * Some of the callbacks might have gone partway through a grace - * period, but that is too bad. They get to start over because we - * cannot assume that grace periods are synchronized across CPUs. - * We don't bother updating the ->nxttail[] array yet, instead - * we just reset the whole thing later on. + * Next, move ready-to-invoke callbacks to be invoked on some + * other CPU. These will not be required to pass through another + * grace period: They are done, regardless of CPU. */ - if (*rdp->nxttail[RCU_DONE_TAIL] != NULL) { - *rsp->orphan_nxttail = *rdp->nxttail[RCU_DONE_TAIL]; - rsp->orphan_nxttail = rdp->nxttail[RCU_NEXT_TAIL]; - *rdp->nxttail[RCU_DONE_TAIL] = NULL; + if (rdp->nxtlist != NULL && + rdp->nxttail[RCU_DONE_TAIL] != &rdp->nxtlist) { + struct rcu_head *oldhead; + struct rcu_head **oldtail; + struct rcu_head **newtail; + + oldhead = rdp->nxtlist; + oldtail = receive_rdp->nxttail[RCU_DONE_TAIL]; + rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; + *rdp->nxttail[RCU_DONE_TAIL] = *oldtail; + *receive_rdp->nxttail[RCU_DONE_TAIL] = oldhead; + newtail = rdp->nxttail[RCU_DONE_TAIL]; + for (i = RCU_DONE_TAIL; i < RCU_NEXT_SIZE; i++) { + if (receive_rdp->nxttail[i] == oldtail) + receive_rdp->nxttail[i] = newtail; + if (rdp->nxttail[i] == newtail) + rdp->nxttail[i] = &rdp->nxtlist; + } } /* - * Then move the ready-to-invoke callbacks to the orphanage, - * where some other CPU will pick them up. These will not be - * required to pass though another grace period: They are done. + * Finally, put the rest of the callbacks at the end of the list. 
+ * The ones that made it partway through get to start over: We + * cannot assume that grace periods are synchronized across CPUs. + * (We could splice RCU_WAIT_TAIL into RCU_NEXT_READY_TAIL, but + * this does not seem compelling. Not yet, anyway.) */ if (rdp->nxtlist != NULL) { - *rsp->orphan_donetail = rdp->nxtlist; - rsp->orphan_donetail = rdp->nxttail[RCU_DONE_TAIL]; - } - - /* Finally, initialize the rcu_data structure's list to empty. */ - rdp->nxtlist = NULL; - for (i = 0; i < RCU_NEXT_SIZE; i++) - rdp->nxttail[i] = &rdp->nxtlist; -} - -/* - * Adopt the RCU callbacks from the specified rcu_state structure's - * orphanage. The caller must hold the ->onofflock. - */ -static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) -{ - int i; - struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); - - /* - * If there is an rcu_barrier() operation in progress, then - * only the task doing that operation is permitted to adopt - * callbacks. To do otherwise breaks rcu_barrier() and friends - * by causing them to fail to wait for the callbacks in the - * orphanage. - */ - if (rsp->rcu_barrier_in_progress && - rsp->rcu_barrier_in_progress != current) - return; + *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist; + receive_rdp->nxttail[RCU_NEXT_TAIL] = + rdp->nxttail[RCU_NEXT_TAIL]; + receive_rdp->n_cbs_adopted += rdp->qlen; + rdp->n_cbs_orphaned += rdp->qlen; - /* Do the accounting first. */ - rdp->qlen_lazy += rsp->qlen_lazy; - rdp->qlen += rsp->qlen; - rdp->n_cbs_adopted += rsp->qlen; - rsp->qlen_lazy = 0; - rsp->qlen = 0; + rdp->nxtlist = NULL; + for (i = 0; i < RCU_NEXT_SIZE; i++) + rdp->nxttail[i] = &rdp->nxtlist; + } /* - * We do not need a memory barrier here because the only way we - * can get here if there is an rcu_barrier() in flight is if - * we are the task doing the rcu_barrier(). + * Record a quiescent state for the dying CPU. This is safe + * only because we have already cleared out the callbacks. + * (Otherwise, the RCU core might try to schedule the invocation + * of callbacks on this now-offline CPU, which would be bad.) */ - - /* First adopt the ready-to-invoke callbacks. */ - if (rsp->orphan_donelist != NULL) { - *rsp->orphan_donetail = *rdp->nxttail[RCU_DONE_TAIL]; - *rdp->nxttail[RCU_DONE_TAIL] = rsp->orphan_donelist; - for (i = RCU_NEXT_SIZE - 1; i >= RCU_DONE_TAIL; i--) - if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL]) - rdp->nxttail[i] = rsp->orphan_donetail; - rsp->orphan_donelist = NULL; - rsp->orphan_donetail = &rsp->orphan_donelist; - } - - /* And then adopt the callbacks that still need a grace period. */ - if (rsp->orphan_nxtlist != NULL) { - *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxtlist; - rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxttail; - rsp->orphan_nxtlist = NULL; - rsp->orphan_nxttail = &rsp->orphan_nxtlist; - } -} - -/* - * Trace the fact that this CPU is going offline. - */ -static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) -{ - RCU_TRACE(unsigned long mask); - RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda)); - RCU_TRACE(struct rcu_node *rnp = rdp->mynode); - - RCU_TRACE(mask = rdp->grpmask); + mask = rdp->grpmask; /* rnp->grplo is constant. */ trace_rcu_grace_period(rsp->name, rnp->gpnum + 1 - !!(rnp->qsmask & mask), "cpuofl"); + rcu_report_qs_rdp(smp_processor_id(), rsp, rdp, rsp->gpnum); + /* Note that rcu_report_qs_rdp() might call trace_rcu_grace_period(). */ } /* * The CPU has been completely removed, and some other CPU is reporting - * this fact from process context. 
Do the remainder of the cleanup, - * including orphaning the outgoing CPU's RCU callbacks, and also - * adopting them, if there is no _rcu_barrier() instance running. + * this fact from process context. Do the remainder of the cleanup. * There can only be one CPU hotplug operation at a time, so no other * CPU can be attempting to update rcu_cpu_kthread_task. */ @@ -1456,21 +1409,17 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) unsigned long mask; int need_report = 0; struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); - struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ + struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rnp. */ /* Adjust any no-longer-needed kthreads. */ rcu_stop_cpu_kthread(cpu); rcu_node_kthread_setaffinity(rnp, -1); - /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */ + /* Remove the dying CPU from the bitmasks in the rcu_node hierarchy. */ /* Exclude any attempts to start a new grace period. */ raw_spin_lock_irqsave(&rsp->onofflock, flags); - /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */ - rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp); - rcu_adopt_orphan_cbs(rsp); - /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ mask = rdp->grpmask; /* rnp->grplo is constant. */ do { @@ -1507,10 +1456,6 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) #else /* #ifdef CONFIG_HOTPLUG_CPU */ -static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) -{ -} - static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) { } @@ -1579,6 +1524,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) rcu_is_callbacks_kthread()); /* Update count, and requeue any remaining callbacks. */ + rdp->qlen_lazy -= count_lazy; + rdp->qlen -= count; + rdp->n_cbs_invoked += count; if (list != NULL) { *tail = rdp->nxtlist; rdp->nxtlist = list; @@ -1588,10 +1536,6 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) else break; } - smp_mb(); /* List handling before counting for rcu_barrier(). */ - rdp->qlen_lazy -= count_lazy; - rdp->qlen -= count; - rdp->n_cbs_invoked += count; /* Reinstate batch limit if we have worked down the excess. */ if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) @@ -1880,14 +1824,11 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), rdp = this_cpu_ptr(rsp->rda); /* Add the callback to our list. */ + *rdp->nxttail[RCU_NEXT_TAIL] = head; + rdp->nxttail[RCU_NEXT_TAIL] = &head->next; rdp->qlen++; if (lazy) rdp->qlen_lazy++; - else - rcu_idle_count_callbacks_posted(); - smp_mb(); /* Count before adding callback for rcu_barrier(). */ - *rdp->nxttail[RCU_NEXT_TAIL] = head; - rdp->nxttail[RCU_NEXT_TAIL] = &head->next; if (__is_kfree_rcu_offset((unsigned long)func)) trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, @@ -2226,10 +2167,11 @@ static int rcu_cpu_has_callbacks(int cpu) rcu_preempt_cpu_has_callbacks(cpu); } -/* - * RCU callback function for _rcu_barrier(). If we are last, wake - * up the task executing _rcu_barrier(). 
- */ +static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; +static atomic_t rcu_barrier_cpu_count; +static DEFINE_MUTEX(rcu_barrier_mutex); +static struct completion rcu_barrier_completion; + static void rcu_barrier_callback(struct rcu_head *notused) { if (atomic_dec_and_test(&rcu_barrier_cpu_count)) @@ -2259,94 +2201,27 @@ static void _rcu_barrier(struct rcu_state *rsp, void (*call_rcu_func)(struct rcu_head *head, void (*func)(struct rcu_head *head))) { - int cpu; - unsigned long flags; - struct rcu_data *rdp; - struct rcu_head rh; - - init_rcu_head_on_stack(&rh); - + BUG_ON(in_interrupt()); /* Take mutex to serialize concurrent rcu_barrier() requests. */ mutex_lock(&rcu_barrier_mutex); - - smp_mb(); /* Prevent any prior operations from leaking in. */ - - /* - * Initialize the count to one rather than to zero in order to - * avoid a too-soon return to zero in case of a short grace period - * (or preemption of this task). Also flag this task as doing - * an rcu_barrier(). This will prevent anyone else from adopting - * orphaned callbacks, which could cause otherwise failure if a - * CPU went offline and quickly came back online. To see this, - * consider the following sequence of events: - * - * 1. We cause CPU 0 to post an rcu_barrier_callback() callback. - * 2. CPU 1 goes offline, orphaning its callbacks. - * 3. CPU 0 adopts CPU 1's orphaned callbacks. - * 4. CPU 1 comes back online. - * 5. We cause CPU 1 to post an rcu_barrier_callback() callback. - * 6. Both rcu_barrier_callback() callbacks are invoked, awakening - * us -- but before CPU 1's orphaned callbacks are invoked!!! - */ init_completion(&rcu_barrier_completion); - atomic_set(&rcu_barrier_cpu_count, 1); - raw_spin_lock_irqsave(&rsp->onofflock, flags); - rsp->rcu_barrier_in_progress = current; - raw_spin_unlock_irqrestore(&rsp->onofflock, flags); - - /* - * Force every CPU with callbacks to register a new callback - * that will tell us when all the preceding callbacks have - * been invoked. If an offline CPU has callbacks, wait for - * it to either come back online or to finish orphaning those - * callbacks. - */ - for_each_possible_cpu(cpu) { - preempt_disable(); - rdp = per_cpu_ptr(rsp->rda, cpu); - if (cpu_is_offline(cpu)) { - preempt_enable(); - while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen)) - schedule_timeout_interruptible(1); - } else if (ACCESS_ONCE(rdp->qlen)) { - smp_call_function_single(cpu, rcu_barrier_func, - (void *)call_rcu_func, 1); - preempt_enable(); - } else { - preempt_enable(); - } - } - - /* - * Now that all online CPUs have rcu_barrier_callback() callbacks - * posted, we can adopt all of the orphaned callbacks and place - * an rcu_barrier_callback() callback after them. When that is done, - * we are guaranteed to have an rcu_barrier_callback() callback - * following every callback that could possibly have been - * registered before _rcu_barrier() was called. - */ - raw_spin_lock_irqsave(&rsp->onofflock, flags); - rcu_adopt_orphan_cbs(rsp); - rsp->rcu_barrier_in_progress = NULL; - raw_spin_unlock_irqrestore(&rsp->onofflock, flags); - atomic_inc(&rcu_barrier_cpu_count); - smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */ - call_rcu_func(&rh, rcu_barrier_callback); - /* - * Now that we have an rcu_barrier_callback() callback on each - * CPU, and thus each counted, remove the initial count. + * Initialize rcu_barrier_cpu_count to 1, then invoke + * rcu_barrier_func() on each CPU, so that each CPU also has + * incremented rcu_barrier_cpu_count. 
Only then is it safe to + * decrement rcu_barrier_cpu_count -- otherwise the first CPU + * might complete its grace period before all of the other CPUs + * did their increment, causing this function to return too + * early. Note that on_each_cpu() disables irqs, which prevents + * any CPUs from coming online or going offline until each online + * CPU has queued its RCU-barrier callback. */ + atomic_set(&rcu_barrier_cpu_count, 1); + on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1); if (atomic_dec_and_test(&rcu_barrier_cpu_count)) complete(&rcu_barrier_completion); - - /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ wait_for_completion(&rcu_barrier_completion); - - /* Other rcu_barrier() invocations can now safely proceed. */ mutex_unlock(&rcu_barrier_mutex); - - destroy_rcu_head_on_stack(&rh); } /** diff --git a/trunk/kernel/rcutree.h b/trunk/kernel/rcutree.h index 1e49c5685960..cdd1be0a4072 100644 --- a/trunk/kernel/rcutree.h +++ b/trunk/kernel/rcutree.h @@ -371,17 +371,6 @@ struct rcu_state { raw_spinlock_t onofflock; /* exclude on/offline and */ /* starting new GP. */ - struct rcu_head *orphan_nxtlist; /* Orphaned callbacks that */ - /* need a grace period. */ - struct rcu_head **orphan_nxttail; /* Tail of above. */ - struct rcu_head *orphan_donelist; /* Orphaned callbacks that */ - /* are ready to invoke. */ - struct rcu_head **orphan_donetail; /* Tail of above. */ - long qlen_lazy; /* Number of lazy callbacks. */ - long qlen; /* Total number of callbacks. */ - struct task_struct *rcu_barrier_in_progress; - /* Task doing rcu_barrier(), */ - /* or NULL if no barrier. */ raw_spinlock_t fqslock; /* Only one task forcing */ /* quiescent states. */ unsigned long jiffies_force_qs; /* Time at which to invoke */ @@ -482,7 +471,6 @@ static void __cpuinit rcu_prepare_kthreads(int cpu); static void rcu_prepare_for_idle_init(int cpu); static void rcu_cleanup_after_idle(int cpu); static void rcu_prepare_for_idle(int cpu); -static void rcu_idle_count_callbacks_posted(void); static void print_cpu_stall_info_begin(void); static void print_cpu_stall_info(struct rcu_state *rsp, int cpu); static void print_cpu_stall_info_end(void); diff --git a/trunk/kernel/rcutree_plugin.h b/trunk/kernel/rcutree_plugin.h index 7082ea93566f..c023464816be 100644 --- a/trunk/kernel/rcutree_plugin.h +++ b/trunk/kernel/rcutree_plugin.h @@ -1938,14 +1938,6 @@ static void rcu_prepare_for_idle(int cpu) { } -/* - * Don't bother keeping a running count of the number of RCU callbacks - * posted because CONFIG_RCU_FAST_NO_HZ=n. - */ -static void rcu_idle_count_callbacks_posted(void) -{ -} - #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ /* @@ -1986,20 +1978,11 @@ static void rcu_idle_count_callbacks_posted(void) #define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */ #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ -/* Loop counter for rcu_prepare_for_idle(). */ static DEFINE_PER_CPU(int, rcu_dyntick_drain); -/* If rcu_dyntick_holdoff==jiffies, don't try to enter dyntick-idle mode. */ static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); -/* Timer to awaken the CPU if it enters dyntick-idle mode with callbacks. */ -static DEFINE_PER_CPU(struct timer_list, rcu_idle_gp_timer); -/* Scheduled expiry time for rcu_idle_gp_timer to allow reposting. */ -static DEFINE_PER_CPU(unsigned long, rcu_idle_gp_timer_expires); -/* Enable special processing on first attempt to enter dyntick-idle mode. 
*/ -static DEFINE_PER_CPU(bool, rcu_idle_first_pass); -/* Running count of non-lazy callbacks posted, never decremented. */ -static DEFINE_PER_CPU(unsigned long, rcu_nonlazy_posted); -/* Snapshot of rcu_nonlazy_posted to detect meaningful exits from idle. */ -static DEFINE_PER_CPU(unsigned long, rcu_nonlazy_posted_snap); +static DEFINE_PER_CPU(struct hrtimer, rcu_idle_gp_timer); +static ktime_t rcu_idle_gp_wait; /* If some non-lazy callbacks. */ +static ktime_t rcu_idle_lazy_gp_wait; /* If only lazy callbacks. */ /* * Allow the CPU to enter dyntick-idle mode if either: (1) There are no @@ -2012,8 +1995,6 @@ static DEFINE_PER_CPU(unsigned long, rcu_nonlazy_posted_snap); */ int rcu_needs_cpu(int cpu) { - /* Flag a new idle sojourn to the idle-entry state machine. */ - per_cpu(rcu_idle_first_pass, cpu) = 1; /* If no callbacks, RCU doesn't need the CPU. */ if (!rcu_cpu_has_callbacks(cpu)) return 0; @@ -2063,35 +2044,17 @@ static bool rcu_cpu_has_nonlazy_callbacks(int cpu) rcu_preempt_cpu_has_nonlazy_callbacks(cpu); } -/* - * Handler for smp_call_function_single(). The only point of this - * handler is to wake the CPU up, so the handler does only tracing. - */ -void rcu_idle_demigrate(void *unused) -{ - trace_rcu_prep_idle("Demigrate"); -} - /* * Timer handler used to force CPU to start pushing its remaining RCU * callbacks in the case where it entered dyntick-idle mode with callbacks * pending. The hander doesn't really need to do anything because the * real work is done upon re-entry to idle, or by the next scheduling-clock * interrupt should idle not be re-entered. - * - * One special case: the timer gets migrated without awakening the CPU - * on which the timer was scheduled on. In this case, we must wake up - * that CPU. We do so with smp_call_function_single(). */ -static void rcu_idle_gp_timer_func(unsigned long cpu_in) +static enum hrtimer_restart rcu_idle_gp_timer_func(struct hrtimer *hrtp) { - int cpu = (int)cpu_in; - trace_rcu_prep_idle("Timer"); - if (cpu != smp_processor_id()) - smp_call_function_single(cpu, rcu_idle_demigrate, NULL, 0); - else - WARN_ON_ONCE(1); /* Getting here can hang the system... 
*/ + return HRTIMER_NORESTART; } /* @@ -2099,11 +2062,19 @@ static void rcu_idle_gp_timer_func(unsigned long cpu_in) */ static void rcu_prepare_for_idle_init(int cpu) { - per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; - setup_timer(&per_cpu(rcu_idle_gp_timer, cpu), - rcu_idle_gp_timer_func, cpu); - per_cpu(rcu_idle_gp_timer_expires, cpu) = jiffies - 1; - per_cpu(rcu_idle_first_pass, cpu) = 1; + static int firsttime = 1; + struct hrtimer *hrtp = &per_cpu(rcu_idle_gp_timer, cpu); + + hrtimer_init(hrtp, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtp->function = rcu_idle_gp_timer_func; + if (firsttime) { + unsigned int upj = jiffies_to_usecs(RCU_IDLE_GP_DELAY); + + rcu_idle_gp_wait = ns_to_ktime(upj * (u64)1000); + upj = jiffies_to_usecs(RCU_IDLE_LAZY_GP_DELAY); + rcu_idle_lazy_gp_wait = ns_to_ktime(upj * (u64)1000); + firsttime = 0; + } } /* @@ -2113,8 +2084,7 @@ static void rcu_prepare_for_idle_init(int cpu) */ static void rcu_cleanup_after_idle(int cpu) { - del_timer(&per_cpu(rcu_idle_gp_timer, cpu)); - trace_rcu_prep_idle("Cleanup after idle"); + hrtimer_cancel(&per_cpu(rcu_idle_gp_timer, cpu)); } /* @@ -2138,29 +2108,6 @@ static void rcu_cleanup_after_idle(int cpu) */ static void rcu_prepare_for_idle(int cpu) { - struct timer_list *tp; - - /* - * If this is an idle re-entry, for example, due to use of - * RCU_NONIDLE() or the new idle-loop tracing API within the idle - * loop, then don't take any state-machine actions, unless the - * momentary exit from idle queued additional non-lazy callbacks. - * Instead, repost the rcu_idle_gp_timer if this CPU has callbacks - * pending. - */ - if (!per_cpu(rcu_idle_first_pass, cpu) && - (per_cpu(rcu_nonlazy_posted, cpu) == - per_cpu(rcu_nonlazy_posted_snap, cpu))) { - if (rcu_cpu_has_callbacks(cpu)) { - tp = &per_cpu(rcu_idle_gp_timer, cpu); - mod_timer_pinned(tp, per_cpu(rcu_idle_gp_timer_expires, cpu)); - } - return; - } - per_cpu(rcu_idle_first_pass, cpu) = 0; - per_cpu(rcu_nonlazy_posted_snap, cpu) = - per_cpu(rcu_nonlazy_posted, cpu) - 1; - /* * If there are no callbacks on this CPU, enter dyntick-idle mode. * Also reset state to avoid prejudicing later attempts. @@ -2193,15 +2140,11 @@ static void rcu_prepare_for_idle(int cpu) per_cpu(rcu_dyntick_drain, cpu) = 0; per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; if (rcu_cpu_has_nonlazy_callbacks(cpu)) - per_cpu(rcu_idle_gp_timer_expires, cpu) = - jiffies + RCU_IDLE_GP_DELAY; + hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu), + rcu_idle_gp_wait, HRTIMER_MODE_REL); else - per_cpu(rcu_idle_gp_timer_expires, cpu) = - jiffies + RCU_IDLE_LAZY_GP_DELAY; - tp = &per_cpu(rcu_idle_gp_timer, cpu); - mod_timer_pinned(tp, per_cpu(rcu_idle_gp_timer_expires, cpu)); - per_cpu(rcu_nonlazy_posted_snap, cpu) = - per_cpu(rcu_nonlazy_posted, cpu); + hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu), + rcu_idle_lazy_gp_wait, HRTIMER_MODE_REL); return; /* Nothing more to do immediately. */ } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { /* We have hit the limit, so time to give up. */ @@ -2241,19 +2184,6 @@ static void rcu_prepare_for_idle(int cpu) trace_rcu_prep_idle("Callbacks drained"); } -/* - * Keep a running count of the number of non-lazy callbacks posted - * on this CPU. This running counter (which is never decremented) allows - * rcu_prepare_for_idle() to detect when something out of the idle loop - * posts a callback, even if an equal number of callbacks are invoked. - * Of course, callbacks should only be posted from within a trace event - * designed to be called from idle or from within RCU_NONIDLE(). 
- */ -static void rcu_idle_count_callbacks_posted(void) -{ - __this_cpu_add(rcu_nonlazy_posted, 1); -} - #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ #ifdef CONFIG_RCU_CPU_STALL_INFO @@ -2262,12 +2192,14 @@ static void rcu_idle_count_callbacks_posted(void) static void print_cpu_stall_fast_no_hz(char *cp, int cpu) { - struct timer_list *tltp = &per_cpu(rcu_idle_gp_timer, cpu); + struct hrtimer *hrtp = &per_cpu(rcu_idle_gp_timer, cpu); - sprintf(cp, "drain=%d %c timer=%lu", + sprintf(cp, "drain=%d %c timer=%lld", per_cpu(rcu_dyntick_drain, cpu), per_cpu(rcu_dyntick_holdoff, cpu) == jiffies ? 'H' : '.', - timer_pending(tltp) ? tltp->expires - jiffies : -1); + hrtimer_active(hrtp) + ? ktime_to_us(hrtimer_get_remaining(hrtp)) + : -1); } #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ diff --git a/trunk/kernel/rcutree_trace.c b/trunk/kernel/rcutree_trace.c index d4bc16ddd1d4..ed459edeff43 100644 --- a/trunk/kernel/rcutree_trace.c +++ b/trunk/kernel/rcutree_trace.c @@ -271,13 +271,13 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) gpnum = rsp->gpnum; seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x " - "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n", + "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n", rsp->completed, gpnum, rsp->fqs_state, (long)(rsp->jiffies_force_qs - jiffies), (int)(jiffies & 0xffff), rsp->n_force_qs, rsp->n_force_qs_ngp, rsp->n_force_qs - rsp->n_force_qs_ngp, - rsp->n_force_qs_lh, rsp->qlen_lazy, rsp->qlen); + rsp->n_force_qs_lh); for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) { if (rnp->level != level) { seq_puts(m, "\n"); diff --git a/trunk/lib/list_debug.c b/trunk/lib/list_debug.c index 982b850d4e7a..3810b481f940 100644 --- a/trunk/lib/list_debug.c +++ b/trunk/lib/list_debug.c @@ -10,6 +10,7 @@ #include #include #include +#include /* * Insert a new entry between two known consecutive entries. @@ -75,3 +76,24 @@ void list_del(struct list_head *entry) entry->prev = LIST_POISON2; } EXPORT_SYMBOL(list_del); + +/* + * RCU variants. + */ +void __list_add_rcu(struct list_head *new, + struct list_head *prev, struct list_head *next) +{ + WARN(next->prev != prev, + "list_add_rcu corruption. next->prev should be " + "prev (%p), but was %p. (next=%p).\n", + prev, next->prev, next); + WARN(prev->next != next, + "list_add_rcu corruption. prev->next should be " + "next (%p), but was %p. (prev=%p).\n", + next, prev->next, prev); + new->next = next; + new->prev = prev; + rcu_assign_pointer(list_next_rcu(prev), new); + next->prev = new; +} +EXPORT_SYMBOL(__list_add_rcu);
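
Editorial note, not part of the patch above: the hunk for lib/list_debug.c adds an out-of-line __list_add_rcu() under CONFIG_DEBUG_LIST that WARN()s when the neighbouring prev/next pointers are inconsistent at insertion time. The sketch below is a minimal, hypothetical illustration of the RCU-list usage pattern those checks police: writers serialize with a lock and use list_add_rcu()/list_del_rcu(), readers traverse under rcu_read_lock(), and freeing is deferred via call_rcu(). The names struct foo, foo_head, foo_lock and the helpers are invented for the example and do not appear in the patch.

/*
 * Illustrative sketch only.  Writers hold foo_lock; readers rely on RCU.
 */
#include <linux/errno.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct foo {
	int key;
	struct list_head list;		/* linked via list_add_rcu() */
	struct rcu_head rcu;		/* for deferred kfree() */
};

static LIST_HEAD(foo_head);
static DEFINE_SPINLOCK(foo_lock);	/* serializes writers only */

/*
 * Writer: insert a new element.  With CONFIG_DEBUG_LIST, the WARN()s in
 * the new __list_add_rcu() would fire here if foo_head were corrupted.
 */
static int foo_add(int key)
{
	struct foo *p = kmalloc(sizeof(*p), GFP_KERNEL);

	if (!p)
		return -ENOMEM;
	p->key = key;
	spin_lock(&foo_lock);
	list_add_rcu(&p->list, &foo_head);
	spin_unlock(&foo_lock);
	return 0;
}

/* Reader: may run concurrently with foo_add() and foo_del(). */
static bool foo_lookup(int key)
{
	struct foo *p;
	bool found = false;

	rcu_read_lock();
	list_for_each_entry_rcu(p, &foo_head, list) {
		if (p->key == key) {
			found = true;
			break;
		}
	}
	rcu_read_unlock();
	return found;
}

static void foo_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct foo, rcu));
}

/*
 * Writer: unlink an element and free it only after a grace period, so
 * readers still traversing it via list_for_each_entry_rcu() stay safe.
 */
static void foo_del(int key)
{
	struct foo *p;

	spin_lock(&foo_lock);
	list_for_each_entry(p, &foo_head, list) {
		if (p->key == key) {
			list_del_rcu(&p->list);
			call_rcu(&p->rcu, foo_free_rcu);
			break;
		}
	}
	spin_unlock(&foo_lock);
}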