diff --git a/[refs] b/[refs]
index ef88aa3923f6..f60fdf99b7dc 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 048a0e8f5e1d94c01a5fc70f5b2f2fd2f4527326
+refs/heads/master: 616c310e83b872024271c915c1b9ab505b9efad9
diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt
index ab84a01c8d68..c1601e5a8b71 100644
--- a/trunk/Documentation/kernel-parameters.txt
+++ b/trunk/Documentation/kernel-parameters.txt
@@ -2330,100 +2330,18 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	ramdisk_size=	[RAM] Sizes of RAM disks in kilobytes
 			See Documentation/blockdev/ramdisk.txt.
 
-	rcutree.blimit=	[KNL,BOOT]
+	rcupdate.blimit=	[KNL,BOOT]
 			Set maximum number of finished RCU callbacks to process
 			in one batch.
 
-	rcutree.qhimark=	[KNL,BOOT]
+	rcupdate.qhimark=	[KNL,BOOT]
 			Set threshold of queued
 			RCU callbacks over which batch limiting is disabled.
 
-	rcutree.qlowmark=	[KNL,BOOT]
+	rcupdate.qlowmark=	[KNL,BOOT]
 			Set threshold of queued RCU callbacks below which
 			batch limiting is re-enabled.
 
-	rcutree.rcu_cpu_stall_suppress=	[KNL,BOOT]
-			Suppress RCU CPU stall warning messages.
-
-	rcutree.rcu_cpu_stall_timeout= [KNL,BOOT]
-			Set timeout for RCU CPU stall warning messages.
-
-	rcutorture.fqs_duration= [KNL,BOOT]
-			Set duration of force_quiescent_state bursts.
-
-	rcutorture.fqs_holdoff= [KNL,BOOT]
-			Set holdoff time within force_quiescent_state bursts.
-
-	rcutorture.fqs_stutter= [KNL,BOOT]
-			Set wait time between force_quiescent_state bursts.
-
-	rcutorture.irqreader= [KNL,BOOT]
-			Test RCU readers from irq handlers.
-
-	rcutorture.n_barrier_cbs= [KNL,BOOT]
-			Set callbacks/threads for rcu_barrier() testing.
-
-	rcutorture.nfakewriters= [KNL,BOOT]
-			Set number of concurrent RCU writers.  These just
-			stress RCU, they don't participate in the actual
-			test, hence the "fake".
-
-	rcutorture.nreaders= [KNL,BOOT]
-			Set number of RCU readers.
-
-	rcutorture.onoff_holdoff= [KNL,BOOT]
-			Set time (s) after boot for CPU-hotplug testing.
-
-	rcutorture.onoff_interval= [KNL,BOOT]
-			Set time (s) between CPU-hotplug operations, or
-			zero to disable CPU-hotplug testing.
-
-	rcutorture.shuffle_interval= [KNL,BOOT]
-			Set task-shuffle interval (s).  Shuffling tasks
-			allows some CPUs to go into dyntick-idle mode
-			during the rcutorture test.
-
-	rcutorture.shutdown_secs= [KNL,BOOT]
-			Set time (s) after boot system shutdown.  This
-			is useful for hands-off automated testing.
-
-	rcutorture.stall_cpu= [KNL,BOOT]
-			Duration of CPU stall (s) to test RCU CPU stall
-			warnings, zero to disable.
-
-	rcutorture.stall_cpu_holdoff= [KNL,BOOT]
-			Time to wait (s) after boot before inducing stall.
-
-	rcutorture.stat_interval= [KNL,BOOT]
-			Time (s) between statistics printk()s.
-
-	rcutorture.stutter= [KNL,BOOT]
-			Time (s) to stutter testing, for example, specifying
-			five seconds causes the test to run for five seconds,
-			wait for five seconds, and so on.  This tests RCU's
-			ability to transition abruptly to and from idle.
-
-	rcutorture.test_boost= [KNL,BOOT]
-			Test RCU priority boosting?  0=no, 1=maybe, 2=yes.
-			"Maybe" means test if the RCU implementation
-			under test support RCU priority boosting.
-
-	rcutorture.test_boost_duration= [KNL,BOOT]
-			Duration (s) of each individual boost test.
-
-	rcutorture.test_boost_interval= [KNL,BOOT]
-			Interval (s) between each boost test.
-
-	rcutorture.test_no_idle_hz= [KNL,BOOT]
-			Test RCU's dyntick-idle handling.  See also the
-			rcutorture.shuffle_interval parameter.
-
-	rcutorture.torture_type= [KNL,BOOT]
-			Specify the RCU implementation to test.
-
-	rcutorture.verbose= [KNL,BOOT]
-			Enable additional printk() statements.
-
 	rdinit=		[KNL]
 			Format: <full_path>
			Run specified binary instead of /init from the ramdisk,
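For context on the three renamed parameters above: boot parameters of the form
"module.param=" come from module_param() declarations, with the prefix taken
from the defining file's KBUILD_MODNAME (hence the rcutree./rcupdate. rename
when the variables moved between source files). A minimal sketch of how
rcupdate.blimit and friends are wired up; the defaults shown are illustrative,
the authoritative declarations live in the RCU sources:

	#include <linux/module.h>

	static int blimit = 10;		/* Max callbacks processed per batch. */
	static int qhimark = 10000;	/* Queue length that disables batch limiting. */
	static int qlowmark = 100;	/* Queue length that re-enables it. */

	/* Permission bits of 0: settable only on the boot command line,
	 * e.g. "rcupdate.blimit=20", not via sysfs at run time. */
	module_param(blimit, int, 0);
	module_param(qhimark, int, 0);
	module_param(qlowmark, int, 0);
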
diff --git a/trunk/arch/um/drivers/mconsole_kern.c b/trunk/arch/um/drivers/mconsole_kern.c
index 43b39d61b538..88e466b159dc 100644
--- a/trunk/arch/um/drivers/mconsole_kern.c
+++ b/trunk/arch/um/drivers/mconsole_kern.c
@@ -705,6 +705,7 @@ static void stack_proc(void *arg)
 	struct task_struct *from = current, *to = arg;
 
 	to->thread.saved_task = from;
+	rcu_switch_from(from);
 	switch_to(from, to, from);
 }
 
diff --git a/trunk/include/linux/rculist.h b/trunk/include/linux/rculist.h
index e0f0fab20415..d079290843a9 100644
--- a/trunk/include/linux/rculist.h
+++ b/trunk/include/linux/rculist.h
@@ -30,7 +30,6 @@
  * This is only for internal list manipulation where we know
  * the prev/next entries already!
  */
-#ifndef CONFIG_DEBUG_LIST
 static inline void __list_add_rcu(struct list_head *new,
 		struct list_head *prev, struct list_head *next)
 {
@@ -39,10 +38,6 @@ static inline void __list_add_rcu(struct list_head *new,
 	rcu_assign_pointer(list_next_rcu(prev), new);
 	next->prev = new;
 }
-#else
-extern void __list_add_rcu(struct list_head *new,
-		struct list_head *prev, struct list_head *next);
-#endif
 
 /**
  * list_add_rcu - add a new entry to rcu-protected list
@@ -113,7 +108,7 @@ static inline void list_add_tail_rcu(struct list_head *new,
  */
 static inline void list_del_rcu(struct list_head *entry)
 {
-	__list_del_entry(entry);
+	__list_del(entry->prev, entry->next);
 	entry->prev = LIST_POISON2;
 }
 
@@ -233,43 +228,18 @@ static inline void list_splice_init_rcu(struct list_head *list,
 	})
 
 /**
- * Where are list_empty_rcu() and list_first_entry_rcu()?
- *
- * Implementing those functions following their counterparts list_empty() and
- * list_first_entry() is not advisable because they lead to subtle race
- * conditions as the following snippet shows:
- *
- * if (!list_empty_rcu(mylist)) {
- *	struct foo *bar = list_first_entry_rcu(mylist, struct foo, list_member);
- *	do_something(bar);
- * }
- *
- * The list may not be empty when list_empty_rcu checks it, but it may be when
- * list_first_entry_rcu rereads the ->next pointer.
- *
- * Rereading the ->next pointer is not a problem for list_empty() and
- * list_first_entry() because they would be protected by a lock that blocks
- * writers.
- *
- * See list_first_or_null_rcu for an alternative.
- */
-
-/**
- * list_first_or_null_rcu - get the first element from a list
+ * list_first_entry_rcu - get the first element from a list
  * @ptr:        the list head to take the element from.
  * @type:       the type of the struct this is embedded in.
  * @member:     the name of the list_struct within the struct.
  *
- * Note that if the list is empty, it returns NULL.
+ * Note, that list is expected to be not empty.
  *
  * This primitive may safely run concurrently with the _rcu list-mutation
  * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
  */
-#define list_first_or_null_rcu(ptr, type, member) \
-	({struct list_head *__ptr = (ptr); \
-	  struct list_head __rcu *__next = list_next_rcu(__ptr); \
-	  likely(__ptr != __next) ? container_of(__next, type, member) : NULL; \
-	})
+#define list_first_entry_rcu(ptr, type, member) \
	list_entry_rcu((ptr)->next, type, member)
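 
 /**
  * list_for_each_entry_rcu - iterate over rcu list of given type

The long comment block removed from rculist.h above explains exactly why the
restored list_first_entry_rcu() is dangerous on a possibly-empty list: a
list_empty()/fetch-first pair reads ->next twice. A sketch of the race, using
a hypothetical struct foo and do_something() (both illustrative, not part of
this patch):

	#include <linux/rculist.h>
	#include <linux/rcupdate.h>

	struct foo {
		int data;
		struct list_head list_member;
	};

	static LIST_HEAD(mylist);

	static void do_something(struct foo *bar)
	{
		/* ... consume bar ... */
	}

	static void head_fetch(void)
	{
		rcu_read_lock();
		if (!list_empty(&mylist)) {
			/* RACY: the last entry may be list_del_rcu()ed between
			 * the list_empty() check and this second read of ->next,
			 * leaving bar aimed at the list head reinterpreted as a
			 * struct foo rather than at a real entry. */
			struct foo *bar = list_first_entry_rcu(&mylist,
						struct foo, list_member);
			do_something(bar);
		}
		rcu_read_unlock();
	}

With list_first_or_null_rcu() gone after this patch, callers must instead
guarantee non-emptiness structurally, for example by holding the update-side
lock across the fetch.
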
diff --git a/trunk/include/linux/rcupdate.h b/trunk/include/linux/rcupdate.h
index d5dfb109dfe1..bbfe7854a6a6 100644
--- a/trunk/include/linux/rcupdate.h
+++ b/trunk/include/linux/rcupdate.h
@@ -184,6 +184,7 @@ static inline int rcu_preempt_depth(void)
 /* Internal to kernel */
 extern void rcu_sched_qs(int cpu);
 extern void rcu_bh_qs(int cpu);
+extern void rcu_preempt_note_context_switch(void);
 extern void rcu_check_callbacks(int cpu, int user);
 struct notifier_block;
 extern void rcu_idle_enter(void);
@@ -922,21 +923,6 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset)
 	kfree_call_rcu(head, (rcu_callback)offset);
 }
 
-/*
- * Does the specified offset indicate that the corresponding rcu_head
- * structure can be handled by kfree_rcu()?
- */
-#define __is_kfree_rcu_offset(offset) ((offset) < 4096)
-
-/*
- * Helper macro for kfree_rcu() to prevent argument-expansion eyestrain.
- */
-#define __kfree_rcu(head, offset) \
-	do { \
-		BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
-		call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \
-	} while (0)
-
 /**
  * kfree_rcu() - kfree an object after a grace period.
  * @ptr:	pointer to kfree
@@ -959,9 +945,6 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset)
  *
  * Note that the allowable offset might decrease in the future, for example,
  * to allow something like kmem_cache_free_rcu().
- *
- * The BUILD_BUG_ON check must not involve any function calls, hence the
- * checks are done in macros here.
  */
 #define kfree_rcu(ptr, rcu_head) \
 	__kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
diff --git a/trunk/include/linux/rcutiny.h b/trunk/include/linux/rcutiny.h
index e93df77176d1..080b5bdda28e 100644
--- a/trunk/include/linux/rcutiny.h
+++ b/trunk/include/linux/rcutiny.h
@@ -87,10 +87,6 @@ static inline void kfree_call_rcu(struct rcu_head *head,
 
 #ifdef CONFIG_TINY_RCU
 
-static inline void rcu_preempt_note_context_switch(void)
-{
-}
-
 static inline void exit_rcu(void)
 {
 }
@@ -102,7 +98,6 @@ static inline int rcu_needs_cpu(int cpu)
 
 #else /* #ifdef CONFIG_TINY_RCU */
 
-void rcu_preempt_note_context_switch(void);
 extern void exit_rcu(void);
 int rcu_preempt_needs_cpu(void);
 
@@ -116,7 +111,6 @@ static inline int rcu_needs_cpu(int cpu)
 static inline void rcu_note_context_switch(int cpu)
 {
 	rcu_sched_qs(cpu);
-	rcu_preempt_note_context_switch();
 }
 
 /*
diff --git a/trunk/include/linux/rcutree.h b/trunk/include/linux/rcutree.h
index b06363055ef8..e8ee5dd0854c 100644
--- a/trunk/include/linux/rcutree.h
+++ b/trunk/include/linux/rcutree.h
@@ -98,6 +98,13 @@ extern void rcu_force_quiescent_state(void);
 extern void rcu_bh_force_quiescent_state(void);
 extern void rcu_sched_force_quiescent_state(void);
 
+/* A context switch is a grace period for RCU-sched and RCU-bh. */
+static inline int rcu_blocking_is_gp(void)
+{
+	might_sleep();  /* Check for RCU read-side critical section. */
+	return num_online_cpus() == 1;
+}
+
 extern void rcu_scheduler_starting(void);
 extern int rcu_scheduler_active __read_mostly;
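
Moving rcu_blocking_is_gp() into rcutree.h exposes the single-CPU fast path to
the blocking grace-period primitives: with one CPU online and voluntary context
switches acting as grace periods, a blocking wait can return immediately. A
sketch of how a caller such as synchronize_sched() can use it (the exact body
in kernel/rcutree.c may differ from this):

	void synchronize_sched(void)
	{
		/* The might_sleep() inside rcu_blocking_is_gp() doubles as a
		 * debug check: RCU-sched readers run with preemption off, so
		 * blocking here from a read-side section is a bug. */
		if (rcu_blocking_is_gp())
			return;	/* One CPU online: grace period is free. */
		wait_rcu_gp(call_rcu_sched);	/* Otherwise really wait. */
	}

Note that the header version tests num_online_cpus() == 1 where the version
removed from rcutree.c (below) tested <= 1; with at least one CPU always
online, the two are equivalent in practice.
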
diff --git a/trunk/include/linux/sched.h b/trunk/include/linux/sched.h
index 81a173c0897d..8f3fd945070f 100644
--- a/trunk/include/linux/sched.h
+++ b/trunk/include/linux/sched.h
@@ -1905,12 +1905,22 @@ static inline void rcu_copy_process(struct task_struct *p)
 	INIT_LIST_HEAD(&p->rcu_node_entry);
 }
 
+static inline void rcu_switch_from(struct task_struct *prev)
+{
+	if (prev->rcu_read_lock_nesting != 0)
+		rcu_preempt_note_context_switch();
+}
+
 #else
 
 static inline void rcu_copy_process(struct task_struct *p)
 {
 }
 
+static inline void rcu_switch_from(struct task_struct *prev)
+{
+}
+
 #endif
 
 #ifdef CONFIG_SMP
diff --git a/trunk/init/Kconfig b/trunk/init/Kconfig
index 6d18ef8071b5..6cfd71d06463 100644
--- a/trunk/init/Kconfig
+++ b/trunk/init/Kconfig
@@ -458,33 +458,6 @@ config RCU_FANOUT
 	  Select a specific number if testing RCU itself.
 	  Take the default if unsure.
 
-config RCU_FANOUT_LEAF
-	int "Tree-based hierarchical RCU leaf-level fanout value"
-	range 2 RCU_FANOUT if 64BIT
-	range 2 RCU_FANOUT if !64BIT
-	depends on TREE_RCU || TREE_PREEMPT_RCU
-	default 16
-	help
-	  This option controls the leaf-level fanout of hierarchical
-	  implementations of RCU, and allows trading off cache misses
-	  against lock contention.  Systems that synchronize their
-	  scheduling-clock interrupts for energy-efficiency reasons will
-	  want the default because the smaller leaf-level fanout keeps
-	  lock contention levels acceptably low.  Very large systems
-	  (hundreds or thousands of CPUs) will instead want to set this
-	  value to the maximum value possible in order to reduce the
-	  number of cache misses incurred during RCU's grace-period
-	  initialization.  These systems tend to run CPU-bound, and thus
-	  are not helped by synchronized interrupts, and thus tend to
-	  skew them, which reduces lock contention enough that large
-	  leaf-level fanouts work well.
-
-	  Select a specific number if testing RCU itself.
-
-	  Select the maximum permissible value for large systems.
-
-	  Take the default if unsure.
-
 config RCU_FANOUT_EXACT
 	bool "Disable tree-based hierarchical RCU auto-balancing"
 	depends on TREE_RCU || TREE_PREEMPT_RCU
@@ -542,25 +515,10 @@ config RCU_BOOST_PRIO
 	depends on RCU_BOOST
 	default 1
 	help
-	  This option specifies the real-time priority to which long-term
-	  preempted RCU readers are to be boosted.  If you are working
-	  with a real-time application that has one or more CPU-bound
-	  threads running at a real-time priority level, you should set
-	  RCU_BOOST_PRIO to a priority higher then the highest-priority
-	  real-time CPU-bound thread.  The default RCU_BOOST_PRIO value
-	  of 1 is appropriate in the common case, which is real-time
-	  applications that do not have any CPU-bound threads.
-
-	  Some real-time applications might not have a single real-time
-	  thread that saturates a given CPU, but instead might have
-	  multiple real-time threads that, taken together, fully utilize
-	  that CPU.  In this case, you should set RCU_BOOST_PRIO to
-	  a priority higher than the lowest-priority thread that is
-	  conspiring to prevent the CPU from running any non-real-time
-	  tasks.  For example, if one thread at priority 10 and another
-	  thread at priority 5 are between themselves fully consuming
-	  the CPU time on a given CPU, then RCU_BOOST_PRIO should be
-	  set to priority 6 or higher.
+	  This option specifies the real-time priority to which preempted
+	  RCU readers are to be boosted.  If you are working with CPU-bound
+	  real-time applications, you should specify a priority higher then
+	  the highest-priority CPU-bound application.
 
	  Specify the real-time priority, or take the default if unsure.
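
The shortened RCU_BOOST_PRIO help text above still encodes the sizing rule from
the longer version it replaces: the boost priority must exceed that of any
CPU-bound real-time thread, or boosted readers still cannot run. As a concrete,
purely illustrative case, if an application runs a spinning SCHED_FIFO thread
at priority 10 as below, RCU_BOOST_PRIO would need to be at least 11:

	#include <sched.h>
	#include <stdio.h>

	int main(void)
	{
		struct sched_param sp = { .sched_priority = 10 };

		/* A CPU-bound real-time thread: without boosting above
		 * priority 10, preempted RCU readers on this CPU starve. */
		if (sched_setscheduler(0, SCHED_FIFO, &sp) != 0) {
			perror("sched_setscheduler");
			return 1;
		}
		for (;;)
			;	/* burn the CPU */
	}
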
diff --git a/trunk/kernel/rcutree.c b/trunk/kernel/rcutree.c
index 8f6a344306e6..61351505ec78 100644
--- a/trunk/kernel/rcutree.c
+++ b/trunk/kernel/rcutree.c
@@ -192,7 +192,6 @@ void rcu_note_context_switch(int cpu)
 {
 	trace_rcu_utilization("Start context switch");
 	rcu_sched_qs(cpu);
-	rcu_preempt_note_context_switch(cpu);
 	trace_rcu_utilization("End context switch");
 }
 EXPORT_SYMBOL_GPL(rcu_note_context_switch);
@@ -1894,38 +1893,6 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 }
 EXPORT_SYMBOL_GPL(call_rcu_bh);
 
-/*
- * Because a context switch is a grace period for RCU-sched and RCU-bh,
- * any blocking grace-period wait automatically implies a grace period
- * if there is only one CPU online at any point time during execution
- * of either synchronize_sched() or synchronize_rcu_bh().  It is OK to
- * occasionally incorrectly indicate that there are multiple CPUs online
- * when there was in fact only one the whole time, as this just adds
- * some overhead: RCU still operates correctly.
- *
- * Of course, sampling num_online_cpus() with preemption enabled can
- * give erroneous results if there are concurrent CPU-hotplug operations.
- * For example, given a demonic sequence of preemptions in num_online_cpus()
- * and CPU-hotplug operations, there could be two or more CPUs online at
- * all times, but num_online_cpus() might well return one (or even zero).
- *
- * However, all such demonic sequences require at least one CPU-offline
- * operation.  Furthermore, rcu_blocking_is_gp() giving the wrong answer
- * is only a problem if there is an RCU read-side critical section executing
- * throughout.  But RCU-sched and RCU-bh read-side critical sections
- * disable either preemption or bh, which prevents a CPU from going offline.
- * Therefore, the only way that rcu_blocking_is_gp() can incorrectly return
- * that there is only one CPU when in fact there was more than one throughout
- * is when there were no RCU readers in the system.  If there are no
- * RCU readers, the grace period by definition can be of zero length,
- * regardless of the number of online CPUs.
- */
-static inline int rcu_blocking_is_gp(void)
-{
-	might_sleep();  /* Check for RCU read-side critical section. */
-	return num_online_cpus() <= 1;
-}
-
 /**
  * synchronize_sched - wait until an rcu-sched grace period has elapsed.
  *
@@ -2450,7 +2417,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
 
 	for (i = NUM_RCU_LVLS - 1; i > 0; i--)
 		rsp->levelspread[i] = CONFIG_RCU_FANOUT;
-	rsp->levelspread[0] = CONFIG_RCU_FANOUT_LEAF;
+	rsp->levelspread[0] = RCU_FANOUT_LEAF;
 }
 #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
 static void __init rcu_init_levelspread(struct rcu_state *rsp)
diff --git a/trunk/kernel/rcutree.h b/trunk/kernel/rcutree.h
index a905c200405c..d6b70b08a01a 100644
--- a/trunk/kernel/rcutree.h
+++ b/trunk/kernel/rcutree.h
@@ -29,14 +29,18 @@
 #include <linux/seqlock.h>
 
 /*
- * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
- * CONFIG_RCU_FANOUT_LEAF.
+ * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT.
  * In theory, it should be possible to add more levels straightforwardly.
  * In practice, this did work well going from three levels to four.
  * Of course, your mileage may vary.
  */
 #define MAX_RCU_LVLS 4
-#define RCU_FANOUT_1	      (CONFIG_RCU_FANOUT_LEAF)
+#if CONFIG_RCU_FANOUT > 16
+#define RCU_FANOUT_LEAF       16
+#else /* #if CONFIG_RCU_FANOUT > 16 */
+#define RCU_FANOUT_LEAF       (CONFIG_RCU_FANOUT)
+#endif /* #else #if CONFIG_RCU_FANOUT > 16 */
+#define RCU_FANOUT_1	      (RCU_FANOUT_LEAF)
 #define RCU_FANOUT_2	      (RCU_FANOUT_1 * CONFIG_RCU_FANOUT)
 #define RCU_FANOUT_3	      (RCU_FANOUT_2 * CONFIG_RCU_FANOUT)
 #define RCU_FANOUT_4	      (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
@@ -419,7 +423,6 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work);
 /* Forward declarations for rcutree_plugin.h */
 static void rcu_bootup_announce(void);
 long rcu_batches_completed(void);
-static void rcu_preempt_note_context_switch(int cpu);
 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
 #ifdef CONFIG_HOTPLUG_CPU
 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
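
With CONFIG_RCU_FANOUT_LEAF gone, the leaf fanout above is simply
CONFIG_RCU_FANOUT capped at 16, and the reachable CPU count per tree depth
follows mechanically. A quick userspace check of the resulting capacities,
assuming the 64-bit default of CONFIG_RCU_FANOUT=64 (adjust the define for
other configurations):

	#include <stdio.h>

	#define CONFIG_RCU_FANOUT 64	/* 64BIT default; 32 on 32-bit. */

	#if CONFIG_RCU_FANOUT > 16
	#define RCU_FANOUT_LEAF 16	/* mirrors the cap introduced above */
	#else
	#define RCU_FANOUT_LEAF (CONFIG_RCU_FANOUT)
	#endif

	int main(void)
	{
		long cap = RCU_FANOUT_LEAF;
		int level;

		/* Prints 16, 1024, 65536, 4194304 for fanout 64. */
		for (level = 1; level <= 4; level++) {	/* MAX_RCU_LVLS == 4 */
			printf("%d level(s): up to %ld CPUs\n", level, cap);
			cap *= CONFIG_RCU_FANOUT;
		}
		return 0;
	}
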
diff --git a/trunk/kernel/rcutree_plugin.h b/trunk/kernel/rcutree_plugin.h
index c023464816be..b1ac22e6fa31 100644
--- a/trunk/kernel/rcutree_plugin.h
+++ b/trunk/kernel/rcutree_plugin.h
@@ -153,7 +153,7 @@ static void rcu_preempt_qs(int cpu)
  *
  * Caller must disable preemption.
  */
-static void rcu_preempt_note_context_switch(int cpu)
+void rcu_preempt_note_context_switch(void)
 {
 	struct task_struct *t = current;
 	unsigned long flags;
@@ -164,7 +164,7 @@ static void rcu_preempt_note_context_switch(int cpu)
 	    (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
 
 		/* Possibly blocking in an RCU read-side critical section. */
-		rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
+		rdp = __this_cpu_ptr(rcu_preempt_state.rda);
 		rnp = rdp->mynode;
 		raw_spin_lock_irqsave(&rnp->lock, flags);
 		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
@@ -228,7 +228,7 @@ static void rcu_preempt_note_context_switch(int cpu)
 	 * means that we continue to block the current grace period.
 	 */
 	local_irq_save(flags);
-	rcu_preempt_qs(cpu);
+	rcu_preempt_qs(smp_processor_id());
 	local_irq_restore(flags);
 }
 
@@ -1017,14 +1017,6 @@ void rcu_force_quiescent_state(void)
 }
 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
 
-/*
- * Because preemptible RCU does not exist, we never have to check for
- * CPUs being in quiescent states.
- */
-static void rcu_preempt_note_context_switch(int cpu)
-{
-}
-
 /*
  * Because preemptible RCU does not exist, there are never any preempted
  * RCU readers.
diff --git a/trunk/kernel/sched/core.c b/trunk/kernel/sched/core.c
index 4603b9d8f30a..5d89eb93f7e4 100644
--- a/trunk/kernel/sched/core.c
+++ b/trunk/kernel/sched/core.c
@@ -2083,6 +2083,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 #endif
 
 	/* Here we just switch the register state and the stack. */
+	rcu_switch_from(prev);
 	switch_to(prev, next, prev);
 
 	barrier();
diff --git a/trunk/kernel/timer.c b/trunk/kernel/timer.c
index 837c552fe838..a297ffcf888e 100644
--- a/trunk/kernel/timer.c
+++ b/trunk/kernel/timer.c
@@ -861,13 +861,7 @@ EXPORT_SYMBOL(mod_timer);
  *
  * mod_timer_pinned() is a way to update the expire field of an
  * active timer (if the timer is inactive it will be activated)
- * and to ensure that the timer is scheduled on the current CPU.
- *
- * Note that this does not prevent the timer from being migrated
- * when the current CPU goes offline.  If this is a problem for
- * you, use CPU-hotplug notifiers to handle it correctly, for
- * example, cancelling the timer when the corresponding CPU goes
- * offline.
+ * and not allow the timer to be migrated to a different CPU.
 *
 * mod_timer_pinned(timer, expires) is equivalent to:
 *
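The reworded mod_timer_pinned() comment above claims stronger semantics (no
migration at all, rather than only initial placement on the current CPU). A
sketch of the intended use, with hypothetical timer and handler names; 3.x-era
timer callbacks take an unsigned long cookie:

	#include <linux/timer.h>
	#include <linux/jiffies.h>

	static struct timer_list poll_timer;

	static void poll_timeout(unsigned long data)
	{
		/* ... per-CPU polling work ... */

		/* Re-arm one second out; per the comment above, the timer
		 * stays on the CPU it is running on instead of migrating. */
		mod_timer_pinned(&poll_timer, jiffies + HZ);
	}

	static void poll_start(void)
	{
		setup_timer(&poll_timer, poll_timeout, 0);
		mod_timer_pinned(&poll_timer, jiffies + HZ);
	}
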
diff --git a/trunk/lib/list_debug.c b/trunk/lib/list_debug.c
index 3810b481f940..982b850d4e7a 100644
--- a/trunk/lib/list_debug.c
+++ b/trunk/lib/list_debug.c
@@ -10,7 +10,6 @@
 #include <linux/list.h>
 #include <linux/bug.h>
 #include <linux/kernel.h>
-#include <linux/rculist.h>
 
 /*
  * Insert a new entry between two known consecutive entries.
@@ -76,24 +75,3 @@ void list_del(struct list_head *entry)
 	entry->prev = LIST_POISON2;
 }
 EXPORT_SYMBOL(list_del);
-
-/*
- * RCU variants.
- */
-void __list_add_rcu(struct list_head *new,
-		    struct list_head *prev, struct list_head *next)
-{
-	WARN(next->prev != prev,
-		"list_add_rcu corruption. next->prev should be "
-		"prev (%p), but was %p. (next=%p).\n",
-		prev, next->prev, next);
-	WARN(prev->next != next,
-		"list_add_rcu corruption. prev->next should be "
-		"next (%p), but was %p. (prev=%p).\n",
-		next, prev->next, prev);
-	new->next = next;
-	new->prev = prev;
-	rcu_assign_pointer(list_next_rcu(prev), new);
-	next->prev = new;
-}
-EXPORT_SYMBOL(__list_add_rcu);
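
The out-of-line __list_add_rcu() checks deleted here fired only when updaters
raced; they were diagnostic, not corrective. The discipline that keeps the now
unconditional inline version safe is unchanged: serialize all updaters, and
let readers rely on rcu_read_lock() alone. A minimal sketch of that convention
(lock and list names illustrative):

	#include <linux/rculist.h>
	#include <linux/spinlock.h>

	static LIST_HEAD(mylist);
	static DEFINE_SPINLOCK(mylist_lock);

	static void add_entry(struct list_head *new)
	{
		/* Two unserialized writers would produce exactly the
		 * inconsistent prev/next state the removed WARN()s reported. */
		spin_lock(&mylist_lock);
		list_add_rcu(new, &mylist);
		spin_unlock(&mylist_lock);
	}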