Skip to content

Commit

Permalink
Merge branch 'rcu/urgent' of git://git.kernel.org/pub/scm/linux/kerne…
Browse files Browse the repository at this point in the history
…l/git/paulmck/linux-rcu into rcu/urgent

Pull an urgent RCU fix from Paul E. McKenney:

 "This series contains a pair of commits that permit RCU synchronous grace
  periods (synchronize_rcu() and friends) to work correctly throughout boot.
  This eliminates the current "dead time" starting when the scheduler spawns
  its first tasks and ending when the last of RCU's kthreads is spawned
  (this last happens during early_initcall() time).  Although RCU's
  synchronous grace periods have long been documented as not working
  during this time, prior to 4.9, the expedited grace periods worked by
  accident, and some ACPI code came to rely on this unintentional behavior.
  (Note that this unintentional behavior was -not- reliable.  For example,
  failures from ACPI could occur on !SMP systems and on systems booting
  with the rcu_normal kernel boot parameter.)

  Either way, there is a bug that needs fixing, and the 4.9 switch of RCU's
  expedited grace periods to workqueues could be considered to have caused
  a regression.  This series therefore makes RCU's expedited grace periods
  operate correctly throughout the boot process.  This has been demonstrated
  to fix the problems ACPI was encountering, and has the added longer-term
  benefit of simplifying RCU's behavior."

Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Ingo Molnar committed Jan 16, 2017
2 parents f4d3935 + 52d7e48 commit 3e4f7a4
Show file tree
Hide file tree
Showing 8 changed files with 104 additions and 39 deletions.
4 changes: 4 additions & 0 deletions include/linux/rcupdate.h
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,10 @@ bool __rcu_is_watching(void);
#error "Unknown RCU implementation specified to kernel configuration"
#endif

#define RCU_SCHEDULER_INACTIVE 0
#define RCU_SCHEDULER_INIT 1
#define RCU_SCHEDULER_RUNNING 2

/*
* init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic
* initialization and destruction of rcu_head on the stack. rcu_head structures
Expand Down
1 change: 1 addition & 0 deletions kernel/rcu/rcu.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ int rcu_jiffies_till_stall_check(void);
#define TPS(x) tracepoint_string(x)

void rcu_early_boot_tests(void);
void rcu_test_sync_prims(void);

/*
* This function really isn't for public consumption, but RCU is special in
Expand Down
4 changes: 0 additions & 4 deletions kernel/rcu/tiny.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,17 +185,13 @@ static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused
* benefits of doing might_sleep() to reduce latency.)
*
* Cool, huh? (Due to Josh Triplett.)
*
* But we want to make this a static inline later. The cond_resched()
* currently makes this problematic.
*/
void synchronize_sched(void)
{
/*
* Lockdep check: the condition is true if any of the three RCU lock
* maps (bh, normal, sched) is held by the caller, in which case the
* message below is reported.
*/
RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
lock_is_held(&rcu_lock_map) ||
lock_is_held(&rcu_sched_lock_map),
"Illegal synchronize_sched() in RCU read-side critical section");
/*
* NOTE(review): this page reports "0 additions & 4 deletions" for
* kernel/rcu/tiny.c and shows no +/- markers, so this call is
* presumably one of the lines the commit removes -- confirm against
* the marked diff.
*/
cond_resched();
}
EXPORT_SYMBOL_GPL(synchronize_sched);

Expand Down
9 changes: 7 additions & 2 deletions kernel/rcu/tiny_plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,17 @@ EXPORT_SYMBOL_GPL(rcu_scheduler_active);

/*
* During boot, we forgive RCU lockdep issues. After this function is
* invoked, we start taking RCU lockdep issues seriously.
* invoked, we start taking RCU lockdep issues seriously. Note that unlike
* Tree RCU, Tiny RCU transitions directly from RCU_SCHEDULER_INACTIVE
* to RCU_SCHEDULER_RUNNING, skipping the RCU_SCHEDULER_INIT stage.
* The reason for this is that Tiny RCU does not need kthreads, so does
* not have to care about the fact that the scheduler is half-initialized
* at a certain phase of the boot process.
*/
void __init rcu_scheduler_starting(void)
{
/* Warn if any context switch has already been counted. */
WARN_ON(nr_context_switches() > 0);
/*
* NOTE(review): two assignments appear back to back because this
* listing carries no +/- markers.  The comment preceding this function
* describes a direct move to RCU_SCHEDULER_RUNNING, so the literal 1
* is presumably the line being replaced -- confirm.
*/
rcu_scheduler_active = 1;
rcu_scheduler_active = RCU_SCHEDULER_RUNNING;
}

#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
Expand Down
33 changes: 20 additions & 13 deletions kernel/rcu/tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,13 +127,16 @@ int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
int sysctl_panic_on_rcu_stall __read_mostly;

/*
* The rcu_scheduler_active variable transitions from zero to one just
* before the first task is spawned. So when this variable is zero, RCU
* can assume that there is but one task, allowing RCU to (for example)
* The rcu_scheduler_active variable is initialized to the value
* RCU_SCHEDULER_INACTIVE and transitions to RCU_SCHEDULER_INIT just before the
* first task is spawned. So when this variable is RCU_SCHEDULER_INACTIVE,
* RCU can assume that there is but one task, allowing RCU to (for example)
* optimize synchronize_rcu() to a simple barrier(). When this variable
* is one, RCU must actually do all the hard work required to detect real
* grace periods. This variable is also used to suppress boot-time false
* positives from lockdep-RCU error checking.
* is RCU_SCHEDULER_INIT, RCU must actually do all the hard work required
* to detect real grace periods. This variable is also used to suppress
* boot-time false positives from lockdep-RCU error checking. Finally, it
* transitions from RCU_SCHEDULER_INIT to RCU_SCHEDULER_RUNNING after RCU
* is fully initialized, including all of its kthreads having been spawned.
*/
int rcu_scheduler_active __read_mostly;
EXPORT_SYMBOL_GPL(rcu_scheduler_active);
Expand Down Expand Up @@ -3980,18 +3983,22 @@ static int __init rcu_spawn_gp_kthread(void)
early_initcall(rcu_spawn_gp_kthread);

/*
* This function is invoked towards the end of the scheduler's initialization
* process. Before this is called, the idle task might contain
* RCU read-side critical sections (during which time, this idle
* task is booting the system). After this function is called, the
* idle tasks are prohibited from containing RCU read-side critical
* sections. This function also enables RCU lockdep checking.
* This function is invoked towards the end of the scheduler's
* initialization process. Before this is called, the idle task might
* contain synchronous grace-period primitives (during which time, this idle
* task is booting the system, and such primitives are no-ops). After this
* function is called, any synchronous grace-period primitives are run as
* expedited, with the requesting task driving the grace period forward.
* A later core_initcall() rcu_exp_runtime_mode() will switch to full
* runtime RCU functionality.
*/
void rcu_scheduler_starting(void)
{
/* Warn unless exactly one CPU is online and no context switch has been counted. */
WARN_ON(num_online_cpus() != 1);
WARN_ON(nr_context_switches() > 0);
/*
* NOTE(review): this listing carries no +/- markers, so the assignment
* of the literal 1 is presumably the replaced line and the sequence
* "test, set RCU_SCHEDULER_INIT, test" is the new code -- confirm.
* rcu_test_sync_prims() is called on both sides of the mode change.
*/
rcu_scheduler_active = 1;
rcu_test_sync_prims();
rcu_scheduler_active = RCU_SCHEDULER_INIT;
rcu_test_sync_prims();
}

/*
Expand Down
52 changes: 41 additions & 11 deletions kernel/rcu/tree_exp.h
Original file line number Diff line number Diff line change
Expand Up @@ -531,19 +531,29 @@ struct rcu_exp_work {
struct work_struct rew_work;
};

/*
* Common code to drive an expedited grace period forward, used by
* workqueues and mid-boot-time tasks.
*
* @rsp:  RCU state, passed through to both helpers below.
* @func: handler passed to sync_rcu_exp_select_cpus().
* @s:    value passed to rcu_exp_wait_wake() -- presumably the expedited
*        grace-period sequence number; TODO confirm against the callee.
*
* The two steps run in this order: select first, then wait and wake.
*/
static void rcu_exp_sel_wait_wake(struct rcu_state *rsp,
smp_call_func_t func, unsigned long s)
{
/* Initialize the rcu_node tree in preparation for the wait. */
sync_rcu_exp_select_cpus(rsp, func);

/* Wait and clean up, including waking everyone. */
rcu_exp_wait_wake(rsp, s);
}

/*
* Work-queue handler to drive an expedited grace period forward.
*
* @wp: the rew_work member embedded in a struct rcu_exp_work; the
*      enclosing structure is recovered with container_of() and supplies
*      the rew_rsp, rew_func and rew_s arguments.
*/
static void wait_rcu_exp_gp(struct work_struct *wp)
{
struct rcu_exp_work *rewp;

/* Initialize the rcu_node tree in preparation for the wait. */
rewp = container_of(wp, struct rcu_exp_work, rew_work);
sync_rcu_exp_select_cpus(rewp->rew_rsp, rewp->rew_func);

/* Wait and clean up, including waking everyone. */
rcu_exp_wait_wake(rewp->rew_rsp, rewp->rew_s);
/*
* NOTE(review): rcu_exp_sel_wait_wake() performs the same select and
* wait/wake steps as the two direct calls above.  This listing carries
* no +/- markers, so the direct calls are presumably the removed lines
* and this helper call their replacement -- confirm.
*/
rcu_exp_sel_wait_wake(rewp->rew_rsp, rewp->rew_func, rewp->rew_s);
}

/*
Expand All @@ -569,12 +579,18 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp,
if (exp_funnel_lock(rsp, s))
return; /* Someone else did our work for us. */

/* Marshall arguments and schedule the expedited grace period. */
rew.rew_func = func;
rew.rew_rsp = rsp;
rew.rew_s = s;
INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
schedule_work(&rew.rew_work);
/* Ensure that load happens before action based on it. */
if (unlikely(rcu_scheduler_active == RCU_SCHEDULER_INIT)) {
/* Direct call during scheduler init and early_initcalls(). */
rcu_exp_sel_wait_wake(rsp, func, s);
} else {
/* Marshall arguments & schedule the expedited grace period. */
rew.rew_func = func;
rew.rew_rsp = rsp;
rew.rew_s = s;
INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
schedule_work(&rew.rew_work);
}

/* Wait for expedited grace period to complete. */
rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
Expand Down Expand Up @@ -676,6 +692,8 @@ void synchronize_rcu_expedited(void)
{
struct rcu_state *rsp = rcu_state_p;

if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
return;
_synchronize_rcu_expedited(rsp, sync_rcu_exp_handler);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
Expand All @@ -693,3 +711,15 @@ void synchronize_rcu_expedited(void)
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);

#endif /* #else #ifdef CONFIG_PREEMPT_RCU */

/*
* Switch to run-time mode once Tree RCU has fully initialized.
*
* Registered with core_initcall().  Sets rcu_scheduler_active to
* RCU_SCHEDULER_RUNNING and calls rcu_test_sync_prims() immediately before
* and immediately after that change.  Always returns 0.
*/
static int __init rcu_exp_runtime_mode(void)
{
/* Keep this order: test, change mode, test again. */
rcu_test_sync_prims();
rcu_scheduler_active = RCU_SCHEDULER_RUNNING;
rcu_test_sync_prims();
return 0;
}
core_initcall(rcu_exp_runtime_mode);
2 changes: 1 addition & 1 deletion kernel/rcu/tree_plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -670,7 +670,7 @@ void synchronize_rcu(void)
lock_is_held(&rcu_lock_map) ||
lock_is_held(&rcu_sched_lock_map),
"Illegal synchronize_rcu() in RCU read-side critical section");
if (!rcu_scheduler_active)
if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
return;
if (rcu_gp_is_expedited())
synchronize_rcu_expedited();
Expand Down
38 changes: 30 additions & 8 deletions kernel/rcu/update.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,14 @@ EXPORT_SYMBOL(rcu_read_lock_sched_held);
* Should expedited grace-period primitives always fall back to their
* non-expedited counterparts? Intended for use within RCU. Note
* that if the user specifies both rcu_expedited and rcu_normal, then
* rcu_normal wins.
* rcu_normal wins. (Except between the spawning of the first task during
* boot and the invocation of the rcu_exp_runtime_mode() core_initcall(),
* during which time everything is expedited.)
*/
bool rcu_gp_is_normal(void)
{
/*
* NOTE(review): two return statements appear because this listing
* carries no +/- markers; as written only the first is reachable.
* The second also requires rcu_scheduler_active != RCU_SCHEDULER_INIT,
* which matches the mid-boot exception described in the comment above
* this function, so it is presumably the intended form -- confirm.
*/
return READ_ONCE(rcu_normal);
return READ_ONCE(rcu_normal) &&
rcu_scheduler_active != RCU_SCHEDULER_INIT;
}
EXPORT_SYMBOL_GPL(rcu_gp_is_normal);

Expand All @@ -135,13 +138,14 @@ static atomic_t rcu_expedited_nesting =
/*
* Should normal grace-period primitives be expedited? Intended for
* use within RCU. Note that this function takes the rcu_expedited
* sysfs/boot variable into account as well as the rcu_expedite_gp()
* nesting. So looping on rcu_unexpedite_gp() until rcu_gp_is_expedited()
* returns false is a -really- bad idea.
* sysfs/boot variable and rcu_scheduler_active into account as well
* as the rcu_expedite_gp() nesting. So looping on rcu_unexpedite_gp()
* until rcu_gp_is_expedited() returns false is a -really- bad idea.
*/
bool rcu_gp_is_expedited(void)
{
/*
* NOTE(review): two return statements appear because this listing
* carries no +/- markers; as written only the first is reachable.
* The second additionally reports true while rcu_scheduler_active ==
* RCU_SCHEDULER_INIT and is presumably the intended form -- confirm.
*/
return rcu_expedited || atomic_read(&rcu_expedited_nesting);
return rcu_expedited || atomic_read(&rcu_expedited_nesting) ||
rcu_scheduler_active == RCU_SCHEDULER_INIT;
}
EXPORT_SYMBOL_GPL(rcu_gp_is_expedited);

Expand Down Expand Up @@ -257,7 +261,7 @@ EXPORT_SYMBOL_GPL(rcu_callback_map);

int notrace debug_lockdep_rcu_enabled(void)
{
/*
* NOTE(review): the next two lines are alternative openings of one
* return expression (this listing carries no +/- markers), so the
* text does not parse as shown.  The second, which compares against
* RCU_SCHEDULER_INACTIVE explicitly, is presumably the replacement for
* the first -- confirm.  Both forms also require debug_locks to be
* nonzero and current->lockdep_recursion to be 0.
*/
return rcu_scheduler_active && debug_locks &&
return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks &&
current->lockdep_recursion == 0;
}
EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
Expand Down Expand Up @@ -591,7 +595,7 @@ EXPORT_SYMBOL_GPL(call_rcu_tasks);
void synchronize_rcu_tasks(void)
{
/* Complain if the scheduler has not started. */
RCU_LOCKDEP_WARN(!rcu_scheduler_active,
RCU_LOCKDEP_WARN(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE,
"synchronize_rcu_tasks called too soon");

/* Wait for the grace period. */
Expand Down Expand Up @@ -813,6 +817,23 @@ static void rcu_spawn_tasks_kthread(void)

#endif /* #ifdef CONFIG_TASKS_RCU */

/*
 * Exercise each non-SRCU synchronous grace-period wait primitive once:
 * the three normal variants followed by their expedited counterparts.
 * Handy right after these primitives change mode, and during early boot.
 * Does nothing unless CONFIG_PROVE_RCU is enabled.
 */
void rcu_test_sync_prims(void)
{
	if (IS_ENABLED(CONFIG_PROVE_RCU)) {
		/* Normal grace-period waits. */
		synchronize_rcu();
		synchronize_rcu_bh();
		synchronize_sched();

		/* Expedited grace-period waits. */
		synchronize_rcu_expedited();
		synchronize_rcu_bh_expedited();
		synchronize_sched_expedited();
	}
}

#ifdef CONFIG_PROVE_RCU

/*
Expand Down Expand Up @@ -865,6 +886,7 @@ void rcu_early_boot_tests(void)
early_boot_test_call_rcu_bh();
if (rcu_self_test_sched)
early_boot_test_call_rcu_sched();
rcu_test_sync_prims();
}

static int rcu_verify_early_boot_tests(void)
Expand Down

0 comments on commit 3e4f7a4

Please sign in to comment.