Skip to content

Commit

Permalink
rcu: Track idleness independent of idle tasks
Browse files Browse the repository at this point in the history
Earlier versions of RCU used the scheduling-clock tick to detect idleness
by checking for the idle task, but handled idleness differently for
CONFIG_NO_HZ=y.  But there are now a number of uses of RCU read-side
critical sections in the idle task, for example, for tracing.  A more
fine-grained detection of idleness is therefore required.

This commit presses the old dyntick-idle code into full-time service,
so that rcu_idle_enter(), previously known as rcu_enter_nohz(), is
always invoked at the beginning of an idle loop iteration.  Similarly,
rcu_idle_exit(), previously known as rcu_exit_nohz(), is always invoked
at the end of an idle-loop iteration.  This allows the idle task to
use RCU everywhere except between consecutive rcu_idle_enter() and
rcu_idle_exit() calls, in turn allowing architecture maintainers to
specify exactly where in the idle loop RCU may be used.

Because some of the userspace upcall uses can result in what looks
to RCU like half of an interrupt, it is not possible to expect that
the irq_enter() and irq_exit() hooks will give exact counts.  This
patch therefore expands the ->dynticks_nesting counter to 64 bits
and uses two separate bitfields to count process/idle transitions
and interrupt entry/exit transitions.  It is presumed that userspace
upcalls do not happen in the idle loop or from usermode execution
(though usermode might do a system call that results in an upcall).
The counter is hard-reset on each process/idle transition, which
prevents the interrupt entry/exit error from accumulating.  Overflow
is avoided by the 64-bitness of the ->dynticks_nesting counter.

This commit also adds warnings if a non-idle task asks RCU to enter
idle state (these checks will need some adjustment before applying
Frederic's OS-jitter patches, http://lkml.org/lkml/2011/10/7/246).
In addition, validation of ->dynticks and ->dynticks_nesting is added.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
  • Loading branch information
Paul E. McKenney authored and Paul E. McKenney committed Dec 11, 2011
1 parent b804cb9 commit 9b2e4f1
Show file tree
Hide file tree
Showing 10 changed files with 297 additions and 154 deletions.
4 changes: 0 additions & 4 deletions Documentation/RCU/trace.txt
Original file line number Diff line number Diff line change
Expand Up @@ -105,14 +105,10 @@ o "dt" is the current value of the dyntick counter that is incremented
or one greater than the interrupt-nesting depth otherwise.
The number after the second "/" is the NMI nesting depth.

This field is displayed only for CONFIG_NO_HZ kernels.

o "df" is the number of times that some other CPU has forced a
quiescent state on behalf of this CPU due to this CPU being in
dynticks-idle state.

This field is displayed only for CONFIG_NO_HZ kernels.

o "of" is the number of times that some other CPU has forced a
quiescent state on behalf of this CPU due to this CPU being
offline. In a perfect world, this might never happen, but it
Expand Down
21 changes: 0 additions & 21 deletions include/linux/hardirq.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,20 +139,7 @@ static inline void account_system_vtime(struct task_struct *tsk)
extern void account_system_vtime(struct task_struct *tsk);
#endif

#if defined(CONFIG_NO_HZ)
#if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
extern void rcu_enter_nohz(void);
extern void rcu_exit_nohz(void);

static inline void rcu_irq_enter(void)
{
rcu_exit_nohz();
}

static inline void rcu_irq_exit(void)
{
rcu_enter_nohz();
}

static inline void rcu_nmi_enter(void)
{
Expand All @@ -163,17 +150,9 @@ static inline void rcu_nmi_exit(void)
}

#else
extern void rcu_irq_enter(void);
extern void rcu_irq_exit(void);
extern void rcu_nmi_enter(void);
extern void rcu_nmi_exit(void);
#endif
#else
# define rcu_irq_enter() do { } while (0)
# define rcu_irq_exit() do { } while (0)
# define rcu_nmi_enter() do { } while (0)
# define rcu_nmi_exit() do { } while (0)
#endif /* #if defined(CONFIG_NO_HZ) */

/*
* It is safe to do non-atomic ops on ->hardirq_context,
Expand Down
21 changes: 4 additions & 17 deletions include/linux/rcupdate.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,23 +177,10 @@ extern void rcu_sched_qs(int cpu);
extern void rcu_bh_qs(int cpu);
extern void rcu_check_callbacks(int cpu, int user);
struct notifier_block;

#ifdef CONFIG_NO_HZ

extern void rcu_enter_nohz(void);
extern void rcu_exit_nohz(void);

#else /* #ifdef CONFIG_NO_HZ */

static inline void rcu_enter_nohz(void)
{
}

static inline void rcu_exit_nohz(void)
{
}

#endif /* #else #ifdef CONFIG_NO_HZ */
extern void rcu_idle_enter(void);
extern void rcu_idle_exit(void);
extern void rcu_irq_enter(void);
extern void rcu_irq_exit(void);

/*
* Infrastructure to implement the synchronize_() primitives in
Expand Down
11 changes: 9 additions & 2 deletions include/linux/tick.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,15 @@ extern ktime_t tick_nohz_get_sleep_length(void);
extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
# else
static inline void tick_nohz_stop_sched_tick(int inidle) { }
static inline void tick_nohz_restart_sched_tick(void) { }
/*
 * Stub variant: performs no tick work.  When @inidle is nonzero it only
 * tells RCU that the idle loop is being entered.
 */
static inline void tick_nohz_stop_sched_tick(int inidle)
{
	if (!inidle)
		return;
	rcu_idle_enter();
}
/* Stub variant: performs no tick work, only tells RCU that idle is being left. */
static inline void tick_nohz_restart_sched_tick(void)
{
rcu_idle_exit();
}
static inline ktime_t tick_nohz_get_sleep_length(void)
{
ktime_t len = { .tv64 = NSEC_PER_SEC/HZ };
Expand Down
10 changes: 6 additions & 4 deletions include/trace/events/rcu.h
Original file line number Diff line number Diff line change
Expand Up @@ -246,19 +246,21 @@ TRACE_EVENT(rcu_fqs,
*/
TRACE_EVENT(rcu_dyntick,

TP_PROTO(char *polarity),
TP_PROTO(char *polarity, int nesting),

TP_ARGS(polarity),
TP_ARGS(polarity, nesting),

TP_STRUCT__entry(
__field(char *, polarity)
__field(int, nesting)
),

TP_fast_assign(
__entry->polarity = polarity;
__entry->nesting = nesting;
),

TP_printk("%s", __entry->polarity)
TP_printk("%s %d", __entry->polarity, __entry->nesting)
);

/*
Expand Down Expand Up @@ -443,7 +445,7 @@ TRACE_EVENT(rcu_batch_end,
#define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)
#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0)
#define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0)
#define trace_rcu_dyntick(polarity) do { } while (0)
#define trace_rcu_dyntick(polarity, nesting) do { } while (0)
#define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0)
#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0)
#define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0)
Expand Down
124 changes: 107 additions & 17 deletions kernel/rcutiny.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,31 +53,122 @@ static void __call_rcu(struct rcu_head *head,

#include "rcutiny_plugin.h"

#ifdef CONFIG_NO_HZ
static long long rcu_dynticks_nesting = LLONG_MAX / 2;

static long rcu_dynticks_nesting = 1;
/*
 * Shared tail of rcu_idle_enter() and rcu_irq_exit(); see kernel/rcutree.c
 * for the tree-RCU counterpart.  Both callers update rcu_dynticks_nesting
 * before calling this and hold interrupts disabled across the call.
 */
static void rcu_idle_enter_common(void)
{
	if (rcu_dynticks_nesting == 0) {
		RCU_TRACE(trace_rcu_dyntick("Start", rcu_dynticks_nesting));
		if (!idle_cpu(smp_processor_id())) {
			WARN_ON_ONCE(1); /* must be idle task! */
			RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task",
						    rcu_dynticks_nesting));
			ftrace_dump(DUMP_ALL);
		}
		/* Nesting has reached zero: report a quiescent state. */
		rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */
	} else {
		/* Still nested, so not idle yet: only trace the new level. */
		RCU_TRACE(trace_rcu_dyntick("--=", rcu_dynticks_nesting));
	}
}

/*
* Enter dynticks-idle mode, which is an extended quiescent state
* if we have fully entered that mode (i.e., if the new value of
* dynticks_nesting is zero).
* Enter idle, which is an extended quiescent state if we have fully
* entered that mode (i.e., if the new value of dynticks_nesting is zero).
*/
void rcu_enter_nohz(void)
void rcu_idle_enter(void)
{
if (--rcu_dynticks_nesting == 0)
rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */
unsigned long flags;

local_irq_save(flags);
rcu_dynticks_nesting = 0;
rcu_idle_enter_common();
local_irq_restore(flags);
}

/*
* Exit dynticks-idle mode, so that we are no longer in an extended
* quiescent state.
* Exit an interrupt handler towards idle.
*/
void rcu_irq_exit(void)
{
	unsigned long irqflags;

	local_irq_save(irqflags);
	/* One fewer interrupt level; the count must never go negative. */
	--rcu_dynticks_nesting;
	WARN_ON_ONCE(rcu_dynticks_nesting < 0);
	/* Reports a quiescent state if the count has dropped to zero. */
	rcu_idle_enter_common();
	local_irq_restore(irqflags);
}

/*
 * Shared tail of rcu_idle_exit() and rcu_irq_enter(); see kernel/rcutree.c
 * for the tree-RCU counterpart.  @oldval is the value rcu_dynticks_nesting
 * held before the caller updated it.
 */
static void rcu_idle_exit_common(long long oldval)
{
	if (oldval == 0) {
		/* Leaving the fully idle state. */
		RCU_TRACE(trace_rcu_dyntick("End", oldval));
		if (!idle_cpu(smp_processor_id())) {
			WARN_ON_ONCE(1); /* must be idle task! */
			RCU_TRACE(trace_rcu_dyntick("Error on exit: not idle task",
						    oldval));
			ftrace_dump(DUMP_ALL);
		}
	} else {
		/* Was already non-idle: only trace the new nesting level. */
		RCU_TRACE(trace_rcu_dyntick("++=", rcu_dynticks_nesting));
	}
}

/*
* Exit idle, so that we are no longer in an extended quiescent state.
*/
void rcu_exit_nohz(void)
void rcu_idle_exit(void)
{
	unsigned long irqflags;
	long long prev;

	local_irq_save(irqflags);
	prev = rcu_dynticks_nesting;
	/* The counter is expected to be zero (idle) at this point. */
	WARN_ON_ONCE(prev != 0);
	/* Hard-reset the counter to its non-idle value. */
	rcu_dynticks_nesting = LLONG_MAX / 2;
	rcu_idle_exit_common(prev);
	local_irq_restore(irqflags);
}

/*
 * Account for entry into an interrupt handler: one more nesting level,
 * moving this CPU away from idle.
 */
void rcu_irq_enter(void)
{
	unsigned long irqflags;
	long long prev;

	local_irq_save(irqflags);
	/* Remember the pre-increment value for the common exit-from-idle code. */
	prev = rcu_dynticks_nesting++;
	/* A result of zero means the counter was negative beforehand. */
	WARN_ON_ONCE(rcu_dynticks_nesting == 0);
	rcu_idle_exit_common(prev);
	local_irq_restore(irqflags);
}

#ifdef CONFIG_PROVE_RCU

/*
 * Report whether RCU currently regards this CPU as idle, that is,
 * whether rcu_dynticks_nesting is zero.  Returns 1 if idle, 0 otherwise.
 */
int rcu_is_cpu_idle(void)
{
	return rcu_dynticks_nesting == 0;
}

#endif /* #ifdef CONFIG_NO_HZ */
#endif /* #ifdef CONFIG_PROVE_RCU */

/*
 * Test whether the current CPU was interrupted from idle.  Nested
 * interrupts don't count, we must be running at the first interrupt
 * level.
 *
 * rcu_idle_enter() sets rcu_dynticks_nesting to zero and rcu_irq_enter()
 * increments it, so a first-level interrupt taken from idle observes a
 * value of one.  Comparing against zero would never succeed for a caller
 * that has already passed through rcu_irq_enter(), so idle would never be
 * detected from the scheduling-clock interrupt.
 *
 * Returns nonzero if the CPU was interrupted from idle, zero otherwise.
 */
int rcu_is_cpu_rrupt_from_idle(void)
{
	return rcu_dynticks_nesting <= 1;
}

/*
* Helper function for rcu_sched_qs() and rcu_bh_qs().
Expand Down Expand Up @@ -126,14 +217,13 @@ void rcu_bh_qs(int cpu)

/*
* Check to see if the scheduling-clock interrupt came from an extended
* quiescent state, and, if so, tell RCU about it.
* quiescent state, and, if so, tell RCU about it. This function must
* be called from hardirq context. It is normally called from the
* scheduling-clock interrupt.
*/
void rcu_check_callbacks(int cpu, int user)
{
if (user ||
(idle_cpu(cpu) &&
!in_softirq() &&
hardirq_count() <= (1 << HARDIRQ_SHIFT)))
if (user || rcu_is_cpu_rrupt_from_idle())
rcu_sched_qs(cpu);
else if (!in_softirq())
rcu_bh_qs(cpu);
Expand Down
Loading

0 comments on commit 9b2e4f1

Please sign in to comment.