Skip to content

Commit

Permalink
Preempt-RCU: implementation
Browse files Browse the repository at this point in the history
This patch implements a new version of RCU which allows its read-side
critical sections to be preempted. It uses a set of counter pairs
to keep track of the read-side critical sections and flips them
when all tasks exit read-side critical section. The details
of this implementation can be found in this paper -

	http://www.rdrop.com/users/paulmck/RCU/OLSrtRCU.2006.08.11a.pdf

and the article-

	http://lwn.net/Articles/253651/

This patch was developed as a part of the -rt kernel development and
meant to provide better latencies when read-side critical sections of
RCU don't disable preemption.  As a consequence of keeping track of RCU
readers, the readers have a slight overhead (optimizations in the paper).
This implementation co-exists with the "classic" RCU implementations
and can be switched to at compiler.

Also includes RCU tracing summarized in debugfs.

[ akpm@linux-foundation.org: build fixes on non-preempt architectures ]

Signed-off-by: Gautham R Shenoy <ego@in.ibm.com>
Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Paul E. McKenney <paulmck@us.ibm.com>
Reviewed-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
  • Loading branch information
Paul E. McKenney authored and Ingo Molnar committed Jan 25, 2008
1 parent e0ecfa7 commit e260be6
Show file tree
Hide file tree
Showing 13 changed files with 1,394 additions and 7 deletions.
1 change: 0 additions & 1 deletion fs/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -2130,4 +2130,3 @@ source "fs/nls/Kconfig"
source "fs/dlm/Kconfig"

endmenu

3 changes: 3 additions & 0 deletions include/linux/rcuclassic.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,5 +157,8 @@ extern void __rcu_init(void);
extern void rcu_check_callbacks(int cpu, int user);
extern void rcu_restart_cpu(int cpu);

extern long rcu_batches_completed(void);
extern long rcu_batches_completed_bh(void);

#endif /* __KERNEL__ */
#endif /* __LINUX_RCUCLASSIC_H */
11 changes: 7 additions & 4 deletions include/linux/rcupdate.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,11 @@ struct rcu_head {
void (*func)(struct rcu_head *head);
};

#ifdef CONFIG_CLASSIC_RCU
#include <linux/rcuclassic.h>
#else /* #ifdef CONFIG_CLASSIC_RCU */
#include <linux/rcupreempt.h>
#endif /* #else #ifdef CONFIG_CLASSIC_RCU */

#define RCU_HEAD_INIT { .next = NULL, .func = NULL }
#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT
Expand Down Expand Up @@ -231,13 +235,12 @@ extern void call_rcu_bh(struct rcu_head *head,
/* Exported common interfaces */
extern void synchronize_rcu(void);
extern void rcu_barrier(void);
extern long rcu_batches_completed(void);
extern long rcu_batches_completed_bh(void);

/* Internal to kernel */
extern void rcu_init(void);
extern void rcu_check_callbacks(int cpu, int user);

extern long rcu_batches_completed(void);
extern long rcu_batches_completed_bh(void);
extern int rcu_needs_cpu(int cpu);

#endif /* __KERNEL__ */
#endif /* __LINUX_RCUPDATE_H */
86 changes: 86 additions & 0 deletions include/linux/rcupreempt.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
* Read-Copy Update mechanism for mutual exclusion (RT implementation)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2006
*
* Author: Paul McKenney <paulmck@us.ibm.com>
*
* Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com>
* and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
* Papers:
* http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
* http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
*
* For detailed explanation of Read-Copy Update mechanism see -
* Documentation/RCU
*
*/

#ifndef __LINUX_RCUPREEMPT_H
#define __LINUX_RCUPREEMPT_H

#ifdef __KERNEL__

#include <linux/cache.h>
#include <linux/spinlock.h>
#include <linux/threads.h>
#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/seqlock.h>

#define rcu_qsctr_inc(cpu)
#define rcu_bh_qsctr_inc(cpu)
#define call_rcu_bh(head, rcu) call_rcu(head, rcu)

extern void __rcu_read_lock(void);
extern void __rcu_read_unlock(void);
extern int rcu_pending(int cpu);
extern int rcu_needs_cpu(int cpu);

#define __rcu_read_lock_bh() { rcu_read_lock(); local_bh_disable(); }
#define __rcu_read_unlock_bh() { local_bh_enable(); rcu_read_unlock(); }

extern void __synchronize_sched(void);

extern void __rcu_init(void);
extern void rcu_check_callbacks(int cpu, int user);
extern void rcu_restart_cpu(int cpu);
extern long rcu_batches_completed(void);

/*
* Return the number of RCU batches processed thus far. Useful for debug
* and statistic. The _bh variant is identifcal to straight RCU
*/
static inline long rcu_batches_completed_bh(void)
{
return rcu_batches_completed();
}

#ifdef CONFIG_RCU_TRACE
struct rcupreempt_trace;
extern long *rcupreempt_flipctr(int cpu);
extern long rcupreempt_data_completed(void);
extern int rcupreempt_flip_flag(int cpu);
extern int rcupreempt_mb_flag(int cpu);
extern char *rcupreempt_try_flip_state_name(void);
extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu);
#endif

struct softirq_action;

#endif /* __KERNEL__ */
#endif /* __LINUX_RCUPREEMPT_H */
99 changes: 99 additions & 0 deletions include/linux/rcupreempt_trace.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
/*
* Read-Copy Update mechanism for mutual exclusion (RT implementation)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2006
*
* Author: Paul McKenney <paulmck@us.ibm.com>
*
* Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com>
* and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
* Papers:
* http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
* http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
*
* For detailed explanation of the Preemptible Read-Copy Update mechanism see -
* http://lwn.net/Articles/253651/
*/

#ifndef __LINUX_RCUPREEMPT_TRACE_H
#define __LINUX_RCUPREEMPT_TRACE_H

#ifdef __KERNEL__
#include <linux/types.h>
#include <linux/kernel.h>

#include <asm/atomic.h>

/*
* PREEMPT_RCU data structures.
*/

struct rcupreempt_trace {
long next_length;
long next_add;
long wait_length;
long wait_add;
long done_length;
long done_add;
long done_remove;
atomic_t done_invoked;
long rcu_check_callbacks;
atomic_t rcu_try_flip_1;
atomic_t rcu_try_flip_e1;
long rcu_try_flip_i1;
long rcu_try_flip_ie1;
long rcu_try_flip_g1;
long rcu_try_flip_a1;
long rcu_try_flip_ae1;
long rcu_try_flip_a2;
long rcu_try_flip_z1;
long rcu_try_flip_ze1;
long rcu_try_flip_z2;
long rcu_try_flip_m1;
long rcu_try_flip_me1;
long rcu_try_flip_m2;
};

#ifdef CONFIG_RCU_TRACE
#define RCU_TRACE(fn, arg) fn(arg);
#else
#define RCU_TRACE(fn, arg)
#endif

extern void rcupreempt_trace_move2done(struct rcupreempt_trace *trace);
extern void rcupreempt_trace_move2wait(struct rcupreempt_trace *trace);
extern void rcupreempt_trace_try_flip_1(struct rcupreempt_trace *trace);
extern void rcupreempt_trace_try_flip_e1(struct rcupreempt_trace *trace);
extern void rcupreempt_trace_try_flip_i1(struct rcupreempt_trace *trace);
extern void rcupreempt_trace_try_flip_ie1(struct rcupreempt_trace *trace);
extern void rcupreempt_trace_try_flip_g1(struct rcupreempt_trace *trace);
extern void rcupreempt_trace_try_flip_a1(struct rcupreempt_trace *trace);
extern void rcupreempt_trace_try_flip_ae1(struct rcupreempt_trace *trace);
extern void rcupreempt_trace_try_flip_a2(struct rcupreempt_trace *trace);
extern void rcupreempt_trace_try_flip_z1(struct rcupreempt_trace *trace);
extern void rcupreempt_trace_try_flip_ze1(struct rcupreempt_trace *trace);
extern void rcupreempt_trace_try_flip_z2(struct rcupreempt_trace *trace);
extern void rcupreempt_trace_try_flip_m1(struct rcupreempt_trace *trace);
extern void rcupreempt_trace_try_flip_me1(struct rcupreempt_trace *trace);
extern void rcupreempt_trace_try_flip_m2(struct rcupreempt_trace *trace);
extern void rcupreempt_trace_check_callbacks(struct rcupreempt_trace *trace);
extern void rcupreempt_trace_done_remove(struct rcupreempt_trace *trace);
extern void rcupreempt_trace_invoke(struct rcupreempt_trace *trace);
extern void rcupreempt_trace_next_add(struct rcupreempt_trace *trace);

#endif /* __KERNEL__ */
#endif /* __LINUX_RCUPREEMPT_TRACE_H */
5 changes: 5 additions & 0 deletions include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -974,6 +974,11 @@ struct task_struct {
int nr_cpus_allowed;
unsigned int time_slice;

#ifdef CONFIG_PREEMPT_RCU
int rcu_read_lock_nesting;
int rcu_flipctr_idx;
#endif /* #ifdef CONFIG_PREEMPT_RCU */

#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
struct sched_info sched_info;
#endif
Expand Down
28 changes: 28 additions & 0 deletions init/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -763,3 +763,31 @@ source "block/Kconfig"

config PREEMPT_NOTIFIERS
bool

choice
prompt "RCU implementation type:"
default CLASSIC_RCU

config CLASSIC_RCU
bool "Classic RCU"
help
This option selects the classic RCU implementation that is
designed for best read-side performance on non-realtime
systems.

Say Y if you are unsure.

config PREEMPT_RCU
bool "Preemptible RCU"
depends on PREEMPT
help
This option reduces the latency of the kernel by making certain
RCU sections preemptible. Normally RCU code is non-preemptible, if
this option is selected then read-only RCU sections become
preemptible. This helps latency, but may expose bugs due to
now-naive assumptions about each RCU read-side critical section
remaining on a given CPU through its execution.

Say N if you are unsure.

endchoice
10 changes: 10 additions & 0 deletions kernel/Kconfig.preempt
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,13 @@ config PREEMPT_BKL
Say Y here if you are building a kernel for a desktop system.
Say N if you are unsure.

config RCU_TRACE
bool "Enable tracing for RCU - currently stats in debugfs"
select DEBUG_FS
default y
help
This option provides tracing in RCU which presents stats
in debugfs for debugging RCU implementation.

Say Y here if you want to enable RCU tracing
Say N if you are unsure.
7 changes: 6 additions & 1 deletion kernel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
exit.o itimer.o time.o softirq.o resource.o \
sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \
signal.o sys.o kmod.o workqueue.o pid.o \
rcupdate.o rcuclassic.o extable.o params.o posix-timers.o \
rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o rwsem.o latency.o nsproxy.o srcu.o \
utsname.o notifier.o
Expand Down Expand Up @@ -52,6 +52,11 @@ obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
obj-$(CONFIG_CLASSIC_RCU) += rcuclassic.o
obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o
ifeq ($(CONFIG_PREEMPT_RCU),y)
obj-$(CONFIG_RCU_TRACE) += rcupreempt_trace.o
endif
obj-$(CONFIG_RELAY) += relay.o
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
Expand Down
4 changes: 4 additions & 0 deletions kernel/fork.c
Original file line number Diff line number Diff line change
Expand Up @@ -1045,6 +1045,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
copy_flags(clone_flags, p);
INIT_LIST_HEAD(&p->children);
INIT_LIST_HEAD(&p->sibling);
#ifdef CONFIG_PREEMPT_RCU
p->rcu_read_lock_nesting = 0;
p->rcu_flipctr_idx = 0;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
p->vfork_done = NULL;
spin_lock_init(&p->alloc_lock);

Expand Down
1 change: 0 additions & 1 deletion kernel/rcuclassic.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
/* #include <linux/rcupdate.h> @@@ */
#include <linux/cpu.h>
#include <linux/mutex.h>

Expand Down
Loading

0 comments on commit e260be6

Please sign in to comment.