Skip to content

Commit

Permalink
rcu: Have the RCU tracepoints use the tracepoint_string infrastructure
Browse files Browse the repository at this point in the history
Currently, RCU tracepoints save only a pointer to strings in the
ring buffer. When displayed via the /sys/kernel/debug/tracing/trace file
they are referenced like the printf "%s" that looks at the address
in the ring buffer and prints out the string it points too. This requires
that the strings are constant and persistent in the kernel.

The problem with this is for tools like trace-cmd and perf that read the
binary data from the buffers but have no access to the kernel memory to
find out what string is represented by the address in the buffer.

By using the tracepoint_string infrastructure, the RCU tracepoint strings
can be exported such that userspace tools can map the addresses to
the strings.

 # cat /sys/kernel/debug/tracing/printk_formats
0xffffffff81a4a0e8 : "rcu_preempt"
0xffffffff81a4a0f4 : "rcu_bh"
0xffffffff81a4a100 : "rcu_sched"
0xffffffff818437a0 : "cpuqs"
0xffffffff818437a6 : "rcu_sched"
0xffffffff818437a0 : "cpuqs"
0xffffffff818437b0 : "rcu_bh"
0xffffffff818437b7 : "Start context switch"
0xffffffff818437cc : "End context switch"
0xffffffff818437a0 : "cpuqs"
[...]

Now userspaces tools can display:

 rcu_utilization:      Start context switch
 rcu_dyntick:          Start 1 0
 rcu_utilization:      End context switch
 rcu_batch_start:      rcu_preempt CBs=0/5 bl=10
 rcu_dyntick:          End 0 140000000000000
 rcu_invoke_callback:  rcu_preempt rhp=0xffff880071c0d600 func=proc_i_callback
 rcu_invoke_callback:  rcu_preempt rhp=0xffff880077b5b230 func=__d_free
 rcu_dyntick:          Start 140000000000000 0
 rcu_invoke_callback:  rcu_preempt rhp=0xffff880077563980 func=file_free_rcu
 rcu_batch_end:        rcu_preempt CBs-invoked=3 idle=>c<>c<>c<>c<
 rcu_utilization:      End RCU core
 rcu_grace_period:     rcu_preempt 9741 start
 rcu_dyntick:          Start 1 0
 rcu_dyntick:          End 0 140000000000000
 rcu_dyntick:          Start 140000000000000 0

Instead of:

 rcu_utilization:      ffffffff81843110
 rcu_future_grace_period: ffffffff81842f1d 9939 9939 9940 0 0 3 ffffffff81842f32
 rcu_batch_start:      ffffffff81842f1d CBs=0/4 bl=10
 rcu_future_grace_period: ffffffff81842f1d 9939 9939 9940 0 0 3 ffffffff81842f3c
 rcu_grace_period:     ffffffff81842f1d 9939 ffffffff81842f80
 rcu_invoke_callback:  ffffffff81842f1d rhp=0xffff88007888aac0 func=file_free_rcu
 rcu_grace_period:     ffffffff81842f1d 9939 ffffffff81842f95
 rcu_invoke_callback:  ffffffff81842f1d rhp=0xffff88006aeb4600 func=proc_i_callback
 rcu_future_grace_period: ffffffff81842f1d 9939 9939 9940 0 0 3 ffffffff81842f32
 rcu_future_grace_period: ffffffff81842f1d 9939 9939 9940 0 0 3 ffffffff81842f3c
 rcu_invoke_callback:  ffffffff81842f1d rhp=0xffff880071cb9fc0 func=__d_free
 rcu_grace_period:     ffffffff81842f1d 9939 ffffffff81842f80
 rcu_invoke_callback:  ffffffff81842f1d rhp=0xffff88007888ae80 func=file_free_rcu
 rcu_batch_end:        ffffffff81842f1d CBs-invoked=4 idle=>c<>c<>c<>c<
 rcu_utilization:      ffffffff8184311f

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
  • Loading branch information
Steven Rostedt (Red Hat) committed Jul 29, 2013
1 parent a41bfeb commit f7f7bac
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 50 deletions.
87 changes: 53 additions & 34 deletions kernel/rcutree.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,18 +53,36 @@
#include <linux/delay.h>
#include <linux/stop_machine.h>
#include <linux/random.h>
#include <linux/ftrace_event.h>

#include "rcutree.h"
#include <trace/events/rcu.h>

#include "rcu.h"

/*
* Strings used in tracepoints need to be exported via the
* tracing system such that tools like perf and trace-cmd can
* translate the string address pointers to actual text.
*/
#define TPS(x) tracepoint_string(x)

/* Data structures. */

static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];

/*
* In order to export the rcu_state name to the tracing tools, it
* needs to be added in the __tracepoint_string section.
* This requires defining a separate variable tp_<sname>_varname
* that points to the string being used, and this will allow
* the tracing userspace tools to be able to decipher the string
* address to the matching string.
*/
#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \
static char sname##_varname[] = #sname; \
static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname; \
struct rcu_state sname##_state = { \
.level = { &sname##_state.node[0] }, \
.call = cr, \
Expand All @@ -76,7 +94,7 @@ struct rcu_state sname##_state = { \
.orphan_donetail = &sname##_state.orphan_donelist, \
.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
.onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
.name = #sname, \
.name = sname##_varname, \
.abbr = sabbr, \
}; \
DEFINE_PER_CPU(struct rcu_data, sname##_data)
Expand Down Expand Up @@ -176,7 +194,7 @@ void rcu_sched_qs(int cpu)
struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);

if (rdp->passed_quiesce == 0)
trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs");
trace_rcu_grace_period(TPS("rcu_sched"), rdp->gpnum, TPS("cpuqs"));
rdp->passed_quiesce = 1;
}

Expand All @@ -185,7 +203,7 @@ void rcu_bh_qs(int cpu)
struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);

if (rdp->passed_quiesce == 0)
trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs");
trace_rcu_grace_period(TPS("rcu_bh"), rdp->gpnum, TPS("cpuqs"));
rdp->passed_quiesce = 1;
}

Expand All @@ -196,10 +214,10 @@ void rcu_bh_qs(int cpu)
*/
void rcu_note_context_switch(int cpu)
{
trace_rcu_utilization("Start context switch");
trace_rcu_utilization(TPS("Start context switch"));
rcu_sched_qs(cpu);
rcu_preempt_note_context_switch(cpu);
trace_rcu_utilization("End context switch");
trace_rcu_utilization(TPS("End context switch"));
}
EXPORT_SYMBOL_GPL(rcu_note_context_switch);

Expand Down Expand Up @@ -343,11 +361,11 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
bool user)
{
trace_rcu_dyntick("Start", oldval, rdtp->dynticks_nesting);
trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
if (!user && !is_idle_task(current)) {
struct task_struct *idle = idle_task(smp_processor_id());

trace_rcu_dyntick("Error on entry: not idle task", oldval, 0);
trace_rcu_dyntick(TPS("Error on entry: not idle task"), oldval, 0);
ftrace_dump(DUMP_ORIG);
WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
current->pid, current->comm,
Expand Down Expand Up @@ -477,7 +495,7 @@ void rcu_irq_exit(void)
rdtp->dynticks_nesting--;
WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
if (rdtp->dynticks_nesting)
trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting);
trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting);
else
rcu_eqs_enter_common(rdtp, oldval, true);
local_irq_restore(flags);
Expand All @@ -499,11 +517,11 @@ static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
smp_mb__after_atomic_inc(); /* See above. */
WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
rcu_cleanup_after_idle(smp_processor_id());
trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting);
trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting);
if (!user && !is_idle_task(current)) {
struct task_struct *idle = idle_task(smp_processor_id());

trace_rcu_dyntick("Error on exit: not idle task",
trace_rcu_dyntick(TPS("Error on exit: not idle task"),
oldval, rdtp->dynticks_nesting);
ftrace_dump(DUMP_ORIG);
WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
Expand Down Expand Up @@ -618,7 +636,7 @@ void rcu_irq_enter(void)
rdtp->dynticks_nesting++;
WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
if (oldval)
trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting);
trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting);
else
rcu_eqs_exit_common(rdtp, oldval, true);
local_irq_restore(flags);
Expand Down Expand Up @@ -773,7 +791,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
* of the current RCU grace period.
*/
if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) {
trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "dti");
trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
rdp->dynticks_fqs++;
return 1;
}
Expand All @@ -793,7 +811,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
return 0; /* Grace period is not old enough. */
barrier();
if (cpu_is_offline(rdp->cpu)) {
trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl");
trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("ofl"));
rdp->offline_fqs++;
return 1;
}
Expand Down Expand Up @@ -1056,9 +1074,9 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
* grace period is already marked as needed, return to the caller.
*/
c = rcu_cbs_completed(rdp->rsp, rnp);
trace_rcu_future_gp(rnp, rdp, c, "Startleaf");
trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf"));
if (rnp->need_future_gp[c & 0x1]) {
trace_rcu_future_gp(rnp, rdp, c, "Prestartleaf");
trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf"));
return c;
}

Expand All @@ -1072,7 +1090,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
if (rnp->gpnum != rnp->completed ||
ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) {
rnp->need_future_gp[c & 0x1]++;
trace_rcu_future_gp(rnp, rdp, c, "Startedleaf");
trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));
return c;
}

Expand Down Expand Up @@ -1100,7 +1118,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
* recorded, trace and leave.
*/
if (rnp_root->need_future_gp[c & 0x1]) {
trace_rcu_future_gp(rnp, rdp, c, "Prestartedroot");
trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartedroot"));
goto unlock_out;
}

Expand All @@ -1109,9 +1127,9 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)

/* If a grace period is not already in progress, start one. */
if (rnp_root->gpnum != rnp_root->completed) {
trace_rcu_future_gp(rnp, rdp, c, "Startedleafroot");
trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot"));
} else {
trace_rcu_future_gp(rnp, rdp, c, "Startedroot");
trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot"));
rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
}
unlock_out:
Expand All @@ -1135,7 +1153,8 @@ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
rcu_nocb_gp_cleanup(rsp, rnp);
rnp->need_future_gp[c & 0x1] = 0;
needmore = rnp->need_future_gp[(c + 1) & 0x1];
trace_rcu_future_gp(rnp, rdp, c, needmore ? "CleanupMore" : "Cleanup");
trace_rcu_future_gp(rnp, rdp, c,
needmore ? TPS("CleanupMore") : TPS("Cleanup"));
return needmore;
}

Expand Down Expand Up @@ -1203,9 +1222,9 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,

/* Trace depending on how much we were able to accelerate. */
if (!*rdp->nxttail[RCU_WAIT_TAIL])
trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccWaitCB");
trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB"));
else
trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccReadyCB");
trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB"));
}

/*
Expand Down Expand Up @@ -1271,7 +1290,7 @@ static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struc

/* Remember that we saw this grace-period completion. */
rdp->completed = rnp->completed;
trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend");
trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend"));
}

if (rdp->gpnum != rnp->gpnum) {
Expand All @@ -1281,7 +1300,7 @@ static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struc
* go looking for one.
*/
rdp->gpnum = rnp->gpnum;
trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart");
trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
rdp->passed_quiesce = 0;
rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
zero_cpu_stall_ticks(rdp);
Expand Down Expand Up @@ -1324,7 +1343,7 @@ static int rcu_gp_init(struct rcu_state *rsp)

/* Advance to a new grace period and initialize state. */
rsp->gpnum++;
trace_rcu_grace_period(rsp->name, rsp->gpnum, "start");
trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start"));
record_gp_stall_check_time(rsp);
raw_spin_unlock_irq(&rnp->lock);

Expand Down Expand Up @@ -1446,7 +1465,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
rcu_nocb_gp_set(rnp, nocb);

rsp->completed = rsp->gpnum; /* Declare grace period done. */
trace_rcu_grace_period(rsp->name, rsp->completed, "end");
trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
rsp->fqs_state = RCU_GP_IDLE;
rdp = this_cpu_ptr(rsp->rda);
rcu_advance_cbs(rsp, rnp, rdp); /* Reduce false positives below. */
Expand Down Expand Up @@ -1855,7 +1874,7 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
RCU_TRACE(mask = rdp->grpmask);
trace_rcu_grace_period(rsp->name,
rnp->gpnum + 1 - !!(rnp->qsmask & mask),
"cpuofl");
TPS("cpuofl"));
}

/*
Expand Down Expand Up @@ -2042,7 +2061,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
*/
void rcu_check_callbacks(int cpu, int user)
{
trace_rcu_utilization("Start scheduler-tick");
trace_rcu_utilization(TPS("Start scheduler-tick"));
increment_cpu_stall_ticks();
if (user || rcu_is_cpu_rrupt_from_idle()) {

Expand Down Expand Up @@ -2075,7 +2094,7 @@ void rcu_check_callbacks(int cpu, int user)
rcu_preempt_check_callbacks(cpu);
if (rcu_pending(cpu))
invoke_rcu_core();
trace_rcu_utilization("End scheduler-tick");
trace_rcu_utilization(TPS("End scheduler-tick"));
}

/*
Expand Down Expand Up @@ -2206,10 +2225,10 @@ static void rcu_process_callbacks(struct softirq_action *unused)

if (cpu_is_offline(smp_processor_id()))
return;
trace_rcu_utilization("Start RCU core");
trace_rcu_utilization(TPS("Start RCU core"));
for_each_rcu_flavor(rsp)
__rcu_process_callbacks(rsp);
trace_rcu_utilization("End RCU core");
trace_rcu_utilization(TPS("End RCU core"));
}

/*
Expand Down Expand Up @@ -2950,7 +2969,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
rdp->completed = rnp->completed;
rdp->passed_quiesce = 0;
rdp->qs_pending = 0;
trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl");
trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
}
raw_spin_unlock(&rnp->lock); /* irqs already disabled. */
rnp = rnp->parent;
Expand Down Expand Up @@ -2980,7 +2999,7 @@ static int rcu_cpu_notify(struct notifier_block *self,
struct rcu_node *rnp = rdp->mynode;
struct rcu_state *rsp;

trace_rcu_utilization("Start CPU hotplug");
trace_rcu_utilization(TPS("Start CPU hotplug"));
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
Expand Down Expand Up @@ -3009,7 +3028,7 @@ static int rcu_cpu_notify(struct notifier_block *self,
default:
break;
}
trace_rcu_utilization("End CPU hotplug");
trace_rcu_utilization(TPS("End CPU hotplug"));
return NOTIFY_OK;
}

Expand Down
Loading

0 comments on commit f7f7bac

Please sign in to comment.