Skip to content

Commit

Permalink
tracing: Add NMI tracing in hwlat detector
Browse files Browse the repository at this point in the history
As NMIs can also cause latency when interrupts are disabled, the hwlat
detectory has no way to know if the latency it detects is from an NMI or an
SMI or some other hardware glitch.

As ftrace_nmi_enter/exit() funtions are no longer used (except for sh, which
isn't supported anymore), I converted those to "arch_ftrace_nmi_enter/exit"
and use ftrace_nmi_enter/exit() to check if hwlat detector is tracing or
not, and if so, it calls into the hwlat utility.

Since the hwlat detector only has a single kthread that is spinning with
interrupts disabled, it marks what CPU it is on, and if the NMI callback
happens on that CPU, it records the time spent in that NMI. This is added to
the output that is generated by the hwlat detector as:

 #3     inner/outer(us):    9/9     ts:1470836488.206734548
 #4     inner/outer(us):    0/8     ts:1470836497.140808588
 #5     inner/outer(us):    0/6     ts:1470836499.140825168 nmi-total:5 nmi-count:1
 #6     inner/outer(us):    9/9     ts:1470836501.140841748

All time is still tracked in microseconds.

The NMI information is only shown when an NMI occurred during the sample.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
  • Loading branch information
Steven Rostedt (Red Hat) committed Sep 2, 2016
1 parent 0330f7a commit 7b2c862
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 9 deletions.
4 changes: 2 additions & 2 deletions arch/sh/kernel/ftrace.c
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ static void ftrace_mod_code(void)
clear_mod_flag();
}

void ftrace_nmi_enter(void)
void arch_ftrace_nmi_enter(void)
{
if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
smp_rmb();
Expand All @@ -150,7 +150,7 @@ void ftrace_nmi_enter(void)
smp_mb();
}

void ftrace_nmi_exit(void)
void arch_ftrace_nmi_exit(void)
{
/* Finish all executions before clearing nmi_running */
smp_mb();
Expand Down
31 changes: 27 additions & 4 deletions include/linux/ftrace_irq.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,34 @@


#ifdef CONFIG_FTRACE_NMI_ENTER
extern void ftrace_nmi_enter(void);
extern void ftrace_nmi_exit(void);
extern void arch_ftrace_nmi_enter(void);
extern void arch_ftrace_nmi_exit(void);
#else
static inline void ftrace_nmi_enter(void) { }
static inline void ftrace_nmi_exit(void) { }
static inline void arch_ftrace_nmi_enter(void) { }
static inline void arch_ftrace_nmi_exit(void) { }
#endif

#ifdef CONFIG_HWLAT_TRACER
extern bool trace_hwlat_callback_enabled;
extern void trace_hwlat_callback(bool enter);
#endif

static inline void ftrace_nmi_enter(void)
{
#ifdef CONFIG_HWLAT_TRACER
if (trace_hwlat_callback_enabled)
trace_hwlat_callback(true);
#endif
arch_ftrace_nmi_enter();
}

static inline void ftrace_nmi_exit(void)
{
arch_ftrace_nmi_exit();
#ifdef CONFIG_HWLAT_TRACER
if (trace_hwlat_callback_enabled)
trace_hwlat_callback(false);
#endif
}

#endif /* _LINUX_FTRACE_IRQ_H */
8 changes: 6 additions & 2 deletions kernel/trace/trace_entries.h
Original file line number Diff line number Diff line change
Expand Up @@ -330,18 +330,22 @@ FTRACE_ENTRY(hwlat, hwlat_entry,
F_STRUCT(
__field( u64, duration )
__field( u64, outer_duration )
__field( u64, nmi_total_ts )
__field_struct( struct timespec, timestamp )
__field_desc( long, timestamp, tv_sec )
__field_desc( long, timestamp, tv_nsec )
__field( unsigned int, nmi_count )
__field( unsigned int, seqnum )
),

F_printk("cnt:%u\tts:%010lu.%010lu\tinner:%llu\touter:%llu\n",
F_printk("cnt:%u\tts:%010lu.%010lu\tinner:%llu\touter:%llunmi-ts:%llu\tnmi-count:%u\n",
__entry->seqnum,
__entry->tv_sec,
__entry->tv_nsec,
__entry->duration,
__entry->outer_duration),
__entry->outer_duration,
__entry->nmi_total_ts,
__entry->nmi_count),

FILTER_OTHER
);
51 changes: 51 additions & 0 deletions kernel/trace/trace_hwlat.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,15 @@ static struct dentry *hwlat_sample_window; /* sample window us */
/* Save the previous tracing_thresh value */
static unsigned long save_tracing_thresh;

/* NMI timestamp counters */
static u64 nmi_ts_start;
static u64 nmi_total_ts;
static int nmi_count;
static int nmi_cpu;

/* Tells NMIs to call back to the hwlat tracer to record timestamps */
bool trace_hwlat_callback_enabled;

/* If the user changed threshold, remember it */
static u64 last_tracing_thresh = DEFAULT_LAT_THRESHOLD * NSEC_PER_USEC;

Expand All @@ -72,7 +81,9 @@ struct hwlat_sample {
u64 seqnum; /* unique sequence */
u64 duration; /* delta */
u64 outer_duration; /* delta (outer loop) */
u64 nmi_total_ts; /* Total time spent in NMIs */
struct timespec timestamp; /* wall time */
int nmi_count; /* # NMIs during this sample */
};

/* keep the global state somewhere. */
Expand Down Expand Up @@ -112,6 +123,8 @@ static void trace_hwlat_sample(struct hwlat_sample *sample)
entry->duration = sample->duration;
entry->outer_duration = sample->outer_duration;
entry->timestamp = sample->timestamp;
entry->nmi_total_ts = sample->nmi_total_ts;
entry->nmi_count = sample->nmi_count;

if (!call_filter_check_discard(call, entry, buffer, event))
__buffer_unlock_commit(buffer, event);
Expand All @@ -125,6 +138,26 @@ static void trace_hwlat_sample(struct hwlat_sample *sample)
#define init_time(a, b) (a = b)
#define time_u64(a) a

void trace_hwlat_callback(bool enter)
{
if (smp_processor_id() != nmi_cpu)
return;

/*
* Currently trace_clock_local() calls sched_clock() and the
* generic version is not NMI safe.
*/
if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
if (enter)
nmi_ts_start = time_get();
else
nmi_total_ts = time_get() - nmi_ts_start;
}

if (enter)
nmi_count++;
}

/**
* get_sample - sample the CPU TSC and look for likely hardware latencies
*
Expand All @@ -144,6 +177,14 @@ static int get_sample(void)

do_div(thresh, NSEC_PER_USEC); /* modifies interval value */

nmi_cpu = smp_processor_id();
nmi_total_ts = 0;
nmi_count = 0;
/* Make sure NMIs see this first */
barrier();

trace_hwlat_callback_enabled = true;

init_time(last_t2, 0);
start = time_get(); /* start timestamp */

Expand Down Expand Up @@ -188,6 +229,10 @@ static int get_sample(void)

} while (total <= hwlat_data.sample_width);

barrier(); /* finish the above in the view for NMIs */
trace_hwlat_callback_enabled = false;
barrier(); /* Make sure nmi_total_ts is no longer updated */

ret = 0;

/* If we exceed the threshold value, we have found a hardware latency */
Expand All @@ -196,11 +241,17 @@ static int get_sample(void)

ret = 1;

/* We read in microseconds */
if (nmi_total_ts)
do_div(nmi_total_ts, NSEC_PER_USEC);

hwlat_data.count++;
s.seqnum = hwlat_data.count;
s.duration = sample;
s.outer_duration = outer_sample;
s.timestamp = CURRENT_TIME;
s.nmi_total_ts = nmi_total_ts;
s.nmi_count = nmi_count;
trace_hwlat_sample(&s);

/* Keep a running maximum ever recorded hardware latency */
Expand Down
16 changes: 15 additions & 1 deletion kernel/trace/trace_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -1109,13 +1109,27 @@ trace_hwlat_print(struct trace_iterator *iter, int flags,

trace_assign_type(field, entry);

trace_seq_printf(s, "#%-5u inner/outer(us): %4llu/%-5llu ts:%ld.%09ld\n",
trace_seq_printf(s, "#%-5u inner/outer(us): %4llu/%-5llu ts:%ld.%09ld",
field->seqnum,
field->duration,
field->outer_duration,
field->timestamp.tv_sec,
field->timestamp.tv_nsec);

if (field->nmi_count) {
/*
* The generic sched_clock() is not NMI safe, thus
* we only record the count and not the time.
*/
if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK))
trace_seq_printf(s, " nmi-total:%llu",
field->nmi_total_ts);
trace_seq_printf(s, " nmi-count:%u",
field->nmi_count);
}

trace_seq_putc(s, '\n');

return trace_handle_return(s);
}

Expand Down

0 comments on commit 7b2c862

Please sign in to comment.