perf/x86/intel: Account interrupts for PEBS errors
It's possible to set up PEBS events to get only errors and not
any data, like on SNB-X (model 45) and IVB-EP (model 62)
via 2 perf commands running simultaneously:

    taskset -c 1 ./perf record -c 4 -e branches:pp -j any -C 10

This leads to a soft lockup, because the error path of
intel_pmu_drain_pebs_nhm() does not account event->hw.interrupts
for error PEBS interrupts, so in case you're getting ONLY
errors there is no way to stop the event once it's over
the max_samples_per_tick limit:

  NMI watchdog: BUG: soft lockup - CPU#22 stuck for 22s! [perf_fuzzer:5816]
  ...
  RIP: 0010:[<ffffffff81159232>]  [<ffffffff81159232>] smp_call_function_single+0xe2/0x140
  ...
  Call Trace:
   ? trace_hardirqs_on_caller+0xf5/0x1b0
   ? perf_cgroup_attach+0x70/0x70
   perf_install_in_context+0x199/0x1b0
   ? ctx_resched+0x90/0x90
   SYSC_perf_event_open+0x641/0xf90
   SyS_perf_event_open+0x9/0x10
   do_syscall_64+0x6c/0x1f0
   entry_SYSCALL64_slow_path+0x25/0x25
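
For reference, the per-tick accounting that the error path was skipping
looks roughly like this; it is an abridged sketch of the throttling
logic in kernel/events/core.c (the real function also handles
frequency-based period adjustment), not the exact upstream source:

    /* One throttle period per tick: restart or bump the count. */
    seq = __this_cpu_read(perf_throttled_seq);
    if (seq != hwc->interrupts_seq) {
        /* First PMI in this tick: reset the counter. */
        hwc->interrupts_seq = seq;
        hwc->interrupts = 1;
    } else {
        hwc->interrupts++;
        if (unlikely(throttle &&
                     hwc->interrupts >= max_samples_per_tick)) {
            /* Over the limit: throttle and report to the caller. */
            __this_cpu_inc(perf_throttled_count);
            hwc->interrupts = MAX_INTERRUPTS;
            perf_log_throttle(event, 0);
            ret = 1;    /* caller stops the event */
        }
    }

Without this accounting on the error path, hwc->interrupts never grows,
the throttle never fires, and the CPU keeps servicing error-only PEBS
interrupts indefinitely.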

Add perf_event_account_interrupt(), which does the interrupt
and frequency checks, and call it from intel_pmu_drain_pebs_nhm()'s
error path.

We keep the pending_kill and pending_wakeup logic only in the
__perf_event_overflow() path, because they make sense only if
there's any data to deliver.
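
For context, the tail of __perf_event_overflow() where that logic lives
looks roughly like this (an abridged sketch of the data-delivery path in
kernel/events/core.c of this vintage, not the exact upstream source):

    /* Deliver the sample to the overflow handler... */
    READ_ONCE(event->overflow_handler)(event, data, regs);

    /*
     * ...then do the wakeup/SIGIO bookkeeping, which is only
     * meaningful when a sample was actually written.
     */
    if (*perf_event_fasync(event) && event->pending_kill) {
        event->pending_wakeup = 1;
        irq_work_queue(&event->pending);
    }

The error path delivers no sample, so none of this applies there.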

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vince@deater.net>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/1482931866-6018-2-git-send-email-jolsa@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Jiri Olsa authored and Ingo Molnar committed Jan 14, 2017
1 parent 321027c commit 475113d
Showing 3 changed files with 37 additions and 17 deletions.
6 changes: 5 additions & 1 deletion arch/x86/events/intel/ds.c
@@ -1389,9 +1389,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 			continue;
 
 		/* log dropped samples number */
-		if (error[bit])
+		if (error[bit]) {
 			perf_log_lost_samples(event, error[bit]);
 
+			if (perf_event_account_interrupt(event))
+				x86_pmu_stop(event, 0);
+		}
+
 		if (counts[bit]) {
 			__intel_pmu_pebs_event(event, iregs, base,
 					       top, bit, counts[bit]);
1 change: 1 addition & 0 deletions include/linux/perf_event.h
@@ -1259,6 +1259,7 @@ extern void perf_event_disable(struct perf_event *event);
 extern void perf_event_disable_local(struct perf_event *event);
 extern void perf_event_disable_inatomic(struct perf_event *event);
 extern void perf_event_task_tick(void);
+extern int perf_event_account_interrupt(struct perf_event *event);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
 perf_aux_output_begin(struct perf_output_handle *handle,
47 changes: 31 additions & 16 deletions kernel/events/core.c
@@ -7060,25 +7060,12 @@ static void perf_log_itrace_start(struct perf_event *event)
 	perf_output_end(&handle);
 }
 
-/*
- * Generic event overflow handling, sampling.
- */
-
-static int __perf_event_overflow(struct perf_event *event,
-				   int throttle, struct perf_sample_data *data,
-				   struct pt_regs *regs)
+static int
+__perf_event_account_interrupt(struct perf_event *event, int throttle)
 {
-	int events = atomic_read(&event->event_limit);
 	struct hw_perf_event *hwc = &event->hw;
-	u64 seq;
 	int ret = 0;
-
-	/*
-	 * Non-sampling counters might still use the PMI to fold short
-	 * hardware counters, ignore those.
-	 */
-	if (unlikely(!is_sampling_event(event)))
-		return 0;
+	u64 seq;
 
 	seq = __this_cpu_read(perf_throttled_seq);
 	if (seq != hwc->interrupts_seq) {
@@ -7106,6 +7093,34 @@ static int __perf_event_overflow(struct perf_event *event,
 		perf_adjust_period(event, delta, hwc->last_period, true);
 	}
 
+	return ret;
+}
+
+int perf_event_account_interrupt(struct perf_event *event)
+{
+	return __perf_event_account_interrupt(event, 1);
+}
+
+/*
+ * Generic event overflow handling, sampling.
+ */
+
+static int __perf_event_overflow(struct perf_event *event,
+				   int throttle, struct perf_sample_data *data,
+				   struct pt_regs *regs)
+{
+	int events = atomic_read(&event->event_limit);
+	int ret = 0;
+
+	/*
+	 * Non-sampling counters might still use the PMI to fold short
+	 * hardware counters, ignore those.
+	 */
+	if (unlikely(!is_sampling_event(event)))
+		return 0;
+
+	ret = __perf_event_account_interrupt(event, throttle);
+
 	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * events
