powerpc/perf_event: Fix oops due to perf_event_do_pending call
Anton Blanchard found that large POWER systems would occasionally
crash in the exception exit path when profiling with perf_events.
The symptom was that an interrupt would occur late in the exit path
when the MSR[RI] (recoverable interrupt) bit was clear.  Interrupts
should be hard-disabled at this point but they were enabled.  Because
the interrupt was not recoverable the system panicked.

The reason is that the exception exit path was calling
perf_event_do_pending after hard-disabling interrupts, and
perf_event_do_pending will re-enable interrupts.
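
In outline, the broken sequence was as follows (a simplified C-style sketch, not the literal entry_64.S assembly; hard_irq_disable() here stands in for the hand-coded MSR updates in the real exit path):

        /* Sketch of the 64-bit exception exit path before this fix. */
        hard_irq_disable();                 /* interrupts hard-disabled for the final exit */
        if (test_perf_event_pending()) {
                clear_perf_event_pending();
                perf_event_do_pending();    /* can re-enable interrupts... */
        }
        /* ...so an interrupt can now arrive after the exit path has
         * cleared MSR[RI], and an interrupt taken with MSR[RI] clear
         * is unrecoverable: the system panics. */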

The simplest and cleanest fix for this is to use the same mechanism
that 32-bit powerpc does, namely to cause a self-IPI by setting the
decrementer to 1.  This means we can remove the tests in the exception
exit path and in raw_local_irq_restore.

This also makes sure that the call to perf_event_do_pending from
timer_interrupt() happens within irq_enter/irq_exit.  (Note that
calling perf_event_do_pending from timer_interrupt does not mean that
there is a possible 1/HZ latency; setting the decrementer to 1 ensures
that the timer interrupt will happen immediately, i.e. within one
timebase tick, which is a few nanoseconds to a few tens of nanoseconds.)
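
In outline, the new mechanism works like this (a sketch condensed from the time.c changes below; irq_enter/irq_exit are already performed by timer_interrupt):

        /* Raising the self-IPI: */
        void set_perf_event_pending(void)
        {
                preempt_disable();              /* keep flag and decrementer on the same CPU */
                set_perf_event_pending_flag();  /* PACA byte on 64-bit, per-cpu variable on 32-bit */
                set_dec(1);                     /* decrementer fires within one timebase tick */
                preempt_enable();
        }

        /* Servicing it, in timer_interrupt() between irq_enter() and irq_exit(): */
        if (test_perf_event_pending()) {
                clear_perf_event_pending();
                perf_event_do_pending();        /* safe: ordinary hard-interrupt context */
        }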

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: stable@kernel.org
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Paul Mackerras authored and Benjamin Herrenschmidt committed May 12, 2010
1 parent cea0d76 commit 0fe1ac4
Showing 5 changed files with 48 additions and 66 deletions.
38 changes: 0 additions & 38 deletions arch/powerpc/include/asm/hw_irq.h
@@ -130,43 +130,5 @@ static inline int irqs_disabled_flags(unsigned long flags)
  */
 struct irq_chip;
 
-#ifdef CONFIG_PERF_EVENTS
-
-#ifdef CONFIG_PPC64
-static inline unsigned long test_perf_event_pending(void)
-{
-        unsigned long x;
-
-        asm volatile("lbz %0,%1(13)"
-                : "=r" (x)
-                : "i" (offsetof(struct paca_struct, perf_event_pending)));
-        return x;
-}
-
-static inline void set_perf_event_pending(void)
-{
-        asm volatile("stb %0,%1(13)" : :
-                "r" (1),
-                "i" (offsetof(struct paca_struct, perf_event_pending)));
-}
-
-static inline void clear_perf_event_pending(void)
-{
-        asm volatile("stb %0,%1(13)" : :
-                "r" (0),
-                "i" (offsetof(struct paca_struct, perf_event_pending)));
-}
-#endif /* CONFIG_PPC64 */
-
-#else  /* CONFIG_PERF_EVENTS */
-
-static inline unsigned long test_perf_event_pending(void)
-{
-        return 0;
-}
-
-static inline void clear_perf_event_pending(void) {}
-#endif /* CONFIG_PERF_EVENTS */
-
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_HW_IRQ_H */
1 change: 0 additions & 1 deletion arch/powerpc/kernel/asm-offsets.c
@@ -133,7 +133,6 @@ int main(void)
         DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
         DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
         DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
-        DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_event_pending));
         DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
 #ifdef CONFIG_PPC_MM_SLICES
         DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
9 changes: 0 additions & 9 deletions arch/powerpc/kernel/entry_64.S
@@ -556,15 +556,6 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
 2:
         TRACE_AND_RESTORE_IRQ(r5);
 
-#ifdef CONFIG_PERF_EVENTS
-        /* check paca->perf_event_pending if we're enabling ints */
-        lbz     r3,PACAPERFPEND(r13)
-        and.    r3,r3,r5
-        beq     27f
-        bl      .perf_event_do_pending
-27:
-#endif /* CONFIG_PERF_EVENTS */
-
         /* extract EE bit and use it to restore paca->hard_enabled */
         ld      r3,_MSR(r1)
         rldicl  r4,r3,49,63             /* r0 = (r3 >> 15) & 1 */
6 changes: 0 additions & 6 deletions arch/powerpc/kernel/irq.c
@@ -53,7 +53,6 @@
 #include <linux/bootmem.h>
 #include <linux/pci.h>
 #include <linux/debugfs.h>
-#include <linux/perf_event.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -145,11 +144,6 @@ notrace void raw_local_irq_restore(unsigned long en)
         }
 #endif /* CONFIG_PPC_STD_MMU_64 */
 
-        if (test_perf_event_pending()) {
-                clear_perf_event_pending();
-                perf_event_do_pending();
-        }
-
         /*
          * if (get_paca()->hard_enabled) return;
          * But again we need to take care that gcc gets hard_enabled directly
60 changes: 48 additions & 12 deletions arch/powerpc/kernel/time.c
@@ -532,25 +532,60 @@ void __init iSeries_time_init_early(void)
 }
 #endif /* CONFIG_PPC_ISERIES */
 
-#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_PPC32)
-DEFINE_PER_CPU(u8, perf_event_pending);
+#ifdef CONFIG_PERF_EVENTS
 
-void set_perf_event_pending(void)
+/*
+ * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
+ */
+#ifdef CONFIG_PPC64
+static inline unsigned long test_perf_event_pending(void)
 {
-        get_cpu_var(perf_event_pending) = 1;
-        set_dec(1);
-        put_cpu_var(perf_event_pending);
+        unsigned long x;
+
+        asm volatile("lbz %0,%1(13)"
+                : "=r" (x)
+                : "i" (offsetof(struct paca_struct, perf_event_pending)));
+        return x;
 }
 
+static inline void set_perf_event_pending_flag(void)
+{
+        asm volatile("stb %0,%1(13)" : :
+                "r" (1),
+                "i" (offsetof(struct paca_struct, perf_event_pending)));
+}
+
+static inline void clear_perf_event_pending(void)
+{
+        asm volatile("stb %0,%1(13)" : :
+                "r" (0),
+                "i" (offsetof(struct paca_struct, perf_event_pending)));
+}
+
+#else /* 32-bit */
+
+DEFINE_PER_CPU(u8, perf_event_pending);
+
+#define set_perf_event_pending_flag()   __get_cpu_var(perf_event_pending) = 1
 #define test_perf_event_pending()       __get_cpu_var(perf_event_pending)
 #define clear_perf_event_pending()      __get_cpu_var(perf_event_pending) = 0
 
-#else  /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */
+#endif /* 32 vs 64 bit */
+
+void set_perf_event_pending(void)
+{
+        preempt_disable();
+        set_perf_event_pending_flag();
+        set_dec(1);
+        preempt_enable();
+}
+
+#else  /* CONFIG_PERF_EVENTS */
 
 #define test_perf_event_pending()       0
 #define clear_perf_event_pending()
 
-#endif /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */
+#endif /* CONFIG_PERF_EVENTS */
 
 /*
  * For iSeries shared processors, we have to let the hypervisor
@@ -582,10 +617,6 @@ void timer_interrupt(struct pt_regs * regs)
         set_dec(DECREMENTER_MAX);
 
 #ifdef CONFIG_PPC32
-        if (test_perf_event_pending()) {
-                clear_perf_event_pending();
-                perf_event_do_pending();
-        }
         if (atomic_read(&ppc_n_lost_interrupts) != 0)
                 do_IRQ(regs);
 #endif
@@ -604,6 +635,11 @@
 
         calculate_steal_time();
 
+        if (test_perf_event_pending()) {
+                clear_perf_event_pending();
+                perf_event_do_pending();
+        }
+
 #ifdef CONFIG_PPC_ISERIES
         if (firmware_has_feature(FW_FEATURE_ISERIES))
                 get_lppaca()->int_dword.fields.decr_int = 0;
