diff --git a/[refs] b/[refs] index fd1019635c5b..cf9a4197b4cf 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: ebf31f502492527e2b6b5e5cf85a4ebc7fc8a52e +refs/heads/master: d7b4d6de57d414a6384376880f2caf7125a45494 diff --git a/trunk/arch/alpha/Kconfig b/trunk/arch/alpha/Kconfig index d04ccd73af45..b9647bb66d13 100644 --- a/trunk/arch/alpha/Kconfig +++ b/trunk/arch/alpha/Kconfig @@ -9,7 +9,6 @@ config ALPHA select HAVE_IDE select HAVE_OPROFILE select HAVE_SYSCALL_WRAPPERS - select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select HAVE_DMA_ATTRS help diff --git a/trunk/arch/alpha/include/asm/perf_event.h b/trunk/arch/alpha/include/asm/perf_event.h index fe792ca818f6..4157cd3c44a9 100644 --- a/trunk/arch/alpha/include/asm/perf_event.h +++ b/trunk/arch/alpha/include/asm/perf_event.h @@ -1,6 +1,11 @@ #ifndef __ASM_ALPHA_PERF_EVENT_H #define __ASM_ALPHA_PERF_EVENT_H +/* Alpha only supports software events through this interface. */ +extern void set_perf_event_pending(void); + +#define PERF_EVENT_INDEX_OFFSET 0 + #ifdef CONFIG_PERF_EVENTS extern void init_hw_perf_events(void); #else diff --git a/trunk/arch/alpha/kernel/time.c b/trunk/arch/alpha/kernel/time.c index 0f1d8493cfca..396af1799ea4 100644 --- a/trunk/arch/alpha/kernel/time.c +++ b/trunk/arch/alpha/kernel/time.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include @@ -83,25 +83,25 @@ static struct { unsigned long est_cycle_freq; -#ifdef CONFIG_IRQ_WORK +#ifdef CONFIG_PERF_EVENTS -DEFINE_PER_CPU(u8, irq_work_pending); +DEFINE_PER_CPU(u8, perf_event_pending); -#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1 -#define test_irq_work_pending() __get_cpu_var(irq_work_pending) -#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0 +#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1 +#define test_perf_event_pending() __get_cpu_var(perf_event_pending) +#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 -void set_irq_work_pending(void) +void set_perf_event_pending(void) { - set_irq_work_pending_flag(); + set_perf_event_pending_flag(); } -#else /* CONFIG_IRQ_WORK */ +#else /* CONFIG_PERF_EVENTS */ -#define test_irq_work_pending() 0 -#define clear_irq_work_pending() +#define test_perf_event_pending() 0 +#define clear_perf_event_pending() -#endif /* CONFIG_IRQ_WORK */ +#endif /* CONFIG_PERF_EVENTS */ static inline __u32 rpcc(void) @@ -191,9 +191,9 @@ irqreturn_t timer_interrupt(int irq, void *dev) write_sequnlock(&xtime_lock); - if (test_irq_work_pending()) { - clear_irq_work_pending(); - irq_work_run(); + if (test_perf_event_pending()) { + clear_perf_event_pending(); + perf_event_do_pending(); } #ifndef CONFIG_SMP diff --git a/trunk/arch/arm/Kconfig b/trunk/arch/arm/Kconfig index 7c0dfccd05bd..88c97bc7a6f5 100644 --- a/trunk/arch/arm/Kconfig +++ b/trunk/arch/arm/Kconfig @@ -23,7 +23,6 @@ config ARM select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZO select HAVE_KERNEL_LZMA - select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select PERF_USE_VMALLOC select HAVE_REGS_AND_STACK_ACCESS_API diff --git a/trunk/arch/arm/include/asm/perf_event.h b/trunk/arch/arm/include/asm/perf_event.h index c4aa4e8c6af9..b5799a3b7117 100644 --- a/trunk/arch/arm/include/asm/perf_event.h +++ b/trunk/arch/arm/include/asm/perf_event.h @@ -12,6 +12,18 @@ #ifndef __ARM_PERF_EVENT_H__ #define __ARM_PERF_EVENT_H__ +/* + * NOP: on *most* (read: all supported) ARM platforms, the performance + * counter interrupts are regular interrupts and not an NMI. 
This + * means that when we receive the interrupt we can call + * perf_event_do_pending() that handles all of the work with + * interrupts disabled. + */ +static inline void +set_perf_event_pending(void) +{ +} + /* ARM performance counters start from 1 (in the cp15 accesses) so use the * same indexes here for consistency. */ #define PERF_EVENT_INDEX_OFFSET 1 diff --git a/trunk/arch/arm/kernel/perf_event.c b/trunk/arch/arm/kernel/perf_event.c index 49643b1467e6..6cc6521881aa 100644 --- a/trunk/arch/arm/kernel/perf_event.c +++ b/trunk/arch/arm/kernel/perf_event.c @@ -1092,7 +1092,7 @@ armv6pmu_handle_irq(int irq_num, * platforms that can have the PMU interrupts raised as an NMI, this * will not work. */ - irq_work_run(); + perf_event_do_pending(); return IRQ_HANDLED; } @@ -2068,7 +2068,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) * platforms that can have the PMU interrupts raised as an NMI, this * will not work. */ - irq_work_run(); + perf_event_do_pending(); return IRQ_HANDLED; } @@ -2436,7 +2436,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev) armpmu->disable(hwc, idx); } - irq_work_run(); + perf_event_do_pending(); /* * Re-enable the PMU. @@ -2763,7 +2763,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev) armpmu->disable(hwc, idx); } - irq_work_run(); + perf_event_do_pending(); /* * Re-enable the PMU. diff --git a/trunk/arch/frv/Kconfig b/trunk/arch/frv/Kconfig index 0f2417df6323..16399bd24993 100644 --- a/trunk/arch/frv/Kconfig +++ b/trunk/arch/frv/Kconfig @@ -7,7 +7,6 @@ config FRV default y select HAVE_IDE select HAVE_ARCH_TRACEHOOK - select HAVE_IRQ_WORK select HAVE_PERF_EVENTS config ZONE_DMA diff --git a/trunk/arch/frv/lib/Makefile b/trunk/arch/frv/lib/Makefile index 4ff2fb1e6b16..f4709756d0d9 100644 --- a/trunk/arch/frv/lib/Makefile +++ b/trunk/arch/frv/lib/Makefile @@ -5,4 +5,4 @@ lib-y := \ __ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o __ucmpdi2.o \ checksum.o memcpy.o memset.o atomic-ops.o atomic64-ops.o \ - outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o + outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o perf_event.o diff --git a/trunk/arch/frv/lib/perf_event.c b/trunk/arch/frv/lib/perf_event.c new file mode 100644 index 000000000000..9ac5acfd2e91 --- /dev/null +++ b/trunk/arch/frv/lib/perf_event.c @@ -0,0 +1,19 @@ +/* Performance event handling + * + * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. 
+ */ + +#include + +/* + * mark the performance event as pending + */ +void set_perf_event_pending(void) +{ +} diff --git a/trunk/arch/parisc/Kconfig b/trunk/arch/parisc/Kconfig index 79a04a9394d5..907417d187e1 100644 --- a/trunk/arch/parisc/Kconfig +++ b/trunk/arch/parisc/Kconfig @@ -16,7 +16,6 @@ config PARISC select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE select BUG - select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select GENERIC_ATOMIC64 if !64BIT help diff --git a/trunk/arch/parisc/include/asm/perf_event.h b/trunk/arch/parisc/include/asm/perf_event.h index 1e0fd8ba6c03..cc146427d8f9 100644 --- a/trunk/arch/parisc/include/asm/perf_event.h +++ b/trunk/arch/parisc/include/asm/perf_event.h @@ -1,6 +1,7 @@ #ifndef __ASM_PARISC_PERF_EVENT_H #define __ASM_PARISC_PERF_EVENT_H -/* Empty, just to avoid compiling error */ +/* parisc only supports software events through this interface. */ +static inline void set_perf_event_pending(void) { } #endif /* __ASM_PARISC_PERF_EVENT_H */ diff --git a/trunk/arch/powerpc/Kconfig b/trunk/arch/powerpc/Kconfig index 4b1e521d966f..631e5a0fb6ab 100644 --- a/trunk/arch/powerpc/Kconfig +++ b/trunk/arch/powerpc/Kconfig @@ -138,7 +138,6 @@ config PPC select HAVE_OPROFILE select HAVE_SYSCALL_WRAPPERS if PPC64 select GENERIC_ATOMIC64 if PPC32 - select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64 diff --git a/trunk/arch/powerpc/include/asm/paca.h b/trunk/arch/powerpc/include/asm/paca.h index 9b287fdd8ea3..1ff6662f7faf 100644 --- a/trunk/arch/powerpc/include/asm/paca.h +++ b/trunk/arch/powerpc/include/asm/paca.h @@ -129,7 +129,7 @@ struct paca_struct { u8 soft_enabled; /* irq soft-enable flag */ u8 hard_enabled; /* set if irqs are enabled in MSR */ u8 io_sync; /* writel() needs spin_unlock sync */ - u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ + u8 perf_event_pending; /* PM interrupt while soft-disabled */ /* Stuff for accurate time accounting */ u64 user_time; /* accumulated usermode TB ticks */ diff --git a/trunk/arch/powerpc/kernel/time.c b/trunk/arch/powerpc/kernel/time.c index 54888eb10c3b..8533b3b83f5d 100644 --- a/trunk/arch/powerpc/kernel/time.c +++ b/trunk/arch/powerpc/kernel/time.c @@ -53,7 +53,7 @@ #include #include #include -#include +#include #include #include @@ -493,60 +493,60 @@ void __init iSeries_time_init_early(void) } #endif /* CONFIG_PPC_ISERIES */ -#ifdef CONFIG_IRQ_WORK +#ifdef CONFIG_PERF_EVENTS /* * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... 
*/ #ifdef CONFIG_PPC64 -static inline unsigned long test_irq_work_pending(void) +static inline unsigned long test_perf_event_pending(void) { unsigned long x; asm volatile("lbz %0,%1(13)" : "=r" (x) - : "i" (offsetof(struct paca_struct, irq_work_pending))); + : "i" (offsetof(struct paca_struct, perf_event_pending))); return x; } -static inline void set_irq_work_pending_flag(void) +static inline void set_perf_event_pending_flag(void) { asm volatile("stb %0,%1(13)" : : "r" (1), - "i" (offsetof(struct paca_struct, irq_work_pending))); + "i" (offsetof(struct paca_struct, perf_event_pending))); } -static inline void clear_irq_work_pending(void) +static inline void clear_perf_event_pending(void) { asm volatile("stb %0,%1(13)" : : "r" (0), - "i" (offsetof(struct paca_struct, irq_work_pending))); + "i" (offsetof(struct paca_struct, perf_event_pending))); } #else /* 32-bit */ -DEFINE_PER_CPU(u8, irq_work_pending); +DEFINE_PER_CPU(u8, perf_event_pending); -#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1 -#define test_irq_work_pending() __get_cpu_var(irq_work_pending) -#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0 +#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1 +#define test_perf_event_pending() __get_cpu_var(perf_event_pending) +#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 #endif /* 32 vs 64 bit */ -void set_irq_work_pending(void) +void set_perf_event_pending(void) { preempt_disable(); - set_irq_work_pending_flag(); + set_perf_event_pending_flag(); set_dec(1); preempt_enable(); } -#else /* CONFIG_IRQ_WORK */ +#else /* CONFIG_PERF_EVENTS */ -#define test_irq_work_pending() 0 -#define clear_irq_work_pending() +#define test_perf_event_pending() 0 +#define clear_perf_event_pending() -#endif /* CONFIG_IRQ_WORK */ +#endif /* CONFIG_PERF_EVENTS */ /* * For iSeries shared processors, we have to let the hypervisor @@ -587,9 +587,9 @@ void timer_interrupt(struct pt_regs * regs) calculate_steal_time(); - if (test_irq_work_pending()) { - clear_irq_work_pending(); - irq_work_run(); + if (test_perf_event_pending()) { + clear_perf_event_pending(); + perf_event_do_pending(); } #ifdef CONFIG_PPC_ISERIES diff --git a/trunk/arch/s390/Kconfig b/trunk/arch/s390/Kconfig index 958f0dadeadf..f0777a47e3a5 100644 --- a/trunk/arch/s390/Kconfig +++ b/trunk/arch/s390/Kconfig @@ -95,7 +95,6 @@ config S390 select HAVE_KVM if 64BIT select HAVE_ARCH_TRACEHOOK select INIT_ALL_POSSIBLE - select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 diff --git a/trunk/arch/s390/include/asm/perf_event.h b/trunk/arch/s390/include/asm/perf_event.h index a75f168d2718..3840cbe77637 100644 --- a/trunk/arch/s390/include/asm/perf_event.h +++ b/trunk/arch/s390/include/asm/perf_event.h @@ -4,6 +4,7 @@ * Copyright 2009 Martin Schwidefsky, IBM Corporation. 
*/ -/* Empty, just to avoid compiling error */ +static inline void set_perf_event_pending(void) {} +static inline void clear_perf_event_pending(void) {} #define PERF_EVENT_INDEX_OFFSET 0 diff --git a/trunk/arch/sh/Kconfig b/trunk/arch/sh/Kconfig index 35b6879628a0..35b6c3f85173 100644 --- a/trunk/arch/sh/Kconfig +++ b/trunk/arch/sh/Kconfig @@ -16,7 +16,6 @@ config SUPERH select HAVE_ARCH_TRACEHOOK select HAVE_DMA_API_DEBUG select HAVE_DMA_ATTRS - select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select PERF_USE_VMALLOC select HAVE_KERNEL_GZIP diff --git a/trunk/arch/sh/include/asm/perf_event.h b/trunk/arch/sh/include/asm/perf_event.h index 14308bed7ea5..3d0c9f36d150 100644 --- a/trunk/arch/sh/include/asm/perf_event.h +++ b/trunk/arch/sh/include/asm/perf_event.h @@ -26,4 +26,11 @@ extern int register_sh_pmu(struct sh_pmu *); extern int reserve_pmc_hardware(void); extern void release_pmc_hardware(void); +static inline void set_perf_event_pending(void) +{ + /* Nothing to see here, move along. */ +} + +#define PERF_EVENT_INDEX_OFFSET 0 + #endif /* __ASM_SH_PERF_EVENT_H */ diff --git a/trunk/arch/sparc/Kconfig b/trunk/arch/sparc/Kconfig index 3e9d31401fb2..9212cd42a832 100644 --- a/trunk/arch/sparc/Kconfig +++ b/trunk/arch/sparc/Kconfig @@ -26,7 +26,6 @@ config SPARC select ARCH_WANT_OPTIONAL_GPIOLIB select RTC_CLASS select RTC_DRV_M48T59 - select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select PERF_USE_VMALLOC select HAVE_DMA_ATTRS @@ -55,7 +54,6 @@ config SPARC64 select RTC_DRV_BQ4802 select RTC_DRV_SUN4V select RTC_DRV_STARFIRE - select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select PERF_USE_VMALLOC diff --git a/trunk/arch/sparc/include/asm/perf_event.h b/trunk/arch/sparc/include/asm/perf_event.h index 6e8bfa1786da..727af70646cb 100644 --- a/trunk/arch/sparc/include/asm/perf_event.h +++ b/trunk/arch/sparc/include/asm/perf_event.h @@ -1,6 +1,10 @@ #ifndef __ASM_SPARC_PERF_EVENT_H #define __ASM_SPARC_PERF_EVENT_H +extern void set_perf_event_pending(void); + +#define PERF_EVENT_INDEX_OFFSET 0 + #ifdef CONFIG_PERF_EVENTS #include diff --git a/trunk/arch/sparc/kernel/pcr.c b/trunk/arch/sparc/kernel/pcr.c index b87873c0e8ea..c4a6a50b4849 100644 --- a/trunk/arch/sparc/kernel/pcr.c +++ b/trunk/arch/sparc/kernel/pcr.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include @@ -43,14 +43,14 @@ void __irq_entry deferred_pcr_work_irq(int irq, struct pt_regs *regs) old_regs = set_irq_regs(regs); irq_enter(); -#ifdef CONFIG_IRQ_WORK - irq_work_run(); +#ifdef CONFIG_PERF_EVENTS + perf_event_do_pending(); #endif irq_exit(); set_irq_regs(old_regs); } -void arch_irq_work_raise(void) +void set_perf_event_pending(void) { set_softint(1 << PIL_DEFERRED_PCR_WORK); } diff --git a/trunk/arch/x86/Kconfig b/trunk/arch/x86/Kconfig index fd227d6b8d9c..9815221976a7 100644 --- a/trunk/arch/x86/Kconfig +++ b/trunk/arch/x86/Kconfig @@ -25,7 +25,6 @@ config X86 select HAVE_IDE select HAVE_OPROFILE select HAVE_PERF_EVENTS if (!M386 && !M486) - select HAVE_IRQ_WORK select HAVE_IOREMAP_PROT select HAVE_KPROBES select ARCH_WANT_OPTIONAL_GPIOLIB diff --git a/trunk/arch/x86/include/asm/entry_arch.h b/trunk/arch/x86/include/asm/entry_arch.h index b8e96a18676b..8e8ec663a98f 100644 --- a/trunk/arch/x86/include/asm/entry_arch.h +++ b/trunk/arch/x86/include/asm/entry_arch.h @@ -49,8 +49,8 @@ BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) -#ifdef CONFIG_IRQ_WORK -BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR) 
+#ifdef CONFIG_PERF_EVENTS +BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR) #endif #ifdef CONFIG_X86_THERMAL_VECTOR diff --git a/trunk/arch/x86/include/asm/hardirq.h b/trunk/arch/x86/include/asm/hardirq.h index 55e4de613f0e..aeab29aee617 100644 --- a/trunk/arch/x86/include/asm/hardirq.h +++ b/trunk/arch/x86/include/asm/hardirq.h @@ -14,7 +14,7 @@ typedef struct { #endif unsigned int x86_platform_ipis; /* arch dependent */ unsigned int apic_perf_irqs; - unsigned int apic_irq_work_irqs; + unsigned int apic_pending_irqs; #ifdef CONFIG_SMP unsigned int irq_resched_count; unsigned int irq_call_count; diff --git a/trunk/arch/x86/include/asm/hw_irq.h b/trunk/arch/x86/include/asm/hw_irq.h index 3a54a1ca1a02..46c0fe05f230 100644 --- a/trunk/arch/x86/include/asm/hw_irq.h +++ b/trunk/arch/x86/include/asm/hw_irq.h @@ -29,7 +29,7 @@ extern void apic_timer_interrupt(void); extern void x86_platform_ipi(void); extern void error_interrupt(void); -extern void irq_work_interrupt(void); +extern void perf_pending_interrupt(void); extern void spurious_interrupt(void); extern void thermal_interrupt(void); diff --git a/trunk/arch/x86/include/asm/irq_vectors.h b/trunk/arch/x86/include/asm/irq_vectors.h index 6af0894dafb4..e2ca30092557 100644 --- a/trunk/arch/x86/include/asm/irq_vectors.h +++ b/trunk/arch/x86/include/asm/irq_vectors.h @@ -114,9 +114,9 @@ #define X86_PLATFORM_IPI_VECTOR 0xed /* - * IRQ work vector: + * Performance monitoring pending work vector: */ -#define IRQ_WORK_VECTOR 0xec +#define LOCAL_PENDING_VECTOR 0xec #define UV_BAU_MESSAGE 0xea diff --git a/trunk/arch/x86/kernel/Makefile b/trunk/arch/x86/kernel/Makefile index 7490bf8d1459..9d3f485e5dd0 100644 --- a/trunk/arch/x86/kernel/Makefile +++ b/trunk/arch/x86/kernel/Makefile @@ -35,7 +35,6 @@ obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time.o ioport.o ldt.o dumpstack.o obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o -obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-$(CONFIG_X86_VISWS) += visws_quirks.o obj-$(CONFIG_X86_32) += probe_roms_32.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o diff --git a/trunk/arch/x86/kernel/cpu/perf_event.c b/trunk/arch/x86/kernel/cpu/perf_event.c index fe73c1844a9a..e2513f26ba8b 100644 --- a/trunk/arch/x86/kernel/cpu/perf_event.c +++ b/trunk/arch/x86/kernel/cpu/perf_event.c @@ -1196,6 +1196,25 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) return handled; } +void smp_perf_pending_interrupt(struct pt_regs *regs) +{ + irq_enter(); + ack_APIC_irq(); + inc_irq_stat(apic_pending_irqs); + perf_event_do_pending(); + irq_exit(); +} + +void set_perf_event_pending(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + if (!x86_pmu.apic || !x86_pmu_initialized()) + return; + + apic->send_IPI_self(LOCAL_PENDING_VECTOR); +#endif +} + void perf_events_lapic_init(void) { if (!x86_pmu.apic || !x86_pmu_initialized()) diff --git a/trunk/arch/x86/kernel/cpu/perf_event_amd.c b/trunk/arch/x86/kernel/cpu/perf_event_amd.c index 46d58448c3af..c2897b7b4a3b 100644 --- a/trunk/arch/x86/kernel/cpu/perf_event_amd.c +++ b/trunk/arch/x86/kernel/cpu/perf_event_amd.c @@ -52,7 +52,7 @@ static __initconst const u64 amd_hw_cache_event_ids [ C(DTLB) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ - [ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */ + [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = 0, @@ -66,7 +66,7 @@ static __initconst const u64 
amd_hw_cache_event_ids [ C(ITLB) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */ - [ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */ + [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = -1, diff --git a/trunk/arch/x86/kernel/entry_64.S b/trunk/arch/x86/kernel/entry_64.S index c375c79065f8..17be5ec7cbba 100644 --- a/trunk/arch/x86/kernel/entry_64.S +++ b/trunk/arch/x86/kernel/entry_64.S @@ -1023,9 +1023,9 @@ apicinterrupt ERROR_APIC_VECTOR \ apicinterrupt SPURIOUS_APIC_VECTOR \ spurious_interrupt smp_spurious_interrupt -#ifdef CONFIG_IRQ_WORK -apicinterrupt IRQ_WORK_VECTOR \ - irq_work_interrupt smp_irq_work_interrupt +#ifdef CONFIG_PERF_EVENTS +apicinterrupt LOCAL_PENDING_VECTOR \ + perf_pending_interrupt smp_perf_pending_interrupt #endif /* diff --git a/trunk/arch/x86/kernel/irq.c b/trunk/arch/x86/kernel/irq.c index 44edb03fc9ec..91fd0c70a18a 100644 --- a/trunk/arch/x86/kernel/irq.c +++ b/trunk/arch/x86/kernel/irq.c @@ -67,10 +67,10 @@ static int show_other_interrupts(struct seq_file *p, int prec) for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); seq_printf(p, " Performance monitoring interrupts\n"); - seq_printf(p, "%*s: ", prec, "IWI"); + seq_printf(p, "%*s: ", prec, "PND"); for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs); - seq_printf(p, " IRQ work interrupts\n"); + seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); + seq_printf(p, " Performance pending work\n"); #endif if (x86_platform_ipi_callback) { seq_printf(p, "%*s: ", prec, "PLT"); @@ -185,7 +185,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += irq_stats(cpu)->apic_timer_irqs; sum += irq_stats(cpu)->irq_spurious_count; sum += irq_stats(cpu)->apic_perf_irqs; - sum += irq_stats(cpu)->apic_irq_work_irqs; + sum += irq_stats(cpu)->apic_pending_irqs; #endif if (x86_platform_ipi_callback) sum += irq_stats(cpu)->x86_platform_ipis; diff --git a/trunk/arch/x86/kernel/irq_work.c b/trunk/arch/x86/kernel/irq_work.c deleted file mode 100644 index ca8f703a1e70..000000000000 --- a/trunk/arch/x86/kernel/irq_work.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * x86 specific code for irq_work - * - * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra - */ - -#include -#include -#include -#include - -void smp_irq_work_interrupt(struct pt_regs *regs) -{ - irq_enter(); - ack_APIC_irq(); - inc_irq_stat(apic_irq_work_irqs); - irq_work_run(); - irq_exit(); -} - -void arch_irq_work_raise(void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - if (!cpu_has_apic) - return; - - apic->send_IPI_self(IRQ_WORK_VECTOR); - apic_wait_icr_idle(); -#endif -} diff --git a/trunk/arch/x86/kernel/irqinit.c b/trunk/arch/x86/kernel/irqinit.c index 713969b9266b..990ae7cfc578 100644 --- a/trunk/arch/x86/kernel/irqinit.c +++ b/trunk/arch/x86/kernel/irqinit.c @@ -224,9 +224,9 @@ static void __init apic_intr_init(void) alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); - /* IRQ work interrupts: */ -# ifdef CONFIG_IRQ_WORK - alloc_intr_gate(IRQ_WORK_VECTOR, irq_work_interrupt); + /* Performance monitoring interrupts: */ +# ifdef CONFIG_PERF_EVENTS + alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); # endif #endif diff --git a/trunk/include/linux/irq_work.h b/trunk/include/linux/irq_work.h deleted file mode 100644 index 4fa09d4d0b71..000000000000 --- a/trunk/include/linux/irq_work.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef _LINUX_IRQ_WORK_H -#define 
_LINUX_IRQ_WORK_H - -struct irq_work { - struct irq_work *next; - void (*func)(struct irq_work *); -}; - -static inline -void init_irq_work(struct irq_work *entry, void (*func)(struct irq_work *)) -{ - entry->next = NULL; - entry->func = func; -} - -bool irq_work_queue(struct irq_work *entry); -void irq_work_run(void); -void irq_work_sync(struct irq_work *entry); - -#endif /* _LINUX_IRQ_WORK_H */ diff --git a/trunk/include/linux/jump_label.h b/trunk/include/linux/jump_label.h index b67cb180e6e9..b72cd9f92c2e 100644 --- a/trunk/include/linux/jump_label.h +++ b/trunk/include/linux/jump_label.h @@ -25,10 +25,10 @@ extern void jump_label_update(unsigned long key, enum jump_label_type type); extern void jump_label_apply_nops(struct module *mod); extern int jump_label_text_reserved(void *start, void *end); -#define jump_label_enable(key) \ +#define enable_jump_label(key) \ jump_label_update((unsigned long)key, JUMP_LABEL_ENABLE); -#define jump_label_disable(key) \ +#define disable_jump_label(key) \ jump_label_update((unsigned long)key, JUMP_LABEL_DISABLE); #else @@ -39,12 +39,12 @@ do { \ goto label; \ } while (0) -#define jump_label_enable(cond_var) \ +#define enable_jump_label(cond_var) \ do { \ *(cond_var) = 1; \ } while (0) -#define jump_label_disable(cond_var) \ +#define disable_jump_label(cond_var) \ do { \ *(cond_var) = 0; \ } while (0) @@ -61,14 +61,4 @@ static inline int jump_label_text_reserved(void *start, void *end) #endif -#define COND_STMT(key, stmt) \ -do { \ - __label__ jl_enabled; \ - JUMP_LABEL(key, jl_enabled); \ - if (0) { \ -jl_enabled: \ - stmt; \ - } \ -} while (0) - #endif diff --git a/trunk/include/linux/jump_label_ref.h b/trunk/include/linux/jump_label_ref.h deleted file mode 100644 index e5d012ad92c6..000000000000 --- a/trunk/include/linux/jump_label_ref.h +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef _LINUX_JUMP_LABEL_REF_H -#define _LINUX_JUMP_LABEL_REF_H - -#include -#include - -#ifdef HAVE_JUMP_LABEL - -static inline void jump_label_inc(atomic_t *key) -{ - if (atomic_add_return(1, key) == 1) - jump_label_enable(key); -} - -static inline void jump_label_dec(atomic_t *key) -{ - if (atomic_dec_and_test(key)) - jump_label_disable(key); -} - -#else /* !HAVE_JUMP_LABEL */ - -static inline void jump_label_inc(atomic_t *key) -{ - atomic_inc(key); -} - -static inline void jump_label_dec(atomic_t *key) -{ - atomic_dec(key); -} - -#undef JUMP_LABEL -#define JUMP_LABEL(key, label) \ -do { \ - if (unlikely(__builtin_choose_expr( \ - __builtin_types_compatible_p(typeof(key), atomic_t *), \ - atomic_read((atomic_t *)(key)), *(key)))) \ - goto label; \ -} while (0) - -#endif /* HAVE_JUMP_LABEL */ - -#endif /* _LINUX_JUMP_LABEL_REF_H */ diff --git a/trunk/include/linux/perf_event.h b/trunk/include/linux/perf_event.h index 057bf22a8323..a9227e985207 100644 --- a/trunk/include/linux/perf_event.h +++ b/trunk/include/linux/perf_event.h @@ -486,8 +486,6 @@ struct perf_guest_info_callbacks { #include #include #include -#include -#include #include #include @@ -537,12 +535,6 @@ struct hw_perf_event { struct { /* breakpoint */ struct arch_hw_breakpoint info; struct list_head bp_list; - /* - * Crufty hack to avoid the chicken and egg - * problem hw_breakpoint has with context - * creation and event initalization. 
- */ - struct task_struct *bp_target; }; #endif }; @@ -680,6 +672,11 @@ struct perf_buffer { void *data_pages[0]; }; +struct perf_pending_entry { + struct perf_pending_entry *next; + void (*func)(struct perf_pending_entry *); +}; + struct perf_sample_data; typedef void (*perf_overflow_handler_t)(struct perf_event *, int, @@ -700,7 +697,6 @@ struct swevent_hlist { #define PERF_ATTACH_CONTEXT 0x01 #define PERF_ATTACH_GROUP 0x02 -#define PERF_ATTACH_TASK 0x04 /** * struct perf_event - performance event kernel representation: @@ -788,7 +784,7 @@ struct perf_event { int pending_wakeup; int pending_kill; int pending_disable; - struct irq_work pending; + struct perf_pending_entry pending; atomic_t event_limit; @@ -896,26 +892,14 @@ extern void perf_pmu_unregister(struct pmu *pmu); extern int perf_num_counters(void); extern const char *perf_pmu_name(void); -extern void __perf_event_task_sched_in(struct task_struct *task); -extern void __perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); - -extern atomic_t perf_task_events; - -static inline void perf_event_task_sched_in(struct task_struct *task) -{ - COND_STMT(&perf_task_events, __perf_event_task_sched_in(task)); -} - -static inline -void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) -{ - COND_STMT(&perf_task_events, __perf_event_task_sched_out(task, next)); -} - +extern void perf_event_task_sched_in(struct task_struct *task); +extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); extern void perf_event_delayed_put(struct task_struct *task); +extern void set_perf_event_pending(void); +extern void perf_event_do_pending(void); extern void perf_event_print_debug(void); extern void perf_pmu_disable(struct pmu *pmu); extern void perf_pmu_enable(struct pmu *pmu); @@ -1004,20 +988,18 @@ static inline void perf_fetch_caller_regs(struct pt_regs *regs) perf_arch_fetch_caller_regs(regs, CALLER_ADDR0); } -static __always_inline void +static inline void perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { - struct pt_regs hot_regs; - - JUMP_LABEL(&perf_swevent_enabled[event_id], have_event); - return; - -have_event: - if (!regs) { - perf_fetch_caller_regs(&hot_regs); - regs = &hot_regs; + if (atomic_read(&perf_swevent_enabled[event_id])) { + struct pt_regs hot_regs; + + if (!regs) { + perf_fetch_caller_regs(&hot_regs); + regs = &hot_regs; + } + __perf_sw_event(event_id, nr, nmi, regs, addr); } - __perf_sw_event(event_id, nr, nmi, regs, addr); } extern void perf_event_mmap(struct vm_area_struct *vma); @@ -1096,6 +1078,7 @@ static inline int perf_event_init_task(struct task_struct *child) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } static inline void perf_event_delayed_put(struct task_struct *task) { } +static inline void perf_event_do_pending(void) { } static inline void perf_event_print_debug(void) { } static inline int perf_event_task_disable(void) { return -EINVAL; } static inline int perf_event_task_enable(void) { return -EINVAL; } diff --git a/trunk/init/Kconfig b/trunk/init/Kconfig index 1ef0b439908e..2de5b1cbadd9 100644 --- a/trunk/init/Kconfig +++ b/trunk/init/Kconfig @@ -21,13 +21,6 @@ config CONSTRUCTORS depends on !UML default y -config 
HAVE_IRQ_WORK - bool - -config IRQ_WORK - bool - depends on HAVE_IRQ_WORK - menu "General setup" config EXPERIMENTAL @@ -994,7 +987,6 @@ config PERF_EVENTS default y if (PROFILING || PERF_COUNTERS) depends on HAVE_PERF_EVENTS select ANON_INODES - select IRQ_WORK help Enable kernel support for various performance events provided by software and hardware. diff --git a/trunk/kernel/Makefile b/trunk/kernel/Makefile index 4d9bf5f8531f..d52b473c99a1 100644 --- a/trunk/kernel/Makefile +++ b/trunk/kernel/Makefile @@ -23,7 +23,6 @@ CFLAGS_REMOVE_rtmutex-debug.o = -pg CFLAGS_REMOVE_cgroup-debug.o = -pg CFLAGS_REMOVE_sched_clock.o = -pg CFLAGS_REMOVE_perf_event.o = -pg -CFLAGS_REMOVE_irq_work.o = -pg endif obj-$(CONFIG_FREEZER) += freezer.o @@ -101,7 +100,6 @@ obj-$(CONFIG_TRACING) += trace/ obj-$(CONFIG_X86_DS) += trace/ obj-$(CONFIG_RING_BUFFER) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o -obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o diff --git a/trunk/kernel/hw_breakpoint.c b/trunk/kernel/hw_breakpoint.c index 2c9120f0afca..3b714e839c10 100644 --- a/trunk/kernel/hw_breakpoint.c +++ b/trunk/kernel/hw_breakpoint.c @@ -113,12 +113,12 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type) */ static int task_bp_pinned(struct perf_event *bp, enum bp_type_idx type) { - struct task_struct *tsk = bp->hw.bp_target; + struct perf_event_context *ctx = bp->ctx; struct perf_event *iter; int count = 0; list_for_each_entry(iter, &bp_task_head, hw.bp_list) { - if (iter->hw.bp_target == tsk && find_slot_idx(iter) == type) + if (iter->ctx == ctx && find_slot_idx(iter) == type) count += hw_breakpoint_weight(iter); } @@ -134,7 +134,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp, enum bp_type_idx type) { int cpu = bp->cpu; - struct task_struct *tsk = bp->hw.bp_target; + struct task_struct *tsk = bp->ctx->task; if (cpu >= 0) { slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu); @@ -213,7 +213,7 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, int weight) { int cpu = bp->cpu; - struct task_struct *tsk = bp->hw.bp_target; + struct task_struct *tsk = bp->ctx->task; /* Pinned counter cpu profiling */ if (!tsk) { diff --git a/trunk/kernel/irq_work.c b/trunk/kernel/irq_work.c deleted file mode 100644 index f16763ff8481..000000000000 --- a/trunk/kernel/irq_work.c +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra - * - * Provides a framework for enqueueing and running callbacks from hardirq - * context. The enqueueing is NMI-safe. - */ - -#include -#include -#include -#include - -/* - * An entry can be in one of four states: - * - * free NULL, 0 -> {claimed} : free to be used - * claimed NULL, 3 -> {pending} : claimed to be enqueued - * pending next, 3 -> {busy} : queued, pending callback - * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed - * - * We use the lower two bits of the next pointer to keep PENDING and BUSY - * flags. 
- */ - -#define IRQ_WORK_PENDING 1UL -#define IRQ_WORK_BUSY 2UL -#define IRQ_WORK_FLAGS 3UL - -static inline bool irq_work_is_set(struct irq_work *entry, int flags) -{ - return (unsigned long)entry->next & flags; -} - -static inline struct irq_work *irq_work_next(struct irq_work *entry) -{ - unsigned long next = (unsigned long)entry->next; - next &= ~IRQ_WORK_FLAGS; - return (struct irq_work *)next; -} - -static inline struct irq_work *next_flags(struct irq_work *entry, int flags) -{ - unsigned long next = (unsigned long)entry; - next |= flags; - return (struct irq_work *)next; -} - -static DEFINE_PER_CPU(struct irq_work *, irq_work_list); - -/* - * Claim the entry so that no one else will poke at it. - */ -static bool irq_work_claim(struct irq_work *entry) -{ - struct irq_work *next, *nflags; - - do { - next = entry->next; - if ((unsigned long)next & IRQ_WORK_PENDING) - return false; - nflags = next_flags(next, IRQ_WORK_FLAGS); - } while (cmpxchg(&entry->next, next, nflags) != next); - - return true; -} - - -void __weak arch_irq_work_raise(void) -{ - /* - * Lame architectures will get the timer tick callback - */ -} - -/* - * Queue the entry and raise the IPI if needed. - */ -static void __irq_work_queue(struct irq_work *entry) -{ - struct irq_work **head, *next; - - head = &get_cpu_var(irq_work_list); - - do { - next = *head; - /* Can assign non-atomic because we keep the flags set. */ - entry->next = next_flags(next, IRQ_WORK_FLAGS); - } while (cmpxchg(head, next, entry) != next); - - /* The list was empty, raise self-interrupt to start processing. */ - if (!irq_work_next(entry)) - arch_irq_work_raise(); - - put_cpu_var(irq_work_list); -} - -/* - * Enqueue the irq_work @entry, returns true on success, failure when the - * @entry was already enqueued by someone else. - * - * Can be re-enqueued while the callback is still in progress. - */ -bool irq_work_queue(struct irq_work *entry) -{ - if (!irq_work_claim(entry)) { - /* - * Already enqueued, can't do! - */ - return false; - } - - __irq_work_queue(entry); - return true; -} -EXPORT_SYMBOL_GPL(irq_work_queue); - -/* - * Run the irq_work entries on this cpu. Requires to be ran from hardirq - * context with local IRQs disabled. - */ -void irq_work_run(void) -{ - struct irq_work *list, **head; - - head = &__get_cpu_var(irq_work_list); - if (*head == NULL) - return; - - BUG_ON(!in_irq()); - BUG_ON(!irqs_disabled()); - - list = xchg(head, NULL); - while (list != NULL) { - struct irq_work *entry = list; - - list = irq_work_next(list); - - /* - * Clear the PENDING bit, after this point the @entry - * can be re-used. - */ - entry->next = next_flags(NULL, IRQ_WORK_BUSY); - entry->func(entry); - /* - * Clear the BUSY bit and return to the free state if - * no-one else claimed it meanwhile. - */ - cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL); - } -} -EXPORT_SYMBOL_GPL(irq_work_run); - -/* - * Synchronize against the irq_work @entry, ensures the entry is not - * currently in use. 
- */ -void irq_work_sync(struct irq_work *entry) -{ - WARN_ON_ONCE(irqs_disabled()); - - while (irq_work_is_set(entry, IRQ_WORK_BUSY)) - cpu_relax(); -} -EXPORT_SYMBOL_GPL(irq_work_sync); diff --git a/trunk/kernel/perf_event.c b/trunk/kernel/perf_event.c index 05ecf6f7c672..1ec3916ffef0 100644 --- a/trunk/kernel/perf_event.c +++ b/trunk/kernel/perf_event.c @@ -34,7 +34,7 @@ #include -atomic_t perf_task_events __read_mostly; +static atomic_t nr_events __read_mostly; static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; static atomic_t nr_task_events __read_mostly; @@ -315,12 +315,7 @@ static void perf_group_attach(struct perf_event *event) { struct perf_event *group_leader = event->group_leader; - /* - * We can have double attach due to group movement in perf_event_open. - */ - if (event->attach_state & PERF_ATTACH_GROUP) - return; - + WARN_ON_ONCE(event->attach_state & PERF_ATTACH_GROUP); event->attach_state |= PERF_ATTACH_GROUP; if (group_leader == event) @@ -417,8 +412,8 @@ event_filter_match(struct perf_event *event) return event->cpu == -1 || event->cpu == smp_processor_id(); } -static int -__event_sched_out(struct perf_event *event, +static void +event_sched_out(struct perf_event *event, struct perf_cpu_context *cpuctx, struct perf_event_context *ctx) { @@ -437,13 +432,14 @@ __event_sched_out(struct perf_event *event, } if (event->state != PERF_EVENT_STATE_ACTIVE) - return 0; + return; event->state = PERF_EVENT_STATE_INACTIVE; if (event->pending_disable) { event->pending_disable = 0; event->state = PERF_EVENT_STATE_OFF; } + event->tstamp_stopped = ctx->time; event->pmu->del(event, 0); event->oncpu = -1; @@ -452,19 +448,6 @@ __event_sched_out(struct perf_event *event, ctx->nr_active--; if (event->attr.exclusive || !cpuctx->active_oncpu) cpuctx->exclusive = 0; - return 1; -} - -static void -event_sched_out(struct perf_event *event, - struct perf_cpu_context *cpuctx, - struct perf_event_context *ctx) -{ - int ret; - - ret = __event_sched_out(event, cpuctx, ctx); - if (ret) - event->tstamp_stopped = ctx->time; } static void @@ -664,7 +647,7 @@ void perf_event_disable(struct perf_event *event) } static int -__event_sched_in(struct perf_event *event, +event_sched_in(struct perf_event *event, struct perf_cpu_context *cpuctx, struct perf_event_context *ctx) { @@ -684,6 +667,8 @@ __event_sched_in(struct perf_event *event, return -EAGAIN; } + event->tstamp_running += ctx->time - event->tstamp_stopped; + if (!is_software_event(event)) cpuctx->active_oncpu++; ctx->nr_active++; @@ -694,35 +679,6 @@ __event_sched_in(struct perf_event *event, return 0; } -static inline int -event_sched_in(struct perf_event *event, - struct perf_cpu_context *cpuctx, - struct perf_event_context *ctx) -{ - int ret = __event_sched_in(event, cpuctx, ctx); - if (ret) - return ret; - event->tstamp_running += ctx->time - event->tstamp_stopped; - return 0; -} - -static void -group_commit_event_sched_in(struct perf_event *group_event, - struct perf_cpu_context *cpuctx, - struct perf_event_context *ctx) -{ - struct perf_event *event; - u64 now = ctx->time; - - group_event->tstamp_running += now - group_event->tstamp_stopped; - /* - * Schedule in siblings as one group (if any): - */ - list_for_each_entry(event, &group_event->sibling_list, group_entry) { - event->tstamp_running += now - event->tstamp_stopped; - } -} - static int group_sched_in(struct perf_event *group_event, struct perf_cpu_context *cpuctx, @@ -736,13 +692,7 @@ group_sched_in(struct perf_event *group_event, 
pmu->start_txn(pmu); - /* - * use __event_sched_in() to delay updating tstamp_running - * until the transaction is committed. In case of failure - * we will keep an unmodified tstamp_running which is a - * requirement to get correct timing information - */ - if (__event_sched_in(group_event, cpuctx, ctx)) { + if (event_sched_in(group_event, cpuctx, ctx)) { pmu->cancel_txn(pmu); return -EAGAIN; } @@ -751,31 +701,26 @@ group_sched_in(struct perf_event *group_event, * Schedule in siblings as one group (if any): */ list_for_each_entry(event, &group_event->sibling_list, group_entry) { - if (__event_sched_in(event, cpuctx, ctx)) { + if (event_sched_in(event, cpuctx, ctx)) { partial_group = event; goto group_error; } } - if (!pmu->commit_txn(pmu)) { - /* commit tstamp_running */ - group_commit_event_sched_in(group_event, cpuctx, ctx); + if (!pmu->commit_txn(pmu)) return 0; - } + group_error: /* * Groups can be scheduled in as one unit only, so undo any * partial group before returning: - * - * use __event_sched_out() to avoid updating tstamp_stopped - * because the event never actually ran */ list_for_each_entry(event, &group_event->sibling_list, group_entry) { if (event == partial_group) break; - __event_sched_out(event, cpuctx, ctx); + event_sched_out(event, cpuctx, ctx); } - __event_sched_out(group_event, cpuctx, ctx); + event_sched_out(group_event, cpuctx, ctx); pmu->cancel_txn(pmu); @@ -1311,8 +1256,8 @@ void perf_event_context_sched_out(struct task_struct *task, int ctxn, * accessing the event control register. If a NMI hits, then it will * not restart the event. */ -void __perf_event_task_sched_out(struct task_struct *task, - struct task_struct *next) +void perf_event_task_sched_out(struct task_struct *task, + struct task_struct *next) { int ctxn; @@ -1337,6 +1282,14 @@ static void task_ctx_sched_out(struct perf_event_context *ctx, cpuctx->task_ctx = NULL; } +/* + * Called with IRQs disabled + */ +static void __perf_event_task_sched_out(struct perf_event_context *ctx) +{ + task_ctx_sched_out(ctx, EVENT_ALL); +} + /* * Called with IRQs disabled */ @@ -1486,7 +1439,7 @@ void perf_event_context_sched_in(struct perf_event_context *ctx) * accessing the event control register. If a NMI hits, then it will * keep the event running. 
*/ -void __perf_event_task_sched_in(struct task_struct *task) +void perf_event_task_sched_in(struct task_struct *task) { struct perf_event_context *ctx; int ctxn; @@ -1827,13 +1780,7 @@ static u64 perf_event_read(struct perf_event *event) unsigned long flags; raw_spin_lock_irqsave(&ctx->lock, flags); - /* - * may read while context is not active - * (e.g., thread is blocked), in that case - * we cannot update context time - */ - if (ctx->is_active) - update_context_time(ctx); + update_context_time(ctx); update_event_times(event); raw_spin_unlock_irqrestore(&ctx->lock, flags); } @@ -2182,9 +2129,11 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) } } + put_task_struct(task); return ctx; errout: + put_task_struct(task); return ERR_PTR(err); } @@ -2201,15 +2150,15 @@ static void free_event_rcu(struct rcu_head *head) kfree(event); } +static void perf_pending_sync(struct perf_event *event); static void perf_buffer_put(struct perf_buffer *buffer); static void free_event(struct perf_event *event) { - irq_work_sync(&event->pending); + perf_pending_sync(event); if (!event->parent) { - if (event->attach_state & PERF_ATTACH_TASK) - jump_label_dec(&perf_task_events); + atomic_dec(&nr_events); if (event->attr.mmap || event->attr.mmap_data) atomic_dec(&nr_mmap_events); if (event->attr.comm) @@ -3157,7 +3106,16 @@ void perf_event_wakeup(struct perf_event *event) } } -static void perf_pending_event(struct irq_work *entry) +/* + * Pending wakeups + * + * Handle the case where we need to wakeup up from NMI (or rq->lock) context. + * + * The NMI bit means we cannot possibly take locks. Therefore, maintain a + * single linked list and use cmpxchg() to add entries lockless. + */ + +static void perf_pending_event(struct perf_pending_entry *entry) { struct perf_event *event = container_of(entry, struct perf_event, pending); @@ -3173,6 +3131,89 @@ static void perf_pending_event(struct irq_work *entry) } } +#define PENDING_TAIL ((struct perf_pending_entry *)-1UL) + +static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = { + PENDING_TAIL, +}; + +static void perf_pending_queue(struct perf_pending_entry *entry, + void (*func)(struct perf_pending_entry *)) +{ + struct perf_pending_entry **head; + + if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL) + return; + + entry->func = func; + + head = &get_cpu_var(perf_pending_head); + + do { + entry->next = *head; + } while (cmpxchg(head, entry->next, entry) != entry->next); + + set_perf_event_pending(); + + put_cpu_var(perf_pending_head); +} + +static int __perf_pending_run(void) +{ + struct perf_pending_entry *list; + int nr = 0; + + list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL); + while (list != PENDING_TAIL) { + void (*func)(struct perf_pending_entry *); + struct perf_pending_entry *entry = list; + + list = list->next; + + func = entry->func; + entry->next = NULL; + /* + * Ensure we observe the unqueue before we issue the wakeup, + * so that we won't be waiting forever. + * -- see perf_not_pending(). + */ + smp_wmb(); + + func(entry); + nr++; + } + + return nr; +} + +static inline int perf_not_pending(struct perf_event *event) +{ + /* + * If we flush on whatever cpu we run, there is a chance we don't + * need to wait. + */ + get_cpu(); + __perf_pending_run(); + put_cpu(); + + /* + * Ensure we see the proper queue state before going to sleep + * so that we do not miss the wakeup. 
-- see perf_pending_handle() + */ + smp_rmb(); + return event->pending.next == NULL; +} + +static void perf_pending_sync(struct perf_event *event) +{ + wait_event(event->waitq, perf_not_pending(event)); +} + +void perf_event_do_pending(void) +{ + __perf_pending_run(); +} + /* * We assume there is only KVM supporting the callbacks. * Later on, we might change it to a list if there is @@ -3222,7 +3263,8 @@ static void perf_output_wakeup(struct perf_output_handle *handle) if (handle->nmi) { handle->event->pending_wakeup = 1; - irq_work_queue(&handle->event->pending); + perf_pending_queue(&handle->event->pending, + perf_pending_event); } else perf_event_wakeup(handle->event); } @@ -4258,7 +4300,8 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, event->pending_kill = POLL_HUP; if (nmi) { event->pending_disable = 1; - irq_work_queue(&event->pending); + perf_pending_queue(&event->pending, + perf_pending_event); } else perf_event_disable(event); } @@ -4669,7 +4712,7 @@ static void sw_perf_event_destroy(struct perf_event *event) WARN_ON(event->parent); - jump_label_dec(&perf_swevent_enabled[event_id]); + atomic_dec(&perf_swevent_enabled[event_id]); swevent_hlist_put(event); } @@ -4699,7 +4742,7 @@ static int perf_swevent_init(struct perf_event *event) if (err) return err; - jump_label_inc(&perf_swevent_enabled[event_id]); + atomic_inc(&perf_swevent_enabled[event_id]); event->destroy = sw_perf_event_destroy; } @@ -5248,10 +5291,9 @@ struct pmu *perf_init_event(struct perf_event *event) */ static struct perf_event * perf_event_alloc(struct perf_event_attr *attr, int cpu, - struct task_struct *task, - struct perf_event *group_leader, - struct perf_event *parent_event, - perf_overflow_handler_t overflow_handler) + struct perf_event *group_leader, + struct perf_event *parent_event, + perf_overflow_handler_t overflow_handler) { struct pmu *pmu; struct perf_event *event; @@ -5276,7 +5318,6 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, INIT_LIST_HEAD(&event->event_entry); INIT_LIST_HEAD(&event->sibling_list); init_waitqueue_head(&event->waitq); - init_irq_work(&event->pending, perf_pending_event); mutex_init(&event->mmap_mutex); @@ -5293,17 +5334,6 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, event->state = PERF_EVENT_STATE_INACTIVE; - if (task) { - event->attach_state = PERF_ATTACH_TASK; -#ifdef CONFIG_HAVE_HW_BREAKPOINT - /* - * hw_breakpoint is a bit difficult here.. 
- */ - if (attr->type == PERF_TYPE_BREAKPOINT) - event->hw.bp_target = task; -#endif - } - if (!overflow_handler && parent_event) overflow_handler = parent_event->overflow_handler; @@ -5347,8 +5377,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, event->pmu = pmu; if (!event->parent) { - if (event->attach_state & PERF_ATTACH_TASK) - jump_label_inc(&perf_task_events); + atomic_inc(&nr_events); if (event->attr.mmap || event->attr.mmap_data) atomic_inc(&nr_mmap_events); if (event->attr.comm) @@ -5557,18 +5586,10 @@ SYSCALL_DEFINE5(perf_event_open, group_leader = NULL; } - if (pid != -1) { - task = find_lively_task_by_vpid(pid); - if (IS_ERR(task)) { - err = PTR_ERR(task); - goto err_group_fd; - } - } - - event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, NULL); + event = perf_event_alloc(&attr, cpu, group_leader, NULL, NULL); if (IS_ERR(event)) { err = PTR_ERR(event); - goto err_task; + goto err_fd; } /* @@ -5600,13 +5621,21 @@ SYSCALL_DEFINE5(perf_event_open, } } + if (pid != -1) { + task = find_lively_task_by_vpid(pid); + if (IS_ERR(task)) { + err = PTR_ERR(task); + goto err_group_fd; + } + } + /* * Get the target context (task or percpu): */ ctx = find_get_context(pmu, task, cpu); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto err_alloc; + goto err_group_fd; } /* @@ -5702,13 +5731,9 @@ SYSCALL_DEFINE5(perf_event_open, err_context: put_ctx(ctx); -err_alloc: - free_event(event); -err_task: - if (task) - put_task_struct(task); err_group_fd: fput_light(group_file, fput_needed); + free_event(event); err_fd: put_unused_fd(event_fd); return err; @@ -5734,7 +5759,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, * Get the target context (task or percpu): */ - event = perf_event_alloc(attr, cpu, task, NULL, NULL, overflow_handler); + event = perf_event_alloc(attr, cpu, NULL, NULL, overflow_handler); if (IS_ERR(event)) { err = PTR_ERR(event); goto err; @@ -5843,7 +5868,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) * our context. 
*/ child_ctx = child->perf_event_ctxp[ctxn]; - task_ctx_sched_out(child_ctx, EVENT_ALL); + __perf_event_task_sched_out(child_ctx); /* * Take the context lock here so that if find_get_context is @@ -6002,7 +6027,6 @@ inherit_event(struct perf_event *parent_event, child_event = perf_event_alloc(&parent_event->attr, parent_event->cpu, - child, group_leader, parent_event, NULL); if (IS_ERR(child_event)) diff --git a/trunk/kernel/timer.c b/trunk/kernel/timer.c index 68a9ae7679b7..97bf05baade7 100644 --- a/trunk/kernel/timer.c +++ b/trunk/kernel/timer.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include @@ -1279,10 +1279,7 @@ void update_process_times(int user_tick) run_local_timers(); rcu_check_callbacks(cpu, user_tick); printk_tick(); -#ifdef CONFIG_IRQ_WORK - if (in_irq()) - irq_work_run(); -#endif + perf_event_do_pending(); scheduler_tick(); run_posix_cpu_timers(p); } diff --git a/trunk/kernel/tracepoint.c b/trunk/kernel/tracepoint.c index e95ee7f31d43..d6073a50a6ca 100644 --- a/trunk/kernel/tracepoint.c +++ b/trunk/kernel/tracepoint.c @@ -265,10 +265,10 @@ static void set_tracepoint(struct tracepoint_entry **entry, */ rcu_assign_pointer(elem->funcs, (*entry)->funcs); if (!elem->state && active) { - jump_label_enable(&elem->state); + enable_jump_label(&elem->state); elem->state = active; } else if (elem->state && !active) { - jump_label_disable(&elem->state); + disable_jump_label(&elem->state); elem->state = active; } } @@ -285,7 +285,7 @@ static void disable_tracepoint(struct tracepoint *elem) elem->unregfunc(); if (elem->state) { - jump_label_disable(&elem->state); + disable_jump_label(&elem->state); elem->state = 0; } rcu_assign_pointer(elem->funcs, NULL); diff --git a/trunk/lib/dynamic_debug.c b/trunk/lib/dynamic_debug.c index 7bd6df781ce5..e925c7b960f1 100644 --- a/trunk/lib/dynamic_debug.c +++ b/trunk/lib/dynamic_debug.c @@ -142,9 +142,9 @@ static void ddebug_change(const struct ddebug_query *query, dt->num_enabled++; dp->flags = newflags; if (newflags) { - jump_label_enable(&dp->enabled); + enable_jump_label(&dp->enabled); } else { - jump_label_disable(&dp->enabled); + disable_jump_label(&dp->enabled); } if (verbose) printk(KERN_INFO diff --git a/trunk/scripts/Makefile.build b/trunk/scripts/Makefile.build index 4db60b2e2a76..843bd4f4ffc9 100644 --- a/trunk/scripts/Makefile.build +++ b/trunk/scripts/Makefile.build @@ -210,7 +210,13 @@ endif ifdef CONFIG_FTRACE_MCOUNT_RECORD ifdef BUILD_C_RECORDMCOUNT -cmd_record_mcount = $(objtree)/scripts/recordmcount "$(@)"; +# Due to recursion, we must skip empty.o. +# The empty.o file is created in the make process in order to determine +# the target endianness and word size. It is made before all other C +# files, including recordmcount. +cmd_record_mcount = if [ $(@) != "scripts/mod/empty.o" ]; then \ + $(objtree)/scripts/recordmcount "$(@)"; \ + fi; else cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ "$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \
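
---

Note (not part of the patch): the kernel/perf_event.c hunks earlier in this diff reintroduce the pre-irq_work pending mechanism — an NMI-safe, lockless single-linked list per CPU (perf_pending_queue()/__perf_pending_run()) that producers push onto with cmpxchg() and the consumer drains with xchg(). Below is a minimal, self-contained user-space sketch of that pattern under stated assumptions: hypothetical names (pending_entry, pending_queue, pending_run), C11 atomics instead of the kernel's cmpxchg/xchg, a single global head instead of a per-CPU variable, and none of the PENDING/BUSY flag-bit bookkeeping. It illustrates the technique only; it is not code from the patch.

```c
/*
 * Sketch of an NMI-style lockless pending-callback list:
 * producers push with compare-and-swap (no locks taken),
 * the consumer drains the whole list with one atomic exchange.
 */
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct pending_entry {
	_Atomic(struct pending_entry *) next;
	void (*func)(struct pending_entry *);
};

/* Sentinel marking end-of-list; a NULL ->next means "not queued". */
#define PENDING_TAIL ((struct pending_entry *)-1)

static _Atomic(struct pending_entry *) pending_head = PENDING_TAIL;

/* Queue an entry; returns 0 if it was already queued by someone else. */
static int pending_queue(struct pending_entry *entry,
			 void (*func)(struct pending_entry *))
{
	struct pending_entry *expected = NULL;

	/* Claim the entry: only one producer may move ->next off NULL. */
	if (!atomic_compare_exchange_strong(&entry->next,
					    &expected, PENDING_TAIL))
		return 0;

	entry->func = func;

	/* Push onto the list head with a CAS loop (lock-free). */
	struct pending_entry *head = atomic_load(&pending_head);
	do {
		entry->next = head;
	} while (!atomic_compare_exchange_weak(&pending_head, &head, entry));

	return 1;
}

/* Drain and run every queued callback; returns how many ran. */
static int pending_run(void)
{
	int nr = 0;
	struct pending_entry *list =
		atomic_exchange(&pending_head, PENDING_TAIL);

	while (list != PENDING_TAIL) {
		struct pending_entry *entry = list;
		void (*func)(struct pending_entry *) = entry->func;

		list = entry->next;
		entry->next = NULL;	/* entry may be re-queued from here on */
		func(entry);
		nr++;
	}
	return nr;
}

static void hello(struct pending_entry *e)
{
	(void)e;
	puts("pending callback ran");
}

int main(void)
{
	static struct pending_entry e;

	pending_queue(&e, hello);
	printf("ran %d callback(s)\n", pending_run());
	return 0;
}
```

The two-value encoding is the point of the design: NULL distinguishes "not queued" from the PENDING_TAIL sentinel that terminates the list, so a double enqueue is rejected by the claim step without locks, and the xchg-style drain lets the consumer walk its snapshot while producers keep pushing onto a fresh list.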