From 9772a87b0b54946da6a6f54a4aa0bdc345b8b6d6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 26 Sep 2011 12:33:28 -0300 Subject: [PATCH] --- yaml --- r: 269041 b: refs/heads/master c: 42b28ac071a1a239d2a48965e9d6be0e061dd103 h: refs/heads/master i: 269039: c4de2037c65e3db2c81737167769cace30375375 v: v3 --- [refs] | 2 +- trunk/arch/x86/include/asm/nmi.h | 37 +- trunk/arch/x86/include/asm/perf_event.h | 55 +-- trunk/arch/x86/include/asm/reboot.h | 2 +- trunk/arch/x86/kernel/Makefile | 2 +- trunk/arch/x86/kernel/apic/hw_nmi.c | 27 +- trunk/arch/x86/kernel/apic/x2apic_uv_x.c | 20 +- trunk/arch/x86/kernel/cpu/Makefile | 2 +- trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c | 20 +- trunk/arch/x86/kernel/cpu/mcheck/mce.c | 23 +- trunk/arch/x86/kernel/cpu/perf_event.c | 69 ++- trunk/arch/x86/kernel/cpu/perf_event.h | 12 - trunk/arch/x86/kernel/cpu/perf_event_amd.c | 17 +- .../arch/x86/kernel/cpu/perf_event_amd_ibs.c | 294 ------------ trunk/arch/x86/kernel/cpu/perf_event_intel.c | 91 +--- trunk/arch/x86/kernel/crash.c | 5 +- trunk/arch/x86/kernel/kgdb.c | 60 +-- trunk/arch/x86/kernel/nmi.c | 433 ------------------ trunk/arch/x86/kernel/process_32.c | 2 - trunk/arch/x86/kernel/process_64.c | 2 - trunk/arch/x86/kernel/reboot.c | 23 +- trunk/arch/x86/kernel/traps.c | 155 +++++++ trunk/arch/x86/oprofile/nmi_int.c | 40 +- trunk/arch/x86/oprofile/nmi_timer_int.c | 28 +- trunk/arch/x86/oprofile/op_model_amd.c | 234 +++++++++- trunk/arch/x86/oprofile/op_x86_model.h | 1 + trunk/drivers/acpi/apei/ghes.c | 22 +- trunk/drivers/char/ipmi/ipmi_watchdog.c | 33 +- trunk/drivers/watchdog/hpwdt.c | 25 +- trunk/include/linux/perf_event.h | 5 +- trunk/tools/perf/util/hist.c | 156 +++---- 31 files changed, 762 insertions(+), 1135 deletions(-) delete mode 100644 trunk/arch/x86/kernel/cpu/perf_event_amd_ibs.c delete mode 100644 trunk/arch/x86/kernel/nmi.c diff --git a/[refs] b/[refs] index 0777a9766a67..59a1c2ca157a 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: d48b0e173715f678698d3678fefd40f2893ce798 +refs/heads/master: 42b28ac071a1a239d2a48965e9d6be0e061dd103 diff --git a/trunk/arch/x86/include/asm/nmi.h b/trunk/arch/x86/include/asm/nmi.h index fd3f9f18cf3f..4886a68f267e 100644 --- a/trunk/arch/x86/include/asm/nmi.h +++ b/trunk/arch/x86/include/asm/nmi.h @@ -22,26 +22,27 @@ void arch_trigger_all_cpu_backtrace(void); #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace #endif -#define NMI_FLAG_FIRST 1 - -enum { - NMI_LOCAL=0, - NMI_UNKNOWN, - NMI_MAX -}; - -#define NMI_DONE 0 -#define NMI_HANDLED 1 - -typedef int (*nmi_handler_t)(unsigned int, struct pt_regs *); - -int register_nmi_handler(unsigned int, nmi_handler_t, unsigned long, - const char *); - -void unregister_nmi_handler(unsigned int, const char *); +/* + * Define some priorities for the nmi notifier call chain. + * + * Create a local nmi bit that has a higher priority than + * external nmis, because the local ones are more frequent. + * + * Also setup some default high/normal/low settings for + * subsystems to registers with. Using 4 bits to separate + * the priorities. This can go a lot higher if needed be. 
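+ *
+ * As a minimal sketch of the intended usage (illustrative only --
+ * my_nmi(), my_nmi_nb and handle_event() are made-up names for this
+ * example, not part of the patch), a subsystem claiming local NMIs
+ * at normal priority would do:
+ *
+ *	static int my_nmi(struct notifier_block *self,
+ *			  unsigned long cmd, void *data)
+ *	{
+ *		struct die_args *args = data;
+ *
+ *		if (cmd != DIE_NMI)
+ *			return NOTIFY_DONE;
+ *		handle_event(args->regs);
+ *		return NOTIFY_STOP;
+ *	}
+ *
+ *	static struct notifier_block my_nmi_nb = {
+ *		.notifier_call	= my_nmi,
+ *		.priority	= NMI_LOCAL_NORMAL_PRIOR,
+ *	};
+ *
+ *	register_die_notifier(&my_nmi_nb);
+ *
+ * Returning NOTIFY_STOP ends the notifier walk; NOTIFY_DONE passes
+ * the NMI on to lower-priority notifiers and, eventually, to the
+ * unknown-NMI path.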
+ */ + +#define NMI_LOCAL_SHIFT 16 /* randomly picked */ +#define NMI_LOCAL_BIT (1ULL << NMI_LOCAL_SHIFT) +#define NMI_HIGH_PRIOR (1ULL << 8) +#define NMI_NORMAL_PRIOR (1ULL << 4) +#define NMI_LOW_PRIOR (1ULL << 0) +#define NMI_LOCAL_HIGH_PRIOR (NMI_LOCAL_BIT | NMI_HIGH_PRIOR) +#define NMI_LOCAL_NORMAL_PRIOR (NMI_LOCAL_BIT | NMI_NORMAL_PRIOR) +#define NMI_LOCAL_LOW_PRIOR (NMI_LOCAL_BIT | NMI_LOW_PRIOR) void stop_nmi(void); void restart_nmi(void); -void local_touch_nmi(void); #endif /* _ASM_X86_NMI_H */ diff --git a/trunk/arch/x86/include/asm/perf_event.h b/trunk/arch/x86/include/asm/perf_event.h index f61c62f7d5d8..094fb30817ab 100644 --- a/trunk/arch/x86/include/asm/perf_event.h +++ b/trunk/arch/x86/include/asm/perf_event.h @@ -29,9 +29,6 @@ #define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) #define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL -#define AMD_PERFMON_EVENTSEL_GUESTONLY (1ULL << 40) -#define AMD_PERFMON_EVENTSEL_HOSTONLY (1ULL << 41) - #define AMD64_EVENTSEL_EVENT \ (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32)) #define INTEL_ARCH_EVENT_MASK \ @@ -46,17 +43,14 @@ #define AMD64_RAW_EVENT_MASK \ (X86_RAW_EVENT_MASK | \ AMD64_EVENTSEL_EVENT) -#define AMD64_NUM_COUNTERS 4 -#define AMD64_NUM_COUNTERS_F15H 6 -#define AMD64_NUM_COUNTERS_MAX AMD64_NUM_COUNTERS_F15H -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) -#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 +#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 /* * Intel "Architectural Performance Monitoring" CPUID @@ -116,35 +110,6 @@ union cpuid10_edx { */ #define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) -/* - * IBS cpuid feature detection - */ - -#define IBS_CPUID_FEATURES 0x8000001b - -/* - * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but - * bit 0 is used to indicate the existence of IBS. 
- */ -#define IBS_CAPS_AVAIL (1U<<0) -#define IBS_CAPS_FETCHSAM (1U<<1) -#define IBS_CAPS_OPSAM (1U<<2) -#define IBS_CAPS_RDWROPCNT (1U<<3) -#define IBS_CAPS_OPCNT (1U<<4) -#define IBS_CAPS_BRNTRGT (1U<<5) -#define IBS_CAPS_OPCNTEXT (1U<<6) - -#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \ - | IBS_CAPS_FETCHSAM \ - | IBS_CAPS_OPSAM) - -/* - * IBS APIC setup - */ -#define IBSCTL 0x1cc -#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8) -#define IBSCTL_LVT_OFFSET_MASK 0x0F - /* IbsFetchCtl bits/masks */ #define IBS_FETCH_RAND_EN (1ULL<<57) #define IBS_FETCH_VAL (1ULL<<49) @@ -159,8 +124,6 @@ union cpuid10_edx { #define IBS_OP_MAX_CNT 0x0000FFFFULL #define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ -extern u32 get_ibs_caps(void); - #ifdef CONFIG_PERF_EVENTS extern void perf_events_lapic_init(void); @@ -196,19 +159,7 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); ); \ } -struct perf_guest_switch_msr { - unsigned msr; - u64 host, guest; -}; - -extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); #else -static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr) -{ - *nr = 0; - return NULL; -} - static inline void perf_events_lapic_init(void) { } #endif diff --git a/trunk/arch/x86/include/asm/reboot.h b/trunk/arch/x86/include/asm/reboot.h index 92f297069e87..3250e3d605d9 100644 --- a/trunk/arch/x86/include/asm/reboot.h +++ b/trunk/arch/x86/include/asm/reboot.h @@ -23,7 +23,7 @@ void machine_real_restart(unsigned int type); #define MRR_BIOS 0 #define MRR_APM 1 -typedef void (*nmi_shootdown_cb)(int, struct pt_regs*); +typedef void (*nmi_shootdown_cb)(int, struct die_args*); void nmi_shootdown_cpus(nmi_shootdown_cb callback); #endif /* _ASM_X86_REBOOT_H */ diff --git a/trunk/arch/x86/kernel/Makefile b/trunk/arch/x86/kernel/Makefile index 8baca3c4871c..82f2912155a5 100644 --- a/trunk/arch/x86/kernel/Makefile +++ b/trunk/arch/x86/kernel/Makefile @@ -19,7 +19,7 @@ endif obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o -obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o +obj-y += time.o ioport.o ldt.o dumpstack.o obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-y += probe_roms.o diff --git a/trunk/arch/x86/kernel/apic/hw_nmi.c b/trunk/arch/x86/kernel/apic/hw_nmi.c index 31cb9ae992b7..d5e57db0f7be 100644 --- a/trunk/arch/x86/kernel/apic/hw_nmi.c +++ b/trunk/arch/x86/kernel/apic/hw_nmi.c @@ -60,10 +60,22 @@ void arch_trigger_all_cpu_backtrace(void) } static int __kprobes -arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) +arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, + unsigned long cmd, void *__args) { + struct die_args *args = __args; + struct pt_regs *regs; int cpu; + switch (cmd) { + case DIE_NMI: + break; + + default: + return NOTIFY_DONE; + } + + regs = args->regs; cpu = smp_processor_id(); if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { @@ -74,16 +86,21 @@ arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) show_regs(regs); arch_spin_unlock(&lock); cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); - return NMI_HANDLED; + return NOTIFY_STOP; } - return NMI_DONE; + return NOTIFY_DONE; } +static __read_mostly struct notifier_block backtrace_notifier = { + .notifier_call = arch_trigger_all_cpu_backtrace_handler, + .next = NULL, + .priority = NMI_LOCAL_LOW_PRIOR, +}; + static int __init register_trigger_all_cpu_backtrace(void) { - register_nmi_handler(NMI_LOCAL, 
arch_trigger_all_cpu_backtrace_handler, - 0, "arch_bt"); + register_die_notifier(&backtrace_notifier); return 0; } early_initcall(register_trigger_all_cpu_backtrace); diff --git a/trunk/arch/x86/kernel/apic/x2apic_uv_x.c b/trunk/arch/x86/kernel/apic/x2apic_uv_x.c index 75be00ecfff2..34b18594e724 100644 --- a/trunk/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/trunk/arch/x86/kernel/apic/x2apic_uv_x.c @@ -672,11 +672,18 @@ void __cpuinit uv_cpu_init(void) /* * When NMI is received, print a stack trace. */ -int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) +int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) { unsigned long real_uv_nmi; int bid; + if (reason != DIE_NMIUNKNOWN) + return NOTIFY_OK; + + if (in_crash_kexec) + /* do nothing if entering the crash kernel */ + return NOTIFY_OK; + /* * Each blade has an MMR that indicates when an NMI has been sent * to cpus on the blade. If an NMI is detected, atomically @@ -697,7 +704,7 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) } if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count)) - return NMI_DONE; + return NOTIFY_DONE; __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count; @@ -710,12 +717,17 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) dump_stack(); spin_unlock(&uv_nmi_lock); - return NMI_HANDLED; + return NOTIFY_STOP; } +static struct notifier_block uv_dump_stack_nmi_nb = { + .notifier_call = uv_handle_nmi, + .priority = NMI_LOCAL_LOW_PRIOR - 1, +}; + void uv_register_nmi_notifier(void) { - if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv")) + if (register_die_notifier(&uv_dump_stack_nmi_nb)) printk(KERN_WARNING "UV NMI handler failed to register\n"); } diff --git a/trunk/arch/x86/kernel/cpu/Makefile b/trunk/arch/x86/kernel/cpu/Makefile index fe6eb197f848..1044fd787db8 100644 --- a/trunk/arch/x86/kernel/cpu/Makefile +++ b/trunk/arch/x86/kernel/cpu/Makefile @@ -36,7 +36,7 @@ endif obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ -obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o +obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o quiet_cmd_mkcapflags = MKCAP $@ cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c b/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c index 6199232161cf..0ed633c5048b 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -78,20 +78,27 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs) static cpumask_var_t mce_inject_cpumask; -static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) +static int mce_raise_notify(struct notifier_block *self, + unsigned long val, void *data) { + struct die_args *args = (struct die_args *)data; int cpu = smp_processor_id(); struct mce *m = &__get_cpu_var(injectm); - if (!cpumask_test_cpu(cpu, mce_inject_cpumask)) - return NMI_DONE; + if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) + return NOTIFY_DONE; cpumask_clear_cpu(cpu, mce_inject_cpumask); if (m->inject_flags & MCJ_EXCEPTION) - raise_exception(m, regs); + raise_exception(m, args->regs); else if (m->status) raise_poll(m); - return NMI_HANDLED; + return NOTIFY_STOP; } +static struct notifier_block mce_raise_nb = { + .notifier_call = mce_raise_notify, + .priority = NMI_LOCAL_NORMAL_PRIOR, +}; + /* Inject mce on current CPU */ static int raise_local(void) { @@ -209,8 +216,7 @@ static int inject_init(void) return 
-ENOMEM; printk(KERN_INFO "Machine check injector initialized\n"); mce_chrdev_ops.write = mce_write; - register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, - "mce_notify"); + register_die_notifier(&mce_raise_nb); return 0; } diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce.c b/trunk/arch/x86/kernel/cpu/mcheck/mce.c index fce51ad1f362..08363b042122 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce.c @@ -908,6 +908,9 @@ void do_machine_check(struct pt_regs *regs, long error_code) percpu_inc(mce_exception_count); + if (notify_die(DIE_NMI, "machine check", regs, error_code, + 18, SIGKILL) == NOTIFY_STOP) + goto out; if (!banks) goto out; @@ -1137,15 +1140,6 @@ static void mce_start_timer(unsigned long data) add_timer_on(t, smp_processor_id()); } -/* Must not be called in IRQ context where del_timer_sync() can deadlock */ -static void mce_timer_delete_all(void) -{ - int cpu; - - for_each_online_cpu(cpu) - del_timer_sync(&per_cpu(mce_timer, cpu)); -} - static void mce_do_trigger(struct work_struct *work) { call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT); @@ -1756,6 +1750,7 @@ static struct syscore_ops mce_syscore_ops = { static void mce_cpu_restart(void *data) { + del_timer_sync(&__get_cpu_var(mce_timer)); if (!mce_available(__this_cpu_ptr(&cpu_info))) return; __mcheck_cpu_init_generic(); @@ -1765,15 +1760,16 @@ static void mce_cpu_restart(void *data) /* Reinit MCEs after user configuration changes */ static void mce_restart(void) { - mce_timer_delete_all(); on_each_cpu(mce_cpu_restart, NULL, 1); } /* Toggle features for corrected errors */ -static void mce_disable_cmci(void *data) +static void mce_disable_ce(void *all) { if (!mce_available(__this_cpu_ptr(&cpu_info))) return; + if (all) + del_timer_sync(&__get_cpu_var(mce_timer)); cmci_clear(); } @@ -1856,8 +1852,7 @@ static ssize_t set_ignore_ce(struct sys_device *s, if (mce_ignore_ce ^ !!new) { if (new) { /* disable ce features */ - mce_timer_delete_all(); - on_each_cpu(mce_disable_cmci, NULL, 1); + on_each_cpu(mce_disable_ce, (void *)1, 1); mce_ignore_ce = 1; } else { /* enable ce features */ @@ -1880,7 +1875,7 @@ static ssize_t set_cmci_disabled(struct sys_device *s, if (mce_cmci_disabled ^ !!new) { if (new) { /* disable cmci */ - on_each_cpu(mce_disable_cmci, NULL, 1); + on_each_cpu(mce_disable_ce, NULL, 1); mce_cmci_disabled = 1; } else { /* enable cmci */ diff --git a/trunk/arch/x86/kernel/cpu/perf_event.c b/trunk/arch/x86/kernel/cpu/perf_event.c index 640891014b2a..8ab89112f93c 100644 --- a/trunk/arch/x86/kernel/cpu/perf_event.c +++ b/trunk/arch/x86/kernel/cpu/perf_event.c @@ -1058,15 +1058,76 @@ void perf_events_lapic_init(void) apic_write(APIC_LVTPC, APIC_DM_NMI); } +struct pmu_nmi_state { + unsigned int marked; + int handled; +}; + +static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi); + static int __kprobes -perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) +perf_event_nmi_handler(struct notifier_block *self, + unsigned long cmd, void *__args) { + struct die_args *args = __args; + unsigned int this_nmi; + int handled; + if (!atomic_read(&active_events)) - return NMI_DONE; + return NOTIFY_DONE; + + switch (cmd) { + case DIE_NMI: + break; + case DIE_NMIUNKNOWN: + this_nmi = percpu_read(irq_stat.__nmi_count); + if (this_nmi != __this_cpu_read(pmu_nmi.marked)) + /* let the kernel handle the unknown nmi */ + return NOTIFY_DONE; + /* + * This one is a PMU back-to-back nmi. Two events + * trigger 'simultaneously' raising two back-to-back + * NMIs. 
If the first NMI handles both, the latter + * will be empty and daze the CPU. So, we drop it to + * avoid false-positive 'unknown nmi' messages. + */ + return NOTIFY_STOP; + default: + return NOTIFY_DONE; + } + + handled = x86_pmu.handle_irq(args->regs); + if (!handled) + return NOTIFY_DONE; - return x86_pmu.handle_irq(regs); + this_nmi = percpu_read(irq_stat.__nmi_count); + if ((handled > 1) || + /* the next nmi could be a back-to-back nmi */ + ((__this_cpu_read(pmu_nmi.marked) == this_nmi) && + (__this_cpu_read(pmu_nmi.handled) > 1))) { + /* + * We could have two subsequent back-to-back nmis: The + * first handles more than one counter, the 2nd + * handles only one counter and the 3rd handles no + * counter. + * + * This is the 2nd nmi because the previous was + * handling more than one counter. We will mark the + * next (3rd) and then drop it if unhandled. + */ + __this_cpu_write(pmu_nmi.marked, this_nmi + 1); + __this_cpu_write(pmu_nmi.handled, handled); + } + + return NOTIFY_STOP; } +static __read_mostly struct notifier_block perf_event_nmi_notifier = { + .notifier_call = perf_event_nmi_handler, + .next = NULL, + .priority = NMI_LOCAL_LOW_PRIOR, +}; + struct event_constraint emptyconstraint; struct event_constraint unconstrained; @@ -1171,7 +1232,7 @@ static int __init init_hw_perf_events(void) ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; perf_events_lapic_init(); - register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI"); + register_die_notifier(&perf_event_nmi_notifier); unconstrained = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, diff --git a/trunk/arch/x86/kernel/cpu/perf_event.h b/trunk/arch/x86/kernel/cpu/perf_event.h index b9698d40ac4b..fb330b0a816e 100644 --- a/trunk/arch/x86/kernel/cpu/perf_event.h +++ b/trunk/arch/x86/kernel/cpu/perf_event.h @@ -130,13 +130,6 @@ struct cpu_hw_events { struct perf_branch_stack lbr_stack; struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; - /* - * Intel host/guest exclude bits - */ - u64 intel_ctrl_guest_mask; - u64 intel_ctrl_host_mask; - struct perf_guest_switch_msr guest_switch_msrs[X86_PMC_IDX_MAX]; - /* * manage shared (per-core, per-cpu) registers * used on Intel NHM/WSM/SNB @@ -302,11 +295,6 @@ struct x86_pmu { */ struct extra_reg *extra_regs; unsigned int er_flags; - - /* - * Intel host/guest support (KVM) - */ - struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); }; #define ERF_NO_HT_SHARING 1 diff --git a/trunk/arch/x86/kernel/cpu/perf_event_amd.c b/trunk/arch/x86/kernel/cpu/perf_event_amd.c index aeefd45697a2..384450d67128 100644 --- a/trunk/arch/x86/kernel/cpu/perf_event_amd.c +++ b/trunk/arch/x86/kernel/cpu/perf_event_amd.c @@ -138,19 +138,6 @@ static int amd_pmu_hw_config(struct perf_event *event) if (ret) return ret; - if (event->attr.exclude_host && event->attr.exclude_guest) - /* - * When HO == GO == 1 the hardware treats that as GO == HO == 0 - * and will count in both modes. We don't want to count in that - * case so we emulate no-counting by setting US = OS = 0. 
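- *
- * Spelled out (an illustrative summary of the branches below, not
- * extra code):
- *
- *	exclude_guest=0 exclude_host=0  -> count in host and guest
- *	exclude_guest=0 exclude_host=1  -> set GUESTONLY (GO)
- *	exclude_guest=1 exclude_host=0  -> set HOSTONLY (HO)
- *	exclude_guest=1 exclude_host=1  -> clear USR and OS instead,
- *					   so nothing is counted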
- */ - event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | - ARCH_PERFMON_EVENTSEL_OS); - else if (event->attr.exclude_host) - event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY; - else if (event->attr.exclude_guest) - event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY; - if (event->attr.type != PERF_TYPE_RAW) return 0; @@ -411,7 +398,7 @@ static __initconst const struct x86_pmu amd_pmu = { .perfctr = MSR_K7_PERFCTR0, .event_map = amd_pmu_event_map, .max_events = ARRAY_SIZE(amd_perfmon_event_map), - .num_counters = AMD64_NUM_COUNTERS, + .num_counters = 4, .cntval_bits = 48, .cntval_mask = (1ULL << 48) - 1, .apic = 1, @@ -575,7 +562,7 @@ static __initconst const struct x86_pmu amd_pmu_f15h = { .perfctr = MSR_F15H_PERF_CTR, .event_map = amd_pmu_event_map, .max_events = ARRAY_SIZE(amd_perfmon_event_map), - .num_counters = AMD64_NUM_COUNTERS_F15H, + .num_counters = 6, .cntval_bits = 48, .cntval_mask = (1ULL << 48) - 1, .apic = 1, diff --git a/trunk/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/trunk/arch/x86/kernel/cpu/perf_event_amd_ibs.c deleted file mode 100644 index ab6343d21825..000000000000 --- a/trunk/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ /dev/null @@ -1,294 +0,0 @@ -/* - * Performance events - AMD IBS - * - * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter - * - * For licencing details see kernel-base/COPYING - */ - -#include -#include -#include - -#include - -static u32 ibs_caps; - -#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) - -static struct pmu perf_ibs; - -static int perf_ibs_init(struct perf_event *event) -{ - if (perf_ibs.type != event->attr.type) - return -ENOENT; - return 0; -} - -static int perf_ibs_add(struct perf_event *event, int flags) -{ - return 0; -} - -static void perf_ibs_del(struct perf_event *event, int flags) -{ -} - -static struct pmu perf_ibs = { - .event_init= perf_ibs_init, - .add= perf_ibs_add, - .del= perf_ibs_del, -}; - -static __init int perf_event_ibs_init(void) -{ - if (!ibs_caps) - return -ENODEV; /* ibs not supported by the cpu */ - - perf_pmu_register(&perf_ibs, "ibs", -1); - printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps); - - return 0; -} - -#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */ - -static __init int perf_event_ibs_init(void) { return 0; } - -#endif - -/* IBS - apic initialization, for perf and oprofile */ - -static __init u32 __get_ibs_caps(void) -{ - u32 caps; - unsigned int max_level; - - if (!boot_cpu_has(X86_FEATURE_IBS)) - return 0; - - /* check IBS cpuid feature flags */ - max_level = cpuid_eax(0x80000000); - if (max_level < IBS_CPUID_FEATURES) - return IBS_CAPS_DEFAULT; - - caps = cpuid_eax(IBS_CPUID_FEATURES); - if (!(caps & IBS_CAPS_AVAIL)) - /* cpuid flags not valid */ - return IBS_CAPS_DEFAULT; - - return caps; -} - -u32 get_ibs_caps(void) -{ - return ibs_caps; -} - -EXPORT_SYMBOL(get_ibs_caps); - -static inline int get_eilvt(int offset) -{ - return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1); -} - -static inline int put_eilvt(int offset) -{ - return !setup_APIC_eilvt(offset, 0, 0, 1); -} - -/* - * Check and reserve APIC extended interrupt LVT offset for IBS if available. 
- */ -static inline int ibs_eilvt_valid(void) -{ - int offset; - u64 val; - int valid = 0; - - preempt_disable(); - - rdmsrl(MSR_AMD64_IBSCTL, val); - offset = val & IBSCTL_LVT_OFFSET_MASK; - - if (!(val & IBSCTL_LVT_OFFSET_VALID)) { - pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n", - smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); - goto out; - } - - if (!get_eilvt(offset)) { - pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n", - smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); - goto out; - } - - valid = 1; -out: - preempt_enable(); - - return valid; -} - -static int setup_ibs_ctl(int ibs_eilvt_off) -{ - struct pci_dev *cpu_cfg; - int nodes; - u32 value = 0; - - nodes = 0; - cpu_cfg = NULL; - do { - cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD, - PCI_DEVICE_ID_AMD_10H_NB_MISC, - cpu_cfg); - if (!cpu_cfg) - break; - ++nodes; - pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off - | IBSCTL_LVT_OFFSET_VALID); - pci_read_config_dword(cpu_cfg, IBSCTL, &value); - if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) { - pci_dev_put(cpu_cfg); - printk(KERN_DEBUG "Failed to setup IBS LVT offset, " - "IBSCTL = 0x%08x\n", value); - return -EINVAL; - } - } while (1); - - if (!nodes) { - printk(KERN_DEBUG "No CPU node configured for IBS\n"); - return -ENODEV; - } - - return 0; -} - -/* - * This runs only on the current cpu. We try to find an LVT offset and - * setup the local APIC. For this we must disable preemption. On - * success we initialize all nodes with this offset. This updates then - * the offset in the IBS_CTL per-node msr. The per-core APIC setup of - * the IBS interrupt vector is handled by perf_ibs_cpu_notifier that - * is using the new offset. - */ -static int force_ibs_eilvt_setup(void) -{ - int offset; - int ret; - - preempt_disable(); - /* find the next free available EILVT entry, skip offset 0 */ - for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) { - if (get_eilvt(offset)) - break; - } - preempt_enable(); - - if (offset == APIC_EILVT_NR_MAX) { - printk(KERN_DEBUG "No EILVT entry available\n"); - return -EBUSY; - } - - ret = setup_ibs_ctl(offset); - if (ret) - goto out; - - if (!ibs_eilvt_valid()) { - ret = -EFAULT; - goto out; - } - - pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset); - pr_err(FW_BUG "workaround enabled for IBS LVT offset\n"); - - return 0; -out: - preempt_disable(); - put_eilvt(offset); - preempt_enable(); - return ret; -} - -static inline int get_ibs_lvt_offset(void) -{ - u64 val; - - rdmsrl(MSR_AMD64_IBSCTL, val); - if (!(val & IBSCTL_LVT_OFFSET_VALID)) - return -EINVAL; - - return val & IBSCTL_LVT_OFFSET_MASK; -} - -static void setup_APIC_ibs(void *dummy) -{ - int offset; - - offset = get_ibs_lvt_offset(); - if (offset < 0) - goto failed; - - if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0)) - return; -failed: - pr_warn("perf: IBS APIC setup failed on cpu #%d\n", - smp_processor_id()); -} - -static void clear_APIC_ibs(void *dummy) -{ - int offset; - - offset = get_ibs_lvt_offset(); - if (offset >= 0) - setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1); -} - -static int __cpuinit -perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) -{ - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_STARTING: - setup_APIC_ibs(NULL); - break; - case CPU_DYING: - clear_APIC_ibs(NULL); - break; - default: - break; - } - - return NOTIFY_OK; -} - -static __init int amd_ibs_init(void) -{ - u32 caps; - int ret; - - caps = __get_ibs_caps(); - if (!caps) 
- return -ENODEV; /* ibs not supported by the cpu */ - - if (!ibs_eilvt_valid()) { - ret = force_ibs_eilvt_setup(); - if (ret) { - pr_err("Failed to setup IBS, %d\n", ret); - return ret; - } - } - - get_online_cpus(); - ibs_caps = caps; - /* make ibs_caps visible to other cpus: */ - smp_mb(); - perf_cpu_notifier(perf_ibs_cpu_notifier); - smp_call_function(setup_APIC_ibs, NULL, 1); - put_online_cpus(); - - return perf_event_ibs_init(); -} - -/* Since we need the pci subsystem to init ibs we can't do this earlier: */ -device_initcall(amd_ibs_init); diff --git a/trunk/arch/x86/kernel/cpu/perf_event_intel.c b/trunk/arch/x86/kernel/cpu/perf_event_intel.c index e09ca20e86ee..61fa35750b98 100644 --- a/trunk/arch/x86/kernel/cpu/perf_event_intel.c +++ b/trunk/arch/x86/kernel/cpu/perf_event_intel.c @@ -749,8 +749,7 @@ static void intel_pmu_enable_all(int added) intel_pmu_pebs_enable_all(); intel_pmu_lbr_enable_all(); - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, - x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { struct perf_event *event = @@ -873,7 +872,6 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) static void intel_pmu_disable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { intel_pmu_disable_bts(); @@ -881,9 +879,6 @@ static void intel_pmu_disable_event(struct perf_event *event) return; } - cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); - cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); - if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_disable_fixed(hwc); return; @@ -929,7 +924,6 @@ static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) static void intel_pmu_enable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { if (!__this_cpu_read(cpu_hw_events.enabled)) @@ -939,11 +933,6 @@ static void intel_pmu_enable_event(struct perf_event *event) return; } - if (event->attr.exclude_host) - cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); - if (event->attr.exclude_guest) - cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx); - if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_enable_fixed(hwc); return; @@ -1313,84 +1302,12 @@ static int intel_pmu_hw_config(struct perf_event *event) return 0; } -struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) -{ - if (x86_pmu.guest_get_msrs) - return x86_pmu.guest_get_msrs(nr); - *nr = 0; - return NULL; -} -EXPORT_SYMBOL_GPL(perf_guest_get_msrs); - -static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; - - arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; - arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask; - arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask; - - *nr = 1; - return arr; -} - -static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; - int idx; - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - struct perf_event *event = cpuc->events[idx]; - - arr[idx].msr = x86_pmu_config_addr(idx); 
- arr[idx].host = arr[idx].guest = 0; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - arr[idx].host = arr[idx].guest = - event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE; - - if (event->attr.exclude_host) - arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE; - else if (event->attr.exclude_guest) - arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE; - } - - *nr = x86_pmu.num_counters; - return arr; -} - -static void core_pmu_enable_event(struct perf_event *event) -{ - if (!event->attr.exclude_host) - x86_pmu_enable_event(event); -} - -static void core_pmu_enable_all(int added) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - int idx; - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - struct hw_perf_event *hwc = &cpuc->events[idx]->hw; - - if (!test_bit(idx, cpuc->active_mask) || - cpuc->events[idx]->attr.exclude_host) - continue; - - __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); - } -} - static __initconst const struct x86_pmu core_pmu = { .name = "core", .handle_irq = x86_pmu_handle_irq, .disable_all = x86_pmu_disable_all, - .enable_all = core_pmu_enable_all, - .enable = core_pmu_enable_event, + .enable_all = x86_pmu_enable_all, + .enable = x86_pmu_enable_event, .disable = x86_pmu_disable_event, .hw_config = x86_pmu_hw_config, .schedule_events = x86_schedule_events, @@ -1408,7 +1325,6 @@ static __initconst const struct x86_pmu core_pmu = { .get_event_constraints = intel_get_event_constraints, .put_event_constraints = intel_put_event_constraints, .event_constraints = intel_core_event_constraints, - .guest_get_msrs = core_guest_get_msrs, }; struct intel_shared_regs *allocate_shared_regs(int cpu) @@ -1515,7 +1431,6 @@ static __initconst const struct x86_pmu intel_pmu = { .cpu_prepare = intel_pmu_cpu_prepare, .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, - .guest_get_msrs = intel_guest_get_msrs, }; static void intel_clovertown_quirks(void) diff --git a/trunk/arch/x86/kernel/crash.c b/trunk/arch/x86/kernel/crash.c index 13ad89971d47..764c7c2b1811 100644 --- a/trunk/arch/x86/kernel/crash.c +++ b/trunk/arch/x86/kernel/crash.c @@ -32,12 +32,15 @@ int in_crash_kexec; #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) -static void kdump_nmi_callback(int cpu, struct pt_regs *regs) +static void kdump_nmi_callback(int cpu, struct die_args *args) { + struct pt_regs *regs; #ifdef CONFIG_X86_32 struct pt_regs fixed_regs; #endif + regs = args->regs; + #ifdef CONFIG_X86_32 if (!user_mode_vm(regs)) { crash_fixup_ss_esp(&fixed_regs, regs); diff --git a/trunk/arch/x86/kernel/kgdb.c b/trunk/arch/x86/kernel/kgdb.c index faba5771acad..00354d4919a9 100644 --- a/trunk/arch/x86/kernel/kgdb.c +++ b/trunk/arch/x86/kernel/kgdb.c @@ -511,37 +511,28 @@ single_step_cont(struct pt_regs *regs, struct die_args *args) static int was_in_debug_nmi[NR_CPUS]; -static int kgdb_nmi_handler(unsigned int cmd, struct pt_regs *regs) +static int __kgdb_notify(struct die_args *args, unsigned long cmd) { + struct pt_regs *regs = args->regs; + switch (cmd) { - case NMI_LOCAL: + case DIE_NMI: if (atomic_read(&kgdb_active) != -1) { /* KGDB CPU roundup */ kgdb_nmicallback(raw_smp_processor_id(), regs); was_in_debug_nmi[raw_smp_processor_id()] = 1; touch_nmi_watchdog(); - return NMI_HANDLED; + return NOTIFY_STOP; } - break; + return NOTIFY_DONE; - case NMI_UNKNOWN: + case DIE_NMIUNKNOWN: if (was_in_debug_nmi[raw_smp_processor_id()]) { was_in_debug_nmi[raw_smp_processor_id()] = 0; - return NMI_HANDLED; + return NOTIFY_STOP; } - break; - default: - /* do nothing */ - 
break; - } - return NMI_DONE; -} - -static int __kgdb_notify(struct die_args *args, unsigned long cmd) -{ - struct pt_regs *regs = args->regs; + return NOTIFY_DONE; - switch (cmd) { case DIE_DEBUG: if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { if (user_mode(regs)) @@ -599,6 +590,11 @@ kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) static struct notifier_block kgdb_notifier = { .notifier_call = kgdb_notify, + + /* + * Lowest-prio notifier priority, we want to be notified last: + */ + .priority = NMI_LOCAL_LOW_PRIOR, }; /** @@ -609,31 +605,7 @@ static struct notifier_block kgdb_notifier = { */ int kgdb_arch_init(void) { - int retval; - - retval = register_die_notifier(&kgdb_notifier); - if (retval) - goto out; - - retval = register_nmi_handler(NMI_LOCAL, kgdb_nmi_handler, - 0, "kgdb"); - if (retval) - goto out1; - - retval = register_nmi_handler(NMI_UNKNOWN, kgdb_nmi_handler, - 0, "kgdb"); - - if (retval) - goto out2; - - return retval; - -out2: - unregister_nmi_handler(NMI_LOCAL, "kgdb"); -out1: - unregister_die_notifier(&kgdb_notifier); -out: - return retval; + return register_die_notifier(&kgdb_notifier); } static void kgdb_hw_overflow_handler(struct perf_event *event, @@ -701,8 +673,6 @@ void kgdb_arch_exit(void) breakinfo[i].pev = NULL; } } - unregister_nmi_handler(NMI_UNKNOWN, "kgdb"); - unregister_nmi_handler(NMI_LOCAL, "kgdb"); unregister_die_notifier(&kgdb_notifier); } diff --git a/trunk/arch/x86/kernel/nmi.c b/trunk/arch/x86/kernel/nmi.c deleted file mode 100644 index 7ec5bd140b87..000000000000 --- a/trunk/arch/x86/kernel/nmi.c +++ /dev/null @@ -1,433 +0,0 @@ -/* - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs - * Copyright (C) 2011 Don Zickus Red Hat, Inc. - * - * Pentium III FXSR, SSE support - * Gareth Hughes , May 2000 - */ - -/* - * Handle hardware traps and faults. - */ -#include -#include -#include -#include -#include -#include -#include - -#include - -#if defined(CONFIG_EDAC) -#include -#endif - -#include -#include -#include -#include - -#define NMI_MAX_NAMELEN 16 -struct nmiaction { - struct list_head list; - nmi_handler_t handler; - unsigned int flags; - char *name; -}; - -struct nmi_desc { - spinlock_t lock; - struct list_head head; -}; - -static struct nmi_desc nmi_desc[NMI_MAX] = -{ - { - .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock), - .head = LIST_HEAD_INIT(nmi_desc[0].head), - }, - { - .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock), - .head = LIST_HEAD_INIT(nmi_desc[1].head), - }, - -}; - -struct nmi_stats { - unsigned int normal; - unsigned int unknown; - unsigned int external; - unsigned int swallow; -}; - -static DEFINE_PER_CPU(struct nmi_stats, nmi_stats); - -static int ignore_nmis; - -int unknown_nmi_panic; -/* - * Prevent NMI reason port (0x61) being accessed simultaneously, can - * only be used in NMI handler. - */ -static DEFINE_RAW_SPINLOCK(nmi_reason_lock); - -static int __init setup_unknown_nmi_panic(char *str) -{ - unknown_nmi_panic = 1; - return 1; -} -__setup("unknown_nmi_panic", setup_unknown_nmi_panic); - -#define nmi_to_desc(type) (&nmi_desc[type]) - -static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) -{ - struct nmi_desc *desc = nmi_to_desc(type); - struct nmiaction *a; - int handled=0; - - rcu_read_lock(); - - /* - * NMIs are edge-triggered, which means if you have enough - * of them concurrently, you can lose some because only one - * can be latched at any given time. Walk the whole list - * to handle those situations. 
- */ - list_for_each_entry_rcu(a, &desc->head, list) - handled += a->handler(type, regs); - - rcu_read_unlock(); - - /* return total number of NMI events handled */ - return handled; -} - -static int __setup_nmi(unsigned int type, struct nmiaction *action) -{ - struct nmi_desc *desc = nmi_to_desc(type); - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - - /* - * most handlers of type NMI_UNKNOWN never return because - * they just assume the NMI is theirs. Just a sanity check - * to manage expectations - */ - WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head)); - - /* - * some handlers need to be executed first otherwise a fake - * event confuses some handlers (kdump uses this flag) - */ - if (action->flags & NMI_FLAG_FIRST) - list_add_rcu(&action->list, &desc->head); - else - list_add_tail_rcu(&action->list, &desc->head); - - spin_unlock_irqrestore(&desc->lock, flags); - return 0; -} - -static struct nmiaction *__free_nmi(unsigned int type, const char *name) -{ - struct nmi_desc *desc = nmi_to_desc(type); - struct nmiaction *n; - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - - list_for_each_entry_rcu(n, &desc->head, list) { - /* - * the name passed in to describe the nmi handler - * is used as the lookup key - */ - if (!strcmp(n->name, name)) { - WARN(in_nmi(), - "Trying to free NMI (%s) from NMI context!\n", n->name); - list_del_rcu(&n->list); - break; - } - } - - spin_unlock_irqrestore(&desc->lock, flags); - synchronize_rcu(); - return (n); -} - -int register_nmi_handler(unsigned int type, nmi_handler_t handler, - unsigned long nmiflags, const char *devname) -{ - struct nmiaction *action; - int retval = -ENOMEM; - - if (!handler) - return -EINVAL; - - action = kzalloc(sizeof(struct nmiaction), GFP_KERNEL); - if (!action) - goto fail_action; - - action->handler = handler; - action->flags = nmiflags; - action->name = kstrndup(devname, NMI_MAX_NAMELEN, GFP_KERNEL); - if (!action->name) - goto fail_action_name; - - retval = __setup_nmi(type, action); - - if (retval) - goto fail_setup_nmi; - - return retval; - -fail_setup_nmi: - kfree(action->name); -fail_action_name: - kfree(action); -fail_action: - - return retval; -} -EXPORT_SYMBOL_GPL(register_nmi_handler); - -void unregister_nmi_handler(unsigned int type, const char *name) -{ - struct nmiaction *a; - - a = __free_nmi(type, name); - if (a) { - kfree(a->name); - kfree(a); - } -} - -EXPORT_SYMBOL_GPL(unregister_nmi_handler); - -static notrace __kprobes void -pci_serr_error(unsigned char reason, struct pt_regs *regs) -{ - pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", - reason, smp_processor_id()); - - /* - * On some machines, PCI SERR line is used to report memory - * errors. EDAC makes use of it. - */ -#if defined(CONFIG_EDAC) - if (edac_handler_set()) { - edac_atomic_assert_error(); - return; - } -#endif - - if (panic_on_unrecovered_nmi) - panic("NMI: Not continuing"); - - pr_emerg("Dazed and confused, but trying to continue\n"); - - /* Clear and disable the PCI SERR error line. */ - reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR; - outb(reason, NMI_REASON_PORT); -} - -static notrace __kprobes void -io_check_error(unsigned char reason, struct pt_regs *regs) -{ - unsigned long i; - - pr_emerg( - "NMI: IOCK error (debug interrupt?) 
for reason %02x on CPU %d.\n", - reason, smp_processor_id()); - show_registers(regs); - - if (panic_on_io_nmi) - panic("NMI IOCK error: Not continuing"); - - /* Re-enable the IOCK line, wait for a few seconds */ - reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK; - outb(reason, NMI_REASON_PORT); - - i = 20000; - while (--i) { - touch_nmi_watchdog(); - udelay(100); - } - - reason &= ~NMI_REASON_CLEAR_IOCHK; - outb(reason, NMI_REASON_PORT); -} - -static notrace __kprobes void -unknown_nmi_error(unsigned char reason, struct pt_regs *regs) -{ - int handled; - - /* - * Use 'false' as back-to-back NMIs are dealt with one level up. - * Of course this makes having multiple 'unknown' handlers useless - * as only the first one is ever run (unless it can actually determine - * if it caused the NMI) - */ - handled = nmi_handle(NMI_UNKNOWN, regs, false); - if (handled) { - __this_cpu_add(nmi_stats.unknown, handled); - return; - } - - __this_cpu_add(nmi_stats.unknown, 1); - -#ifdef CONFIG_MCA - /* - * Might actually be able to figure out what the guilty party - * is: - */ - if (MCA_bus) { - mca_handle_nmi(); - return; - } -#endif - pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", - reason, smp_processor_id()); - - pr_emerg("Do you have a strange power saving mode enabled?\n"); - if (unknown_nmi_panic || panic_on_unrecovered_nmi) - panic("NMI: Not continuing"); - - pr_emerg("Dazed and confused, but trying to continue\n"); -} - -static DEFINE_PER_CPU(bool, swallow_nmi); -static DEFINE_PER_CPU(unsigned long, last_nmi_rip); - -static notrace __kprobes void default_do_nmi(struct pt_regs *regs) -{ - unsigned char reason = 0; - int handled; - bool b2b = false; - - /* - * CPU-specific NMI must be processed before non-CPU-specific - * NMI, otherwise we may lose it, because the CPU-specific - * NMI can not be detected/processed on other CPUs. - */ - - /* - * Back-to-back NMIs are interesting because they can either - * be two NMI or more than two NMIs (any thing over two is dropped - * due to NMI being edge-triggered). If this is the second half - * of the back-to-back NMI, assume we dropped things and process - * more handlers. Otherwise reset the 'swallow' NMI behaviour - */ - if (regs->ip == __this_cpu_read(last_nmi_rip)) - b2b = true; - else - __this_cpu_write(swallow_nmi, false); - - __this_cpu_write(last_nmi_rip, regs->ip); - - handled = nmi_handle(NMI_LOCAL, regs, b2b); - __this_cpu_add(nmi_stats.normal, handled); - if (handled) { - /* - * There are cases when a NMI handler handles multiple - * events in the current NMI. One of these events may - * be queued for in the next NMI. Because the event is - * already handled, the next NMI will result in an unknown - * NMI. Instead lets flag this for a potential NMI to - * swallow. - */ - if (handled > 1) - __this_cpu_write(swallow_nmi, true); - return; - } - - /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ - raw_spin_lock(&nmi_reason_lock); - reason = get_nmi_reason(); - - if (reason & NMI_REASON_MASK) { - if (reason & NMI_REASON_SERR) - pci_serr_error(reason, regs); - else if (reason & NMI_REASON_IOCHK) - io_check_error(reason, regs); -#ifdef CONFIG_X86_32 - /* - * Reassert NMI in case it became active - * meanwhile as it's edge-triggered: - */ - reassert_nmi(); -#endif - __this_cpu_add(nmi_stats.external, 1); - raw_spin_unlock(&nmi_reason_lock); - return; - } - raw_spin_unlock(&nmi_reason_lock); - - /* - * Only one NMI can be latched at a time. 
To handle - * this we may process multiple nmi handlers at once to - * cover the case where an NMI is dropped. The downside - * to this approach is we may process an NMI prematurely, - * while its real NMI is sitting latched. This will cause - * an unknown NMI on the next run of the NMI processing. - * - * We tried to flag that condition above, by setting the - * swallow_nmi flag when we process more than one event. - * This condition is also only present on the second half - * of a back-to-back NMI, so we flag that condition too. - * - * If both are true, we assume we already processed this - * NMI previously and we swallow it. Otherwise we reset - * the logic. - * - * There are scenarios where we may accidentally swallow - * a 'real' unknown NMI. For example, while processing - * a perf NMI another perf NMI comes in along with a - * 'real' unknown NMI. These two NMIs get combined into - * one (as descibed above). When the next NMI gets - * processed, it will be flagged by perf as handled, but - * noone will know that there was a 'real' unknown NMI sent - * also. As a result it gets swallowed. Or if the first - * perf NMI returns two events handled then the second - * NMI will get eaten by the logic below, again losing a - * 'real' unknown NMI. But this is the best we can do - * for now. - */ - if (b2b && __this_cpu_read(swallow_nmi)) - __this_cpu_add(nmi_stats.swallow, 1); - else - unknown_nmi_error(reason, regs); -} - -dotraplinkage notrace __kprobes void -do_nmi(struct pt_regs *regs, long error_code) -{ - nmi_enter(); - - inc_irq_stat(__nmi_count); - - if (!ignore_nmis) - default_do_nmi(regs); - - nmi_exit(); -} - -void stop_nmi(void) -{ - ignore_nmis++; -} - -void restart_nmi(void) -{ - ignore_nmis--; -} - -/* reset the back-to-back NMI logic */ -void local_touch_nmi(void) -{ - __this_cpu_write(last_nmi_rip, 0); -} diff --git a/trunk/arch/x86/kernel/process_32.c b/trunk/arch/x86/kernel/process_32.c index 46ff054ebaaa..7a3b65107a27 100644 --- a/trunk/arch/x86/kernel/process_32.c +++ b/trunk/arch/x86/kernel/process_32.c @@ -57,7 +57,6 @@ #include #include #include -#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -108,7 +107,6 @@ void cpu_idle(void) if (cpu_is_offline(cpu)) play_dead(); - local_touch_nmi(); local_irq_disable(); /* Don't trace irqs off for idle */ stop_critical_timings(); diff --git a/trunk/arch/x86/kernel/process_64.c b/trunk/arch/x86/kernel/process_64.c index 3bd7e6eebf31..f693e44e1bf6 100644 --- a/trunk/arch/x86/kernel/process_64.c +++ b/trunk/arch/x86/kernel/process_64.c @@ -51,7 +51,6 @@ #include #include #include -#include asmlinkage extern void ret_from_fork(void); @@ -134,7 +133,6 @@ void cpu_idle(void) * from here on, until they go to idle. * Otherwise, idle callbacks can misfire. 
*/ - local_touch_nmi(); local_irq_disable(); enter_idle(); /* Don't trace irqs off for idle */ diff --git a/trunk/arch/x86/kernel/reboot.c b/trunk/arch/x86/kernel/reboot.c index e334be1182b9..9242436e9937 100644 --- a/trunk/arch/x86/kernel/reboot.c +++ b/trunk/arch/x86/kernel/reboot.c @@ -464,7 +464,7 @@ static inline void kb_wait(void) } } -static void vmxoff_nmi(int cpu, struct pt_regs *regs) +static void vmxoff_nmi(int cpu, struct die_args *args) { cpu_emergency_vmxoff(); } @@ -736,10 +736,14 @@ static nmi_shootdown_cb shootdown_callback; static atomic_t waiting_for_crash_ipi; -static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) +static int crash_nmi_callback(struct notifier_block *self, + unsigned long val, void *data) { int cpu; + if (val != DIE_NMI) + return NOTIFY_OK; + cpu = raw_smp_processor_id(); /* Don't do anything if this handler is invoked on crashing cpu. @@ -747,10 +751,10 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) * an NMI if system was initially booted with nmi_watchdog parameter. */ if (cpu == crashing_cpu) - return NMI_HANDLED; + return NOTIFY_STOP; local_irq_disable(); - shootdown_callback(cpu, regs); + shootdown_callback(cpu, (struct die_args *)data); atomic_dec(&waiting_for_crash_ipi); /* Assume hlt works */ @@ -758,7 +762,7 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) for (;;) cpu_relax(); - return NMI_HANDLED; + return 1; } static void smp_send_nmi_allbutself(void) @@ -766,6 +770,12 @@ static void smp_send_nmi_allbutself(void) apic->send_IPI_allbutself(NMI_VECTOR); } +static struct notifier_block crash_nmi_nb = { + .notifier_call = crash_nmi_callback, + /* we want to be the first one called */ + .priority = NMI_LOCAL_HIGH_PRIOR+1, +}; + /* Halt all other CPUs, calling the specified function on each of them * * This function can be used to halt all other CPUs on crash @@ -784,8 +794,7 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); /* Would it be better to replace the trap vector here? */ - if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback, - NMI_FLAG_FIRST, "crash")) + if (register_die_notifier(&crash_nmi_nb)) return; /* return what? */ /* Ensure the new callback function is set before sending * out the NMI diff --git a/trunk/arch/x86/kernel/traps.c b/trunk/arch/x86/kernel/traps.c index a8e3eb83466c..6913369c234c 100644 --- a/trunk/arch/x86/kernel/traps.c +++ b/trunk/arch/x86/kernel/traps.c @@ -81,6 +81,15 @@ gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, }; DECLARE_BITMAP(used_vectors, NR_VECTORS); EXPORT_SYMBOL_GPL(used_vectors); +static int ignore_nmis; + +int unknown_nmi_panic; +/* + * Prevent NMI reason port (0x61) being accessed simultaneously, can + * only be used in NMI handler. + */ +static DEFINE_RAW_SPINLOCK(nmi_reason_lock); + static inline void conditional_sti(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) @@ -298,6 +307,152 @@ do_general_protection(struct pt_regs *regs, long error_code) die("general protection fault", regs, error_code); } +static int __init setup_unknown_nmi_panic(char *str) +{ + unknown_nmi_panic = 1; + return 1; +} +__setup("unknown_nmi_panic", setup_unknown_nmi_panic); + +static notrace __kprobes void +pci_serr_error(unsigned char reason, struct pt_regs *regs) +{ + pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", + reason, smp_processor_id()); + + /* + * On some machines, PCI SERR line is used to report memory + * errors. 
EDAC makes use of it. + */ +#if defined(CONFIG_EDAC) + if (edac_handler_set()) { + edac_atomic_assert_error(); + return; + } +#endif + + if (panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); + + pr_emerg("Dazed and confused, but trying to continue\n"); + + /* Clear and disable the PCI SERR error line. */ + reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR; + outb(reason, NMI_REASON_PORT); +} + +static notrace __kprobes void +io_check_error(unsigned char reason, struct pt_regs *regs) +{ + unsigned long i; + + pr_emerg( + "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n", + reason, smp_processor_id()); + show_registers(regs); + + if (panic_on_io_nmi) + panic("NMI IOCK error: Not continuing"); + + /* Re-enable the IOCK line, wait for a few seconds */ + reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK; + outb(reason, NMI_REASON_PORT); + + i = 20000; + while (--i) { + touch_nmi_watchdog(); + udelay(100); + } + + reason &= ~NMI_REASON_CLEAR_IOCHK; + outb(reason, NMI_REASON_PORT); +} + +static notrace __kprobes void +unknown_nmi_error(unsigned char reason, struct pt_regs *regs) +{ + if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == + NOTIFY_STOP) + return; +#ifdef CONFIG_MCA + /* + * Might actually be able to figure out what the guilty party + * is: + */ + if (MCA_bus) { + mca_handle_nmi(); + return; + } +#endif + pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", + reason, smp_processor_id()); + + pr_emerg("Do you have a strange power saving mode enabled?\n"); + if (unknown_nmi_panic || panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); + + pr_emerg("Dazed and confused, but trying to continue\n"); +} + +static notrace __kprobes void default_do_nmi(struct pt_regs *regs) +{ + unsigned char reason = 0; + + /* + * CPU-specific NMI must be processed before non-CPU-specific + * NMI, otherwise we may lose it, because the CPU-specific + * NMI can not be detected/processed on other CPUs. + */ + if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP) + return; + + /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ + raw_spin_lock(&nmi_reason_lock); + reason = get_nmi_reason(); + + if (reason & NMI_REASON_MASK) { + if (reason & NMI_REASON_SERR) + pci_serr_error(reason, regs); + else if (reason & NMI_REASON_IOCHK) + io_check_error(reason, regs); +#ifdef CONFIG_X86_32 + /* + * Reassert NMI in case it became active + * meanwhile as it's edge-triggered: + */ + reassert_nmi(); +#endif + raw_spin_unlock(&nmi_reason_lock); + return; + } + raw_spin_unlock(&nmi_reason_lock); + + unknown_nmi_error(reason, regs); +} + +dotraplinkage notrace __kprobes void +do_nmi(struct pt_regs *regs, long error_code) +{ + nmi_enter(); + + inc_irq_stat(__nmi_count); + + if (!ignore_nmis) + default_do_nmi(regs); + + nmi_exit(); +} + +void stop_nmi(void) +{ + ignore_nmis++; +} + +void restart_nmi(void) +{ + ignore_nmis--; +} + /* May run on IST stack. 
*/ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) { diff --git a/trunk/arch/x86/oprofile/nmi_int.c b/trunk/arch/x86/oprofile/nmi_int.c index c04dc145a4b7..68894fdc034b 100644 --- a/trunk/arch/x86/oprofile/nmi_int.c +++ b/trunk/arch/x86/oprofile/nmi_int.c @@ -61,15 +61,26 @@ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, } -static int profile_exceptions_notify(unsigned int val, struct pt_regs *regs) +static int profile_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) { - if (ctr_running) - model->check_ctrs(regs, &__get_cpu_var(cpu_msrs)); - else if (!nmi_enabled) - return NMI_DONE; - else - model->stop(&__get_cpu_var(cpu_msrs)); - return NMI_HANDLED; + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + + switch (val) { + case DIE_NMI: + if (ctr_running) + model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs)); + else if (!nmi_enabled) + break; + else + model->stop(&__get_cpu_var(cpu_msrs)); + ret = NOTIFY_STOP; + break; + default: + break; + } + return ret; } static void nmi_cpu_save_registers(struct op_msrs *msrs) @@ -352,6 +363,12 @@ static void nmi_cpu_setup(void *dummy) apic_write(APIC_LVTPC, APIC_DM_NMI); } +static struct notifier_block profile_exceptions_nb = { + .notifier_call = profile_exceptions_notify, + .next = NULL, + .priority = NMI_LOCAL_LOW_PRIOR, +}; + static void nmi_cpu_restore_registers(struct op_msrs *msrs) { struct op_msr *counters = msrs->counters; @@ -385,6 +402,8 @@ static void nmi_cpu_shutdown(void *dummy) apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); apic_write(APIC_LVTERR, v); nmi_cpu_restore_registers(msrs); + if (model->cpu_down) + model->cpu_down(); } static void nmi_cpu_up(void *dummy) @@ -489,8 +508,7 @@ static int nmi_setup(void) ctr_running = 0; /* make variables visible to the nmi handler: */ smp_mb(); - err = register_nmi_handler(NMI_LOCAL, profile_exceptions_notify, - 0, "oprofile"); + err = register_die_notifier(&profile_exceptions_nb); if (err) goto fail; @@ -520,7 +538,7 @@ static void nmi_shutdown(void) put_online_cpus(); /* make variables visible to the nmi handler: */ smp_mb(); - unregister_nmi_handler(NMI_LOCAL, "oprofile"); + unregister_die_notifier(&profile_exceptions_nb); msrs = &get_cpu_var(cpu_msrs); model->shutdown(msrs); free_msrs(); diff --git a/trunk/arch/x86/oprofile/nmi_timer_int.c b/trunk/arch/x86/oprofile/nmi_timer_int.c index 7f8052cd6620..720bf5a53c51 100644 --- a/trunk/arch/x86/oprofile/nmi_timer_int.c +++ b/trunk/arch/x86/oprofile/nmi_timer_int.c @@ -18,16 +18,32 @@ #include #include -static int profile_timer_exceptions_notify(unsigned int val, struct pt_regs *regs) +static int profile_timer_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) { - oprofile_add_sample(regs, 0); - return NMI_HANDLED; + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + + switch (val) { + case DIE_NMI: + oprofile_add_sample(args->regs, 0); + ret = NOTIFY_STOP; + break; + default: + break; + } + return ret; } +static struct notifier_block profile_timer_exceptions_nb = { + .notifier_call = profile_timer_exceptions_notify, + .next = NULL, + .priority = NMI_LOW_PRIOR, +}; + static int timer_start(void) { - if (register_nmi_handler(NMI_LOCAL, profile_timer_exceptions_notify, - 0, "oprofile-timer")) + if (register_die_notifier(&profile_timer_exceptions_nb)) return 1; return 0; } @@ -35,7 +51,7 @@ static int timer_start(void) static void timer_stop(void) { - unregister_nmi_handler(NMI_LOCAL, 
"oprofile-timer"); + unregister_die_notifier(&profile_timer_exceptions_nb); synchronize_sched(); /* Allow already-started NMIs to complete. */ } diff --git a/trunk/arch/x86/oprofile/op_model_amd.c b/trunk/arch/x86/oprofile/op_model_amd.c index 303f08637826..9cbb710dc94b 100644 --- a/trunk/arch/x86/oprofile/op_model_amd.c +++ b/trunk/arch/x86/oprofile/op_model_amd.c @@ -29,6 +29,8 @@ #include "op_x86_model.h" #include "op_counter.h" +#define NUM_COUNTERS 4 +#define NUM_COUNTERS_F15H 6 #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX #define NUM_VIRT_COUNTERS 32 #else @@ -67,6 +69,35 @@ struct ibs_state { static struct ibs_config ibs_config; static struct ibs_state ibs_state; +/* + * IBS cpuid feature detection + */ + +#define IBS_CPUID_FEATURES 0x8000001b + +/* + * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but + * bit 0 is used to indicate the existence of IBS. + */ +#define IBS_CAPS_AVAIL (1U<<0) +#define IBS_CAPS_FETCHSAM (1U<<1) +#define IBS_CAPS_OPSAM (1U<<2) +#define IBS_CAPS_RDWROPCNT (1U<<3) +#define IBS_CAPS_OPCNT (1U<<4) +#define IBS_CAPS_BRNTRGT (1U<<5) +#define IBS_CAPS_OPCNTEXT (1U<<6) + +#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \ + | IBS_CAPS_FETCHSAM \ + | IBS_CAPS_OPSAM) + +/* + * IBS APIC setup + */ +#define IBSCTL 0x1cc +#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8) +#define IBSCTL_LVT_OFFSET_MASK 0x0F + /* * IBS randomization macros */ @@ -74,6 +105,27 @@ static struct ibs_state ibs_state; #define IBS_RANDOM_MASK ((1ULL << IBS_RANDOM_BITS) - 1) #define IBS_RANDOM_MAXCNT_OFFSET (1ULL << (IBS_RANDOM_BITS - 5)) +static u32 get_ibs_caps(void) +{ + u32 ibs_caps; + unsigned int max_level; + + if (!boot_cpu_has(X86_FEATURE_IBS)) + return 0; + + /* check IBS cpuid feature flags */ + max_level = cpuid_eax(0x80000000); + if (max_level < IBS_CPUID_FEATURES) + return IBS_CAPS_DEFAULT; + + ibs_caps = cpuid_eax(IBS_CPUID_FEATURES); + if (!(ibs_caps & IBS_CAPS_AVAIL)) + /* cpuid flags not valid */ + return IBS_CAPS_DEFAULT; + + return ibs_caps; +} + /* * 16-bit Linear Feedback Shift Register (LFSR) * @@ -264,6 +316,81 @@ static void op_amd_stop_ibs(void) wrmsrl(MSR_AMD64_IBSOPCTL, 0); } +static inline int get_eilvt(int offset) +{ + return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1); +} + +static inline int put_eilvt(int offset) +{ + return !setup_APIC_eilvt(offset, 0, 0, 1); +} + +static inline int ibs_eilvt_valid(void) +{ + int offset; + u64 val; + int valid = 0; + + preempt_disable(); + + rdmsrl(MSR_AMD64_IBSCTL, val); + offset = val & IBSCTL_LVT_OFFSET_MASK; + + if (!(val & IBSCTL_LVT_OFFSET_VALID)) { + pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n", + smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); + goto out; + } + + if (!get_eilvt(offset)) { + pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n", + smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); + goto out; + } + + valid = 1; +out: + preempt_enable(); + + return valid; +} + +static inline int get_ibs_offset(void) +{ + u64 val; + + rdmsrl(MSR_AMD64_IBSCTL, val); + if (!(val & IBSCTL_LVT_OFFSET_VALID)) + return -EINVAL; + + return val & IBSCTL_LVT_OFFSET_MASK; +} + +static void setup_APIC_ibs(void) +{ + int offset; + + offset = get_ibs_offset(); + if (offset < 0) + goto failed; + + if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0)) + return; +failed: + pr_warn("oprofile: IBS APIC setup failed on cpu #%d\n", + smp_processor_id()); +} + +static void clear_APIC_ibs(void) +{ + int offset; + + offset = get_ibs_offset(); + if (offset >= 0) + 
setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1); +} + #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, @@ -312,7 +439,7 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs) goto fail; } /* both registers must be reserved */ - if (num_counters == AMD64_NUM_COUNTERS_F15H) { + if (num_counters == NUM_COUNTERS_F15H) { msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1); msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1); } else { @@ -377,6 +504,15 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, val |= op_x86_get_ctrl(model, &counter_config[virt]); wrmsrl(msrs->controls[i].addr, val); } + + if (ibs_caps) + setup_APIC_ibs(); +} + +static void op_amd_cpu_shutdown(void) +{ + if (ibs_caps) + clear_APIC_ibs(); } static int op_amd_check_ctrs(struct pt_regs * const regs, @@ -439,6 +575,86 @@ static void op_amd_stop(struct op_msrs const * const msrs) op_amd_stop_ibs(); } +static int setup_ibs_ctl(int ibs_eilvt_off) +{ + struct pci_dev *cpu_cfg; + int nodes; + u32 value = 0; + + nodes = 0; + cpu_cfg = NULL; + do { + cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD, + PCI_DEVICE_ID_AMD_10H_NB_MISC, + cpu_cfg); + if (!cpu_cfg) + break; + ++nodes; + pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off + | IBSCTL_LVT_OFFSET_VALID); + pci_read_config_dword(cpu_cfg, IBSCTL, &value); + if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) { + pci_dev_put(cpu_cfg); + printk(KERN_DEBUG "Failed to setup IBS LVT offset, " + "IBSCTL = 0x%08x\n", value); + return -EINVAL; + } + } while (1); + + if (!nodes) { + printk(KERN_DEBUG "No CPU node configured for IBS\n"); + return -ENODEV; + } + + return 0; +} + +/* + * This runs only on the current cpu. We try to find an LVT offset and + * setup the local APIC. For this we must disable preemption. On + * success we initialize all nodes with this offset. This updates then + * the offset in the IBS_CTL per-node msr. The per-core APIC setup of + * the IBS interrupt vector is called from op_amd_setup_ctrs()/op_- + * amd_cpu_shutdown() using the new offset. 
+ */ +static int force_ibs_eilvt_setup(void) +{ + int offset; + int ret; + + preempt_disable(); + /* find the next free available EILVT entry, skip offset 0 */ + for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) { + if (get_eilvt(offset)) + break; + } + preempt_enable(); + + if (offset == APIC_EILVT_NR_MAX) { + printk(KERN_DEBUG "No EILVT entry available\n"); + return -EBUSY; + } + + ret = setup_ibs_ctl(offset); + if (ret) + goto out; + + if (!ibs_eilvt_valid()) { + ret = -EFAULT; + goto out; + } + + pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset); + pr_err(FW_BUG "workaround enabled for IBS LVT offset\n"); + + return 0; +out: + preempt_disable(); + put_eilvt(offset); + preempt_enable(); + return ret; +} + /* * check and reserve APIC extended interrupt LVT offset for IBS if * available @@ -451,6 +667,17 @@ static void init_ibs(void) if (!ibs_caps) return; + if (ibs_eilvt_valid()) + goto out; + + if (!force_ibs_eilvt_setup()) + goto out; + + /* Failed to setup ibs */ + ibs_caps = 0; + return; + +out: printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps); } @@ -514,9 +741,9 @@ static int op_amd_init(struct oprofile_operations *ops) ops->create_files = setup_ibs_files; if (boot_cpu_data.x86 == 0x15) { - num_counters = AMD64_NUM_COUNTERS_F15H; + num_counters = NUM_COUNTERS_F15H; } else { - num_counters = AMD64_NUM_COUNTERS; + num_counters = NUM_COUNTERS; } op_amd_spec.num_counters = num_counters; @@ -533,6 +760,7 @@ struct op_x86_model_spec op_amd_spec = { .init = op_amd_init, .fill_in_addresses = &op_amd_fill_in_addresses, .setup_ctrs = &op_amd_setup_ctrs, + .cpu_down = &op_amd_cpu_shutdown, .check_ctrs = &op_amd_check_ctrs, .start = &op_amd_start, .stop = &op_amd_stop, diff --git a/trunk/arch/x86/oprofile/op_x86_model.h b/trunk/arch/x86/oprofile/op_x86_model.h index 71e8a67337e2..89017fa1fd63 100644 --- a/trunk/arch/x86/oprofile/op_x86_model.h +++ b/trunk/arch/x86/oprofile/op_x86_model.h @@ -43,6 +43,7 @@ struct op_x86_model_spec { int (*fill_in_addresses)(struct op_msrs * const msrs); void (*setup_ctrs)(struct op_x86_model_spec const *model, struct op_msrs const * const msrs); + void (*cpu_down)(void); int (*check_ctrs)(struct pt_regs * const regs, struct op_msrs const * const msrs); void (*start)(struct op_msrs const * const msrs); diff --git a/trunk/drivers/acpi/apei/ghes.c b/trunk/drivers/acpi/apei/ghes.c index b8e08cb67a18..0784f99a4665 100644 --- a/trunk/drivers/acpi/apei/ghes.c +++ b/trunk/drivers/acpi/apei/ghes.c @@ -50,7 +50,6 @@ #include #include #include -#include #include "apei-internal.h" @@ -750,11 +749,15 @@ static void ghes_proc_in_irq(struct irq_work *irq_work) } } -static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) +static int ghes_notify_nmi(struct notifier_block *this, + unsigned long cmd, void *data) { struct ghes *ghes, *ghes_global = NULL; int sev, sev_global = -1; - int ret = NMI_DONE; + int ret = NOTIFY_DONE; + + if (cmd != DIE_NMI) + return ret; raw_spin_lock(&ghes_nmi_lock); list_for_each_entry_rcu(ghes, &ghes_nmi, list) { @@ -767,10 +770,10 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) sev_global = sev; ghes_global = ghes; } - ret = NMI_HANDLED; + ret = NOTIFY_STOP; } - if (ret == NMI_DONE) + if (ret == NOTIFY_DONE) goto out; if (sev_global >= GHES_SEV_PANIC) { @@ -822,6 +825,10 @@ static struct notifier_block ghes_notifier_sci = { .notifier_call = ghes_notify_sci, }; +static struct notifier_block ghes_notifier_nmi = { + .notifier_call = ghes_notify_nmi, +}; + static unsigned long 
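
With register_nmi_handler() gone, a handler's place in line is governed purely by .priority on the shared die chain: atomic_notifier_call_chain() walks callbacks in descending priority order and stops at the first NOTIFY_STOP. ghes_notifier_nmi above leaves .priority at 0, so the perf and oprofile handlers (the NMI_LOCAL_* priorities) always see an NMI first; hpwdt further down instead raises its priority to 0x7FFFFFFF when it wants first crack. A hypothetical pair to illustrate the ordering (names are not from the patch):

static int runs_first(struct notifier_block *nb, unsigned long val, void *data)
{
        return NOTIFY_DONE;             /* looked, passed it along */
}

static int runs_last(struct notifier_block *nb, unsigned long val, void *data)
{
        return NOTIFY_STOP;             /* nobody after this is called */
}

static struct notifier_block first_nb = {
        .notifier_call  = runs_first,
        .priority       = NMI_LOCAL_LOW_PRIOR, /* still far above 0 */
};

static struct notifier_block last_nb = {
        .notifier_call  = runs_last,
        .priority       = 0,            /* default, like ghes_notifier_nmi */
};
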
ghes_esource_prealloc_size( const struct acpi_hest_generic *generic) { @@ -911,8 +918,7 @@ static int __devinit ghes_probe(struct platform_device *ghes_dev) ghes_estatus_pool_expand(len); mutex_lock(&ghes_list_mutex); if (list_empty(&ghes_nmi)) - register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, - "ghes"); + register_die_notifier(&ghes_notifier_nmi); list_add_rcu(&ghes->list, &ghes_nmi); mutex_unlock(&ghes_list_mutex); break; @@ -958,7 +964,7 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev) mutex_lock(&ghes_list_mutex); list_del_rcu(&ghes->list); if (list_empty(&ghes_nmi)) - unregister_nmi_handler(NMI_LOCAL, "ghes"); + unregister_die_notifier(&ghes_notifier_nmi); mutex_unlock(&ghes_list_mutex); /* * To synchronize with NMI handler, ghes can only be diff --git a/trunk/drivers/char/ipmi/ipmi_watchdog.c b/trunk/drivers/char/ipmi/ipmi_watchdog.c index c2917ffad2c2..3302586655c4 100644 --- a/trunk/drivers/char/ipmi/ipmi_watchdog.c +++ b/trunk/drivers/char/ipmi/ipmi_watchdog.c @@ -65,7 +65,6 @@ * mechanism for it at that time. */ #include -#include #define HAVE_DIE_NMI #endif @@ -1078,8 +1077,17 @@ static void ipmi_unregister_watchdog(int ipmi_intf) #ifdef HAVE_DIE_NMI static int -ipmi_nmi(unsigned int val, struct pt_regs *regs) +ipmi_nmi(struct notifier_block *self, unsigned long val, void *data) { + struct die_args *args = data; + + if (val != DIE_NMIUNKNOWN) + return NOTIFY_OK; + + /* Hack, if it's a memory or I/O error, ignore it. */ + if (args->err & 0xc0) + return NOTIFY_OK; + /* * If we get here, it's an NMI that's not a memory or I/O * error. We can't truly tell if it's from IPMI or not @@ -1089,15 +1097,15 @@ ipmi_nmi(unsigned int val, struct pt_regs *regs) if (testing_nmi) { testing_nmi = 2; - return NMI_HANDLED; + return NOTIFY_STOP; } /* If we are not expecting a timeout, ignore it. 
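
One subtlety above: for DIE_NMIUNKNOWN the err field of struct die_args carries the NMI reason byte read from I/O port 0x61, so the 0xc0 test is screening out the two hardware-error NMI sources before the driver considers claiming the NMI as an IPMI pre-timeout. The same check with the masks spelled out (the macro names here are for clarity; the driver open-codes 0xc0):

#define NMI_REASON_SERR         0x80    /* PCI SERR#, e.g. memory parity */
#define NMI_REASON_IOCHK        0x40    /* I/O channel check */

        if (args->err & (NMI_REASON_SERR | NMI_REASON_IOCHK))
                return NOTIFY_OK;       /* hardware error, not a watchdog NMI */
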
*/ if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE) - return NMI_DONE; + return NOTIFY_OK; if (preaction_val != WDOG_PRETIMEOUT_NMI) - return NMI_DONE; + return NOTIFY_OK; /* * If no one else handled the NMI, we assume it was the IPMI @@ -1112,8 +1120,12 @@ ipmi_nmi(unsigned int val, struct pt_regs *regs) panic(PFX "pre-timeout"); } - return NMI_HANDLED; + return NOTIFY_STOP; } + +static struct notifier_block ipmi_nmi_handler = { + .notifier_call = ipmi_nmi +}; #endif static int wdog_reboot_handler(struct notifier_block *this, @@ -1278,8 +1290,7 @@ static void check_parms(void) } } if (do_nmi && !nmi_handler_registered) { - rv = register_nmi_handler(NMI_UNKNOWN, ipmi_nmi, 0, - "ipmi"); + rv = register_die_notifier(&ipmi_nmi_handler); if (rv) { printk(KERN_WARNING PFX "Can't register nmi handler\n"); @@ -1287,7 +1298,7 @@ static void check_parms(void) } else nmi_handler_registered = 1; } else if (!do_nmi && nmi_handler_registered) { - unregister_nmi_handler(NMI_UNKNOWN, "ipmi"); + unregister_die_notifier(&ipmi_nmi_handler); nmi_handler_registered = 0; } #endif @@ -1325,7 +1336,7 @@ static int __init ipmi_wdog_init(void) if (rv) { #ifdef HAVE_DIE_NMI if (nmi_handler_registered) - unregister_nmi_handler(NMI_UNKNOWN, "ipmi"); + unregister_die_notifier(&ipmi_nmi_handler); #endif atomic_notifier_chain_unregister(&panic_notifier_list, &wdog_panic_notifier); @@ -1346,7 +1357,7 @@ static void __exit ipmi_wdog_exit(void) #ifdef HAVE_DIE_NMI if (nmi_handler_registered) - unregister_nmi_handler(NMI_UNKNOWN, "ipmi"); + unregister_die_notifier(&ipmi_nmi_handler); #endif atomic_notifier_chain_unregister(&panic_notifier_list, diff --git a/trunk/drivers/watchdog/hpwdt.c b/trunk/drivers/watchdog/hpwdt.c index 3774c9b8dac9..809cbda03d7a 100644 --- a/trunk/drivers/watchdog/hpwdt.c +++ b/trunk/drivers/watchdog/hpwdt.c @@ -35,7 +35,6 @@ #include #include #endif /* CONFIG_HPWDT_NMI_DECODING */ -#include #define HPWDT_VERSION "1.3.0" #define SECS_TO_TICKS(secs) ((secs) * 1000 / 128) @@ -478,11 +477,15 @@ static int hpwdt_time_left(void) /* * NMI Handler */ -static int hpwdt_pretimeout(unsigned int ulReason, struct pt_regs *regs) +static int hpwdt_pretimeout(struct notifier_block *nb, unsigned long ulReason, + void *data) { unsigned long rom_pl; static int die_nmi_called; + if (ulReason != DIE_NMIUNKNOWN) + goto out; + if (!hpwdt_nmi_decoding) goto out; @@ -505,7 +508,7 @@ static int hpwdt_pretimeout(unsigned int ulReason, struct pt_regs *regs) "Management Log for details.\n"); out: - return NMI_DONE; + return NOTIFY_OK; } #endif /* CONFIG_HPWDT_NMI_DECODING */ @@ -645,6 +648,13 @@ static struct miscdevice hpwdt_miscdev = { .fops = &hpwdt_fops, }; +#ifdef CONFIG_HPWDT_NMI_DECODING +static struct notifier_block die_notifier = { + .notifier_call = hpwdt_pretimeout, + .priority = 0, +}; +#endif /* CONFIG_HPWDT_NMI_DECODING */ + /* * Init & Exit */ @@ -730,9 +740,10 @@ static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev) * die notify list to handle a critical NMI. The default is to * be last so other users of the NMI signal can function. */ - retval = register_nmi_handler(NMI_UNKNOWN, hpwdt_pretimeout, - (priority) ? 
NMI_FLAG_FIRST : 0, - "hpwdt"); + if (priority) + die_notifier.priority = 0x7FFFFFFF; + + retval = register_die_notifier(&die_notifier); if (retval != 0) { dev_warn(&dev->dev, "Unable to register a die notifier (err=%d).\n", @@ -752,7 +763,7 @@ static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev) static void hpwdt_exit_nmi_decoding(void) { - unregister_nmi_handler(NMI_UNKNOWN, "hpwdt"); + unregister_die_notifier(&die_notifier); if (cru_rom_addr) iounmap(cru_rom_addr); } diff --git a/trunk/include/linux/perf_event.h b/trunk/include/linux/perf_event.h index 1e9ebe5e0091..c816075c01ce 100644 --- a/trunk/include/linux/perf_event.h +++ b/trunk/include/linux/perf_event.h @@ -220,10 +220,7 @@ struct perf_event_attr { mmap_data : 1, /* non-exec mmap data */ sample_id_all : 1, /* sample_type all events */ - exclude_host : 1, /* don't count in host */ - exclude_guest : 1, /* don't count in guest */ - - __reserved_1 : 43; + __reserved_1 : 45; union { __u32 wakeup_events; /* wakeup every n events */ diff --git a/trunk/tools/perf/util/hist.c b/trunk/tools/perf/util/hist.c index 677e1da6bb3e..dd277897ff0b 100644 --- a/trunk/tools/perf/util/hist.c +++ b/trunk/tools/perf/util/hist.c @@ -18,56 +18,56 @@ struct callchain_param callchain_param = { .order = ORDER_CALLEE }; -u16 hists__col_len(struct hists *self, enum hist_column col) +u16 hists__col_len(struct hists *hists, enum hist_column col) { - return self->col_len[col]; + return hists->col_len[col]; } -void hists__set_col_len(struct hists *self, enum hist_column col, u16 len) +void hists__set_col_len(struct hists *hists, enum hist_column col, u16 len) { - self->col_len[col] = len; + hists->col_len[col] = len; } -bool hists__new_col_len(struct hists *self, enum hist_column col, u16 len) +bool hists__new_col_len(struct hists *hists, enum hist_column col, u16 len) { - if (len > hists__col_len(self, col)) { - hists__set_col_len(self, col, len); + if (len > hists__col_len(hists, col)) { + hists__set_col_len(hists, col, len); return true; } return false; } -static void hists__reset_col_len(struct hists *self) +static void hists__reset_col_len(struct hists *hists) { enum hist_column col; for (col = 0; col < HISTC_NR_COLS; ++col) - hists__set_col_len(self, col, 0); + hists__set_col_len(hists, col, 0); } -static void hists__calc_col_len(struct hists *self, struct hist_entry *h) +static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) { u16 len; if (h->ms.sym) - hists__new_col_len(self, HISTC_SYMBOL, h->ms.sym->namelen); + hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen); else { const unsigned int unresolved_col_width = BITS_PER_LONG / 4; - if (hists__col_len(self, HISTC_DSO) < unresolved_col_width && + if (hists__col_len(hists, HISTC_DSO) < unresolved_col_width && !symbol_conf.col_width_list_str && !symbol_conf.field_sep && !symbol_conf.dso_list) - hists__set_col_len(self, HISTC_DSO, + hists__set_col_len(hists, HISTC_DSO, unresolved_col_width); } len = thread__comm_len(h->thread); - if (hists__new_col_len(self, HISTC_COMM, len)) - hists__set_col_len(self, HISTC_THREAD, len + 6); + if (hists__new_col_len(hists, HISTC_COMM, len)) + hists__set_col_len(hists, HISTC_THREAD, len + 6); if (h->ms.map) { len = dso__name_len(h->ms.map->dso); - hists__new_col_len(self, HISTC_DSO, len); + hists__new_col_len(hists, HISTC_DSO, len); } } @@ -113,11 +113,11 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template) return self; } -static void hists__inc_nr_entries(struct hists *self, struct hist_entry *h) +static void 
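
The perf_event_attr hunk above has to keep the flag bitfield filling exactly one __u64: removing the two 1-bit exclude_* flags widens __reserved_1 from 43 to 45, so this era's 17 one-bit flags plus the 2-bit precise_ip plus the pad still sum to 64. A compile-time guard for that invariant could look like this (not present in the kernel; C11 _Static_assert used purely for illustration):

_Static_assert(17 * 1 + 2 + 45 == 64,
               "perf_event_attr flag bits must fill a __u64");
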
hists__inc_nr_entries(struct hists *hists, struct hist_entry *h) { if (!h->filtered) { - hists__calc_col_len(self, h); - ++self->nr_entries; + hists__calc_col_len(hists, h); + ++hists->nr_entries; } } @@ -128,11 +128,11 @@ static u8 symbol__parent_filter(const struct symbol *parent) return 0; } -struct hist_entry *__hists__add_entry(struct hists *self, +struct hist_entry *__hists__add_entry(struct hists *hists, struct addr_location *al, struct symbol *sym_parent, u64 period) { - struct rb_node **p = &self->entries.rb_node; + struct rb_node **p = &hists->entries.rb_node; struct rb_node *parent = NULL; struct hist_entry *he; struct hist_entry entry = { @@ -172,8 +172,8 @@ struct hist_entry *__hists__add_entry(struct hists *self, if (!he) return NULL; rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &self->entries); - hists__inc_nr_entries(self, he); + rb_insert_color(&he->rb_node, &hists->entries); + hists__inc_nr_entries(hists, he); out: hist_entry__add_cpumode_period(he, al->cpumode, period); return he; @@ -222,7 +222,7 @@ void hist_entry__free(struct hist_entry *he) * collapse the histogram */ -static bool hists__collapse_insert_entry(struct hists *self, +static bool hists__collapse_insert_entry(struct hists *hists, struct rb_root *root, struct hist_entry *he) { @@ -240,8 +240,8 @@ static bool hists__collapse_insert_entry(struct hists *self, if (!cmp) { iter->period += he->period; if (symbol_conf.use_callchain) { - callchain_cursor_reset(&self->callchain_cursor); - callchain_merge(&self->callchain_cursor, iter->callchain, + callchain_cursor_reset(&hists->callchain_cursor); + callchain_merge(&hists->callchain_cursor, iter->callchain, he->callchain); } hist_entry__free(he); @@ -259,7 +259,7 @@ static bool hists__collapse_insert_entry(struct hists *self, return true; } -void hists__collapse_resort(struct hists *self) +void hists__collapse_resort(struct hists *hists) { struct rb_root tmp; struct rb_node *next; @@ -269,20 +269,20 @@ void hists__collapse_resort(struct hists *self) return; tmp = RB_ROOT; - next = rb_first(&self->entries); - self->nr_entries = 0; - hists__reset_col_len(self); + next = rb_first(&hists->entries); + hists->nr_entries = 0; + hists__reset_col_len(hists); while (next) { n = rb_entry(next, struct hist_entry, rb_node); next = rb_next(&n->rb_node); - rb_erase(&n->rb_node, &self->entries); - if (hists__collapse_insert_entry(self, &tmp, n)) - hists__inc_nr_entries(self, n); + rb_erase(&n->rb_node, &hists->entries); + if (hists__collapse_insert_entry(hists, &tmp, n)) + hists__inc_nr_entries(hists, n); } - self->entries = tmp; + hists->entries = tmp; } /* @@ -315,31 +315,31 @@ static void __hists__insert_output_entry(struct rb_root *entries, rb_insert_color(&he->rb_node, entries); } -void hists__output_resort(struct hists *self) +void hists__output_resort(struct hists *hists) { struct rb_root tmp; struct rb_node *next; struct hist_entry *n; u64 min_callchain_hits; - min_callchain_hits = self->stats.total_period * (callchain_param.min_percent / 100); + min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100); tmp = RB_ROOT; - next = rb_first(&self->entries); + next = rb_first(&hists->entries); - self->nr_entries = 0; - hists__reset_col_len(self); + hists->nr_entries = 0; + hists__reset_col_len(hists); while (next) { n = rb_entry(next, struct hist_entry, rb_node); next = rb_next(&n->rb_node); - rb_erase(&n->rb_node, &self->entries); + rb_erase(&n->rb_node, &hists->entries); __hists__insert_output_entry(&tmp, n, 
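
hists__collapse_insert_entry() above is the classic insert-or-merge walk over a struct rb_root: entries that compare equal fold their period into the node already in the tree instead of being linked as duplicates. Reduced to its skeleton, with the callchain merging dropped (a sketch of the pattern, not the function itself):

static bool insert_or_merge(struct rb_root *root, struct hist_entry *he)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        struct hist_entry *iter;
        int64_t cmp;

        while (*p != NULL) {
                parent = *p;
                iter = rb_entry(parent, struct hist_entry, rb_node);
                cmp = hist_entry__collapse(iter, he);

                if (!cmp) {                     /* same key: merge, drop dup */
                        iter->period += he->period;
                        hist_entry__free(he);
                        return false;
                }

                if (cmp < 0)
                        p = &(*p)->rb_left;
                else
                        p = &(*p)->rb_right;
        }

        rb_link_node(&he->rb_node, parent, p);  /* new key: link, rebalance */
        rb_insert_color(&he->rb_node, root);
        return true;
}
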
min_callchain_hits); - hists__inc_nr_entries(self, n); + hists__inc_nr_entries(hists, n); } - self->entries = tmp; + hists->entries = tmp; } static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) @@ -738,7 +738,7 @@ static size_t hist_entry__fprintf_callchain(struct hist_entry *self, left_margin); } -size_t hists__fprintf(struct hists *self, struct hists *pair, +size_t hists__fprintf(struct hists *hists, struct hists *pair, bool show_displacement, FILE *fp) { struct sort_entry *se; @@ -803,15 +803,15 @@ size_t hists__fprintf(struct hists *self, struct hists *pair, width = strlen(se->se_header); if (symbol_conf.col_width_list_str) { if (col_width) { - hists__set_col_len(self, se->se_width_idx, + hists__set_col_len(hists, se->se_width_idx, atoi(col_width)); col_width = strchr(col_width, ','); if (col_width) ++col_width; } } - if (!hists__new_col_len(self, se->se_width_idx, width)) - width = hists__col_len(self, se->se_width_idx); + if (!hists__new_col_len(hists, se->se_width_idx, width)) + width = hists__col_len(hists, se->se_width_idx); fprintf(fp, " %*s", width, se->se_header); } fprintf(fp, "\n"); @@ -834,7 +834,7 @@ size_t hists__fprintf(struct hists *self, struct hists *pair, continue; fprintf(fp, " "); - width = hists__col_len(self, se->se_width_idx); + width = hists__col_len(hists, se->se_width_idx); if (width == 0) width = strlen(se->se_header); for (i = 0; i < width; i++) @@ -844,7 +844,7 @@ size_t hists__fprintf(struct hists *self, struct hists *pair, fprintf(fp, "\n#\n"); print_entries: - for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) { + for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); if (h->filtered) @@ -858,12 +858,12 @@ size_t hists__fprintf(struct hists *self, struct hists *pair, displacement = 0; ++position; } - ret += hist_entry__fprintf(h, self, pair, show_displacement, - displacement, fp, self->stats.total_period); + ret += hist_entry__fprintf(h, hists, pair, show_displacement, + displacement, fp, hists->stats.total_period); if (symbol_conf.use_callchain) - ret += hist_entry__fprintf_callchain(h, self, fp, - self->stats.total_period); + ret += hist_entry__fprintf_callchain(h, hists, fp, + hists->stats.total_period); if (h->ms.map == NULL && verbose > 1) { __map_groups__fprintf_maps(&h->thread->mg, MAP__FUNCTION, verbose, fp); @@ -879,7 +879,7 @@ size_t hists__fprintf(struct hists *self, struct hists *pair, /* * See hists__fprintf to match the column widths */ -unsigned int hists__sort_list_width(struct hists *self) +unsigned int hists__sort_list_width(struct hists *hists) { struct sort_entry *se; int ret = 9; /* total % */ @@ -898,7 +898,7 @@ unsigned int hists__sort_list_width(struct hists *self) list_for_each_entry(se, &hist_entry__sort_list, list) if (!se->elide) - ret += 2 + hists__col_len(self, se->se_width_idx); + ret += 2 + hists__col_len(hists, se->se_width_idx); if (verbose) /* Addr + origin */ ret += 3 + BITS_PER_LONG / 4; @@ -906,32 +906,32 @@ unsigned int hists__sort_list_width(struct hists *self) return ret; } -static void hists__remove_entry_filter(struct hists *self, struct hist_entry *h, +static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h, enum hist_filter filter) { h->filtered &= ~(1 << filter); if (h->filtered) return; - ++self->nr_entries; + ++hists->nr_entries; if (h->ms.unfolded) - self->nr_entries += h->nr_rows; + hists->nr_entries += h->nr_rows; h->row_offset = 0; - self->stats.total_period += h->period; - 
self->stats.nr_events[PERF_RECORD_SAMPLE] += h->nr_events; + hists->stats.total_period += h->period; + hists->stats.nr_events[PERF_RECORD_SAMPLE] += h->nr_events; - hists__calc_col_len(self, h); + hists__calc_col_len(hists, h); } -void hists__filter_by_dso(struct hists *self, const struct dso *dso) +void hists__filter_by_dso(struct hists *hists, const struct dso *dso) { struct rb_node *nd; - self->nr_entries = self->stats.total_period = 0; - self->stats.nr_events[PERF_RECORD_SAMPLE] = 0; - hists__reset_col_len(self); + hists->nr_entries = hists->stats.total_period = 0; + hists->stats.nr_events[PERF_RECORD_SAMPLE] = 0; + hists__reset_col_len(hists); - for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) { + for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); if (symbol_conf.exclude_other && !h->parent) @@ -942,19 +942,19 @@ void hists__filter_by_dso(struct hists *self, const struct dso *dso) continue; } - hists__remove_entry_filter(self, h, HIST_FILTER__DSO); + hists__remove_entry_filter(hists, h, HIST_FILTER__DSO); } } -void hists__filter_by_thread(struct hists *self, const struct thread *thread) +void hists__filter_by_thread(struct hists *hists, const struct thread *thread) { struct rb_node *nd; - self->nr_entries = self->stats.total_period = 0; - self->stats.nr_events[PERF_RECORD_SAMPLE] = 0; - hists__reset_col_len(self); + hists->nr_entries = hists->stats.total_period = 0; + hists->stats.nr_events[PERF_RECORD_SAMPLE] = 0; + hists__reset_col_len(hists); - for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) { + for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); if (thread != NULL && h->thread != thread) { @@ -962,7 +962,7 @@ void hists__filter_by_thread(struct hists *self, const struct thread *thread) continue; } - hists__remove_entry_filter(self, h, HIST_FILTER__THREAD); + hists__remove_entry_filter(hists, h, HIST_FILTER__THREAD); } } @@ -976,13 +976,13 @@ int hist_entry__annotate(struct hist_entry *he, size_t privsize) return symbol__annotate(he->ms.sym, he->ms.map, privsize); } -void hists__inc_nr_events(struct hists *self, u32 type) +void hists__inc_nr_events(struct hists *hists, u32 type) { - ++self->stats.nr_events[0]; - ++self->stats.nr_events[type]; + ++hists->stats.nr_events[0]; + ++hists->stats.nr_events[type]; } -size_t hists__fprintf_nr_events(struct hists *self, FILE *fp) +size_t hists__fprintf_nr_events(struct hists *hists, FILE *fp) { int i; size_t ret = 0; @@ -990,7 +990,7 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp) for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { const char *name; - if (self->stats.nr_events[i] == 0) + if (hists->stats.nr_events[i] == 0) continue; name = perf_event__name(i); @@ -998,7 +998,7 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp) continue; ret += fprintf(fp, "%16s events: %10d\n", name, - self->stats.nr_events[i]); + hists->stats.nr_events[i]); } return ret;
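 }

hists__inc_nr_events() bumps nr_events[0] as the running total across all record types while also indexing the per-type slot, which is what lets hists__fprintf_nr_events() walk 0..PERF_RECORD_HEADER_MAX and print only the types actually seen. Typical call sites in a perf tool of this era look roughly like the following (the session->hists wiring is assumed here, not shown in the patch):

        /* for each event delivered by the session: */
        hists__inc_nr_events(&session->hists, event->header.type);

        /* at exit, one summary line per event type seen: */
        hists__fprintf_nr_events(&session->hists, stdout);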