From 9772a87b0b54946da6a6f54a4aa0bdc345b8b6d6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 26 Sep 2011 12:33:28 -0300 Subject: [PATCH] --- yaml --- r: 269041 b: refs/heads/master c: 42b28ac071a1a239d2a48965e9d6be0e061dd103 h: refs/heads/master i: 269039: c4de2037c65e3db2c81737167769cace30375375 v: v3 --- [refs] | 2 +- trunk/arch/x86/include/asm/nmi.h | 37 +- trunk/arch/x86/include/asm/perf_event.h | 55 +-- trunk/arch/x86/include/asm/reboot.h | 2 +- trunk/arch/x86/kernel/Makefile | 2 +- trunk/arch/x86/kernel/apic/hw_nmi.c | 27 +- trunk/arch/x86/kernel/apic/x2apic_uv_x.c | 20 +- trunk/arch/x86/kernel/cpu/Makefile | 2 +- trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c | 20 +- trunk/arch/x86/kernel/cpu/mcheck/mce.c | 23 +- trunk/arch/x86/kernel/cpu/perf_event.c | 69 ++- trunk/arch/x86/kernel/cpu/perf_event.h | 12 - trunk/arch/x86/kernel/cpu/perf_event_amd.c | 17 +- .../arch/x86/kernel/cpu/perf_event_amd_ibs.c | 294 ------------ trunk/arch/x86/kernel/cpu/perf_event_intel.c | 91 +--- trunk/arch/x86/kernel/crash.c | 5 +- trunk/arch/x86/kernel/kgdb.c | 60 +-- trunk/arch/x86/kernel/nmi.c | 433 ------------------ trunk/arch/x86/kernel/process_32.c | 2 - trunk/arch/x86/kernel/process_64.c | 2 - trunk/arch/x86/kernel/reboot.c | 23 +- trunk/arch/x86/kernel/traps.c | 155 +++++++ trunk/arch/x86/oprofile/nmi_int.c | 40 +- trunk/arch/x86/oprofile/nmi_timer_int.c | 28 +- trunk/arch/x86/oprofile/op_model_amd.c | 234 +++++++++- trunk/arch/x86/oprofile/op_x86_model.h | 1 + trunk/drivers/acpi/apei/ghes.c | 22 +- trunk/drivers/char/ipmi/ipmi_watchdog.c | 33 +- trunk/drivers/watchdog/hpwdt.c | 25 +- trunk/include/linux/perf_event.h | 5 +- trunk/tools/perf/util/hist.c | 156 +++---- 31 files changed, 762 insertions(+), 1135 deletions(-) delete mode 100644 trunk/arch/x86/kernel/cpu/perf_event_amd_ibs.c delete mode 100644 trunk/arch/x86/kernel/nmi.c diff --git a/[refs] b/[refs] index 0777a9766a67..59a1c2ca157a 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: d48b0e173715f678698d3678fefd40f2893ce798 +refs/heads/master: 42b28ac071a1a239d2a48965e9d6be0e061dd103 diff --git a/trunk/arch/x86/include/asm/nmi.h b/trunk/arch/x86/include/asm/nmi.h index fd3f9f18cf3f..4886a68f267e 100644 --- a/trunk/arch/x86/include/asm/nmi.h +++ b/trunk/arch/x86/include/asm/nmi.h @@ -22,26 +22,27 @@ void arch_trigger_all_cpu_backtrace(void); #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace #endif -#define NMI_FLAG_FIRST 1 - -enum { - NMI_LOCAL=0, - NMI_UNKNOWN, - NMI_MAX -}; - -#define NMI_DONE 0 -#define NMI_HANDLED 1 - -typedef int (*nmi_handler_t)(unsigned int, struct pt_regs *); - -int register_nmi_handler(unsigned int, nmi_handler_t, unsigned long, - const char *); - -void unregister_nmi_handler(unsigned int, const char *); +/* + * Define some priorities for the nmi notifier call chain. + * + * Create a local nmi bit that has a higher priority than + * external nmis, because the local ones are more frequent. + * + * Also setup some default high/normal/low settings for + * subsystems to registers with. Using 4 bits to separate + * the priorities. This can go a lot higher if needed be. 
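+ *
+ * As a minimal sketch of the intended usage (illustrative only --
+ * my_nmi(), my_nmi_nb and handle_event() are made-up names for this
+ * example, not part of the patch), a subsystem claiming local NMIs
+ * at normal priority would do:
+ *
+ *	static int my_nmi(struct notifier_block *self,
+ *			  unsigned long cmd, void *data)
+ *	{
+ *		struct die_args *args = data;
+ *
+ *		if (cmd != DIE_NMI)
+ *			return NOTIFY_DONE;
+ *		handle_event(args->regs);
+ *		return NOTIFY_STOP;
+ *	}
+ *
+ *	static struct notifier_block my_nmi_nb = {
+ *		.notifier_call	= my_nmi,
+ *		.priority	= NMI_LOCAL_NORMAL_PRIOR,
+ *	};
+ *
+ *	register_die_notifier(&my_nmi_nb);
+ *
+ * Returning NOTIFY_STOP ends the notifier walk; NOTIFY_DONE passes
+ * the NMI on to lower-priority notifiers and, eventually, to the
+ * unknown-NMI path.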
+ */ + +#define NMI_LOCAL_SHIFT 16 /* randomly picked */ +#define NMI_LOCAL_BIT (1ULL << NMI_LOCAL_SHIFT) +#define NMI_HIGH_PRIOR (1ULL << 8) +#define NMI_NORMAL_PRIOR (1ULL << 4) +#define NMI_LOW_PRIOR (1ULL << 0) +#define NMI_LOCAL_HIGH_PRIOR (NMI_LOCAL_BIT | NMI_HIGH_PRIOR) +#define NMI_LOCAL_NORMAL_PRIOR (NMI_LOCAL_BIT | NMI_NORMAL_PRIOR) +#define NMI_LOCAL_LOW_PRIOR (NMI_LOCAL_BIT | NMI_LOW_PRIOR) void stop_nmi(void); void restart_nmi(void); -void local_touch_nmi(void); #endif /* _ASM_X86_NMI_H */ diff --git a/trunk/arch/x86/include/asm/perf_event.h b/trunk/arch/x86/include/asm/perf_event.h index f61c62f7d5d8..094fb30817ab 100644 --- a/trunk/arch/x86/include/asm/perf_event.h +++ b/trunk/arch/x86/include/asm/perf_event.h @@ -29,9 +29,6 @@ #define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) #define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL -#define AMD_PERFMON_EVENTSEL_GUESTONLY (1ULL << 40) -#define AMD_PERFMON_EVENTSEL_HOSTONLY (1ULL << 41) - #define AMD64_EVENTSEL_EVENT \ (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32)) #define INTEL_ARCH_EVENT_MASK \ @@ -46,17 +43,14 @@ #define AMD64_RAW_EVENT_MASK \ (X86_RAW_EVENT_MASK | \ AMD64_EVENTSEL_EVENT) -#define AMD64_NUM_COUNTERS 4 -#define AMD64_NUM_COUNTERS_F15H 6 -#define AMD64_NUM_COUNTERS_MAX AMD64_NUM_COUNTERS_F15H -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) -#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 +#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 /* * Intel "Architectural Performance Monitoring" CPUID @@ -116,35 +110,6 @@ union cpuid10_edx { */ #define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) -/* - * IBS cpuid feature detection - */ - -#define IBS_CPUID_FEATURES 0x8000001b - -/* - * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but - * bit 0 is used to indicate the existence of IBS. 
- */ -#define IBS_CAPS_AVAIL (1U<<0) -#define IBS_CAPS_FETCHSAM (1U<<1) -#define IBS_CAPS_OPSAM (1U<<2) -#define IBS_CAPS_RDWROPCNT (1U<<3) -#define IBS_CAPS_OPCNT (1U<<4) -#define IBS_CAPS_BRNTRGT (1U<<5) -#define IBS_CAPS_OPCNTEXT (1U<<6) - -#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \ - | IBS_CAPS_FETCHSAM \ - | IBS_CAPS_OPSAM) - -/* - * IBS APIC setup - */ -#define IBSCTL 0x1cc -#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8) -#define IBSCTL_LVT_OFFSET_MASK 0x0F - /* IbsFetchCtl bits/masks */ #define IBS_FETCH_RAND_EN (1ULL<<57) #define IBS_FETCH_VAL (1ULL<<49) @@ -159,8 +124,6 @@ union cpuid10_edx { #define IBS_OP_MAX_CNT 0x0000FFFFULL #define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ -extern u32 get_ibs_caps(void); - #ifdef CONFIG_PERF_EVENTS extern void perf_events_lapic_init(void); @@ -196,19 +159,7 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); ); \ } -struct perf_guest_switch_msr { - unsigned msr; - u64 host, guest; -}; - -extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); #else -static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr) -{ - *nr = 0; - return NULL; -} - static inline void perf_events_lapic_init(void) { } #endif diff --git a/trunk/arch/x86/include/asm/reboot.h b/trunk/arch/x86/include/asm/reboot.h index 92f297069e87..3250e3d605d9 100644 --- a/trunk/arch/x86/include/asm/reboot.h +++ b/trunk/arch/x86/include/asm/reboot.h @@ -23,7 +23,7 @@ void machine_real_restart(unsigned int type); #define MRR_BIOS 0 #define MRR_APM 1 -typedef void (*nmi_shootdown_cb)(int, struct pt_regs*); +typedef void (*nmi_shootdown_cb)(int, struct die_args*); void nmi_shootdown_cpus(nmi_shootdown_cb callback); #endif /* _ASM_X86_REBOOT_H */ diff --git a/trunk/arch/x86/kernel/Makefile b/trunk/arch/x86/kernel/Makefile index 8baca3c4871c..82f2912155a5 100644 --- a/trunk/arch/x86/kernel/Makefile +++ b/trunk/arch/x86/kernel/Makefile @@ -19,7 +19,7 @@ endif obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o -obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o +obj-y += time.o ioport.o ldt.o dumpstack.o obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-y += probe_roms.o diff --git a/trunk/arch/x86/kernel/apic/hw_nmi.c b/trunk/arch/x86/kernel/apic/hw_nmi.c index 31cb9ae992b7..d5e57db0f7be 100644 --- a/trunk/arch/x86/kernel/apic/hw_nmi.c +++ b/trunk/arch/x86/kernel/apic/hw_nmi.c @@ -60,10 +60,22 @@ void arch_trigger_all_cpu_backtrace(void) } static int __kprobes -arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) +arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, + unsigned long cmd, void *__args) { + struct die_args *args = __args; + struct pt_regs *regs; int cpu; + switch (cmd) { + case DIE_NMI: + break; + + default: + return NOTIFY_DONE; + } + + regs = args->regs; cpu = smp_processor_id(); if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { @@ -74,16 +86,21 @@ arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) show_regs(regs); arch_spin_unlock(&lock); cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); - return NMI_HANDLED; + return NOTIFY_STOP; } - return NMI_DONE; + return NOTIFY_DONE; } +static __read_mostly struct notifier_block backtrace_notifier = { + .notifier_call = arch_trigger_all_cpu_backtrace_handler, + .next = NULL, + .priority = NMI_LOCAL_LOW_PRIOR, +}; + static int __init register_trigger_all_cpu_backtrace(void) { - register_nmi_handler(NMI_LOCAL, 
arch_trigger_all_cpu_backtrace_handler, - 0, "arch_bt"); + register_die_notifier(&backtrace_notifier); return 0; } early_initcall(register_trigger_all_cpu_backtrace); diff --git a/trunk/arch/x86/kernel/apic/x2apic_uv_x.c b/trunk/arch/x86/kernel/apic/x2apic_uv_x.c index 75be00ecfff2..34b18594e724 100644 --- a/trunk/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/trunk/arch/x86/kernel/apic/x2apic_uv_x.c @@ -672,11 +672,18 @@ void __cpuinit uv_cpu_init(void) /* * When NMI is received, print a stack trace. */ -int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) +int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) { unsigned long real_uv_nmi; int bid; + if (reason != DIE_NMIUNKNOWN) + return NOTIFY_OK; + + if (in_crash_kexec) + /* do nothing if entering the crash kernel */ + return NOTIFY_OK; + /* * Each blade has an MMR that indicates when an NMI has been sent * to cpus on the blade. If an NMI is detected, atomically @@ -697,7 +704,7 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) } if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count)) - return NMI_DONE; + return NOTIFY_DONE; __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count; @@ -710,12 +717,17 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) dump_stack(); spin_unlock(&uv_nmi_lock); - return NMI_HANDLED; + return NOTIFY_STOP; } +static struct notifier_block uv_dump_stack_nmi_nb = { + .notifier_call = uv_handle_nmi, + .priority = NMI_LOCAL_LOW_PRIOR - 1, +}; + void uv_register_nmi_notifier(void) { - if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv")) + if (register_die_notifier(&uv_dump_stack_nmi_nb)) printk(KERN_WARNING "UV NMI handler failed to register\n"); } diff --git a/trunk/arch/x86/kernel/cpu/Makefile b/trunk/arch/x86/kernel/cpu/Makefile index fe6eb197f848..1044fd787db8 100644 --- a/trunk/arch/x86/kernel/cpu/Makefile +++ b/trunk/arch/x86/kernel/cpu/Makefile @@ -36,7 +36,7 @@ endif obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ -obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o +obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o quiet_cmd_mkcapflags = MKCAP $@ cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c b/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c index 6199232161cf..0ed633c5048b 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -78,20 +78,27 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs) static cpumask_var_t mce_inject_cpumask; -static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) +static int mce_raise_notify(struct notifier_block *self, + unsigned long val, void *data) { + struct die_args *args = (struct die_args *)data; int cpu = smp_processor_id(); struct mce *m = &__get_cpu_var(injectm); - if (!cpumask_test_cpu(cpu, mce_inject_cpumask)) - return NMI_DONE; + if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) + return NOTIFY_DONE; cpumask_clear_cpu(cpu, mce_inject_cpumask); if (m->inject_flags & MCJ_EXCEPTION) - raise_exception(m, regs); + raise_exception(m, args->regs); else if (m->status) raise_poll(m); - return NMI_HANDLED; + return NOTIFY_STOP; } +static struct notifier_block mce_raise_nb = { + .notifier_call = mce_raise_notify, + .priority = NMI_LOCAL_NORMAL_PRIOR, +}; + /* Inject mce on current CPU */ static int raise_local(void) { @@ -209,8 +216,7 @@ static int inject_init(void) return 
-ENOMEM; printk(KERN_INFO "Machine check injector initialized\n"); mce_chrdev_ops.write = mce_write; - register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, - "mce_notify"); + register_die_notifier(&mce_raise_nb); return 0; } diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce.c b/trunk/arch/x86/kernel/cpu/mcheck/mce.c index fce51ad1f362..08363b042122 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce.c @@ -908,6 +908,9 @@ void do_machine_check(struct pt_regs *regs, long error_code) percpu_inc(mce_exception_count); + if (notify_die(DIE_NMI, "machine check", regs, error_code, + 18, SIGKILL) == NOTIFY_STOP) + goto out; if (!banks) goto out; @@ -1137,15 +1140,6 @@ static void mce_start_timer(unsigned long data) add_timer_on(t, smp_processor_id()); } -/* Must not be called in IRQ context where del_timer_sync() can deadlock */ -static void mce_timer_delete_all(void) -{ - int cpu; - - for_each_online_cpu(cpu) - del_timer_sync(&per_cpu(mce_timer, cpu)); -} - static void mce_do_trigger(struct work_struct *work) { call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT); @@ -1756,6 +1750,7 @@ static struct syscore_ops mce_syscore_ops = { static void mce_cpu_restart(void *data) { + del_timer_sync(&__get_cpu_var(mce_timer)); if (!mce_available(__this_cpu_ptr(&cpu_info))) return; __mcheck_cpu_init_generic(); @@ -1765,15 +1760,16 @@ static void mce_cpu_restart(void *data) /* Reinit MCEs after user configuration changes */ static void mce_restart(void) { - mce_timer_delete_all(); on_each_cpu(mce_cpu_restart, NULL, 1); } /* Toggle features for corrected errors */ -static void mce_disable_cmci(void *data) +static void mce_disable_ce(void *all) { if (!mce_available(__this_cpu_ptr(&cpu_info))) return; + if (all) + del_timer_sync(&__get_cpu_var(mce_timer)); cmci_clear(); } @@ -1856,8 +1852,7 @@ static ssize_t set_ignore_ce(struct sys_device *s, if (mce_ignore_ce ^ !!new) { if (new) { /* disable ce features */ - mce_timer_delete_all(); - on_each_cpu(mce_disable_cmci, NULL, 1); + on_each_cpu(mce_disable_ce, (void *)1, 1); mce_ignore_ce = 1; } else { /* enable ce features */ @@ -1880,7 +1875,7 @@ static ssize_t set_cmci_disabled(struct sys_device *s, if (mce_cmci_disabled ^ !!new) { if (new) { /* disable cmci */ - on_each_cpu(mce_disable_cmci, NULL, 1); + on_each_cpu(mce_disable_ce, NULL, 1); mce_cmci_disabled = 1; } else { /* enable cmci */ diff --git a/trunk/arch/x86/kernel/cpu/perf_event.c b/trunk/arch/x86/kernel/cpu/perf_event.c index 640891014b2a..8ab89112f93c 100644 --- a/trunk/arch/x86/kernel/cpu/perf_event.c +++ b/trunk/arch/x86/kernel/cpu/perf_event.c @@ -1058,15 +1058,76 @@ void perf_events_lapic_init(void) apic_write(APIC_LVTPC, APIC_DM_NMI); } +struct pmu_nmi_state { + unsigned int marked; + int handled; +}; + +static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi); + static int __kprobes -perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) +perf_event_nmi_handler(struct notifier_block *self, + unsigned long cmd, void *__args) { + struct die_args *args = __args; + unsigned int this_nmi; + int handled; + if (!atomic_read(&active_events)) - return NMI_DONE; + return NOTIFY_DONE; + + switch (cmd) { + case DIE_NMI: + break; + case DIE_NMIUNKNOWN: + this_nmi = percpu_read(irq_stat.__nmi_count); + if (this_nmi != __this_cpu_read(pmu_nmi.marked)) + /* let the kernel handle the unknown nmi */ + return NOTIFY_DONE; + /* + * This one is a PMU back-to-back nmi. Two events + * trigger 'simultaneously' raising two back-to-back + * NMIs. 
If the first NMI handles both, the latter + * will be empty and daze the CPU. So, we drop it to + * avoid false-positive 'unknown nmi' messages. + */ + return NOTIFY_STOP; + default: + return NOTIFY_DONE; + } + + handled = x86_pmu.handle_irq(args->regs); + if (!handled) + return NOTIFY_DONE; - return x86_pmu.handle_irq(regs); + this_nmi = percpu_read(irq_stat.__nmi_count); + if ((handled > 1) || + /* the next nmi could be a back-to-back nmi */ + ((__this_cpu_read(pmu_nmi.marked) == this_nmi) && + (__this_cpu_read(pmu_nmi.handled) > 1))) { + /* + * We could have two subsequent back-to-back nmis: The + * first handles more than one counter, the 2nd + * handles only one counter and the 3rd handles no + * counter. + * + * This is the 2nd nmi because the previous was + * handling more than one counter. We will mark the + * next (3rd) and then drop it if unhandled. + */ + __this_cpu_write(pmu_nmi.marked, this_nmi + 1); + __this_cpu_write(pmu_nmi.handled, handled); + } + + return NOTIFY_STOP; } +static __read_mostly struct notifier_block perf_event_nmi_notifier = { + .notifier_call = perf_event_nmi_handler, + .next = NULL, + .priority = NMI_LOCAL_LOW_PRIOR, +}; + struct event_constraint emptyconstraint; struct event_constraint unconstrained; @@ -1171,7 +1232,7 @@ static int __init init_hw_perf_events(void) ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; perf_events_lapic_init(); - register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI"); + register_die_notifier(&perf_event_nmi_notifier); unconstrained = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, diff --git a/trunk/arch/x86/kernel/cpu/perf_event.h b/trunk/arch/x86/kernel/cpu/perf_event.h index b9698d40ac4b..fb330b0a816e 100644 --- a/trunk/arch/x86/kernel/cpu/perf_event.h +++ b/trunk/arch/x86/kernel/cpu/perf_event.h @@ -130,13 +130,6 @@ struct cpu_hw_events { struct perf_branch_stack lbr_stack; struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; - /* - * Intel host/guest exclude bits - */ - u64 intel_ctrl_guest_mask; - u64 intel_ctrl_host_mask; - struct perf_guest_switch_msr guest_switch_msrs[X86_PMC_IDX_MAX]; - /* * manage shared (per-core, per-cpu) registers * used on Intel NHM/WSM/SNB @@ -302,11 +295,6 @@ struct x86_pmu { */ struct extra_reg *extra_regs; unsigned int er_flags; - - /* - * Intel host/guest support (KVM) - */ - struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); }; #define ERF_NO_HT_SHARING 1 diff --git a/trunk/arch/x86/kernel/cpu/perf_event_amd.c b/trunk/arch/x86/kernel/cpu/perf_event_amd.c index aeefd45697a2..384450d67128 100644 --- a/trunk/arch/x86/kernel/cpu/perf_event_amd.c +++ b/trunk/arch/x86/kernel/cpu/perf_event_amd.c @@ -138,19 +138,6 @@ static int amd_pmu_hw_config(struct perf_event *event) if (ret) return ret; - if (event->attr.exclude_host && event->attr.exclude_guest) - /* - * When HO == GO == 1 the hardware treats that as GO == HO == 0 - * and will count in both modes. We don't want to count in that - * case so we emulate no-counting by setting US = OS = 0. 
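- *
- * Spelled out (an illustrative summary of the branches below, not
- * extra code):
- *
- *	exclude_guest=0 exclude_host=0  -> count in host and guest
- *	exclude_guest=0 exclude_host=1  -> set GUESTONLY (GO)
- *	exclude_guest=1 exclude_host=0  -> set HOSTONLY (HO)
- *	exclude_guest=1 exclude_host=1  -> clear USR and OS instead,
- *					   so nothing is counted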
- */ - event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | - ARCH_PERFMON_EVENTSEL_OS); - else if (event->attr.exclude_host) - event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY; - else if (event->attr.exclude_guest) - event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY; - if (event->attr.type != PERF_TYPE_RAW) return 0; @@ -411,7 +398,7 @@ static __initconst const struct x86_pmu amd_pmu = { .perfctr = MSR_K7_PERFCTR0, .event_map = amd_pmu_event_map, .max_events = ARRAY_SIZE(amd_perfmon_event_map), - .num_counters = AMD64_NUM_COUNTERS, + .num_counters = 4, .cntval_bits = 48, .cntval_mask = (1ULL << 48) - 1, .apic = 1, @@ -575,7 +562,7 @@ static __initconst const struct x86_pmu amd_pmu_f15h = { .perfctr = MSR_F15H_PERF_CTR, .event_map = amd_pmu_event_map, .max_events = ARRAY_SIZE(amd_perfmon_event_map), - .num_counters = AMD64_NUM_COUNTERS_F15H, + .num_counters = 6, .cntval_bits = 48, .cntval_mask = (1ULL << 48) - 1, .apic = 1, diff --git a/trunk/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/trunk/arch/x86/kernel/cpu/perf_event_amd_ibs.c deleted file mode 100644 index ab6343d21825..000000000000 --- a/trunk/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ /dev/null @@ -1,294 +0,0 @@ -/* - * Performance events - AMD IBS - * - * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter - * - * For licencing details see kernel-base/COPYING - */ - -#include -#include -#include - -#include - -static u32 ibs_caps; - -#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) - -static struct pmu perf_ibs; - -static int perf_ibs_init(struct perf_event *event) -{ - if (perf_ibs.type != event->attr.type) - return -ENOENT; - return 0; -} - -static int perf_ibs_add(struct perf_event *event, int flags) -{ - return 0; -} - -static void perf_ibs_del(struct perf_event *event, int flags) -{ -} - -static struct pmu perf_ibs = { - .event_init= perf_ibs_init, - .add= perf_ibs_add, - .del= perf_ibs_del, -}; - -static __init int perf_event_ibs_init(void) -{ - if (!ibs_caps) - return -ENODEV; /* ibs not supported by the cpu */ - - perf_pmu_register(&perf_ibs, "ibs", -1); - printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps); - - return 0; -} - -#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */ - -static __init int perf_event_ibs_init(void) { return 0; } - -#endif - -/* IBS - apic initialization, for perf and oprofile */ - -static __init u32 __get_ibs_caps(void) -{ - u32 caps; - unsigned int max_level; - - if (!boot_cpu_has(X86_FEATURE_IBS)) - return 0; - - /* check IBS cpuid feature flags */ - max_level = cpuid_eax(0x80000000); - if (max_level < IBS_CPUID_FEATURES) - return IBS_CAPS_DEFAULT; - - caps = cpuid_eax(IBS_CPUID_FEATURES); - if (!(caps & IBS_CAPS_AVAIL)) - /* cpuid flags not valid */ - return IBS_CAPS_DEFAULT; - - return caps; -} - -u32 get_ibs_caps(void) -{ - return ibs_caps; -} - -EXPORT_SYMBOL(get_ibs_caps); - -static inline int get_eilvt(int offset) -{ - return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1); -} - -static inline int put_eilvt(int offset) -{ - return !setup_APIC_eilvt(offset, 0, 0, 1); -} - -/* - * Check and reserve APIC extended interrupt LVT offset for IBS if available. 
- */ -static inline int ibs_eilvt_valid(void) -{ - int offset; - u64 val; - int valid = 0; - - preempt_disable(); - - rdmsrl(MSR_AMD64_IBSCTL, val); - offset = val & IBSCTL_LVT_OFFSET_MASK; - - if (!(val & IBSCTL_LVT_OFFSET_VALID)) { - pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n", - smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); - goto out; - } - - if (!get_eilvt(offset)) { - pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n", - smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); - goto out; - } - - valid = 1; -out: - preempt_enable(); - - return valid; -} - -static int setup_ibs_ctl(int ibs_eilvt_off) -{ - struct pci_dev *cpu_cfg; - int nodes; - u32 value = 0; - - nodes = 0; - cpu_cfg = NULL; - do { - cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD, - PCI_DEVICE_ID_AMD_10H_NB_MISC, - cpu_cfg); - if (!cpu_cfg) - break; - ++nodes; - pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off - | IBSCTL_LVT_OFFSET_VALID); - pci_read_config_dword(cpu_cfg, IBSCTL, &value); - if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) { - pci_dev_put(cpu_cfg); - printk(KERN_DEBUG "Failed to setup IBS LVT offset, " - "IBSCTL = 0x%08x\n", value); - return -EINVAL; - } - } while (1); - - if (!nodes) { - printk(KERN_DEBUG "No CPU node configured for IBS\n"); - return -ENODEV; - } - - return 0; -} - -/* - * This runs only on the current cpu. We try to find an LVT offset and - * setup the local APIC. For this we must disable preemption. On - * success we initialize all nodes with this offset. This updates then - * the offset in the IBS_CTL per-node msr. The per-core APIC setup of - * the IBS interrupt vector is handled by perf_ibs_cpu_notifier that - * is using the new offset. - */ -static int force_ibs_eilvt_setup(void) -{ - int offset; - int ret; - - preempt_disable(); - /* find the next free available EILVT entry, skip offset 0 */ - for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) { - if (get_eilvt(offset)) - break; - } - preempt_enable(); - - if (offset == APIC_EILVT_NR_MAX) { - printk(KERN_DEBUG "No EILVT entry available\n"); - return -EBUSY; - } - - ret = setup_ibs_ctl(offset); - if (ret) - goto out; - - if (!ibs_eilvt_valid()) { - ret = -EFAULT; - goto out; - } - - pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset); - pr_err(FW_BUG "workaround enabled for IBS LVT offset\n"); - - return 0; -out: - preempt_disable(); - put_eilvt(offset); - preempt_enable(); - return ret; -} - -static inline int get_ibs_lvt_offset(void) -{ - u64 val; - - rdmsrl(MSR_AMD64_IBSCTL, val); - if (!(val & IBSCTL_LVT_OFFSET_VALID)) - return -EINVAL; - - return val & IBSCTL_LVT_OFFSET_MASK; -} - -static void setup_APIC_ibs(void *dummy) -{ - int offset; - - offset = get_ibs_lvt_offset(); - if (offset < 0) - goto failed; - - if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0)) - return; -failed: - pr_warn("perf: IBS APIC setup failed on cpu #%d\n", - smp_processor_id()); -} - -static void clear_APIC_ibs(void *dummy) -{ - int offset; - - offset = get_ibs_lvt_offset(); - if (offset >= 0) - setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1); -} - -static int __cpuinit -perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) -{ - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_STARTING: - setup_APIC_ibs(NULL); - break; - case CPU_DYING: - clear_APIC_ibs(NULL); - break; - default: - break; - } - - return NOTIFY_OK; -} - -static __init int amd_ibs_init(void) -{ - u32 caps; - int ret; - - caps = __get_ibs_caps(); - if (!caps) 
- return -ENODEV; /* ibs not supported by the cpu */ - - if (!ibs_eilvt_valid()) { - ret = force_ibs_eilvt_setup(); - if (ret) { - pr_err("Failed to setup IBS, %d\n", ret); - return ret; - } - } - - get_online_cpus(); - ibs_caps = caps; - /* make ibs_caps visible to other cpus: */ - smp_mb(); - perf_cpu_notifier(perf_ibs_cpu_notifier); - smp_call_function(setup_APIC_ibs, NULL, 1); - put_online_cpus(); - - return perf_event_ibs_init(); -} - -/* Since we need the pci subsystem to init ibs we can't do this earlier: */ -device_initcall(amd_ibs_init); diff --git a/trunk/arch/x86/kernel/cpu/perf_event_intel.c b/trunk/arch/x86/kernel/cpu/perf_event_intel.c index e09ca20e86ee..61fa35750b98 100644 --- a/trunk/arch/x86/kernel/cpu/perf_event_intel.c +++ b/trunk/arch/x86/kernel/cpu/perf_event_intel.c @@ -749,8 +749,7 @@ static void intel_pmu_enable_all(int added) intel_pmu_pebs_enable_all(); intel_pmu_lbr_enable_all(); - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, - x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { struct perf_event *event = @@ -873,7 +872,6 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) static void intel_pmu_disable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { intel_pmu_disable_bts(); @@ -881,9 +879,6 @@ static void intel_pmu_disable_event(struct perf_event *event) return; } - cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); - cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); - if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_disable_fixed(hwc); return; @@ -929,7 +924,6 @@ static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) static void intel_pmu_enable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { if (!__this_cpu_read(cpu_hw_events.enabled)) @@ -939,11 +933,6 @@ static void intel_pmu_enable_event(struct perf_event *event) return; } - if (event->attr.exclude_host) - cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); - if (event->attr.exclude_guest) - cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx); - if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_enable_fixed(hwc); return; @@ -1313,84 +1302,12 @@ static int intel_pmu_hw_config(struct perf_event *event) return 0; } -struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) -{ - if (x86_pmu.guest_get_msrs) - return x86_pmu.guest_get_msrs(nr); - *nr = 0; - return NULL; -} -EXPORT_SYMBOL_GPL(perf_guest_get_msrs); - -static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; - - arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; - arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask; - arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask; - - *nr = 1; - return arr; -} - -static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; - int idx; - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - struct perf_event *event = cpuc->events[idx]; - - arr[idx].msr = x86_pmu_config_addr(idx); 
- arr[idx].host = arr[idx].guest = 0; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - arr[idx].host = arr[idx].guest = - event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE; - - if (event->attr.exclude_host) - arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE; - else if (event->attr.exclude_guest) - arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE; - } - - *nr = x86_pmu.num_counters; - return arr; -} - -static void core_pmu_enable_event(struct perf_event *event) -{ - if (!event->attr.exclude_host) - x86_pmu_enable_event(event); -} - -static void core_pmu_enable_all(int added) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - int idx; - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - struct hw_perf_event *hwc = &cpuc->events[idx]->hw; - - if (!test_bit(idx, cpuc->active_mask) || - cpuc->events[idx]->attr.exclude_host) - continue; - - __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); - } -} - static __initconst const struct x86_pmu core_pmu = { .name = "core", .handle_irq = x86_pmu_handle_irq, .disable_all = x86_pmu_disable_all, - .enable_all = core_pmu_enable_all, - .enable = core_pmu_enable_event, + .enable_all = x86_pmu_enable_all, + .enable = x86_pmu_enable_event, .disable = x86_pmu_disable_event, .hw_config = x86_pmu_hw_config, .schedule_events = x86_schedule_events, @@ -1408,7 +1325,6 @@ static __initconst const struct x86_pmu core_pmu = { .get_event_constraints = intel_get_event_constraints, .put_event_constraints = intel_put_event_constraints, .event_constraints = intel_core_event_constraints, - .guest_get_msrs = core_guest_get_msrs, }; struct intel_shared_regs *allocate_shared_regs(int cpu) @@ -1515,7 +1431,6 @@ static __initconst const struct x86_pmu intel_pmu = { .cpu_prepare = intel_pmu_cpu_prepare, .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, - .guest_get_msrs = intel_guest_get_msrs, }; static void intel_clovertown_quirks(void) diff --git a/trunk/arch/x86/kernel/crash.c b/trunk/arch/x86/kernel/crash.c index 13ad89971d47..764c7c2b1811 100644 --- a/trunk/arch/x86/kernel/crash.c +++ b/trunk/arch/x86/kernel/crash.c @@ -32,12 +32,15 @@ int in_crash_kexec; #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) -static void kdump_nmi_callback(int cpu, struct pt_regs *regs) +static void kdump_nmi_callback(int cpu, struct die_args *args) { + struct pt_regs *regs; #ifdef CONFIG_X86_32 struct pt_regs fixed_regs; #endif + regs = args->regs; + #ifdef CONFIG_X86_32 if (!user_mode_vm(regs)) { crash_fixup_ss_esp(&fixed_regs, regs); diff --git a/trunk/arch/x86/kernel/kgdb.c b/trunk/arch/x86/kernel/kgdb.c index faba5771acad..00354d4919a9 100644 --- a/trunk/arch/x86/kernel/kgdb.c +++ b/trunk/arch/x86/kernel/kgdb.c @@ -511,37 +511,28 @@ single_step_cont(struct pt_regs *regs, struct die_args *args) static int was_in_debug_nmi[NR_CPUS]; -static int kgdb_nmi_handler(unsigned int cmd, struct pt_regs *regs) +static int __kgdb_notify(struct die_args *args, unsigned long cmd) { + struct pt_regs *regs = args->regs; + switch (cmd) { - case NMI_LOCAL: + case DIE_NMI: if (atomic_read(&kgdb_active) != -1) { /* KGDB CPU roundup */ kgdb_nmicallback(raw_smp_processor_id(), regs); was_in_debug_nmi[raw_smp_processor_id()] = 1; touch_nmi_watchdog(); - return NMI_HANDLED; + return NOTIFY_STOP; } - break; + return NOTIFY_DONE; - case NMI_UNKNOWN: + case DIE_NMIUNKNOWN: if (was_in_debug_nmi[raw_smp_processor_id()]) { was_in_debug_nmi[raw_smp_processor_id()] = 0; - return NMI_HANDLED; + return NOTIFY_STOP; } - break; - default: - /* do nothing */ - 
break; - } - return NMI_DONE; -} - -static int __kgdb_notify(struct die_args *args, unsigned long cmd) -{ - struct pt_regs *regs = args->regs; + return NOTIFY_DONE; - switch (cmd) { case DIE_DEBUG: if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { if (user_mode(regs)) @@ -599,6 +590,11 @@ kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) static struct notifier_block kgdb_notifier = { .notifier_call = kgdb_notify, + + /* + * Lowest-prio notifier priority, we want to be notified last: + */ + .priority = NMI_LOCAL_LOW_PRIOR, }; /** @@ -609,31 +605,7 @@ static struct notifier_block kgdb_notifier = { */ int kgdb_arch_init(void) { - int retval; - - retval = register_die_notifier(&kgdb_notifier); - if (retval) - goto out; - - retval = register_nmi_handler(NMI_LOCAL, kgdb_nmi_handler, - 0, "kgdb"); - if (retval) - goto out1; - - retval = register_nmi_handler(NMI_UNKNOWN, kgdb_nmi_handler, - 0, "kgdb"); - - if (retval) - goto out2; - - return retval; - -out2: - unregister_nmi_handler(NMI_LOCAL, "kgdb"); -out1: - unregister_die_notifier(&kgdb_notifier); -out: - return retval; + return register_die_notifier(&kgdb_notifier); } static void kgdb_hw_overflow_handler(struct perf_event *event, @@ -701,8 +673,6 @@ void kgdb_arch_exit(void) breakinfo[i].pev = NULL; } } - unregister_nmi_handler(NMI_UNKNOWN, "kgdb"); - unregister_nmi_handler(NMI_LOCAL, "kgdb"); unregister_die_notifier(&kgdb_notifier); } diff --git a/trunk/arch/x86/kernel/nmi.c b/trunk/arch/x86/kernel/nmi.c deleted file mode 100644 index 7ec5bd140b87..000000000000 --- a/trunk/arch/x86/kernel/nmi.c +++ /dev/null @@ -1,433 +0,0 @@ -/* - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs - * Copyright (C) 2011 Don Zickus Red Hat, Inc. - * - * Pentium III FXSR, SSE support - * Gareth Hughes , May 2000 - */ - -/* - * Handle hardware traps and faults. - */ -#include -#include -#include -#include -#include -#include -#include - -#include - -#if defined(CONFIG_EDAC) -#include -#endif - -#include -#include -#include -#include - -#define NMI_MAX_NAMELEN 16 -struct nmiaction { - struct list_head list; - nmi_handler_t handler; - unsigned int flags; - char *name; -}; - -struct nmi_desc { - spinlock_t lock; - struct list_head head; -}; - -static struct nmi_desc nmi_desc[NMI_MAX] = -{ - { - .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock), - .head = LIST_HEAD_INIT(nmi_desc[0].head), - }, - { - .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock), - .head = LIST_HEAD_INIT(nmi_desc[1].head), - }, - -}; - -struct nmi_stats { - unsigned int normal; - unsigned int unknown; - unsigned int external; - unsigned int swallow; -}; - -static DEFINE_PER_CPU(struct nmi_stats, nmi_stats); - -static int ignore_nmis; - -int unknown_nmi_panic; -/* - * Prevent NMI reason port (0x61) being accessed simultaneously, can - * only be used in NMI handler. - */ -static DEFINE_RAW_SPINLOCK(nmi_reason_lock); - -static int __init setup_unknown_nmi_panic(char *str) -{ - unknown_nmi_panic = 1; - return 1; -} -__setup("unknown_nmi_panic", setup_unknown_nmi_panic); - -#define nmi_to_desc(type) (&nmi_desc[type]) - -static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) -{ - struct nmi_desc *desc = nmi_to_desc(type); - struct nmiaction *a; - int handled=0; - - rcu_read_lock(); - - /* - * NMIs are edge-triggered, which means if you have enough - * of them concurrently, you can lose some because only one - * can be latched at any given time. Walk the whole list - * to handle those situations. 
- */ - list_for_each_entry_rcu(a, &desc->head, list) - handled += a->handler(type, regs); - - rcu_read_unlock(); - - /* return total number of NMI events handled */ - return handled; -} - -static int __setup_nmi(unsigned int type, struct nmiaction *action) -{ - struct nmi_desc *desc = nmi_to_desc(type); - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - - /* - * most handlers of type NMI_UNKNOWN never return because - * they just assume the NMI is theirs. Just a sanity check - * to manage expectations - */ - WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head)); - - /* - * some handlers need to be executed first otherwise a fake - * event confuses some handlers (kdump uses this flag) - */ - if (action->flags & NMI_FLAG_FIRST) - list_add_rcu(&action->list, &desc->head); - else - list_add_tail_rcu(&action->list, &desc->head); - - spin_unlock_irqrestore(&desc->lock, flags); - return 0; -} - -static struct nmiaction *__free_nmi(unsigned int type, const char *name) -{ - struct nmi_desc *desc = nmi_to_desc(type); - struct nmiaction *n; - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - - list_for_each_entry_rcu(n, &desc->head, list) { - /* - * the name passed in to describe the nmi handler - * is used as the lookup key - */ - if (!strcmp(n->name, name)) { - WARN(in_nmi(), - "Trying to free NMI (%s) from NMI context!\n", n->name); - list_del_rcu(&n->list); - break; - } - } - - spin_unlock_irqrestore(&desc->lock, flags); - synchronize_rcu(); - return (n); -} - -int register_nmi_handler(unsigned int type, nmi_handler_t handler, - unsigned long nmiflags, const char *devname) -{ - struct nmiaction *action; - int retval = -ENOMEM; - - if (!handler) - return -EINVAL; - - action = kzalloc(sizeof(struct nmiaction), GFP_KERNEL); - if (!action) - goto fail_action; - - action->handler = handler; - action->flags = nmiflags; - action->name = kstrndup(devname, NMI_MAX_NAMELEN, GFP_KERNEL); - if (!action->name) - goto fail_action_name; - - retval = __setup_nmi(type, action); - - if (retval) - goto fail_setup_nmi; - - return retval; - -fail_setup_nmi: - kfree(action->name); -fail_action_name: - kfree(action); -fail_action: - - return retval; -} -EXPORT_SYMBOL_GPL(register_nmi_handler); - -void unregister_nmi_handler(unsigned int type, const char *name) -{ - struct nmiaction *a; - - a = __free_nmi(type, name); - if (a) { - kfree(a->name); - kfree(a); - } -} - -EXPORT_SYMBOL_GPL(unregister_nmi_handler); - -static notrace __kprobes void -pci_serr_error(unsigned char reason, struct pt_regs *regs) -{ - pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", - reason, smp_processor_id()); - - /* - * On some machines, PCI SERR line is used to report memory - * errors. EDAC makes use of it. - */ -#if defined(CONFIG_EDAC) - if (edac_handler_set()) { - edac_atomic_assert_error(); - return; - } -#endif - - if (panic_on_unrecovered_nmi) - panic("NMI: Not continuing"); - - pr_emerg("Dazed and confused, but trying to continue\n"); - - /* Clear and disable the PCI SERR error line. */ - reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR; - outb(reason, NMI_REASON_PORT); -} - -static notrace __kprobes void -io_check_error(unsigned char reason, struct pt_regs *regs) -{ - unsigned long i; - - pr_emerg( - "NMI: IOCK error (debug interrupt?) 
for reason %02x on CPU %d.\n", - reason, smp_processor_id()); - show_registers(regs); - - if (panic_on_io_nmi) - panic("NMI IOCK error: Not continuing"); - - /* Re-enable the IOCK line, wait for a few seconds */ - reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK; - outb(reason, NMI_REASON_PORT); - - i = 20000; - while (--i) { - touch_nmi_watchdog(); - udelay(100); - } - - reason &= ~NMI_REASON_CLEAR_IOCHK; - outb(reason, NMI_REASON_PORT); -} - -static notrace __kprobes void -unknown_nmi_error(unsigned char reason, struct pt_regs *regs) -{ - int handled; - - /* - * Use 'false' as back-to-back NMIs are dealt with one level up. - * Of course this makes having multiple 'unknown' handlers useless - * as only the first one is ever run (unless it can actually determine - * if it caused the NMI) - */ - handled = nmi_handle(NMI_UNKNOWN, regs, false); - if (handled) { - __this_cpu_add(nmi_stats.unknown, handled); - return; - } - - __this_cpu_add(nmi_stats.unknown, 1); - -#ifdef CONFIG_MCA - /* - * Might actually be able to figure out what the guilty party - * is: - */ - if (MCA_bus) { - mca_handle_nmi(); - return; - } -#endif - pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", - reason, smp_processor_id()); - - pr_emerg("Do you have a strange power saving mode enabled?\n"); - if (unknown_nmi_panic || panic_on_unrecovered_nmi) - panic("NMI: Not continuing"); - - pr_emerg("Dazed and confused, but trying to continue\n"); -} - -static DEFINE_PER_CPU(bool, swallow_nmi); -static DEFINE_PER_CPU(unsigned long, last_nmi_rip); - -static notrace __kprobes void default_do_nmi(struct pt_regs *regs) -{ - unsigned char reason = 0; - int handled; - bool b2b = false; - - /* - * CPU-specific NMI must be processed before non-CPU-specific - * NMI, otherwise we may lose it, because the CPU-specific - * NMI can not be detected/processed on other CPUs. - */ - - /* - * Back-to-back NMIs are interesting because they can either - * be two NMI or more than two NMIs (any thing over two is dropped - * due to NMI being edge-triggered). If this is the second half - * of the back-to-back NMI, assume we dropped things and process - * more handlers. Otherwise reset the 'swallow' NMI behaviour - */ - if (regs->ip == __this_cpu_read(last_nmi_rip)) - b2b = true; - else - __this_cpu_write(swallow_nmi, false); - - __this_cpu_write(last_nmi_rip, regs->ip); - - handled = nmi_handle(NMI_LOCAL, regs, b2b); - __this_cpu_add(nmi_stats.normal, handled); - if (handled) { - /* - * There are cases when a NMI handler handles multiple - * events in the current NMI. One of these events may - * be queued for in the next NMI. Because the event is - * already handled, the next NMI will result in an unknown - * NMI. Instead lets flag this for a potential NMI to - * swallow. - */ - if (handled > 1) - __this_cpu_write(swallow_nmi, true); - return; - } - - /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ - raw_spin_lock(&nmi_reason_lock); - reason = get_nmi_reason(); - - if (reason & NMI_REASON_MASK) { - if (reason & NMI_REASON_SERR) - pci_serr_error(reason, regs); - else if (reason & NMI_REASON_IOCHK) - io_check_error(reason, regs); -#ifdef CONFIG_X86_32 - /* - * Reassert NMI in case it became active - * meanwhile as it's edge-triggered: - */ - reassert_nmi(); -#endif - __this_cpu_add(nmi_stats.external, 1); - raw_spin_unlock(&nmi_reason_lock); - return; - } - raw_spin_unlock(&nmi_reason_lock); - - /* - * Only one NMI can be latched at a time. 
To handle - * this we may process multiple nmi handlers at once to - * cover the case where an NMI is dropped. The downside - * to this approach is we may process an NMI prematurely, - * while its real NMI is sitting latched. This will cause - * an unknown NMI on the next run of the NMI processing. - * - * We tried to flag that condition above, by setting the - * swallow_nmi flag when we process more than one event. - * This condition is also only present on the second half - * of a back-to-back NMI, so we flag that condition too. - * - * If both are true, we assume we already processed this - * NMI previously and we swallow it. Otherwise we reset - * the logic. - * - * There are scenarios where we may accidentally swallow - * a 'real' unknown NMI. For example, while processing - * a perf NMI another perf NMI comes in along with a - * 'real' unknown NMI. These two NMIs get combined into - * one (as descibed above). When the next NMI gets - * processed, it will be flagged by perf as handled, but - * noone will know that there was a 'real' unknown NMI sent - * also. As a result it gets swallowed. Or if the first - * perf NMI returns two events handled then the second - * NMI will get eaten by the logic below, again losing a - * 'real' unknown NMI. But this is the best we can do - * for now. - */ - if (b2b && __this_cpu_read(swallow_nmi)) - __this_cpu_add(nmi_stats.swallow, 1); - else - unknown_nmi_error(reason, regs); -} - -dotraplinkage notrace __kprobes void -do_nmi(struct pt_regs *regs, long error_code) -{ - nmi_enter(); - - inc_irq_stat(__nmi_count); - - if (!ignore_nmis) - default_do_nmi(regs); - - nmi_exit(); -} - -void stop_nmi(void) -{ - ignore_nmis++; -} - -void restart_nmi(void) -{ - ignore_nmis--; -} - -/* reset the back-to-back NMI logic */ -void local_touch_nmi(void) -{ - __this_cpu_write(last_nmi_rip, 0); -} diff --git a/trunk/arch/x86/kernel/process_32.c b/trunk/arch/x86/kernel/process_32.c index 46ff054ebaaa..7a3b65107a27 100644 --- a/trunk/arch/x86/kernel/process_32.c +++ b/trunk/arch/x86/kernel/process_32.c @@ -57,7 +57,6 @@ #include #include #include -#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -108,7 +107,6 @@ void cpu_idle(void) if (cpu_is_offline(cpu)) play_dead(); - local_touch_nmi(); local_irq_disable(); /* Don't trace irqs off for idle */ stop_critical_timings(); diff --git a/trunk/arch/x86/kernel/process_64.c b/trunk/arch/x86/kernel/process_64.c index 3bd7e6eebf31..f693e44e1bf6 100644 --- a/trunk/arch/x86/kernel/process_64.c +++ b/trunk/arch/x86/kernel/process_64.c @@ -51,7 +51,6 @@ #include #include #include -#include asmlinkage extern void ret_from_fork(void); @@ -134,7 +133,6 @@ void cpu_idle(void) * from here on, until they go to idle. * Otherwise, idle callbacks can misfire. 
*/ - local_touch_nmi(); local_irq_disable(); enter_idle(); /* Don't trace irqs off for idle */ diff --git a/trunk/arch/x86/kernel/reboot.c b/trunk/arch/x86/kernel/reboot.c index e334be1182b9..9242436e9937 100644 --- a/trunk/arch/x86/kernel/reboot.c +++ b/trunk/arch/x86/kernel/reboot.c @@ -464,7 +464,7 @@ static inline void kb_wait(void) } } -static void vmxoff_nmi(int cpu, struct pt_regs *regs) +static void vmxoff_nmi(int cpu, struct die_args *args) { cpu_emergency_vmxoff(); } @@ -736,10 +736,14 @@ static nmi_shootdown_cb shootdown_callback; static atomic_t waiting_for_crash_ipi; -static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) +static int crash_nmi_callback(struct notifier_block *self, + unsigned long val, void *data) { int cpu; + if (val != DIE_NMI) + return NOTIFY_OK; + cpu = raw_smp_processor_id(); /* Don't do anything if this handler is invoked on crashing cpu. @@ -747,10 +751,10 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) * an NMI if system was initially booted with nmi_watchdog parameter. */ if (cpu == crashing_cpu) - return NMI_HANDLED; + return NOTIFY_STOP; local_irq_disable(); - shootdown_callback(cpu, regs); + shootdown_callback(cpu, (struct die_args *)data); atomic_dec(&waiting_for_crash_ipi); /* Assume hlt works */ @@ -758,7 +762,7 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) for (;;) cpu_relax(); - return NMI_HANDLED; + return 1; } static void smp_send_nmi_allbutself(void) @@ -766,6 +770,12 @@ static void smp_send_nmi_allbutself(void) apic->send_IPI_allbutself(NMI_VECTOR); } +static struct notifier_block crash_nmi_nb = { + .notifier_call = crash_nmi_callback, + /* we want to be the first one called */ + .priority = NMI_LOCAL_HIGH_PRIOR+1, +}; + /* Halt all other CPUs, calling the specified function on each of them * * This function can be used to halt all other CPUs on crash @@ -784,8 +794,7 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); /* Would it be better to replace the trap vector here? */ - if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback, - NMI_FLAG_FIRST, "crash")) + if (register_die_notifier(&crash_nmi_nb)) return; /* return what? */ /* Ensure the new callback function is set before sending * out the NMI diff --git a/trunk/arch/x86/kernel/traps.c b/trunk/arch/x86/kernel/traps.c index a8e3eb83466c..6913369c234c 100644 --- a/trunk/arch/x86/kernel/traps.c +++ b/trunk/arch/x86/kernel/traps.c @@ -81,6 +81,15 @@ gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, }; DECLARE_BITMAP(used_vectors, NR_VECTORS); EXPORT_SYMBOL_GPL(used_vectors); +static int ignore_nmis; + +int unknown_nmi_panic; +/* + * Prevent NMI reason port (0x61) being accessed simultaneously, can + * only be used in NMI handler. + */ +static DEFINE_RAW_SPINLOCK(nmi_reason_lock); + static inline void conditional_sti(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) @@ -298,6 +307,152 @@ do_general_protection(struct pt_regs *regs, long error_code) die("general protection fault", regs, error_code); } +static int __init setup_unknown_nmi_panic(char *str) +{ + unknown_nmi_panic = 1; + return 1; +} +__setup("unknown_nmi_panic", setup_unknown_nmi_panic); + +static notrace __kprobes void +pci_serr_error(unsigned char reason, struct pt_regs *regs) +{ + pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", + reason, smp_processor_id()); + + /* + * On some machines, PCI SERR line is used to report memory + * errors. 
EDAC makes use of it. + */ +#if defined(CONFIG_EDAC) + if (edac_handler_set()) { + edac_atomic_assert_error(); + return; + } +#endif + + if (panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); + + pr_emerg("Dazed and confused, but trying to continue\n"); + + /* Clear and disable the PCI SERR error line. */ + reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR; + outb(reason, NMI_REASON_PORT); +} + +static notrace __kprobes void +io_check_error(unsigned char reason, struct pt_regs *regs) +{ + unsigned long i; + + pr_emerg( + "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n", + reason, smp_processor_id()); + show_registers(regs); + + if (panic_on_io_nmi) + panic("NMI IOCK error: Not continuing"); + + /* Re-enable the IOCK line, wait for a few seconds */ + reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK; + outb(reason, NMI_REASON_PORT); + + i = 20000; + while (--i) { + touch_nmi_watchdog(); + udelay(100); + } + + reason &= ~NMI_REASON_CLEAR_IOCHK; + outb(reason, NMI_REASON_PORT); +} + +static notrace __kprobes void +unknown_nmi_error(unsigned char reason, struct pt_regs *regs) +{ + if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == + NOTIFY_STOP) + return; +#ifdef CONFIG_MCA + /* + * Might actually be able to figure out what the guilty party + * is: + */ + if (MCA_bus) { + mca_handle_nmi(); + return; + } +#endif + pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", + reason, smp_processor_id()); + + pr_emerg("Do you have a strange power saving mode enabled?\n"); + if (unknown_nmi_panic || panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); + + pr_emerg("Dazed and confused, but trying to continue\n"); +} + +static notrace __kprobes void default_do_nmi(struct pt_regs *regs) +{ + unsigned char reason = 0; + + /* + * CPU-specific NMI must be processed before non-CPU-specific + * NMI, otherwise we may lose it, because the CPU-specific + * NMI can not be detected/processed on other CPUs. + */ + if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP) + return; + + /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ + raw_spin_lock(&nmi_reason_lock); + reason = get_nmi_reason(); + + if (reason & NMI_REASON_MASK) { + if (reason & NMI_REASON_SERR) + pci_serr_error(reason, regs); + else if (reason & NMI_REASON_IOCHK) + io_check_error(reason, regs); +#ifdef CONFIG_X86_32 + /* + * Reassert NMI in case it became active + * meanwhile as it's edge-triggered: + */ + reassert_nmi(); +#endif + raw_spin_unlock(&nmi_reason_lock); + return; + } + raw_spin_unlock(&nmi_reason_lock); + + unknown_nmi_error(reason, regs); +} + +dotraplinkage notrace __kprobes void +do_nmi(struct pt_regs *regs, long error_code) +{ + nmi_enter(); + + inc_irq_stat(__nmi_count); + + if (!ignore_nmis) + default_do_nmi(regs); + + nmi_exit(); +} + +void stop_nmi(void) +{ + ignore_nmis++; +} + +void restart_nmi(void) +{ + ignore_nmis--; +} + /* May run on IST stack. 
*/ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) { diff --git a/trunk/arch/x86/oprofile/nmi_int.c b/trunk/arch/x86/oprofile/nmi_int.c index c04dc145a4b7..68894fdc034b 100644 --- a/trunk/arch/x86/oprofile/nmi_int.c +++ b/trunk/arch/x86/oprofile/nmi_int.c @@ -61,15 +61,26 @@ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, } -static int profile_exceptions_notify(unsigned int val, struct pt_regs *regs) +static int profile_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) { - if (ctr_running) - model->check_ctrs(regs, &__get_cpu_var(cpu_msrs)); - else if (!nmi_enabled) - return NMI_DONE; - else - model->stop(&__get_cpu_var(cpu_msrs)); - return NMI_HANDLED; + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + + switch (val) { + case DIE_NMI: + if (ctr_running) + model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs)); + else if (!nmi_enabled) + break; + else + model->stop(&__get_cpu_var(cpu_msrs)); + ret = NOTIFY_STOP; + break; + default: + break; + } + return ret; } static void nmi_cpu_save_registers(struct op_msrs *msrs) @@ -352,6 +363,12 @@ static void nmi_cpu_setup(void *dummy) apic_write(APIC_LVTPC, APIC_DM_NMI); } +static struct notifier_block profile_exceptions_nb = { + .notifier_call = profile_exceptions_notify, + .next = NULL, + .priority = NMI_LOCAL_LOW_PRIOR, +}; + static void nmi_cpu_restore_registers(struct op_msrs *msrs) { struct op_msr *counters = msrs->counters; @@ -385,6 +402,8 @@ static void nmi_cpu_shutdown(void *dummy) apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); apic_write(APIC_LVTERR, v); nmi_cpu_restore_registers(msrs); + if (model->cpu_down) + model->cpu_down(); } static void nmi_cpu_up(void *dummy) @@ -489,8 +508,7 @@ static int nmi_setup(void) ctr_running = 0; /* make variables visible to the nmi handler: */ smp_mb(); - err = register_nmi_handler(NMI_LOCAL, profile_exceptions_notify, - 0, "oprofile"); + err = register_die_notifier(&profile_exceptions_nb); if (err) goto fail; @@ -520,7 +538,7 @@ static void nmi_shutdown(void) put_online_cpus(); /* make variables visible to the nmi handler: */ smp_mb(); - unregister_nmi_handler(NMI_LOCAL, "oprofile"); + unregister_die_notifier(&profile_exceptions_nb); msrs = &get_cpu_var(cpu_msrs); model->shutdown(msrs); free_msrs(); diff --git a/trunk/arch/x86/oprofile/nmi_timer_int.c b/trunk/arch/x86/oprofile/nmi_timer_int.c index 7f8052cd6620..720bf5a53c51 100644 --- a/trunk/arch/x86/oprofile/nmi_timer_int.c +++ b/trunk/arch/x86/oprofile/nmi_timer_int.c @@ -18,16 +18,32 @@ #include #include -static int profile_timer_exceptions_notify(unsigned int val, struct pt_regs *regs) +static int profile_timer_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) { - oprofile_add_sample(regs, 0); - return NMI_HANDLED; + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + + switch (val) { + case DIE_NMI: + oprofile_add_sample(args->regs, 0); + ret = NOTIFY_STOP; + break; + default: + break; + } + return ret; } +static struct notifier_block profile_timer_exceptions_nb = { + .notifier_call = profile_timer_exceptions_notify, + .next = NULL, + .priority = NMI_LOW_PRIOR, +}; + static int timer_start(void) { - if (register_nmi_handler(NMI_LOCAL, profile_timer_exceptions_notify, - 0, "oprofile-timer")) + if (register_die_notifier(&profile_timer_exceptions_nb)) return 1; return 0; } @@ -35,7 +51,7 @@ static int timer_start(void) static void timer_stop(void) { - unregister_nmi_handler(NMI_LOCAL, 
"oprofile-timer"); + unregister_die_notifier(&profile_timer_exceptions_nb); synchronize_sched(); /* Allow already-started NMIs to complete. */ } diff --git a/trunk/arch/x86/oprofile/op_model_amd.c b/trunk/arch/x86/oprofile/op_model_amd.c index 303f08637826..9cbb710dc94b 100644 --- a/trunk/arch/x86/oprofile/op_model_amd.c +++ b/trunk/arch/x86/oprofile/op_model_amd.c @@ -29,6 +29,8 @@ #include "op_x86_model.h" #include "op_counter.h" +#define NUM_COUNTERS 4 +#define NUM_COUNTERS_F15H 6 #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX #define NUM_VIRT_COUNTERS 32 #else @@ -67,6 +69,35 @@ struct ibs_state { static struct ibs_config ibs_config; static struct ibs_state ibs_state; +/* + * IBS cpuid feature detection + */ + +#define IBS_CPUID_FEATURES 0x8000001b + +/* + * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but + * bit 0 is used to indicate the existence of IBS. + */ +#define IBS_CAPS_AVAIL (1U<<0) +#define IBS_CAPS_FETCHSAM (1U<<1) +#define IBS_CAPS_OPSAM (1U<<2) +#define IBS_CAPS_RDWROPCNT (1U<<3) +#define IBS_CAPS_OPCNT (1U<<4) +#define IBS_CAPS_BRNTRGT (1U<<5) +#define IBS_CAPS_OPCNTEXT (1U<<6) + +#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \ + | IBS_CAPS_FETCHSAM \ + | IBS_CAPS_OPSAM) + +/* + * IBS APIC setup + */ +#define IBSCTL 0x1cc +#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8) +#define IBSCTL_LVT_OFFSET_MASK 0x0F + /* * IBS randomization macros */ @@ -74,6 +105,27 @@ static struct ibs_state ibs_state; #define IBS_RANDOM_MASK ((1ULL << IBS_RANDOM_BITS) - 1) #define IBS_RANDOM_MAXCNT_OFFSET (1ULL << (IBS_RANDOM_BITS - 5)) +static u32 get_ibs_caps(void) +{ + u32 ibs_caps; + unsigned int max_level; + + if (!boot_cpu_has(X86_FEATURE_IBS)) + return 0; + + /* check IBS cpuid feature flags */ + max_level = cpuid_eax(0x80000000); + if (max_level < IBS_CPUID_FEATURES) + return IBS_CAPS_DEFAULT; + + ibs_caps = cpuid_eax(IBS_CPUID_FEATURES); + if (!(ibs_caps & IBS_CAPS_AVAIL)) + /* cpuid flags not valid */ + return IBS_CAPS_DEFAULT; + + return ibs_caps; +} + /* * 16-bit Linear Feedback Shift Register (LFSR) * @@ -264,6 +316,81 @@ static void op_amd_stop_ibs(void) wrmsrl(MSR_AMD64_IBSOPCTL, 0); } +static inline int get_eilvt(int offset) +{ + return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1); +} + +static inline int put_eilvt(int offset) +{ + return !setup_APIC_eilvt(offset, 0, 0, 1); +} + +static inline int ibs_eilvt_valid(void) +{ + int offset; + u64 val; + int valid = 0; + + preempt_disable(); + + rdmsrl(MSR_AMD64_IBSCTL, val); + offset = val & IBSCTL_LVT_OFFSET_MASK; + + if (!(val & IBSCTL_LVT_OFFSET_VALID)) { + pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n", + smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); + goto out; + } + + if (!get_eilvt(offset)) { + pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n", + smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); + goto out; + } + + valid = 1; +out: + preempt_enable(); + + return valid; +} + +static inline int get_ibs_offset(void) +{ + u64 val; + + rdmsrl(MSR_AMD64_IBSCTL, val); + if (!(val & IBSCTL_LVT_OFFSET_VALID)) + return -EINVAL; + + return val & IBSCTL_LVT_OFFSET_MASK; +} + +static void setup_APIC_ibs(void) +{ + int offset; + + offset = get_ibs_offset(); + if (offset < 0) + goto failed; + + if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0)) + return; +failed: + pr_warn("oprofile: IBS APIC setup failed on cpu #%d\n", + smp_processor_id()); +} + +static void clear_APIC_ibs(void) +{ + int offset; + + offset = get_ibs_offset(); + if (offset >= 0) + 
setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1); +} + #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, @@ -312,7 +439,7 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs) goto fail; } /* both registers must be reserved */ - if (num_counters == AMD64_NUM_COUNTERS_F15H) { + if (num_counters == NUM_COUNTERS_F15H) { msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1); msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1); } else { @@ -377,6 +504,15 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, val |= op_x86_get_ctrl(model, &counter_config[virt]); wrmsrl(msrs->controls[i].addr, val); } + + if (ibs_caps) + setup_APIC_ibs(); +} + +static void op_amd_cpu_shutdown(void) +{ + if (ibs_caps) + clear_APIC_ibs(); } static int op_amd_check_ctrs(struct pt_regs * const regs, @@ -439,6 +575,86 @@ static void op_amd_stop(struct op_msrs const * const msrs) op_amd_stop_ibs(); } +static int setup_ibs_ctl(int ibs_eilvt_off) +{ + struct pci_dev *cpu_cfg; + int nodes; + u32 value = 0; + + nodes = 0; + cpu_cfg = NULL; + do { + cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD, + PCI_DEVICE_ID_AMD_10H_NB_MISC, + cpu_cfg); + if (!cpu_cfg) + break; + ++nodes; + pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off + | IBSCTL_LVT_OFFSET_VALID); + pci_read_config_dword(cpu_cfg, IBSCTL, &value); + if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) { + pci_dev_put(cpu_cfg); + printk(KERN_DEBUG "Failed to setup IBS LVT offset, " + "IBSCTL = 0x%08x\n", value); + return -EINVAL; + } + } while (1); + + if (!nodes) { + printk(KERN_DEBUG "No CPU node configured for IBS\n"); + return -ENODEV; + } + + return 0; +} + +/* + * This runs only on the current cpu. We try to find an LVT offset and + * setup the local APIC. For this we must disable preemption. On + * success we initialize all nodes with this offset. This updates then + * the offset in the IBS_CTL per-node msr. The per-core APIC setup of + * the IBS interrupt vector is called from op_amd_setup_ctrs()/op_- + * amd_cpu_shutdown() using the new offset. 
+ */ +static int force_ibs_eilvt_setup(void) +{ + int offset; + int ret; + + preempt_disable(); + /* find the next free available EILVT entry, skip offset 0 */ + for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) { + if (get_eilvt(offset)) + break; + } + preempt_enable(); + + if (offset == APIC_EILVT_NR_MAX) { + printk(KERN_DEBUG "No EILVT entry available\n"); + return -EBUSY; + } + + ret = setup_ibs_ctl(offset); + if (ret) + goto out; + + if (!ibs_eilvt_valid()) { + ret = -EFAULT; + goto out; + } + + pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset); + pr_err(FW_BUG "workaround enabled for IBS LVT offset\n"); + + return 0; +out: + preempt_disable(); + put_eilvt(offset); + preempt_enable(); + return ret; +} + /* * check and reserve APIC extended interrupt LVT offset for IBS if * available @@ -451,6 +667,17 @@ static void init_ibs(void) if (!ibs_caps) return; + if (ibs_eilvt_valid()) + goto out; + + if (!force_ibs_eilvt_setup()) + goto out; + + /* Failed to setup ibs */ + ibs_caps = 0; + return; + +out: printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps); } @@ -514,9 +741,9 @@ static int op_amd_init(struct oprofile_operations *ops) ops->create_files = setup_ibs_files; if (boot_cpu_data.x86 == 0x15) { - num_counters = AMD64_NUM_COUNTERS_F15H; + num_counters = NUM_COUNTERS_F15H; } else { - num_counters = AMD64_NUM_COUNTERS; + num_counters = NUM_COUNTERS; } op_amd_spec.num_counters = num_counters; @@ -533,6 +760,7 @@ struct op_x86_model_spec op_amd_spec = { .init = op_amd_init, .fill_in_addresses = &op_amd_fill_in_addresses, .setup_ctrs = &op_amd_setup_ctrs, + .cpu_down = &op_amd_cpu_shutdown, .check_ctrs = &op_amd_check_ctrs, .start = &op_amd_start, .stop = &op_amd_stop, diff --git a/trunk/arch/x86/oprofile/op_x86_model.h b/trunk/arch/x86/oprofile/op_x86_model.h index 71e8a67337e2..89017fa1fd63 100644 --- a/trunk/arch/x86/oprofile/op_x86_model.h +++ b/trunk/arch/x86/oprofile/op_x86_model.h @@ -43,6 +43,7 @@ struct op_x86_model_spec { int (*fill_in_addresses)(struct op_msrs * const msrs); void (*setup_ctrs)(struct op_x86_model_spec const *model, struct op_msrs const * const msrs); + void (*cpu_down)(void); int (*check_ctrs)(struct pt_regs * const regs, struct op_msrs const * const msrs); void (*start)(struct op_msrs const * const msrs); diff --git a/trunk/drivers/acpi/apei/ghes.c b/trunk/drivers/acpi/apei/ghes.c index b8e08cb67a18..0784f99a4665 100644 --- a/trunk/drivers/acpi/apei/ghes.c +++ b/trunk/drivers/acpi/apei/ghes.c @@ -50,7 +50,6 @@ #include #include #include -#include #include "apei-internal.h" @@ -750,11 +749,15 @@ static void ghes_proc_in_irq(struct irq_work *irq_work) } } -static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) +static int ghes_notify_nmi(struct notifier_block *this, + unsigned long cmd, void *data) { struct ghes *ghes, *ghes_global = NULL; int sev, sev_global = -1; - int ret = NMI_DONE; + int ret = NOTIFY_DONE; + + if (cmd != DIE_NMI) + return ret; raw_spin_lock(&ghes_nmi_lock); list_for_each_entry_rcu(ghes, &ghes_nmi, list) { @@ -767,10 +770,10 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) sev_global = sev; ghes_global = ghes; } - ret = NMI_HANDLED; + ret = NOTIFY_STOP; } - if (ret == NMI_DONE) + if (ret == NOTIFY_DONE) goto out; if (sev_global >= GHES_SEV_PANIC) { @@ -822,6 +825,10 @@ static struct notifier_block ghes_notifier_sci = { .notifier_call = ghes_notify_sci, }; +static struct notifier_block ghes_notifier_nmi = { + .notifier_call = ghes_notify_nmi, +}; + static unsigned long 
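
With register_nmi_handler() gone, a handler's place in line is governed purely by .priority on the shared die chain: atomic_notifier_call_chain() walks callbacks in descending priority order and stops at the first NOTIFY_STOP. ghes_notifier_nmi above leaves .priority at 0, so the perf and oprofile handlers (the NMI_LOCAL_* priorities) always see an NMI first; hpwdt further down instead raises its priority to 0x7FFFFFFF when it wants first crack. A hypothetical pair to illustrate the ordering (names are not from the patch):

static int runs_first(struct notifier_block *nb, unsigned long val, void *data)
{
        return NOTIFY_DONE;             /* looked, passed it along */
}

static int runs_last(struct notifier_block *nb, unsigned long val, void *data)
{
        return NOTIFY_STOP;             /* nobody after this is called */
}

static struct notifier_block first_nb = {
        .notifier_call  = runs_first,
        .priority       = NMI_LOCAL_LOW_PRIOR, /* still far above 0 */
};

static struct notifier_block last_nb = {
        .notifier_call  = runs_last,
        .priority       = 0,            /* default, like ghes_notifier_nmi */
};
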
ghes_esource_prealloc_size( const struct acpi_hest_generic *generic) { @@ -911,8 +918,7 @@ static int __devinit ghes_probe(struct platform_device *ghes_dev) ghes_estatus_pool_expand(len); mutex_lock(&ghes_list_mutex); if (list_empty(&ghes_nmi)) - register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, - "ghes"); + register_die_notifier(&ghes_notifier_nmi); list_add_rcu(&ghes->list, &ghes_nmi); mutex_unlock(&ghes_list_mutex); break; @@ -958,7 +964,7 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev) mutex_lock(&ghes_list_mutex); list_del_rcu(&ghes->list); if (list_empty(&ghes_nmi)) - unregister_nmi_handler(NMI_LOCAL, "ghes"); + unregister_die_notifier(&ghes_notifier_nmi); mutex_unlock(&ghes_list_mutex); /* * To synchronize with NMI handler, ghes can only be diff --git a/trunk/drivers/char/ipmi/ipmi_watchdog.c b/trunk/drivers/char/ipmi/ipmi_watchdog.c index c2917ffad2c2..3302586655c4 100644 --- a/trunk/drivers/char/ipmi/ipmi_watchdog.c +++ b/trunk/drivers/char/ipmi/ipmi_watchdog.c @@ -65,7 +65,6 @@ * mechanism for it at that time. */ #include -#include #define HAVE_DIE_NMI #endif @@ -1078,8 +1077,17 @@ static void ipmi_unregister_watchdog(int ipmi_intf) #ifdef HAVE_DIE_NMI static int -ipmi_nmi(unsigned int val, struct pt_regs *regs) +ipmi_nmi(struct notifier_block *self, unsigned long val, void *data) { + struct die_args *args = data; + + if (val != DIE_NMIUNKNOWN) + return NOTIFY_OK; + + /* Hack, if it's a memory or I/O error, ignore it. */ + if (args->err & 0xc0) + return NOTIFY_OK; + /* * If we get here, it's an NMI that's not a memory or I/O * error. We can't truly tell if it's from IPMI or not @@ -1089,15 +1097,15 @@ ipmi_nmi(unsigned int val, struct pt_regs *regs) if (testing_nmi) { testing_nmi = 2; - return NMI_HANDLED; + return NOTIFY_STOP; } /* If we are not expecting a timeout, ignore it. 
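
One subtlety above: for DIE_NMIUNKNOWN the err field of struct die_args carries the NMI reason byte read from I/O port 0x61, so the 0xc0 test is screening out the two hardware-error NMI sources before the driver considers claiming the NMI as an IPMI pre-timeout. The same check with the masks spelled out (the macro names here are for clarity; the driver open-codes 0xc0):

#define NMI_REASON_SERR         0x80    /* PCI SERR#, e.g. memory parity */
#define NMI_REASON_IOCHK        0x40    /* I/O channel check */

        if (args->err & (NMI_REASON_SERR | NMI_REASON_IOCHK))
                return NOTIFY_OK;       /* hardware error, not a watchdog NMI */
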
*/ if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE) - return NMI_DONE; + return NOTIFY_OK; if (preaction_val != WDOG_PRETIMEOUT_NMI) - return NMI_DONE; + return NOTIFY_OK; /* * If no one else handled the NMI, we assume it was the IPMI @@ -1112,8 +1120,12 @@ ipmi_nmi(unsigned int val, struct pt_regs *regs) panic(PFX "pre-timeout"); } - return NMI_HANDLED; + return NOTIFY_STOP; } + +static struct notifier_block ipmi_nmi_handler = { + .notifier_call = ipmi_nmi +}; #endif static int wdog_reboot_handler(struct notifier_block *this, @@ -1278,8 +1290,7 @@ static void check_parms(void) } } if (do_nmi && !nmi_handler_registered) { - rv = register_nmi_handler(NMI_UNKNOWN, ipmi_nmi, 0, - "ipmi"); + rv = register_die_notifier(&ipmi_nmi_handler); if (rv) { printk(KERN_WARNING PFX "Can't register nmi handler\n"); @@ -1287,7 +1298,7 @@ static void check_parms(void) } else nmi_handler_registered = 1; } else if (!do_nmi && nmi_handler_registered) { - unregister_nmi_handler(NMI_UNKNOWN, "ipmi"); + unregister_die_notifier(&ipmi_nmi_handler); nmi_handler_registered = 0; } #endif @@ -1325,7 +1336,7 @@ static int __init ipmi_wdog_init(void) if (rv) { #ifdef HAVE_DIE_NMI if (nmi_handler_registered) - unregister_nmi_handler(NMI_UNKNOWN, "ipmi"); + unregister_die_notifier(&ipmi_nmi_handler); #endif atomic_notifier_chain_unregister(&panic_notifier_list, &wdog_panic_notifier); @@ -1346,7 +1357,7 @@ static void __exit ipmi_wdog_exit(void) #ifdef HAVE_DIE_NMI if (nmi_handler_registered) - unregister_nmi_handler(NMI_UNKNOWN, "ipmi"); + unregister_die_notifier(&ipmi_nmi_handler); #endif atomic_notifier_chain_unregister(&panic_notifier_list, diff --git a/trunk/drivers/watchdog/hpwdt.c b/trunk/drivers/watchdog/hpwdt.c index 3774c9b8dac9..809cbda03d7a 100644 --- a/trunk/drivers/watchdog/hpwdt.c +++ b/trunk/drivers/watchdog/hpwdt.c @@ -35,7 +35,6 @@ #include #include #endif /* CONFIG_HPWDT_NMI_DECODING */ -#include #define HPWDT_VERSION "1.3.0" #define SECS_TO_TICKS(secs) ((secs) * 1000 / 128) @@ -478,11 +477,15 @@ static int hpwdt_time_left(void) /* * NMI Handler */ -static int hpwdt_pretimeout(unsigned int ulReason, struct pt_regs *regs) +static int hpwdt_pretimeout(struct notifier_block *nb, unsigned long ulReason, + void *data) { unsigned long rom_pl; static int die_nmi_called; + if (ulReason != DIE_NMIUNKNOWN) + goto out; + if (!hpwdt_nmi_decoding) goto out; @@ -505,7 +508,7 @@ static int hpwdt_pretimeout(unsigned int ulReason, struct pt_regs *regs) "Management Log for details.\n"); out: - return NMI_DONE; + return NOTIFY_OK; } #endif /* CONFIG_HPWDT_NMI_DECODING */ @@ -645,6 +648,13 @@ static struct miscdevice hpwdt_miscdev = { .fops = &hpwdt_fops, }; +#ifdef CONFIG_HPWDT_NMI_DECODING +static struct notifier_block die_notifier = { + .notifier_call = hpwdt_pretimeout, + .priority = 0, +}; +#endif /* CONFIG_HPWDT_NMI_DECODING */ + /* * Init & Exit */ @@ -730,9 +740,10 @@ static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev) * die notify list to handle a critical NMI. The default is to * be last so other users of the NMI signal can function. */ - retval = register_nmi_handler(NMI_UNKNOWN, hpwdt_pretimeout, - (priority) ? 
NMI_FLAG_FIRST : 0, - "hpwdt"); + if (priority) + die_notifier.priority = 0x7FFFFFFF; + + retval = register_die_notifier(&die_notifier); if (retval != 0) { dev_warn(&dev->dev, "Unable to register a die notifier (err=%d).\n", @@ -752,7 +763,7 @@ static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev) static void hpwdt_exit_nmi_decoding(void) { - unregister_nmi_handler(NMI_UNKNOWN, "hpwdt"); + unregister_die_notifier(&die_notifier); if (cru_rom_addr) iounmap(cru_rom_addr); } diff --git a/trunk/include/linux/perf_event.h b/trunk/include/linux/perf_event.h index 1e9ebe5e0091..c816075c01ce 100644 --- a/trunk/include/linux/perf_event.h +++ b/trunk/include/linux/perf_event.h @@ -220,10 +220,7 @@ struct perf_event_attr { mmap_data : 1, /* non-exec mmap data */ sample_id_all : 1, /* sample_type all events */ - exclude_host : 1, /* don't count in host */ - exclude_guest : 1, /* don't count in guest */ - - __reserved_1 : 43; + __reserved_1 : 45; union { __u32 wakeup_events; /* wakeup every n events */ diff --git a/trunk/tools/perf/util/hist.c b/trunk/tools/perf/util/hist.c index 677e1da6bb3e..dd277897ff0b 100644 --- a/trunk/tools/perf/util/hist.c +++ b/trunk/tools/perf/util/hist.c @@ -18,56 +18,56 @@ struct callchain_param callchain_param = { .order = ORDER_CALLEE }; -u16 hists__col_len(struct hists *self, enum hist_column col) +u16 hists__col_len(struct hists *hists, enum hist_column col) { - return self->col_len[col]; + return hists->col_len[col]; } -void hists__set_col_len(struct hists *self, enum hist_column col, u16 len) +void hists__set_col_len(struct hists *hists, enum hist_column col, u16 len) { - self->col_len[col] = len; + hists->col_len[col] = len; } -bool hists__new_col_len(struct hists *self, enum hist_column col, u16 len) +bool hists__new_col_len(struct hists *hists, enum hist_column col, u16 len) { - if (len > hists__col_len(self, col)) { - hists__set_col_len(self, col, len); + if (len > hists__col_len(hists, col)) { + hists__set_col_len(hists, col, len); return true; } return false; } -static void hists__reset_col_len(struct hists *self) +static void hists__reset_col_len(struct hists *hists) { enum hist_column col; for (col = 0; col < HISTC_NR_COLS; ++col) - hists__set_col_len(self, col, 0); + hists__set_col_len(hists, col, 0); } -static void hists__calc_col_len(struct hists *self, struct hist_entry *h) +static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) { u16 len; if (h->ms.sym) - hists__new_col_len(self, HISTC_SYMBOL, h->ms.sym->namelen); + hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen); else { const unsigned int unresolved_col_width = BITS_PER_LONG / 4; - if (hists__col_len(self, HISTC_DSO) < unresolved_col_width && + if (hists__col_len(hists, HISTC_DSO) < unresolved_col_width && !symbol_conf.col_width_list_str && !symbol_conf.field_sep && !symbol_conf.dso_list) - hists__set_col_len(self, HISTC_DSO, + hists__set_col_len(hists, HISTC_DSO, unresolved_col_width); } len = thread__comm_len(h->thread); - if (hists__new_col_len(self, HISTC_COMM, len)) - hists__set_col_len(self, HISTC_THREAD, len + 6); + if (hists__new_col_len(hists, HISTC_COMM, len)) + hists__set_col_len(hists, HISTC_THREAD, len + 6); if (h->ms.map) { len = dso__name_len(h->ms.map->dso); - hists__new_col_len(self, HISTC_DSO, len); + hists__new_col_len(hists, HISTC_DSO, len); } } @@ -113,11 +113,11 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template) return self; } -static void hists__inc_nr_entries(struct hists *self, struct hist_entry *h) +static void 
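
The perf_event_attr hunk above has to keep the flag bitfield filling exactly one __u64: removing the two 1-bit exclude_* flags widens __reserved_1 from 43 to 45, so this era's 17 one-bit flags plus the 2-bit precise_ip plus the pad still sum to 64. A compile-time guard for that invariant could look like this (not present in the kernel; C11 _Static_assert used purely for illustration):

_Static_assert(17 * 1 + 2 + 45 == 64,
               "perf_event_attr flag bits must fill a __u64");
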
hists__inc_nr_entries(struct hists *hists, struct hist_entry *h) { if (!h->filtered) { - hists__calc_col_len(self, h); - ++self->nr_entries; + hists__calc_col_len(hists, h); + ++hists->nr_entries; } } @@ -128,11 +128,11 @@ static u8 symbol__parent_filter(const struct symbol *parent) return 0; } -struct hist_entry *__hists__add_entry(struct hists *self, +struct hist_entry *__hists__add_entry(struct hists *hists, struct addr_location *al, struct symbol *sym_parent, u64 period) { - struct rb_node **p = &self->entries.rb_node; + struct rb_node **p = &hists->entries.rb_node; struct rb_node *parent = NULL; struct hist_entry *he; struct hist_entry entry = { @@ -172,8 +172,8 @@ struct hist_entry *__hists__add_entry(struct hists *self, if (!he) return NULL; rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &self->entries); - hists__inc_nr_entries(self, he); + rb_insert_color(&he->rb_node, &hists->entries); + hists__inc_nr_entries(hists, he); out: hist_entry__add_cpumode_period(he, al->cpumode, period); return he; @@ -222,7 +222,7 @@ void hist_entry__free(struct hist_entry *he) * collapse the histogram */ -static bool hists__collapse_insert_entry(struct hists *self, +static bool hists__collapse_insert_entry(struct hists *hists, struct rb_root *root, struct hist_entry *he) { @@ -240,8 +240,8 @@ static bool hists__collapse_insert_entry(struct hists *self, if (!cmp) { iter->period += he->period; if (symbol_conf.use_callchain) { - callchain_cursor_reset(&self->callchain_cursor); - callchain_merge(&self->callchain_cursor, iter->callchain, + callchain_cursor_reset(&hists->callchain_cursor); + callchain_merge(&hists->callchain_cursor, iter->callchain, he->callchain); } hist_entry__free(he); @@ -259,7 +259,7 @@ static bool hists__collapse_insert_entry(struct hists *self, return true; } -void hists__collapse_resort(struct hists *self) +void hists__collapse_resort(struct hists *hists) { struct rb_root tmp; struct rb_node *next; @@ -269,20 +269,20 @@ void hists__collapse_resort(struct hists *self) return; tmp = RB_ROOT; - next = rb_first(&self->entries); - self->nr_entries = 0; - hists__reset_col_len(self); + next = rb_first(&hists->entries); + hists->nr_entries = 0; + hists__reset_col_len(hists); while (next) { n = rb_entry(next, struct hist_entry, rb_node); next = rb_next(&n->rb_node); - rb_erase(&n->rb_node, &self->entries); - if (hists__collapse_insert_entry(self, &tmp, n)) - hists__inc_nr_entries(self, n); + rb_erase(&n->rb_node, &hists->entries); + if (hists__collapse_insert_entry(hists, &tmp, n)) + hists__inc_nr_entries(hists, n); } - self->entries = tmp; + hists->entries = tmp; } /* @@ -315,31 +315,31 @@ static void __hists__insert_output_entry(struct rb_root *entries, rb_insert_color(&he->rb_node, entries); } -void hists__output_resort(struct hists *self) +void hists__output_resort(struct hists *hists) { struct rb_root tmp; struct rb_node *next; struct hist_entry *n; u64 min_callchain_hits; - min_callchain_hits = self->stats.total_period * (callchain_param.min_percent / 100); + min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100); tmp = RB_ROOT; - next = rb_first(&self->entries); + next = rb_first(&hists->entries); - self->nr_entries = 0; - hists__reset_col_len(self); + hists->nr_entries = 0; + hists__reset_col_len(hists); while (next) { n = rb_entry(next, struct hist_entry, rb_node); next = rb_next(&n->rb_node); - rb_erase(&n->rb_node, &self->entries); + rb_erase(&n->rb_node, &hists->entries); __hists__insert_output_entry(&tmp, n, 
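
hists__collapse_insert_entry() above is the classic insert-or-merge walk over a struct rb_root: entries that compare equal fold their period into the node already in the tree instead of being linked as duplicates. Reduced to its skeleton, with the callchain merging dropped (a sketch of the pattern, not the function itself):

static bool insert_or_merge(struct rb_root *root, struct hist_entry *he)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        struct hist_entry *iter;
        int64_t cmp;

        while (*p != NULL) {
                parent = *p;
                iter = rb_entry(parent, struct hist_entry, rb_node);
                cmp = hist_entry__collapse(iter, he);

                if (!cmp) {                     /* same key: merge, drop dup */
                        iter->period += he->period;
                        hist_entry__free(he);
                        return false;
                }

                if (cmp < 0)
                        p = &(*p)->rb_left;
                else
                        p = &(*p)->rb_right;
        }

        rb_link_node(&he->rb_node, parent, p);  /* new key: link, rebalance */
        rb_insert_color(&he->rb_node, root);
        return true;
}
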
min_callchain_hits); - hists__inc_nr_entries(self, n); + hists__inc_nr_entries(hists, n); } - self->entries = tmp; + hists->entries = tmp; } static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) @@ -738,7 +738,7 @@ static size_t hist_entry__fprintf_callchain(struct hist_entry *self, left_margin); } -size_t hists__fprintf(struct hists *self, struct hists *pair, +size_t hists__fprintf(struct hists *hists, struct hists *pair, bool show_displacement, FILE *fp) { struct sort_entry *se; @@ -803,15 +803,15 @@ size_t hists__fprintf(struct hists *self, struct hists *pair, width = strlen(se->se_header); if (symbol_conf.col_width_list_str) { if (col_width) { - hists__set_col_len(self, se->se_width_idx, + hists__set_col_len(hists, se->se_width_idx, atoi(col_width)); col_width = strchr(col_width, ','); if (col_width) ++col_width; } } - if (!hists__new_col_len(self, se->se_width_idx, width)) - width = hists__col_len(self, se->se_width_idx); + if (!hists__new_col_len(hists, se->se_width_idx, width)) + width = hists__col_len(hists, se->se_width_idx); fprintf(fp, " %*s", width, se->se_header); } fprintf(fp, "\n"); @@ -834,7 +834,7 @@ size_t hists__fprintf(struct hists *self, struct hists *pair, continue; fprintf(fp, " "); - width = hists__col_len(self, se->se_width_idx); + width = hists__col_len(hists, se->se_width_idx); if (width == 0) width = strlen(se->se_header); for (i = 0; i < width; i++) @@ -844,7 +844,7 @@ size_t hists__fprintf(struct hists *self, struct hists *pair, fprintf(fp, "\n#\n"); print_entries: - for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) { + for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); if (h->filtered) @@ -858,12 +858,12 @@ size_t hists__fprintf(struct hists *self, struct hists *pair, displacement = 0; ++position; } - ret += hist_entry__fprintf(h, self, pair, show_displacement, - displacement, fp, self->stats.total_period); + ret += hist_entry__fprintf(h, hists, pair, show_displacement, + displacement, fp, hists->stats.total_period); if (symbol_conf.use_callchain) - ret += hist_entry__fprintf_callchain(h, self, fp, - self->stats.total_period); + ret += hist_entry__fprintf_callchain(h, hists, fp, + hists->stats.total_period); if (h->ms.map == NULL && verbose > 1) { __map_groups__fprintf_maps(&h->thread->mg, MAP__FUNCTION, verbose, fp); @@ -879,7 +879,7 @@ size_t hists__fprintf(struct hists *self, struct hists *pair, /* * See hists__fprintf to match the column widths */ -unsigned int hists__sort_list_width(struct hists *self) +unsigned int hists__sort_list_width(struct hists *hists) { struct sort_entry *se; int ret = 9; /* total % */ @@ -898,7 +898,7 @@ unsigned int hists__sort_list_width(struct hists *self) list_for_each_entry(se, &hist_entry__sort_list, list) if (!se->elide) - ret += 2 + hists__col_len(self, se->se_width_idx); + ret += 2 + hists__col_len(hists, se->se_width_idx); if (verbose) /* Addr + origin */ ret += 3 + BITS_PER_LONG / 4; @@ -906,32 +906,32 @@ unsigned int hists__sort_list_width(struct hists *self) return ret; } -static void hists__remove_entry_filter(struct hists *self, struct hist_entry *h, +static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h, enum hist_filter filter) { h->filtered &= ~(1 << filter); if (h->filtered) return; - ++self->nr_entries; + ++hists->nr_entries; if (h->ms.unfolded) - self->nr_entries += h->nr_rows; + hists->nr_entries += h->nr_rows; h->row_offset = 0; - self->stats.total_period += h->period; - 
self->stats.nr_events[PERF_RECORD_SAMPLE] += h->nr_events; + hists->stats.total_period += h->period; + hists->stats.nr_events[PERF_RECORD_SAMPLE] += h->nr_events; - hists__calc_col_len(self, h); + hists__calc_col_len(hists, h); } -void hists__filter_by_dso(struct hists *self, const struct dso *dso) +void hists__filter_by_dso(struct hists *hists, const struct dso *dso) { struct rb_node *nd; - self->nr_entries = self->stats.total_period = 0; - self->stats.nr_events[PERF_RECORD_SAMPLE] = 0; - hists__reset_col_len(self); + hists->nr_entries = hists->stats.total_period = 0; + hists->stats.nr_events[PERF_RECORD_SAMPLE] = 0; + hists__reset_col_len(hists); - for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) { + for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); if (symbol_conf.exclude_other && !h->parent) @@ -942,19 +942,19 @@ void hists__filter_by_dso(struct hists *self, const struct dso *dso) continue; } - hists__remove_entry_filter(self, h, HIST_FILTER__DSO); + hists__remove_entry_filter(hists, h, HIST_FILTER__DSO); } } -void hists__filter_by_thread(struct hists *self, const struct thread *thread) +void hists__filter_by_thread(struct hists *hists, const struct thread *thread) { struct rb_node *nd; - self->nr_entries = self->stats.total_period = 0; - self->stats.nr_events[PERF_RECORD_SAMPLE] = 0; - hists__reset_col_len(self); + hists->nr_entries = hists->stats.total_period = 0; + hists->stats.nr_events[PERF_RECORD_SAMPLE] = 0; + hists__reset_col_len(hists); - for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) { + for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); if (thread != NULL && h->thread != thread) { @@ -962,7 +962,7 @@ void hists__filter_by_thread(struct hists *self, const struct thread *thread) continue; } - hists__remove_entry_filter(self, h, HIST_FILTER__THREAD); + hists__remove_entry_filter(hists, h, HIST_FILTER__THREAD); } } @@ -976,13 +976,13 @@ int hist_entry__annotate(struct hist_entry *he, size_t privsize) return symbol__annotate(he->ms.sym, he->ms.map, privsize); } -void hists__inc_nr_events(struct hists *self, u32 type) +void hists__inc_nr_events(struct hists *hists, u32 type) { - ++self->stats.nr_events[0]; - ++self->stats.nr_events[type]; + ++hists->stats.nr_events[0]; + ++hists->stats.nr_events[type]; } -size_t hists__fprintf_nr_events(struct hists *self, FILE *fp) +size_t hists__fprintf_nr_events(struct hists *hists, FILE *fp) { int i; size_t ret = 0; @@ -990,7 +990,7 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp) for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { const char *name; - if (self->stats.nr_events[i] == 0) + if (hists->stats.nr_events[i] == 0) continue; name = perf_event__name(i); @@ -998,7 +998,7 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp) continue; ret += fprintf(fp, "%16s events: %10d\n", name, - self->stats.nr_events[i]); + hists->stats.nr_events[i]); } return ret;
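 }

hists__inc_nr_events() bumps nr_events[0] as the running total across all record types while also indexing the per-type slot, which is what lets hists__fprintf_nr_events() walk 0..PERF_RECORD_HEADER_MAX and print only the types actually seen. Typical call sites in a perf tool of this era look roughly like the following (the session->hists wiring is assumed here, not shown in the patch):

        /* for each event delivered by the session: */
        hists__inc_nr_events(&session->hists, event->header.type);

        /* at exit, one summary line per event type seen: */
        hists__fprintf_nr_events(&session->hists, stdout);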