From 08c5342646c634951b634d1cc89c9a9844206421 Mon Sep 17 00:00:00 2001 From: Chen Gong Date: Wed, 7 Dec 2011 09:21:37 -0800 Subject: [PATCH] --- yaml --- r: 277491 b: refs/heads/master c: 2c29d9dd577b74b44e580f957ea44d1df73af23a h: refs/heads/master i: 277489: ec818027b8bb67aec953f0624c48ed74141889e1 277487: 406881da913dccbc93c6d8fd27012ddafba6632c v: v3 --- [refs] | 2 +- trunk/arch/x86/include/asm/mce.h | 12 ++-- trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c | 34 ++++++++-- trunk/arch/x86/kernel/cpu/mcheck/mce.c | 64 ++----------------- .../arch/x86/kernel/cpu/mcheck/therm_throt.c | 29 +++++++-- trunk/drivers/edac/i7core_edac.c | 4 +- trunk/drivers/edac/mce_amd.c | 4 +- trunk/drivers/edac/sb_edac.c | 6 +- 8 files changed, 74 insertions(+), 81 deletions(-) diff --git a/[refs] b/[refs] index db20e03e7ac0..9373d5e907bf 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 715a43182a20a9e2bae59b4e0826a91ee30f355c +refs/heads/master: 2c29d9dd577b74b44e580f957ea44d1df73af23a diff --git a/trunk/arch/x86/include/asm/mce.h b/trunk/arch/x86/include/asm/mce.h index b7c47a468fde..98165835dc68 100644 --- a/trunk/arch/x86/include/asm/mce.h +++ b/trunk/arch/x86/include/asm/mce.h @@ -50,10 +50,11 @@ #define MCJ_CTX_MASK 3 #define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK) #define MCJ_CTX_RANDOM 0 /* inject context: random */ -#define MCJ_CTX_PROCESS 1 /* inject context: process */ -#define MCJ_CTX_IRQ 2 /* inject context: IRQ */ -#define MCJ_NMI_BROADCAST 4 /* do NMI broadcasting */ -#define MCJ_EXCEPTION 8 /* raise as exception */ +#define MCJ_CTX_PROCESS 0x1 /* inject context: process */ +#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */ +#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */ +#define MCJ_EXCEPTION 0x8 /* raise as exception */ +#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */ /* Fields are zero when not available */ struct mce { @@ -120,8 +121,7 @@ struct mce_log { #ifdef __KERNEL__ -extern void mce_register_decode_chain(struct notifier_block *nb); -extern void mce_unregister_decode_chain(struct notifier_block *nb); +extern struct atomic_notifier_head x86_mce_decoder_chain; #include #include diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c b/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c index 319882ef848d..fc4beb393577 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -92,6 +93,18 @@ static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) return NMI_HANDLED; } +static void mce_irq_ipi(void *info) +{ + int cpu = smp_processor_id(); + struct mce *m = &__get_cpu_var(injectm); + + if (cpumask_test_cpu(cpu, mce_inject_cpumask) && + m->inject_flags & MCJ_EXCEPTION) { + cpumask_clear_cpu(cpu, mce_inject_cpumask); + raise_exception(m, NULL); + } +} + /* Inject mce on current CPU */ static int raise_local(void) { @@ -139,9 +152,10 @@ static void raise_mce(struct mce *m) return; #ifdef CONFIG_X86_LOCAL_APIC - if (m->inject_flags & MCJ_NMI_BROADCAST) { + if (m->inject_flags & (MCJ_IRQ_BRAODCAST | MCJ_NMI_BROADCAST)) { unsigned long start; int cpu; + get_online_cpus(); cpumask_copy(mce_inject_cpumask, cpu_online_mask); cpumask_clear_cpu(get_cpu(), mce_inject_cpumask); @@ -151,13 +165,25 @@ static void raise_mce(struct mce *m) MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM) cpumask_clear_cpu(cpu, mce_inject_cpumask); } - if (!cpumask_empty(mce_inject_cpumask)) - apic->send_IPI_mask(mce_inject_cpumask, NMI_VECTOR); + if (!cpumask_empty(mce_inject_cpumask)) { + if (m->inject_flags & MCJ_IRQ_BRAODCAST) { + /* + * don't wait because mce_irq_ipi is necessary + * to be sync with following raise_local + */ + preempt_disable(); + smp_call_function_many(mce_inject_cpumask, + mce_irq_ipi, NULL, 0); + preempt_enable(); + } else if (m->inject_flags & MCJ_NMI_BROADCAST) + apic->send_IPI_mask(mce_inject_cpumask, + NMI_VECTOR); + } start = jiffies; while (!cpumask_empty(mce_inject_cpumask)) { if (!time_before(jiffies, start + 2*HZ)) { printk(KERN_ERR - "Timeout waiting for mce inject NMI %lx\n", + "Timeout waiting for mce inject %lx\n", *cpumask_bits(mce_inject_cpumask)); break; } diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce.c b/trunk/arch/x86/kernel/cpu/mcheck/mce.c index 5be2464cce6a..2af127d4c3d1 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce.c @@ -95,6 +95,13 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait); static DEFINE_PER_CPU(struct mce, mces_seen); static int cpu_missing; +/* + * CPU/chipset specific EDAC code can register a notifier call here to print + * MCE errors in a human-readable form. + */ +ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); +EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); + /* MCA banks polled by the period polling timer for corrected events */ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL @@ -102,12 +109,6 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { static DEFINE_PER_CPU(struct work_struct, mce_work); -/* - * CPU/chipset specific EDAC code can register a notifier call here to print - * MCE errors in a human-readable form. - */ -ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); - /* Do initial initialization of a struct mce */ void mce_setup(struct mce *m) { @@ -189,57 +190,6 @@ void mce_log(struct mce *mce) set_bit(0, &mce_need_notify); } -static void drain_mcelog_buffer(void) -{ - unsigned int next, i, prev = 0; - - next = rcu_dereference_check_mce(mcelog.next); - - do { - struct mce *m; - - /* drain what was logged during boot */ - for (i = prev; i < next; i++) { - unsigned long start = jiffies; - unsigned retries = 1; - - m = &mcelog.entry[i]; - - while (!m->finished) { - if (time_after_eq(jiffies, start + 2*retries)) - retries++; - - cpu_relax(); - - if (!m->finished && retries >= 4) { - pr_err("MCE: skipping error being logged currently!\n"); - break; - } - } - smp_rmb(); - atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); - } - - memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m)); - prev = next; - next = cmpxchg(&mcelog.next, prev, 0); - } while (next != prev); -} - - -void mce_register_decode_chain(struct notifier_block *nb) -{ - atomic_notifier_chain_register(&x86_mce_decoder_chain, nb); - drain_mcelog_buffer(); -} -EXPORT_SYMBOL_GPL(mce_register_decode_chain); - -void mce_unregister_decode_chain(struct notifier_block *nb) -{ - atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb); -} -EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); - static void print_mce(struct mce *m) { int ret = 0; diff --git a/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c b/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c index ce04b5804085..787e06c84ea6 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -323,6 +323,17 @@ device_initcall(thermal_throttle_init_device); #endif /* CONFIG_SYSFS */ +/* + * Set up the most two significant bit to notify mce log that this thermal + * event type. + * This is a temp solution. May be changed in the future with mce log + * infrasture. + */ +#define CORE_THROTTLED (0) +#define CORE_POWER_LIMIT ((__u64)1 << 62) +#define PACKAGE_THROTTLED ((__u64)2 << 62) +#define PACKAGE_POWER_LIMIT ((__u64)3 << 62) + static void notify_thresholds(__u64 msr_val) { /* check whether the interrupt handler is defined; @@ -352,23 +363,27 @@ static void intel_thermal_interrupt(void) if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, THERMAL_THROTTLING_EVENT, CORE_LEVEL) != 0) - mce_log_therm_throt_event(msr_val); + mce_log_therm_throt_event(CORE_THROTTLED | msr_val); if (this_cpu_has(X86_FEATURE_PLN)) - therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, + if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, POWER_LIMIT_EVENT, - CORE_LEVEL); + CORE_LEVEL) != 0) + mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val); if (this_cpu_has(X86_FEATURE_PTS)) { rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); - therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, + if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, THERMAL_THROTTLING_EVENT, - PACKAGE_LEVEL); + PACKAGE_LEVEL) != 0) + mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val); if (this_cpu_has(X86_FEATURE_PLN)) - therm_throt_process(msr_val & + if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_POWER_LIMIT, POWER_LIMIT_EVENT, - PACKAGE_LEVEL); + PACKAGE_LEVEL) != 0) + mce_log_therm_throt_event(PACKAGE_POWER_LIMIT + | msr_val); } } diff --git a/trunk/drivers/edac/i7core_edac.c b/trunk/drivers/edac/i7core_edac.c index 8568d9b61875..70ad8923f1d7 100644 --- a/trunk/drivers/edac/i7core_edac.c +++ b/trunk/drivers/edac/i7core_edac.c @@ -2234,7 +2234,7 @@ static void i7core_unregister_mci(struct i7core_dev *i7core_dev) if (pvt->enable_scrub) disable_sdram_scrub_setting(mci); - mce_unregister_decode_chain(&i7_mce_dec); + atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &i7_mce_dec); /* Disable EDAC polling */ i7core_pci_ctl_release(pvt); @@ -2336,7 +2336,7 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev) /* DCLK for scrub rate setting */ pvt->dclk_freq = get_dclk_freq(); - mce_register_decode_chain(&i7_mce_dec); + atomic_notifier_chain_register(&x86_mce_decoder_chain, &i7_mce_dec); return 0; diff --git a/trunk/drivers/edac/mce_amd.c b/trunk/drivers/edac/mce_amd.c index bd926ea2e00c..d0864d9c38ad 100644 --- a/trunk/drivers/edac/mce_amd.c +++ b/trunk/drivers/edac/mce_amd.c @@ -884,7 +884,7 @@ static int __init mce_amd_init(void) pr_info("MCE: In-kernel MCE decoding enabled.\n"); - mce_register_decode_chain(&amd_mce_dec_nb); + atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb); return 0; } @@ -893,7 +893,7 @@ early_initcall(mce_amd_init); #ifdef MODULE static void __exit mce_amd_exit(void) { - mce_unregister_decode_chain(&amd_mce_dec_nb); + atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb); kfree(fam_ops); } diff --git a/trunk/drivers/edac/sb_edac.c b/trunk/drivers/edac/sb_edac.c index 965bc0cc0cf6..7a402bfbee7d 100644 --- a/trunk/drivers/edac/sb_edac.c +++ b/trunk/drivers/edac/sb_edac.c @@ -1661,7 +1661,8 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev) debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", __func__, mci, &sbridge_dev->pdev[0]->dev); - mce_unregister_decode_chain(&sbridge_mce_dec); + atomic_notifier_chain_unregister(&x86_mce_decoder_chain, + &sbridge_mce_dec); /* Remove MC sysfs nodes */ edac_mc_del_mc(mci->dev); @@ -1730,7 +1731,8 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev) goto fail0; } - mce_register_decode_chain(&sbridge_mce_dec); + atomic_notifier_chain_register(&x86_mce_decoder_chain, + &sbridge_mce_dec); return 0; fail0: