Skip to content

Commit

Permalink
mce, edac: Use an atomic notifier for MCEs decoding
Browse files Browse the repository at this point in the history
Add an atomic notifier which ensures proper locking when conveying
MCE info to EDAC for decoding. The actual notifier call overrides a
default, negative priority notifier.

Note: make sure we register the default decoder only once since
mcheck_init() runs on each CPU.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
LKML-Reference: <20091003065752.GA8935@liondog.tnic>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
  • Loading branch information
Borislav Petkov authored and Ingo Molnar committed Oct 12, 2009
1 parent d93a8f8 commit fb25319
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 19 deletions.
3 changes: 2 additions & 1 deletion arch/x86/include/asm/mce.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ struct mce_log {
#define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9)
#define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0)

extern struct atomic_notifier_head x86_mce_decoder_chain;

#ifdef __KERNEL__

#include <linux/percpu.h>
Expand Down Expand Up @@ -213,6 +215,5 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
void intel_init_thermal(struct cpuinfo_x86 *c);

void mce_log_therm_throt_event(__u64 status);

#endif /* __KERNEL__ */
#endif /* _ASM_X86_MCE_H */
29 changes: 20 additions & 9 deletions arch/x86/kernel/cpu/mcheck/mce.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,18 +85,26 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
static DEFINE_PER_CPU(struct mce, mces_seen);
static int cpu_missing;

static void default_decode_mce(struct mce *m)
/*
* CPU/chipset specific EDAC code can register a notifier call here to print
* MCE errors in a human-readable form.
*/
ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);

static int default_decode_mce(struct notifier_block *nb, unsigned long val,
void *data)
{
pr_emerg("No human readable MCE decoding support on this CPU type.\n");
pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");

return NOTIFY_STOP;
}

/*
* CPU/chipset specific EDAC code can register a callback here to print
* MCE errors in a human-readable form:
*/
void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce;
EXPORT_SYMBOL(x86_mce_decode_callback);
static struct notifier_block mce_dec_nb = {
.notifier_call = default_decode_mce,
.priority = -1,
};

/* MCA banks polled by the period polling timer for corrected events */
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
Expand Down Expand Up @@ -204,9 +212,9 @@ static void print_mce(struct mce *m)

/*
* Print out human-readable details about the MCE error,
* (if the CPU has an implementation for that):
* (if the CPU has an implementation for that)
*/
x86_mce_decode_callback(m);
atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
}

static void print_mce_head(void)
Expand Down Expand Up @@ -1420,6 +1428,9 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
mce_cpu_features(c);
mce_init_timer();
INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);

if (raw_smp_processor_id() == 0)
atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb);
}

/*
Expand Down
21 changes: 12 additions & 9 deletions drivers/edac/edac_mce_amd.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

static bool report_gart_errors;
static void (*nb_bus_decoder)(int node_id, struct err_regs *regs);
static void (*orig_mce_callback)(struct mce *m);

void amd_report_gart_errors(bool v)
{
Expand Down Expand Up @@ -363,8 +362,10 @@ static inline void amd_decode_err_code(unsigned int ec)
pr_warning("Huh? Unknown MCE error 0x%x\n", ec);
}

static void amd_decode_mce(struct mce *m)
static int amd_decode_mce(struct notifier_block *nb, unsigned long val,
void *data)
{
struct mce *m = (struct mce *)data;
struct err_regs regs;
int node, ecc;

Expand Down Expand Up @@ -420,20 +421,22 @@ static void amd_decode_mce(struct mce *m)
}

amd_decode_err_code(m->status & 0xffff);

return NOTIFY_STOP;
}

static struct notifier_block amd_mce_dec_nb = {
.notifier_call = amd_decode_mce,
};

static int __init mce_amd_init(void)
{
/*
* We can decode MCEs for Opteron and later CPUs:
*/
if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
(boot_cpu_data.x86 >= 0xf)) {
/* safe the default decode mce callback */
orig_mce_callback = x86_mce_decode_callback;

x86_mce_decode_callback = amd_decode_mce;
}
(boot_cpu_data.x86 >= 0xf))
atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb);

return 0;
}
Expand All @@ -442,7 +445,7 @@ early_initcall(mce_amd_init);
#ifdef MODULE
static void __exit mce_amd_exit(void)
{
x86_mce_decode_callback = orig_mce_callback;
atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb);
}

MODULE_DESCRIPTION("AMD MCE decoder");
Expand Down

0 comments on commit fb25319

Please sign in to comment.