Skip to content

Commit

Permalink
x86/mce/AMD: Log Deferred Errors using SMCA MCA_DE{STAT,ADDR} registers
Browse files Browse the repository at this point in the history
Scalable MCA provides new registers for all banks for logging deferred
errors: MCA_DESTAT and MCA_DEADDR. Deferred errors are always logged to
these registers.

Update the AMD deferred error handler to use these registers, if
available.

Signed-off-by: Yazen Ghannam <Yazen.Ghannam@amd.com>
[ Sanity-check __log_error() args, massage a bit. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Aravind Gopalakrishnan <aravindksg.lkml@gmail.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/1462971509-3856-2-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Yazen Ghannam authored and Ingo Molnar committed May 12, 2016
1 parent fead35c commit 3410200
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 8 deletions.
4 changes: 4 additions & 0 deletions arch/x86/include/asm/mce.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,17 @@
#define MSR_AMD64_SMCA_MC0_MISC0 0xc0002003
#define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004
#define MSR_AMD64_SMCA_MC0_IPID 0xc0002005
#define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008
#define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009
#define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a
#define MSR_AMD64_SMCA_MCx_CTL(x) (MSR_AMD64_SMCA_MC0_CTL + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_STATUS(x) (MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_ADDR(x) (MSR_AMD64_SMCA_MC0_ADDR + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_MISC(x) (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))

/*
Expand Down
32 changes: 24 additions & 8 deletions arch/x86/kernel/cpu/mcheck/mce_amd.c
Original file line number Diff line number Diff line change
Expand Up @@ -430,12 +430,23 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
deferred_error_interrupt_enable(c);
}

static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
static void
__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
{
u32 msr_status = msr_ops.status(bank);
u32 msr_addr = msr_ops.addr(bank);
struct mce m;
u64 status;

rdmsrl(msr_ops.status(bank), status);
WARN_ON_ONCE(deferred_err && threshold_err);

if (deferred_err && mce_flags.smca) {
msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank);
msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank);
}

rdmsrl(msr_status, status);

if (!(status & MCI_STATUS_VAL))
return;

Expand All @@ -448,10 +459,11 @@ static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
m.misc = misc;

if (m.status & MCI_STATUS_ADDRV)
rdmsrl(msr_ops.addr(bank), m.addr);
rdmsrl(msr_addr, m.addr);

mce_log(&m);
wrmsrl(msr_ops.status(bank), 0);

wrmsrl(msr_status, 0);
}

static inline void __smp_deferred_error_interrupt(void)
Expand Down Expand Up @@ -479,17 +491,21 @@ asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
/* APIC interrupt handler for deferred errors */
static void amd_deferred_error_interrupt(void)
{
u64 status;
unsigned int bank;
u32 msr_status;
u64 status;

for (bank = 0; bank < mca_cfg.banks; ++bank) {
rdmsrl(msr_ops.status(bank), status);
msr_status = (mce_flags.smca) ? MSR_AMD64_SMCA_MCx_DESTAT(bank)
: msr_ops.status(bank);

rdmsrl(msr_status, status);

if (!(status & MCI_STATUS_VAL) ||
!(status & MCI_STATUS_DEFERRED))
continue;

__log_error(bank, false, 0);
__log_error(bank, true, false, 0);
break;
}
}
Expand Down Expand Up @@ -544,7 +560,7 @@ static void amd_threshold_interrupt(void)
return;

log:
__log_error(bank, true, ((u64)high << 32) | low);
__log_error(bank, false, true, ((u64)high << 32) | low);
}

/*
Expand Down

0 comments on commit 3410200

Please sign in to comment.