Skip to content

Commit

Permalink
powerpc/pseries: Display machine check error details.
Browse files Browse the repository at this point in the history
Extract the MCE error details from RTAS extended log and display it to
console.

With this patch you should now see mce logs like below:

[  142.371818] Severe Machine check interrupt [Recovered]
[  142.371822]   NIP [d00000000ca301b8]: init_module+0x1b8/0x338 [bork_kernel]
[  142.371822]   Initiator: CPU
[  142.371823]   Error type: SLB [Multihit]
[  142.371824]     Effective address: d00000000ca70000

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
  • Loading branch information
Mahesh Salgaonkar authored and Michael Ellerman committed Sep 19, 2018
1 parent a43c159 commit 8f0b805
Show file tree
Hide file tree
Showing 2 changed files with 138 additions and 0 deletions.
5 changes: 5 additions & 0 deletions arch/powerpc/include/asm/rtas.h
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,11 @@ static inline uint8_t rtas_error_extended(const struct rtas_error_log *elog)
return (elog->byte1 & 0x04) >> 2;
}

static inline uint8_t rtas_error_initiator(const struct rtas_error_log *elog)
{
return (elog->byte2 & 0xf0) >> 4;
}

#define rtas_error_type(x) ((x)->byte3)

static inline
Expand Down
133 changes: 133 additions & 0 deletions arch/powerpc/platforms/pseries/ras.c
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,136 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
return 0; /* need to perform reset */
}

#define VAL_TO_STRING(ar, val) \
(((val) < ARRAY_SIZE(ar)) ? ar[(val)] : "Unknown")

static void pseries_print_mce_info(struct pt_regs *regs,
struct rtas_error_log *errp)
{
const char *level, *sevstr;
struct pseries_errorlog *pseries_log;
struct pseries_mc_errorlog *mce_log;
u8 error_type, err_sub_type;
u64 addr;
u8 initiator = rtas_error_initiator(errp);
int disposition = rtas_error_disposition(errp);

static const char * const initiators[] = {
"Unknown",
"CPU",
"PCI",
"ISA",
"Memory",
"Power Mgmt",
};
static const char * const mc_err_types[] = {
"UE",
"SLB",
"ERAT",
"TLB",
"D-Cache",
"Unknown",
"I-Cache",
};
static const char * const mc_ue_types[] = {
"Indeterminate",
"Instruction fetch",
"Page table walk ifetch",
"Load/Store",
"Page table walk Load/Store",
};

/* SLB sub errors valid values are 0x0, 0x1, 0x2 */
static const char * const mc_slb_types[] = {
"Parity",
"Multihit",
"Indeterminate",
};

/* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */
static const char * const mc_soft_types[] = {
"Unknown",
"Parity",
"Multihit",
"Indeterminate",
};

if (!rtas_error_extended(errp)) {
pr_err("Machine check interrupt: Missing extended error log\n");
return;
}

pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
if (pseries_log == NULL)
return;

mce_log = (struct pseries_mc_errorlog *)pseries_log->data;

error_type = mce_log->error_type;
err_sub_type = rtas_mc_error_sub_type(mce_log);

switch (rtas_error_severity(errp)) {
case RTAS_SEVERITY_NO_ERROR:
level = KERN_INFO;
sevstr = "Harmless";
break;
case RTAS_SEVERITY_WARNING:
level = KERN_WARNING;
sevstr = "";
break;
case RTAS_SEVERITY_ERROR:
case RTAS_SEVERITY_ERROR_SYNC:
level = KERN_ERR;
sevstr = "Severe";
break;
case RTAS_SEVERITY_FATAL:
default:
level = KERN_ERR;
sevstr = "Fatal";
break;
}

printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
disposition == RTAS_DISP_FULLY_RECOVERED ?
"Recovered" : "Not recovered");
if (user_mode(regs)) {
printk("%s NIP: [%016lx] PID: %d Comm: %s\n", level,
regs->nip, current->pid, current->comm);
} else {
printk("%s NIP [%016lx]: %pS\n", level, regs->nip,
(void *)regs->nip);
}
printk("%s Initiator: %s\n", level,
VAL_TO_STRING(initiators, initiator));

switch (error_type) {
case MC_ERROR_TYPE_UE:
printk("%s Error type: %s [%s]\n", level,
VAL_TO_STRING(mc_err_types, error_type),
VAL_TO_STRING(mc_ue_types, err_sub_type));
break;
case MC_ERROR_TYPE_SLB:
printk("%s Error type: %s [%s]\n", level,
VAL_TO_STRING(mc_err_types, error_type),
VAL_TO_STRING(mc_slb_types, err_sub_type));
break;
case MC_ERROR_TYPE_ERAT:
case MC_ERROR_TYPE_TLB:
printk("%s Error type: %s [%s]\n", level,
VAL_TO_STRING(mc_err_types, error_type),
VAL_TO_STRING(mc_soft_types, err_sub_type));
break;
default:
printk("%s Error type: %s\n", level,
VAL_TO_STRING(mc_err_types, error_type));
break;
}

addr = rtas_mc_get_effective_addr(mce_log);
if (addr)
printk("%s Effective address: %016llx\n", level, addr);
}

static int mce_handle_error(struct rtas_error_log *errp)
{
struct pseries_errorlog *pseries_log;
Expand Down Expand Up @@ -585,8 +715,11 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
int recovered = 0;
int disposition = rtas_error_disposition(err);

pseries_print_mce_info(regs, err);

if (!(regs->msr & MSR_RI)) {
/* If MSR_RI isn't set, we cannot recover */
pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
recovered = 0;

} else if (disposition == RTAS_DISP_FULLY_RECOVERED) {
Expand Down

0 comments on commit 8f0b805

Please sign in to comment.