Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 200029
b: refs/heads/master
c: 8a2f118
h: refs/heads/master
i:
  200027: db5160d
v: v3
  • Loading branch information
Mauro Carvalho Chehab committed May 10, 2010
1 parent 1090985 commit 360a4f0
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 23 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: ba6c5c62eeb877da638e43f1282f778432142eec
refs/heads/master: 8a2f118e3a023a4e8cbe56a6e51f7b78fa8c76a0
92 changes: 70 additions & 22 deletions trunk/drivers/edac/i7core_edac.c
Original file line number Diff line number Diff line change
Expand Up @@ -1319,33 +1319,75 @@ static void check_mc_test_err(struct mem_ctl_info *mci, u8 socket)
pvt->last_ce_count[socket][0] = new0;
}

/*
* According with tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
* Architectures Software Developer’s Manual Volume 3B.
* The MCA registers are the following ones:
* struct mce field MCA Register
* m->status MSR_IA32_MC0_STATUS
* m->addr MSR_IA32_MC0_ADDR
* m->misc MSR_IA32_MC0_MISC
* m->mcgstatus MSR_IA32_MCG_STATUS
* In the case of Nehalem, the error information is masked at .status and .misc
* fields
*/
static void i7core_mce_output_error(struct mem_ctl_info *mci,
struct mce *m)
{
debugf0("CPU %d: Machine Check Exception: %16Lx"
"Bank %d: %016Lx\n",
m->cpu, m->mcgstatus, m->bank, m->status);
if (m->ip) {
debugf0("RIP%s %02x:<%016Lx>\n",
!(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
m->cs, m->ip);
char *type="NON-FATAL";
char *err, *msg;
unsigned long error = m->status & 0x1ff0000l;
u32 core_err_cnt = (m->status >> 38) && 0x7fff;
u32 dimm = (m->misc >> 16) & 0x3;
u32 channel = (m->misc >> 18) & 0x3;
u32 syndrome = m->misc >> 32;
u32 errnum = find_first_bit(&error, 32);

switch (errnum) {
case 16:
err = "read ECC error";
break;
case 17:
err = "RAS ECC error";
break;
case 18:
err = "write parity error";
break;
case 19:
err = "redundacy loss";
break;
case 20:
err = "reserved";
break;
case 21:
err = "memory range error";
break;
case 22:
err = "RTID out of range";
break;
case 23:
err = "address parity error";
break;
case 24:
err = "byte enable parity error";
break;
default:
err = "unknown";
}
printk(KERN_EMERG "TSC %llx ", m->tsc);
if (m->addr)
printk("ADDR %llx ", m->addr);
if (m->misc)
printk("MISC %llx ", m->misc);

#if 0
snprintf(msg, sizeof(msg),
"%s (Branch=%d DRAM-Bank=%d Buffer ID = %d RDWR=%s "
"RAS=%d CAS=%d %s Err=0x%lx (%s))",
type, branch >> 1, bank, buf_id, rdwr_str(rdwr), ras, cas,
type, allErrors, error_name[errnum]);
msg = kasprintf(GFP_ATOMIC,
"%s (addr = 0x%08llx Bank=0x%08x, Dimm=%d, Channel=%d, "
"syndrome=0x%08x total error count=%d Err=%d (%s))\n",
type, (long long) m->addr, m->bank, dimm, channel,
syndrome, core_err_cnt,errnum, err);

debugf0("%s", msg);

/* Call the helper to output message */
edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg);
#endif
edac_mc_handle_fbd_ue(mci, 0 /* FIXME: should be rank here */,
0, 0 /* FIXME: should be channel here */, msg);

kfree(msg);
}

/*
Expand Down Expand Up @@ -1398,6 +1440,13 @@ static int i7core_mce_check_error(void *priv, struct mce *mce)

debugf0(__FILE__ ": %s()\n", __func__);

/*
* Just let mcelog handle it if the error is
* outside the memory controller
*/
if (((mce->status & 0xffff) >> 7) != 1)
return 0;

spin_lock_irqsave(&pvt->mce_lock, flags);
if (pvt->mce_count < MCE_LOG_LEN) {
memcpy(&pvt->mce_entry[pvt->mce_count], mce, sizeof(*mce));
Expand All @@ -1406,8 +1455,7 @@ static int i7core_mce_check_error(void *priv, struct mce *mce)
spin_unlock_irqrestore(&pvt->mce_lock, flags);

/* Advice mcelog that the error were handled */
// return 1;
return 0; // Let's duplicate the log
return 1;
}

/*
Expand Down

0 comments on commit 360a4f0

Please sign in to comment.