Skip to content

Commit

Permalink
amd64_edac: convert driver to use the new edac ABI
Browse files Browse the repository at this point in the history
The legacy edac ABI is going to be removed. Port the driver to use
the new API and benefit from its functionality.

Cc: Doug Thompson <norsk5@yahoo.com>
Cc: Borislav Petkov <borislav.petkov@amd.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
  • Loading branch information
Mauro Carvalho Chehab committed May 28, 2012
1 parent 4275be6 commit ab5a503
Showing 1 changed file with 92 additions and 45 deletions.
137 changes: 92 additions & 45 deletions drivers/edac/amd64_edac.c
Original file line number Diff line number Diff line change
Expand Up @@ -1039,6 +1039,37 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
int channel, csrow;
u32 page, offset;

error_address_to_page_and_offset(sys_addr, &page, &offset);

/*
* Find out which node the error address belongs to. This may be
* different from the node that detected the error.
*/
src_mci = find_mc_by_sys_addr(mci, sys_addr);
if (!src_mci) {
amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
(unsigned long)sys_addr);
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
page, offset, syndrome,
-1, -1, -1,
EDAC_MOD_STR,
"failed to map error addr to a node",
NULL);
return;
}

/* Now map the sys_addr to a CSROW */
csrow = sys_addr_to_csrow(src_mci, sys_addr);
if (csrow < 0) {
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
page, offset, syndrome,
-1, -1, -1,
EDAC_MOD_STR,
"failed to map error addr to a csrow",
NULL);
return;
}

/* CHIPKILL enabled */
if (pvt->nbcfg & NBCFG_CHIPKILL) {
channel = get_channel_from_ecc_syndrome(mci, syndrome);
Expand All @@ -1048,9 +1079,15 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
* 2 DIMMs is in error. So we need to ID 'both' of them
* as suspect.
*/
amd64_mc_warn(mci, "unknown syndrome 0x%04x - possible "
"error reporting race\n", syndrome);
edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
amd64_mc_warn(src_mci, "unknown syndrome 0x%04x - "
"possible error reporting race\n",
syndrome);
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
page, offset, syndrome,
csrow, -1, -1,
EDAC_MOD_STR,
"unknown syndrome - possible error reporting race",
NULL);
return;
}
} else {
Expand All @@ -1065,28 +1102,10 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
channel = ((sys_addr & BIT(3)) != 0);
}

/*
* Find out which node the error address belongs to. This may be
* different from the node that detected the error.
*/
src_mci = find_mc_by_sys_addr(mci, sys_addr);
if (!src_mci) {
amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
(unsigned long)sys_addr);
edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
return;
}

/* Now map the sys_addr to a CSROW */
csrow = sys_addr_to_csrow(src_mci, sys_addr);
if (csrow < 0) {
edac_mc_handle_ce_no_info(src_mci, EDAC_MOD_STR);
} else {
error_address_to_page_and_offset(sys_addr, &page, &offset);

edac_mc_handle_ce(src_mci, page, offset, syndrome, csrow,
channel, EDAC_MOD_STR);
}
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, src_mci,
page, offset, syndrome,
csrow, channel, -1,
EDAC_MOD_STR, "", NULL);
}

static int ddr2_cs_size(unsigned i, bool dct_width)
Expand Down Expand Up @@ -1592,15 +1611,20 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
u32 page, offset;
int nid, csrow, chan = 0;

error_address_to_page_and_offset(sys_addr, &page, &offset);

csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan);

if (csrow < 0) {
edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
page, offset, syndrome,
-1, -1, -1,
EDAC_MOD_STR,
"failed to map error addr to a csrow",
NULL);
return;
}

error_address_to_page_and_offset(sys_addr, &page, &offset);

/*
* We need the syndromes for channel detection only when we're
* ganged. Otherwise @chan should already contain the channel at
Expand All @@ -1609,16 +1633,10 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
if (dct_ganging_enabled(pvt))
chan = get_channel_from_ecc_syndrome(mci, syndrome);

if (chan >= 0)
edac_mc_handle_ce(mci, page, offset, syndrome, csrow, chan,
EDAC_MOD_STR);
else
/*
* Channel unknown, report all channels on this CSROW as failed.
*/
for (chan = 0; chan < mci->csrows[csrow].nr_channels; chan++)
edac_mc_handle_ce(mci, page, offset, syndrome,
csrow, chan, EDAC_MOD_STR);
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
page, offset, syndrome,
csrow, chan, -1,
EDAC_MOD_STR, "", NULL);
}

/*
Expand Down Expand Up @@ -1899,7 +1917,12 @@ static void amd64_handle_ce(struct mem_ctl_info *mci, struct mce *m)
/* Ensure that the Error Address is VALID */
if (!(m->status & MCI_STATUS_ADDRV)) {
amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
0, 0, 0,
-1, -1, -1,
EDAC_MOD_STR,
"HW has no ERROR_ADDRESS available",
NULL);
return;
}

Expand All @@ -1923,11 +1946,17 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)

if (!(m->status & MCI_STATUS_ADDRV)) {
amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
0, 0, 0,
-1, -1, -1,
EDAC_MOD_STR,
"HW has no ERROR_ADDRESS available",
NULL);
return;
}

sys_addr = get_error_address(m);
error_address_to_page_and_offset(sys_addr, &page, &offset);

/*
* Find out which node the error address belongs to. This may be
Expand All @@ -1937,7 +1966,11 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)
if (!src_mci) {
amd64_mc_err(mci, "ERROR ADDRESS (0x%lx) NOT mapped to a MC\n",
(unsigned long)sys_addr);
edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
page, offset, 0,
-1, -1, -1,
EDAC_MOD_STR,
"ERROR ADDRESS NOT mapped to a MC", NULL);
return;
}

Expand All @@ -1947,10 +1980,17 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)
if (csrow < 0) {
amd64_mc_err(mci, "ERROR_ADDRESS (0x%lx) NOT mapped to CS\n",
(unsigned long)sys_addr);
edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
page, offset, 0,
-1, -1, -1,
EDAC_MOD_STR,
"ERROR ADDRESS NOT mapped to CS",
NULL);
} else {
error_address_to_page_and_offset(sys_addr, &page, &offset);
edac_mc_handle_ue(log_mci, page, offset, csrow, EDAC_MOD_STR);
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
page, offset, 0,
csrow, -1, -1,
EDAC_MOD_STR, "", NULL);
}
}

Expand Down Expand Up @@ -2515,6 +2555,7 @@ static int amd64_init_one_instance(struct pci_dev *F2)
struct amd64_pvt *pvt = NULL;
struct amd64_family_type *fam_type = NULL;
struct mem_ctl_info *mci = NULL;
struct edac_mc_layer layers[2];
int err = 0, ret;
u8 nid = get_node_id(F2);

Expand Down Expand Up @@ -2549,7 +2590,13 @@ static int amd64_init_one_instance(struct pci_dev *F2)
goto err_siblings;

ret = -ENOMEM;
mci = edac_mc_alloc(0, pvt->csels[0].b_cnt, pvt->channel_count, nid);
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
layers[0].size = pvt->csels[0].b_cnt;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
layers[1].size = pvt->channel_count;
layers[1].is_virt_csrow = false;
mci = new_edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0);
if (!mci)
goto err_siblings;

Expand Down

0 comments on commit ab5a503

Please sign in to comment.