Skip to content

Commit

Permalink
amd64_edac: Check ECC capabilities initially
Browse files Browse the repository at this point in the history
Rework the code to check the hardware ECC capabilities at PCI probing
time. We do all further initialization only if we actually can/have ECC
enabled.

While at it:
0. Fix function naming.
1. Simplify/clarify debug output.
2. Remove amd64_ prefix from the static functions
3. Reorganize code.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
  • Loading branch information
Borislav Petkov committed Jan 7, 2011
1 parent ae7bb7c commit 2299ef7
Showing 1 changed file with 75 additions and 66 deletions.
141 changes: 75 additions & 66 deletions drivers/edac/amd64_edac.c
Original file line number Diff line number Diff line change
Expand Up @@ -2188,18 +2188,19 @@ static u32 amd64_csrow_nr_pages(int csrow_nr, struct amd64_pvt *pvt)
static int amd64_init_csrows(struct mem_ctl_info *mci)
{
struct csrow_info *csrow;
struct amd64_pvt *pvt;
struct amd64_pvt *pvt = mci->pvt_info;
u64 input_addr_min, input_addr_max, sys_addr;
u32 val;
int i, empty = 1;

pvt = mci->pvt_info;
amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &val);

amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &pvt->nbcfg);
pvt->nbcfg = val;
pvt->ctl_error_info.nbcfg = val;

debugf0("NBCFG= 0x%x CHIPKILL= %s DRAM ECC= %s\n", pvt->nbcfg,
(pvt->nbcfg & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
(pvt->nbcfg & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled"
);
debugf0("node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
pvt->mc_node_id, val,
!!(val & K8_NBCFG_CHIPKILL), !!(val & K8_NBCFG_ECC_ENABLE));

for (i = 0; i < pvt->cs_count; i++) {
csrow = &mci->csrows[i];
Expand Down Expand Up @@ -2294,7 +2295,7 @@ static bool amd64_nb_mce_bank_enabled_on_node(int nid)
return ret;
}

static int amd64_toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on)
static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on)
{
cpumask_var_t cmask;
int cpu;
Expand Down Expand Up @@ -2332,30 +2333,31 @@ static int amd64_toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool o
return 0;
}

static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)
static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid,
struct pci_dev *F3)
{
struct amd64_pvt *pvt = mci->pvt_info;
u8 nid = pvt->mc_node_id;
struct ecc_settings *s = ecc_stngs[nid];
bool ret = true;
u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;

amd64_read_pci_cfg(pvt->F3, K8_NBCTL, &value);
if (toggle_ecc_err_reporting(s, nid, ON)) {
amd64_warn("Error enabling ECC reporting over MCGCTL!\n");
return false;
}

amd64_read_pci_cfg(F3, K8_NBCTL, &value);

/* turn on UECCEn and CECCEn bits */
s->old_nbctl = value & mask;
s->nbctl_valid = true;

value |= mask;
pci_write_config_dword(pvt->F3, K8_NBCTL, value);
pci_write_config_dword(F3, K8_NBCTL, value);

if (amd64_toggle_ecc_err_reporting(s, nid, ON))
amd64_warn("Error enabling ECC reporting over MCGCTL!\n");
amd64_read_pci_cfg(F3, K8_NBCFG, &value);

amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &value);

debugf0("NBCFG(1)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value,
(value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
(value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled");
debugf0("1: node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
nid, value,
!!(value & K8_NBCFG_CHIPKILL), !!(value & K8_NBCFG_ECC_ENABLE));

if (!(value & K8_NBCFG_ECC_ENABLE)) {
amd64_warn("DRAM ECC disabled on this node, enabling...\n");
Expand All @@ -2364,25 +2366,26 @@ static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)

/* Attempt to turn on DRAM ECC Enable */
value |= K8_NBCFG_ECC_ENABLE;
pci_write_config_dword(pvt->F3, K8_NBCFG, value);
pci_write_config_dword(F3, K8_NBCFG, value);

amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &value);
amd64_read_pci_cfg(F3, K8_NBCFG, &value);

if (!(value & K8_NBCFG_ECC_ENABLE)) {
amd64_warn("Hardware rejected DRAM ECC enable,"
"check memory DIMM configuration.\n");
ret = false;
} else {
amd64_info("Hardware accepted DRAM ECC Enable\n");
}
} else {
s->flags.nb_ecc_prev = 1;
}

debugf0("NBCFG(2)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value,
(value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
(value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled");
debugf0("2: node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
nid, value,
!!(value & K8_NBCFG_CHIPKILL), !!(value & K8_NBCFG_ECC_ENABLE));

pvt->ctl_error_info.nbcfg = value;
return ret;
}

static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
Expand All @@ -2408,49 +2411,44 @@ static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
}

/* restore the NB Enable MCGCTL bit */
if (amd64_toggle_ecc_err_reporting(s, nid, OFF))
if (toggle_ecc_err_reporting(s, nid, OFF))
amd64_warn("Error restoring NB MCGCTL settings!\n");
}

/*
* EDAC requires that the BIOS have ECC enabled before taking over the
* processing of ECC errors. This is because the BIOS can properly initialize
* the memory system completely. A command line option allows to force-enable
* hardware ECC later in amd64_enable_ecc_error_reporting().
* EDAC requires that the BIOS have ECC enabled before
* taking over the processing of ECC errors. A command line
* option allows to force-enable hardware ECC later in
* enable_ecc_error_reporting().
*/
static const char *ecc_msg =
"ECC disabled in the BIOS or no ECC capability, module will not load.\n"
" Either enable ECC checking or force module loading by setting "
"'ecc_enable_override'.\n"
" (Note that use of the override may cause unknown side effects.)\n";

static int amd64_check_ecc_enabled(struct amd64_pvt *pvt)
static bool ecc_enabled(struct pci_dev *F3, u8 nid)
{
u32 value;
u8 ecc_enabled = 0;
u8 ecc_en = 0;
bool nb_mce_en = false;

amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &value);
amd64_read_pci_cfg(F3, K8_NBCFG, &value);

ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE);
amd64_info("DRAM ECC %s.\n", (ecc_enabled ? "enabled" : "disabled"));
ecc_en = !!(value & K8_NBCFG_ECC_ENABLE);
amd64_info("DRAM ECC %s.\n", (ecc_en ? "enabled" : "disabled"));

nb_mce_en = amd64_nb_mce_bank_enabled_on_node(pvt->mc_node_id);
nb_mce_en = amd64_nb_mce_bank_enabled_on_node(nid);
if (!nb_mce_en)
amd64_notice("NB MCE bank disabled, "
"set MSR 0x%08x[4] on node %d to enable.\n",
MSR_IA32_MCG_CTL, pvt->mc_node_id);

if (!ecc_enabled || !nb_mce_en) {
if (!ecc_enable_override) {
amd64_notice("%s", ecc_msg);
return -ENODEV;
} else {
amd64_warn("Forcing ECC on!\n");
}
}
amd64_notice("NB MCE bank disabled, set MSR "
"0x%08x[4] on node %d to enable.\n",
MSR_IA32_MCG_CTL, nid);

return 0;
if (!ecc_en || !nb_mce_en) {
amd64_notice("%s", ecc_msg);
return false;
}
return true;
}

struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) +
Expand Down Expand Up @@ -2536,7 +2534,7 @@ static struct amd64_family_type *amd64_per_family_init(struct amd64_pvt *pvt)
return fam_type;
}

static int amd64_probe_one_instance(struct pci_dev *F2)
static int amd64_init_one_instance(struct pci_dev *F2)
{
struct amd64_pvt *pvt = NULL;
struct amd64_family_type *fam_type = NULL;
Expand All @@ -2561,11 +2559,6 @@ static int amd64_probe_one_instance(struct pci_dev *F2)
if (err)
goto err_free;

ret = -EINVAL;
err = amd64_check_ecc_enabled(pvt);
if (err)
goto err_put;

/*
* Save the pointer to the private data for use in 2nd initialization
* stage
Expand All @@ -2574,9 +2567,6 @@ static int amd64_probe_one_instance(struct pci_dev *F2)

return 0;

err_put:
amd64_free_mc_sibling_devices(pvt);

err_free:
kfree(pvt);

Expand Down Expand Up @@ -2618,7 +2608,6 @@ static int amd64_init_2nd_stage(struct amd64_pvt *pvt)
if (amd64_init_csrows(mci))
mci->edac_cap = EDAC_FLAG_NONE;

amd64_enable_ecc_error_reporting(mci);
amd64_set_mc_sysfs_attributes(mci);

ret = -ENODEV;
Expand Down Expand Up @@ -2655,12 +2644,13 @@ static int amd64_init_2nd_stage(struct amd64_pvt *pvt)
}


static int __devinit amd64_init_one_instance(struct pci_dev *pdev,
static int __devinit amd64_probe_one_instance(struct pci_dev *pdev,
const struct pci_device_id *mc_type)
{
int ret = 0;
u8 nid = get_node_id(pdev);
struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
struct ecc_settings *s;
int ret = 0;

ret = pci_enable_device(pdev);
if (ret < 0) {
Expand All @@ -2671,15 +2661,34 @@ static int __devinit amd64_init_one_instance(struct pci_dev *pdev,
ret = -ENOMEM;
s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL);
if (!s)
return ret;
goto err_out;

ecc_stngs[nid] = s;

ret = amd64_probe_one_instance(pdev);
if (!ecc_enabled(F3, nid)) {
ret = -ENODEV;

if (!ecc_enable_override)
goto err_enable;

amd64_warn("Forcing ECC on!\n");

if (!enable_ecc_error_reporting(s, nid, F3))
goto err_enable;
}

ret = amd64_init_one_instance(pdev);
if (ret < 0)
amd64_err("Error probing instance: %d\n", nid);

return ret;

err_enable:
kfree(s);
ecc_stngs[nid] = NULL;

err_out:
return ret;
}

static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
Expand Down Expand Up @@ -2741,7 +2750,7 @@ MODULE_DEVICE_TABLE(pci, amd64_pci_table);

static struct pci_driver amd64_pci_driver = {
.name = EDAC_MOD_STR,
.probe = amd64_init_one_instance,
.probe = amd64_probe_one_instance,
.remove = __devexit_p(amd64_remove_one_instance),
.id_table = amd64_pci_table,
};
Expand Down

0 comments on commit 2299ef7

Please sign in to comment.