Skip to content

Commit

Permalink
amd64_edac: Carve out ECC-related hw settings
Browse files Browse the repository at this point in the history
This is in preparation for the init path reorganization, where we first
want only to

1) test whether a particular node supports ECC, and
2) check whether ECC can be enabled on it

and only then do the necessary allocation/initialization. For that,
we need to decouple the ECC settings of the node from the instance's
descriptor.

There should be no functional change introduced by this patch.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
  • Loading branch information
Borislav Petkov committed Jan 7, 2011
1 parent f1db274 commit ae7bb7c
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 24 deletions.
59 changes: 40 additions & 19 deletions drivers/edac/amd64_edac.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ static struct msr __percpu *msrs;
/* Per-node driver instances */
static struct mem_ctl_info **mcis;
static struct amd64_pvt **pvts;
static struct ecc_settings **ecc_stngs;

/*
* Address to DRAM bank mapping: see F2x80 for K8 and F2x[1,0]80 for Fam10 and
Expand Down Expand Up @@ -2293,7 +2294,7 @@ static bool amd64_nb_mce_bank_enabled_on_node(int nid)
return ret;
}

static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on)
static int amd64_toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on)
{
cpumask_var_t cmask;
int cpu;
Expand All @@ -2303,7 +2304,7 @@ static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on)
return false;
}

get_cpus_on_this_dct_cpumask(cmask, pvt->mc_node_id);
get_cpus_on_this_dct_cpumask(cmask, nid);

rdmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);

Expand All @@ -2313,14 +2314,14 @@ static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on)

if (on) {
if (reg->l & K8_MSR_MCGCTL_NBE)
pvt->flags.nb_mce_enable = 1;
s->flags.nb_mce_enable = 1;

reg->l |= K8_MSR_MCGCTL_NBE;
} else {
/*
* Turn off NB MCE reporting only when it was off before
*/
if (!pvt->flags.nb_mce_enable)
if (!s->flags.nb_mce_enable)
reg->l &= ~K8_MSR_MCGCTL_NBE;
}
}
Expand All @@ -2334,18 +2335,20 @@ static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on)
static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)
{
struct amd64_pvt *pvt = mci->pvt_info;
u8 nid = pvt->mc_node_id;
struct ecc_settings *s = ecc_stngs[nid];
u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;

amd64_read_pci_cfg(pvt->F3, K8_NBCTL, &value);

/* turn on UECCn and CECCEn bits */
pvt->old_nbctl = value & mask;
pvt->nbctl_mcgctl_saved = 1;
/* turn on UECCEn and CECCEn bits */
s->old_nbctl = value & mask;
s->nbctl_valid = true;

value |= mask;
pci_write_config_dword(pvt->F3, K8_NBCTL, value);

if (amd64_toggle_ecc_err_reporting(pvt, ON))
if (amd64_toggle_ecc_err_reporting(s, nid, ON))
amd64_warn("Error enabling ECC reporting over MCGCTL!\n");

amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &value);
Expand All @@ -2357,7 +2360,7 @@ static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)
if (!(value & K8_NBCFG_ECC_ENABLE)) {
amd64_warn("DRAM ECC disabled on this node, enabling...\n");

pvt->flags.nb_ecc_prev = 0;
s->flags.nb_ecc_prev = 0;

/* Attempt to turn on DRAM ECC Enable */
value |= K8_NBCFG_ECC_ENABLE;
Expand All @@ -2372,7 +2375,7 @@ static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)
amd64_info("Hardware accepted DRAM ECC Enable\n");
}
} else {
pvt->flags.nb_ecc_prev = 1;
s->flags.nb_ecc_prev = 1;
}

debugf0("NBCFG(2)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value,
Expand All @@ -2384,26 +2387,28 @@ static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)

static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
{
u8 nid = pvt->mc_node_id;
struct ecc_settings *s = ecc_stngs[nid];
u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;

if (!pvt->nbctl_mcgctl_saved)
if (!s->nbctl_valid)
return;

amd64_read_pci_cfg(pvt->F3, K8_NBCTL, &value);
value &= ~mask;
value |= pvt->old_nbctl;
value |= s->old_nbctl;

pci_write_config_dword(pvt->F3, K8_NBCTL, value);

/* restore previous BIOS DRAM ECC "off" setting which we force-enabled */
if (!pvt->flags.nb_ecc_prev) {
/* restore previous BIOS DRAM ECC "off" setting we force-enabled */
if (!s->flags.nb_ecc_prev) {
amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &value);
value &= ~K8_NBCFG_ECC_ENABLE;
pci_write_config_dword(pvt->F3, K8_NBCFG, value);
}

/* restore the NB Enable MCGCTL bit */
if (amd64_toggle_ecc_err_reporting(pvt, OFF))
if (amd64_toggle_ecc_err_reporting(s, nid, OFF))
amd64_warn("Error restoring NB MCGCTL settings!\n");
}

Expand Down Expand Up @@ -2654,16 +2659,25 @@ static int __devinit amd64_init_one_instance(struct pci_dev *pdev,
const struct pci_device_id *mc_type)
{
int ret = 0;
u8 nid = get_node_id(pdev);
struct ecc_settings *s;

ret = pci_enable_device(pdev);
if (ret < 0) {
debugf0("ret=%d\n", ret);
return -EIO;
}

ret = -ENOMEM;
s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL);
if (!s)
return ret;

ecc_stngs[nid] = s;

ret = amd64_probe_one_instance(pdev);
if (ret < 0)
amd64_err("Error probing instance: %d\n", get_node_id(pdev));
amd64_err("Error probing instance: %d\n", nid);

return ret;
}
Expand All @@ -2688,6 +2702,9 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
amd_report_gart_errors(false);
amd_unregister_ecc_decoder(amd64_decode_bus_error);

kfree(ecc_stngs[pvt->mc_node_id]);
ecc_stngs[pvt->mc_node_id] = NULL;

/* Free the EDAC CORE resources */
mci->pvt_info = NULL;
mcis[pvt->mc_node_id] = NULL;
Expand Down Expand Up @@ -2767,9 +2784,10 @@ static int __init amd64_edac_init(void)
goto err_ret;

err = -ENOMEM;
pvts = kzalloc(amd_nb_num() * sizeof(pvts[0]), GFP_KERNEL);
mcis = kzalloc(amd_nb_num() * sizeof(mcis[0]), GFP_KERNEL);
if (!(pvts && mcis))
pvts = kzalloc(amd_nb_num() * sizeof(pvts[0]), GFP_KERNEL);
mcis = kzalloc(amd_nb_num() * sizeof(mcis[0]), GFP_KERNEL);
ecc_stngs = kzalloc(amd_nb_num() * sizeof(ecc_stngs[0]), GFP_KERNEL);
if (!(pvts && mcis && ecc_stngs))
goto err_ret;

msrs = msrs_alloc();
Expand Down Expand Up @@ -2820,6 +2838,9 @@ static void __exit amd64_edac_exit(void)

pci_unregister_driver(&amd64_pci_driver);

kfree(ecc_stngs);
ecc_stngs = NULL;

kfree(mcis);
mcis = NULL;

Expand Down
14 changes: 9 additions & 5 deletions drivers/edac/amd64_edac.h
Original file line number Diff line number Diff line change
Expand Up @@ -461,17 +461,21 @@ struct amd64_pvt {
/* place to store error injection parameters prior to issue */
struct error_injection injection;

/* Save old hw registers' values before we modified them */
u32 nbctl_mcgctl_saved; /* When true, following 2 are valid */
u32 old_nbctl;

/* DCT per-family scrubrate setting */
u32 min_scrubrate;

/* family name this instance is running on */
const char *ctl_name;

/* misc settings */
};

/*
* per-node ECC settings descriptor
*/
struct ecc_settings {
u32 old_nbctl;
bool nbctl_valid;

struct flags {
unsigned long nb_mce_enable:1;
unsigned long nb_ecc_prev:1;
Expand Down

0 comments on commit ae7bb7c

Please sign in to comment.