From c29dfd661fe2f8d1b48c7f00590929c04b25bf40 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 22 Jan 2025 07:50:26 +0100 Subject: [PATCH 01/19] EDAC/ie31200: work around false positive build warning gcc-14 produces a bogus warning in some configurations: drivers/edac/ie31200_edac.c: In function 'ie31200_probe1.isra': drivers/edac/ie31200_edac.c:412:26: error: 'dimm_info' is used uninitialized [-Werror=uninitialized] 412 | struct dimm_data dimm_info[IE31200_CHANNELS][IE31200_DIMMS_PER_CHANNEL]; | ^~~~~~~~~ drivers/edac/ie31200_edac.c:412:26: note: 'dimm_info' declared here 412 | struct dimm_data dimm_info[IE31200_CHANNELS][IE31200_DIMMS_PER_CHANNEL]; | ^~~~~~~~~ I don't see any way the unintialized access could really happen here, but I can see why the compiler gets confused by the two loops. Instead, rework the two nested loops to only read the addr_decode registers and then keep only one instance of the dimm info structure. [Tony: Qiuxu pointed out that the "populate DIMM info" comment was left behind in the refactor and suggested moving it. I deleted the comment as unnecessry in front os a call to populate_dimm_info(). That seems pretty self-describing.] Signed-off-by: Arnd Bergmann Acked-by: Jason Baron Signed-off-by: Tony Luck Link: https://lore.kernel.org/all/20250122065031.1321015-1-arnd@kernel.org --- drivers/edac/ie31200_edac.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 4fc16922dc1af..c6188de13c003 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -409,10 +409,9 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) int i, j, ret; struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; - struct dimm_data dimm_info[IE31200_CHANNELS][IE31200_DIMMS_PER_CHANNEL]; void __iomem *window; struct ie31200_priv *priv; - u32 addr_decode, mad_offset; + u32 addr_decode[IE31200_CHANNELS], mad_offset; /* * Kaby Lake, Coffee Lake seem to work like Skylake. Please re-visit @@ -470,19 +469,10 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) mad_offset = IE31200_MAD_DIMM_0_OFFSET; } - /* populate DIMM info */ for (i = 0; i < IE31200_CHANNELS; i++) { - addr_decode = readl(window + mad_offset + + addr_decode[i] = readl(window + mad_offset + (i * 4)); - edac_dbg(0, "addr_decode: 0x%x\n", addr_decode); - for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) { - populate_dimm_info(&dimm_info[i][j], addr_decode, j, - skl); - edac_dbg(0, "size: 0x%x, rank: %d, width: %d\n", - dimm_info[i][j].size, - dimm_info[i][j].dual_rank, - dimm_info[i][j].x16_width); - } + edac_dbg(0, "addr_decode: 0x%x\n", addr_decode[i]); } /* @@ -493,14 +483,22 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) */ for (i = 0; i < IE31200_DIMMS_PER_CHANNEL; i++) { for (j = 0; j < IE31200_CHANNELS; j++) { + struct dimm_data dimm_info; struct dimm_info *dimm; unsigned long nr_pages; - nr_pages = IE31200_PAGES(dimm_info[j][i].size, skl); + populate_dimm_info(&dimm_info, addr_decode[j], i, + skl); + edac_dbg(0, "size: 0x%x, rank: %d, width: %d\n", + dimm_info.size, + dimm_info.dual_rank, + dimm_info.x16_width); + + nr_pages = IE31200_PAGES(dimm_info.size, skl); if (nr_pages == 0) continue; - if (dimm_info[j][i].dual_rank) { + if (dimm_info.dual_rank) { nr_pages = nr_pages / 2; dimm = edac_get_dimm(mci, (i * 2) + 1, j, 0); dimm->nr_pages = nr_pages; From 267e5b1d267539d9a927dc04aab6f15aca57da92 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Wed, 12 Feb 2025 16:33:54 +0800 Subject: [PATCH 02/19] EDAC/igen6: Fix the flood of invalid error reports The ECC_ERROR_LOG register of certain SoCs may contain the invalid value ~0, which results in a flood of invalid error reports in polling mode. Fix the flood of invalid error reports by skipping the invalid ECC error log value ~0. Fixes: e14232afa944 ("EDAC/igen6: Add polling support") Reported-by: Ramses Closes: https://lore.kernel.org/all/OISL8Rv--F-9@well-founded.dev/ Tested-by: Ramses Reported-by: John Closes: https://lore.kernel.org/all/p5YcxOE6M3Ncxpn2-Ia_wCt61EM4LwIiN3LroQvT_-G2jMrFDSOW5k2A9D8UUzD2toGpQBN1eI0sL5dSKnkO8iteZegLoQEj-DwQaMhGx4A=@proton.me/ Tested-by: John Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/r/20250212083354.31919-1-qiuxu.zhuo@intel.com --- drivers/edac/igen6_edac.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c index fdf3a84fe6988..595908af9e5c9 100644 --- a/drivers/edac/igen6_edac.c +++ b/drivers/edac/igen6_edac.c @@ -785,13 +785,22 @@ static u64 ecclog_read_and_clear(struct igen6_imc *imc) { u64 ecclog = readq(imc->window + ECC_ERROR_LOG_OFFSET); - if (ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)) { - /* Clear CE/UE bits by writing 1s */ - writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET); - return ecclog; - } + /* + * Quirk: The ECC_ERROR_LOG register of certain SoCs may contain + * the invalid value ~0. This will result in a flood of invalid + * error reports in polling mode. Skip it. + */ + if (ecclog == ~0) + return 0; - return 0; + /* Neither a CE nor a UE. Skip it.*/ + if (!(ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE))) + return 0; + + /* Clear CE/UE bits by writing 1s */ + writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET); + + return ecclog; } static void errsts_clear(struct igen6_imc *imc) From d9207cf7760f5f5599e9ff7eb0fedf56821a1d59 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 14 Feb 2025 08:27:28 +0800 Subject: [PATCH 03/19] EDAC/{skx_common,i10nm}: Fix some missing error reports on Emerald Rapids When doing error injection to some memory DIMMs on certain Intel Emerald Rapids servers, the i10nm_edac missed error reports for some memory DIMMs. Certain BIOS configurations may hide some memory controllers, and the i10nm_edac doesn't enumerate these hidden memory controllers. However, the ADXL decodes memory errors using memory controller physical indices even if there are hidden memory controllers. Therefore, the memory controller physical indices reported by the ADXL may mismatch the logical indices enumerated by the i10nm_edac, resulting in missed error reports for some memory DIMMs. Fix this issue by creating a mapping table from memory controller physical indices (used by the ADXL) to logical indices (used by the i10nm_edac) and using it to convert the physical indices to the logical indices during the error handling process. Fixes: c545f5e41225 ("EDAC/i10nm: Skip the absent memory controllers") Reported-by: Kevin Chang Tested-by: Kevin Chang Reported-by: Thomas Chen Tested-by: Thomas Chen Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/r/20250214002728.6287-1-qiuxu.zhuo@intel.com --- drivers/edac/i10nm_base.c | 2 ++ drivers/edac/skx_common.c | 33 +++++++++++++++++++++++++++++++++ drivers/edac/skx_common.h | 11 +++++++++++ 3 files changed, 46 insertions(+) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index f45d849d3f150..355a977019e94 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -751,6 +751,8 @@ static int i10nm_get_ddr_munits(void) continue; } else { d->imc[lmc].mdev = mdev; + if (res_cfg->type == SPR) + skx_set_mc_mapping(d, i, lmc); lmc++; } } diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index f7bd930e058fe..fa5b442b18449 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -121,6 +121,35 @@ void skx_adxl_put(void) } EXPORT_SYMBOL_GPL(skx_adxl_put); +static void skx_init_mc_mapping(struct skx_dev *d) +{ + /* + * By default, the BIOS presents all memory controllers within each + * socket to the EDAC driver. The physical indices are the same as + * the logical indices of the memory controllers enumerated by the + * EDAC driver. + */ + for (int i = 0; i < NUM_IMC; i++) + d->mc_mapping[i] = i; +} + +void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc) +{ + edac_dbg(0, "Set the mapping of mc phy idx to logical idx: %02d -> %02d\n", + pmc, lmc); + + d->mc_mapping[pmc] = lmc; +} +EXPORT_SYMBOL_GPL(skx_set_mc_mapping); + +static u8 skx_get_mc_mapping(struct skx_dev *d, u8 pmc) +{ + edac_dbg(0, "Get the mapping of mc phy idx to logical idx: %02d -> %02d\n", + pmc, d->mc_mapping[pmc]); + + return d->mc_mapping[pmc]; +} + static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src) { struct skx_dev *d; @@ -188,6 +217,8 @@ static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src) return false; } + res->imc = skx_get_mc_mapping(d, res->imc); + for (i = 0; i < adxl_component_count; i++) { if (adxl_values[i] == ~0x0ull) continue; @@ -326,6 +357,8 @@ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list) d->bus[0], d->bus[1], d->bus[2], d->bus[3]); list_add_tail(&d->list, &dev_edac_list); prev = pdev; + + skx_init_mc_mapping(d); } if (list) diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index b0845bdd45164..ca5408803f878 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -93,6 +93,16 @@ struct skx_dev { struct pci_dev *uracu; /* for i10nm CPU */ struct pci_dev *pcu_cr3; /* for HBM memory detection */ u32 mcroute; + /* + * Some server BIOS may hide certain memory controllers, and the + * EDAC driver skips those hidden memory controllers. However, the + * ADXL still decodes memory error address using physical memory + * controller indices. The mapping table is used to convert the + * physical indices (reported by ADXL) to the logical indices + * (used the EDAC driver) of present memory controllers during the + * error handling process. + */ + u8 mc_mapping[NUM_IMC]; struct skx_imc { struct mem_ctl_info *mci; struct pci_dev *mdev; /* for i10nm CPU */ @@ -242,6 +252,7 @@ void skx_adxl_put(void); void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log); void skx_set_mem_cfg(bool mem_cfg_2lm); void skx_set_res_cfg(struct res_config *cfg); +void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc); int skx_get_src_id(struct skx_dev *d, int off, u8 *id); From d09055122bd20827e4772a215b4b1f8f9dce2eda Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sun, 23 Feb 2025 22:24:29 +0100 Subject: [PATCH 04/19] EDAC: Use string choice helper functions Remove hard-coded strings by using the str_enabled_disabled(), str_yes_no(), str_write_read(), and str_plural() helper functions. Add a space in "All DIMMs support ECC: yes/no" to improve readability. Signed-off-by: Thorsten Blum Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Yazen Ghannam Reviewed-by: Qiuxu Zhuo Link: https://lore.kernel.org/r/20250223212429.3466-2-thorsten.blum@linux.dev --- drivers/edac/amd64_edac.c | 47 +++++++++++++++++++-------------------- drivers/edac/debugfs.c | 5 ++++- drivers/edac/i5400_edac.c | 3 ++- drivers/edac/i7300_edac.c | 7 +++--- drivers/edac/xgene_edac.c | 17 +++++++------- 5 files changed, 42 insertions(+), 37 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 8414ceb43e4ac..d133a5be58905 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only #include +#include #include "amd64_edac.h" #include #include @@ -1171,22 +1172,21 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) edac_dbg(1, " LRDIMM %dx rank multiply\n", (dcsm & 0x3)); } - edac_dbg(1, "All DIMMs support ECC:%s\n", - (dclr & BIT(19)) ? "yes" : "no"); + edac_dbg(1, "All DIMMs support ECC: %s\n", str_yes_no(dclr & BIT(19))); edac_dbg(1, " PAR/ERR parity: %s\n", - (dclr & BIT(8)) ? "enabled" : "disabled"); + str_enabled_disabled(dclr & BIT(8))); if (pvt->fam == 0x10) edac_dbg(1, " DCT 128bit mode width: %s\n", (dclr & BIT(11)) ? "128b" : "64b"); edac_dbg(1, " x4 logical DIMMs present: L0: %s L1: %s L2: %s L3: %s\n", - (dclr & BIT(12)) ? "yes" : "no", - (dclr & BIT(13)) ? "yes" : "no", - (dclr & BIT(14)) ? "yes" : "no", - (dclr & BIT(15)) ? "yes" : "no"); + str_yes_no(dclr & BIT(12)), + str_yes_no(dclr & BIT(13)), + str_yes_no(dclr & BIT(14)), + str_yes_no(dclr & BIT(15))); } #define CS_EVEN_PRIMARY BIT(0) @@ -1353,14 +1353,14 @@ static void umc_dump_misc_regs(struct amd64_pvt *pvt) edac_dbg(1, "UMC%d UMC cap high: 0x%x\n", i, umc->umc_cap_hi); edac_dbg(1, "UMC%d ECC capable: %s, ChipKill ECC capable: %s\n", - i, (umc->umc_cap_hi & BIT(30)) ? "yes" : "no", - (umc->umc_cap_hi & BIT(31)) ? "yes" : "no"); + i, str_yes_no(umc->umc_cap_hi & BIT(30)), + str_yes_no(umc->umc_cap_hi & BIT(31))); edac_dbg(1, "UMC%d All DIMMs support ECC: %s\n", - i, (umc->umc_cfg & BIT(12)) ? "yes" : "no"); + i, str_yes_no(umc->umc_cfg & BIT(12))); edac_dbg(1, "UMC%d x4 DIMMs present: %s\n", - i, (umc->dimm_cfg & BIT(6)) ? "yes" : "no"); + i, str_yes_no(umc->dimm_cfg & BIT(6))); edac_dbg(1, "UMC%d x16 DIMMs present: %s\n", - i, (umc->dimm_cfg & BIT(7)) ? "yes" : "no"); + i, str_yes_no(umc->dimm_cfg & BIT(7))); umc_debug_display_dimm_sizes(pvt, i); } @@ -1371,11 +1371,11 @@ static void dct_dump_misc_regs(struct amd64_pvt *pvt) edac_dbg(1, "F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap); edac_dbg(1, " NB two channel DRAM capable: %s\n", - (pvt->nbcap & NBCAP_DCT_DUAL) ? "yes" : "no"); + str_yes_no(pvt->nbcap & NBCAP_DCT_DUAL)); edac_dbg(1, " ECC capable: %s, ChipKill ECC capable: %s\n", - (pvt->nbcap & NBCAP_SECDED) ? "yes" : "no", - (pvt->nbcap & NBCAP_CHIPKILL) ? "yes" : "no"); + str_yes_no(pvt->nbcap & NBCAP_SECDED), + str_yes_no(pvt->nbcap & NBCAP_CHIPKILL)); debug_dump_dramcfg_low(pvt, pvt->dclr0, 0); @@ -1398,7 +1398,7 @@ static void dct_dump_misc_regs(struct amd64_pvt *pvt) if (!dct_ganging_enabled(pvt)) debug_dump_dramcfg_low(pvt, pvt->dclr1, 1); - edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no"); + edac_dbg(1, " DramHoleValid: %s\n", str_yes_no(dhar_valid(pvt))); amd64_info("using x%u syndromes.\n", pvt->ecc_sym_sz); } @@ -2027,15 +2027,15 @@ static void read_dram_ctl_register(struct amd64_pvt *pvt) if (!dct_ganging_enabled(pvt)) edac_dbg(0, " Address range split per DCT: %s\n", - (dct_high_range_enabled(pvt) ? "yes" : "no")); + str_yes_no(dct_high_range_enabled(pvt))); edac_dbg(0, " data interleave for ECC: %s, DRAM cleared since last warm reset: %s\n", - (dct_data_intlv_enabled(pvt) ? "enabled" : "disabled"), - (dct_memory_cleared(pvt) ? "yes" : "no")); + str_enabled_disabled(dct_data_intlv_enabled(pvt)), + str_yes_no(dct_memory_cleared(pvt))); edac_dbg(0, " channel interleave: %s, " "interleave bits selector: 0x%x\n", - (dct_interleave_enabled(pvt) ? "enabled" : "disabled"), + str_enabled_disabled(dct_interleave_enabled(pvt)), dct_sel_interleave_addr(pvt)); } @@ -3208,8 +3208,7 @@ static bool nb_mce_bank_enabled_on_node(u16 nid) nbe = reg->l & MSR_MCGCTL_NBE; edac_dbg(0, "core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n", - cpu, reg->q, - (nbe ? "enabled" : "disabled")); + cpu, reg->q, str_enabled_disabled(nbe)); if (!nbe) goto out; @@ -3353,7 +3352,7 @@ static bool dct_ecc_enabled(struct amd64_pvt *pvt) edac_dbg(0, "NB MCE bank disabled, set MSR 0x%08x[4] on node %d to enable.\n", MSR_IA32_MCG_CTL, nid); - edac_dbg(3, "Node %d: DRAM ECC %s.\n", nid, (ecc_en ? "enabled" : "disabled")); + edac_dbg(3, "Node %d: DRAM ECC %s.\n", nid, str_enabled_disabled(ecc_en)); if (!ecc_en || !nb_mce_en) return false; @@ -3378,7 +3377,7 @@ static bool umc_ecc_enabled(struct amd64_pvt *pvt) } } - edac_dbg(3, "Node %d: DRAM ECC %s.\n", pvt->mc_node_id, (ecc_en ? "enabled" : "disabled")); + edac_dbg(3, "Node %d: DRAM ECC %s.\n", pvt->mc_node_id, str_enabled_disabled(ecc_en)); return ecc_en; } diff --git a/drivers/edac/debugfs.c b/drivers/edac/debugfs.c index 4804332d99465..8195fc9c9354a 100644 --- a/drivers/edac/debugfs.c +++ b/drivers/edac/debugfs.c @@ -1,4 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only + +#include + #include "edac_module.h" static struct dentry *edac_debugfs; @@ -22,7 +25,7 @@ static ssize_t edac_fake_inject_write(struct file *file, "Generating %d %s fake error%s to %d.%d.%d to test core handling. NOTE: this won't test the driver-specific decoding logic.\n", errcount, (type == HW_EVENT_ERR_UNCORRECTED) ? "UE" : "CE", - errcount > 1 ? "s" : "", + str_plural(errcount), mci->fake_inject_layer[0], mci->fake_inject_layer[1], mci->fake_inject_layer[2] diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c index 49b4499269fb7..b5cf25905b059 100644 --- a/drivers/edac/i5400_edac.c +++ b/drivers/edac/i5400_edac.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "edac_module.h" @@ -899,7 +900,7 @@ static void decode_mtr(int slot_row, u16 mtr) edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); edac_dbg(2, "\t\tELECTRICAL THROTTLING is %s\n", - MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled"); + str_enabled_disabled(MTR_DIMMS_ETHROTTLE(mtr))); edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr)); edac_dbg(2, "\t\tNUMRANK: %s\n", diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 61adaa872ba7b..69068f8d0cadf 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "edac_module.h" @@ -620,7 +621,7 @@ static int decode_mtr(struct i7300_pvt *pvt, edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); edac_dbg(2, "\t\tELECTRICAL THROTTLING is %s\n", - MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled"); + str_enabled_disabled(MTR_DIMMS_ETHROTTLE(mtr))); edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr)); edac_dbg(2, "\t\tNUMRANK: %s\n", @@ -871,9 +872,9 @@ static int i7300_get_mc_regs(struct mem_ctl_info *mci) IS_MIRRORED(pvt->mc_settings) ? "" : "non-"); edac_dbg(0, "Error detection is %s\n", - IS_ECC_ENABLED(pvt->mc_settings) ? "enabled" : "disabled"); + str_enabled_disabled(IS_ECC_ENABLED(pvt->mc_settings))); edac_dbg(0, "Retry is %s\n", - IS_RETRY_ENABLED(pvt->mc_settings) ? "enabled" : "disabled"); + str_enabled_disabled(IS_RETRY_ENABLED(pvt->mc_settings))); /* Get Memory Interleave Range registers */ pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR0, diff --git a/drivers/edac/xgene_edac.c b/drivers/edac/xgene_edac.c index 699c7d29d80cd..9955396c9a520 100644 --- a/drivers/edac/xgene_edac.c +++ b/drivers/edac/xgene_edac.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "edac_module.h" @@ -1407,7 +1408,7 @@ static void xgene_edac_iob_gic_report(struct edac_device_ctl_info *edac_dev) dev_err(edac_dev->dev, "Multiple XGIC write size error\n"); info = readl(ctx->dev_csr + XGICTRANSERRREQINFO); dev_err(edac_dev->dev, "XGIC %s access @ 0x%08X (0x%08X)\n", - info & REQTYPE_MASK ? "read" : "write", ERRADDR_RD(info), + str_read_write(info & REQTYPE_MASK), ERRADDR_RD(info), info); writel(reg, ctx->dev_csr + XGICTRANSERRINTSTS); @@ -1489,19 +1490,19 @@ static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev) if (reg & AGENT_OFFLINE_ERR_MASK) dev_err(edac_dev->dev, "IOB bus %s access to offline agent error\n", - write ? "write" : "read"); + str_write_read(write)); if (reg & UNIMPL_RBPAGE_ERR_MASK) dev_err(edac_dev->dev, "IOB bus %s access to unimplemented page error\n", - write ? "write" : "read"); + str_write_read(write)); if (reg & WORD_ALIGNED_ERR_MASK) dev_err(edac_dev->dev, "IOB bus %s word aligned access error\n", - write ? "write" : "read"); + str_write_read(write)); if (reg & PAGE_ACCESS_ERR_MASK) dev_err(edac_dev->dev, "IOB bus %s to page out of range access error\n", - write ? "write" : "read"); + str_write_read(write)); if (regmap_write(ctx->edac->rb_map, RBEIR, 0)) return; if (regmap_write(ctx->edac->rb_map, RBCSR, 0)) @@ -1560,7 +1561,7 @@ static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev) err_addr_lo = readl(ctx->dev_csr + IOBBATRANSERRREQINFOL); err_addr_hi = readl(ctx->dev_csr + IOBBATRANSERRREQINFOH); dev_err(edac_dev->dev, "IOB BA %s access at 0x%02X.%08X (0x%08X)\n", - REQTYPE_F2_RD(err_addr_hi) ? "read" : "write", + str_read_write(REQTYPE_F2_RD(err_addr_hi)), ERRADDRH_F2_RD(err_addr_hi), err_addr_lo, err_addr_hi); if (reg & WRERR_RESP_MASK) dev_err(edac_dev->dev, "IOB BA requestor ID 0x%08X\n", @@ -1611,7 +1612,7 @@ static void xgene_edac_pa_report(struct edac_device_ctl_info *edac_dev) dev_err(edac_dev->dev, "%sAXI slave 0 illegal %s access @ 0x%02X.%08X (0x%08X)\n", reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "", - REQTYPE_RD(err_addr_hi) ? "read" : "write", + str_read_write(REQTYPE_RD(err_addr_hi)), ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi); writel(reg, ctx->dev_csr + IOBAXIS0TRANSERRINTSTS); @@ -1625,7 +1626,7 @@ static void xgene_edac_pa_report(struct edac_device_ctl_info *edac_dev) dev_err(edac_dev->dev, "%sAXI slave 1 illegal %s access @ 0x%02X.%08X (0x%08X)\n", reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "", - REQTYPE_RD(err_addr_hi) ? "read" : "write", + str_read_write(REQTYPE_RD(err_addr_hi)), ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi); writel(reg, ctx->dev_csr + IOBAXIS1TRANSERRINTSTS); } From 12378e1c3ff8259d6269980dcfd0cbb46735ade7 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sat, 1 Feb 2025 14:09:54 +0100 Subject: [PATCH 05/19] EDAC/amd64: Simplify return statement in dct_ecc_enabled() Simplify the return statement to improve the code's readability. No functional changes. Signed-off-by: Thorsten Blum Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Qiuxu Zhuo Reviewed-by: Yazen Ghannam Link: https://lore.kernel.org/r/20250201130953.1377-2-thorsten.blum@linux.dev --- drivers/edac/amd64_edac.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index d133a5be58905..90f0eb7cc5b9b 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3354,10 +3354,7 @@ static bool dct_ecc_enabled(struct amd64_pvt *pvt) edac_dbg(3, "Node %d: DRAM ECC %s.\n", nid, str_enabled_disabled(ecc_en)); - if (!ecc_en || !nb_mce_en) - return false; - else - return true; + return ecc_en && nb_mce_en; } static bool umc_ecc_enabled(struct amd64_pvt *pvt) From ac2fbe0948a551e9732b3c5ebf0a37281af68df2 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 31 Jan 2025 21:27:02 +0100 Subject: [PATCH 06/19] EDAC/igen6: Constify struct res_config The res_config structs are not modified in this driver. Constifying these structures moves some data to a read-only section, so increase overall security, especially when the structure holds some function pointers. On a x86_64, with allmodconfig, as an example: Before: ====== text data bss dec hex filename 36777 2479 4304 43560 aa28 drivers/edac/igen6_edac.o After: ===== text data bss dec hex filename 37297 1959 4304 43560 aa28 drivers/edac/igen6_edac.o Signed-off-by: Christophe JAILLET Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Qiuxu Zhuo Link: https://lore.kernel.org/r/a06153870951a64b438e76adf97d440e02c1a1fc.1738355198.git.christophe.jaillet@wanadoo.fr --- drivers/edac/igen6_edac.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c index fdf3a84fe6988..38e624209b0f4 100644 --- a/drivers/edac/igen6_edac.c +++ b/drivers/edac/igen6_edac.c @@ -125,7 +125,7 @@ #define MEM_SLICE_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6) #define MEM_SLICE_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26) -static struct res_config { +static const struct res_config { bool machine_check; int num_imc; u32 imc_base; @@ -472,7 +472,7 @@ static u64 rpl_p_err_addr(u64 ecclog) return ECC_ERROR_LOG_ADDR45(ecclog); } -static struct res_config ehl_cfg = { +static const struct res_config ehl_cfg = { .num_imc = 1, .imc_base = 0x5000, .ibecc_base = 0xdc00, @@ -482,7 +482,7 @@ static struct res_config ehl_cfg = { .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr, }; -static struct res_config icl_cfg = { +static const struct res_config icl_cfg = { .num_imc = 1, .imc_base = 0x5000, .ibecc_base = 0xd800, @@ -492,7 +492,7 @@ static struct res_config icl_cfg = { .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr, }; -static struct res_config tgl_cfg = { +static const struct res_config tgl_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0x5000, @@ -506,7 +506,7 @@ static struct res_config tgl_cfg = { .err_addr_to_imc_addr = tgl_err_addr_to_imc_addr, }; -static struct res_config adl_cfg = { +static const struct res_config adl_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0xd800, @@ -517,7 +517,7 @@ static struct res_config adl_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static struct res_config adl_n_cfg = { +static const struct res_config adl_n_cfg = { .machine_check = true, .num_imc = 1, .imc_base = 0xd800, @@ -528,7 +528,7 @@ static struct res_config adl_n_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static struct res_config rpl_p_cfg = { +static const struct res_config rpl_p_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0xd800, @@ -540,7 +540,7 @@ static struct res_config rpl_p_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static struct res_config mtl_ps_cfg = { +static const struct res_config mtl_ps_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0xd800, @@ -551,7 +551,7 @@ static struct res_config mtl_ps_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static struct res_config mtl_p_cfg = { +static const struct res_config mtl_p_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0xd800, @@ -1374,7 +1374,7 @@ static void unregister_err_handler(void) unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME); } -static void opstate_set(struct res_config *cfg, const struct pci_device_id *ent) +static void opstate_set(const struct res_config *cfg, const struct pci_device_id *ent) { /* * Quirk: Certain SoCs' error reporting interrupts don't work. From 136899ffc462b491f2b18db18fe7cd519d5cb6c2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 19 Sep 2024 18:04:27 +0100 Subject: [PATCH 07/19] EDAC/pnd2: Make read-only const array intlv static Don't populate the const read-only array intlv on the stack at run time, instead make it static. This also shrinks the object size: $ size pnd2_edac.o.* text data bss dec hex filename 15632 264 1384 17280 4380 pnd2_edac.o.new 15644 264 1384 17292 438c pnd2_edac.o.old Signed-off-by: Colin Ian King Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Qiuxu Zhuo Link: https://lore.kernel.org/r/20240919170427.497429-1-colin.i.king@gmail.com --- drivers/edac/pnd2_edac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c index f93f2f2b1cf25..af14c8a3279f7 100644 --- a/drivers/edac/pnd2_edac.c +++ b/drivers/edac/pnd2_edac.c @@ -372,7 +372,7 @@ static int gen_asym_mask(struct b_cr_slice_channel_hash *p, struct b_cr_asym_mem_region1_mchbar *as1, struct b_cr_asym_2way_mem_region_mchbar *as2way) { - const int intlv[] = { 0x5, 0xA, 0x3, 0xC }; + static const int intlv[] = { 0x5, 0xA, 0x3, 0xC }; int mask = 0; if (as2way->asym_2way_interleave_enable) @@ -489,7 +489,7 @@ static int dnv_get_registers(void) */ static int get_registers(void) { - const int intlv[] = { 10, 11, 12, 12 }; + static const int intlv[] = { 10, 11, 12, 12 }; if (RD_REG(&tolud, b_cr_tolud_pci) || RD_REG(&touud_lo, b_cr_touud_lo_pci) || From d59d844e319d97682c8de29b88d2d60922a683b3 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 10 Mar 2025 09:14:01 +0800 Subject: [PATCH 08/19] EDAC/ie31200: Fix the size of EDAC_MC_LAYER_CHIP_SELECT layer The EDAC_MC_LAYER_CHIP_SELECT layer pertains to the rank, not the DIMM. Fix its size to reflect the number of ranks instead of the number of DIMMs. Also delete the unused macros IE31200_{DIMMS,RANKS}. Fixes: 7ee40b897d18 ("ie31200_edac: Introduce the driver") Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Gary Wang Link: https://lore.kernel.org/r/20250310011411.31685-2-qiuxu.zhuo@intel.com --- drivers/edac/ie31200_edac.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index c6188de13c003..10301e17014cc 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -94,8 +94,6 @@ (((did) & PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK) == \ PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK)) -#define IE31200_DIMMS 4 -#define IE31200_RANKS 8 #define IE31200_RANKS_PER_CHANNEL 4 #define IE31200_DIMMS_PER_CHANNEL 2 #define IE31200_CHANNELS 2 @@ -428,7 +426,7 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) nr_channels = how_many_channels(pdev); layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; - layers[0].size = IE31200_DIMMS; + layers[0].size = IE31200_RANKS_PER_CHANNEL; layers[0].is_virt_csrow = true; layers[1].type = EDAC_MC_LAYER_CHANNEL; layers[1].size = nr_channels; From 3427befbbca6b19fe0e37f91d66ce5221de70bf1 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 10 Mar 2025 09:14:02 +0800 Subject: [PATCH 09/19] EDAC/ie31200: Fix the DIMM size mask for several SoCs The DIMM size mask for {Sky, Kaby, Coffee} Lake is not bits{7:0}, but bits{5:0}. Fix it. Fixes: 953dee9bbd24 ("EDAC, ie31200_edac: Add Skylake support") Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Gary Wang Link: https://lore.kernel.org/r/20250310011411.31685-3-qiuxu.zhuo@intel.com --- drivers/edac/ie31200_edac.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 10301e17014cc..2886866cb457a 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -165,6 +165,7 @@ #define IE31200_MAD_DIMM_0_OFFSET 0x5004 #define IE31200_MAD_DIMM_0_OFFSET_SKL 0x500C #define IE31200_MAD_DIMM_SIZE GENMASK_ULL(7, 0) +#define IE31200_MAD_DIMM_SIZE_SKL GENMASK_ULL(5, 0) #define IE31200_MAD_DIMM_A_RANK BIT(17) #define IE31200_MAD_DIMM_A_RANK_SHIFT 17 #define IE31200_MAD_DIMM_A_RANK_SKL BIT(10) @@ -378,7 +379,7 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) static void __skl_populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int chan) { - dd->size = (addr_decode >> (chan << 4)) & IE31200_MAD_DIMM_SIZE; + dd->size = (addr_decode >> (chan << 4)) & IE31200_MAD_DIMM_SIZE_SKL; dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK_SKL << (chan << 4))) ? 1 : 0; dd->x16_width = ((addr_decode & (IE31200_MAD_DIMM_A_WIDTH_SKL << (chan << 4))) >> (IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT + (chan << 4))); From 231e341036d9988447e3b3345cf741a98139199e Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 10 Mar 2025 09:14:03 +0800 Subject: [PATCH 10/19] EDAC/ie31200: Fix the error path order of ie31200_init() The error path order of ie31200_init() is incorrect, fix it. Fixes: 709ed1bcef12 ("EDAC/ie31200: Fallback if host bridge device is already initialized") Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Gary Wang Link: https://lore.kernel.org/r/20250310011411.31685-4-qiuxu.zhuo@intel.com --- drivers/edac/ie31200_edac.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 2886866cb457a..a8dd55ec52cea 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -619,7 +619,7 @@ static int __init ie31200_init(void) pci_rc = pci_register_driver(&ie31200_driver); if (pci_rc < 0) - goto fail0; + return pci_rc; if (!mci_pdev) { ie31200_registered = 0; @@ -630,11 +630,13 @@ static int __init ie31200_init(void) if (mci_pdev) break; } + if (!mci_pdev) { edac_dbg(0, "ie31200 pci_get_device fail\n"); pci_rc = -ENODEV; - goto fail1; + goto fail0; } + pci_rc = ie31200_init_one(mci_pdev, &ie31200_pci_tbl[i]); if (pci_rc < 0) { edac_dbg(0, "ie31200 init fail\n"); @@ -642,12 +644,12 @@ static int __init ie31200_init(void) goto fail1; } } - return 0; + return 0; fail1: - pci_unregister_driver(&ie31200_driver); -fail0: pci_dev_put(mci_pdev); +fail0: + pci_unregister_driver(&ie31200_driver); return pci_rc; } From 44eae52089ebcb6b22c63a2506bc610bb70fa927 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 10 Mar 2025 09:14:04 +0800 Subject: [PATCH 11/19] EDAC/ie31200: Fix the 3rd parameter name of *populate_dimm_info() The 3rd parameter of *populate_dimm_info() pertains to the DIMM index within a channel, not the channel index. Fix the parameter name to dimm to reflect its actual purpose. No functional changes intended. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Gary Wang Link: https://lore.kernel.org/r/20250310011411.31685-5-qiuxu.zhuo@intel.com --- drivers/edac/ie31200_edac.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index a8dd55ec52cea..35f4e8e46ca20 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -377,29 +377,29 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) } static void __skl_populate_dimm_info(struct dimm_data *dd, u32 addr_decode, - int chan) + int dimm) { - dd->size = (addr_decode >> (chan << 4)) & IE31200_MAD_DIMM_SIZE_SKL; - dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK_SKL << (chan << 4))) ? 1 : 0; - dd->x16_width = ((addr_decode & (IE31200_MAD_DIMM_A_WIDTH_SKL << (chan << 4))) >> - (IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT + (chan << 4))); + dd->size = (addr_decode >> (dimm << 4)) & IE31200_MAD_DIMM_SIZE_SKL; + dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK_SKL << (dimm << 4))) ? 1 : 0; + dd->x16_width = ((addr_decode & (IE31200_MAD_DIMM_A_WIDTH_SKL << (dimm << 4))) >> + (IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT + (dimm << 4))); } static void __populate_dimm_info(struct dimm_data *dd, u32 addr_decode, - int chan) + int dimm) { - dd->size = (addr_decode >> (chan << 3)) & IE31200_MAD_DIMM_SIZE; - dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK << chan)) ? 1 : 0; - dd->x16_width = (addr_decode & (IE31200_MAD_DIMM_A_WIDTH << chan)) ? 1 : 0; + dd->size = (addr_decode >> (dimm << 3)) & IE31200_MAD_DIMM_SIZE; + dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK << dimm)) ? 1 : 0; + dd->x16_width = (addr_decode & (IE31200_MAD_DIMM_A_WIDTH << dimm)) ? 1 : 0; } -static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int chan, +static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int dimm, bool skl) { if (skl) - __skl_populate_dimm_info(dd, addr_decode, chan); + __skl_populate_dimm_info(dd, addr_decode, dimm); else - __populate_dimm_info(dd, addr_decode, chan); + __populate_dimm_info(dd, addr_decode, dimm); } From 312e67a03d8bb1745804936a4f6fdb69d64c3435 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 10 Mar 2025 09:14:05 +0800 Subject: [PATCH 12/19] EDAC/ie31200: Simplify the pci_device_id table Use PCI_VDEVICE() to simplify the pci_device_id table. No functional changes intended. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Gary Wang Link: https://lore.kernel.org/r/20250310011411.31685-6-qiuxu.zhuo@intel.com --- drivers/edac/ie31200_edac.c | 44 ++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 35f4e8e46ca20..4e1f85dc16798 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -576,28 +576,28 @@ static void ie31200_remove_one(struct pci_dev *pdev) } static const struct pci_device_id ie31200_pci_tbl[] = { - { PCI_VEND_DEV(INTEL, IE31200_HB_1), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_2), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_3), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_4), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_5), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_6), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_10), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_11), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_12), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_1), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_2), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_3), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_4), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_5), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_6), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_10), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_1), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_2), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_3), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_4), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_5), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_6), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_7), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_8), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_9), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_10), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_11), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_12), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_1), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_2), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_3), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_4), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_5), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_6), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_7), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_8), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10), IE31200 }, { 0, } /* 0 terminated list. */ }; MODULE_DEVICE_TABLE(pci, ie31200_pci_tbl); From 2a52cce6486171a1adb43f8f17ad3f1d1c234c7c Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 10 Mar 2025 09:14:06 +0800 Subject: [PATCH 13/19] EDAC/ie31200: Make the memory controller resources configurable The resources such as MMIO, register offset, register mask, memory DIMM information, ECC error log location, etc., of the memory controller, and the number of memory controllers can be device-ID-specific. It requires adding numerous 'if (device_id == new_id)' special handling cases to the code to support a new SoC. Make these kinds of resources configurable and separate them from the code to facilitate the addition of new SoC support. No functional changes intended. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Gary Wang Link: https://lore.kernel.org/r/20250310011411.31685-7-qiuxu.zhuo@intel.com --- drivers/edac/ie31200_edac.c | 257 ++++++++++++++++-------------------- 1 file changed, 111 insertions(+), 146 deletions(-) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 4e1f85dc16798..71061ab5fa91e 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -101,25 +101,10 @@ /* Intel IE31200 register addresses - device 0 function 0 - DRAM Controller */ #define IE31200_MCHBAR_LOW 0x48 #define IE31200_MCHBAR_HIGH 0x4c -#define IE31200_MCHBAR_MASK GENMASK_ULL(38, 15) -#define IE31200_MMR_WINDOW_SIZE BIT(15) /* * Error Status Register (16b) * - * 15 reserved - * 14 Isochronous TBWRR Run Behind FIFO Full - * (ITCV) - * 13 Isochronous TBWRR Run Behind FIFO Put - * (ITSTV) - * 12 reserved - * 11 MCH Thermal Sensor Event - * for SMI/SCI/SERR (GTSE) - * 10 reserved - * 9 LOCK to non-DRAM Memory Flag (LCKF) - * 8 reserved - * 7 DRAM Throttle Flag (DTF) - * 6:2 reserved * 1 Multi-bit DRAM ECC Error Flag (DMERR) * 0 Single-bit DRAM ECC Error Flag (DSERR) */ @@ -128,65 +113,45 @@ #define IE31200_ERRSTS_CE BIT(0) #define IE31200_ERRSTS_BITS (IE31200_ERRSTS_UE | IE31200_ERRSTS_CE) -/* - * Channel 0 ECC Error Log (64b) - * - * 63:48 Error Column Address (ERRCOL) - * 47:32 Error Row Address (ERRROW) - * 31:29 Error Bank Address (ERRBANK) - * 28:27 Error Rank Address (ERRRANK) - * 26:24 reserved - * 23:16 Error Syndrome (ERRSYND) - * 15: 2 reserved - * 1 Multiple Bit Error Status (MERRSTS) - * 0 Correctable Error Status (CERRSTS) - */ - -#define IE31200_C0ECCERRLOG 0x40c8 -#define IE31200_C1ECCERRLOG 0x44c8 -#define IE31200_C0ECCERRLOG_SKL 0x4048 -#define IE31200_C1ECCERRLOG_SKL 0x4448 -#define IE31200_ECCERRLOG_CE BIT(0) -#define IE31200_ECCERRLOG_UE BIT(1) -#define IE31200_ECCERRLOG_RANK_BITS GENMASK_ULL(28, 27) -#define IE31200_ECCERRLOG_RANK_SHIFT 27 -#define IE31200_ECCERRLOG_SYNDROME_BITS GENMASK_ULL(23, 16) -#define IE31200_ECCERRLOG_SYNDROME_SHIFT 16 - -#define IE31200_ECCERRLOG_SYNDROME(log) \ - ((log & IE31200_ECCERRLOG_SYNDROME_BITS) >> \ - IE31200_ECCERRLOG_SYNDROME_SHIFT) - #define IE31200_CAPID0 0xe4 #define IE31200_CAPID0_PDCD BIT(4) #define IE31200_CAPID0_DDPCD BIT(6) #define IE31200_CAPID0_ECC BIT(1) -#define IE31200_MAD_DIMM_0_OFFSET 0x5004 -#define IE31200_MAD_DIMM_0_OFFSET_SKL 0x500C -#define IE31200_MAD_DIMM_SIZE GENMASK_ULL(7, 0) -#define IE31200_MAD_DIMM_SIZE_SKL GENMASK_ULL(5, 0) -#define IE31200_MAD_DIMM_A_RANK BIT(17) -#define IE31200_MAD_DIMM_A_RANK_SHIFT 17 -#define IE31200_MAD_DIMM_A_RANK_SKL BIT(10) -#define IE31200_MAD_DIMM_A_RANK_SKL_SHIFT 10 -#define IE31200_MAD_DIMM_A_WIDTH BIT(19) -#define IE31200_MAD_DIMM_A_WIDTH_SHIFT 19 -#define IE31200_MAD_DIMM_A_WIDTH_SKL GENMASK_ULL(9, 8) -#define IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT 8 - /* Skylake reports 1GB increments, everything else is 256MB */ #define IE31200_PAGES(n, skl) \ (n << (28 + (2 * skl) - PAGE_SHIFT)) +/* Non-constant mask variant of FIELD_GET() */ +#define field_get(_mask, _reg) (((_reg) & (_mask)) >> (ffs(_mask) - 1)) + static int nr_channels; static struct pci_dev *mci_pdev; static int ie31200_registered = 1; +struct res_config { + enum mem_type mtype; + /* Host MMIO configuration register */ + u64 reg_mchbar_mask; + u64 reg_mchbar_window_size; + /* ECC error log register */ + u64 reg_eccerrlog_offset[IE31200_CHANNELS]; + u64 reg_eccerrlog_ce_mask; + u64 reg_eccerrlog_ue_mask; + u64 reg_eccerrlog_rank_mask; + u64 reg_eccerrlog_syndrome_mask; + /* DIMM characteristics register */ + u64 reg_mad_dimm_offset[IE31200_CHANNELS]; + u32 reg_mad_dimm_size_mask[IE31200_DIMMS_PER_CHANNEL]; + u32 reg_mad_dimm_rank_mask[IE31200_DIMMS_PER_CHANNEL]; + u32 reg_mad_dimm_width_mask[IE31200_DIMMS_PER_CHANNEL]; +}; + struct ie31200_priv { void __iomem *window; void __iomem *c0errlog; void __iomem *c1errlog; + struct res_config *cfg; }; enum ie31200_chips { @@ -250,12 +215,6 @@ static bool ecc_capable(struct pci_dev *pdev) return true; } -static int eccerrlog_row(u64 log) -{ - return ((log & IE31200_ECCERRLOG_RANK_BITS) >> - IE31200_ECCERRLOG_RANK_SHIFT); -} - static void ie31200_clear_error_info(struct mem_ctl_info *mci) { /* @@ -308,6 +267,8 @@ static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci, static void ie31200_process_error_info(struct mem_ctl_info *mci, struct ie31200_error_info *info) { + struct ie31200_priv *priv = mci->pvt_info; + struct res_config *cfg = priv->cfg; int channel; u64 log; @@ -322,17 +283,17 @@ static void ie31200_process_error_info(struct mem_ctl_info *mci, for (channel = 0; channel < nr_channels; channel++) { log = info->eccerrlog[channel]; - if (log & IE31200_ECCERRLOG_UE) { + if (log & cfg->reg_eccerrlog_ue_mask) { edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, - eccerrlog_row(log), + field_get(cfg->reg_eccerrlog_rank_mask, log), channel, -1, "ie31200 UE", ""); - } else if (log & IE31200_ECCERRLOG_CE) { + } else if (log & cfg->reg_eccerrlog_ce_mask) { edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, - IE31200_ECCERRLOG_SYNDROME(log), - eccerrlog_row(log), + field_get(cfg->reg_eccerrlog_syndrome_mask, log), + field_get(cfg->reg_eccerrlog_rank_mask, log), channel, -1, "ie31200 CE", ""); } @@ -347,7 +308,7 @@ static void ie31200_check(struct mem_ctl_info *mci) ie31200_process_error_info(mci, &info); } -static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) +static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev, struct res_config *cfg) { union { u64 mchbar; @@ -360,7 +321,7 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) pci_read_config_dword(pdev, IE31200_MCHBAR_LOW, &u.mchbar_low); pci_read_config_dword(pdev, IE31200_MCHBAR_HIGH, &u.mchbar_high); - u.mchbar &= IE31200_MCHBAR_MASK; + u.mchbar &= cfg->reg_mchbar_mask; if (u.mchbar != (resource_size_t)u.mchbar) { ie31200_printk(KERN_ERR, "mmio space beyond accessible range (0x%llx)\n", @@ -368,7 +329,7 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) return NULL; } - window = ioremap(u.mchbar, IE31200_MMR_WINDOW_SIZE); + window = ioremap(u.mchbar, cfg->reg_mchbar_window_size); if (!window) ie31200_printk(KERN_ERR, "Cannot map mmio space at 0x%llx\n", (unsigned long long)u.mchbar); @@ -376,41 +337,22 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) return window; } -static void __skl_populate_dimm_info(struct dimm_data *dd, u32 addr_decode, - int dimm) -{ - dd->size = (addr_decode >> (dimm << 4)) & IE31200_MAD_DIMM_SIZE_SKL; - dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK_SKL << (dimm << 4))) ? 1 : 0; - dd->x16_width = ((addr_decode & (IE31200_MAD_DIMM_A_WIDTH_SKL << (dimm << 4))) >> - (IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT + (dimm << 4))); -} - -static void __populate_dimm_info(struct dimm_data *dd, u32 addr_decode, - int dimm) -{ - dd->size = (addr_decode >> (dimm << 3)) & IE31200_MAD_DIMM_SIZE; - dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK << dimm)) ? 1 : 0; - dd->x16_width = (addr_decode & (IE31200_MAD_DIMM_A_WIDTH << dimm)) ? 1 : 0; -} - static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int dimm, - bool skl) + struct res_config *cfg) { - if (skl) - __skl_populate_dimm_info(dd, addr_decode, dimm); - else - __populate_dimm_info(dd, addr_decode, dimm); + dd->size = field_get(cfg->reg_mad_dimm_size_mask[dimm], addr_decode); + dd->dual_rank = field_get(cfg->reg_mad_dimm_rank_mask[dimm], addr_decode); + dd->x16_width = field_get(cfg->reg_mad_dimm_width_mask[dimm], addr_decode); } - -static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) +static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) { int i, j, ret; struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; void __iomem *window; struct ie31200_priv *priv; - u32 addr_decode[IE31200_CHANNELS], mad_offset; + u32 addr_decode[IE31200_CHANNELS]; /* * Kaby Lake, Coffee Lake seem to work like Skylake. Please re-visit @@ -437,7 +379,7 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) if (!mci) return -ENOMEM; - window = ie31200_map_mchbar(pdev); + window = ie31200_map_mchbar(pdev, cfg); if (!window) { ret = -ENODEV; goto fail_free; @@ -445,32 +387,22 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) edac_dbg(3, "MC: init mci\n"); mci->pdev = &pdev->dev; - if (skl) - mci->mtype_cap = MEM_FLAG_DDR4; - else - mci->mtype_cap = MEM_FLAG_DDR3; + mci->mtype_cap = BIT(cfg->mtype); mci->edac_ctl_cap = EDAC_FLAG_SECDED; mci->edac_cap = EDAC_FLAG_SECDED; mci->mod_name = EDAC_MOD_STR; - mci->ctl_name = ie31200_devs[dev_idx].ctl_name; + mci->ctl_name = ie31200_devs[0].ctl_name; mci->dev_name = pci_name(pdev); mci->edac_check = ie31200_check; mci->ctl_page_to_phys = NULL; priv = mci->pvt_info; priv->window = window; - if (skl) { - priv->c0errlog = window + IE31200_C0ECCERRLOG_SKL; - priv->c1errlog = window + IE31200_C1ECCERRLOG_SKL; - mad_offset = IE31200_MAD_DIMM_0_OFFSET_SKL; - } else { - priv->c0errlog = window + IE31200_C0ECCERRLOG; - priv->c1errlog = window + IE31200_C1ECCERRLOG; - mad_offset = IE31200_MAD_DIMM_0_OFFSET; - } + priv->c0errlog = window + cfg->reg_eccerrlog_offset[0]; + priv->c1errlog = window + cfg->reg_eccerrlog_offset[1]; + priv->cfg = cfg; for (i = 0; i < IE31200_CHANNELS; i++) { - addr_decode[i] = readl(window + mad_offset + - (i * 4)); + addr_decode[i] = readl(window + cfg->reg_mad_dimm_offset[i]); edac_dbg(0, "addr_decode: 0x%x\n", addr_decode[i]); } @@ -486,8 +418,7 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) struct dimm_info *dimm; unsigned long nr_pages; - populate_dimm_info(&dimm_info, addr_decode[j], i, - skl); + populate_dimm_info(&dimm_info, addr_decode[j], i, cfg); edac_dbg(0, "size: 0x%x, rank: %d, width: %d\n", dimm_info.size, dimm_info.dual_rank, @@ -503,10 +434,7 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) dimm->nr_pages = nr_pages; edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); dimm->grain = 8; /* just a guess */ - if (skl) - dimm->mtype = MEM_DDR4; - else - dimm->mtype = MEM_DDR3; + dimm->mtype = cfg->mtype; dimm->dtype = DEV_UNKNOWN; dimm->edac_mode = EDAC_UNKNOWN; } @@ -514,10 +442,7 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) dimm->nr_pages = nr_pages; edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); dimm->grain = 8; /* same guess */ - if (skl) - dimm->mtype = MEM_DDR4; - else - dimm->mtype = MEM_DDR3; + dimm->mtype = cfg->mtype; dimm->dtype = DEV_UNKNOWN; dimm->edac_mode = EDAC_UNKNOWN; } @@ -552,7 +477,7 @@ static int ie31200_init_one(struct pci_dev *pdev, edac_dbg(0, "MC:\n"); if (pci_enable_device(pdev) < 0) return -EIO; - rc = ie31200_probe1(pdev, ent->driver_data); + rc = ie31200_probe1(pdev, (struct res_config *)ent->driver_data); if (rc == 0 && !mci_pdev) mci_pdev = pci_dev_get(pdev); @@ -575,29 +500,69 @@ static void ie31200_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } +static struct res_config snb_cfg = { + .mtype = MEM_DDR3, + .reg_mchbar_mask = GENMASK_ULL(38, 15), + .reg_mchbar_window_size = BIT_ULL(15), + .reg_eccerrlog_offset[0] = 0x40c8, + .reg_eccerrlog_offset[1] = 0x44c8, + .reg_eccerrlog_ce_mask = BIT_ULL(0), + .reg_eccerrlog_ue_mask = BIT_ULL(1), + .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27), + .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16), + .reg_mad_dimm_offset[0] = 0x5004, + .reg_mad_dimm_offset[1] = 0x5008, + .reg_mad_dimm_size_mask[0] = GENMASK(7, 0), + .reg_mad_dimm_size_mask[1] = GENMASK(15, 8), + .reg_mad_dimm_rank_mask[0] = BIT(17), + .reg_mad_dimm_rank_mask[1] = BIT(18), + .reg_mad_dimm_width_mask[0] = BIT(19), + .reg_mad_dimm_width_mask[1] = BIT(20), +}; + +static struct res_config skl_cfg = { + .mtype = MEM_DDR4, + .reg_mchbar_mask = GENMASK_ULL(38, 15), + .reg_mchbar_window_size = BIT_ULL(15), + .reg_eccerrlog_offset[0] = 0x4048, + .reg_eccerrlog_offset[1] = 0x4448, + .reg_eccerrlog_ce_mask = BIT_ULL(0), + .reg_eccerrlog_ue_mask = BIT_ULL(1), + .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27), + .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16), + .reg_mad_dimm_offset[0] = 0x500c, + .reg_mad_dimm_offset[1] = 0x5010, + .reg_mad_dimm_size_mask[0] = GENMASK(5, 0), + .reg_mad_dimm_size_mask[1] = GENMASK(21, 16), + .reg_mad_dimm_rank_mask[0] = BIT(10), + .reg_mad_dimm_rank_mask[1] = BIT(26), + .reg_mad_dimm_width_mask[0] = GENMASK(9, 8), + .reg_mad_dimm_width_mask[1] = GENMASK(25, 24), +}; + static const struct pci_device_id ie31200_pci_tbl[] = { - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_1), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_2), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_3), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_4), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_5), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_6), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_7), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_8), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_9), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_10), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_11), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_12), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_1), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_2), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_3), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_4), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_5), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_6), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_7), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_8), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_1), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_2), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_3), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_4), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_5), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_6), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_7), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_8), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_9), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_10), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_11), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_12), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_1), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_2), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_3), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_4), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_5), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_6), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_7), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_8), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10), (kernel_ulong_t)&skl_cfg }, { 0, } /* 0 terminated list. */ }; MODULE_DEVICE_TABLE(pci, ie31200_pci_tbl); From afdbc36555567a622843c5330174260f7ea954fc Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 10 Mar 2025 09:14:07 +0800 Subject: [PATCH 14/19] EDAC/ie31200: Make struct dimm_data contain decoded information The current dimm_data structure contains encoded DIMM information, which needs to be decoded for a given SoC when it is used. Make it contain decoded information when it's initialized so that the places where it is used do not need to decode it again, thereby simplifying the code. No functional changes intended. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Gary Wang Link: https://lore.kernel.org/r/20250310011411.31685-8-qiuxu.zhuo@intel.com --- drivers/edac/ie31200_edac.c | 62 ++++++++++++------------------------- 1 file changed, 19 insertions(+), 43 deletions(-) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 71061ab5fa91e..865a2f838317f 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -84,16 +84,6 @@ #define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9 0x3ec6 #define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10 0x3eca -/* Test if HB is for Skylake or later. */ -#define DEVICE_ID_SKYLAKE_OR_LATER(did) \ - (((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_8) || \ - ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_9) || \ - ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_10) || \ - ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_11) || \ - ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_12) || \ - (((did) & PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK) == \ - PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK)) - #define IE31200_RANKS_PER_CHANNEL 4 #define IE31200_DIMMS_PER_CHANNEL 2 #define IE31200_CHANNELS 2 @@ -118,10 +108,6 @@ #define IE31200_CAPID0_DDPCD BIT(6) #define IE31200_CAPID0_ECC BIT(1) -/* Skylake reports 1GB increments, everything else is 256MB */ -#define IE31200_PAGES(n, skl) \ - (n << (28 + (2 * skl) - PAGE_SHIFT)) - /* Non-constant mask variant of FIELD_GET() */ #define field_get(_mask, _reg) (((_reg) & (_mask)) >> (ffs(_mask) - 1)) @@ -141,6 +127,7 @@ struct res_config { u64 reg_eccerrlog_rank_mask; u64 reg_eccerrlog_syndrome_mask; /* DIMM characteristics register */ + u64 reg_mad_dimm_size_granularity; u64 reg_mad_dimm_offset[IE31200_CHANNELS]; u32 reg_mad_dimm_size_mask[IE31200_DIMMS_PER_CHANNEL]; u32 reg_mad_dimm_rank_mask[IE31200_DIMMS_PER_CHANNEL]; @@ -175,9 +162,9 @@ static const struct ie31200_dev_info ie31200_devs[] = { }; struct dimm_data { - u8 size; /* in multiples of 256MB, except Skylake is 1GB */ - u8 dual_rank : 1, - x16_width : 2; /* 0 means x8 width */ + u64 size; /* in bytes */ + u8 ranks; + enum dev_type dtype; }; static int how_many_channels(struct pci_dev *pdev) @@ -340,26 +327,20 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev, struct res_config static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int dimm, struct res_config *cfg) { - dd->size = field_get(cfg->reg_mad_dimm_size_mask[dimm], addr_decode); - dd->dual_rank = field_get(cfg->reg_mad_dimm_rank_mask[dimm], addr_decode); - dd->x16_width = field_get(cfg->reg_mad_dimm_width_mask[dimm], addr_decode); + dd->size = field_get(cfg->reg_mad_dimm_size_mask[dimm], addr_decode) * cfg->reg_mad_dimm_size_granularity; + dd->ranks = field_get(cfg->reg_mad_dimm_rank_mask[dimm], addr_decode) + 1; + dd->dtype = field_get(cfg->reg_mad_dimm_width_mask[dimm], addr_decode) + DEV_X8; } static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) { - int i, j, ret; + int i, j, k, ret; struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; void __iomem *window; struct ie31200_priv *priv; u32 addr_decode[IE31200_CHANNELS]; - /* - * Kaby Lake, Coffee Lake seem to work like Skylake. Please re-visit - * this logic when adding new CPU support. - */ - bool skl = DEVICE_ID_SKYLAKE_OR_LATER(pdev->device); - edac_dbg(0, "MC:\n"); if (!ecc_capable(pdev)) { @@ -419,32 +400,25 @@ static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) unsigned long nr_pages; populate_dimm_info(&dimm_info, addr_decode[j], i, cfg); - edac_dbg(0, "size: 0x%x, rank: %d, width: %d\n", - dimm_info.size, - dimm_info.dual_rank, - dimm_info.x16_width); + edac_dbg(0, "channel: %d, dimm: %d, size: %lld MiB, ranks: %d, DRAM chip type: %d\n", + j, i, dimm_info.size >> 20, + dimm_info.ranks, + dimm_info.dtype); - nr_pages = IE31200_PAGES(dimm_info.size, skl); + nr_pages = MiB_TO_PAGES(dimm_info.size >> 20); if (nr_pages == 0) continue; - if (dimm_info.dual_rank) { - nr_pages = nr_pages / 2; - dimm = edac_get_dimm(mci, (i * 2) + 1, j, 0); + nr_pages = nr_pages / dimm_info.ranks; + for (k = 0; k < dimm_info.ranks; k++) { + dimm = edac_get_dimm(mci, (i * dimm_info.ranks) + k, j, 0); dimm->nr_pages = nr_pages; edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); dimm->grain = 8; /* just a guess */ dimm->mtype = cfg->mtype; - dimm->dtype = DEV_UNKNOWN; + dimm->dtype = dimm_info.dtype; dimm->edac_mode = EDAC_UNKNOWN; } - dimm = edac_get_dimm(mci, i * 2, j, 0); - dimm->nr_pages = nr_pages; - edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); - dimm->grain = 8; /* same guess */ - dimm->mtype = cfg->mtype; - dimm->dtype = DEV_UNKNOWN; - dimm->edac_mode = EDAC_UNKNOWN; } } @@ -510,6 +484,7 @@ static struct res_config snb_cfg = { .reg_eccerrlog_ue_mask = BIT_ULL(1), .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27), .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16), + .reg_mad_dimm_size_granularity = BIT_ULL(28), .reg_mad_dimm_offset[0] = 0x5004, .reg_mad_dimm_offset[1] = 0x5008, .reg_mad_dimm_size_mask[0] = GENMASK(7, 0), @@ -530,6 +505,7 @@ static struct res_config skl_cfg = { .reg_eccerrlog_ue_mask = BIT_ULL(1), .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27), .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16), + .reg_mad_dimm_size_granularity = BIT_ULL(30), .reg_mad_dimm_offset[0] = 0x500c, .reg_mad_dimm_offset[1] = 0x5010, .reg_mad_dimm_size_mask[0] = GENMASK(5, 0), From a217961b83ae845fe7247255f0b8a01e3b2ed8a2 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 10 Mar 2025 09:14:08 +0800 Subject: [PATCH 15/19] EDAC/ie31200: Fold the two channel loops into one loop Fold the two channel loops to simplify the code and improve readability. Also, delete the comments related to the DRB register, as this register is not used here. No functional changes intended. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Gary Wang Link: https://lore.kernel.org/r/20250310011411.31685-9-qiuxu.zhuo@intel.com --- drivers/edac/ie31200_edac.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 865a2f838317f..01d719845a88d 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -339,7 +339,7 @@ static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) struct edac_mc_layer layers[2]; void __iomem *window; struct ie31200_priv *priv; - u32 addr_decode[IE31200_CHANNELS]; + u32 addr_decode; edac_dbg(0, "MC:\n"); @@ -383,25 +383,17 @@ static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) priv->cfg = cfg; for (i = 0; i < IE31200_CHANNELS; i++) { - addr_decode[i] = readl(window + cfg->reg_mad_dimm_offset[i]); - edac_dbg(0, "addr_decode: 0x%x\n", addr_decode[i]); - } + addr_decode = readl(window + cfg->reg_mad_dimm_offset[i]); + edac_dbg(0, "addr_decode: 0x%x\n", addr_decode); - /* - * The dram rank boundary (DRB) reg values are boundary addresses - * for each DRAM rank with a granularity of 64MB. DRB regs are - * cumulative; the last one will contain the total memory - * contained in all ranks. - */ - for (i = 0; i < IE31200_DIMMS_PER_CHANNEL; i++) { - for (j = 0; j < IE31200_CHANNELS; j++) { + for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) { struct dimm_data dimm_info; struct dimm_info *dimm; unsigned long nr_pages; - populate_dimm_info(&dimm_info, addr_decode[j], i, cfg); + populate_dimm_info(&dimm_info, addr_decode, j, cfg); edac_dbg(0, "channel: %d, dimm: %d, size: %lld MiB, ranks: %d, DRAM chip type: %d\n", - j, i, dimm_info.size >> 20, + i, j, dimm_info.size >> 20, dimm_info.ranks, dimm_info.dtype); @@ -411,7 +403,7 @@ static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) nr_pages = nr_pages / dimm_info.ranks; for (k = 0; k < dimm_info.ranks; k++) { - dimm = edac_get_dimm(mci, (i * dimm_info.ranks) + k, j, 0); + dimm = edac_get_dimm(mci, (j * dimm_info.ranks) + k, i, 0); dimm->nr_pages = nr_pages; edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); dimm->grain = 8; /* just a guess */ From 498550e1fa7c1cb52952433d64f9230c247c7c00 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 10 Mar 2025 09:14:09 +0800 Subject: [PATCH 16/19] EDAC/ie31200: Break up ie31200_probe1() Split ie31200_probe1() into two helper functions to easily extend support for multiple memory controllers. No functional changes intended. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Gary Wang Link: https://lore.kernel.org/r/20250310011411.31685-10-qiuxu.zhuo@intel.com --- drivers/edac/ie31200_edac.c | 108 ++++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 47 deletions(-) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 01d719845a88d..70be0d00a188a 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -332,21 +332,51 @@ static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int dimm, dd->dtype = field_get(cfg->reg_mad_dimm_width_mask[dimm], addr_decode) + DEV_X8; } -static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) +static void ie31200_get_dimm_config(struct mem_ctl_info *mci, void __iomem *window, + struct res_config *cfg) { - int i, j, k, ret; - struct mem_ctl_info *mci = NULL; - struct edac_mc_layer layers[2]; - void __iomem *window; - struct ie31200_priv *priv; + struct dimm_data dimm_info; + struct dimm_info *dimm; + unsigned long nr_pages; u32 addr_decode; + int i, j, k; - edac_dbg(0, "MC:\n"); + for (i = 0; i < IE31200_CHANNELS; i++) { + addr_decode = readl(window + cfg->reg_mad_dimm_offset[i]); + edac_dbg(0, "addr_decode: 0x%x\n", addr_decode); - if (!ecc_capable(pdev)) { - ie31200_printk(KERN_INFO, "No ECC support\n"); - return -ENODEV; + for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) { + populate_dimm_info(&dimm_info, addr_decode, j, cfg); + edac_dbg(0, "channel: %d, dimm: %d, size: %lld MiB, ranks: %d, DRAM chip type: %d\n", + i, j, dimm_info.size >> 20, + dimm_info.ranks, + dimm_info.dtype); + + nr_pages = MiB_TO_PAGES(dimm_info.size >> 20); + if (nr_pages == 0) + continue; + + nr_pages = nr_pages / dimm_info.ranks; + for (k = 0; k < dimm_info.ranks; k++) { + dimm = edac_get_dimm(mci, (j * dimm_info.ranks) + k, i, 0); + dimm->nr_pages = nr_pages; + edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); + dimm->grain = 8; /* just a guess */ + dimm->mtype = cfg->mtype; + dimm->dtype = dimm_info.dtype; + dimm->edac_mode = EDAC_UNKNOWN; + } + } } +} + +static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg) +{ + struct edac_mc_layer layers[2]; + struct ie31200_priv *priv; + struct mem_ctl_info *mci; + void __iomem *window; + int ret; nr_channels = how_many_channels(pdev); layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; @@ -382,38 +412,7 @@ static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) priv->c1errlog = window + cfg->reg_eccerrlog_offset[1]; priv->cfg = cfg; - for (i = 0; i < IE31200_CHANNELS; i++) { - addr_decode = readl(window + cfg->reg_mad_dimm_offset[i]); - edac_dbg(0, "addr_decode: 0x%x\n", addr_decode); - - for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) { - struct dimm_data dimm_info; - struct dimm_info *dimm; - unsigned long nr_pages; - - populate_dimm_info(&dimm_info, addr_decode, j, cfg); - edac_dbg(0, "channel: %d, dimm: %d, size: %lld MiB, ranks: %d, DRAM chip type: %d\n", - i, j, dimm_info.size >> 20, - dimm_info.ranks, - dimm_info.dtype); - - nr_pages = MiB_TO_PAGES(dimm_info.size >> 20); - if (nr_pages == 0) - continue; - - nr_pages = nr_pages / dimm_info.ranks; - for (k = 0; k < dimm_info.ranks; k++) { - dimm = edac_get_dimm(mci, (j * dimm_info.ranks) + k, i, 0); - dimm->nr_pages = nr_pages; - edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); - dimm->grain = 8; /* just a guess */ - dimm->mtype = cfg->mtype; - dimm->dtype = dimm_info.dtype; - dimm->edac_mode = EDAC_UNKNOWN; - } - } - } - + ie31200_get_dimm_config(mci, window, cfg); ie31200_clear_error_info(mci); if (edac_mc_add_mc(mci)) { @@ -422,19 +421,34 @@ static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) goto fail_unmap; } - /* get this far and it's successful */ - edac_dbg(3, "MC: success\n"); return 0; - fail_unmap: iounmap(window); - fail_free: edac_mc_free(mci); - return ret; } +static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) +{ + int ret; + + edac_dbg(0, "MC:\n"); + + if (!ecc_capable(pdev)) { + ie31200_printk(KERN_INFO, "No ECC support\n"); + return -ENODEV; + } + + ret = ie31200_register_mci(pdev, cfg); + if (ret) + return ret; + + /* get this far and it's successful. */ + edac_dbg(3, "MC: success\n"); + return 0; +} + static int ie31200_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { From d0742284ec6da1435acdda428088136944c4c3c2 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 10 Mar 2025 09:14:10 +0800 Subject: [PATCH 17/19] EDAC/ie31200: Add Intel Raptor Lake-S SoCs support The Intel Raptor Lake-S SoC contains two memory controllers with DDR5 memory type and out-of-band ECC capability. The resource definitions of the memory controller are different from previous generations. One notable difference is that the PCI ERRSTS register is deprecated and is not used to indicate the presence of errors or to clear the MMIO-mapped ECC error log regsiters. Extend the ie31200_edac driver to support multiple memory controllers, add a resource configuration table and use an MSR register to clear the ECC error log registers to provide EDAC support for Raptor Lake-S SoCs. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Gary Wang Link: https://lore.kernel.org/r/20250310011411.31685-11-qiuxu.zhuo@intel.com --- drivers/edac/ie31200_edac.c | 182 +++++++++++++++++++++++++++++------- 1 file changed, 149 insertions(+), 33 deletions(-) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 70be0d00a188a..8c0a2beec5370 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -84,9 +84,15 @@ #define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9 0x3ec6 #define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10 0x3eca -#define IE31200_RANKS_PER_CHANNEL 4 +/* Raptor Lake-S */ +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1 0xa703 +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2 0x4640 +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3 0x4630 + +#define IE31200_RANKS_PER_CHANNEL 8 #define IE31200_DIMMS_PER_CHANNEL 2 #define IE31200_CHANNELS 2 +#define IE31200_IMC_NUM 2 /* Intel IE31200 register addresses - device 0 function 0 - DRAM Controller */ #define IE31200_MCHBAR_LOW 0x48 @@ -117,15 +123,20 @@ static int ie31200_registered = 1; struct res_config { enum mem_type mtype; + int imc_num; /* Host MMIO configuration register */ u64 reg_mchbar_mask; u64 reg_mchbar_window_size; /* ECC error log register */ u64 reg_eccerrlog_offset[IE31200_CHANNELS]; u64 reg_eccerrlog_ce_mask; + u64 reg_eccerrlog_ce_ovfl_mask; u64 reg_eccerrlog_ue_mask; + u64 reg_eccerrlog_ue_ovfl_mask; u64 reg_eccerrlog_rank_mask; u64 reg_eccerrlog_syndrome_mask; + /* MSR to clear ECC error log register */ + u32 msr_clear_eccerrlog_offset; /* DIMM characteristics register */ u64 reg_mad_dimm_size_granularity; u64 reg_mad_dimm_offset[IE31200_CHANNELS]; @@ -139,10 +150,18 @@ struct ie31200_priv { void __iomem *c0errlog; void __iomem *c1errlog; struct res_config *cfg; + struct mem_ctl_info *mci; + struct pci_dev *pdev; + struct device dev; }; +static struct ie31200_pvt { + struct ie31200_priv *priv[IE31200_IMC_NUM]; +} ie31200_pvt; + enum ie31200_chips { IE31200 = 0, + IE31200_1 = 1, }; struct ie31200_dev_info { @@ -159,6 +178,9 @@ static const struct ie31200_dev_info ie31200_devs[] = { [IE31200] = { .ctl_name = "IE31200" }, + [IE31200_1] = { + .ctl_name = "IE31200_1" + }, }; struct dimm_data { @@ -202,23 +224,54 @@ static bool ecc_capable(struct pci_dev *pdev) return true; } +#define mci_to_pci_dev(mci) (((struct ie31200_priv *)(mci)->pvt_info)->pdev) + static void ie31200_clear_error_info(struct mem_ctl_info *mci) { + struct ie31200_priv *priv = mci->pvt_info; + struct res_config *cfg = priv->cfg; + + /* + * The PCI ERRSTS register is deprecated. Write the MSR to clear + * the ECC error log registers in all memory controllers. + */ + if (cfg->msr_clear_eccerrlog_offset) { + if (wrmsr_safe(cfg->msr_clear_eccerrlog_offset, + cfg->reg_eccerrlog_ce_mask | + cfg->reg_eccerrlog_ce_ovfl_mask | + cfg->reg_eccerrlog_ue_mask | + cfg->reg_eccerrlog_ue_ovfl_mask, 0) < 0) + ie31200_printk(KERN_ERR, "Failed to wrmsr.\n"); + + return; + } + /* * Clear any error bits. * (Yes, we really clear bits by writing 1 to them.) */ - pci_write_bits16(to_pci_dev(mci->pdev), IE31200_ERRSTS, + pci_write_bits16(mci_to_pci_dev(mci), IE31200_ERRSTS, IE31200_ERRSTS_BITS, IE31200_ERRSTS_BITS); } static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci, struct ie31200_error_info *info) { - struct pci_dev *pdev; + struct pci_dev *pdev = mci_to_pci_dev(mci); struct ie31200_priv *priv = mci->pvt_info; - pdev = to_pci_dev(mci->pdev); + /* + * The PCI ERRSTS register is deprecated, directly read the + * MMIO-mapped ECC error log registers. + */ + if (priv->cfg->msr_clear_eccerrlog_offset) { + info->eccerrlog[0] = lo_hi_readq(priv->c0errlog); + if (nr_channels == 2) + info->eccerrlog[1] = lo_hi_readq(priv->c1errlog); + + ie31200_clear_error_info(mci); + return; + } /* * This is a mess because there is no atomic way to read all the @@ -259,13 +312,15 @@ static void ie31200_process_error_info(struct mem_ctl_info *mci, int channel; u64 log; - if (!(info->errsts & IE31200_ERRSTS_BITS)) - return; + if (!cfg->msr_clear_eccerrlog_offset) { + if (!(info->errsts & IE31200_ERRSTS_BITS)) + return; - if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) { - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, - -1, -1, -1, "UE overwrote CE", ""); - info->errsts = info->errsts2; + if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) { + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, + -1, -1, -1, "UE overwrote CE", ""); + info->errsts = info->errsts2; + } } for (channel = 0; channel < nr_channels; channel++) { @@ -295,7 +350,7 @@ static void ie31200_check(struct mem_ctl_info *mci) ie31200_process_error_info(mci, &info); } -static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev, struct res_config *cfg) +static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev, struct res_config *cfg, int mc) { union { u64 mchbar; @@ -309,6 +364,7 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev, struct res_config pci_read_config_dword(pdev, IE31200_MCHBAR_LOW, &u.mchbar_low); pci_read_config_dword(pdev, IE31200_MCHBAR_HIGH, &u.mchbar_high); u.mchbar &= cfg->reg_mchbar_mask; + u.mchbar += cfg->reg_mchbar_window_size * mc; if (u.mchbar != (resource_size_t)u.mchbar) { ie31200_printk(KERN_ERR, "mmio space beyond accessible range (0x%llx)\n", @@ -333,7 +389,7 @@ static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int dimm, } static void ie31200_get_dimm_config(struct mem_ctl_info *mci, void __iomem *window, - struct res_config *cfg) + struct res_config *cfg, int mc) { struct dimm_data dimm_info; struct dimm_info *dimm; @@ -347,8 +403,8 @@ static void ie31200_get_dimm_config(struct mem_ctl_info *mci, void __iomem *wind for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) { populate_dimm_info(&dimm_info, addr_decode, j, cfg); - edac_dbg(0, "channel: %d, dimm: %d, size: %lld MiB, ranks: %d, DRAM chip type: %d\n", - i, j, dimm_info.size >> 20, + edac_dbg(0, "mc: %d, channel: %d, dimm: %d, size: %lld MiB, ranks: %d, DRAM chip type: %d\n", + mc, i, j, dimm_info.size >> 20, dimm_info.ranks, dimm_info.dtype); @@ -370,7 +426,7 @@ static void ie31200_get_dimm_config(struct mem_ctl_info *mci, void __iomem *wind } } -static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg) +static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg, int mc) { struct edac_mc_layer layers[2]; struct ie31200_priv *priv; @@ -385,24 +441,23 @@ static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg) layers[1].type = EDAC_MC_LAYER_CHANNEL; layers[1].size = nr_channels; layers[1].is_virt_csrow = false; - mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, + mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, sizeof(struct ie31200_priv)); if (!mci) return -ENOMEM; - window = ie31200_map_mchbar(pdev, cfg); + window = ie31200_map_mchbar(pdev, cfg, mc); if (!window) { ret = -ENODEV; goto fail_free; } edac_dbg(3, "MC: init mci\n"); - mci->pdev = &pdev->dev; mci->mtype_cap = BIT(cfg->mtype); mci->edac_ctl_cap = EDAC_FLAG_SECDED; mci->edac_cap = EDAC_FLAG_SECDED; mci->mod_name = EDAC_MOD_STR; - mci->ctl_name = ie31200_devs[0].ctl_name; + mci->ctl_name = ie31200_devs[mc].ctl_name; mci->dev_name = pci_name(pdev); mci->edac_check = ie31200_check; mci->ctl_page_to_phys = NULL; @@ -411,8 +466,22 @@ static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg) priv->c0errlog = window + cfg->reg_eccerrlog_offset[0]; priv->c1errlog = window + cfg->reg_eccerrlog_offset[1]; priv->cfg = cfg; + priv->mci = mci; + priv->pdev = pdev; + device_initialize(&priv->dev); + /* + * The EDAC core uses mci->pdev (pointer to the structure device) + * as the memory controller ID. The SoCs attach one or more memory + * controllers to a single pci_dev (a single pci_dev->dev can + * correspond to multiple memory controllers). + * + * To make mci->pdev unique, assign pci_dev->dev to mci->pdev + * for the first memory controller and assign a unique priv->dev + * to mci->pdev for each additional memory controller. + */ + mci->pdev = mc ? &priv->dev : &pdev->dev; - ie31200_get_dimm_config(mci, window, cfg); + ie31200_get_dimm_config(mci, window, cfg, mc); ie31200_clear_error_info(mci); if (edac_mc_add_mc(mci)) { @@ -421,6 +490,7 @@ static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg) goto fail_unmap; } + ie31200_pvt.priv[mc] = priv; return 0; fail_unmap: iounmap(window); @@ -429,9 +499,27 @@ static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg) return ret; } +static void ie31200_unregister_mcis(void) +{ + struct ie31200_priv *priv; + struct mem_ctl_info *mci; + int i; + + for (i = 0; i < IE31200_IMC_NUM; i++) { + priv = ie31200_pvt.priv[i]; + if (!priv) + continue; + + mci = priv->mci; + edac_mc_del_mc(mci->pdev); + iounmap(priv->window); + edac_mc_free(mci); + } +} + static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) { - int ret; + int i, ret; edac_dbg(0, "MC:\n"); @@ -440,13 +528,19 @@ static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) return -ENODEV; } - ret = ie31200_register_mci(pdev, cfg); - if (ret) - return ret; + for (i = 0; i < cfg->imc_num; i++) { + ret = ie31200_register_mci(pdev, cfg, i); + if (ret) + goto fail_register; + } /* get this far and it's successful. */ edac_dbg(3, "MC: success\n"); return 0; + +fail_register: + ie31200_unregister_mcis(); + return ret; } static int ie31200_init_one(struct pci_dev *pdev, @@ -466,22 +560,15 @@ static int ie31200_init_one(struct pci_dev *pdev, static void ie31200_remove_one(struct pci_dev *pdev) { - struct mem_ctl_info *mci; - struct ie31200_priv *priv; - edac_dbg(0, "\n"); pci_dev_put(mci_pdev); mci_pdev = NULL; - mci = edac_mc_del_mc(&pdev->dev); - if (!mci) - return; - priv = mci->pvt_info; - iounmap(priv->window); - edac_mc_free(mci); + ie31200_unregister_mcis(); } static struct res_config snb_cfg = { .mtype = MEM_DDR3, + .imc_num = 1, .reg_mchbar_mask = GENMASK_ULL(38, 15), .reg_mchbar_window_size = BIT_ULL(15), .reg_eccerrlog_offset[0] = 0x40c8, @@ -503,6 +590,7 @@ static struct res_config snb_cfg = { static struct res_config skl_cfg = { .mtype = MEM_DDR4, + .imc_num = 1, .reg_mchbar_mask = GENMASK_ULL(38, 15), .reg_mchbar_window_size = BIT_ULL(15), .reg_eccerrlog_offset[0] = 0x4048, @@ -522,6 +610,31 @@ static struct res_config skl_cfg = { .reg_mad_dimm_width_mask[1] = GENMASK(25, 24), }; +struct res_config rpl_s_cfg = { + .mtype = MEM_DDR5, + .imc_num = 2, + .reg_mchbar_mask = GENMASK_ULL(41, 17), + .reg_mchbar_window_size = BIT_ULL(16), + .reg_eccerrlog_offset[0] = 0xe048, + .reg_eccerrlog_offset[1] = 0xe848, + .reg_eccerrlog_ce_mask = BIT_ULL(0), + .reg_eccerrlog_ce_ovfl_mask = BIT_ULL(1), + .reg_eccerrlog_ue_mask = BIT_ULL(2), + .reg_eccerrlog_ue_ovfl_mask = BIT_ULL(3), + .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27), + .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16), + .msr_clear_eccerrlog_offset = 0x791, + .reg_mad_dimm_offset[0] = 0xd80c, + .reg_mad_dimm_offset[1] = 0xd810, + .reg_mad_dimm_size_granularity = BIT_ULL(29), + .reg_mad_dimm_size_mask[0] = GENMASK(6, 0), + .reg_mad_dimm_size_mask[1] = GENMASK(22, 16), + .reg_mad_dimm_rank_mask[0] = GENMASK(10, 9), + .reg_mad_dimm_rank_mask[1] = GENMASK(27, 26), + .reg_mad_dimm_width_mask[0] = GENMASK(8, 7), + .reg_mad_dimm_width_mask[1] = GENMASK(25, 24), +}; + static const struct pci_device_id ie31200_pci_tbl[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_1), (kernel_ulong_t)&snb_cfg }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_2), (kernel_ulong_t)&snb_cfg }, @@ -545,6 +658,9 @@ static const struct pci_device_id ie31200_pci_tbl[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_8), (kernel_ulong_t)&skl_cfg }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9), (kernel_ulong_t)&skl_cfg }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3), (kernel_ulong_t)&rpl_s_cfg}, { 0, } /* 0 terminated list. */ }; MODULE_DEVICE_TABLE(pci, ie31200_pci_tbl); From a5db1b296b181c7ced38252d2ff40e3cf87a12df Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 10 Mar 2025 09:14:11 +0800 Subject: [PATCH 18/19] EDAC/ie31200: Switch Raptor Lake-S to interrupt mode Raptor Lake-S SoCs notify correctable memory errors via CMCI (Corrected Machine Check Interrupt). Switch Raptor Lake-S EDAC support from polling to interrupt mode by registering the callback to the MCE decode notifier chain. Note that as Raptor Lake-S SoCs may not recover from uncorrectable memory errors, the system will hang as soon as this type of error occurs, and the registered callback on the MCE decode chain will not be executed. This is the expected behavior. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Gary Wang Link: https://lore.kernel.org/r/20250310011411.31685-12-qiuxu.zhuo@intel.com --- drivers/edac/Kconfig | 2 +- drivers/edac/ie31200_edac.c | 83 ++++++++++++++++++++++++++++++++++--- 2 files changed, 78 insertions(+), 7 deletions(-) diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 2051a7c944a58..b908a118fb322 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -168,7 +168,7 @@ config EDAC_I3200 config EDAC_IE31200 tristate "Intel e312xx" - depends on PCI && X86 + depends on PCI && X86 && X86_MCE_INTEL help Support for error detection and correction on the Intel E3-1200 based DRAM controllers. diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 8c0a2beec5370..2048341495798 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -51,6 +51,7 @@ #include #include +#include #include "edac_module.h" #define EDAC_MOD_STR "ie31200_edac" @@ -123,6 +124,7 @@ static int ie31200_registered = 1; struct res_config { enum mem_type mtype; + bool cmci; int imc_num; /* Host MMIO configuration register */ u64 reg_mchbar_mask; @@ -172,6 +174,7 @@ struct ie31200_error_info { u16 errsts; u16 errsts2; u64 eccerrlog[IE31200_CHANNELS]; + u64 erraddr; }; static const struct ie31200_dev_info ie31200_devs[] = { @@ -327,13 +330,13 @@ static void ie31200_process_error_info(struct mem_ctl_info *mci, log = info->eccerrlog[channel]; if (log & cfg->reg_eccerrlog_ue_mask) { edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, - 0, 0, 0, + info->erraddr >> PAGE_SHIFT, 0, 0, field_get(cfg->reg_eccerrlog_rank_mask, log), channel, -1, "ie31200 UE", ""); } else if (log & cfg->reg_eccerrlog_ce_mask) { edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, - 0, 0, + info->erraddr >> PAGE_SHIFT, 0, field_get(cfg->reg_eccerrlog_syndrome_mask, log), field_get(cfg->reg_eccerrlog_rank_mask, log), channel, -1, @@ -342,14 +345,20 @@ static void ie31200_process_error_info(struct mem_ctl_info *mci, } } -static void ie31200_check(struct mem_ctl_info *mci) +static void __ie31200_check(struct mem_ctl_info *mci, struct mce *mce) { struct ie31200_error_info info; + info.erraddr = mce ? mce->addr : 0; ie31200_get_and_clear_error_info(mci, &info); ie31200_process_error_info(mci, &info); } +static void ie31200_check(struct mem_ctl_info *mci) +{ + __ie31200_check(mci, NULL); +} + static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev, struct res_config *cfg, int mc) { union { @@ -459,7 +468,7 @@ static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg, in mci->mod_name = EDAC_MOD_STR; mci->ctl_name = ie31200_devs[mc].ctl_name; mci->dev_name = pci_name(pdev); - mci->edac_check = ie31200_check; + mci->edac_check = cfg->cmci ? NULL : ie31200_check; mci->ctl_page_to_phys = NULL; priv = mci->pvt_info; priv->window = window; @@ -499,6 +508,58 @@ static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg, in return ret; } +static void mce_check(struct mce *mce) +{ + struct ie31200_priv *priv; + int i; + + for (i = 0; i < IE31200_IMC_NUM; i++) { + priv = ie31200_pvt.priv[i]; + if (!priv) + continue; + + __ie31200_check(priv->mci, mce); + } +} + +static int mce_handler(struct notifier_block *nb, unsigned long val, void *data) +{ + struct mce *mce = (struct mce *)data; + char *type; + + if (mce->kflags & MCE_HANDLED_CEC) + return NOTIFY_DONE; + + /* + * Ignore unless this is a memory related error. + * Don't check MCI_STATUS_ADDRV since it's not set on some CPUs. + */ + if ((mce->status & 0xefff) >> 7 != 1) + return NOTIFY_DONE; + + type = mce->mcgstatus & MCG_STATUS_MCIP ? "Exception" : "Event"; + + edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n", + mce->extcpu, type, mce->mcgstatus, + mce->bank, mce->status); + edac_dbg(0, "TSC 0x%llx\n", mce->tsc); + edac_dbg(0, "ADDR 0x%llx\n", mce->addr); + edac_dbg(0, "MISC 0x%llx\n", mce->misc); + edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n", + mce->cpuvendor, mce->cpuid, mce->time, + mce->socketid, mce->apicid); + + mce_check(mce); + mce->kflags |= MCE_HANDLED_EDAC; + + return NOTIFY_DONE; +} + +static struct notifier_block ie31200_mce_dec = { + .notifier_call = mce_handler, + .priority = MCE_PRIO_EDAC, +}; + static void ie31200_unregister_mcis(void) { struct ie31200_priv *priv; @@ -534,6 +595,13 @@ static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) goto fail_register; } + if (cfg->cmci) { + mce_register_decode_chain(&ie31200_mce_dec); + edac_op_state = EDAC_OPSTATE_INT; + } else { + edac_op_state = EDAC_OPSTATE_POLL; + } + /* get this far and it's successful. */ edac_dbg(3, "MC: success\n"); return 0; @@ -560,9 +628,13 @@ static int ie31200_init_one(struct pci_dev *pdev, static void ie31200_remove_one(struct pci_dev *pdev) { + struct ie31200_priv *priv = ie31200_pvt.priv[0]; + edac_dbg(0, "\n"); pci_dev_put(mci_pdev); mci_pdev = NULL; + if (priv->cfg->cmci) + mce_unregister_decode_chain(&ie31200_mce_dec); ie31200_unregister_mcis(); } @@ -612,6 +684,7 @@ static struct res_config skl_cfg = { struct res_config rpl_s_cfg = { .mtype = MEM_DDR5, + .cmci = true, .imc_num = 2, .reg_mchbar_mask = GENMASK_ULL(41, 17), .reg_mchbar_window_size = BIT_ULL(16), @@ -677,8 +750,6 @@ static int __init ie31200_init(void) int pci_rc, i; edac_dbg(3, "MC:\n"); - /* Ensure that the OPSTATE is set correctly for POLL or NMI */ - opstate_init(); pci_rc = pci_register_driver(&ie31200_driver); if (pci_rc < 0) From f30dab9d888f60949e7e721c278c7c232eed3835 Mon Sep 17 00:00:00 2001 From: David Thompson Date: Wed, 19 Mar 2025 14:16:30 -0400 Subject: [PATCH 19/19] MAINTAINERS: Add a secondary maintainer for bluefield_edac Add David as a secondary maintainer. [ bp: Rewrite commit message. ] Signed-off-by: David Thompson Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20250319181630.2673-1-davthompson@nvidia.com --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index efee40ea589f7..570411b580ad1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8214,6 +8214,7 @@ F: drivers/edac/aspeed_edac.c EDAC-BLUEFIELD M: Shravan Kumar Ramani +M: David Thompson S: Supported F: drivers/edac/bluefield_edac.c