From c29dfd661fe2f8d1b48c7f00590929c04b25bf40 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 22 Jan 2025 07:50:26 +0100 Subject: [PATCH 01/25] EDAC/ie31200: work around false positive build warning gcc-14 produces a bogus warning in some configurations: drivers/edac/ie31200_edac.c: In function 'ie31200_probe1.isra': drivers/edac/ie31200_edac.c:412:26: error: 'dimm_info' is used uninitialized [-Werror=uninitialized] 412 | struct dimm_data dimm_info[IE31200_CHANNELS][IE31200_DIMMS_PER_CHANNEL]; | ^~~~~~~~~ drivers/edac/ie31200_edac.c:412:26: note: 'dimm_info' declared here 412 | struct dimm_data dimm_info[IE31200_CHANNELS][IE31200_DIMMS_PER_CHANNEL]; | ^~~~~~~~~ I don't see any way the uninitialized access could really happen here, but I can see why the compiler gets confused by the two loops. Instead, rework the two nested loops to only read the addr_decode registers and then keep only one instance of the dimm info structure. [Tony: Qiuxu pointed out that the "populate DIMM info" comment was left behind in the refactor and suggested moving it. I deleted the comment as unnecessary in front of a call to populate_dimm_info(). That seems pretty self-describing.] 
Signed-off-by: Arnd Bergmann Acked-by: Jason Baron Signed-off-by: Tony Luck Link: https://lore.kernel.org/all/20250122065031.1321015-1-arnd@kernel.org --- drivers/edac/ie31200_edac.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 4fc16922dc1af..c6188de13c003 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -409,10 +409,9 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) int i, j, ret; struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; - struct dimm_data dimm_info[IE31200_CHANNELS][IE31200_DIMMS_PER_CHANNEL]; void __iomem *window; struct ie31200_priv *priv; - u32 addr_decode, mad_offset; + u32 addr_decode[IE31200_CHANNELS], mad_offset; /* * Kaby Lake, Coffee Lake seem to work like Skylake. Please re-visit @@ -470,19 +469,10 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) mad_offset = IE31200_MAD_DIMM_0_OFFSET; } - /* populate DIMM info */ for (i = 0; i < IE31200_CHANNELS; i++) { - addr_decode = readl(window + mad_offset + + addr_decode[i] = readl(window + mad_offset + (i * 4)); - edac_dbg(0, "addr_decode: 0x%x\n", addr_decode); - for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) { - populate_dimm_info(&dimm_info[i][j], addr_decode, j, - skl); - edac_dbg(0, "size: 0x%x, rank: %d, width: %d\n", - dimm_info[i][j].size, - dimm_info[i][j].dual_rank, - dimm_info[i][j].x16_width); - } + edac_dbg(0, "addr_decode: 0x%x\n", addr_decode[i]); } /* @@ -493,14 +483,22 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) */ for (i = 0; i < IE31200_DIMMS_PER_CHANNEL; i++) { for (j = 0; j < IE31200_CHANNELS; j++) { + struct dimm_data dimm_info; struct dimm_info *dimm; unsigned long nr_pages; - nr_pages = IE31200_PAGES(dimm_info[j][i].size, skl); + populate_dimm_info(&dimm_info, addr_decode[j], i, + skl); + edac_dbg(0, "size: 0x%x, rank: %d, width: %d\n", + dimm_info.size, + 
dimm_info.dual_rank, + dimm_info.x16_width); + + nr_pages = IE31200_PAGES(dimm_info.size, skl); if (nr_pages == 0) continue; - if (dimm_info[j][i].dual_rank) { + if (dimm_info.dual_rank) { nr_pages = nr_pages / 2; dimm = edac_get_dimm(mci, (i * 2) + 1, j, 0); dimm->nr_pages = nr_pages; From 267e5b1d267539d9a927dc04aab6f15aca57da92 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Wed, 12 Feb 2025 16:33:54 +0800 Subject: [PATCH 02/25] EDAC/igen6: Fix the flood of invalid error reports The ECC_ERROR_LOG register of certain SoCs may contain the invalid value ~0, which results in a flood of invalid error reports in polling mode. Fix the flood of invalid error reports by skipping the invalid ECC error log value ~0. Fixes: e14232afa944 ("EDAC/igen6: Add polling support") Reported-by: Ramses Closes: https://lore.kernel.org/all/OISL8Rv--F-9@well-founded.dev/ Tested-by: Ramses Reported-by: John Closes: https://lore.kernel.org/all/p5YcxOE6M3Ncxpn2-Ia_wCt61EM4LwIiN3LroQvT_-G2jMrFDSOW5k2A9D8UUzD2toGpQBN1eI0sL5dSKnkO8iteZegLoQEj-DwQaMhGx4A=@proton.me/ Tested-by: John Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/r/20250212083354.31919-1-qiuxu.zhuo@intel.com --- drivers/edac/igen6_edac.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c index fdf3a84fe6988..595908af9e5c9 100644 --- a/drivers/edac/igen6_edac.c +++ b/drivers/edac/igen6_edac.c @@ -785,13 +785,22 @@ static u64 ecclog_read_and_clear(struct igen6_imc *imc) { u64 ecclog = readq(imc->window + ECC_ERROR_LOG_OFFSET); - if (ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)) { - /* Clear CE/UE bits by writing 1s */ - writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET); - return ecclog; - } + /* + * Quirk: The ECC_ERROR_LOG register of certain SoCs may contain + * the invalid value ~0. This will result in a flood of invalid + * error reports in polling mode. Skip it. 
+ */ + if (ecclog == ~0) + return 0; - return 0; + /* Neither a CE nor a UE. Skip it.*/ + if (!(ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE))) + return 0; + + /* Clear CE/UE bits by writing 1s */ + writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET); + + return ecclog; } static void errsts_clear(struct igen6_imc *imc) From d9207cf7760f5f5599e9ff7eb0fedf56821a1d59 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 14 Feb 2025 08:27:28 +0800 Subject: [PATCH 03/25] EDAC/{skx_common,i10nm}: Fix some missing error reports on Emerald Rapids When doing error injection to some memory DIMMs on certain Intel Emerald Rapids servers, the i10nm_edac missed error reports for some memory DIMMs. Certain BIOS configurations may hide some memory controllers, and the i10nm_edac doesn't enumerate these hidden memory controllers. However, the ADXL decodes memory errors using memory controller physical indices even if there are hidden memory controllers. Therefore, the memory controller physical indices reported by the ADXL may mismatch the logical indices enumerated by the i10nm_edac, resulting in missed error reports for some memory DIMMs. Fix this issue by creating a mapping table from memory controller physical indices (used by the ADXL) to logical indices (used by the i10nm_edac) and using it to convert the physical indices to the logical indices during the error handling process. 
Fixes: c545f5e41225 ("EDAC/i10nm: Skip the absent memory controllers") Reported-by: Kevin Chang Tested-by: Kevin Chang Reported-by: Thomas Chen Tested-by: Thomas Chen Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/r/20250214002728.6287-1-qiuxu.zhuo@intel.com --- drivers/edac/i10nm_base.c | 2 ++ drivers/edac/skx_common.c | 33 +++++++++++++++++++++++++++++++++ drivers/edac/skx_common.h | 11 +++++++++++ 3 files changed, 46 insertions(+) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index f45d849d3f150..355a977019e94 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -751,6 +751,8 @@ static int i10nm_get_ddr_munits(void) continue; } else { d->imc[lmc].mdev = mdev; + if (res_cfg->type == SPR) + skx_set_mc_mapping(d, i, lmc); lmc++; } } diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index f7bd930e058fe..fa5b442b18449 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -121,6 +121,35 @@ void skx_adxl_put(void) } EXPORT_SYMBOL_GPL(skx_adxl_put); +static void skx_init_mc_mapping(struct skx_dev *d) +{ + /* + * By default, the BIOS presents all memory controllers within each + * socket to the EDAC driver. The physical indices are the same as + * the logical indices of the memory controllers enumerated by the + * EDAC driver. 
+ */ + for (int i = 0; i < NUM_IMC; i++) + d->mc_mapping[i] = i; +} + +void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc) +{ + edac_dbg(0, "Set the mapping of mc phy idx to logical idx: %02d -> %02d\n", + pmc, lmc); + + d->mc_mapping[pmc] = lmc; +} +EXPORT_SYMBOL_GPL(skx_set_mc_mapping); + +static u8 skx_get_mc_mapping(struct skx_dev *d, u8 pmc) +{ + edac_dbg(0, "Get the mapping of mc phy idx to logical idx: %02d -> %02d\n", + pmc, d->mc_mapping[pmc]); + + return d->mc_mapping[pmc]; +} + static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src) { struct skx_dev *d; @@ -188,6 +217,8 @@ static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src) return false; } + res->imc = skx_get_mc_mapping(d, res->imc); + for (i = 0; i < adxl_component_count; i++) { if (adxl_values[i] == ~0x0ull) continue; @@ -326,6 +357,8 @@ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list) d->bus[0], d->bus[1], d->bus[2], d->bus[3]); list_add_tail(&d->list, &dev_edac_list); prev = pdev; + + skx_init_mc_mapping(d); } if (list) diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index b0845bdd45164..ca5408803f878 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -93,6 +93,16 @@ struct skx_dev { struct pci_dev *uracu; /* for i10nm CPU */ struct pci_dev *pcu_cr3; /* for HBM memory detection */ u32 mcroute; + /* + * Some server BIOS may hide certain memory controllers, and the + * EDAC driver skips those hidden memory controllers. However, the + * ADXL still decodes memory error address using physical memory + * controller indices. The mapping table is used to convert the + * physical indices (reported by ADXL) to the logical indices + * (used the EDAC driver) of present memory controllers during the + * error handling process. 
+ */ + u8 mc_mapping[NUM_IMC]; struct skx_imc { struct mem_ctl_info *mci; struct pci_dev *mdev; /* for i10nm CPU */ @@ -242,6 +252,7 @@ void skx_adxl_put(void); void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log); void skx_set_mem_cfg(bool mem_cfg_2lm); void skx_set_res_cfg(struct res_config *cfg); +void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc); int skx_get_src_id(struct skx_dev *d, int off, u8 *id); From db99ea5f2c0361c8fc2878792e97c7b67c811bd0 Mon Sep 17 00:00:00 2001 From: Shiju Jose Date: Wed, 12 Feb 2025 14:36:39 +0000 Subject: [PATCH 04/25] EDAC: Add support for EDAC device features control Add generic EDAC device feature controls supporting the registration of RAS features available in the system. The driver exposes control attributes for these features to userspace in /sys/bus/edac/devices// [ bp: Touch-up documentation, simplify, make edac_dev_type static, fixup edac_dev_register() retvals. ] Co-developed-by: Jonathan Cameron Signed-off-by: Jonathan Cameron Signed-off-by: Shiju Jose Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Fan Ni Tested-by: Daniel Ferguson Tested-by: Fan Ni Link: https://lore.kernel.org/r/20250212143654.1893-2-shiju.jose@huawei.com --- Documentation/edac/features.rst | 93 +++++++++++++++++++++++++++++ Documentation/edac/index.rst | 10 ++++ drivers/edac/edac_device.c | 101 ++++++++++++++++++++++++++++++++ include/linux/edac.h | 26 ++++++++ 4 files changed, 230 insertions(+) create mode 100644 Documentation/edac/features.rst create mode 100644 Documentation/edac/index.rst diff --git a/Documentation/edac/features.rst b/Documentation/edac/features.rst new file mode 100644 index 0000000000000..3c279d026bbd9 --- /dev/null +++ b/Documentation/edac/features.rst @@ -0,0 +1,93 @@ +.. SPDX-License-Identifier: GPL-2.0 OR GFDL-1.2-no-invariants-or-later + +================= +EDAC/RAS features +================= + +Copyright (c) 2024-2025 HiSilicon Limited. 
+ +:Author: Shiju Jose +:License: The GNU Free Documentation License, Version 1.2 without + Invariant Sections, Front-Cover Texts nor Back-Cover Texts. + (dual licensed under the GPL v2) + +- Written for: 6.15 + +Introduction +------------ + +EDAC/RAS components plugging and high-level design: + +1. Scrub control + +2. Error Check Scrub (ECS) control + +3. ACPI RAS2 features + +4. Post Package Repair (PPR) control + +5. Memory Sparing Repair control + +High level design is illustrated in the following diagram:: + + +-----------------------------------------------+ + | Userspace - Rasdaemon | + | +-------------+ | + | | RAS CXL mem | +---------------+ | + | |error handler|---->| | | + | +-------------+ | RAS dynamic | | + | +-------------+ | scrub, memory | | + | | RAS memory |---->| repair control| | + | |error handler| +----|----------+ | + | +-------------+ | | + +--------------------------|--------------------+ + | + | + +-------------------------------|------------------------------+ + | Kernel EDAC extension for | controlling RAS Features | + |+------------------------------|----------------------------+ | + || EDAC Core Sysfs EDAC| Bus | | + || +--------------------------|---------------------------+| | + || |/sys/bus/edac/devices//scrubX/ | | EDAC device || | + || |/sys/bus/edac/devices//ecsX/ |<->| EDAC MC || | + || |/sys/bus/edac/devices//repairX | | EDAC sysfs || | + || +---------------------------|--------------------------+| | + || EDAC|Bus | | + || | | | + || +----------+ Get feature | Get feature | | + || | | desc +---------|------+ desc +----------+ | | + || |EDAC scrub|<-----| EDAC device | | | | | + || +----------+ | driver- RAS |----->| EDAC mem | | | + || +----------+ | feature control| | repair | | | + || | |<-----| | +----------+ | | + || |EDAC ECS | +---------|------+ | | + || +----------+ Register RAS|features | | + || ______________________|_____________ | | + |+---------|---------------|------------------|--------------+ | + | 
+-------|----+ +-------|-------+ +----|----------+ | + | | | | CXL mem driver| | Client driver | | + | | ACPI RAS2 | | scrub, ECS, | | memory repair | | + | | driver | | sparing, PPR | | features | | + | +-----|------+ +-------|-------+ +------|--------+ | + | | | | | + +--------|-----------------|--------------------|--------------+ + | | | + +--------|-----------------|--------------------|--------------+ + | +---|-----------------|--------------------|-------+ | + | | | | + | | Platform HW and Firmware | | + | +--------------------------------------------------+ | + +--------------------------------------------------------------+ + + +1. EDAC Features components - Create feature-specific descriptors. For + example: scrub, ECS, memory repair in the above diagram. + +2. EDAC device driver for controlling RAS Features - Get feature's attribute + descriptors from EDAC RAS feature component and registers device's RAS + features with EDAC bus and expose the features control attributes via + sysfs. For example, /sys/bus/edac/devices//X/ + +3. RAS dynamic feature controller - Userspace sample modules in rasdaemon for + dynamic scrub/repair control to issue scrubbing/repair when excess number + of corrected memory errors are reported in a short span of time. diff --git a/Documentation/edac/index.rst b/Documentation/edac/index.rst new file mode 100644 index 0000000000000..de4a3aa452cb1 --- /dev/null +++ b/Documentation/edac/index.rst @@ -0,0 +1,10 @@ +.. SPDX-License-Identifier: GPL-2.0 OR GFDL-1.2-no-invariants-or-later + +============== +EDAC Subsystem +============== + +.. toctree:: + :maxdepth: 1 + + features diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index 621dc2a5d0347..6af0893cadc94 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -570,3 +570,104 @@ void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev, block ? 
block->name : "N/A", count, msg); } EXPORT_SYMBOL_GPL(edac_device_handle_ue_count); + +static void edac_dev_release(struct device *dev) +{ + struct edac_dev_feat_ctx *ctx = container_of(dev, struct edac_dev_feat_ctx, dev); + + kfree(ctx->dev.groups); + kfree(ctx); +} + +static const struct device_type edac_dev_type = { + .name = "edac_dev", + .release = edac_dev_release, +}; + +static void edac_dev_unreg(void *data) +{ + device_unregister(data); +} + +/** + * edac_dev_register - register device for RAS features with EDAC + * @parent: parent device. + * @name: name for the folder in the /sys/bus/edac/devices/, + * which is derived from the parent device. + * For e.g. /sys/bus/edac/devices/cxl_mem0/ + * @private: parent driver's data to store in the context if any. + * @num_features: number of RAS features to register. + * @ras_features: list of RAS features to register. + * + * Return: + * * %0 - Success. + * * %-EINVAL - Invalid parameters passed. + * * %-ENOMEM - Dynamic memory allocation failed. 
+ * + */ +int edac_dev_register(struct device *parent, char *name, + void *private, int num_features, + const struct edac_dev_feature *ras_features) +{ + const struct attribute_group **ras_attr_groups; + struct edac_dev_feat_ctx *ctx; + int attr_gcnt = 0; + int ret = -ENOMEM; + int feat; + + if (!parent || !name || !num_features || !ras_features) + return -EINVAL; + + /* Double parse to make space for attributes */ + for (feat = 0; feat < num_features; feat++) { + switch (ras_features[feat].ft_type) { + /* Add feature specific code */ + default: + return -EINVAL; + } + } + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ras_attr_groups = kcalloc(attr_gcnt + 1, sizeof(*ras_attr_groups), GFP_KERNEL); + if (!ras_attr_groups) + goto ctx_free; + + attr_gcnt = 0; + for (feat = 0; feat < num_features; feat++, ras_features++) { + switch (ras_features->ft_type) { + /* Add feature specific code */ + default: + ret = -EINVAL; + goto groups_free; + } + } + + ctx->dev.parent = parent; + ctx->dev.bus = edac_get_sysfs_subsys(); + ctx->dev.type = &edac_dev_type; + ctx->dev.groups = ras_attr_groups; + ctx->private = private; + dev_set_drvdata(&ctx->dev, ctx); + + ret = dev_set_name(&ctx->dev, name); + if (ret) + goto groups_free; + + ret = device_register(&ctx->dev); + if (ret) { + put_device(&ctx->dev); + return ret; + } + + return devm_add_action_or_reset(parent, edac_dev_unreg, &ctx->dev); + +groups_free: + kfree(ras_attr_groups); +ctx_free: + kfree(ctx); + return ret; +} +EXPORT_SYMBOL_GPL(edac_dev_register); diff --git a/include/linux/edac.h b/include/linux/edac.h index b4ee8961e6236..8c4b6ca2a994d 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -661,4 +661,30 @@ static inline struct dimm_info *edac_get_dimm(struct mem_ctl_info *mci, return mci->dimms[index]; } + +/* RAS feature type */ +enum edac_dev_feat { + RAS_FEAT_MAX +}; + +/* EDAC device feature information structure */ +struct edac_dev_data { + u8 instance; + void *private; 
+}; + +struct edac_dev_feat_ctx { + struct device dev; + void *private; +}; + +struct edac_dev_feature { + enum edac_dev_feat ft_type; + u8 instance; + void *ctx; +}; + +int edac_dev_register(struct device *parent, char *dev_name, + void *parent_pvt_data, int num_features, + const struct edac_dev_feature *ras_features); #endif /* _LINUX_EDAC_H_ */ From f90b738166fe909df48de6a03744ddfbad5002f8 Mon Sep 17 00:00:00 2001 From: Shiju Jose Date: Wed, 12 Feb 2025 14:36:40 +0000 Subject: [PATCH 05/25] EDAC: Add scrub control feature Add a scrub control to manage memory scrubbers in the system. Devices with a scrub feature register with the EDAC device driver which retrieves the scrub descriptor from the scrub driver and exposes the control attributes for a instance to userspace at /sys/bus/edac/devices//scrubX/. The common sysfs scrub control interface abstracts the control of arbitrary scrubbing functionality into a common set of functions. The attribute nodes are only present if the client driver has implemented the corresponding attribute callback function and passed the operations to the device driver during registration. [ bp: Massage commit message, docs and code, simplify text a bit. 
Integrate fixup for: https://lore.kernel.org/r/202502251009.0sGkolEJ-lkp@intel.com Reported-by: kernel test robot Reported-by: Dan Carpenter ] Co-developed-by: Jonathan Cameron Signed-off-by: Jonathan Cameron Signed-off-by: Shiju Jose Signed-off-by: Borislav Petkov (AMD) Tested-by: Daniel Ferguson Tested-by: Fan Ni Link: https://lore.kernel.org/r/20250212143654.1893-3-shiju.jose@huawei.com --- Documentation/ABI/testing/sysfs-edac-scrub | 69 ++++++ Documentation/edac/features.rst | 6 + Documentation/edac/index.rst | 1 + Documentation/edac/scrub.rst | 264 +++++++++++++++++++++ drivers/edac/Kconfig | 9 + drivers/edac/Makefile | 1 + drivers/edac/edac_device.c | 40 +++- drivers/edac/scrub.c | 209 ++++++++++++++++ include/linux/edac.h | 43 ++++ 9 files changed, 638 insertions(+), 4 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-edac-scrub create mode 100644 Documentation/edac/scrub.rst create mode 100755 drivers/edac/scrub.c diff --git a/Documentation/ABI/testing/sysfs-edac-scrub b/Documentation/ABI/testing/sysfs-edac-scrub new file mode 100644 index 0000000000000..c43be90deab4a --- /dev/null +++ b/Documentation/ABI/testing/sysfs-edac-scrub @@ -0,0 +1,69 @@ +What: /sys/bus/edac/devices//scrubX +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + The sysfs EDAC bus devices //scrubX subdirectory + belongs to an instance of memory scrub control feature, + where directory corresponds to a device/memory + region registered with the EDAC device driver for the + scrub control feature. + + The sysfs scrub attr nodes are only present if the parent + driver has implemented the corresponding attr callback + function and provided the necessary operations to the EDAC + device driver during registration. + +What: /sys/bus/edac/devices//scrubX/addr +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) The base address of the memory region to be scrubbed + for on-demand scrubbing. 
Setting address starts scrubbing. + The size must be set before that. + + The readback addr value is non-zero if the requested + on-demand scrubbing is in progress, zero otherwise. + +What: /sys/bus/edac/devices//scrubX/size +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) The size of the memory region to be scrubbed + (on-demand scrubbing). + +What: /sys/bus/edac/devices//scrubX/enable_background +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) Start/Stop background (patrol) scrubbing if supported. + +What: /sys/bus/edac/devices//scrubX/min_cycle_duration +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RO) Supported minimum scrub cycle duration in seconds + by the memory scrubber. + +What: /sys/bus/edac/devices//scrubX/max_cycle_duration +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RO) Supported maximum scrub cycle duration in seconds + by the memory scrubber. + +What: /sys/bus/edac/devices//scrubX/current_cycle_duration +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) The current scrub cycle duration in seconds and must be + within the supported range by the memory scrubber. + + Scrub has an overhead when running and that may want to be + reduced by taking longer to do it. diff --git a/Documentation/edac/features.rst b/Documentation/edac/features.rst index 3c279d026bbd9..fdcecb3df10ee 100644 --- a/Documentation/edac/features.rst +++ b/Documentation/edac/features.rst @@ -91,3 +91,9 @@ High level design is illustrated in the following diagram:: 3. RAS dynamic feature controller - Userspace sample modules in rasdaemon for dynamic scrub/repair control to issue scrubbing/repair when excess number of corrected memory errors are reported in a short span of time. + +RAS features +------------ +1. 
Memory Scrub + +Memory scrub features are documented in `Documentation/edac/scrub.rst`. diff --git a/Documentation/edac/index.rst b/Documentation/edac/index.rst index de4a3aa452cb1..0a00c23838b60 100644 --- a/Documentation/edac/index.rst +++ b/Documentation/edac/index.rst @@ -8,3 +8,4 @@ EDAC Subsystem :maxdepth: 1 features + scrub diff --git a/Documentation/edac/scrub.rst b/Documentation/edac/scrub.rst new file mode 100644 index 0000000000000..8b9611e4641b5 --- /dev/null +++ b/Documentation/edac/scrub.rst @@ -0,0 +1,264 @@ +.. SPDX-License-Identifier: GPL-2.0 OR GFDL-1.2-no-invariants-or-later + +============= +Scrub Control +============= + +Copyright (c) 2024-2025 HiSilicon Limited. + +:Author: Shiju Jose +:License: The GNU Free Documentation License, Version 1.2 without + Invariant Sections, Front-Cover Texts nor Back-Cover Texts. + (dual licensed under the GPL v2) + +- Written for: 6.15 + +Introduction +------------ + +Increasing DRAM size and cost have made memory subsystem reliability an +important concern. These modules are used where potentially corrupted data +could cause expensive or fatal issues. Memory errors are among the top +hardware failures that cause server and workload crashes. + +Memory scrubbing is a feature where an ECC (Error-Correcting Code) engine +reads data from each memory media location, corrects if necessary and writes +the corrected data back to the same memory media location. + +DIMMs can be scrubbed at a configurable rate to detect uncorrected memory +errors and attempt recovery from detected errors, providing the following +benefits: + +1. Proactively scrubbing DIMMs reduces the chance of a correctable error + becoming uncorrectable. + +2. When detected, uncorrected errors caught in unallocated memory pages are + isolated and prevented from being allocated to an application or the OS. + +3. This reduces the likelihood of software or hardware products encountering + memory errors. + +4. 
The additional data on failures in memory may be used to build up + statistics that are later used to decide whether to use memory repair + technologies such as Post Package Repair or Sparing. + +There are 2 types of memory scrubbing: + +1. Background (patrol) scrubbing while the DRAM is otherwise idle. + +2. On-demand scrubbing for a specific address range or region of memory. + +Several types of interfaces to hardware memory scrubbers have been +identified, such as CXL memory device patrol scrub, CXL DDR5 ECS, ACPI +RAS2 memory scrubbing, and ACPI NVDIMM ARS (Address Range Scrub). + +The control mechanisms vary across different memory scrubbers. To enable +standardized userspace tooling, there is a need to present these controls +through a standardized ABI. + +A generic memory EDAC scrub control allows users to manage underlying +scrubbers in the system through a standardized sysfs control interface. It +abstracts the management of various scrubbing functionalities into a unified +set of functions. + +Use cases of common scrub control feature +----------------------------------------- + +1. Several types of interfaces for hardware memory scrubbers have been + identified, including the CXL memory device patrol scrub, CXL DDR5 ECS, + ACPI RAS2 memory scrubbing features, ACPI NVDIMM ARS (Address Range Scrub), + and software-based memory scrubbers. + + Of the identified interfaces to hardware memory scrubbers some support + control over patrol (background) scrubbing (e.g., ACPI RAS2, CXL) and/or + on-demand scrubbing (e.g., ACPI RAS2, ACPI ARS). However, the scrub control + interfaces vary between memory scrubbers, highlighting the need for + a standardized, generic sysfs scrub control interface that is accessible to + userspace for administration and use by scripts/tools. + +2. 
User-space scrub controls allow users to disable scrubbing if necessary, + for example, to disable background patrol scrubbing or adjust the scrub + rate for performance-aware operations where background activities need to + be minimized or disabled. + +3. User-space tools enable on-demand scrubbing for specific address ranges, + provided that the scrubber supports this functionality. + +4. User-space tools can also control memory DIMM scrubbing at a configurable + scrub rate via sysfs scrub controls. This approach offers several benefits: + + 4.1. Detects uncorrectable memory errors early, before user access to affected + memory, helping facilitate recovery. + + 4.2. Reduces the likelihood of correctable errors developing into uncorrectable + errors. + +5. Policy control for hotplugged memory is necessary because there may not + be a system-wide BIOS or similar control to manage scrub settings for a CXL + device added after boot. Determining these settings is a policy decision, + balancing reliability against performance, so userspace should control it. + Therefore, a unified interface is recommended for handling this function in + a way that aligns with other similar interfaces, rather than creating a + separate one. + +Scrubbing features +------------------ + +CXL Memory Scrubbing features +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +CXL spec r3.1 [1]_ section 8.2.9.9.11.1 describes the memory device patrol +scrub control feature. The device patrol scrub proactively locates and makes +corrections to errors in a regular cycle. The patrol scrub control allows +userspace to request changes to the CXL patrol scrubber's configuration. + +The patrol scrub control allows the requester to specify the number of +hours in which the patrol scrub cycles must be completed, provided that +the requested scrub rate is within the supported range of the +scrub rate that the device is capable of. 
In the CXL driver, the +number of seconds per scrub cycle, which the user requests via sysfs, is +rescaled to hours per scrub cycle. + +In addition, the patrol scrub control allows the host to disable the feature in case it interferes +with performance-aware operations which require the background operations to +be turned off. + +Error Check Scrub (ECS) +~~~~~~~~~~~~~~~~~~~~~~~ + +CXL spec r3.1 [1]_ section 8.2.9.9.11.2 describes Error Check Scrub (ECS) +- a feature defined in the JEDEC DDR5 SDRAM Specification (JESD79-5) and +allowing DRAM to internally read, correct single-bit errors, and write back +corrected data bits to the DRAM array while providing transparency to error +counts. + +The DDR5 device contains a number of memory media Field Replaceable Units (FRU) +per device. The DDR5 ECS feature and thus the ECS control driver supports +configuring the ECS parameters per FRU. + +ACPI RAS2 Hardware-based Memory Scrubbing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +ACPI spec 6.5 [2]_ section 5.2.21 ACPI RAS2 describes an ACPI RAS2 table +which provides interfaces for platform RAS features and supports independent +RAS controls and capabilities for a given RAS feature for multiple instances +of the same component in a given system. + +Memory RAS features apply to RAS capabilities, controls and operations that +are specific to memory. RAS2 PCC sub-spaces for memory-specific RAS features +have a Feature Type of 0x00 (Memory). + +The platform can use the hardware-based memory scrubbing feature to expose +controls and capabilities associated with hardware-based memory scrub +engines. As per the spec, the RAS2 memory scrubbing feature supports: + +1. Independent memory scrubbing controls for each NUMA domain, identified + using its proximity domain. + +2. Provision for background (patrol) scrubbing of the entire memory system, + as well as on-demand scrubbing for a specific region of memory. 
+ +ACPI Address Range Scrubbing (ARS) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +ACPI spec 6.5 [2]_ section 9.19.7.2 describes Address Range Scrubbing (ARS). +ARS allows the platform to communicate memory errors to system software. +This capability allows system software to prevent accesses to addresses with +uncorrectable errors in memory. ARS functions manage all NVDIMMs present in +the system. Only one scrub can be in progress system wide at any given time. + +The following functions are supported as per the specification: + +1. Query ARS Capabilities for a given address range, indicates platform + supports the ACPI NVDIMM Root Device Unconsumed Error Notification. + +2. Start ARS triggers an Address Range Scrub for the given memory range. + Address scrubbing can be done for volatile or persistent memory, or both. + +3. Query ARS Status command allows software to get the status of ARS, + including the progress of ARS and ARS error record. + +4. Clear Uncorrectable Error. + +5. Translate SPA + +6. ARS Error Inject etc. + +The kernel supports an existing control for ARS and ARS is currently not +supported in EDAC. + +.. [1] https://computeexpresslink.org/cxl-specification/ + +.. [2] https://uefi.org/specs/ACPI/6.5/ + +Comparison of various scrubbing features +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + +--------------+-----------+-----------+-----------+-----------+ + | | ACPI | CXL patrol| CXL ECS | ARS | + | Name | RAS2 | scrub | | | + +--------------+-----------+-----------+-----------+-----------+ + | | | | | | + | On-demand | Supported | No | No | Supported | + | Scrubbing | | | | | + | | | | | | + +--------------+-----------+-----------+-----------+-----------+ + | | | | | | + | Background | Supported | Supported | Supported | No | + | scrubbing | | | | | + | | | | | | + +--------------+-----------+-----------+-----------+-----------+ + | | | | | | + | Mode of | Scrub ctrl| per device| per memory| Unknown | + | scrubbing | per NUMA | | media | | + | | domain. 
| | | | + +--------------+-----------+-----------+-----------+-----------+ + | | | | | | + | Query scrub | Supported | Supported | Supported | Supported | + | capabilities | | | | | + | | | | | | + +--------------+-----------+-----------+-----------+-----------+ + | | | | | | + | Setting | Supported | No | No | Supported | + | address range| | | | | + | | | | | | + +--------------+-----------+-----------+-----------+-----------+ + | | | | | | + | Setting | Supported | Supported | No | No | + | scrub rate | | | | | + | | | | | | + +--------------+-----------+-----------+-----------+-----------+ + | | | | | | + | Unit for | Not | in hours | No | No | + | scrub rate | Defined | | | | + | | | | | | + +--------------+-----------+-----------+-----------+-----------+ + | | Supported | | | | + | Scrub | on-demand | No | No | Supported | + | status/ | scrubbing | | | | + | Completion | only | | | | + +--------------+-----------+-----------+-----------+-----------+ + | UC error | |CXL general|CXL general| ACPI UCE | + | reporting | Exception |media/DRAM |media/DRAM | notify and| + | | |event/media|event/media| query | + | | |scan? |scan? | ARS status| + +--------------+-----------+-----------+-----------+-----------+ + | | | | | | + | Support for | Supported | Supported | Supported | No | + | EDAC control | | | | | + | | | | | | + +--------------+-----------+-----------+-----------+-----------+ + +The File System +--------------- + +The control attributes of a registered scrubber instance could be +accessed in: + +/sys/bus/edac/devices//scrubX/ + +sysfs +----- + +Sysfs files are documented in +`Documentation/ABI/testing/sysfs-edac-scrub` diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 2051a7c944a58..175d706168abd 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -75,6 +75,15 @@ config EDAC_GHES In doubt, say 'Y'. 
+config EDAC_SCRUB + bool "EDAC scrub feature" + help + The EDAC scrub feature is optional and is designed to control the + memory scrubbers in the system. The common sysfs scrub interface + abstracts the control of various arbitrary scrubbing functionalities + into a unified set of functions. + Say 'y/n' to enable/disable EDAC scrub feature. + config EDAC_AMD64 tristate "AMD64 (Opteron, Athlon64)" depends on AMD_NB && EDAC_DECODE_MCE diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 89789ba8275fe..cdbd0a07b9e6b 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -12,6 +12,7 @@ edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_core-y += edac_module.o edac_device_sysfs.o wq.o edac_core-$(CONFIG_EDAC_DEBUG) += debugfs.o +edac_core-$(CONFIG_EDAC_SCRUB) += scrub.o ifdef CONFIG_PCI edac_core-y += edac_pci.o edac_pci_sysfs.o diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index 6af0893cadc94..54c1e2d024ac2 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -575,6 +575,7 @@ static void edac_dev_release(struct device *dev) { struct edac_dev_feat_ctx *ctx = container_of(dev, struct edac_dev_feat_ctx, dev); + kfree(ctx->scrub); kfree(ctx->dev.groups); kfree(ctx); } @@ -610,9 +611,11 @@ int edac_dev_register(struct device *parent, char *name, const struct edac_dev_feature *ras_features) { const struct attribute_group **ras_attr_groups; + struct edac_dev_data *dev_data; struct edac_dev_feat_ctx *ctx; int attr_gcnt = 0; int ret = -ENOMEM; + int scrub_cnt = 0; int feat; if (!parent || !name || !num_features || !ras_features) @@ -621,7 +624,10 @@ int edac_dev_register(struct device *parent, char *name, /* Double parse to make space for attributes */ for (feat = 0; feat < num_features; feat++) { switch (ras_features[feat].ft_type) { - /* Add feature specific code */ + case RAS_FEAT_SCRUB: + attr_gcnt++; + scrub_cnt++; + break; default: return -EINVAL; } @@ -635,13 +641,37 @@ int 
edac_dev_register(struct device *parent, char *name, if (!ras_attr_groups) goto ctx_free; + if (scrub_cnt) { + ctx->scrub = kcalloc(scrub_cnt, sizeof(*ctx->scrub), GFP_KERNEL); + if (!ctx->scrub) + goto groups_free; + } + attr_gcnt = 0; + scrub_cnt = 0; for (feat = 0; feat < num_features; feat++, ras_features++) { switch (ras_features->ft_type) { - /* Add feature specific code */ + case RAS_FEAT_SCRUB: + if (!ras_features->scrub_ops || scrub_cnt != ras_features->instance) { + ret = -EINVAL; + goto data_mem_free; + } + + dev_data = &ctx->scrub[scrub_cnt]; + dev_data->instance = scrub_cnt; + dev_data->scrub_ops = ras_features->scrub_ops; + dev_data->private = ras_features->ctx; + ret = edac_scrub_get_desc(parent, &ras_attr_groups[attr_gcnt], + ras_features->instance); + if (ret) + goto data_mem_free; + + scrub_cnt++; + attr_gcnt++; + break; default: ret = -EINVAL; - goto groups_free; + goto data_mem_free; } } @@ -654,7 +684,7 @@ int edac_dev_register(struct device *parent, char *name, ret = dev_set_name(&ctx->dev, name); if (ret) - goto groups_free; + goto data_mem_free; ret = device_register(&ctx->dev); if (ret) { @@ -664,6 +694,8 @@ int edac_dev_register(struct device *parent, char *name, return devm_add_action_or_reset(parent, edac_dev_unreg, &ctx->dev); +data_mem_free: + kfree(ctx->scrub); groups_free: kfree(ras_attr_groups); ctx_free: diff --git a/drivers/edac/scrub.c b/drivers/edac/scrub.c new file mode 100755 index 0000000000000..e421d3ebd959f --- /dev/null +++ b/drivers/edac/scrub.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * The generic EDAC scrub driver controls the memory scrubbers in the + * system. The common sysfs scrub interface abstracts the control of + * various arbitrary scrubbing functionalities into a unified set of + * functions. + * + * Copyright (c) 2024-2025 HiSilicon Limited. 
+ */ + +#include + +enum edac_scrub_attributes { + SCRUB_ADDRESS, + SCRUB_SIZE, + SCRUB_ENABLE_BACKGROUND, + SCRUB_MIN_CYCLE_DURATION, + SCRUB_MAX_CYCLE_DURATION, + SCRUB_CUR_CYCLE_DURATION, + SCRUB_MAX_ATTRS +}; + +struct edac_scrub_dev_attr { + struct device_attribute dev_attr; + u8 instance; +}; + +struct edac_scrub_context { + char name[EDAC_FEAT_NAME_LEN]; + struct edac_scrub_dev_attr scrub_dev_attr[SCRUB_MAX_ATTRS]; + struct attribute *scrub_attrs[SCRUB_MAX_ATTRS + 1]; + struct attribute_group group; +}; + +#define TO_SCRUB_DEV_ATTR(_dev_attr) \ + container_of(_dev_attr, struct edac_scrub_dev_attr, dev_attr) + +#define EDAC_SCRUB_ATTR_SHOW(attrib, cb, type, format) \ +static ssize_t attrib##_show(struct device *ras_feat_dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + u8 inst = TO_SCRUB_DEV_ATTR(attr)->instance; \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_scrub_ops *ops = ctx->scrub[inst].scrub_ops; \ + type data; \ + int ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->scrub[inst].private, &data); \ + if (ret) \ + return ret; \ + \ + return sysfs_emit(buf, format, data); \ +} + +EDAC_SCRUB_ATTR_SHOW(addr, read_addr, u64, "0x%llx\n") +EDAC_SCRUB_ATTR_SHOW(size, read_size, u64, "0x%llx\n") +EDAC_SCRUB_ATTR_SHOW(enable_background, get_enabled_bg, bool, "%u\n") +EDAC_SCRUB_ATTR_SHOW(min_cycle_duration, get_min_cycle, u32, "%u\n") +EDAC_SCRUB_ATTR_SHOW(max_cycle_duration, get_max_cycle, u32, "%u\n") +EDAC_SCRUB_ATTR_SHOW(current_cycle_duration, get_cycle_duration, u32, "%u\n") + +#define EDAC_SCRUB_ATTR_STORE(attrib, cb, type, conv_func) \ +static ssize_t attrib##_store(struct device *ras_feat_dev, \ + struct device_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + u8 inst = TO_SCRUB_DEV_ATTR(attr)->instance; \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_scrub_ops *ops = ctx->scrub[inst].scrub_ops; \ + type data; \ + int ret; \ + \ + ret = 
conv_func(buf, 0, &data); \ + if (ret < 0) \ + return ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->scrub[inst].private, data); \ + if (ret) \ + return ret; \ + \ + return len; \ +} + +EDAC_SCRUB_ATTR_STORE(addr, write_addr, u64, kstrtou64) +EDAC_SCRUB_ATTR_STORE(size, write_size, u64, kstrtou64) +EDAC_SCRUB_ATTR_STORE(enable_background, set_enabled_bg, unsigned long, kstrtoul) +EDAC_SCRUB_ATTR_STORE(current_cycle_duration, set_cycle_duration, unsigned long, kstrtoul) + +static umode_t scrub_attr_visible(struct kobject *kobj, struct attribute *a, int attr_id) +{ + struct device *ras_feat_dev = kobj_to_dev(kobj); + struct device_attribute *dev_attr = container_of(a, struct device_attribute, attr); + u8 inst = TO_SCRUB_DEV_ATTR(dev_attr)->instance; + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); + const struct edac_scrub_ops *ops = ctx->scrub[inst].scrub_ops; + + switch (attr_id) { + case SCRUB_ADDRESS: + if (ops->read_addr) { + if (ops->write_addr) + return a->mode; + else + return 0444; + } + break; + case SCRUB_SIZE: + if (ops->read_size) { + if (ops->write_size) + return a->mode; + else + return 0444; + } + break; + case SCRUB_ENABLE_BACKGROUND: + if (ops->get_enabled_bg) { + if (ops->set_enabled_bg) + return a->mode; + else + return 0444; + } + break; + case SCRUB_MIN_CYCLE_DURATION: + if (ops->get_min_cycle) + return a->mode; + break; + case SCRUB_MAX_CYCLE_DURATION: + if (ops->get_max_cycle) + return a->mode; + break; + case SCRUB_CUR_CYCLE_DURATION: + if (ops->get_cycle_duration) { + if (ops->set_cycle_duration) + return a->mode; + else + return 0444; + } + break; + default: + break; + } + + return 0; +} + +#define EDAC_SCRUB_ATTR_RO(_name, _instance) \ + ((struct edac_scrub_dev_attr) { .dev_attr = __ATTR_RO(_name), \ + .instance = _instance }) + +#define EDAC_SCRUB_ATTR_WO(_name, _instance) \ + ((struct edac_scrub_dev_attr) { .dev_attr = __ATTR_WO(_name), \ + .instance = _instance }) + +#define EDAC_SCRUB_ATTR_RW(_name, _instance) \ 
+ ((struct edac_scrub_dev_attr) { .dev_attr = __ATTR_RW(_name), \ + .instance = _instance }) + +static int scrub_create_desc(struct device *scrub_dev, + const struct attribute_group **attr_groups, u8 instance) +{ + struct edac_scrub_context *scrub_ctx; + struct attribute_group *group; + int i; + struct edac_scrub_dev_attr dev_attr[] = { + [SCRUB_ADDRESS] = EDAC_SCRUB_ATTR_RW(addr, instance), + [SCRUB_SIZE] = EDAC_SCRUB_ATTR_RW(size, instance), + [SCRUB_ENABLE_BACKGROUND] = EDAC_SCRUB_ATTR_RW(enable_background, instance), + [SCRUB_MIN_CYCLE_DURATION] = EDAC_SCRUB_ATTR_RO(min_cycle_duration, instance), + [SCRUB_MAX_CYCLE_DURATION] = EDAC_SCRUB_ATTR_RO(max_cycle_duration, instance), + [SCRUB_CUR_CYCLE_DURATION] = EDAC_SCRUB_ATTR_RW(current_cycle_duration, instance) + }; + + scrub_ctx = devm_kzalloc(scrub_dev, sizeof(*scrub_ctx), GFP_KERNEL); + if (!scrub_ctx) + return -ENOMEM; + + group = &scrub_ctx->group; + for (i = 0; i < SCRUB_MAX_ATTRS; i++) { + memcpy(&scrub_ctx->scrub_dev_attr[i], &dev_attr[i], sizeof(dev_attr[i])); + scrub_ctx->scrub_attrs[i] = &scrub_ctx->scrub_dev_attr[i].dev_attr.attr; + } + sprintf(scrub_ctx->name, "%s%d", "scrub", instance); + group->name = scrub_ctx->name; + group->attrs = scrub_ctx->scrub_attrs; + group->is_visible = scrub_attr_visible; + + attr_groups[0] = group; + + return 0; +} + +/** + * edac_scrub_get_desc - get EDAC scrub descriptors + * @scrub_dev: client device, with scrub support + * @attr_groups: pointer to attribute group container + * @instance: device's scrub instance number. + * + * Return: + * * %0 - Success. + * * %-EINVAL - Invalid parameters passed. + * * %-ENOMEM - Dynamic memory allocation failed. 
+ */ +int edac_scrub_get_desc(struct device *scrub_dev, + const struct attribute_group **attr_groups, u8 instance) +{ + if (!scrub_dev || !attr_groups) + return -EINVAL; + + return scrub_create_desc(scrub_dev, attr_groups, instance); +} diff --git a/include/linux/edac.h b/include/linux/edac.h index 8c4b6ca2a994d..1cbab08720df1 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -662,13 +662,54 @@ static inline struct dimm_info *edac_get_dimm(struct mem_ctl_info *mci, return mci->dimms[index]; } +#define EDAC_FEAT_NAME_LEN 128 + /* RAS feature type */ enum edac_dev_feat { + RAS_FEAT_SCRUB, RAS_FEAT_MAX }; +/** + * struct edac_scrub_ops - scrub device operations (all elements optional) + * @read_addr: read base address of scrubbing range. + * @read_size: read offset of scrubbing range. + * @write_addr: set base address of the scrubbing range. + * @write_size: set offset of the scrubbing range. + * @get_enabled_bg: check if currently performing background scrub. + * @set_enabled_bg: start or stop a bg-scrub. + * @get_min_cycle: get minimum supported scrub cycle duration in seconds. + * @get_max_cycle: get maximum supported scrub cycle duration in seconds. + * @get_cycle_duration: get current scrub cycle duration in seconds. + * @set_cycle_duration: set current scrub cycle duration in seconds. 
+ */ +struct edac_scrub_ops { + int (*read_addr)(struct device *dev, void *drv_data, u64 *base); + int (*read_size)(struct device *dev, void *drv_data, u64 *size); + int (*write_addr)(struct device *dev, void *drv_data, u64 base); + int (*write_size)(struct device *dev, void *drv_data, u64 size); + int (*get_enabled_bg)(struct device *dev, void *drv_data, bool *enable); + int (*set_enabled_bg)(struct device *dev, void *drv_data, bool enable); + int (*get_min_cycle)(struct device *dev, void *drv_data, u32 *min); + int (*get_max_cycle)(struct device *dev, void *drv_data, u32 *max); + int (*get_cycle_duration)(struct device *dev, void *drv_data, u32 *cycle); + int (*set_cycle_duration)(struct device *dev, void *drv_data, u32 cycle); +}; + +#if IS_ENABLED(CONFIG_EDAC_SCRUB) +int edac_scrub_get_desc(struct device *scrub_dev, + const struct attribute_group **attr_groups, + u8 instance); +#else +static inline int edac_scrub_get_desc(struct device *scrub_dev, + const struct attribute_group **attr_groups, + u8 instance) +{ return -EOPNOTSUPP; } +#endif /* CONFIG_EDAC_SCRUB */ + /* EDAC device feature information structure */ struct edac_dev_data { + const struct edac_scrub_ops *scrub_ops; u8 instance; void *private; }; @@ -676,11 +717,13 @@ struct edac_dev_data { struct edac_dev_feat_ctx { struct device dev; void *private; + struct edac_dev_data *scrub; }; struct edac_dev_feature { enum edac_dev_feat ft_type; u8 instance; + const struct edac_scrub_ops *scrub_ops; void *ctx; }; From bcbd069b11b024994e30c7c2f3d716a4141fdab1 Mon Sep 17 00:00:00 2001 From: Shiju Jose Date: Wed, 12 Feb 2025 14:36:41 +0000 Subject: [PATCH 06/25] EDAC: Add a Error Check Scrub control feature Add an Error Check Scrub (ECS) control to manage a memory device's ECS feature. 
The ECS is a feature defined in JEDEC DDR5 SDRAM Specification (JESD79-5) and allows the DRAM to internally read, correct single-bit errors, and write back corrected data bits to the DRAM array while providing transparency to error counts. The DDR5 device contains a number of memory media Field Replaceable Units (FRU) per device. The DDR5 ECS feature and thus the ECS control driver supports configuring the ECS parameters per FRU. Memory devices support the ECS feature register with the EDAC device driver, which retrieves the ECS descriptor from the EDAC ECS driver. This driver exposes sysfs ECS control attributes to userspace via /sys/bus/edac/devices//ecs_fruX/. The common sysfs ECS control interface abstracts the control of an arbitrary ECS functionality to a common set of functions. Support for the ECS feature is added separately because the control attributes of the DDR5 ECS feature differ from those of the scrub feature. The sysfs ECS attribute nodes are only present if the client driver has implemented the corresponding attribute callback function and passed the necessary operations to the EDAC RAS feature driver during registration. [ bp: Massage, fixup edac_dev_register() retvals. 
] Co-developed-by: Jonathan Cameron Signed-off-by: Jonathan Cameron Signed-off-by: Shiju Jose Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Fan Ni Tested-by: Fan Ni Link: https://lore.kernel.org/r/20250212143654.1893-4-shiju.jose@huawei.com --- Documentation/ABI/testing/sysfs-edac-ecs | 74 ++++++++ Documentation/edac/scrub.rst | 2 + drivers/edac/Kconfig | 9 + drivers/edac/Makefile | 1 + drivers/edac/ecs.c | 205 +++++++++++++++++++++++ drivers/edac/edac_device.c | 19 +++ include/linux/edac.h | 48 +++++- 7 files changed, 356 insertions(+), 2 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-edac-ecs create mode 100755 drivers/edac/ecs.c diff --git a/Documentation/ABI/testing/sysfs-edac-ecs b/Documentation/ABI/testing/sysfs-edac-ecs new file mode 100644 index 0000000000000..87c885c4eb1ab --- /dev/null +++ b/Documentation/ABI/testing/sysfs-edac-ecs @@ -0,0 +1,74 @@ +What: /sys/bus/edac/devices/<dev-name>/ecs_fruX +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + The sysfs EDAC bus devices /<dev-name>/ecs_fruX subdirectory + pertains to the memory media ECS (Error Check Scrub) control + feature, where <dev-name> directory corresponds to a device + registered with the EDAC device driver for the ECS feature. + /ecs_fruX belongs to the media FRUs (Field Replaceable Unit) + under the memory device. + + The sysfs ECS attr nodes are only present if the parent + driver has implemented the corresponding attr callback + function and provided the necessary operations to the EDAC + device driver during registration. + +What: /sys/bus/edac/devices/<dev-name>/ecs_fruX/log_entry_type +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) The log entry type of how the DDR5 ECS log is reported. + + - 0 - per DRAM. + + - 1 - per memory media FRU. + + - All other values are reserved. 
+ +What: /sys/bus/edac/devices/<dev-name>/ecs_fruX/mode +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) The mode of how the DDR5 ECS counts the errors. + Error count is tracked based on two different modes + selected by DDR5 ECS Control Feature - Codeword mode and + Row Count mode. If the ECS is under Codeword mode, then + the error count increments each time a codeword with check + bit errors is detected. If the ECS is under Row Count mode, + then the error counter increments each time a row with + check bit errors is detected. + + - 0 - ECS counts rows in the memory media that have ECC errors. + + - 1 - ECS counts codewords with errors, specifically, it counts + the number of ECC-detected errors in the memory media. + + - All other values are reserved. + +What: /sys/bus/edac/devices/<dev-name>/ecs_fruX/reset +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (WO) ECS reset ECC counter. + + - 1 - reset ECC counter to the default value. + + - All other values are reserved. + +What: /sys/bus/edac/devices/<dev-name>/ecs_fruX/threshold +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) DDR5 ECS threshold count per gigabit of memory cells. + The ECS error count is subject to the ECS Threshold count + per Gbit, which masks error counts less than the Threshold. + + Supported values are 256, 1024 and 4096. + + All other values are reserved. 
diff --git a/Documentation/edac/scrub.rst b/Documentation/edac/scrub.rst index 8b9611e4641b5..daab929cdba13 100644 --- a/Documentation/edac/scrub.rst +++ b/Documentation/edac/scrub.rst @@ -262,3 +262,5 @@ sysfs Sysfs files are documented in `Documentation/ABI/testing/sysfs-edac-scrub` + +`Documentation/ABI/testing/sysfs-edac-ecs` diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 175d706168abd..9dfc2ea02df1e 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -84,6 +84,15 @@ config EDAC_SCRUB into a unified set of functions. Say 'y/n' to enable/disable EDAC scrub feature. +config EDAC_ECS + bool "EDAC ECS (Error Check Scrub) feature" + help + The EDAC ECS feature is optional and is designed to control on-die + error check scrub (e.g., DDR5 ECS) in the system. The common sysfs + ECS interface abstracts the control of various ECS functionalities + into a unified set of functions. + Say 'y/n' to enable/disable EDAC ECS feature. + config EDAC_AMD64 tristate "AMD64 (Opteron, Athlon64)" depends on AMD_NB && EDAC_DECODE_MCE diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index cdbd0a07b9e6b..c3c8b15b14900 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -13,6 +13,7 @@ edac_core-y += edac_module.o edac_device_sysfs.o wq.o edac_core-$(CONFIG_EDAC_DEBUG) += debugfs.o edac_core-$(CONFIG_EDAC_SCRUB) += scrub.o +edac_core-$(CONFIG_EDAC_ECS) += ecs.o ifdef CONFIG_PCI edac_core-y += edac_pci.o edac_pci_sysfs.o diff --git a/drivers/edac/ecs.c b/drivers/edac/ecs.c new file mode 100755 index 0000000000000..1d51838a60c11 --- /dev/null +++ b/drivers/edac/ecs.c @@ -0,0 +1,205 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * The generic ECS driver is designed to support control of on-die error + * check scrub (e.g., DDR5 ECS). The common sysfs ECS interface abstracts + * the control of various ECS functionalities into a unified set of functions. + * + * Copyright (c) 2024-2025 HiSilicon Limited. 
+ */ + +#include + +#define EDAC_ECS_FRU_NAME "ecs_fru" + +enum edac_ecs_attributes { + ECS_LOG_ENTRY_TYPE, + ECS_MODE, + ECS_RESET, + ECS_THRESHOLD, + ECS_MAX_ATTRS +}; + +struct edac_ecs_dev_attr { + struct device_attribute dev_attr; + int fru_id; +}; + +struct edac_ecs_fru_context { + char name[EDAC_FEAT_NAME_LEN]; + struct edac_ecs_dev_attr dev_attr[ECS_MAX_ATTRS]; + struct attribute *ecs_attrs[ECS_MAX_ATTRS + 1]; + struct attribute_group group; +}; + +struct edac_ecs_context { + u16 num_media_frus; + struct edac_ecs_fru_context *fru_ctxs; +}; + +#define TO_ECS_DEV_ATTR(_dev_attr) \ + container_of(_dev_attr, struct edac_ecs_dev_attr, dev_attr) + +#define EDAC_ECS_ATTR_SHOW(attrib, cb, type, format) \ +static ssize_t attrib##_show(struct device *ras_feat_dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct edac_ecs_dev_attr *dev_attr = TO_ECS_DEV_ATTR(attr); \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_ecs_ops *ops = ctx->ecs.ecs_ops; \ + type data; \ + int ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->ecs.private, \ + dev_attr->fru_id, &data); \ + if (ret) \ + return ret; \ + \ + return sysfs_emit(buf, format, data); \ +} + +EDAC_ECS_ATTR_SHOW(log_entry_type, get_log_entry_type, u32, "%u\n") +EDAC_ECS_ATTR_SHOW(mode, get_mode, u32, "%u\n") +EDAC_ECS_ATTR_SHOW(threshold, get_threshold, u32, "%u\n") + +#define EDAC_ECS_ATTR_STORE(attrib, cb, type, conv_func) \ +static ssize_t attrib##_store(struct device *ras_feat_dev, \ + struct device_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + struct edac_ecs_dev_attr *dev_attr = TO_ECS_DEV_ATTR(attr); \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_ecs_ops *ops = ctx->ecs.ecs_ops; \ + type data; \ + int ret; \ + \ + ret = conv_func(buf, 0, &data); \ + if (ret < 0) \ + return ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->ecs.private, \ + dev_attr->fru_id, data); \ + if (ret) \ + return ret; \ + \ + 
return len; \ +} + +EDAC_ECS_ATTR_STORE(log_entry_type, set_log_entry_type, unsigned long, kstrtoul) +EDAC_ECS_ATTR_STORE(mode, set_mode, unsigned long, kstrtoul) +EDAC_ECS_ATTR_STORE(reset, reset, unsigned long, kstrtoul) +EDAC_ECS_ATTR_STORE(threshold, set_threshold, unsigned long, kstrtoul) + +static umode_t ecs_attr_visible(struct kobject *kobj, struct attribute *a, int attr_id) +{ + struct device *ras_feat_dev = kobj_to_dev(kobj); + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); + const struct edac_ecs_ops *ops = ctx->ecs.ecs_ops; + + switch (attr_id) { + case ECS_LOG_ENTRY_TYPE: + if (ops->get_log_entry_type) { + if (ops->set_log_entry_type) + return a->mode; + else + return 0444; + } + break; + case ECS_MODE: + if (ops->get_mode) { + if (ops->set_mode) + return a->mode; + else + return 0444; + } + break; + case ECS_RESET: + if (ops->reset) + return a->mode; + break; + case ECS_THRESHOLD: + if (ops->get_threshold) { + if (ops->set_threshold) + return a->mode; + else + return 0444; + } + break; + default: + break; + } + + return 0; +} + +#define EDAC_ECS_ATTR_RO(_name, _fru_id) \ + ((struct edac_ecs_dev_attr) { .dev_attr = __ATTR_RO(_name), \ + .fru_id = _fru_id }) + +#define EDAC_ECS_ATTR_WO(_name, _fru_id) \ + ((struct edac_ecs_dev_attr) { .dev_attr = __ATTR_WO(_name), \ + .fru_id = _fru_id }) + +#define EDAC_ECS_ATTR_RW(_name, _fru_id) \ + ((struct edac_ecs_dev_attr) { .dev_attr = __ATTR_RW(_name), \ + .fru_id = _fru_id }) + +static int ecs_create_desc(struct device *ecs_dev, const struct attribute_group **attr_groups, + u16 num_media_frus) +{ + struct edac_ecs_context *ecs_ctx; + u32 fru; + + ecs_ctx = devm_kzalloc(ecs_dev, sizeof(*ecs_ctx), GFP_KERNEL); + if (!ecs_ctx) + return -ENOMEM; + + ecs_ctx->num_media_frus = num_media_frus; + ecs_ctx->fru_ctxs = devm_kcalloc(ecs_dev, num_media_frus, + sizeof(*ecs_ctx->fru_ctxs), + GFP_KERNEL); + if (!ecs_ctx->fru_ctxs) + return -ENOMEM; + + for (fru = 0; fru < num_media_frus; fru++) { + struct 
edac_ecs_fru_context *fru_ctx = &ecs_ctx->fru_ctxs[fru]; + struct attribute_group *group = &fru_ctx->group; + int i; + + fru_ctx->dev_attr[ECS_LOG_ENTRY_TYPE] = EDAC_ECS_ATTR_RW(log_entry_type, fru); + fru_ctx->dev_attr[ECS_MODE] = EDAC_ECS_ATTR_RW(mode, fru); + fru_ctx->dev_attr[ECS_RESET] = EDAC_ECS_ATTR_WO(reset, fru); + fru_ctx->dev_attr[ECS_THRESHOLD] = EDAC_ECS_ATTR_RW(threshold, fru); + + for (i = 0; i < ECS_MAX_ATTRS; i++) + fru_ctx->ecs_attrs[i] = &fru_ctx->dev_attr[i].dev_attr.attr; + + sprintf(fru_ctx->name, "%s%d", EDAC_ECS_FRU_NAME, fru); + group->name = fru_ctx->name; + group->attrs = fru_ctx->ecs_attrs; + group->is_visible = ecs_attr_visible; + + attr_groups[fru] = group; + } + + return 0; +} + +/** + * edac_ecs_get_desc - get EDAC ECS descriptors + * @ecs_dev: client device, supports ECS feature + * @attr_groups: pointer to attribute group container + * @num_media_frus: number of media FRUs in the device + * + * Return: + * * %0 - Success. + * * %-EINVAL - Invalid parameters passed. + * * %-ENOMEM - Dynamic memory allocation failed. 
+ */ +int edac_ecs_get_desc(struct device *ecs_dev, + const struct attribute_group **attr_groups, u16 num_media_frus) +{ + if (!ecs_dev || !attr_groups || !num_media_frus) + return -EINVAL; + + return ecs_create_desc(ecs_dev, attr_groups, num_media_frus); +} diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index 54c1e2d024ac2..b914ca3fc5e5f 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -628,6 +628,9 @@ int edac_dev_register(struct device *parent, char *name, attr_gcnt++; scrub_cnt++; break; + case RAS_FEAT_ECS: + attr_gcnt += ras_features[feat].ecs_info.num_media_frus; + break; default: return -EINVAL; } @@ -669,6 +672,22 @@ int edac_dev_register(struct device *parent, char *name, scrub_cnt++; attr_gcnt++; break; + case RAS_FEAT_ECS: + if (!ras_features->ecs_ops) { + ret = -EINVAL; + goto data_mem_free; + } + + dev_data = &ctx->ecs; + dev_data->ecs_ops = ras_features->ecs_ops; + dev_data->private = ras_features->ctx; + ret = edac_ecs_get_desc(parent, &ras_attr_groups[attr_gcnt], + ras_features->ecs_info.num_media_frus); + if (ret) + goto data_mem_free; + + attr_gcnt += ras_features->ecs_info.num_media_frus; + break; default: ret = -EINVAL; goto data_mem_free; diff --git a/include/linux/edac.h b/include/linux/edac.h index 1cbab08720df1..f8346014c14ed 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -667,6 +667,7 @@ static inline struct dimm_info *edac_get_dimm(struct mem_ctl_info *mci, /* RAS feature type */ enum edac_dev_feat { RAS_FEAT_SCRUB, + RAS_FEAT_ECS, RAS_FEAT_MAX }; @@ -707,9 +708,47 @@ static inline int edac_scrub_get_desc(struct device *scrub_dev, { return -EOPNOTSUPP; } #endif /* CONFIG_EDAC_SCRUB */ +/** + * struct edac_ecs_ops - ECS device operations (all elements optional) + * @get_log_entry_type: read the log entry type value. + * @set_log_entry_type: set the log entry type value. + * @get_mode: read the mode value. + * @set_mode: set the mode value. + * @reset: reset the ECS counter. 
+ * @get_threshold: read the threshold count per gigabits of memory cells. + * @set_threshold: set the threshold count per gigabits of memory cells. + */ +struct edac_ecs_ops { + int (*get_log_entry_type)(struct device *dev, void *drv_data, int fru_id, u32 *val); + int (*set_log_entry_type)(struct device *dev, void *drv_data, int fru_id, u32 val); + int (*get_mode)(struct device *dev, void *drv_data, int fru_id, u32 *val); + int (*set_mode)(struct device *dev, void *drv_data, int fru_id, u32 val); + int (*reset)(struct device *dev, void *drv_data, int fru_id, u32 val); + int (*get_threshold)(struct device *dev, void *drv_data, int fru_id, u32 *threshold); + int (*set_threshold)(struct device *dev, void *drv_data, int fru_id, u32 threshold); +}; + +struct edac_ecs_ex_info { + u16 num_media_frus; +}; + +#if IS_ENABLED(CONFIG_EDAC_ECS) +int edac_ecs_get_desc(struct device *ecs_dev, + const struct attribute_group **attr_groups, + u16 num_media_frus); +#else +static inline int edac_ecs_get_desc(struct device *ecs_dev, + const struct attribute_group **attr_groups, + u16 num_media_frus) +{ return -EOPNOTSUPP; } +#endif /* CONFIG_EDAC_ECS */ + /* EDAC device feature information structure */ struct edac_dev_data { - const struct edac_scrub_ops *scrub_ops; + union { + const struct edac_scrub_ops *scrub_ops; + const struct edac_ecs_ops *ecs_ops; + }; u8 instance; void *private; }; @@ -718,13 +757,18 @@ struct edac_dev_feat_ctx { struct device dev; void *private; struct edac_dev_data *scrub; + struct edac_dev_data ecs; }; struct edac_dev_feature { enum edac_dev_feat ft_type; u8 instance; - const struct edac_scrub_ops *scrub_ops; + union { + const struct edac_scrub_ops *scrub_ops; + const struct edac_ecs_ops *ecs_ops; + }; void *ctx; + struct edac_ecs_ex_info ecs_info; }; int edac_dev_register(struct device *parent, char *dev_name, From d09055122bd20827e4772a215b4b1f8f9dce2eda Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sun, 23 Feb 2025 22:24:29 +0100 Subject: [PATCH 
07/25] EDAC: Use string choice helper functions Remove hard-coded strings by using the str_enabled_disabled(), str_yes_no(), str_write_read(), and str_plural() helper functions. Add a space in "All DIMMs support ECC: yes/no" to improve readability. Signed-off-by: Thorsten Blum Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Yazen Ghannam Reviewed-by: Qiuxu Zhuo Link: https://lore.kernel.org/r/20250223212429.3466-2-thorsten.blum@linux.dev --- drivers/edac/amd64_edac.c | 47 +++++++++++++++++++-------------------- drivers/edac/debugfs.c | 5 ++++- drivers/edac/i5400_edac.c | 3 ++- drivers/edac/i7300_edac.c | 7 +++--- drivers/edac/xgene_edac.c | 17 +++++++------- 5 files changed, 42 insertions(+), 37 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 8414ceb43e4ac..d133a5be58905 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only #include +#include #include "amd64_edac.h" #include #include @@ -1171,22 +1172,21 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) edac_dbg(1, " LRDIMM %dx rank multiply\n", (dcsm & 0x3)); } - edac_dbg(1, "All DIMMs support ECC:%s\n", - (dclr & BIT(19)) ? "yes" : "no"); + edac_dbg(1, "All DIMMs support ECC: %s\n", str_yes_no(dclr & BIT(19))); edac_dbg(1, " PAR/ERR parity: %s\n", - (dclr & BIT(8)) ? "enabled" : "disabled"); + str_enabled_disabled(dclr & BIT(8))); if (pvt->fam == 0x10) edac_dbg(1, " DCT 128bit mode width: %s\n", (dclr & BIT(11)) ? "128b" : "64b"); edac_dbg(1, " x4 logical DIMMs present: L0: %s L1: %s L2: %s L3: %s\n", - (dclr & BIT(12)) ? "yes" : "no", - (dclr & BIT(13)) ? "yes" : "no", - (dclr & BIT(14)) ? "yes" : "no", - (dclr & BIT(15)) ? 
"yes" : "no"); + str_yes_no(dclr & BIT(12)), + str_yes_no(dclr & BIT(13)), + str_yes_no(dclr & BIT(14)), + str_yes_no(dclr & BIT(15))); } #define CS_EVEN_PRIMARY BIT(0) @@ -1353,14 +1353,14 @@ static void umc_dump_misc_regs(struct amd64_pvt *pvt) edac_dbg(1, "UMC%d UMC cap high: 0x%x\n", i, umc->umc_cap_hi); edac_dbg(1, "UMC%d ECC capable: %s, ChipKill ECC capable: %s\n", - i, (umc->umc_cap_hi & BIT(30)) ? "yes" : "no", - (umc->umc_cap_hi & BIT(31)) ? "yes" : "no"); + i, str_yes_no(umc->umc_cap_hi & BIT(30)), + str_yes_no(umc->umc_cap_hi & BIT(31))); edac_dbg(1, "UMC%d All DIMMs support ECC: %s\n", - i, (umc->umc_cfg & BIT(12)) ? "yes" : "no"); + i, str_yes_no(umc->umc_cfg & BIT(12))); edac_dbg(1, "UMC%d x4 DIMMs present: %s\n", - i, (umc->dimm_cfg & BIT(6)) ? "yes" : "no"); + i, str_yes_no(umc->dimm_cfg & BIT(6))); edac_dbg(1, "UMC%d x16 DIMMs present: %s\n", - i, (umc->dimm_cfg & BIT(7)) ? "yes" : "no"); + i, str_yes_no(umc->dimm_cfg & BIT(7))); umc_debug_display_dimm_sizes(pvt, i); } @@ -1371,11 +1371,11 @@ static void dct_dump_misc_regs(struct amd64_pvt *pvt) edac_dbg(1, "F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap); edac_dbg(1, " NB two channel DRAM capable: %s\n", - (pvt->nbcap & NBCAP_DCT_DUAL) ? "yes" : "no"); + str_yes_no(pvt->nbcap & NBCAP_DCT_DUAL)); edac_dbg(1, " ECC capable: %s, ChipKill ECC capable: %s\n", - (pvt->nbcap & NBCAP_SECDED) ? "yes" : "no", - (pvt->nbcap & NBCAP_CHIPKILL) ? "yes" : "no"); + str_yes_no(pvt->nbcap & NBCAP_SECDED), + str_yes_no(pvt->nbcap & NBCAP_CHIPKILL)); debug_dump_dramcfg_low(pvt, pvt->dclr0, 0); @@ -1398,7 +1398,7 @@ static void dct_dump_misc_regs(struct amd64_pvt *pvt) if (!dct_ganging_enabled(pvt)) debug_dump_dramcfg_low(pvt, pvt->dclr1, 1); - edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? 
"yes" : "no"); + edac_dbg(1, " DramHoleValid: %s\n", str_yes_no(dhar_valid(pvt))); amd64_info("using x%u syndromes.\n", pvt->ecc_sym_sz); } @@ -2027,15 +2027,15 @@ static void read_dram_ctl_register(struct amd64_pvt *pvt) if (!dct_ganging_enabled(pvt)) edac_dbg(0, " Address range split per DCT: %s\n", - (dct_high_range_enabled(pvt) ? "yes" : "no")); + str_yes_no(dct_high_range_enabled(pvt))); edac_dbg(0, " data interleave for ECC: %s, DRAM cleared since last warm reset: %s\n", - (dct_data_intlv_enabled(pvt) ? "enabled" : "disabled"), - (dct_memory_cleared(pvt) ? "yes" : "no")); + str_enabled_disabled(dct_data_intlv_enabled(pvt)), + str_yes_no(dct_memory_cleared(pvt))); edac_dbg(0, " channel interleave: %s, " "interleave bits selector: 0x%x\n", - (dct_interleave_enabled(pvt) ? "enabled" : "disabled"), + str_enabled_disabled(dct_interleave_enabled(pvt)), dct_sel_interleave_addr(pvt)); } @@ -3208,8 +3208,7 @@ static bool nb_mce_bank_enabled_on_node(u16 nid) nbe = reg->l & MSR_MCGCTL_NBE; edac_dbg(0, "core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n", - cpu, reg->q, - (nbe ? "enabled" : "disabled")); + cpu, reg->q, str_enabled_disabled(nbe)); if (!nbe) goto out; @@ -3353,7 +3352,7 @@ static bool dct_ecc_enabled(struct amd64_pvt *pvt) edac_dbg(0, "NB MCE bank disabled, set MSR 0x%08x[4] on node %d to enable.\n", MSR_IA32_MCG_CTL, nid); - edac_dbg(3, "Node %d: DRAM ECC %s.\n", nid, (ecc_en ? "enabled" : "disabled")); + edac_dbg(3, "Node %d: DRAM ECC %s.\n", nid, str_enabled_disabled(ecc_en)); if (!ecc_en || !nb_mce_en) return false; @@ -3378,7 +3377,7 @@ static bool umc_ecc_enabled(struct amd64_pvt *pvt) } } - edac_dbg(3, "Node %d: DRAM ECC %s.\n", pvt->mc_node_id, (ecc_en ? 
"enabled" : "disabled")); + edac_dbg(3, "Node %d: DRAM ECC %s.\n", pvt->mc_node_id, str_enabled_disabled(ecc_en)); return ecc_en; } diff --git a/drivers/edac/debugfs.c b/drivers/edac/debugfs.c index 4804332d99465..8195fc9c9354a 100644 --- a/drivers/edac/debugfs.c +++ b/drivers/edac/debugfs.c @@ -1,4 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only + +#include + #include "edac_module.h" static struct dentry *edac_debugfs; @@ -22,7 +25,7 @@ static ssize_t edac_fake_inject_write(struct file *file, "Generating %d %s fake error%s to %d.%d.%d to test core handling. NOTE: this won't test the driver-specific decoding logic.\n", errcount, (type == HW_EVENT_ERR_UNCORRECTED) ? "UE" : "CE", - errcount > 1 ? "s" : "", + str_plural(errcount), mci->fake_inject_layer[0], mci->fake_inject_layer[1], mci->fake_inject_layer[2] diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c index 49b4499269fb7..b5cf25905b059 100644 --- a/drivers/edac/i5400_edac.c +++ b/drivers/edac/i5400_edac.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "edac_module.h" @@ -899,7 +900,7 @@ static void decode_mtr(int slot_row, u16 mtr) edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); edac_dbg(2, "\t\tELECTRICAL THROTTLING is %s\n", - MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled"); + str_enabled_disabled(MTR_DIMMS_ETHROTTLE(mtr))); edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr)); edac_dbg(2, "\t\tNUMRANK: %s\n", diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 61adaa872ba7b..69068f8d0cadf 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "edac_module.h" @@ -620,7 +621,7 @@ static int decode_mtr(struct i7300_pvt *pvt, edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); edac_dbg(2, "\t\tELECTRICAL THROTTLING is %s\n", - MTR_DIMMS_ETHROTTLE(mtr) ? 
"enabled" : "disabled"); + str_enabled_disabled(MTR_DIMMS_ETHROTTLE(mtr))); edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr)); edac_dbg(2, "\t\tNUMRANK: %s\n", @@ -871,9 +872,9 @@ static int i7300_get_mc_regs(struct mem_ctl_info *mci) IS_MIRRORED(pvt->mc_settings) ? "" : "non-"); edac_dbg(0, "Error detection is %s\n", - IS_ECC_ENABLED(pvt->mc_settings) ? "enabled" : "disabled"); + str_enabled_disabled(IS_ECC_ENABLED(pvt->mc_settings))); edac_dbg(0, "Retry is %s\n", - IS_RETRY_ENABLED(pvt->mc_settings) ? "enabled" : "disabled"); + str_enabled_disabled(IS_RETRY_ENABLED(pvt->mc_settings))); /* Get Memory Interleave Range registers */ pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR0, diff --git a/drivers/edac/xgene_edac.c b/drivers/edac/xgene_edac.c index 699c7d29d80cd..9955396c9a520 100644 --- a/drivers/edac/xgene_edac.c +++ b/drivers/edac/xgene_edac.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "edac_module.h" @@ -1407,7 +1408,7 @@ static void xgene_edac_iob_gic_report(struct edac_device_ctl_info *edac_dev) dev_err(edac_dev->dev, "Multiple XGIC write size error\n"); info = readl(ctx->dev_csr + XGICTRANSERRREQINFO); dev_err(edac_dev->dev, "XGIC %s access @ 0x%08X (0x%08X)\n", - info & REQTYPE_MASK ? "read" : "write", ERRADDR_RD(info), + str_read_write(info & REQTYPE_MASK), ERRADDR_RD(info), info); writel(reg, ctx->dev_csr + XGICTRANSERRINTSTS); @@ -1489,19 +1490,19 @@ static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev) if (reg & AGENT_OFFLINE_ERR_MASK) dev_err(edac_dev->dev, "IOB bus %s access to offline agent error\n", - write ? "write" : "read"); + str_write_read(write)); if (reg & UNIMPL_RBPAGE_ERR_MASK) dev_err(edac_dev->dev, "IOB bus %s access to unimplemented page error\n", - write ? "write" : "read"); + str_write_read(write)); if (reg & WORD_ALIGNED_ERR_MASK) dev_err(edac_dev->dev, "IOB bus %s word aligned access error\n", - write ? 
"write" : "read"); + str_write_read(write)); if (reg & PAGE_ACCESS_ERR_MASK) dev_err(edac_dev->dev, "IOB bus %s to page out of range access error\n", - write ? "write" : "read"); + str_write_read(write)); if (regmap_write(ctx->edac->rb_map, RBEIR, 0)) return; if (regmap_write(ctx->edac->rb_map, RBCSR, 0)) @@ -1560,7 +1561,7 @@ static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev) err_addr_lo = readl(ctx->dev_csr + IOBBATRANSERRREQINFOL); err_addr_hi = readl(ctx->dev_csr + IOBBATRANSERRREQINFOH); dev_err(edac_dev->dev, "IOB BA %s access at 0x%02X.%08X (0x%08X)\n", - REQTYPE_F2_RD(err_addr_hi) ? "read" : "write", + str_read_write(REQTYPE_F2_RD(err_addr_hi)), ERRADDRH_F2_RD(err_addr_hi), err_addr_lo, err_addr_hi); if (reg & WRERR_RESP_MASK) dev_err(edac_dev->dev, "IOB BA requestor ID 0x%08X\n", @@ -1611,7 +1612,7 @@ static void xgene_edac_pa_report(struct edac_device_ctl_info *edac_dev) dev_err(edac_dev->dev, "%sAXI slave 0 illegal %s access @ 0x%02X.%08X (0x%08X)\n", reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "", - REQTYPE_RD(err_addr_hi) ? "read" : "write", + str_read_write(REQTYPE_RD(err_addr_hi)), ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi); writel(reg, ctx->dev_csr + IOBAXIS0TRANSERRINTSTS); @@ -1625,7 +1626,7 @@ static void xgene_edac_pa_report(struct edac_device_ctl_info *edac_dev) dev_err(edac_dev->dev, "%sAXI slave 1 illegal %s access @ 0x%02X.%08X (0x%08X)\n", reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "", - REQTYPE_RD(err_addr_hi) ? 
"read" : "write", + str_read_write(REQTYPE_RD(err_addr_hi)), ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi); writel(reg, ctx->dev_csr + IOBAXIS1TRANSERRINTSTS); } From 699ea5219c4b1d9d8819eb2d99e51a3fdb7b1d7b Mon Sep 17 00:00:00 2001 From: Shiju Jose Date: Wed, 12 Feb 2025 14:36:42 +0000 Subject: [PATCH 08/25] EDAC: Add a memory repair control feature Add a generic EDAC memory repair control driver to manage memory repairs in the system, such as CXL Post Package Repair (PPR) and other soft and hard PPR features. For example, a CXL device with DRAM components that support PPR features may implement PPR maintenance operations. DRAM components may support two types of PPR: - hard PPR, for a permanent row repair, and - soft PPR, for a temporary row repair. Soft PPR is much faster than hard PPR, but the repair is lost with a power cycle. When a CXL device detects an error in a memory, it may report the need for a repair maintenance operation by using an event record where the "maintenance needed" flag is set. The event records contain the device physical address (DPA) and other optional attributes of the memory to repair. The kernel will report the corresponding CXL general media or DRAM trace event to userspace, and userspace tools (e.g. rasdaemon) will initiate a repair operation in response to the device request via the sysfs repair control. Device with memory repair features registers with EDAC device driver, which retrieves a memory repair descriptor from EDAC memory repair driver and exposes the sysfs repair control attributes to userspace in /sys/bus/edac/devices//mem_repairX/. The common memory repair control interface abstracts the control of arbitrary memory repair functionality into a standardized set of functions. The sysfs memory repair attribute nodes are only available if the client driver has implemented the corresponding attribute callback function and provided operations to the EDAC device driver during registration. 
[ bp: Massage, fixup edac_dev_register() retvals, merge write_overflow fix to mem_repair_create_desc() ] Signed-off-by: Shiju Jose Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20250212143654.1893-5-shiju.jose@huawei.com --- .../ABI/testing/sysfs-edac-memory-repair | 149 ++++++++++ Documentation/edac/features.rst | 4 + Documentation/edac/index.rst | 1 + Documentation/edac/memory_repair.rst | 121 ++++++++ drivers/edac/Kconfig | 10 + drivers/edac/Makefile | 1 + drivers/edac/edac_device.c | 33 +++ drivers/edac/mem_repair.c | 275 ++++++++++++++++++ include/linux/edac.h | 74 +++++ 9 files changed, 668 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-edac-memory-repair create mode 100644 Documentation/edac/memory_repair.rst create mode 100755 drivers/edac/mem_repair.c diff --git a/Documentation/ABI/testing/sysfs-edac-memory-repair b/Documentation/ABI/testing/sysfs-edac-memory-repair new file mode 100644 index 0000000000000..c54f59e4497b8 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-edac-memory-repair @@ -0,0 +1,149 @@ +What: /sys/bus/edac/devices/<dev-name>/mem_repairX +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + The sysfs EDAC bus devices /<dev-name>/mem_repairX subdirectory + pertains to the memory media repair features control, such as + PPR (Post Package Repair), memory sparing etc, where + <dev-name> directory corresponds to a device registered with the EDAC + device driver for the memory repair features. + + Post Package Repair is a maintenance operation which requests the memory + device to perform a repair operation on its media. It is a memory + self-healing feature that fixes a failing memory location by + replacing it with a spare row in a DRAM device. For example, a + CXL memory device with DRAM components that support PPR features may + implement PPR maintenance operations.
DRAM components may support + two types of PPR functions: hard PPR, for a permanent row repair, and + soft PPR, for a temporary row repair. Soft PPR may be much faster + than hard PPR, but the repair is lost with a power cycle. + + The sysfs attribute nodes for a repair feature are only + present if the parent driver has implemented the corresponding + attr callback function and provided the necessary operations + to the EDAC device driver during registration. + + In some states of system configuration (e.g. before address + decoders have been configured), memory devices (e.g. CXL) + may not have an active mapping in the main host + physical address map. As such, the memory to repair must be + identified by a device specific physical addressing scheme + using a device physical address (DPA). The DPA and other control + attributes to use will be presented in related error records. + +What: /sys/bus/edac/devices/<dev-name>/mem_repairX/repair_type +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RO) Memory repair type, e.g. post package repair, + memory sparing etc. Valid values are: + + - ppr - Post package repair. + + - All other values are reserved. + +What: /sys/bus/edac/devices/<dev-name>/mem_repairX/persist_mode +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) Get/Set the current persist repair mode set for a + repair function. A persist repair mode supported in the + device, based on a memory repair function, is either temporary, + which is lost with a power cycle, or permanent. Valid values are: + + - 0 - Soft memory repair (temporary repair). + + - 1 - Hard memory repair (permanent repair). + + - All other values are reserved.
+ +What: /sys/bus/edac/devices/<dev-name>/mem_repairX/repair_safe_when_in_use +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RO) True if memory media is accessible and data is retained + during the memory repair operation. + The data may not be retained and memory requests may not be + correctly processed during a repair operation. In such a case, + the repair operation cannot be executed at runtime. The memory + must be taken offline. + +What: /sys/bus/edac/devices/<dev-name>/mem_repairX/hpa +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) Host Physical Address (HPA) of the memory to repair. + The HPA to use will be provided in related error records. + +What: /sys/bus/edac/devices/<dev-name>/mem_repairX/dpa +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) Device Physical Address (DPA) of the memory to repair. + The specific DPA to use will be provided in related error + records. + + In some states of system configuration (e.g. before address + decoders have been configured), memory devices (e.g. CXL) + may not have an active mapping in the main host + physical address map. As such, the memory to repair must be + identified by a device specific physical addressing scheme + using a DPA. The device physical address (DPA) to use will be + presented in related error records. + +What: /sys/bus/edac/devices/<dev-name>/mem_repairX/nibble_mask +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) Read/Write Nibble mask of the memory to repair. + Nibble mask identifies one or more nibbles in error on the + memory bus that produced the error event. Nibble Mask bit 0 + shall be set if nibble 0 on the memory bus produced the + event, etc. For example, for CXL PPR and sparing, a nibble mask + bit set to 1 indicates the request to perform repair + operation in the specific device.
All nibble mask bits set + to 1 indicates the request to perform the operation in all + devices. E.g. for CXL memory repair, the specific value of + nibble mask to use will be provided in related error records. + For more details, see nibble mask field in CXL spec ver 3.1, + section 8.2.9.7.1.2 Table 8-103 soft PPR and section + 8.2.9.7.1.3 Table 8-104 hard PPR, section 8.2.9.7.1.4 + Table 8-105 memory sparing. + +What: /sys/bus/edac/devices/<dev-name>/mem_repairX/min_hpa +What: /sys/bus/edac/devices/<dev-name>/mem_repairX/max_hpa +What: /sys/bus/edac/devices/<dev-name>/mem_repairX/min_dpa +What: /sys/bus/edac/devices/<dev-name>/mem_repairX/max_dpa +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RO) The supported range of memory address that is to be + repaired. The memory device may give the supported range of + attributes to use and it will depend on the memory device + and the portion of memory to repair. + The userspace may receive the specific value of attributes + to use for a repair operation from the memory device via + related error records and trace events, e.g. CXL DRAM + and CXL general media error records in CXL memory devices. + +What: /sys/bus/edac/devices/<dev-name>/mem_repairX/repair +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (WO) Issue the memory repair operation for the specified + memory repair attributes. The operation may fail if resources + are insufficient based on the requirements of the memory + device and repair function. + + - 1 - Issue the repair operation. + + - All other values are reserved. diff --git a/Documentation/edac/features.rst b/Documentation/edac/features.rst index fdcecb3df10ee..3f283de297c79 100644 --- a/Documentation/edac/features.rst +++ b/Documentation/edac/features.rst @@ -97,3 +97,7 @@ RAS features 1. Memory Scrub Memory scrub features are documented in `Documentation/edac/scrub.rst`. + +2.
Memory Repair + +Memory repair features are documented in `Documentation/edac/memory_repair.rst`. diff --git a/Documentation/edac/index.rst b/Documentation/edac/index.rst index 0a00c23838b60..420c6601dbae5 100644 --- a/Documentation/edac/index.rst +++ b/Documentation/edac/index.rst @@ -8,4 +8,5 @@ EDAC Subsystem :maxdepth: 1 features + memory_repair scrub diff --git a/Documentation/edac/memory_repair.rst b/Documentation/edac/memory_repair.rst new file mode 100644 index 0000000000000..52162a422864d --- /dev/null +++ b/Documentation/edac/memory_repair.rst @@ -0,0 +1,121 @@ +.. SPDX-License-Identifier: GPL-2.0 OR GFDL-1.2-no-invariants-or-later + +========================== +EDAC Memory Repair Control +========================== + +Copyright (c) 2024-2025 HiSilicon Limited. + +:Author: Shiju Jose +:License: The GNU Free Documentation License, Version 1.2 without + Invariant Sections, Front-Cover Texts nor Back-Cover Texts. + (dual licensed under the GPL v2) +:Original Reviewers: + +- Written for: 6.15 + +Introduction +------------ + +Some memory devices support repair operations to address issues in their +memory media. Post Package Repair (PPR) and memory sparing are examples of +such features. + +Post Package Repair (PPR) +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Post Package Repair is a maintenance operation which requests the memory +device to perform repair operation on its media. It is a memory self-healing +feature that fixes a failing memory location by replacing it with a spare row +in a DRAM device. + +For example, a CXL memory device with DRAM components that support PPR +features implements maintenance operations. DRAM components support those +types of PPR functions: + + - hard PPR, for a permanent row repair, and + - soft PPR, for a temporary row repair. + +Soft PPR is much faster than hard PPR, but the repair is lost after a power +cycle. + +The data may not be retained and memory requests may not be correctly +processed during a repair operation. 
In such a case, the repair operation should +not be executed at runtime. + +For example, for CXL memory devices, see CXL spec rev 3.1 [1]_ sections +8.2.9.7.1.1 PPR Maintenance Operations, 8.2.9.7.1.2 sPPR Maintenance Operation +and 8.2.9.7.1.3 hPPR Maintenance Operation for more details. + +Memory Sparing +~~~~~~~~~~~~~~ + +Memory sparing is a repair function that replaces a portion of memory with +a portion of functional memory at a particular granularity. Memory +sparing has cacheline/row/bank/rank sparing granularities. For example, in +rank memory-sparing mode, one memory rank serves as a spare for other ranks on +the same channel in case they fail. + +The spare rank is held in reserve and not used as active memory until +a failure is indicated, with reserved capacity subtracted from the total +available memory in the system. + +After an error threshold is surpassed in a system protected by memory sparing, +the content of a failing rank of DIMMs is copied to the spare rank. The +failing rank is then taken offline and the spare rank placed online for use as +active memory in place of the failed rank. + +For example, CXL memory devices can support various subclasses of sparing +operation, which vary in terms of the scope of the sparing being performed. + +The cacheline sparing subclass refers to a sparing action that can replace a full +cacheline. Row sparing is provided as an alternative to PPR sparing functions +and its scope is that of a single DDR row. Bank sparing allows an entire bank +to be replaced. Rank sparing is defined as an operation in which an entire DDR +rank is replaced. + +See CXL spec 3.1 [1]_ section 8.2.9.7.1.4 Memory Sparing Maintenance +Operations for more details. + +.. [1] https://computeexpresslink.org/cxl-specification/ + +Use cases of generic memory repair features control +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +1. The soft PPR, hard PPR and memory-sparing features share similar control + attributes.
Therefore, there is a need for a standardized, generic sysfs + repair control that is exposed to userspace and used by administrators, + scripts and tools. + +2. When a CXL device detects an error in a memory component, it informs the + host of the need for a repair maintenance operation by using an event + record where the "maintenance needed" flag is set. The event record + specifies the device physical address (DPA) and attributes of the memory + that requires repair. The kernel reports the corresponding CXL general + media or DRAM trace event to userspace, and userspace tools (e.g. + rasdaemon) initiate a repair maintenance operation in response to the + device request using the sysfs repair control. + +3. Userspace tools, such as rasdaemon, request a repair operation on a memory + region when the "maintenance needed" flag is set, when an uncorrected memory + error or an excess of corrected memory errors above a threshold value is reported, + or when the "corrected errors threshold exceeded" flag is set for that memory. + +4. Multiple PPR/sparing instances may be present per memory device. + +5. Drivers should enforce that live repair is safe. In systems where memory + mapping functions can change between boots, one approach to this is to log + memory errors seen on this boot against which to check live memory repair + requests. + +The File System +--------------- + +The control attributes of a registered memory repair instance can be +accessed in /sys/bus/edac/devices/<dev-name>/mem_repairX/ + +sysfs +----- + +Sysfs files are documented in +`Documentation/ABI/testing/sysfs-edac-memory-repair`. diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 9dfc2ea02df1e..703522d5d6c32 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -93,6 +93,16 @@ config EDAC_ECS into a unified set of functions. Say 'y/n' to enable/disable EDAC ECS feature.
+config EDAC_MEM_REPAIR + bool "EDAC memory repair feature" + help + The EDAC memory repair feature is optional and is designed to control + the memory devices with repair features, such as Post Package Repair + (PPR), memory sparing etc. The common sysfs memory repair interface + abstracts the control of various memory repair functionalities into + a unified set of functions. + Say 'y/n' to enable/disable EDAC memory repair feature. + config EDAC_AMD64 tristate "AMD64 (Opteron, Athlon64)" depends on AMD_NB && EDAC_DECODE_MCE diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index c3c8b15b14900..a8f2d8f6c894a 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -14,6 +14,7 @@ edac_core-y += edac_module.o edac_device_sysfs.o wq.o edac_core-$(CONFIG_EDAC_DEBUG) += debugfs.o edac_core-$(CONFIG_EDAC_SCRUB) += scrub.o edac_core-$(CONFIG_EDAC_ECS) += ecs.o +edac_core-$(CONFIG_EDAC_MEM_REPAIR) += mem_repair.o ifdef CONFIG_PCI edac_core-y += edac_pci.o edac_pci_sysfs.o diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index b914ca3fc5e5f..16611515ab348 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -575,6 +575,7 @@ static void edac_dev_release(struct device *dev) { struct edac_dev_feat_ctx *ctx = container_of(dev, struct edac_dev_feat_ctx, dev); + kfree(ctx->mem_repair); kfree(ctx->scrub); kfree(ctx->dev.groups); kfree(ctx); @@ -613,6 +614,7 @@ int edac_dev_register(struct device *parent, char *name, const struct attribute_group **ras_attr_groups; struct edac_dev_data *dev_data; struct edac_dev_feat_ctx *ctx; + int mem_repair_cnt = 0; int attr_gcnt = 0; int ret = -ENOMEM; int scrub_cnt = 0; @@ -631,6 +633,10 @@ int edac_dev_register(struct device *parent, char *name, case RAS_FEAT_ECS: attr_gcnt += ras_features[feat].ecs_info.num_media_frus; break; + case RAS_FEAT_MEM_REPAIR: + attr_gcnt++; + mem_repair_cnt++; + break; default: return -EINVAL; } @@ -650,8 +656,15 @@ int edac_dev_register(struct device 
*parent, char *name, goto groups_free; } + if (mem_repair_cnt) { + ctx->mem_repair = kcalloc(mem_repair_cnt, sizeof(*ctx->mem_repair), GFP_KERNEL); + if (!ctx->mem_repair) + goto data_mem_free; + } + attr_gcnt = 0; scrub_cnt = 0; + mem_repair_cnt = 0; for (feat = 0; feat < num_features; feat++, ras_features++) { switch (ras_features->ft_type) { case RAS_FEAT_SCRUB: @@ -688,6 +701,25 @@ int edac_dev_register(struct device *parent, char *name, attr_gcnt += ras_features->ecs_info.num_media_frus; break; + case RAS_FEAT_MEM_REPAIR: + if (!ras_features->mem_repair_ops || + mem_repair_cnt != ras_features->instance) { + ret = -EINVAL; + goto data_mem_free; + } + + dev_data = &ctx->mem_repair[mem_repair_cnt]; + dev_data->instance = mem_repair_cnt; + dev_data->mem_repair_ops = ras_features->mem_repair_ops; + dev_data->private = ras_features->ctx; + ret = edac_mem_repair_get_desc(parent, &ras_attr_groups[attr_gcnt], + ras_features->instance); + if (ret) + goto data_mem_free; + + mem_repair_cnt++; + attr_gcnt++; + break; default: ret = -EINVAL; goto data_mem_free; @@ -714,6 +746,7 @@ int edac_dev_register(struct device *parent, char *name, return devm_add_action_or_reset(parent, edac_dev_unreg, &ctx->dev); data_mem_free: + kfree(ctx->mem_repair); kfree(ctx->scrub); groups_free: kfree(ras_attr_groups); diff --git a/drivers/edac/mem_repair.c b/drivers/edac/mem_repair.c new file mode 100755 index 0000000000000..5c94ac1027db6 --- /dev/null +++ b/drivers/edac/mem_repair.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * The generic EDAC memory repair driver is designed to control the memory + * devices with memory repair features, such as Post Package Repair (PPR), + * memory sparing etc. The common sysfs memory repair interface abstracts + * the control of various arbitrary memory repair functionalities into a + * unified set of functions. + * + * Copyright (c) 2024-2025 HiSilicon Limited. 
+ */ + +#include + +enum edac_mem_repair_attributes { + MR_TYPE, + MR_PERSIST_MODE, + MR_SAFE_IN_USE, + MR_HPA, + MR_MIN_HPA, + MR_MAX_HPA, + MR_DPA, + MR_MIN_DPA, + MR_MAX_DPA, + MR_NIBBLE_MASK, + MEM_DO_REPAIR, + MR_MAX_ATTRS +}; + +struct edac_mem_repair_dev_attr { + struct device_attribute dev_attr; + u8 instance; +}; + +struct edac_mem_repair_context { + char name[EDAC_FEAT_NAME_LEN]; + struct edac_mem_repair_dev_attr mem_repair_dev_attr[MR_MAX_ATTRS]; + struct attribute *mem_repair_attrs[MR_MAX_ATTRS + 1]; + struct attribute_group group; +}; + +#define TO_MR_DEV_ATTR(_dev_attr) \ + container_of(_dev_attr, struct edac_mem_repair_dev_attr, dev_attr) + +#define MR_ATTR_SHOW(attrib, cb, type, format) \ +static ssize_t attrib##_show(struct device *ras_feat_dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + u8 inst = TO_MR_DEV_ATTR(attr)->instance; \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_mem_repair_ops *ops = \ + ctx->mem_repair[inst].mem_repair_ops; \ + type data; \ + int ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->mem_repair[inst].private, \ + &data); \ + if (ret) \ + return ret; \ + \ + return sysfs_emit(buf, format, data); \ +} + +MR_ATTR_SHOW(repair_type, get_repair_type, const char *, "%s\n") +MR_ATTR_SHOW(persist_mode, get_persist_mode, bool, "%u\n") +MR_ATTR_SHOW(repair_safe_when_in_use, get_repair_safe_when_in_use, bool, "%u\n") +MR_ATTR_SHOW(hpa, get_hpa, u64, "0x%llx\n") +MR_ATTR_SHOW(min_hpa, get_min_hpa, u64, "0x%llx\n") +MR_ATTR_SHOW(max_hpa, get_max_hpa, u64, "0x%llx\n") +MR_ATTR_SHOW(dpa, get_dpa, u64, "0x%llx\n") +MR_ATTR_SHOW(min_dpa, get_min_dpa, u64, "0x%llx\n") +MR_ATTR_SHOW(max_dpa, get_max_dpa, u64, "0x%llx\n") +MR_ATTR_SHOW(nibble_mask, get_nibble_mask, u32, "0x%x\n") + +#define MR_ATTR_STORE(attrib, cb, type, conv_func) \ +static ssize_t attrib##_store(struct device *ras_feat_dev, \ + struct device_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + u8 inst = 
TO_MR_DEV_ATTR(attr)->instance; \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_mem_repair_ops *ops = \ + ctx->mem_repair[inst].mem_repair_ops; \ + type data; \ + int ret; \ + \ + ret = conv_func(buf, 0, &data); \ + if (ret < 0) \ + return ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->mem_repair[inst].private, \ + data); \ + if (ret) \ + return ret; \ + \ + return len; \ +} + +MR_ATTR_STORE(persist_mode, set_persist_mode, unsigned long, kstrtoul) +MR_ATTR_STORE(hpa, set_hpa, u64, kstrtou64) +MR_ATTR_STORE(dpa, set_dpa, u64, kstrtou64) +MR_ATTR_STORE(nibble_mask, set_nibble_mask, unsigned long, kstrtoul) + +#define MR_DO_OP(attrib, cb) \ +static ssize_t attrib##_store(struct device *ras_feat_dev, \ + struct device_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + u8 inst = TO_MR_DEV_ATTR(attr)->instance; \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_mem_repair_ops *ops = ctx->mem_repair[inst].mem_repair_ops; \ + unsigned long data; \ + int ret; \ + \ + ret = kstrtoul(buf, 0, &data); \ + if (ret < 0) \ + return ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->mem_repair[inst].private, data); \ + if (ret) \ + return ret; \ + \ + return len; \ +} + +MR_DO_OP(repair, do_repair) + +static umode_t mem_repair_attr_visible(struct kobject *kobj, struct attribute *a, int attr_id) +{ + struct device *ras_feat_dev = kobj_to_dev(kobj); + struct device_attribute *dev_attr = container_of(a, struct device_attribute, attr); + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); + u8 inst = TO_MR_DEV_ATTR(dev_attr)->instance; + const struct edac_mem_repair_ops *ops = ctx->mem_repair[inst].mem_repair_ops; + + switch (attr_id) { + case MR_TYPE: + if (ops->get_repair_type) + return a->mode; + break; + case MR_PERSIST_MODE: + if (ops->get_persist_mode) { + if (ops->set_persist_mode) + return a->mode; + else + return 0444; + } + break; + case MR_SAFE_IN_USE: + if 
(ops->get_repair_safe_when_in_use) + return a->mode; + break; + case MR_HPA: + if (ops->get_hpa) { + if (ops->set_hpa) + return a->mode; + else + return 0444; + } + break; + case MR_MIN_HPA: + if (ops->get_min_hpa) + return a->mode; + break; + case MR_MAX_HPA: + if (ops->get_max_hpa) + return a->mode; + break; + case MR_DPA: + if (ops->get_dpa) { + if (ops->set_dpa) + return a->mode; + else + return 0444; + } + break; + case MR_MIN_DPA: + if (ops->get_min_dpa) + return a->mode; + break; + case MR_MAX_DPA: + if (ops->get_max_dpa) + return a->mode; + break; + case MR_NIBBLE_MASK: + if (ops->get_nibble_mask) { + if (ops->set_nibble_mask) + return a->mode; + else + return 0444; + } + break; + case MEM_DO_REPAIR: + if (ops->do_repair) + return a->mode; + break; + default: + break; + } + + return 0; +} + +#define MR_ATTR_RO(_name, _instance) \ + ((struct edac_mem_repair_dev_attr) { .dev_attr = __ATTR_RO(_name), \ + .instance = _instance }) + +#define MR_ATTR_WO(_name, _instance) \ + ((struct edac_mem_repair_dev_attr) { .dev_attr = __ATTR_WO(_name), \ + .instance = _instance }) + +#define MR_ATTR_RW(_name, _instance) \ + ((struct edac_mem_repair_dev_attr) { .dev_attr = __ATTR_RW(_name), \ + .instance = _instance }) + +static int mem_repair_create_desc(struct device *dev, + const struct attribute_group **attr_groups, + u8 instance) +{ + struct edac_mem_repair_context *ctx; + struct attribute_group *group; + int i; + struct edac_mem_repair_dev_attr dev_attr[] = { + [MR_TYPE] = MR_ATTR_RO(repair_type, instance), + [MR_PERSIST_MODE] = MR_ATTR_RW(persist_mode, instance), + [MR_SAFE_IN_USE] = MR_ATTR_RO(repair_safe_when_in_use, instance), + [MR_HPA] = MR_ATTR_RW(hpa, instance), + [MR_MIN_HPA] = MR_ATTR_RO(min_hpa, instance), + [MR_MAX_HPA] = MR_ATTR_RO(max_hpa, instance), + [MR_DPA] = MR_ATTR_RW(dpa, instance), + [MR_MIN_DPA] = MR_ATTR_RO(min_dpa, instance), + [MR_MAX_DPA] = MR_ATTR_RO(max_dpa, instance), + [MR_NIBBLE_MASK] = MR_ATTR_RW(nibble_mask, instance), + [MEM_DO_REPAIR] 
= MR_ATTR_WO(repair, instance) + }; + + ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + for (i = 0; i < MR_MAX_ATTRS; i++) { + memcpy(&ctx->mem_repair_dev_attr[i], + &dev_attr[i], sizeof(dev_attr[i])); + ctx->mem_repair_attrs[i] = + &ctx->mem_repair_dev_attr[i].dev_attr.attr; + } + + sprintf(ctx->name, "%s%d", "mem_repair", instance); + group = &ctx->group; + group->name = ctx->name; + group->attrs = ctx->mem_repair_attrs; + group->is_visible = mem_repair_attr_visible; + attr_groups[0] = group; + + return 0; +} + +/** + * edac_mem_repair_get_desc - get EDAC memory repair descriptors + * @dev: client device with memory repair feature + * @attr_groups: pointer to attribute group container + * @instance: device's memory repair instance number. + * + * Return: + * * %0 - Success. + * * %-EINVAL - Invalid parameters passed. + * * %-ENOMEM - Dynamic memory allocation failed. + */ +int edac_mem_repair_get_desc(struct device *dev, + const struct attribute_group **attr_groups, u8 instance) +{ + if (!dev || !attr_groups) + return -EINVAL; + + return mem_repair_create_desc(dev, attr_groups, instance); +} diff --git a/include/linux/edac.h b/include/linux/edac.h index f8346014c14ed..cfb2ef41ab955 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -668,6 +668,7 @@ static inline struct dimm_info *edac_get_dimm(struct mem_ctl_info *mci, enum edac_dev_feat { RAS_FEAT_SCRUB, RAS_FEAT_ECS, + RAS_FEAT_MEM_REPAIR, RAS_FEAT_MAX }; @@ -743,11 +744,82 @@ static inline int edac_ecs_get_desc(struct device *ecs_dev, { return -EOPNOTSUPP; } #endif /* CONFIG_EDAC_ECS */ +enum edac_mem_repair_type { + EDAC_REPAIR_MAX +}; + +enum edac_mem_repair_cmd { + EDAC_DO_MEM_REPAIR = 1, +}; + +/** + * struct edac_mem_repair_ops - memory repair operations + * (all elements are optional except do_repair, set_hpa/set_dpa) + * @get_repair_type: get the memory repair type, listed in + * enum edac_mem_repair_type. 
+ * @get_persist_mode: get the current persist mode. + * false - Soft repair type (temporary repair). + * true - Hard memory repair type (permanent repair). + * @set_persist_mode: set the persist mode of the memory repair instance. + * @get_repair_safe_when_in_use: get whether memory media is accessible and + * data is retained during repair operation. + * @get_hpa: get current host physical address (HPA) of memory to repair. + * @set_hpa: set host physical address (HPA) of memory to repair. + * @get_min_hpa: get the minimum supported host physical address (HPA). + * @get_max_hpa: get the maximum supported host physical address (HPA). + * @get_dpa: get current device physical address (DPA) of memory to repair. + * @set_dpa: set device physical address (DPA) of memory to repair. + * In some states of system configuration (e.g. before address decoders + * have been configured), memory devices (e.g. CXL) may not have an active + * mapping in the host physical address map. As such, the memory + * to repair must be identified by a device specific physical addressing + * scheme using a device physical address(DPA). The DPA and other control + * attributes to use for the repair operations will be presented in related + * error records. + * @get_min_dpa: get the minimum supported device physical address (DPA). + * @get_max_dpa: get the maximum supported device physical address (DPA). + * @get_nibble_mask: get current nibble mask of memory to repair. + * @set_nibble_mask: set nibble mask of memory to repair. + * @do_repair: Issue memory repair operation for the HPA/DPA and + * other control attributes set for the memory to repair. + * + * All elements are optional except do_repair and at least one of set_hpa/set_dpa. 
+ */ +struct edac_mem_repair_ops { + int (*get_repair_type)(struct device *dev, void *drv_data, const char **type); + int (*get_persist_mode)(struct device *dev, void *drv_data, bool *persist); + int (*set_persist_mode)(struct device *dev, void *drv_data, bool persist); + int (*get_repair_safe_when_in_use)(struct device *dev, void *drv_data, bool *safe); + int (*get_hpa)(struct device *dev, void *drv_data, u64 *hpa); + int (*set_hpa)(struct device *dev, void *drv_data, u64 hpa); + int (*get_min_hpa)(struct device *dev, void *drv_data, u64 *hpa); + int (*get_max_hpa)(struct device *dev, void *drv_data, u64 *hpa); + int (*get_dpa)(struct device *dev, void *drv_data, u64 *dpa); + int (*set_dpa)(struct device *dev, void *drv_data, u64 dpa); + int (*get_min_dpa)(struct device *dev, void *drv_data, u64 *dpa); + int (*get_max_dpa)(struct device *dev, void *drv_data, u64 *dpa); + int (*get_nibble_mask)(struct device *dev, void *drv_data, u32 *val); + int (*set_nibble_mask)(struct device *dev, void *drv_data, u32 val); + int (*do_repair)(struct device *dev, void *drv_data, u32 val); +}; + +#if IS_ENABLED(CONFIG_EDAC_MEM_REPAIR) +int edac_mem_repair_get_desc(struct device *dev, + const struct attribute_group **attr_groups, + u8 instance); +#else +static inline int edac_mem_repair_get_desc(struct device *dev, + const struct attribute_group **attr_groups, + u8 instance) +{ return -EOPNOTSUPP; } +#endif /* CONFIG_EDAC_MEM_REPAIR */ + /* EDAC device feature information structure */ struct edac_dev_data { union { const struct edac_scrub_ops *scrub_ops; const struct edac_ecs_ops *ecs_ops; + const struct edac_mem_repair_ops *mem_repair_ops; }; u8 instance; void *private; @@ -758,6 +830,7 @@ struct edac_dev_feat_ctx { void *private; struct edac_dev_data *scrub; struct edac_dev_data ecs; + struct edac_dev_data *mem_repair; }; struct edac_dev_feature { @@ -766,6 +839,7 @@ struct edac_dev_feature { union { const struct edac_scrub_ops *scrub_ops; const struct edac_ecs_ops *ecs_ops; + 
const struct edac_mem_repair_ops *mem_repair_ops; }; void *ctx; struct edac_ecs_ex_info ecs_info; From 81e42fc1d3036efd45f66c03a79654fef00ef380 Mon Sep 17 00:00:00 2001 From: Shiju Jose Date: Mon, 24 Feb 2025 12:13:40 +0100 Subject: [PATCH 09/25] EDAC: Update memory repair control interface for memory sparing feature Update memory repair control interface for memory sparing feature. CXL memory devices can support soft and hard memory sparing at cacheline, row, bank and rank granularities. Memory sparing is defined as a repair function that replaces a portion of memory with a portion of functional memory at that same granularity. When a CXL device detects an error in memory, it will report to the host that there's need for a repair maintenance operation by using an event record where the "maintenance needed" flag is set. The event records contain the device physical address (DPA) and other attributes of the memory to repair such as bank group, bank, rank, row, column, channel etc. The kernel will report the corresponding CXL general media or DRAM trace event to userspace, and userspace tools (e.g. rasdaemon) will initiate a repair operation in response to the device request via the sysfs repair control. [ bp: Massage. ] Signed-off-by: Shiju Jose Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20250212143654.1893-15-shiju.jose@huawei.com --- .../ABI/testing/sysfs-edac-memory-repair | 57 +++++++++++++ drivers/edac/mem_repair.c | 84 +++++++++++++++++++ include/linux/edac.h | 28 +++++++ 3 files changed, 169 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-edac-memory-repair b/Documentation/ABI/testing/sysfs-edac-memory-repair index c54f59e4497b8..0434a3b23ff3e 100644 --- a/Documentation/ABI/testing/sysfs-edac-memory-repair +++ b/Documentation/ABI/testing/sysfs-edac-memory-repair @@ -42,6 +42,14 @@ Description: - ppr - Post package repair. 
+ - cacheline-sparing + + - row-sparing + + - bank-sparing + + - rank-sparing + - All other values are reserved. What: /sys/bus/edac/devices//mem_repairX/persist_mode @@ -134,6 +142,55 @@ Description: related error records and trace events, for eg. CXL DRAM and CXL general media error records in CXL memory devices. +What: /sys/bus/edac/devices//mem_repairX/bank_group +What: /sys/bus/edac/devices//mem_repairX/bank +What: /sys/bus/edac/devices//mem_repairX/rank +What: /sys/bus/edac/devices//mem_repairX/row +What: /sys/bus/edac/devices//mem_repairX/column +What: /sys/bus/edac/devices//mem_repairX/channel +What: /sys/bus/edac/devices//mem_repairX/sub_channel +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) The control attributes for the memory to be repaired. + The specific value of attributes to use depends on the + portion of memory to repair and will be reported to the host + in related error records and be available to userspace + in trace events, such as CXL DRAM and CXL general media + error records of CXL memory devices. + + When reading back these attributes, it returns the current + value of memory requested to be repaired. + + bank_group - The bank group of the memory to repair. + + bank - The bank number of the memory to repair. + + rank - The rank of the memory to repair. Rank is defined as a + set of memory devices on a channel that together execute a + transaction. + + row - The row number of the memory to repair. + + column - The column number of the memory to repair. + + channel - The channel of the memory to repair. Channel is + defined as an interface that can be independently accessed + for a transaction. + + sub_channel - The subchannel of the memory to repair. + + The requirement to set these attributes varies based on the + repair function. The attributes in sysfs are not present + unless required for a repair function. 
+ + For example, CXL spec ver 3.1, Section 8.2.9.7.1.2 Table 8-103 + soft PPR and Section 8.2.9.7.1.3 Table 8-104 hard PPR operations, + these attributes are not required to set. CXL spec ver 3.1, + Section 8.2.9.7.1.4 Table 8-105 memory sparing, these attributes + are required to set based on memory sparing granularity. + What: /sys/bus/edac/devices//mem_repairX/repair Date: March 2025 KernelVersion: 6.15 diff --git a/drivers/edac/mem_repair.c b/drivers/edac/mem_repair.c index 5c94ac1027db6..3b1a845457b08 100755 --- a/drivers/edac/mem_repair.c +++ b/drivers/edac/mem_repair.c @@ -22,6 +22,13 @@ enum edac_mem_repair_attributes { MR_MIN_DPA, MR_MAX_DPA, MR_NIBBLE_MASK, + MR_BANK_GROUP, + MR_BANK, + MR_RANK, + MR_ROW, + MR_COLUMN, + MR_CHANNEL, + MR_SUB_CHANNEL, MEM_DO_REPAIR, MR_MAX_ATTRS }; @@ -70,6 +77,13 @@ MR_ATTR_SHOW(dpa, get_dpa, u64, "0x%llx\n") MR_ATTR_SHOW(min_dpa, get_min_dpa, u64, "0x%llx\n") MR_ATTR_SHOW(max_dpa, get_max_dpa, u64, "0x%llx\n") MR_ATTR_SHOW(nibble_mask, get_nibble_mask, u32, "0x%x\n") +MR_ATTR_SHOW(bank_group, get_bank_group, u32, "%u\n") +MR_ATTR_SHOW(bank, get_bank, u32, "%u\n") +MR_ATTR_SHOW(rank, get_rank, u32, "%u\n") +MR_ATTR_SHOW(row, get_row, u32, "0x%x\n") +MR_ATTR_SHOW(column, get_column, u32, "%u\n") +MR_ATTR_SHOW(channel, get_channel, u32, "%u\n") +MR_ATTR_SHOW(sub_channel, get_sub_channel, u32, "%u\n") #define MR_ATTR_STORE(attrib, cb, type, conv_func) \ static ssize_t attrib##_store(struct device *ras_feat_dev, \ @@ -99,6 +113,13 @@ MR_ATTR_STORE(persist_mode, set_persist_mode, unsigned long, kstrtoul) MR_ATTR_STORE(hpa, set_hpa, u64, kstrtou64) MR_ATTR_STORE(dpa, set_dpa, u64, kstrtou64) MR_ATTR_STORE(nibble_mask, set_nibble_mask, unsigned long, kstrtoul) +MR_ATTR_STORE(bank_group, set_bank_group, unsigned long, kstrtoul) +MR_ATTR_STORE(bank, set_bank, unsigned long, kstrtoul) +MR_ATTR_STORE(rank, set_rank, unsigned long, kstrtoul) +MR_ATTR_STORE(row, set_row, unsigned long, kstrtoul) +MR_ATTR_STORE(column, set_column, 
unsigned long, kstrtoul) +MR_ATTR_STORE(channel, set_channel, unsigned long, kstrtoul) +MR_ATTR_STORE(sub_channel, set_sub_channel, unsigned long, kstrtoul) #define MR_DO_OP(attrib, cb) \ static ssize_t attrib##_store(struct device *ras_feat_dev, \ @@ -189,6 +210,62 @@ static umode_t mem_repair_attr_visible(struct kobject *kobj, struct attribute *a return 0444; } break; + case MR_BANK_GROUP: + if (ops->get_bank_group) { + if (ops->set_bank_group) + return a->mode; + else + return 0444; + } + break; + case MR_BANK: + if (ops->get_bank) { + if (ops->set_bank) + return a->mode; + else + return 0444; + } + break; + case MR_RANK: + if (ops->get_rank) { + if (ops->set_rank) + return a->mode; + else + return 0444; + } + break; + case MR_ROW: + if (ops->get_row) { + if (ops->set_row) + return a->mode; + else + return 0444; + } + break; + case MR_COLUMN: + if (ops->get_column) { + if (ops->set_column) + return a->mode; + else + return 0444; + } + break; + case MR_CHANNEL: + if (ops->get_channel) { + if (ops->set_channel) + return a->mode; + else + return 0444; + } + break; + case MR_SUB_CHANNEL: + if (ops->get_sub_channel) { + if (ops->set_sub_channel) + return a->mode; + else + return 0444; + } + break; case MEM_DO_REPAIR: if (ops->do_repair) return a->mode; @@ -230,6 +307,13 @@ static int mem_repair_create_desc(struct device *dev, [MR_MIN_DPA] = MR_ATTR_RO(min_dpa, instance), [MR_MAX_DPA] = MR_ATTR_RO(max_dpa, instance), [MR_NIBBLE_MASK] = MR_ATTR_RW(nibble_mask, instance), + [MR_BANK_GROUP] = MR_ATTR_RW(bank_group, instance), + [MR_BANK] = MR_ATTR_RW(bank, instance), + [MR_RANK] = MR_ATTR_RW(rank, instance), + [MR_ROW] = MR_ATTR_RW(row, instance), + [MR_COLUMN] = MR_ATTR_RW(column, instance), + [MR_CHANNEL] = MR_ATTR_RW(channel, instance), + [MR_SUB_CHANNEL] = MR_ATTR_RW(sub_channel, instance), [MEM_DO_REPAIR] = MR_ATTR_WO(repair, instance) }; diff --git a/include/linux/edac.h b/include/linux/edac.h index cfb2ef41ab955..451f9c152c99f 100644 --- a/include/linux/edac.h +++ 
b/include/linux/edac.h @@ -780,6 +780,20 @@ enum edac_mem_repair_cmd { * @get_max_dpa: get the maximum supported device physical address (DPA). * @get_nibble_mask: get current nibble mask of memory to repair. * @set_nibble_mask: set nibble mask of memory to repair. + * @get_bank_group: get current bank group of memory to repair. + * @set_bank_group: set bank group of memory to repair. + * @get_bank: get current bank of memory to repair. + * @set_bank: set bank of memory to repair. + * @get_rank: get current rank of memory to repair. + * @set_rank: set rank of memory to repair. + * @get_row: get current row of memory to repair. + * @set_row: set row of memory to repair. + * @get_column: get current column of memory to repair. + * @set_column: set column of memory to repair. + * @get_channel: get current channel of memory to repair. + * @set_channel: set channel of memory to repair. + * @get_sub_channel: get current subchannel of memory to repair. + * @set_sub_channel: set subchannel of memory to repair. * @do_repair: Issue memory repair operation for the HPA/DPA and * other control attributes set for the memory to repair. 
* @@ -800,6 +814,20 @@ struct edac_mem_repair_ops { int (*get_max_dpa)(struct device *dev, void *drv_data, u64 *dpa); int (*get_nibble_mask)(struct device *dev, void *drv_data, u32 *val); int (*set_nibble_mask)(struct device *dev, void *drv_data, u32 val); + int (*get_bank_group)(struct device *dev, void *drv_data, u32 *val); + int (*set_bank_group)(struct device *dev, void *drv_data, u32 val); + int (*get_bank)(struct device *dev, void *drv_data, u32 *val); + int (*set_bank)(struct device *dev, void *drv_data, u32 val); + int (*get_rank)(struct device *dev, void *drv_data, u32 *val); + int (*set_rank)(struct device *dev, void *drv_data, u32 val); + int (*get_row)(struct device *dev, void *drv_data, u32 *val); + int (*set_row)(struct device *dev, void *drv_data, u32 val); + int (*get_column)(struct device *dev, void *drv_data, u32 *val); + int (*set_column)(struct device *dev, void *drv_data, u32 val); + int (*get_channel)(struct device *dev, void *drv_data, u32 *val); + int (*set_channel)(struct device *dev, void *drv_data, u32 val); + int (*get_sub_channel)(struct device *dev, void *drv_data, u32 *val); + int (*set_sub_channel)(struct device *dev, void *drv_data, u32 val); int (*do_repair)(struct device *dev, void *drv_data, u32 val); }; From 12378e1c3ff8259d6269980dcfd0cbb46735ade7 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sat, 1 Feb 2025 14:09:54 +0100 Subject: [PATCH 10/25] EDAC/amd64: Simplify return statement in dct_ecc_enabled() Simplify the return statement to improve the code's readability. No functional changes. 
Signed-off-by: Thorsten Blum Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Qiuxu Zhuo Reviewed-by: Yazen Ghannam Link: https://lore.kernel.org/r/20250201130953.1377-2-thorsten.blum@linux.dev --- drivers/edac/amd64_edac.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index d133a5be58905..90f0eb7cc5b9b 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3354,10 +3354,7 @@ static bool dct_ecc_enabled(struct amd64_pvt *pvt) edac_dbg(3, "Node %d: DRAM ECC %s.\n", nid, str_enabled_disabled(ecc_en)); - if (!ecc_en || !nb_mce_en) - return false; - else - return true; + return ecc_en && nb_mce_en; } static bool umc_ecc_enabled(struct amd64_pvt *pvt) From ac2fbe0948a551e9732b3c5ebf0a37281af68df2 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 31 Jan 2025 21:27:02 +0100 Subject: [PATCH 11/25] EDAC/igen6: Constify struct res_config The res_config structs are not modified in this driver. Constifying these structures moves some data to a read-only section, which increases overall security, especially when the structure holds some function pointers. 
On a x86_64, with allmodconfig, as an example: Before: ====== text data bss dec hex filename 36777 2479 4304 43560 aa28 drivers/edac/igen6_edac.o After: ===== text data bss dec hex filename 37297 1959 4304 43560 aa28 drivers/edac/igen6_edac.o Signed-off-by: Christophe JAILLET Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Qiuxu Zhuo Link: https://lore.kernel.org/r/a06153870951a64b438e76adf97d440e02c1a1fc.1738355198.git.christophe.jaillet@wanadoo.fr --- drivers/edac/igen6_edac.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c index fdf3a84fe6988..38e624209b0f4 100644 --- a/drivers/edac/igen6_edac.c +++ b/drivers/edac/igen6_edac.c @@ -125,7 +125,7 @@ #define MEM_SLICE_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6) #define MEM_SLICE_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26) -static struct res_config { +static const struct res_config { bool machine_check; int num_imc; u32 imc_base; @@ -472,7 +472,7 @@ static u64 rpl_p_err_addr(u64 ecclog) return ECC_ERROR_LOG_ADDR45(ecclog); } -static struct res_config ehl_cfg = { +static const struct res_config ehl_cfg = { .num_imc = 1, .imc_base = 0x5000, .ibecc_base = 0xdc00, @@ -482,7 +482,7 @@ static struct res_config ehl_cfg = { .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr, }; -static struct res_config icl_cfg = { +static const struct res_config icl_cfg = { .num_imc = 1, .imc_base = 0x5000, .ibecc_base = 0xd800, @@ -492,7 +492,7 @@ static struct res_config icl_cfg = { .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr, }; -static struct res_config tgl_cfg = { +static const struct res_config tgl_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0x5000, @@ -506,7 +506,7 @@ static struct res_config tgl_cfg = { .err_addr_to_imc_addr = tgl_err_addr_to_imc_addr, }; -static struct res_config adl_cfg = { +static const struct res_config adl_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0xd800, @@ -517,7 +517,7 @@ static 
struct res_config adl_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static struct res_config adl_n_cfg = { +static const struct res_config adl_n_cfg = { .machine_check = true, .num_imc = 1, .imc_base = 0xd800, @@ -528,7 +528,7 @@ static struct res_config adl_n_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static struct res_config rpl_p_cfg = { +static const struct res_config rpl_p_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0xd800, @@ -540,7 +540,7 @@ static struct res_config rpl_p_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static struct res_config mtl_ps_cfg = { +static const struct res_config mtl_ps_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0xd800, @@ -551,7 +551,7 @@ static struct res_config mtl_ps_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static struct res_config mtl_p_cfg = { +static const struct res_config mtl_p_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0xd800, @@ -1374,7 +1374,7 @@ static void unregister_err_handler(void) unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME); } -static void opstate_set(struct res_config *cfg, const struct pci_device_id *ent) +static void opstate_set(const struct res_config *cfg, const struct pci_device_id *ent) { /* * Quirk: Certain SoCs' error reporting interrupts don't work. From 136899ffc462b491f2b18db18fe7cd519d5cb6c2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 19 Sep 2024 18:04:27 +0100 Subject: [PATCH 12/25] EDAC/pnd2: Make read-only const array intlv static Don't populate the const read-only array intlv on the stack at run time, instead make it static. 
This also shrinks the object size: $ size pnd2_edac.o.* text data bss dec hex filename 15632 264 1384 17280 4380 pnd2_edac.o.new 15644 264 1384 17292 438c pnd2_edac.o.old Signed-off-by: Colin Ian King Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Qiuxu Zhuo Link: https://lore.kernel.org/r/20240919170427.497429-1-colin.i.king@gmail.com --- drivers/edac/pnd2_edac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c index f93f2f2b1cf25..af14c8a3279f7 100644 --- a/drivers/edac/pnd2_edac.c +++ b/drivers/edac/pnd2_edac.c @@ -372,7 +372,7 @@ static int gen_asym_mask(struct b_cr_slice_channel_hash *p, struct b_cr_asym_mem_region1_mchbar *as1, struct b_cr_asym_2way_mem_region_mchbar *as2way) { - const int intlv[] = { 0x5, 0xA, 0x3, 0xC }; + static const int intlv[] = { 0x5, 0xA, 0x3, 0xC }; int mask = 0; if (as2way->asym_2way_interleave_enable) @@ -489,7 +489,7 @@ static int dnv_get_registers(void) */ static int get_registers(void) { - const int intlv[] = { 10, 11, 12, 12 }; + static const int intlv[] = { 10, 11, 12, 12 }; if (RD_REG(&tolud, b_cr_tolud_pci) || RD_REG(&touud_lo, b_cr_touud_lo_pci) || From 49472722d920ad39208a21eed1e8dc9038b4c2da Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 4 Mar 2025 15:35:58 +0100 Subject: [PATCH 13/25] EDAC/device: Fix dev_set_name() format string Passing a variable string as the format to dev_set_name() causes a W=1 warning: drivers/edac/edac_device.c:736:9: error: format not a string literal and no format arguments [-Werror=format-security] 736 | ret = dev_set_name(&ctx->dev, name); | ^~~ Use a literal "%s" instead so the name can be the argument. 
Fixes: db99ea5f2c03 ("EDAC: Add support for EDAC device features control") Signed-off-by: Arnd Bergmann Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20250304143603.995820-1-arnd@kernel.org --- drivers/edac/edac_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index 16611515ab348..0734909b08a4f 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -733,7 +733,7 @@ int edac_dev_register(struct device *parent, char *name, ctx->private = private; dev_set_drvdata(&ctx->dev, ctx); - ret = dev_set_name(&ctx->dev, name); + ret = dev_set_name(&ctx->dev, "%s", name); if (ret) goto data_mem_free; From d59d844e319d97682c8de29b88d2d60922a683b3 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 10 Mar 2025 09:14:01 +0800 Subject: [PATCH 14/25] EDAC/ie31200: Fix the size of EDAC_MC_LAYER_CHIP_SELECT layer The EDAC_MC_LAYER_CHIP_SELECT layer pertains to the rank, not the DIMM. Fix its size to reflect the number of ranks instead of the number of DIMMs. Also delete the unused macros IE31200_{DIMMS,RANKS}. 
Fixes: 7ee40b897d18 ("ie31200_edac: Introduce the driver") Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Gary Wang Link: https://lore.kernel.org/r/20250310011411.31685-2-qiuxu.zhuo@intel.com --- drivers/edac/ie31200_edac.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index c6188de13c003..10301e17014cc 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -94,8 +94,6 @@ (((did) & PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK) == \ PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK)) -#define IE31200_DIMMS 4 -#define IE31200_RANKS 8 #define IE31200_RANKS_PER_CHANNEL 4 #define IE31200_DIMMS_PER_CHANNEL 2 #define IE31200_CHANNELS 2 @@ -428,7 +426,7 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) nr_channels = how_many_channels(pdev); layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; - layers[0].size = IE31200_DIMMS; + layers[0].size = IE31200_RANKS_PER_CHANNEL; layers[0].is_virt_csrow = true; layers[1].type = EDAC_MC_LAYER_CHANNEL; layers[1].size = nr_channels; From 3427befbbca6b19fe0e37f91d66ce5221de70bf1 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 10 Mar 2025 09:14:02 +0800 Subject: [PATCH 15/25] EDAC/ie31200: Fix the DIMM size mask for several SoCs The DIMM size mask for {Sky, Kaby, Coffee} Lake is not bits{7:0}, but bits{5:0}. Fix it. 
Fixes: 953dee9bbd24 ("EDAC, ie31200_edac: Add Skylake support") Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Gary Wang Link: https://lore.kernel.org/r/20250310011411.31685-3-qiuxu.zhuo@intel.com --- drivers/edac/ie31200_edac.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 10301e17014cc..2886866cb457a 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -165,6 +165,7 @@ #define IE31200_MAD_DIMM_0_OFFSET 0x5004 #define IE31200_MAD_DIMM_0_OFFSET_SKL 0x500C #define IE31200_MAD_DIMM_SIZE GENMASK_ULL(7, 0) +#define IE31200_MAD_DIMM_SIZE_SKL GENMASK_ULL(5, 0) #define IE31200_MAD_DIMM_A_RANK BIT(17) #define IE31200_MAD_DIMM_A_RANK_SHIFT 17 #define IE31200_MAD_DIMM_A_RANK_SKL BIT(10) @@ -378,7 +379,7 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) static void __skl_populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int chan) { - dd->size = (addr_decode >> (chan << 4)) & IE31200_MAD_DIMM_SIZE; + dd->size = (addr_decode >> (chan << 4)) & IE31200_MAD_DIMM_SIZE_SKL; dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK_SKL << (chan << 4))) ? 1 : 0; dd->x16_width = ((addr_decode & (IE31200_MAD_DIMM_A_WIDTH_SKL << (chan << 4))) >> (IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT + (chan << 4))); From 231e341036d9988447e3b3345cf741a98139199e Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 10 Mar 2025 09:14:03 +0800 Subject: [PATCH 16/25] EDAC/ie31200: Fix the error path order of ie31200_init() The error path order of ie31200_init() is incorrect, fix it. 
Fixes: 709ed1bcef12 ("EDAC/ie31200: Fallback if host bridge device is already initialized") Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Gary Wang Link: https://lore.kernel.org/r/20250310011411.31685-4-qiuxu.zhuo@intel.com --- drivers/edac/ie31200_edac.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 2886866cb457a..a8dd55ec52cea 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -619,7 +619,7 @@ static int __init ie31200_init(void) pci_rc = pci_register_driver(&ie31200_driver); if (pci_rc < 0) - goto fail0; + return pci_rc; if (!mci_pdev) { ie31200_registered = 0; @@ -630,11 +630,13 @@ static int __init ie31200_init(void) if (mci_pdev) break; } + if (!mci_pdev) { edac_dbg(0, "ie31200 pci_get_device fail\n"); pci_rc = -ENODEV; - goto fail1; + goto fail0; } + pci_rc = ie31200_init_one(mci_pdev, &ie31200_pci_tbl[i]); if (pci_rc < 0) { edac_dbg(0, "ie31200 init fail\n"); @@ -642,12 +644,12 @@ static int __init ie31200_init(void) goto fail1; } } - return 0; + return 0; fail1: - pci_unregister_driver(&ie31200_driver); -fail0: pci_dev_put(mci_pdev); +fail0: + pci_unregister_driver(&ie31200_driver); return pci_rc; } From 44eae52089ebcb6b22c63a2506bc610bb70fa927 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 10 Mar 2025 09:14:04 +0800 Subject: [PATCH 17/25] EDAC/ie31200: Fix the 3rd parameter name of *populate_dimm_info() The 3rd parameter of *populate_dimm_info() pertains to the DIMM index within a channel, not the channel index. Fix the parameter name to dimm to reflect its actual purpose. No functional changes intended. 
Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Gary Wang Link: https://lore.kernel.org/r/20250310011411.31685-5-qiuxu.zhuo@intel.com --- drivers/edac/ie31200_edac.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index a8dd55ec52cea..35f4e8e46ca20 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -377,29 +377,29 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) } static void __skl_populate_dimm_info(struct dimm_data *dd, u32 addr_decode, - int chan) + int dimm) { - dd->size = (addr_decode >> (chan << 4)) & IE31200_MAD_DIMM_SIZE_SKL; - dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK_SKL << (chan << 4))) ? 1 : 0; - dd->x16_width = ((addr_decode & (IE31200_MAD_DIMM_A_WIDTH_SKL << (chan << 4))) >> - (IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT + (chan << 4))); + dd->size = (addr_decode >> (dimm << 4)) & IE31200_MAD_DIMM_SIZE_SKL; + dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK_SKL << (dimm << 4))) ? 1 : 0; + dd->x16_width = ((addr_decode & (IE31200_MAD_DIMM_A_WIDTH_SKL << (dimm << 4))) >> + (IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT + (dimm << 4))); } static void __populate_dimm_info(struct dimm_data *dd, u32 addr_decode, - int chan) + int dimm) { - dd->size = (addr_decode >> (chan << 3)) & IE31200_MAD_DIMM_SIZE; - dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK << chan)) ? 1 : 0; - dd->x16_width = (addr_decode & (IE31200_MAD_DIMM_A_WIDTH << chan)) ? 1 : 0; + dd->size = (addr_decode >> (dimm << 3)) & IE31200_MAD_DIMM_SIZE; + dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK << dimm)) ? 1 : 0; + dd->x16_width = (addr_decode & (IE31200_MAD_DIMM_A_WIDTH << dimm)) ? 
1 : 0; } -static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int chan, +static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int dimm, bool skl) { if (skl) - __skl_populate_dimm_info(dd, addr_decode, chan); + __skl_populate_dimm_info(dd, addr_decode, dimm); else - __populate_dimm_info(dd, addr_decode, chan); + __populate_dimm_info(dd, addr_decode, dimm); } From 312e67a03d8bb1745804936a4f6fdb69d64c3435 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 10 Mar 2025 09:14:05 +0800 Subject: [PATCH 18/25] EDAC/ie31200: Simplify the pci_device_id table Use PCI_VDEVICE() to simplify the pci_device_id table. No functional changes intended. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Gary Wang Link: https://lore.kernel.org/r/20250310011411.31685-6-qiuxu.zhuo@intel.com --- drivers/edac/ie31200_edac.c | 44 ++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 35f4e8e46ca20..4e1f85dc16798 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -576,28 +576,28 @@ static void ie31200_remove_one(struct pci_dev *pdev) } static const struct pci_device_id ie31200_pci_tbl[] = { - { PCI_VEND_DEV(INTEL, IE31200_HB_1), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_2), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_3), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_4), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_5), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_6), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { 
PCI_VEND_DEV(INTEL, IE31200_HB_10), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_11), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_12), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_1), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_2), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_3), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_4), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_5), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_6), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_10), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_1), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_2), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_3), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_4), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_5), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_6), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_7), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_8), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_9), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_10), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_11), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_12), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_1), IE31200 }, + { 
PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_2), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_3), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_4), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_5), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_6), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_7), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_8), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10), IE31200 }, { 0, } /* 0 terminated list. */ }; MODULE_DEVICE_TABLE(pci, ie31200_pci_tbl); From 2a52cce6486171a1adb43f8f17ad3f1d1c234c7c Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 10 Mar 2025 09:14:06 +0800 Subject: [PATCH 19/25] EDAC/ie31200: Make the memory controller resources configurable The resources such as MMIO, register offset, register mask, memory DIMM information, ECC error log location, etc., of the memory controller, and the number of memory controllers can be device-ID-specific. It requires adding numerous 'if (device_id == new_id)' special handling cases to the code to support a new SoC. Make these kinds of resources configurable and separate them from the code to facilitate the addition of new SoC support. No functional changes intended. 
Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Gary Wang Link: https://lore.kernel.org/r/20250310011411.31685-7-qiuxu.zhuo@intel.com --- drivers/edac/ie31200_edac.c | 257 ++++++++++++++++-------------------- 1 file changed, 111 insertions(+), 146 deletions(-) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 4e1f85dc16798..71061ab5fa91e 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -101,25 +101,10 @@ /* Intel IE31200 register addresses - device 0 function 0 - DRAM Controller */ #define IE31200_MCHBAR_LOW 0x48 #define IE31200_MCHBAR_HIGH 0x4c -#define IE31200_MCHBAR_MASK GENMASK_ULL(38, 15) -#define IE31200_MMR_WINDOW_SIZE BIT(15) /* * Error Status Register (16b) * - * 15 reserved - * 14 Isochronous TBWRR Run Behind FIFO Full - * (ITCV) - * 13 Isochronous TBWRR Run Behind FIFO Put - * (ITSTV) - * 12 reserved - * 11 MCH Thermal Sensor Event - * for SMI/SCI/SERR (GTSE) - * 10 reserved - * 9 LOCK to non-DRAM Memory Flag (LCKF) - * 8 reserved - * 7 DRAM Throttle Flag (DTF) - * 6:2 reserved * 1 Multi-bit DRAM ECC Error Flag (DMERR) * 0 Single-bit DRAM ECC Error Flag (DSERR) */ @@ -128,65 +113,45 @@ #define IE31200_ERRSTS_CE BIT(0) #define IE31200_ERRSTS_BITS (IE31200_ERRSTS_UE | IE31200_ERRSTS_CE) -/* - * Channel 0 ECC Error Log (64b) - * - * 63:48 Error Column Address (ERRCOL) - * 47:32 Error Row Address (ERRROW) - * 31:29 Error Bank Address (ERRBANK) - * 28:27 Error Rank Address (ERRRANK) - * 26:24 reserved - * 23:16 Error Syndrome (ERRSYND) - * 15: 2 reserved - * 1 Multiple Bit Error Status (MERRSTS) - * 0 Correctable Error Status (CERRSTS) - */ - -#define IE31200_C0ECCERRLOG 0x40c8 -#define IE31200_C1ECCERRLOG 0x44c8 -#define IE31200_C0ECCERRLOG_SKL 0x4048 -#define IE31200_C1ECCERRLOG_SKL 0x4448 -#define IE31200_ECCERRLOG_CE BIT(0) -#define IE31200_ECCERRLOG_UE BIT(1) -#define IE31200_ECCERRLOG_RANK_BITS GENMASK_ULL(28, 27) -#define IE31200_ECCERRLOG_RANK_SHIFT 27 -#define 
IE31200_ECCERRLOG_SYNDROME_BITS GENMASK_ULL(23, 16) -#define IE31200_ECCERRLOG_SYNDROME_SHIFT 16 - -#define IE31200_ECCERRLOG_SYNDROME(log) \ - ((log & IE31200_ECCERRLOG_SYNDROME_BITS) >> \ - IE31200_ECCERRLOG_SYNDROME_SHIFT) - #define IE31200_CAPID0 0xe4 #define IE31200_CAPID0_PDCD BIT(4) #define IE31200_CAPID0_DDPCD BIT(6) #define IE31200_CAPID0_ECC BIT(1) -#define IE31200_MAD_DIMM_0_OFFSET 0x5004 -#define IE31200_MAD_DIMM_0_OFFSET_SKL 0x500C -#define IE31200_MAD_DIMM_SIZE GENMASK_ULL(7, 0) -#define IE31200_MAD_DIMM_SIZE_SKL GENMASK_ULL(5, 0) -#define IE31200_MAD_DIMM_A_RANK BIT(17) -#define IE31200_MAD_DIMM_A_RANK_SHIFT 17 -#define IE31200_MAD_DIMM_A_RANK_SKL BIT(10) -#define IE31200_MAD_DIMM_A_RANK_SKL_SHIFT 10 -#define IE31200_MAD_DIMM_A_WIDTH BIT(19) -#define IE31200_MAD_DIMM_A_WIDTH_SHIFT 19 -#define IE31200_MAD_DIMM_A_WIDTH_SKL GENMASK_ULL(9, 8) -#define IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT 8 - /* Skylake reports 1GB increments, everything else is 256MB */ #define IE31200_PAGES(n, skl) \ (n << (28 + (2 * skl) - PAGE_SHIFT)) +/* Non-constant mask variant of FIELD_GET() */ +#define field_get(_mask, _reg) (((_reg) & (_mask)) >> (ffs(_mask) - 1)) + static int nr_channels; static struct pci_dev *mci_pdev; static int ie31200_registered = 1; +struct res_config { + enum mem_type mtype; + /* Host MMIO configuration register */ + u64 reg_mchbar_mask; + u64 reg_mchbar_window_size; + /* ECC error log register */ + u64 reg_eccerrlog_offset[IE31200_CHANNELS]; + u64 reg_eccerrlog_ce_mask; + u64 reg_eccerrlog_ue_mask; + u64 reg_eccerrlog_rank_mask; + u64 reg_eccerrlog_syndrome_mask; + /* DIMM characteristics register */ + u64 reg_mad_dimm_offset[IE31200_CHANNELS]; + u32 reg_mad_dimm_size_mask[IE31200_DIMMS_PER_CHANNEL]; + u32 reg_mad_dimm_rank_mask[IE31200_DIMMS_PER_CHANNEL]; + u32 reg_mad_dimm_width_mask[IE31200_DIMMS_PER_CHANNEL]; +}; + struct ie31200_priv { void __iomem *window; void __iomem *c0errlog; void __iomem *c1errlog; + struct res_config *cfg; }; enum 
ie31200_chips { @@ -250,12 +215,6 @@ static bool ecc_capable(struct pci_dev *pdev) return true; } -static int eccerrlog_row(u64 log) -{ - return ((log & IE31200_ECCERRLOG_RANK_BITS) >> - IE31200_ECCERRLOG_RANK_SHIFT); -} - static void ie31200_clear_error_info(struct mem_ctl_info *mci) { /* @@ -308,6 +267,8 @@ static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci, static void ie31200_process_error_info(struct mem_ctl_info *mci, struct ie31200_error_info *info) { + struct ie31200_priv *priv = mci->pvt_info; + struct res_config *cfg = priv->cfg; int channel; u64 log; @@ -322,17 +283,17 @@ static void ie31200_process_error_info(struct mem_ctl_info *mci, for (channel = 0; channel < nr_channels; channel++) { log = info->eccerrlog[channel]; - if (log & IE31200_ECCERRLOG_UE) { + if (log & cfg->reg_eccerrlog_ue_mask) { edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, - eccerrlog_row(log), + field_get(cfg->reg_eccerrlog_rank_mask, log), channel, -1, "ie31200 UE", ""); - } else if (log & IE31200_ECCERRLOG_CE) { + } else if (log & cfg->reg_eccerrlog_ce_mask) { edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, - IE31200_ECCERRLOG_SYNDROME(log), - eccerrlog_row(log), + field_get(cfg->reg_eccerrlog_syndrome_mask, log), + field_get(cfg->reg_eccerrlog_rank_mask, log), channel, -1, "ie31200 CE", ""); } @@ -347,7 +308,7 @@ static void ie31200_check(struct mem_ctl_info *mci) ie31200_process_error_info(mci, &info); } -static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) +static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev, struct res_config *cfg) { union { u64 mchbar; @@ -360,7 +321,7 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) pci_read_config_dword(pdev, IE31200_MCHBAR_LOW, &u.mchbar_low); pci_read_config_dword(pdev, IE31200_MCHBAR_HIGH, &u.mchbar_high); - u.mchbar &= IE31200_MCHBAR_MASK; + u.mchbar &= cfg->reg_mchbar_mask; if (u.mchbar != (resource_size_t)u.mchbar) { ie31200_printk(KERN_ERR, "mmio space 
beyond accessible range (0x%llx)\n", @@ -368,7 +329,7 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) return NULL; } - window = ioremap(u.mchbar, IE31200_MMR_WINDOW_SIZE); + window = ioremap(u.mchbar, cfg->reg_mchbar_window_size); if (!window) ie31200_printk(KERN_ERR, "Cannot map mmio space at 0x%llx\n", (unsigned long long)u.mchbar); @@ -376,41 +337,22 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) return window; } -static void __skl_populate_dimm_info(struct dimm_data *dd, u32 addr_decode, - int dimm) -{ - dd->size = (addr_decode >> (dimm << 4)) & IE31200_MAD_DIMM_SIZE_SKL; - dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK_SKL << (dimm << 4))) ? 1 : 0; - dd->x16_width = ((addr_decode & (IE31200_MAD_DIMM_A_WIDTH_SKL << (dimm << 4))) >> - (IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT + (dimm << 4))); -} - -static void __populate_dimm_info(struct dimm_data *dd, u32 addr_decode, - int dimm) -{ - dd->size = (addr_decode >> (dimm << 3)) & IE31200_MAD_DIMM_SIZE; - dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK << dimm)) ? 1 : 0; - dd->x16_width = (addr_decode & (IE31200_MAD_DIMM_A_WIDTH << dimm)) ? 
1 : 0; -} - static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int dimm, - bool skl) + struct res_config *cfg) { - if (skl) - __skl_populate_dimm_info(dd, addr_decode, dimm); - else - __populate_dimm_info(dd, addr_decode, dimm); + dd->size = field_get(cfg->reg_mad_dimm_size_mask[dimm], addr_decode); + dd->dual_rank = field_get(cfg->reg_mad_dimm_rank_mask[dimm], addr_decode); + dd->x16_width = field_get(cfg->reg_mad_dimm_width_mask[dimm], addr_decode); } - -static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) +static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) { int i, j, ret; struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; void __iomem *window; struct ie31200_priv *priv; - u32 addr_decode[IE31200_CHANNELS], mad_offset; + u32 addr_decode[IE31200_CHANNELS]; /* * Kaby Lake, Coffee Lake seem to work like Skylake. Please re-visit @@ -437,7 +379,7 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) if (!mci) return -ENOMEM; - window = ie31200_map_mchbar(pdev); + window = ie31200_map_mchbar(pdev, cfg); if (!window) { ret = -ENODEV; goto fail_free; @@ -445,32 +387,22 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) edac_dbg(3, "MC: init mci\n"); mci->pdev = &pdev->dev; - if (skl) - mci->mtype_cap = MEM_FLAG_DDR4; - else - mci->mtype_cap = MEM_FLAG_DDR3; + mci->mtype_cap = BIT(cfg->mtype); mci->edac_ctl_cap = EDAC_FLAG_SECDED; mci->edac_cap = EDAC_FLAG_SECDED; mci->mod_name = EDAC_MOD_STR; - mci->ctl_name = ie31200_devs[dev_idx].ctl_name; + mci->ctl_name = ie31200_devs[0].ctl_name; mci->dev_name = pci_name(pdev); mci->edac_check = ie31200_check; mci->ctl_page_to_phys = NULL; priv = mci->pvt_info; priv->window = window; - if (skl) { - priv->c0errlog = window + IE31200_C0ECCERRLOG_SKL; - priv->c1errlog = window + IE31200_C1ECCERRLOG_SKL; - mad_offset = IE31200_MAD_DIMM_0_OFFSET_SKL; - } else { - priv->c0errlog = window + IE31200_C0ECCERRLOG; - priv->c1errlog = window + 
IE31200_C1ECCERRLOG; - mad_offset = IE31200_MAD_DIMM_0_OFFSET; - } + priv->c0errlog = window + cfg->reg_eccerrlog_offset[0]; + priv->c1errlog = window + cfg->reg_eccerrlog_offset[1]; + priv->cfg = cfg; for (i = 0; i < IE31200_CHANNELS; i++) { - addr_decode[i] = readl(window + mad_offset + - (i * 4)); + addr_decode[i] = readl(window + cfg->reg_mad_dimm_offset[i]); edac_dbg(0, "addr_decode: 0x%x\n", addr_decode[i]); } @@ -486,8 +418,7 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) struct dimm_info *dimm; unsigned long nr_pages; - populate_dimm_info(&dimm_info, addr_decode[j], i, - skl); + populate_dimm_info(&dimm_info, addr_decode[j], i, cfg); edac_dbg(0, "size: 0x%x, rank: %d, width: %d\n", dimm_info.size, dimm_info.dual_rank, @@ -503,10 +434,7 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) dimm->nr_pages = nr_pages; edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); dimm->grain = 8; /* just a guess */ - if (skl) - dimm->mtype = MEM_DDR4; - else - dimm->mtype = MEM_DDR3; + dimm->mtype = cfg->mtype; dimm->dtype = DEV_UNKNOWN; dimm->edac_mode = EDAC_UNKNOWN; } @@ -514,10 +442,7 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) dimm->nr_pages = nr_pages; edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); dimm->grain = 8; /* same guess */ - if (skl) - dimm->mtype = MEM_DDR4; - else - dimm->mtype = MEM_DDR3; + dimm->mtype = cfg->mtype; dimm->dtype = DEV_UNKNOWN; dimm->edac_mode = EDAC_UNKNOWN; } @@ -552,7 +477,7 @@ static int ie31200_init_one(struct pci_dev *pdev, edac_dbg(0, "MC:\n"); if (pci_enable_device(pdev) < 0) return -EIO; - rc = ie31200_probe1(pdev, ent->driver_data); + rc = ie31200_probe1(pdev, (struct res_config *)ent->driver_data); if (rc == 0 && !mci_pdev) mci_pdev = pci_dev_get(pdev); @@ -575,29 +500,69 @@ static void ie31200_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } +static struct res_config snb_cfg = { + .mtype = MEM_DDR3, + .reg_mchbar_mask = GENMASK_ULL(38, 15), + .reg_mchbar_window_size = 
BIT_ULL(15), + .reg_eccerrlog_offset[0] = 0x40c8, + .reg_eccerrlog_offset[1] = 0x44c8, + .reg_eccerrlog_ce_mask = BIT_ULL(0), + .reg_eccerrlog_ue_mask = BIT_ULL(1), + .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27), + .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16), + .reg_mad_dimm_offset[0] = 0x5004, + .reg_mad_dimm_offset[1] = 0x5008, + .reg_mad_dimm_size_mask[0] = GENMASK(7, 0), + .reg_mad_dimm_size_mask[1] = GENMASK(15, 8), + .reg_mad_dimm_rank_mask[0] = BIT(17), + .reg_mad_dimm_rank_mask[1] = BIT(18), + .reg_mad_dimm_width_mask[0] = BIT(19), + .reg_mad_dimm_width_mask[1] = BIT(20), +}; + +static struct res_config skl_cfg = { + .mtype = MEM_DDR4, + .reg_mchbar_mask = GENMASK_ULL(38, 15), + .reg_mchbar_window_size = BIT_ULL(15), + .reg_eccerrlog_offset[0] = 0x4048, + .reg_eccerrlog_offset[1] = 0x4448, + .reg_eccerrlog_ce_mask = BIT_ULL(0), + .reg_eccerrlog_ue_mask = BIT_ULL(1), + .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27), + .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16), + .reg_mad_dimm_offset[0] = 0x500c, + .reg_mad_dimm_offset[1] = 0x5010, + .reg_mad_dimm_size_mask[0] = GENMASK(5, 0), + .reg_mad_dimm_size_mask[1] = GENMASK(21, 16), + .reg_mad_dimm_rank_mask[0] = BIT(10), + .reg_mad_dimm_rank_mask[1] = BIT(26), + .reg_mad_dimm_width_mask[0] = GENMASK(9, 8), + .reg_mad_dimm_width_mask[1] = GENMASK(25, 24), +}; + static const struct pci_device_id ie31200_pci_tbl[] = { - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_1), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_2), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_3), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_4), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_5), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_6), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_7), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_8), IE31200 }, - { PCI_VDEVICE(INTEL, 
PCI_DEVICE_ID_INTEL_IE31200_HB_9), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_10), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_11), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_12), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_1), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_2), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_3), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_4), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_5), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_6), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_7), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_8), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9), IE31200 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10), IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_1), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_2), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_3), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_4), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_5), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_6), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_7), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_8), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_9), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_10), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_11), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_12), 
(kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_1), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_2), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_3), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_4), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_5), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_6), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_7), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_8), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10), (kernel_ulong_t)&skl_cfg }, { 0, } /* 0 terminated list. */ }; MODULE_DEVICE_TABLE(pci, ie31200_pci_tbl); From afdbc36555567a622843c5330174260f7ea954fc Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 10 Mar 2025 09:14:07 +0800 Subject: [PATCH 20/25] EDAC/ie31200: Make struct dimm_data contain decoded information The current dimm_data structure contains encoded DIMM information, which needs to be decoded for a given SoC when it is used. Make it contain decoded information when it's initialized so that the places where it is used do not need to decode it again, thereby simplifying the code. No functional changes intended. 
Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Gary Wang Link: https://lore.kernel.org/r/20250310011411.31685-8-qiuxu.zhuo@intel.com --- drivers/edac/ie31200_edac.c | 62 ++++++++++++------------------------- 1 file changed, 19 insertions(+), 43 deletions(-) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 71061ab5fa91e..865a2f838317f 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -84,16 +84,6 @@ #define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9 0x3ec6 #define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10 0x3eca -/* Test if HB is for Skylake or later. */ -#define DEVICE_ID_SKYLAKE_OR_LATER(did) \ - (((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_8) || \ - ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_9) || \ - ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_10) || \ - ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_11) || \ - ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_12) || \ - (((did) & PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK) == \ - PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK)) - #define IE31200_RANKS_PER_CHANNEL 4 #define IE31200_DIMMS_PER_CHANNEL 2 #define IE31200_CHANNELS 2 @@ -118,10 +108,6 @@ #define IE31200_CAPID0_DDPCD BIT(6) #define IE31200_CAPID0_ECC BIT(1) -/* Skylake reports 1GB increments, everything else is 256MB */ -#define IE31200_PAGES(n, skl) \ - (n << (28 + (2 * skl) - PAGE_SHIFT)) - /* Non-constant mask variant of FIELD_GET() */ #define field_get(_mask, _reg) (((_reg) & (_mask)) >> (ffs(_mask) - 1)) @@ -141,6 +127,7 @@ struct res_config { u64 reg_eccerrlog_rank_mask; u64 reg_eccerrlog_syndrome_mask; /* DIMM characteristics register */ + u64 reg_mad_dimm_size_granularity; u64 reg_mad_dimm_offset[IE31200_CHANNELS]; u32 reg_mad_dimm_size_mask[IE31200_DIMMS_PER_CHANNEL]; u32 reg_mad_dimm_rank_mask[IE31200_DIMMS_PER_CHANNEL]; @@ -175,9 +162,9 @@ static const struct ie31200_dev_info ie31200_devs[] = { }; struct dimm_data { - u8 size; /* in multiples of 256MB, except Skylake is 1GB */ - u8 dual_rank : 1, - x16_width : 2; 
/* 0 means x8 width */ + u64 size; /* in bytes */ + u8 ranks; + enum dev_type dtype; }; static int how_many_channels(struct pci_dev *pdev) @@ -340,26 +327,20 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev, struct res_config static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int dimm, struct res_config *cfg) { - dd->size = field_get(cfg->reg_mad_dimm_size_mask[dimm], addr_decode); - dd->dual_rank = field_get(cfg->reg_mad_dimm_rank_mask[dimm], addr_decode); - dd->x16_width = field_get(cfg->reg_mad_dimm_width_mask[dimm], addr_decode); + dd->size = field_get(cfg->reg_mad_dimm_size_mask[dimm], addr_decode) * cfg->reg_mad_dimm_size_granularity; + dd->ranks = field_get(cfg->reg_mad_dimm_rank_mask[dimm], addr_decode) + 1; + dd->dtype = field_get(cfg->reg_mad_dimm_width_mask[dimm], addr_decode) + DEV_X8; } static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) { - int i, j, ret; + int i, j, k, ret; struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; void __iomem *window; struct ie31200_priv *priv; u32 addr_decode[IE31200_CHANNELS]; - /* - * Kaby Lake, Coffee Lake seem to work like Skylake. Please re-visit - * this logic when adding new CPU support. 
- */ - bool skl = DEVICE_ID_SKYLAKE_OR_LATER(pdev->device); - edac_dbg(0, "MC:\n"); if (!ecc_capable(pdev)) { @@ -419,32 +400,25 @@ static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) unsigned long nr_pages; populate_dimm_info(&dimm_info, addr_decode[j], i, cfg); - edac_dbg(0, "size: 0x%x, rank: %d, width: %d\n", - dimm_info.size, - dimm_info.dual_rank, - dimm_info.x16_width); + edac_dbg(0, "channel: %d, dimm: %d, size: %lld MiB, ranks: %d, DRAM chip type: %d\n", + j, i, dimm_info.size >> 20, + dimm_info.ranks, + dimm_info.dtype); - nr_pages = IE31200_PAGES(dimm_info.size, skl); + nr_pages = MiB_TO_PAGES(dimm_info.size >> 20); if (nr_pages == 0) continue; - if (dimm_info.dual_rank) { - nr_pages = nr_pages / 2; - dimm = edac_get_dimm(mci, (i * 2) + 1, j, 0); + nr_pages = nr_pages / dimm_info.ranks; + for (k = 0; k < dimm_info.ranks; k++) { + dimm = edac_get_dimm(mci, (i * dimm_info.ranks) + k, j, 0); dimm->nr_pages = nr_pages; edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); dimm->grain = 8; /* just a guess */ dimm->mtype = cfg->mtype; - dimm->dtype = DEV_UNKNOWN; + dimm->dtype = dimm_info.dtype; dimm->edac_mode = EDAC_UNKNOWN; } - dimm = edac_get_dimm(mci, i * 2, j, 0); - dimm->nr_pages = nr_pages; - edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); - dimm->grain = 8; /* same guess */ - dimm->mtype = cfg->mtype; - dimm->dtype = DEV_UNKNOWN; - dimm->edac_mode = EDAC_UNKNOWN; } } @@ -510,6 +484,7 @@ static struct res_config snb_cfg = { .reg_eccerrlog_ue_mask = BIT_ULL(1), .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27), .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16), + .reg_mad_dimm_size_granularity = BIT_ULL(28), .reg_mad_dimm_offset[0] = 0x5004, .reg_mad_dimm_offset[1] = 0x5008, .reg_mad_dimm_size_mask[0] = GENMASK(7, 0), @@ -530,6 +505,7 @@ static struct res_config skl_cfg = { .reg_eccerrlog_ue_mask = BIT_ULL(1), .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27), .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16), + .reg_mad_dimm_size_granularity 
= BIT_ULL(30), .reg_mad_dimm_offset[0] = 0x500c, .reg_mad_dimm_offset[1] = 0x5010, .reg_mad_dimm_size_mask[0] = GENMASK(5, 0), From a217961b83ae845fe7247255f0b8a01e3b2ed8a2 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 10 Mar 2025 09:14:08 +0800 Subject: [PATCH 21/25] EDAC/ie31200: Fold the two channel loops into one loop Fold the two channel loops to simplify the code and improve readability. Also, delete the comments related to the DRB register, as this register is not used here. No functional changes intended. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Gary Wang Link: https://lore.kernel.org/r/20250310011411.31685-9-qiuxu.zhuo@intel.com --- drivers/edac/ie31200_edac.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 865a2f838317f..01d719845a88d 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -339,7 +339,7 @@ static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) struct edac_mc_layer layers[2]; void __iomem *window; struct ie31200_priv *priv; - u32 addr_decode[IE31200_CHANNELS]; + u32 addr_decode; edac_dbg(0, "MC:\n"); @@ -383,25 +383,17 @@ static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) priv->cfg = cfg; for (i = 0; i < IE31200_CHANNELS; i++) { - addr_decode[i] = readl(window + cfg->reg_mad_dimm_offset[i]); - edac_dbg(0, "addr_decode: 0x%x\n", addr_decode[i]); - } + addr_decode = readl(window + cfg->reg_mad_dimm_offset[i]); + edac_dbg(0, "addr_decode: 0x%x\n", addr_decode); - /* - * The dram rank boundary (DRB) reg values are boundary addresses - * for each DRAM rank with a granularity of 64MB. DRB regs are - * cumulative; the last one will contain the total memory - * contained in all ranks. 
- */ - for (i = 0; i < IE31200_DIMMS_PER_CHANNEL; i++) { - for (j = 0; j < IE31200_CHANNELS; j++) { + for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) { struct dimm_data dimm_info; struct dimm_info *dimm; unsigned long nr_pages; - populate_dimm_info(&dimm_info, addr_decode[j], i, cfg); + populate_dimm_info(&dimm_info, addr_decode, j, cfg); edac_dbg(0, "channel: %d, dimm: %d, size: %lld MiB, ranks: %d, DRAM chip type: %d\n", - j, i, dimm_info.size >> 20, + i, j, dimm_info.size >> 20, dimm_info.ranks, dimm_info.dtype); @@ -411,7 +403,7 @@ static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) nr_pages = nr_pages / dimm_info.ranks; for (k = 0; k < dimm_info.ranks; k++) { - dimm = edac_get_dimm(mci, (i * dimm_info.ranks) + k, j, 0); + dimm = edac_get_dimm(mci, (j * dimm_info.ranks) + k, i, 0); dimm->nr_pages = nr_pages; edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); dimm->grain = 8; /* just a guess */ From 498550e1fa7c1cb52952433d64f9230c247c7c00 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 10 Mar 2025 09:14:09 +0800 Subject: [PATCH 22/25] EDAC/ie31200: Break up ie31200_probe1() Split ie31200_probe1() into two helper functions to easily extend support for multiple memory controllers. No functional changes intended. 
Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Gary Wang Link: https://lore.kernel.org/r/20250310011411.31685-10-qiuxu.zhuo@intel.com --- drivers/edac/ie31200_edac.c | 108 ++++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 47 deletions(-) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 01d719845a88d..70be0d00a188a 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -332,21 +332,51 @@ static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int dimm, dd->dtype = field_get(cfg->reg_mad_dimm_width_mask[dimm], addr_decode) + DEV_X8; } -static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) +static void ie31200_get_dimm_config(struct mem_ctl_info *mci, void __iomem *window, + struct res_config *cfg) { - int i, j, k, ret; - struct mem_ctl_info *mci = NULL; - struct edac_mc_layer layers[2]; - void __iomem *window; - struct ie31200_priv *priv; + struct dimm_data dimm_info; + struct dimm_info *dimm; + unsigned long nr_pages; u32 addr_decode; + int i, j, k; - edac_dbg(0, "MC:\n"); + for (i = 0; i < IE31200_CHANNELS; i++) { + addr_decode = readl(window + cfg->reg_mad_dimm_offset[i]); + edac_dbg(0, "addr_decode: 0x%x\n", addr_decode); - if (!ecc_capable(pdev)) { - ie31200_printk(KERN_INFO, "No ECC support\n"); - return -ENODEV; + for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) { + populate_dimm_info(&dimm_info, addr_decode, j, cfg); + edac_dbg(0, "channel: %d, dimm: %d, size: %lld MiB, ranks: %d, DRAM chip type: %d\n", + i, j, dimm_info.size >> 20, + dimm_info.ranks, + dimm_info.dtype); + + nr_pages = MiB_TO_PAGES(dimm_info.size >> 20); + if (nr_pages == 0) + continue; + + nr_pages = nr_pages / dimm_info.ranks; + for (k = 0; k < dimm_info.ranks; k++) { + dimm = edac_get_dimm(mci, (j * dimm_info.ranks) + k, i, 0); + dimm->nr_pages = nr_pages; + edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); + dimm->grain = 8; /* just a guess */ + dimm->mtype = cfg->mtype; + 
dimm->dtype = dimm_info.dtype; + dimm->edac_mode = EDAC_UNKNOWN; + } + } } +} + +static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg) +{ + struct edac_mc_layer layers[2]; + struct ie31200_priv *priv; + struct mem_ctl_info *mci; + void __iomem *window; + int ret; nr_channels = how_many_channels(pdev); layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; @@ -382,38 +412,7 @@ static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) priv->c1errlog = window + cfg->reg_eccerrlog_offset[1]; priv->cfg = cfg; - for (i = 0; i < IE31200_CHANNELS; i++) { - addr_decode = readl(window + cfg->reg_mad_dimm_offset[i]); - edac_dbg(0, "addr_decode: 0x%x\n", addr_decode); - - for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) { - struct dimm_data dimm_info; - struct dimm_info *dimm; - unsigned long nr_pages; - - populate_dimm_info(&dimm_info, addr_decode, j, cfg); - edac_dbg(0, "channel: %d, dimm: %d, size: %lld MiB, ranks: %d, DRAM chip type: %d\n", - i, j, dimm_info.size >> 20, - dimm_info.ranks, - dimm_info.dtype); - - nr_pages = MiB_TO_PAGES(dimm_info.size >> 20); - if (nr_pages == 0) - continue; - - nr_pages = nr_pages / dimm_info.ranks; - for (k = 0; k < dimm_info.ranks; k++) { - dimm = edac_get_dimm(mci, (j * dimm_info.ranks) + k, i, 0); - dimm->nr_pages = nr_pages; - edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); - dimm->grain = 8; /* just a guess */ - dimm->mtype = cfg->mtype; - dimm->dtype = dimm_info.dtype; - dimm->edac_mode = EDAC_UNKNOWN; - } - } - } - + ie31200_get_dimm_config(mci, window, cfg); ie31200_clear_error_info(mci); if (edac_mc_add_mc(mci)) { @@ -422,19 +421,34 @@ static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) goto fail_unmap; } - /* get this far and it's successful */ - edac_dbg(3, "MC: success\n"); return 0; - fail_unmap: iounmap(window); - fail_free: edac_mc_free(mci); - return ret; } +static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) +{ + int ret; + + edac_dbg(0, "MC:\n"); + + if 
(!ecc_capable(pdev)) { + ie31200_printk(KERN_INFO, "No ECC support\n"); + return -ENODEV; + } + + ret = ie31200_register_mci(pdev, cfg); + if (ret) + return ret; + + /* get this far and it's successful. */ + edac_dbg(3, "MC: success\n"); + return 0; +} + static int ie31200_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { From d0742284ec6da1435acdda428088136944c4c3c2 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 10 Mar 2025 09:14:10 +0800 Subject: [PATCH 23/25] EDAC/ie31200: Add Intel Raptor Lake-S SoCs support The Intel Raptor Lake-S SoC contains two memory controllers with DDR5 memory type and out-of-band ECC capability. The resource definitions of the memory controller are different from previous generations. One notable difference is that the PCI ERRSTS register is deprecated and is not used to indicate the presence of errors or to clear the MMIO-mapped ECC error log registers. Extend the ie31200_edac driver to support multiple memory controllers, add a resource configuration table and use an MSR register to clear the ECC error log registers to provide EDAC support for Raptor Lake-S SoCs. 
Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Gary Wang Link: https://lore.kernel.org/r/20250310011411.31685-11-qiuxu.zhuo@intel.com --- drivers/edac/ie31200_edac.c | 182 +++++++++++++++++++++++++++++------- 1 file changed, 149 insertions(+), 33 deletions(-) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 70be0d00a188a..8c0a2beec5370 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -84,9 +84,15 @@ #define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9 0x3ec6 #define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10 0x3eca -#define IE31200_RANKS_PER_CHANNEL 4 +/* Raptor Lake-S */ +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1 0xa703 +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2 0x4640 +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3 0x4630 + +#define IE31200_RANKS_PER_CHANNEL 8 #define IE31200_DIMMS_PER_CHANNEL 2 #define IE31200_CHANNELS 2 +#define IE31200_IMC_NUM 2 /* Intel IE31200 register addresses - device 0 function 0 - DRAM Controller */ #define IE31200_MCHBAR_LOW 0x48 @@ -117,15 +123,20 @@ static int ie31200_registered = 1; struct res_config { enum mem_type mtype; + int imc_num; /* Host MMIO configuration register */ u64 reg_mchbar_mask; u64 reg_mchbar_window_size; /* ECC error log register */ u64 reg_eccerrlog_offset[IE31200_CHANNELS]; u64 reg_eccerrlog_ce_mask; + u64 reg_eccerrlog_ce_ovfl_mask; u64 reg_eccerrlog_ue_mask; + u64 reg_eccerrlog_ue_ovfl_mask; u64 reg_eccerrlog_rank_mask; u64 reg_eccerrlog_syndrome_mask; + /* MSR to clear ECC error log register */ + u32 msr_clear_eccerrlog_offset; /* DIMM characteristics register */ u64 reg_mad_dimm_size_granularity; u64 reg_mad_dimm_offset[IE31200_CHANNELS]; @@ -139,10 +150,18 @@ struct ie31200_priv { void __iomem *c0errlog; void __iomem *c1errlog; struct res_config *cfg; + struct mem_ctl_info *mci; + struct pci_dev *pdev; + struct device dev; }; +static struct ie31200_pvt { + struct ie31200_priv *priv[IE31200_IMC_NUM]; +} ie31200_pvt; + enum ie31200_chips { IE31200 = 
0, + IE31200_1 = 1, }; struct ie31200_dev_info { @@ -159,6 +178,9 @@ static const struct ie31200_dev_info ie31200_devs[] = { [IE31200] = { .ctl_name = "IE31200" }, + [IE31200_1] = { + .ctl_name = "IE31200_1" + }, }; struct dimm_data { @@ -202,23 +224,54 @@ static bool ecc_capable(struct pci_dev *pdev) return true; } +#define mci_to_pci_dev(mci) (((struct ie31200_priv *)(mci)->pvt_info)->pdev) + static void ie31200_clear_error_info(struct mem_ctl_info *mci) { + struct ie31200_priv *priv = mci->pvt_info; + struct res_config *cfg = priv->cfg; + + /* + * The PCI ERRSTS register is deprecated. Write the MSR to clear + * the ECC error log registers in all memory controllers. + */ + if (cfg->msr_clear_eccerrlog_offset) { + if (wrmsr_safe(cfg->msr_clear_eccerrlog_offset, + cfg->reg_eccerrlog_ce_mask | + cfg->reg_eccerrlog_ce_ovfl_mask | + cfg->reg_eccerrlog_ue_mask | + cfg->reg_eccerrlog_ue_ovfl_mask, 0) < 0) + ie31200_printk(KERN_ERR, "Failed to wrmsr.\n"); + + return; + } + /* * Clear any error bits. * (Yes, we really clear bits by writing 1 to them.) */ - pci_write_bits16(to_pci_dev(mci->pdev), IE31200_ERRSTS, + pci_write_bits16(mci_to_pci_dev(mci), IE31200_ERRSTS, IE31200_ERRSTS_BITS, IE31200_ERRSTS_BITS); } static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci, struct ie31200_error_info *info) { - struct pci_dev *pdev; + struct pci_dev *pdev = mci_to_pci_dev(mci); struct ie31200_priv *priv = mci->pvt_info; - pdev = to_pci_dev(mci->pdev); + /* + * The PCI ERRSTS register is deprecated, directly read the + * MMIO-mapped ECC error log registers. 
+ */ + if (priv->cfg->msr_clear_eccerrlog_offset) { + info->eccerrlog[0] = lo_hi_readq(priv->c0errlog); + if (nr_channels == 2) + info->eccerrlog[1] = lo_hi_readq(priv->c1errlog); + + ie31200_clear_error_info(mci); + return; + } /* * This is a mess because there is no atomic way to read all the @@ -259,13 +312,15 @@ static void ie31200_process_error_info(struct mem_ctl_info *mci, int channel; u64 log; - if (!(info->errsts & IE31200_ERRSTS_BITS)) - return; + if (!cfg->msr_clear_eccerrlog_offset) { + if (!(info->errsts & IE31200_ERRSTS_BITS)) + return; - if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) { - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, - -1, -1, -1, "UE overwrote CE", ""); - info->errsts = info->errsts2; + if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) { + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, + -1, -1, -1, "UE overwrote CE", ""); + info->errsts = info->errsts2; + } } for (channel = 0; channel < nr_channels; channel++) { @@ -295,7 +350,7 @@ static void ie31200_check(struct mem_ctl_info *mci) ie31200_process_error_info(mci, &info); } -static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev, struct res_config *cfg) +static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev, struct res_config *cfg, int mc) { union { u64 mchbar; @@ -309,6 +364,7 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev, struct res_config pci_read_config_dword(pdev, IE31200_MCHBAR_LOW, &u.mchbar_low); pci_read_config_dword(pdev, IE31200_MCHBAR_HIGH, &u.mchbar_high); u.mchbar &= cfg->reg_mchbar_mask; + u.mchbar += cfg->reg_mchbar_window_size * mc; if (u.mchbar != (resource_size_t)u.mchbar) { ie31200_printk(KERN_ERR, "mmio space beyond accessible range (0x%llx)\n", @@ -333,7 +389,7 @@ static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int dimm, } static void ie31200_get_dimm_config(struct mem_ctl_info *mci, void __iomem *window, - struct res_config *cfg) + struct res_config *cfg, 
int mc) { struct dimm_data dimm_info; struct dimm_info *dimm; @@ -347,8 +403,8 @@ static void ie31200_get_dimm_config(struct mem_ctl_info *mci, void __iomem *wind for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) { populate_dimm_info(&dimm_info, addr_decode, j, cfg); - edac_dbg(0, "channel: %d, dimm: %d, size: %lld MiB, ranks: %d, DRAM chip type: %d\n", - i, j, dimm_info.size >> 20, + edac_dbg(0, "mc: %d, channel: %d, dimm: %d, size: %lld MiB, ranks: %d, DRAM chip type: %d\n", + mc, i, j, dimm_info.size >> 20, dimm_info.ranks, dimm_info.dtype); @@ -370,7 +426,7 @@ static void ie31200_get_dimm_config(struct mem_ctl_info *mci, void __iomem *wind } } -static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg) +static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg, int mc) { struct edac_mc_layer layers[2]; struct ie31200_priv *priv; @@ -385,24 +441,23 @@ static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg) layers[1].type = EDAC_MC_LAYER_CHANNEL; layers[1].size = nr_channels; layers[1].is_virt_csrow = false; - mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, + mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, sizeof(struct ie31200_priv)); if (!mci) return -ENOMEM; - window = ie31200_map_mchbar(pdev, cfg); + window = ie31200_map_mchbar(pdev, cfg, mc); if (!window) { ret = -ENODEV; goto fail_free; } edac_dbg(3, "MC: init mci\n"); - mci->pdev = &pdev->dev; mci->mtype_cap = BIT(cfg->mtype); mci->edac_ctl_cap = EDAC_FLAG_SECDED; mci->edac_cap = EDAC_FLAG_SECDED; mci->mod_name = EDAC_MOD_STR; - mci->ctl_name = ie31200_devs[0].ctl_name; + mci->ctl_name = ie31200_devs[mc].ctl_name; mci->dev_name = pci_name(pdev); mci->edac_check = ie31200_check; mci->ctl_page_to_phys = NULL; @@ -411,8 +466,22 @@ static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg) priv->c0errlog = window + cfg->reg_eccerrlog_offset[0]; priv->c1errlog = window + cfg->reg_eccerrlog_offset[1]; priv->cfg = cfg; + priv->mci 
= mci; + priv->pdev = pdev; + device_initialize(&priv->dev); + /* + * The EDAC core uses mci->pdev (pointer to the structure device) + * as the memory controller ID. The SoCs attach one or more memory + * controllers to a single pci_dev (a single pci_dev->dev can + * correspond to multiple memory controllers). + * + * To make mci->pdev unique, assign pci_dev->dev to mci->pdev + * for the first memory controller and assign a unique priv->dev + * to mci->pdev for each additional memory controller. + */ + mci->pdev = mc ? &priv->dev : &pdev->dev; - ie31200_get_dimm_config(mci, window, cfg); + ie31200_get_dimm_config(mci, window, cfg, mc); ie31200_clear_error_info(mci); if (edac_mc_add_mc(mci)) { @@ -421,6 +490,7 @@ static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg) goto fail_unmap; } + ie31200_pvt.priv[mc] = priv; return 0; fail_unmap: iounmap(window); @@ -429,9 +499,27 @@ static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg) return ret; } +static void ie31200_unregister_mcis(void) +{ + struct ie31200_priv *priv; + struct mem_ctl_info *mci; + int i; + + for (i = 0; i < IE31200_IMC_NUM; i++) { + priv = ie31200_pvt.priv[i]; + if (!priv) + continue; + + mci = priv->mci; + edac_mc_del_mc(mci->pdev); + iounmap(priv->window); + edac_mc_free(mci); + } +} + static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) { - int ret; + int i, ret; edac_dbg(0, "MC:\n"); @@ -440,13 +528,19 @@ static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) return -ENODEV; } - ret = ie31200_register_mci(pdev, cfg); - if (ret) - return ret; + for (i = 0; i < cfg->imc_num; i++) { + ret = ie31200_register_mci(pdev, cfg, i); + if (ret) + goto fail_register; + } /* get this far and it's successful. 
*/ edac_dbg(3, "MC: success\n"); return 0; + +fail_register: + ie31200_unregister_mcis(); + return ret; } static int ie31200_init_one(struct pci_dev *pdev, @@ -466,22 +560,15 @@ static int ie31200_init_one(struct pci_dev *pdev, static void ie31200_remove_one(struct pci_dev *pdev) { - struct mem_ctl_info *mci; - struct ie31200_priv *priv; - edac_dbg(0, "\n"); pci_dev_put(mci_pdev); mci_pdev = NULL; - mci = edac_mc_del_mc(&pdev->dev); - if (!mci) - return; - priv = mci->pvt_info; - iounmap(priv->window); - edac_mc_free(mci); + ie31200_unregister_mcis(); } static struct res_config snb_cfg = { .mtype = MEM_DDR3, + .imc_num = 1, .reg_mchbar_mask = GENMASK_ULL(38, 15), .reg_mchbar_window_size = BIT_ULL(15), .reg_eccerrlog_offset[0] = 0x40c8, @@ -503,6 +590,7 @@ static struct res_config snb_cfg = { static struct res_config skl_cfg = { .mtype = MEM_DDR4, + .imc_num = 1, .reg_mchbar_mask = GENMASK_ULL(38, 15), .reg_mchbar_window_size = BIT_ULL(15), .reg_eccerrlog_offset[0] = 0x4048, @@ -522,6 +610,31 @@ static struct res_config skl_cfg = { .reg_mad_dimm_width_mask[1] = GENMASK(25, 24), }; +struct res_config rpl_s_cfg = { + .mtype = MEM_DDR5, + .imc_num = 2, + .reg_mchbar_mask = GENMASK_ULL(41, 17), + .reg_mchbar_window_size = BIT_ULL(16), + .reg_eccerrlog_offset[0] = 0xe048, + .reg_eccerrlog_offset[1] = 0xe848, + .reg_eccerrlog_ce_mask = BIT_ULL(0), + .reg_eccerrlog_ce_ovfl_mask = BIT_ULL(1), + .reg_eccerrlog_ue_mask = BIT_ULL(2), + .reg_eccerrlog_ue_ovfl_mask = BIT_ULL(3), + .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27), + .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16), + .msr_clear_eccerrlog_offset = 0x791, + .reg_mad_dimm_offset[0] = 0xd80c, + .reg_mad_dimm_offset[1] = 0xd810, + .reg_mad_dimm_size_granularity = BIT_ULL(29), + .reg_mad_dimm_size_mask[0] = GENMASK(6, 0), + .reg_mad_dimm_size_mask[1] = GENMASK(22, 16), + .reg_mad_dimm_rank_mask[0] = GENMASK(10, 9), + .reg_mad_dimm_rank_mask[1] = GENMASK(27, 26), + .reg_mad_dimm_width_mask[0] = GENMASK(8, 7), + 
.reg_mad_dimm_width_mask[1] = GENMASK(25, 24), +}; + static const struct pci_device_id ie31200_pci_tbl[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_1), (kernel_ulong_t)&snb_cfg }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_2), (kernel_ulong_t)&snb_cfg }, @@ -545,6 +658,9 @@ static const struct pci_device_id ie31200_pci_tbl[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_8), (kernel_ulong_t)&skl_cfg }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9), (kernel_ulong_t)&skl_cfg }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3), (kernel_ulong_t)&rpl_s_cfg}, { 0, } /* 0 terminated list. */ }; MODULE_DEVICE_TABLE(pci, ie31200_pci_tbl); From a5db1b296b181c7ced38252d2ff40e3cf87a12df Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 10 Mar 2025 09:14:11 +0800 Subject: [PATCH 24/25] EDAC/ie31200: Switch Raptor Lake-S to interrupt mode Raptor Lake-S SoCs notify correctable memory errors via CMCI (Corrected Machine Check Interrupt). Switch Raptor Lake-S EDAC support from polling to interrupt mode by registering the callback to the MCE decode notifier chain. Note that as Raptor Lake-S SoCs may not recover from uncorrectable memory errors, the system will hang as soon as this type of error occurs, and the registered callback on the MCE decode chain will not be executed. This is the expected behavior. 
Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Gary Wang Link: https://lore.kernel.org/r/20250310011411.31685-12-qiuxu.zhuo@intel.com --- drivers/edac/Kconfig | 2 +- drivers/edac/ie31200_edac.c | 83 ++++++++++++++++++++++++++++++++++--- 2 files changed, 78 insertions(+), 7 deletions(-) diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 2051a7c944a58..b908a118fb322 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -168,7 +168,7 @@ config EDAC_I3200 config EDAC_IE31200 tristate "Intel e312xx" - depends on PCI && X86 + depends on PCI && X86 && X86_MCE_INTEL help Support for error detection and correction on the Intel E3-1200 based DRAM controllers. diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 8c0a2beec5370..2048341495798 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -51,6 +51,7 @@ #include #include +#include #include "edac_module.h" #define EDAC_MOD_STR "ie31200_edac" @@ -123,6 +124,7 @@ static int ie31200_registered = 1; struct res_config { enum mem_type mtype; + bool cmci; int imc_num; /* Host MMIO configuration register */ u64 reg_mchbar_mask; @@ -172,6 +174,7 @@ struct ie31200_error_info { u16 errsts; u16 errsts2; u64 eccerrlog[IE31200_CHANNELS]; + u64 erraddr; }; static const struct ie31200_dev_info ie31200_devs[] = { @@ -327,13 +330,13 @@ static void ie31200_process_error_info(struct mem_ctl_info *mci, log = info->eccerrlog[channel]; if (log & cfg->reg_eccerrlog_ue_mask) { edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, - 0, 0, 0, + info->erraddr >> PAGE_SHIFT, 0, 0, field_get(cfg->reg_eccerrlog_rank_mask, log), channel, -1, "ie31200 UE", ""); } else if (log & cfg->reg_eccerrlog_ce_mask) { edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, - 0, 0, + info->erraddr >> PAGE_SHIFT, 0, field_get(cfg->reg_eccerrlog_syndrome_mask, log), field_get(cfg->reg_eccerrlog_rank_mask, log), channel, -1, @@ -342,14 +345,20 @@ static void 
ie31200_process_error_info(struct mem_ctl_info *mci, } } -static void ie31200_check(struct mem_ctl_info *mci) +static void __ie31200_check(struct mem_ctl_info *mci, struct mce *mce) { struct ie31200_error_info info; + info.erraddr = mce ? mce->addr : 0; ie31200_get_and_clear_error_info(mci, &info); ie31200_process_error_info(mci, &info); } +static void ie31200_check(struct mem_ctl_info *mci) +{ + __ie31200_check(mci, NULL); +} + static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev, struct res_config *cfg, int mc) { union { @@ -459,7 +468,7 @@ static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg, in mci->mod_name = EDAC_MOD_STR; mci->ctl_name = ie31200_devs[mc].ctl_name; mci->dev_name = pci_name(pdev); - mci->edac_check = ie31200_check; + mci->edac_check = cfg->cmci ? NULL : ie31200_check; mci->ctl_page_to_phys = NULL; priv = mci->pvt_info; priv->window = window; @@ -499,6 +508,58 @@ static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg, in return ret; } +static void mce_check(struct mce *mce) +{ + struct ie31200_priv *priv; + int i; + + for (i = 0; i < IE31200_IMC_NUM; i++) { + priv = ie31200_pvt.priv[i]; + if (!priv) + continue; + + __ie31200_check(priv->mci, mce); + } +} + +static int mce_handler(struct notifier_block *nb, unsigned long val, void *data) +{ + struct mce *mce = (struct mce *)data; + char *type; + + if (mce->kflags & MCE_HANDLED_CEC) + return NOTIFY_DONE; + + /* + * Ignore unless this is a memory related error. + * Don't check MCI_STATUS_ADDRV since it's not set on some CPUs. + */ + if ((mce->status & 0xefff) >> 7 != 1) + return NOTIFY_DONE; + + type = mce->mcgstatus & MCG_STATUS_MCIP ? 
"Exception" : "Event"; + + edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n", + mce->extcpu, type, mce->mcgstatus, + mce->bank, mce->status); + edac_dbg(0, "TSC 0x%llx\n", mce->tsc); + edac_dbg(0, "ADDR 0x%llx\n", mce->addr); + edac_dbg(0, "MISC 0x%llx\n", mce->misc); + edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n", + mce->cpuvendor, mce->cpuid, mce->time, + mce->socketid, mce->apicid); + + mce_check(mce); + mce->kflags |= MCE_HANDLED_EDAC; + + return NOTIFY_DONE; +} + +static struct notifier_block ie31200_mce_dec = { + .notifier_call = mce_handler, + .priority = MCE_PRIO_EDAC, +}; + static void ie31200_unregister_mcis(void) { struct ie31200_priv *priv; @@ -534,6 +595,13 @@ static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) goto fail_register; } + if (cfg->cmci) { + mce_register_decode_chain(&ie31200_mce_dec); + edac_op_state = EDAC_OPSTATE_INT; + } else { + edac_op_state = EDAC_OPSTATE_POLL; + } + /* get this far and it's successful. 
*/ edac_dbg(3, "MC: success\n"); return 0; @@ -560,9 +628,13 @@ static int ie31200_init_one(struct pci_dev *pdev, static void ie31200_remove_one(struct pci_dev *pdev) { + struct ie31200_priv *priv = ie31200_pvt.priv[0]; + edac_dbg(0, "\n"); pci_dev_put(mci_pdev); mci_pdev = NULL; + if (priv->cfg->cmci) + mce_unregister_decode_chain(&ie31200_mce_dec); ie31200_unregister_mcis(); } @@ -612,6 +684,7 @@ static struct res_config skl_cfg = { struct res_config rpl_s_cfg = { .mtype = MEM_DDR5, + .cmci = true, .imc_num = 2, .reg_mchbar_mask = GENMASK_ULL(41, 17), .reg_mchbar_window_size = BIT_ULL(16), @@ -677,8 +750,6 @@ static int __init ie31200_init(void) int pci_rc, i; edac_dbg(3, "MC:\n"); - /* Ensure that the OPSTATE is set correctly for POLL or NMI */ - opstate_init(); pci_rc = pci_register_driver(&ie31200_driver); if (pci_rc < 0) From f30dab9d888f60949e7e721c278c7c232eed3835 Mon Sep 17 00:00:00 2001 From: David Thompson Date: Wed, 19 Mar 2025 14:16:30 -0400 Subject: [PATCH 25/25] MAINTAINERS: Add a secondary maintainer for bluefield_edac Add David as a secondary maintainer. [ bp: Rewrite commit message. ] Signed-off-by: David Thompson Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20250319181630.2673-1-davthompson@nvidia.com --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index efee40ea589f7..570411b580ad1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8214,6 +8214,7 @@ F: drivers/edac/aspeed_edac.c EDAC-BLUEFIELD M: Shravan Kumar Ramani +M: David Thompson S: Supported F: drivers/edac/bluefield_edac.c