diff --git a/Documentation/edac/features.rst b/Documentation/edac/features.rst
new file mode 100644
index 0000000000000..3c279d026bbd9
--- /dev/null
+++ b/Documentation/edac/features.rst
@@ -0,0 +1,93 @@
+.. SPDX-License-Identifier: GPL-2.0 OR GFDL-1.2-no-invariants-or-later
+
+=================
+EDAC/RAS features
+=================
+
+Copyright (c) 2024-2025 HiSilicon Limited.
+
+:Author:   Shiju Jose
+:License:  The GNU Free Documentation License, Version 1.2 without
+           Invariant Sections, Front-Cover Texts nor Back-Cover Texts.
+           (dual licensed under the GPL v2)
+
+- Written for: 6.15
+
+Introduction
+------------
+
+EDAC/RAS components plugging and high-level design:
+
+1. Scrub control
+
+2. Error Check Scrub (ECS) control
+
+3. ACPI RAS2 features
+
+4. Post Package Repair (PPR) control
+
+5. Memory Sparing Repair control
+
+High level design is illustrated in the following diagram::
+
+        +-----------------------------------------------+
+        |   Userspace - Rasdaemon                       |
+        | +-------------+                               |
+        | | RAS CXL mem |     +---------------+         |
+        | |error handler|---->|               |         |
+        | +-------------+     | RAS dynamic   |         |
+        | +-------------+     | scrub, memory |         |
+        | | RAS memory  |---->| repair control|         |
+        | |error handler|     +----|----------+         |
+        | +-------------+          |                    |
+        +--------------------------|--------------------+
+                                   |
+                                   |
+   +-------------------------------|------------------------------+
+   |     Kernel EDAC extension for | controlling RAS Features     |
+   |+------------------------------|----------------------------+ |
+   || EDAC Core          Sysfs EDAC| Bus                        | |
+   ||   +--------------------------|---------------------------+| |
+   ||   |/sys/bus/edac/devices/<dev>/scrubX/ |   | EDAC device || |
+   ||   |/sys/bus/edac/devices/<dev>/ecsX/   |<->| EDAC MC     || |
+   ||   |/sys/bus/edac/devices/<dev>/repairX |   | EDAC sysfs  || |
+   ||   +---------------------------|--------------------------+| |
+   ||                           EDAC|Bus                        | |
+   ||                               |                           | |
+   ||   +----------+ Get feature    |      Get feature          | |
+   ||   |          | desc +---------|------+ desc +----------+  | |
+   ||   |EDAC scrub|<-----| EDAC device    |      |          |  | |
+   ||   +----------+      | driver- RAS    |----->| EDAC mem |  | |
+   ||   +----------+      | feature control|      | repair   |  | |
+   ||   |          |<-----|                |      +----------+  | |
+   ||   |EDAC ECS  |      +---------|------+                    | |
+   ||   +----------+    Register RAS|features                   | |
+   ||         ______________________|_____________              | |
+   |+---------|---------------|------------------|--------------+ |
+   |  +-------|----+  +-------|-------+     +----|----------+     |
+   |  |            |  | CXL mem driver|     | Client driver |     |
+   |  | ACPI RAS2  |  | scrub, ECS,   |     | memory repair |     |
+   |  | driver     |  | sparing, PPR  |     | features      |     |
+   |  +-----|------+  +-------|-------+     +------|--------+     |
+   |        |                 |                    |              |
+   +--------|-----------------|--------------------|--------------+
+            |                 |                    |
+   +--------|-----------------|--------------------|--------------+
+   |    +---|-----------------|--------------------|-------+      |
+   |    |                                                  |      |
+   |    |             Platform HW and Firmware             |      |
+   |    +--------------------------------------------------+      |
+   +--------------------------------------------------------------+
+
+
+1. EDAC Features components - Create feature-specific descriptors. For
+   example: scrub, ECS, memory repair in the above diagram.
+
+2. EDAC device driver for controlling RAS Features - Gets the feature's
+   attribute descriptors from the EDAC RAS feature component, registers the
+   device's RAS features with the EDAC bus and exposes the feature control
+   attributes via sysfs. For example, /sys/bus/edac/devices/<dev>/<feature>X/
+
+3. RAS dynamic feature controller - Userspace sample modules in rasdaemon for
+   dynamic scrub/repair control to issue scrubbing/repair when an excessive
+   number of corrected memory errors is reported in a short span of time.
diff --git a/Documentation/edac/index.rst b/Documentation/edac/index.rst
new file mode 100644
index 0000000000000..de4a3aa452cb1
--- /dev/null
+++ b/Documentation/edac/index.rst
@@ -0,0 +1,10 @@
+.. SPDX-License-Identifier: GPL-2.0 OR GFDL-1.2-no-invariants-or-later
+
+==============
+EDAC Subsystem
+==============
+
+.. 
toctree::
+   :maxdepth: 1
+
+   features
diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
index 621dc2a5d0347..6af0893cadc94 100644
--- a/drivers/edac/edac_device.c
+++ b/drivers/edac/edac_device.c
@@ -570,3 +570,113 @@ void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
 			block ? block->name : "N/A", count, msg);
 }
 EXPORT_SYMBOL_GPL(edac_device_handle_ue_count);
+
+/*
+ * Release callback for the RAS feature device: frees the attribute group
+ * array attached to the device and the context that embeds the device.
+ */
+static void edac_dev_release(struct device *dev)
+{
+	struct edac_dev_feat_ctx *ctx = container_of(dev, struct edac_dev_feat_ctx, dev);
+
+	kfree(ctx->dev.groups);
+	kfree(ctx);
+}
+
+static const struct device_type edac_dev_type = {
+	.name = "edac_dev",
+	.release = edac_dev_release,
+};
+
+/* devm action: @data is the feature device to unregister. */
+static void edac_dev_unreg(void *data)
+{
+	device_unregister(data);
+}
+
+/**
+ * edac_dev_register - register device for RAS features with EDAC
+ * @parent: parent device.
+ * @name: name for the folder in /sys/bus/edac/devices/, which is derived
+ *        from the parent device, e.g. /sys/bus/edac/devices/cxl_mem0/.
+ *        Used verbatim: it is not interpreted as a format string.
+ * @private: parent driver's data to store in the context if any.
+ * @num_features: number of RAS features to register; must be positive.
+ * @ras_features: list of RAS features to register.
+ *
+ * Return:
+ *  * %0       - Success.
+ *  * %-EINVAL - Invalid parameters passed.
+ *  * %-ENOMEM - Dynamic memory allocation failed.
+ *
+ */
+int edac_dev_register(struct device *parent, char *name,
+		      void *private, int num_features,
+		      const struct edac_dev_feature *ras_features)
+{
+	const struct attribute_group **ras_attr_groups;
+	struct edac_dev_feat_ctx *ctx;
+	int attr_gcnt = 0;
+	int ret = -ENOMEM;
+	int feat;
+
+	/* A negative feature count is as invalid as an empty list. */
+	if (!parent || !name || num_features <= 0 || !ras_features)
+		return -EINVAL;
+
+	/* Double parse to make space for attributes */
+	for (feat = 0; feat < num_features; feat++) {
+		switch (ras_features[feat].ft_type) {
+		/* Add feature specific code */
+		default:
+			return -EINVAL;
+		}
+	}
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	/* One extra zeroed slot keeps the group array NULL-terminated. */
+	ras_attr_groups = kcalloc(attr_gcnt + 1, sizeof(*ras_attr_groups), GFP_KERNEL);
+	if (!ras_attr_groups)
+		goto ctx_free;
+
+	attr_gcnt = 0;
+	for (feat = 0; feat < num_features; feat++, ras_features++) {
+		switch (ras_features->ft_type) {
+		/* Add feature specific code */
+		default:
+			ret = -EINVAL;
+			goto groups_free;
+		}
+	}
+
+	ctx->dev.parent = parent;
+	ctx->dev.bus = edac_get_sysfs_subsys();
+	ctx->dev.type = &edac_dev_type;
+	ctx->dev.groups = ras_attr_groups;
+	ctx->private = private;
+	dev_set_drvdata(&ctx->dev, ctx);
+
+	/* @name is caller-supplied data, so never use it as the format string. */
+	ret = dev_set_name(&ctx->dev, "%s", name);
+	if (ret)
+		goto groups_free;
+
+	ret = device_register(&ctx->dev);
+	if (ret) {
+		/* edac_dev_release() frees the groups and ctx once the last ref drops. */
+		put_device(&ctx->dev);
+		return ret;
+	}
+
+	return devm_add_action_or_reset(parent, edac_dev_unreg, &ctx->dev);
+
+groups_free:
+	kfree(ras_attr_groups);
+ctx_free:
+	kfree(ctx);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(edac_dev_register);
diff --git a/include/linux/edac.h b/include/linux/edac.h
index b4ee8961e6236..8c4b6ca2a994d 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -661,4 +661,32 @@ static inline struct dimm_info *edac_get_dimm(struct mem_ctl_info *mci,
 	return mci->dimms[index];
 }
+
+/* RAS feature type */
+enum edac_dev_feat {
+	RAS_FEAT_MAX
+};
+
+/* EDAC device feature information structure */
+struct edac_dev_data {
+	u8 instance;
+	void *private;
+};
+
+/* Context behind each device registered through edac_dev_register() */
+struct edac_dev_feat_ctx {
+	struct device dev;
+	void *private;
+};
+
+/* Describes one RAS feature passed to edac_dev_register() */
+struct edac_dev_feature {
+	enum edac_dev_feat ft_type;
+	u8 instance;
+	void *ctx;
+};
+
+int edac_dev_register(struct device *parent, char *dev_name,
+		      void *parent_pvt_data, int num_features,
+		      const struct edac_dev_feature *ras_features);
 
 #endif /* _LINUX_EDAC_H_ */