diff --git a/Documentation/ABI/testing/sysfs-edac-memory-repair b/Documentation/ABI/testing/sysfs-edac-memory-repair index c54f59e4497b..0434a3b23ff3 100644 --- a/Documentation/ABI/testing/sysfs-edac-memory-repair +++ b/Documentation/ABI/testing/sysfs-edac-memory-repair @@ -42,6 +42,14 @@ Description: - ppr - Post package repair. + - cacheline-sparing + + - row-sparing + + - bank-sparing + + - rank-sparing + - All other values are reserved. What: /sys/bus/edac/devices//mem_repairX/persist_mode @@ -134,6 +142,55 @@ Description: related error records and trace events, for eg. CXL DRAM and CXL general media error records in CXL memory devices. +What: /sys/bus/edac/devices//mem_repairX/bank_group +What: /sys/bus/edac/devices//mem_repairX/bank +What: /sys/bus/edac/devices//mem_repairX/rank +What: /sys/bus/edac/devices//mem_repairX/row +What: /sys/bus/edac/devices//mem_repairX/column +What: /sys/bus/edac/devices//mem_repairX/channel +What: /sys/bus/edac/devices//mem_repairX/sub_channel +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) The control attributes for the memory to be repaired. + The specific value of attributes to use depends on the + portion of memory to repair and will be reported to the host + in related error records and be available to userspace + in trace events, such as CXL DRAM and CXL general media + error records of CXL memory devices. + + When readng back these attributes, it returns the current + value of memory requested to be repaired. + + bank_group - The bank group of the memory to repair. + + bank - The bank number of the memory to repair. + + rank - The rank of the memory to repair. Rank is defined as a + set of memory devices on a channel that together execute a + transaction. + + row - The row number of the memory to repair. + + column - The column number of the memory to repair. + + channel - The channel of the memory to repair. Channel is + defined as an interface that can be independently accessed + for a transaction. + + sub_channel - The subchannel of the memory to repair. + + The requirement to set these attributes varies based on the + repair function. The attributes in sysfs are not present + unless required for a repair function. + + For example, CXL spec ver 3.1, Section 8.2.9.7.1.2 Table 8-103 + soft PPR and Section 8.2.9.7.1.3 Table 8-104 hard PPR operations, + these attributes are not required to set. CXL spec ver 3.1, + Section 8.2.9.7.1.4 Table 8-105 memory sparing, these attributes + are required to set based on memory sparing granularity. + What: /sys/bus/edac/devices//mem_repairX/repair Date: March 2025 KernelVersion: 6.15 diff --git a/drivers/edac/mem_repair.c b/drivers/edac/mem_repair.c index 5c94ac1027db..3b1a845457b0 100755 --- a/drivers/edac/mem_repair.c +++ b/drivers/edac/mem_repair.c @@ -22,6 +22,13 @@ enum edac_mem_repair_attributes { MR_MIN_DPA, MR_MAX_DPA, MR_NIBBLE_MASK, + MR_BANK_GROUP, + MR_BANK, + MR_RANK, + MR_ROW, + MR_COLUMN, + MR_CHANNEL, + MR_SUB_CHANNEL, MEM_DO_REPAIR, MR_MAX_ATTRS }; @@ -70,6 +77,13 @@ MR_ATTR_SHOW(dpa, get_dpa, u64, "0x%llx\n") MR_ATTR_SHOW(min_dpa, get_min_dpa, u64, "0x%llx\n") MR_ATTR_SHOW(max_dpa, get_max_dpa, u64, "0x%llx\n") MR_ATTR_SHOW(nibble_mask, get_nibble_mask, u32, "0x%x\n") +MR_ATTR_SHOW(bank_group, get_bank_group, u32, "%u\n") +MR_ATTR_SHOW(bank, get_bank, u32, "%u\n") +MR_ATTR_SHOW(rank, get_rank, u32, "%u\n") +MR_ATTR_SHOW(row, get_row, u32, "0x%x\n") +MR_ATTR_SHOW(column, get_column, u32, "%u\n") +MR_ATTR_SHOW(channel, get_channel, u32, "%u\n") +MR_ATTR_SHOW(sub_channel, get_sub_channel, u32, "%u\n") #define MR_ATTR_STORE(attrib, cb, type, conv_func) \ static ssize_t attrib##_store(struct device *ras_feat_dev, \ @@ -99,6 +113,13 @@ MR_ATTR_STORE(persist_mode, set_persist_mode, unsigned long, kstrtoul) MR_ATTR_STORE(hpa, set_hpa, u64, kstrtou64) MR_ATTR_STORE(dpa, set_dpa, u64, kstrtou64) MR_ATTR_STORE(nibble_mask, set_nibble_mask, unsigned long, kstrtoul) +MR_ATTR_STORE(bank_group, set_bank_group, unsigned long, kstrtoul) +MR_ATTR_STORE(bank, set_bank, unsigned long, kstrtoul) +MR_ATTR_STORE(rank, set_rank, unsigned long, kstrtoul) +MR_ATTR_STORE(row, set_row, unsigned long, kstrtoul) +MR_ATTR_STORE(column, set_column, unsigned long, kstrtoul) +MR_ATTR_STORE(channel, set_channel, unsigned long, kstrtoul) +MR_ATTR_STORE(sub_channel, set_sub_channel, unsigned long, kstrtoul) #define MR_DO_OP(attrib, cb) \ static ssize_t attrib##_store(struct device *ras_feat_dev, \ @@ -189,6 +210,62 @@ static umode_t mem_repair_attr_visible(struct kobject *kobj, struct attribute *a return 0444; } break; + case MR_BANK_GROUP: + if (ops->get_bank_group) { + if (ops->set_bank_group) + return a->mode; + else + return 0444; + } + break; + case MR_BANK: + if (ops->get_bank) { + if (ops->set_bank) + return a->mode; + else + return 0444; + } + break; + case MR_RANK: + if (ops->get_rank) { + if (ops->set_rank) + return a->mode; + else + return 0444; + } + break; + case MR_ROW: + if (ops->get_row) { + if (ops->set_row) + return a->mode; + else + return 0444; + } + break; + case MR_COLUMN: + if (ops->get_column) { + if (ops->set_column) + return a->mode; + else + return 0444; + } + break; + case MR_CHANNEL: + if (ops->get_channel) { + if (ops->set_channel) + return a->mode; + else + return 0444; + } + break; + case MR_SUB_CHANNEL: + if (ops->get_sub_channel) { + if (ops->set_sub_channel) + return a->mode; + else + return 0444; + } + break; case MEM_DO_REPAIR: if (ops->do_repair) return a->mode; @@ -230,6 +307,13 @@ static int mem_repair_create_desc(struct device *dev, [MR_MIN_DPA] = MR_ATTR_RO(min_dpa, instance), [MR_MAX_DPA] = MR_ATTR_RO(max_dpa, instance), [MR_NIBBLE_MASK] = MR_ATTR_RW(nibble_mask, instance), + [MR_BANK_GROUP] = MR_ATTR_RW(bank_group, instance), + [MR_BANK] = MR_ATTR_RW(bank, instance), + [MR_RANK] = MR_ATTR_RW(rank, instance), + [MR_ROW] = MR_ATTR_RW(row, instance), + [MR_COLUMN] = MR_ATTR_RW(column, instance), + [MR_CHANNEL] = MR_ATTR_RW(channel, instance), + [MR_SUB_CHANNEL] = MR_ATTR_RW(sub_channel, instance), [MEM_DO_REPAIR] = MR_ATTR_WO(repair, instance) }; diff --git a/include/linux/edac.h b/include/linux/edac.h index cfb2ef41ab95..451f9c152c99 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -780,6 +780,20 @@ enum edac_mem_repair_cmd { * @get_max_dpa: get the maximum supported device physical address (DPA). * @get_nibble_mask: get current nibble mask of memory to repair. * @set_nibble_mask: set nibble mask of memory to repair. + * @get_bank_group: get current bank group of memory to repair. + * @set_bank_group: set bank group of memory to repair. + * @get_bank: get current bank of memory to repair. + * @set_bank: set bank of memory to repair. + * @get_rank: get current rank of memory to repair. + * @set_rank: set rank of memory to repair. + * @get_row: get current row of memory to repair. + * @set_row: set row of memory to repair. + * @get_column: get current column of memory to repair. + * @set_column: set column of memory to repair. + * @get_channel: get current channel of memory to repair. + * @set_channel: set channel of memory to repair. + * @get_sub_channel: get current subchannel of memory to repair. + * @set_sub_channel: set subchannel of memory to repair. * @do_repair: Issue memory repair operation for the HPA/DPA and * other control attributes set for the memory to repair. * @@ -800,6 +814,20 @@ struct edac_mem_repair_ops { int (*get_max_dpa)(struct device *dev, void *drv_data, u64 *dpa); int (*get_nibble_mask)(struct device *dev, void *drv_data, u32 *val); int (*set_nibble_mask)(struct device *dev, void *drv_data, u32 val); + int (*get_bank_group)(struct device *dev, void *drv_data, u32 *val); + int (*set_bank_group)(struct device *dev, void *drv_data, u32 val); + int (*get_bank)(struct device *dev, void *drv_data, u32 *val); + int (*set_bank)(struct device *dev, void *drv_data, u32 val); + int (*get_rank)(struct device *dev, void *drv_data, u32 *val); + int (*set_rank)(struct device *dev, void *drv_data, u32 val); + int (*get_row)(struct device *dev, void *drv_data, u32 *val); + int (*set_row)(struct device *dev, void *drv_data, u32 val); + int (*get_column)(struct device *dev, void *drv_data, u32 *val); + int (*set_column)(struct device *dev, void *drv_data, u32 val); + int (*get_channel)(struct device *dev, void *drv_data, u32 *val); + int (*set_channel)(struct device *dev, void *drv_data, u32 val); + int (*get_sub_channel)(struct device *dev, void *drv_data, u32 *val); + int (*set_sub_channel)(struct device *dev, void *drv_data, u32 val); int (*do_repair)(struct device *dev, void *drv_data, u32 val); };