diff --git a/Documentation/ABI/testing/sysfs-edac-ecs b/Documentation/ABI/testing/sysfs-edac-ecs new file mode 100644 index 000000000000..87c885c4eb1a --- /dev/null +++ b/Documentation/ABI/testing/sysfs-edac-ecs @@ -0,0 +1,74 @@ +What: /sys/bus/edac/devices//ecs_fruX +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + The sysfs EDAC bus devices //ecs_fruX subdirectory + pertains to the memory media ECS (Error Check Scrub) control + feature, where directory corresponds to a device + registered with the EDAC device driver for the ECS feature. + /ecs_fruX belongs to the media FRUs (Field Replaceable Unit) + under the memory device. + + The sysfs ECS attr nodes are only present if the parent + driver has implemented the corresponding attr callback + function and provided the necessary operations to the EDAC + device driver during registration. + +What: /sys/bus/edac/devices//ecs_fruX/log_entry_type +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) The log entry type of how the DDR5 ECS log is reported. + + - 0 - per DRAM. + + - 1 - per memory media FRU. + + - All other values are reserved. + +What: /sys/bus/edac/devices//ecs_fruX/mode +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) The mode of how the DDR5 ECS counts the errors. + Error count is tracked based on two different modes + selected by DDR5 ECS Control Feature - Codeword mode and + Row Count mode. If the ECS is under Codeword mode, then + the error count increments each time a codeword with check + bit errors is detected. If the ECS is under Row Count mode, + then the error counter increments each time a row with + check bit errors is detected. + + - 0 - ECS counts rows in the memory media that have ECC errors. + + - 1 - ECS counts codewords with errors, specifically, it counts + the number of ECC-detected errors in the memory media. + + - All other values are reserved. + +What: /sys/bus/edac/devices//ecs_fruX/reset +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (WO) ECS reset ECC counter. + + - 1 - reset ECC counter to the default value. + + - All other values are reserved. + +What: /sys/bus/edac/devices//ecs_fruX/threshold +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) DDR5 ECS threshold count per gigabits of memory cells. + The ECS error count is subject to the ECS Threshold count + per Gbit, which masks error counts less than the Threshold. + + Supported values are 256, 1024 and 4096. + + All other values are reserved. diff --git a/Documentation/ABI/testing/sysfs-edac-memory-repair b/Documentation/ABI/testing/sysfs-edac-memory-repair new file mode 100644 index 000000000000..0434a3b23ff3 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-edac-memory-repair @@ -0,0 +1,206 @@ +What: /sys/bus/edac/devices//mem_repairX +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + The sysfs EDAC bus devices //mem_repairX subdirectory + pertains to the memory media repair features control, such as + PPR (Post Package Repair), memory sparing etc, where + directory corresponds to a device registered with the EDAC + device driver for the memory repair features. + + Post Package Repair is a maintenance operation requests the memory + device to perform a repair operation on its media. It is a memory + self-healing feature that fixes a failing memory location by + replacing it with a spare row in a DRAM device. For example, a + CXL memory device with DRAM components that support PPR features may + implement PPR maintenance operations. DRAM components may support + two types of PPR functions: hard PPR, for a permanent row repair, and + soft PPR, for a temporary row repair. Soft PPR may be much faster + than hard PPR, but the repair is lost with a power cycle. + + The sysfs attributes nodes for a repair feature are only + present if the parent driver has implemented the corresponding + attr callback function and provided the necessary operations + to the EDAC device driver during registration. + + In some states of system configuration (e.g. before address + decoders have been configured), memory devices (e.g. CXL) + may not have an active mapping in the main host address + physical address map. As such, the memory to repair must be + identified by a device specific physical addressing scheme + using a device physical address(DPA). The DPA and other control + attributes to use will be presented in related error records. + +What: /sys/bus/edac/devices//mem_repairX/repair_type +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RO) Memory repair type. For eg. post package repair, + memory sparing etc. Valid values are: + + - ppr - Post package repair. + + - cacheline-sparing + + - row-sparing + + - bank-sparing + + - rank-sparing + + - All other values are reserved. + +What: /sys/bus/edac/devices//mem_repairX/persist_mode +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) Get/Set the current persist repair mode set for a + repair function. Persist repair modes supported in the + device, based on a memory repair function, either is temporary, + which is lost with a power cycle or permanent. Valid values are: + + - 0 - Soft memory repair (temporary repair). + + - 1 - Hard memory repair (permanent repair). + + - All other values are reserved. + +What: /sys/bus/edac/devices//mem_repairX/repair_safe_when_in_use +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RO) True if memory media is accessible and data is retained + during the memory repair operation. + The data may not be retained and memory requests may not be + correctly processed during a repair operation. In such case + repair operation can not be executed at runtime. The memory + must be taken offline. + +What: /sys/bus/edac/devices//mem_repairX/hpa +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) Host Physical Address (HPA) of the memory to repair. + The HPA to use will be provided in related error records. + +What: /sys/bus/edac/devices//mem_repairX/dpa +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) Device Physical Address (DPA) of the memory to repair. + The specific DPA to use will be provided in related error + records. + + In some states of system configuration (e.g. before address + decoders have been configured), memory devices (e.g. CXL) + may not have an active mapping in the main host address + physical address map. As such, the memory to repair must be + identified by a device specific physical addressing scheme + using a DPA. The device physical address(DPA) to use will be + presented in related error records. + +What: /sys/bus/edac/devices//mem_repairX/nibble_mask +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) Read/Write Nibble mask of the memory to repair. + Nibble mask identifies one or more nibbles in error on the + memory bus that produced the error event. Nibble Mask bit 0 + shall be set if nibble 0 on the memory bus produced the + event, etc. For example, CXL PPR and sparing, a nibble mask + bit set to 1 indicates the request to perform repair + operation in the specific device. All nibble mask bits set + to 1 indicates the request to perform the operation in all + devices. Eg. for CXL memory repair, the specific value of + nibble mask to use will be provided in related error records. + For more details, See nibble mask field in CXL spec ver 3.1, + section 8.2.9.7.1.2 Table 8-103 soft PPR and section + 8.2.9.7.1.3 Table 8-104 hard PPR, section 8.2.9.7.1.4 + Table 8-105 memory sparing. + +What: /sys/bus/edac/devices//mem_repairX/min_hpa +What: /sys/bus/edac/devices//mem_repairX/max_hpa +What: /sys/bus/edac/devices//mem_repairX/min_dpa +What: /sys/bus/edac/devices//mem_repairX/max_dpa +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) The supported range of memory address that is to be + repaired. The memory device may give the supported range of + attributes to use and it will depend on the memory device + and the portion of memory to repair. + The userspace may receive the specific value of attributes + to use for a repair operation from the memory device via + related error records and trace events, for eg. CXL DRAM + and CXL general media error records in CXL memory devices. + +What: /sys/bus/edac/devices//mem_repairX/bank_group +What: /sys/bus/edac/devices//mem_repairX/bank +What: /sys/bus/edac/devices//mem_repairX/rank +What: /sys/bus/edac/devices//mem_repairX/row +What: /sys/bus/edac/devices//mem_repairX/column +What: /sys/bus/edac/devices//mem_repairX/channel +What: /sys/bus/edac/devices//mem_repairX/sub_channel +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) The control attributes for the memory to be repaired. + The specific value of attributes to use depends on the + portion of memory to repair and will be reported to the host + in related error records and be available to userspace + in trace events, such as CXL DRAM and CXL general media + error records of CXL memory devices. + + When readng back these attributes, it returns the current + value of memory requested to be repaired. + + bank_group - The bank group of the memory to repair. + + bank - The bank number of the memory to repair. + + rank - The rank of the memory to repair. Rank is defined as a + set of memory devices on a channel that together execute a + transaction. + + row - The row number of the memory to repair. + + column - The column number of the memory to repair. + + channel - The channel of the memory to repair. Channel is + defined as an interface that can be independently accessed + for a transaction. + + sub_channel - The subchannel of the memory to repair. + + The requirement to set these attributes varies based on the + repair function. The attributes in sysfs are not present + unless required for a repair function. + + For example, CXL spec ver 3.1, Section 8.2.9.7.1.2 Table 8-103 + soft PPR and Section 8.2.9.7.1.3 Table 8-104 hard PPR operations, + these attributes are not required to set. CXL spec ver 3.1, + Section 8.2.9.7.1.4 Table 8-105 memory sparing, these attributes + are required to set based on memory sparing granularity. + +What: /sys/bus/edac/devices//mem_repairX/repair +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (WO) Issue the memory repair operation for the specified + memory repair attributes. The operation may fail if resources + are insufficient based on the requirements of the memory + device and repair function. + + - 1 - Issue the repair operation. + + - All other values are reserved. diff --git a/Documentation/ABI/testing/sysfs-edac-scrub b/Documentation/ABI/testing/sysfs-edac-scrub new file mode 100644 index 000000000000..c43be90deab4 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-edac-scrub @@ -0,0 +1,69 @@ +What: /sys/bus/edac/devices//scrubX +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + The sysfs EDAC bus devices //scrubX subdirectory + belongs to an instance of memory scrub control feature, + where directory corresponds to a device/memory + region registered with the EDAC device driver for the + scrub control feature. + + The sysfs scrub attr nodes are only present if the parent + driver has implemented the corresponding attr callback + function and provided the necessary operations to the EDAC + device driver during registration. + +What: /sys/bus/edac/devices//scrubX/addr +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) The base address of the memory region to be scrubbed + for on-demand scrubbing. Setting address starts scrubbing. + The size must be set before that. + + The readback addr value is non-zero if the requested + on-demand scrubbing is in progress, zero otherwise. + +What: /sys/bus/edac/devices//scrubX/size +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) The size of the memory region to be scrubbed + (on-demand scrubbing). + +What: /sys/bus/edac/devices//scrubX/enable_background +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) Start/Stop background (patrol) scrubbing if supported. + +What: /sys/bus/edac/devices//scrubX/min_cycle_duration +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RO) Supported minimum scrub cycle duration in seconds + by the memory scrubber. + +What: /sys/bus/edac/devices//scrubX/max_cycle_duration +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RO) Supported maximum scrub cycle duration in seconds + by the memory scrubber. + +What: /sys/bus/edac/devices//scrubX/current_cycle_duration +Date: March 2025 +KernelVersion: 6.15 +Contact: linux-edac@vger.kernel.org +Description: + (RW) The current scrub cycle duration in seconds and must be + within the supported range by the memory scrubber. + + Scrub has an overhead when running and that may want to be + reduced by taking longer to do it. diff --git a/Documentation/edac/features.rst b/Documentation/edac/features.rst new file mode 100644 index 000000000000..3f283de297c7 --- /dev/null +++ b/Documentation/edac/features.rst @@ -0,0 +1,103 @@ +.. SPDX-License-Identifier: GPL-2.0 OR GFDL-1.2-no-invariants-or-later + +================= +EDAC/RAS features +================= + +Copyright (c) 2024-2025 HiSilicon Limited. + +:Author: Shiju Jose +:License: The GNU Free Documentation License, Version 1.2 without + Invariant Sections, Front-Cover Texts nor Back-Cover Texts. + (dual licensed under the GPL v2) + +- Written for: 6.15 + +Introduction +------------ + +EDAC/RAS components plugging and high-level design: + +1. Scrub control + +2. Error Check Scrub (ECS) control + +3. ACPI RAS2 features + +4. Post Package Repair (PPR) control + +5. Memory Sparing Repair control + +High level design is illustrated in the following diagram:: + + +-----------------------------------------------+ + | Userspace - Rasdaemon | + | +-------------+ | + | | RAS CXL mem | +---------------+ | + | |error handler|---->| | | + | +-------------+ | RAS dynamic | | + | +-------------+ | scrub, memory | | + | | RAS memory |---->| repair control| | + | |error handler| +----|----------+ | + | +-------------+ | | + +--------------------------|--------------------+ + | + | + +-------------------------------|------------------------------+ + | Kernel EDAC extension for | controlling RAS Features | + |+------------------------------|----------------------------+ | + || EDAC Core Sysfs EDAC| Bus | | + || +--------------------------|---------------------------+| | + || |/sys/bus/edac/devices//scrubX/ | | EDAC device || | + || |/sys/bus/edac/devices//ecsX/ |<->| EDAC MC || | + || |/sys/bus/edac/devices//repairX | | EDAC sysfs || | + || +---------------------------|--------------------------+| | + || EDAC|Bus | | + || | | | + || +----------+ Get feature | Get feature | | + || | | desc +---------|------+ desc +----------+ | | + || |EDAC scrub|<-----| EDAC device | | | | | + || +----------+ | driver- RAS |----->| EDAC mem | | | + || +----------+ | feature control| | repair | | | + || | |<-----| | +----------+ | | + || |EDAC ECS | +---------|------+ | | + || +----------+ Register RAS|features | | + || ______________________|_____________ | | + |+---------|---------------|------------------|--------------+ | + | +-------|----+ +-------|-------+ +----|----------+ | + | | | | CXL mem driver| | Client driver | | + | | ACPI RAS2 | | scrub, ECS, | | memory repair | | + | | driver | | sparing, PPR | | features | | + | +-----|------+ +-------|-------+ +------|--------+ | + | | | | | + +--------|-----------------|--------------------|--------------+ + | | | + +--------|-----------------|--------------------|--------------+ + | +---|-----------------|--------------------|-------+ | + | | | | + | | Platform HW and Firmware | | + | +--------------------------------------------------+ | + +--------------------------------------------------------------+ + + +1. EDAC Features components - Create feature-specific descriptors. For + example: scrub, ECS, memory repair in the above diagram. + +2. EDAC device driver for controlling RAS Features - Get feature's attribute + descriptors from EDAC RAS feature component and registers device's RAS + features with EDAC bus and expose the features control attributes via + sysfs. For example, /sys/bus/edac/devices//X/ + +3. RAS dynamic feature controller - Userspace sample modules in rasdaemon for + dynamic scrub/repair control to issue scrubbing/repair when excess number + of corrected memory errors are reported in a short span of time. + +RAS features +------------ +1. Memory Scrub + +Memory scrub features are documented in `Documentation/edac/scrub.rst`. + +2. Memory Repair + +Memory repair features are documented in `Documentation/edac/memory_repair.rst`. diff --git a/Documentation/edac/index.rst b/Documentation/edac/index.rst new file mode 100644 index 000000000000..420c6601dbae --- /dev/null +++ b/Documentation/edac/index.rst @@ -0,0 +1,12 @@ +.. SPDX-License-Identifier: GPL-2.0 OR GFDL-1.2-no-invariants-or-later + +============== +EDAC Subsystem +============== + +.. toctree:: + :maxdepth: 1 + + features + memory_repair + scrub diff --git a/Documentation/edac/memory_repair.rst b/Documentation/edac/memory_repair.rst new file mode 100644 index 000000000000..52162a422864 --- /dev/null +++ b/Documentation/edac/memory_repair.rst @@ -0,0 +1,121 @@ +.. SPDX-License-Identifier: GPL-2.0 OR GFDL-1.2-no-invariants-or-later + +========================== +EDAC Memory Repair Control +========================== + +Copyright (c) 2024-2025 HiSilicon Limited. + +:Author: Shiju Jose +:License: The GNU Free Documentation License, Version 1.2 without + Invariant Sections, Front-Cover Texts nor Back-Cover Texts. + (dual licensed under the GPL v2) +:Original Reviewers: + +- Written for: 6.15 + +Introduction +------------ + +Some memory devices support repair operations to address issues in their +memory media. Post Package Repair (PPR) and memory sparing are examples of +such features. + +Post Package Repair (PPR) +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Post Package Repair is a maintenance operation which requests the memory +device to perform repair operation on its media. It is a memory self-healing +feature that fixes a failing memory location by replacing it with a spare row +in a DRAM device. + +For example, a CXL memory device with DRAM components that support PPR +features implements maintenance operations. DRAM components support those +types of PPR functions: + + - hard PPR, for a permanent row repair, and + - soft PPR, for a temporary row repair. + +Soft PPR is much faster than hard PPR, but the repair is lost after a power +cycle. + +The data may not be retained and memory requests may not be correctly +processed during a repair operation. In such case, the repair operation should +not be executed at runtime. + +For example, for CXL memory devices, see CXL spec rev 3.1 [1]_ sections +8.2.9.7.1.1 PPR Maintenance Operations, 8.2.9.7.1.2 sPPR Maintenance Operation +and 8.2.9.7.1.3 hPPR Maintenance Operation for more details. + +Memory Sparing +~~~~~~~~~~~~~~ + +Memory sparing is a repair function that replaces a portion of memory with +a portion of functional memory at a particular granularity. Memory +sparing has cacheline/row/bank/rank sparing granularities. For example, in +rank memory-sparing mode, one memory rank serves as a spare for other ranks on +the same channel in case they fail. + +The spare rank is held in reserve and not used as active memory until +a failure is indicated, with reserved capacity subtracted from the total +available memory in the system. + +After an error threshold is surpassed in a system protected by memory sparing, +the content of a failing rank of DIMMs is copied to the spare rank. The +failing rank is then taken offline and the spare rank placed online for use as +active memory in place of the failed rank. + +For example, CXL memory devices can support various subclasses for sparing +operation vary in terms of the scope of the sparing being performed. + +Cacheline sparing subclass refers to a sparing action that can replace a full +cacheline. Row sparing is provided as an alternative to PPR sparing functions +and its scope is that of a single DDR row. Bank sparing allows an entire bank +to be replaced. Rank sparing is defined as an operation in which an entire DDR +rank is replaced. + +See CXL spec 3.1 [1]_ section 8.2.9.7.1.4 Memory Sparing Maintenance +Operations for more details. + +.. [1] https://computeexpresslink.org/cxl-specification/ + +Use cases of generic memory repair features control +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +1. The soft PPR, hard PPR and memory-sparing features share similar control + attributes. Therefore, there is a need for a standardized, generic sysfs + repair control that is exposed to userspace and used by administrators, + scripts and tools. + +2. When a CXL device detects an error in a memory component, it informs the + host of the need for a repair maintenance operation by using an event + record where the "maintenance needed" flag is set. The event record + specifies the device physical address (DPA) and attributes of the memory + that requires repair. The kernel reports the corresponding CXL general + media or DRAM trace event to userspace, and userspace tools (e.g. + rasdaemon) initiate a repair maintenance operation in response to the + device request using the sysfs repair control. + +3. Userspace tools, such as rasdaemon, request a repair operation on a memory + region when maintenance need flag set or an uncorrected memory error or + excess of corrected memory errors above a threshold value is reported or an + exceed corrected errors threshold flag set for that memory. + +4. Multiple PPR/sparing instances may be present per memory device. + +5. Drivers should enforce that live repair is safe. In systems where memory + mapping functions can change between boots, one approach to this is to log + memory errors seen on this boot against which to check live memory repair + requests. + +The File System +--------------- + +The control attributes of a registered memory repair instance could be +accessed in the /sys/bus/edac/devices//mem_repairX/ + +sysfs +----- + +Sysfs files are documented in +`Documentation/ABI/testing/sysfs-edac-memory-repair`. diff --git a/Documentation/edac/scrub.rst b/Documentation/edac/scrub.rst new file mode 100644 index 000000000000..daab929cdba1 --- /dev/null +++ b/Documentation/edac/scrub.rst @@ -0,0 +1,266 @@ +.. SPDX-License-Identifier: GPL-2.0 OR GFDL-1.2-no-invariants-or-later + +============= +Scrub Control +============= + +Copyright (c) 2024-2025 HiSilicon Limited. + +:Author: Shiju Jose +:License: The GNU Free Documentation License, Version 1.2 without + Invariant Sections, Front-Cover Texts nor Back-Cover Texts. + (dual licensed under the GPL v2) + +- Written for: 6.15 + +Introduction +------------ + +Increasing DRAM size and cost have made memory subsystem reliability an +important concern. These modules are used where potentially corrupted data +could cause expensive or fatal issues. Memory errors are among the top +hardware failures that cause server and workload crashes. + +Memory scrubbing is a feature where an ECC (Error-Correcting Code) engine +reads data from each memory media location, corrects if necessary and writes +the corrected data back to the same memory media location. + +DIMMs can be scrubbed at a configurable rate to detect uncorrected memory +errors and attempt recovery from detected errors, providing the following +benefits: + +1. Proactively scrubbing DIMMs reduces the chance of a correctable error + becoming uncorrectable. + +2. When detected, uncorrected errors caught in unallocated memory pages are + isolated and prevented from being allocated to an application or the OS. + +3. This reduces the likelihood of software or hardware products encountering + memory errors. + +4. The additional data on failures in memory may be used to build up + statistics that are later used to decide whether to use memory repair + technologies such as Post Package Repair or Sparing. + +There are 2 types of memory scrubbing: + +1. Background (patrol) scrubbing while the DRAM is otherwise idle. + +2. On-demand scrubbing for a specific address range or region of memory. + +Several types of interfaces to hardware memory scrubbers have been +identified, such as CXL memory device patrol scrub, CXL DDR5 ECS, ACPI +RAS2 memory scrubbing, and ACPI NVDIMM ARS (Address Range Scrub). + +The control mechanisms vary across different memory scrubbers. To enable +standardized userspace tooling, there is a need to present these controls +through a standardized ABI. + +A generic memory EDAC scrub control allows users to manage underlying +scrubbers in the system through a standardized sysfs control interface. It +abstracts the management of various scrubbing functionalities into a unified +set of functions. + +Use cases of common scrub control feature +----------------------------------------- + +1. Several types of interfaces for hardware memory scrubbers have been + identified, including the CXL memory device patrol scrub, CXL DDR5 ECS, + ACPI RAS2 memory scrubbing features, ACPI NVDIMM ARS (Address Range Scrub), + and software-based memory scrubbers. + + Of the identified interfaces to hardware memory scrubbers some support + control over patrol (background) scrubbing (e.g., ACPI RAS2, CXL) and/or + on-demand scrubbing (e.g., ACPI RAS2, ACPI ARS). However, the scrub control + interfaces vary between memory scrubbers, highlighting the need for + a standardized, generic sysfs scrub control interface that is accessible to + userspace for administration and use by scripts/tools. + +2. User-space scrub controls allow users to disable scrubbing if necessary, + for example, to disable background patrol scrubbing or adjust the scrub + rate for performance-aware operations where background activities need to + be minimized or disabled. + +3. User-space tools enable on-demand scrubbing for specific address ranges, + provided that the scrubber supports this functionality. + +4. User-space tools can also control memory DIMM scrubbing at a configurable + scrub rate via sysfs scrub controls. This approach offers several benefits: + + 4.1. Detects uncorrectable memory errors early, before user access to affected + memory, helping facilitate recovery. + + 4.2. Reduces the likelihood of correctable errors developing into uncorrectable + errors. + +5. Policy control for hotplugged memory is necessary because there may not + be a system-wide BIOS or similar control to manage scrub settings for a CXL + device added after boot. Determining these settings is a policy decision, + balancing reliability against performance, so userspace should control it. + Therefore, a unified interface is recommended for handling this function in + a way that aligns with other similar interfaces, rather than creating a + separate one. + +Scrubbing features +------------------ + +CXL Memory Scrubbing features +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +CXL spec r3.1 [1]_ section 8.2.9.9.11.1 describes the memory device patrol +scrub control feature. The device patrol scrub proactively locates and makes +corrections to errors in regular cycle. The patrol scrub control allows the +userspace request to change CXL patrol scrubber's configurations. + +The patrol scrub control allows the requester to specify the number of +hours in which the patrol scrub cycles must be completed, provided that +the requested scrub rate must be within the supported range of the +scrub rate that the device is capable of. In the CXL driver, the +number of seconds per scrub cycles, which user requests via sysfs, is +rescaled to hours per scrub cycles. + +In addition, they allow the host to disable the feature in case it interferes +with performance-aware operations which require the background operations to +be turned off. + +Error Check Scrub (ECS) +~~~~~~~~~~~~~~~~~~~~~~~ + +CXL spec r3.1 [1]_ section 8.2.9.9.11.2 describes Error Check Scrub (ECS) +- a feature defined in the JEDEC DDR5 SDRAM Specification (JESD79-5) and +allowing DRAM to internally read, correct single-bit errors, and write back +corrected data bits to the DRAM array while providing transparency to error +counts. + +The DDR5 device contains number of memory media Field Replaceable Units (FRU) +per device. The DDR5 ECS feature and thus the ECS control driver supports +configuring the ECS parameters per FRU. + +ACPI RAS2 Hardware-based Memory Scrubbing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +ACPI spec 6.5 [2]_ section 5.2.21 ACPI RAS2 describes an ACPI RAS2 table +which provides interfaces for platform RAS features and supports independent +RAS controls and capabilities for a given RAS feature for multiple instances +of the same component in a given system. + +Memory RAS features apply to RAS capabilities, controls and operations that +are specific to memory. RAS2 PCC sub-spaces for memory-specific RAS features +have a Feature Type of 0x00 (Memory). + +The platform can use the hardware-based memory scrubbing feature to expose +controls and capabilities associated with hardware-based memory scrub +engines. The RAS2 memory scrubbing feature supports as per spec, + +1. Independent memory scrubbing controls for each NUMA domain, identified + using its proximity domain. + +2. Provision for background (patrol) scrubbing of the entire memory system, + as well as on-demand scrubbing for a specific region of memory. + +ACPI Address Range Scrubbing (ARS) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +ACPI spec 6.5 [2]_ section 9.19.7.2 describes Address Range Scrubbing (ARS). +ARS allows the platform to communicate memory errors to system software. +This capability allows system software to prevent accesses to addresses with +uncorrectable errors in memory. ARS functions manage all NVDIMMs present in +the system. Only one scrub can be in progress system wide at any given time. + +The following functions are supported as per the specification: + +1. Query ARS Capabilities for a given address range, indicates platform + supports the ACPI NVDIMM Root Device Unconsumed Error Notification. + +2. Start ARS triggers an Address Range Scrub for the given memory range. + Address scrubbing can be done for volatile or persistent memory, or both. + +3. Query ARS Status command allows software to get the status of ARS, + including the progress of ARS and ARS error record. + +4. Clear Uncorrectable Error. + +5. Translate SPA + +6. ARS Error Inject etc. + +The kernel supports an existing control for ARS and ARS is currently not +supported in EDAC. + +.. [1] https://computeexpresslink.org/cxl-specification/ + +.. [2] https://uefi.org/specs/ACPI/6.5/ + +Comparison of various scrubbing features +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + +--------------+-----------+-----------+-----------+-----------+ + | | ACPI | CXL patrol| CXL ECS | ARS | + | Name | RAS2 | scrub | | | + +--------------+-----------+-----------+-----------+-----------+ + | | | | | | + | On-demand | Supported | No | No | Supported | + | Scrubbing | | | | | + | | | | | | + +--------------+-----------+-----------+-----------+-----------+ + | | | | | | + | Background | Supported | Supported | Supported | No | + | scrubbing | | | | | + | | | | | | + +--------------+-----------+-----------+-----------+-----------+ + | | | | | | + | Mode of | Scrub ctrl| per device| per memory| Unknown | + | scrubbing | per NUMA | | media | | + | | domain. | | | | + +--------------+-----------+-----------+-----------+-----------+ + | | | | | | + | Query scrub | Supported | Supported | Supported | Supported | + | capabilities | | | | | + | | | | | | + +--------------+-----------+-----------+-----------+-----------+ + | | | | | | + | Setting | Supported | No | No | Supported | + | address range| | | | | + | | | | | | + +--------------+-----------+-----------+-----------+-----------+ + | | | | | | + | Setting | Supported | Supported | No | No | + | scrub rate | | | | | + | | | | | | + +--------------+-----------+-----------+-----------+-----------+ + | | | | | | + | Unit for | Not | in hours | No | No | + | scrub rate | Defined | | | | + | | | | | | + +--------------+-----------+-----------+-----------+-----------+ + | | Supported | | | | + | Scrub | on-demand | No | No | Supported | + | status/ | scrubbing | | | | + | Completion | only | | | | + +--------------+-----------+-----------+-----------+-----------+ + | UC error | |CXL general|CXL general| ACPI UCE | + | reporting | Exception |media/DRAM |media/DRAM | notify and| + | | |event/media|event/media| query | + | | |scan? |scan? | ARS status| + +--------------+-----------+-----------+-----------+-----------+ + | | | | | | + | Support for | Supported | Supported | Supported | No | + | EDAC control | | | | | + | | | | | | + +--------------+-----------+-----------+-----------+-----------+ + +The File System +--------------- + +The control attributes of a registered scrubber instance could be +accessed in: + +/sys/bus/edac/devices//scrubX/ + +sysfs +----- + +Sysfs files are documented in +`Documentation/ABI/testing/sysfs-edac-scrub` + +`Documentation/ABI/testing/sysfs-edac-ecs` diff --git a/MAINTAINERS b/MAINTAINERS index 54859024f5e0..c32759cdc569 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8236,6 +8236,7 @@ F: drivers/edac/aspeed_edac.c EDAC-BLUEFIELD M: Shravan Kumar Ramani +M: David Thompson S: Supported F: drivers/edac/bluefield_edac.c diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 2051a7c944a5..19ad3c3b675d 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -75,6 +75,34 @@ config EDAC_GHES In doubt, say 'Y'. +config EDAC_SCRUB + bool "EDAC scrub feature" + help + The EDAC scrub feature is optional and is designed to control the + memory scrubbers in the system. The common sysfs scrub interface + abstracts the control of various arbitrary scrubbing functionalities + into a unified set of functions. + Say 'y/n' to enable/disable EDAC scrub feature. + +config EDAC_ECS + bool "EDAC ECS (Error Check Scrub) feature" + help + The EDAC ECS feature is optional and is designed to control on-die + error check scrub (e.g., DDR5 ECS) in the system. The common sysfs + ECS interface abstracts the control of various ECS functionalities + into a unified set of functions. + Say 'y/n' to enable/disable EDAC ECS feature. + +config EDAC_MEM_REPAIR + bool "EDAC memory repair feature" + help + The EDAC memory repair feature is optional and is designed to control + the memory devices with repair features, such as Post Package Repair + (PPR), memory sparing etc. The common sysfs memory repair interface + abstracts the control of various memory repair functionalities into + a unified set of functions. + Say 'y/n' to enable/disable EDAC memory repair feature. + config EDAC_AMD64 tristate "AMD64 (Opteron, Athlon64)" depends on AMD_NB && EDAC_DECODE_MCE @@ -168,7 +196,7 @@ config EDAC_I3200 config EDAC_IE31200 tristate "Intel e312xx" - depends on PCI && X86 + depends on PCI && X86 && X86_MCE_INTEL help Support for error detection and correction on the Intel E3-1200 based DRAM controllers. diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 89789ba8275f..a8f2d8f6c894 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -12,6 +12,9 @@ edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_core-y += edac_module.o edac_device_sysfs.o wq.o edac_core-$(CONFIG_EDAC_DEBUG) += debugfs.o +edac_core-$(CONFIG_EDAC_SCRUB) += scrub.o +edac_core-$(CONFIG_EDAC_ECS) += ecs.o +edac_core-$(CONFIG_EDAC_MEM_REPAIR) += mem_repair.o ifdef CONFIG_PCI edac_core-y += edac_pci.o edac_pci_sysfs.o diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 8414ceb43e4a..90f0eb7cc5b9 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only #include +#include #include "amd64_edac.h" #include #include @@ -1171,22 +1172,21 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) edac_dbg(1, " LRDIMM %dx rank multiply\n", (dcsm & 0x3)); } - edac_dbg(1, "All DIMMs support ECC:%s\n", - (dclr & BIT(19)) ? "yes" : "no"); + edac_dbg(1, "All DIMMs support ECC: %s\n", str_yes_no(dclr & BIT(19))); edac_dbg(1, " PAR/ERR parity: %s\n", - (dclr & BIT(8)) ? "enabled" : "disabled"); + str_enabled_disabled(dclr & BIT(8))); if (pvt->fam == 0x10) edac_dbg(1, " DCT 128bit mode width: %s\n", (dclr & BIT(11)) ? "128b" : "64b"); edac_dbg(1, " x4 logical DIMMs present: L0: %s L1: %s L2: %s L3: %s\n", - (dclr & BIT(12)) ? "yes" : "no", - (dclr & BIT(13)) ? "yes" : "no", - (dclr & BIT(14)) ? "yes" : "no", - (dclr & BIT(15)) ? "yes" : "no"); + str_yes_no(dclr & BIT(12)), + str_yes_no(dclr & BIT(13)), + str_yes_no(dclr & BIT(14)), + str_yes_no(dclr & BIT(15))); } #define CS_EVEN_PRIMARY BIT(0) @@ -1353,14 +1353,14 @@ static void umc_dump_misc_regs(struct amd64_pvt *pvt) edac_dbg(1, "UMC%d UMC cap high: 0x%x\n", i, umc->umc_cap_hi); edac_dbg(1, "UMC%d ECC capable: %s, ChipKill ECC capable: %s\n", - i, (umc->umc_cap_hi & BIT(30)) ? "yes" : "no", - (umc->umc_cap_hi & BIT(31)) ? "yes" : "no"); + i, str_yes_no(umc->umc_cap_hi & BIT(30)), + str_yes_no(umc->umc_cap_hi & BIT(31))); edac_dbg(1, "UMC%d All DIMMs support ECC: %s\n", - i, (umc->umc_cfg & BIT(12)) ? "yes" : "no"); + i, str_yes_no(umc->umc_cfg & BIT(12))); edac_dbg(1, "UMC%d x4 DIMMs present: %s\n", - i, (umc->dimm_cfg & BIT(6)) ? "yes" : "no"); + i, str_yes_no(umc->dimm_cfg & BIT(6))); edac_dbg(1, "UMC%d x16 DIMMs present: %s\n", - i, (umc->dimm_cfg & BIT(7)) ? "yes" : "no"); + i, str_yes_no(umc->dimm_cfg & BIT(7))); umc_debug_display_dimm_sizes(pvt, i); } @@ -1371,11 +1371,11 @@ static void dct_dump_misc_regs(struct amd64_pvt *pvt) edac_dbg(1, "F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap); edac_dbg(1, " NB two channel DRAM capable: %s\n", - (pvt->nbcap & NBCAP_DCT_DUAL) ? "yes" : "no"); + str_yes_no(pvt->nbcap & NBCAP_DCT_DUAL)); edac_dbg(1, " ECC capable: %s, ChipKill ECC capable: %s\n", - (pvt->nbcap & NBCAP_SECDED) ? "yes" : "no", - (pvt->nbcap & NBCAP_CHIPKILL) ? "yes" : "no"); + str_yes_no(pvt->nbcap & NBCAP_SECDED), + str_yes_no(pvt->nbcap & NBCAP_CHIPKILL)); debug_dump_dramcfg_low(pvt, pvt->dclr0, 0); @@ -1398,7 +1398,7 @@ static void dct_dump_misc_regs(struct amd64_pvt *pvt) if (!dct_ganging_enabled(pvt)) debug_dump_dramcfg_low(pvt, pvt->dclr1, 1); - edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no"); + edac_dbg(1, " DramHoleValid: %s\n", str_yes_no(dhar_valid(pvt))); amd64_info("using x%u syndromes.\n", pvt->ecc_sym_sz); } @@ -2027,15 +2027,15 @@ static void read_dram_ctl_register(struct amd64_pvt *pvt) if (!dct_ganging_enabled(pvt)) edac_dbg(0, " Address range split per DCT: %s\n", - (dct_high_range_enabled(pvt) ? "yes" : "no")); + str_yes_no(dct_high_range_enabled(pvt))); edac_dbg(0, " data interleave for ECC: %s, DRAM cleared since last warm reset: %s\n", - (dct_data_intlv_enabled(pvt) ? "enabled" : "disabled"), - (dct_memory_cleared(pvt) ? "yes" : "no")); + str_enabled_disabled(dct_data_intlv_enabled(pvt)), + str_yes_no(dct_memory_cleared(pvt))); edac_dbg(0, " channel interleave: %s, " "interleave bits selector: 0x%x\n", - (dct_interleave_enabled(pvt) ? "enabled" : "disabled"), + str_enabled_disabled(dct_interleave_enabled(pvt)), dct_sel_interleave_addr(pvt)); } @@ -3208,8 +3208,7 @@ static bool nb_mce_bank_enabled_on_node(u16 nid) nbe = reg->l & MSR_MCGCTL_NBE; edac_dbg(0, "core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n", - cpu, reg->q, - (nbe ? "enabled" : "disabled")); + cpu, reg->q, str_enabled_disabled(nbe)); if (!nbe) goto out; @@ -3353,12 +3352,9 @@ static bool dct_ecc_enabled(struct amd64_pvt *pvt) edac_dbg(0, "NB MCE bank disabled, set MSR 0x%08x[4] on node %d to enable.\n", MSR_IA32_MCG_CTL, nid); - edac_dbg(3, "Node %d: DRAM ECC %s.\n", nid, (ecc_en ? "enabled" : "disabled")); + edac_dbg(3, "Node %d: DRAM ECC %s.\n", nid, str_enabled_disabled(ecc_en)); - if (!ecc_en || !nb_mce_en) - return false; - else - return true; + return ecc_en && nb_mce_en; } static bool umc_ecc_enabled(struct amd64_pvt *pvt) @@ -3378,7 +3374,7 @@ static bool umc_ecc_enabled(struct amd64_pvt *pvt) } } - edac_dbg(3, "Node %d: DRAM ECC %s.\n", pvt->mc_node_id, (ecc_en ? "enabled" : "disabled")); + edac_dbg(3, "Node %d: DRAM ECC %s.\n", pvt->mc_node_id, str_enabled_disabled(ecc_en)); return ecc_en; } diff --git a/drivers/edac/debugfs.c b/drivers/edac/debugfs.c index 4804332d9946..8195fc9c9354 100644 --- a/drivers/edac/debugfs.c +++ b/drivers/edac/debugfs.c @@ -1,4 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only + +#include + #include "edac_module.h" static struct dentry *edac_debugfs; @@ -22,7 +25,7 @@ static ssize_t edac_fake_inject_write(struct file *file, "Generating %d %s fake error%s to %d.%d.%d to test core handling. NOTE: this won't test the driver-specific decoding logic.\n", errcount, (type == HW_EVENT_ERR_UNCORRECTED) ? "UE" : "CE", - errcount > 1 ? "s" : "", + str_plural(errcount), mci->fake_inject_layer[0], mci->fake_inject_layer[1], mci->fake_inject_layer[2] diff --git a/drivers/edac/ecs.c b/drivers/edac/ecs.c new file mode 100755 index 000000000000..1d51838a60c1 --- /dev/null +++ b/drivers/edac/ecs.c @@ -0,0 +1,205 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * The generic ECS driver is designed to support control of on-die error + * check scrub (e.g., DDR5 ECS). The common sysfs ECS interface abstracts + * the control of various ECS functionalities into a unified set of functions. + * + * Copyright (c) 2024-2025 HiSilicon Limited. + */ + +#include + +#define EDAC_ECS_FRU_NAME "ecs_fru" + +enum edac_ecs_attributes { + ECS_LOG_ENTRY_TYPE, + ECS_MODE, + ECS_RESET, + ECS_THRESHOLD, + ECS_MAX_ATTRS +}; + +struct edac_ecs_dev_attr { + struct device_attribute dev_attr; + int fru_id; +}; + +struct edac_ecs_fru_context { + char name[EDAC_FEAT_NAME_LEN]; + struct edac_ecs_dev_attr dev_attr[ECS_MAX_ATTRS]; + struct attribute *ecs_attrs[ECS_MAX_ATTRS + 1]; + struct attribute_group group; +}; + +struct edac_ecs_context { + u16 num_media_frus; + struct edac_ecs_fru_context *fru_ctxs; +}; + +#define TO_ECS_DEV_ATTR(_dev_attr) \ + container_of(_dev_attr, struct edac_ecs_dev_attr, dev_attr) + +#define EDAC_ECS_ATTR_SHOW(attrib, cb, type, format) \ +static ssize_t attrib##_show(struct device *ras_feat_dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct edac_ecs_dev_attr *dev_attr = TO_ECS_DEV_ATTR(attr); \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_ecs_ops *ops = ctx->ecs.ecs_ops; \ + type data; \ + int ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->ecs.private, \ + dev_attr->fru_id, &data); \ + if (ret) \ + return ret; \ + \ + return sysfs_emit(buf, format, data); \ +} + +EDAC_ECS_ATTR_SHOW(log_entry_type, get_log_entry_type, u32, "%u\n") +EDAC_ECS_ATTR_SHOW(mode, get_mode, u32, "%u\n") +EDAC_ECS_ATTR_SHOW(threshold, get_threshold, u32, "%u\n") + +#define EDAC_ECS_ATTR_STORE(attrib, cb, type, conv_func) \ +static ssize_t attrib##_store(struct device *ras_feat_dev, \ + struct device_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + struct edac_ecs_dev_attr *dev_attr = TO_ECS_DEV_ATTR(attr); \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_ecs_ops *ops = ctx->ecs.ecs_ops; \ + type data; \ + int ret; \ + \ + ret = conv_func(buf, 0, &data); \ + if (ret < 0) \ + return ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->ecs.private, \ + dev_attr->fru_id, data); \ + if (ret) \ + return ret; \ + \ + return len; \ +} + +EDAC_ECS_ATTR_STORE(log_entry_type, set_log_entry_type, unsigned long, kstrtoul) +EDAC_ECS_ATTR_STORE(mode, set_mode, unsigned long, kstrtoul) +EDAC_ECS_ATTR_STORE(reset, reset, unsigned long, kstrtoul) +EDAC_ECS_ATTR_STORE(threshold, set_threshold, unsigned long, kstrtoul) + +static umode_t ecs_attr_visible(struct kobject *kobj, struct attribute *a, int attr_id) +{ + struct device *ras_feat_dev = kobj_to_dev(kobj); + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); + const struct edac_ecs_ops *ops = ctx->ecs.ecs_ops; + + switch (attr_id) { + case ECS_LOG_ENTRY_TYPE: + if (ops->get_log_entry_type) { + if (ops->set_log_entry_type) + return a->mode; + else + return 0444; + } + break; + case ECS_MODE: + if (ops->get_mode) { + if (ops->set_mode) + return a->mode; + else + return 0444; + } + break; + case ECS_RESET: + if (ops->reset) + return a->mode; + break; + case ECS_THRESHOLD: + if (ops->get_threshold) { + if (ops->set_threshold) + return a->mode; + else + return 0444; + } + break; + default: + break; + } + + return 0; +} + +#define EDAC_ECS_ATTR_RO(_name, _fru_id) \ + ((struct edac_ecs_dev_attr) { .dev_attr = __ATTR_RO(_name), \ + .fru_id = _fru_id }) + +#define EDAC_ECS_ATTR_WO(_name, _fru_id) \ + ((struct edac_ecs_dev_attr) { .dev_attr = __ATTR_WO(_name), \ + .fru_id = _fru_id }) + +#define EDAC_ECS_ATTR_RW(_name, _fru_id) \ + ((struct edac_ecs_dev_attr) { .dev_attr = __ATTR_RW(_name), \ + .fru_id = _fru_id }) + +static int ecs_create_desc(struct device *ecs_dev, const struct attribute_group **attr_groups, + u16 num_media_frus) +{ + struct edac_ecs_context *ecs_ctx; + u32 fru; + + ecs_ctx = devm_kzalloc(ecs_dev, sizeof(*ecs_ctx), GFP_KERNEL); + if (!ecs_ctx) + return -ENOMEM; + + ecs_ctx->num_media_frus = num_media_frus; + ecs_ctx->fru_ctxs = devm_kcalloc(ecs_dev, num_media_frus, + sizeof(*ecs_ctx->fru_ctxs), + GFP_KERNEL); + if (!ecs_ctx->fru_ctxs) + return -ENOMEM; + + for (fru = 0; fru < num_media_frus; fru++) { + struct edac_ecs_fru_context *fru_ctx = &ecs_ctx->fru_ctxs[fru]; + struct attribute_group *group = &fru_ctx->group; + int i; + + fru_ctx->dev_attr[ECS_LOG_ENTRY_TYPE] = EDAC_ECS_ATTR_RW(log_entry_type, fru); + fru_ctx->dev_attr[ECS_MODE] = EDAC_ECS_ATTR_RW(mode, fru); + fru_ctx->dev_attr[ECS_RESET] = EDAC_ECS_ATTR_WO(reset, fru); + fru_ctx->dev_attr[ECS_THRESHOLD] = EDAC_ECS_ATTR_RW(threshold, fru); + + for (i = 0; i < ECS_MAX_ATTRS; i++) + fru_ctx->ecs_attrs[i] = &fru_ctx->dev_attr[i].dev_attr.attr; + + sprintf(fru_ctx->name, "%s%d", EDAC_ECS_FRU_NAME, fru); + group->name = fru_ctx->name; + group->attrs = fru_ctx->ecs_attrs; + group->is_visible = ecs_attr_visible; + + attr_groups[fru] = group; + } + + return 0; +} + +/** + * edac_ecs_get_desc - get EDAC ECS descriptors + * @ecs_dev: client device, supports ECS feature + * @attr_groups: pointer to attribute group container + * @num_media_frus: number of media FRUs in the device + * + * Return: + * * %0 - Success. + * * %-EINVAL - Invalid parameters passed. + * * %-ENOMEM - Dynamic memory allocation failed. + */ +int edac_ecs_get_desc(struct device *ecs_dev, + const struct attribute_group **attr_groups, u16 num_media_frus) +{ + if (!ecs_dev || !attr_groups || !num_media_frus) + return -EINVAL; + + return ecs_create_desc(ecs_dev, attr_groups, num_media_frus); +} diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index 621dc2a5d034..0734909b08a4 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -570,3 +570,188 @@ void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev, block ? block->name : "N/A", count, msg); } EXPORT_SYMBOL_GPL(edac_device_handle_ue_count); + +static void edac_dev_release(struct device *dev) +{ + struct edac_dev_feat_ctx *ctx = container_of(dev, struct edac_dev_feat_ctx, dev); + + kfree(ctx->mem_repair); + kfree(ctx->scrub); + kfree(ctx->dev.groups); + kfree(ctx); +} + +static const struct device_type edac_dev_type = { + .name = "edac_dev", + .release = edac_dev_release, +}; + +static void edac_dev_unreg(void *data) +{ + device_unregister(data); +} + +/** + * edac_dev_register - register device for RAS features with EDAC + * @parent: parent device. + * @name: name for the folder in the /sys/bus/edac/devices/, + * which is derived from the parent device. + * For e.g. /sys/bus/edac/devices/cxl_mem0/ + * @private: parent driver's data to store in the context if any. + * @num_features: number of RAS features to register. + * @ras_features: list of RAS features to register. + * + * Return: + * * %0 - Success. + * * %-EINVAL - Invalid parameters passed. + * * %-ENOMEM - Dynamic memory allocation failed. + * + */ +int edac_dev_register(struct device *parent, char *name, + void *private, int num_features, + const struct edac_dev_feature *ras_features) +{ + const struct attribute_group **ras_attr_groups; + struct edac_dev_data *dev_data; + struct edac_dev_feat_ctx *ctx; + int mem_repair_cnt = 0; + int attr_gcnt = 0; + int ret = -ENOMEM; + int scrub_cnt = 0; + int feat; + + if (!parent || !name || !num_features || !ras_features) + return -EINVAL; + + /* Double parse to make space for attributes */ + for (feat = 0; feat < num_features; feat++) { + switch (ras_features[feat].ft_type) { + case RAS_FEAT_SCRUB: + attr_gcnt++; + scrub_cnt++; + break; + case RAS_FEAT_ECS: + attr_gcnt += ras_features[feat].ecs_info.num_media_frus; + break; + case RAS_FEAT_MEM_REPAIR: + attr_gcnt++; + mem_repair_cnt++; + break; + default: + return -EINVAL; + } + } + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ras_attr_groups = kcalloc(attr_gcnt + 1, sizeof(*ras_attr_groups), GFP_KERNEL); + if (!ras_attr_groups) + goto ctx_free; + + if (scrub_cnt) { + ctx->scrub = kcalloc(scrub_cnt, sizeof(*ctx->scrub), GFP_KERNEL); + if (!ctx->scrub) + goto groups_free; + } + + if (mem_repair_cnt) { + ctx->mem_repair = kcalloc(mem_repair_cnt, sizeof(*ctx->mem_repair), GFP_KERNEL); + if (!ctx->mem_repair) + goto data_mem_free; + } + + attr_gcnt = 0; + scrub_cnt = 0; + mem_repair_cnt = 0; + for (feat = 0; feat < num_features; feat++, ras_features++) { + switch (ras_features->ft_type) { + case RAS_FEAT_SCRUB: + if (!ras_features->scrub_ops || scrub_cnt != ras_features->instance) { + ret = -EINVAL; + goto data_mem_free; + } + + dev_data = &ctx->scrub[scrub_cnt]; + dev_data->instance = scrub_cnt; + dev_data->scrub_ops = ras_features->scrub_ops; + dev_data->private = ras_features->ctx; + ret = edac_scrub_get_desc(parent, &ras_attr_groups[attr_gcnt], + ras_features->instance); + if (ret) + goto data_mem_free; + + scrub_cnt++; + attr_gcnt++; + break; + case RAS_FEAT_ECS: + if (!ras_features->ecs_ops) { + ret = -EINVAL; + goto data_mem_free; + } + + dev_data = &ctx->ecs; + dev_data->ecs_ops = ras_features->ecs_ops; + dev_data->private = ras_features->ctx; + ret = edac_ecs_get_desc(parent, &ras_attr_groups[attr_gcnt], + ras_features->ecs_info.num_media_frus); + if (ret) + goto data_mem_free; + + attr_gcnt += ras_features->ecs_info.num_media_frus; + break; + case RAS_FEAT_MEM_REPAIR: + if (!ras_features->mem_repair_ops || + mem_repair_cnt != ras_features->instance) { + ret = -EINVAL; + goto data_mem_free; + } + + dev_data = &ctx->mem_repair[mem_repair_cnt]; + dev_data->instance = mem_repair_cnt; + dev_data->mem_repair_ops = ras_features->mem_repair_ops; + dev_data->private = ras_features->ctx; + ret = edac_mem_repair_get_desc(parent, &ras_attr_groups[attr_gcnt], + ras_features->instance); + if (ret) + goto data_mem_free; + + mem_repair_cnt++; + attr_gcnt++; + break; + default: + ret = -EINVAL; + goto data_mem_free; + } + } + + ctx->dev.parent = parent; + ctx->dev.bus = edac_get_sysfs_subsys(); + ctx->dev.type = &edac_dev_type; + ctx->dev.groups = ras_attr_groups; + ctx->private = private; + dev_set_drvdata(&ctx->dev, ctx); + + ret = dev_set_name(&ctx->dev, "%s", name); + if (ret) + goto data_mem_free; + + ret = device_register(&ctx->dev); + if (ret) { + put_device(&ctx->dev); + return ret; + } + + return devm_add_action_or_reset(parent, edac_dev_unreg, &ctx->dev); + +data_mem_free: + kfree(ctx->mem_repair); + kfree(ctx->scrub); +groups_free: + kfree(ras_attr_groups); +ctx_free: + kfree(ctx); + return ret; +} +EXPORT_SYMBOL_GPL(edac_dev_register); diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index f45d849d3f15..355a977019e9 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -751,6 +751,8 @@ static int i10nm_get_ddr_munits(void) continue; } else { d->imc[lmc].mdev = mdev; + if (res_cfg->type == SPR) + skx_set_mc_mapping(d, i, lmc); lmc++; } } diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c index 49b4499269fb..b5cf25905b05 100644 --- a/drivers/edac/i5400_edac.c +++ b/drivers/edac/i5400_edac.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "edac_module.h" @@ -899,7 +900,7 @@ static void decode_mtr(int slot_row, u16 mtr) edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); edac_dbg(2, "\t\tELECTRICAL THROTTLING is %s\n", - MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled"); + str_enabled_disabled(MTR_DIMMS_ETHROTTLE(mtr))); edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr)); edac_dbg(2, "\t\tNUMRANK: %s\n", diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 61adaa872ba7..69068f8d0cad 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "edac_module.h" @@ -620,7 +621,7 @@ static int decode_mtr(struct i7300_pvt *pvt, edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); edac_dbg(2, "\t\tELECTRICAL THROTTLING is %s\n", - MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled"); + str_enabled_disabled(MTR_DIMMS_ETHROTTLE(mtr))); edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr)); edac_dbg(2, "\t\tNUMRANK: %s\n", @@ -871,9 +872,9 @@ static int i7300_get_mc_regs(struct mem_ctl_info *mci) IS_MIRRORED(pvt->mc_settings) ? "" : "non-"); edac_dbg(0, "Error detection is %s\n", - IS_ECC_ENABLED(pvt->mc_settings) ? "enabled" : "disabled"); + str_enabled_disabled(IS_ECC_ENABLED(pvt->mc_settings))); edac_dbg(0, "Retry is %s\n", - IS_RETRY_ENABLED(pvt->mc_settings) ? "enabled" : "disabled"); + str_enabled_disabled(IS_RETRY_ENABLED(pvt->mc_settings))); /* Get Memory Interleave Range registers */ pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR0, diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 4fc16922dc1a..204834149579 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -51,6 +51,7 @@ #include #include +#include #include "edac_module.h" #define EDAC_MOD_STR "ie31200_edac" @@ -84,44 +85,23 @@ #define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9 0x3ec6 #define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10 0x3eca -/* Test if HB is for Skylake or later. */ -#define DEVICE_ID_SKYLAKE_OR_LATER(did) \ - (((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_8) || \ - ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_9) || \ - ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_10) || \ - ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_11) || \ - ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_12) || \ - (((did) & PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK) == \ - PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK)) - -#define IE31200_DIMMS 4 -#define IE31200_RANKS 8 -#define IE31200_RANKS_PER_CHANNEL 4 +/* Raptor Lake-S */ +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1 0xa703 +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2 0x4640 +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3 0x4630 + +#define IE31200_RANKS_PER_CHANNEL 8 #define IE31200_DIMMS_PER_CHANNEL 2 #define IE31200_CHANNELS 2 +#define IE31200_IMC_NUM 2 /* Intel IE31200 register addresses - device 0 function 0 - DRAM Controller */ #define IE31200_MCHBAR_LOW 0x48 #define IE31200_MCHBAR_HIGH 0x4c -#define IE31200_MCHBAR_MASK GENMASK_ULL(38, 15) -#define IE31200_MMR_WINDOW_SIZE BIT(15) /* * Error Status Register (16b) * - * 15 reserved - * 14 Isochronous TBWRR Run Behind FIFO Full - * (ITCV) - * 13 Isochronous TBWRR Run Behind FIFO Put - * (ITSTV) - * 12 reserved - * 11 MCH Thermal Sensor Event - * for SMI/SCI/SERR (GTSE) - * 10 reserved - * 9 LOCK to non-DRAM Memory Flag (LCKF) - * 8 reserved - * 7 DRAM Throttle Flag (DTF) - * 6:2 reserved * 1 Multi-bit DRAM ECC Error Flag (DMERR) * 0 Single-bit DRAM ECC Error Flag (DSERR) */ @@ -130,68 +110,60 @@ #define IE31200_ERRSTS_CE BIT(0) #define IE31200_ERRSTS_BITS (IE31200_ERRSTS_UE | IE31200_ERRSTS_CE) -/* - * Channel 0 ECC Error Log (64b) - * - * 63:48 Error Column Address (ERRCOL) - * 47:32 Error Row Address (ERRROW) - * 31:29 Error Bank Address (ERRBANK) - * 28:27 Error Rank Address (ERRRANK) - * 26:24 reserved - * 23:16 Error Syndrome (ERRSYND) - * 15: 2 reserved - * 1 Multiple Bit Error Status (MERRSTS) - * 0 Correctable Error Status (CERRSTS) - */ - -#define IE31200_C0ECCERRLOG 0x40c8 -#define IE31200_C1ECCERRLOG 0x44c8 -#define IE31200_C0ECCERRLOG_SKL 0x4048 -#define IE31200_C1ECCERRLOG_SKL 0x4448 -#define IE31200_ECCERRLOG_CE BIT(0) -#define IE31200_ECCERRLOG_UE BIT(1) -#define IE31200_ECCERRLOG_RANK_BITS GENMASK_ULL(28, 27) -#define IE31200_ECCERRLOG_RANK_SHIFT 27 -#define IE31200_ECCERRLOG_SYNDROME_BITS GENMASK_ULL(23, 16) -#define IE31200_ECCERRLOG_SYNDROME_SHIFT 16 - -#define IE31200_ECCERRLOG_SYNDROME(log) \ - ((log & IE31200_ECCERRLOG_SYNDROME_BITS) >> \ - IE31200_ECCERRLOG_SYNDROME_SHIFT) - #define IE31200_CAPID0 0xe4 #define IE31200_CAPID0_PDCD BIT(4) #define IE31200_CAPID0_DDPCD BIT(6) #define IE31200_CAPID0_ECC BIT(1) -#define IE31200_MAD_DIMM_0_OFFSET 0x5004 -#define IE31200_MAD_DIMM_0_OFFSET_SKL 0x500C -#define IE31200_MAD_DIMM_SIZE GENMASK_ULL(7, 0) -#define IE31200_MAD_DIMM_A_RANK BIT(17) -#define IE31200_MAD_DIMM_A_RANK_SHIFT 17 -#define IE31200_MAD_DIMM_A_RANK_SKL BIT(10) -#define IE31200_MAD_DIMM_A_RANK_SKL_SHIFT 10 -#define IE31200_MAD_DIMM_A_WIDTH BIT(19) -#define IE31200_MAD_DIMM_A_WIDTH_SHIFT 19 -#define IE31200_MAD_DIMM_A_WIDTH_SKL GENMASK_ULL(9, 8) -#define IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT 8 - -/* Skylake reports 1GB increments, everything else is 256MB */ -#define IE31200_PAGES(n, skl) \ - (n << (28 + (2 * skl) - PAGE_SHIFT)) +/* Non-constant mask variant of FIELD_GET() */ +#define field_get(_mask, _reg) (((_reg) & (_mask)) >> (ffs(_mask) - 1)) static int nr_channels; static struct pci_dev *mci_pdev; static int ie31200_registered = 1; +struct res_config { + enum mem_type mtype; + bool cmci; + int imc_num; + /* Host MMIO configuration register */ + u64 reg_mchbar_mask; + u64 reg_mchbar_window_size; + /* ECC error log register */ + u64 reg_eccerrlog_offset[IE31200_CHANNELS]; + u64 reg_eccerrlog_ce_mask; + u64 reg_eccerrlog_ce_ovfl_mask; + u64 reg_eccerrlog_ue_mask; + u64 reg_eccerrlog_ue_ovfl_mask; + u64 reg_eccerrlog_rank_mask; + u64 reg_eccerrlog_syndrome_mask; + /* MSR to clear ECC error log register */ + u32 msr_clear_eccerrlog_offset; + /* DIMM characteristics register */ + u64 reg_mad_dimm_size_granularity; + u64 reg_mad_dimm_offset[IE31200_CHANNELS]; + u32 reg_mad_dimm_size_mask[IE31200_DIMMS_PER_CHANNEL]; + u32 reg_mad_dimm_rank_mask[IE31200_DIMMS_PER_CHANNEL]; + u32 reg_mad_dimm_width_mask[IE31200_DIMMS_PER_CHANNEL]; +}; + struct ie31200_priv { void __iomem *window; void __iomem *c0errlog; void __iomem *c1errlog; + struct res_config *cfg; + struct mem_ctl_info *mci; + struct pci_dev *pdev; + struct device dev; }; +static struct ie31200_pvt { + struct ie31200_priv *priv[IE31200_IMC_NUM]; +} ie31200_pvt; + enum ie31200_chips { IE31200 = 0, + IE31200_1 = 1, }; struct ie31200_dev_info { @@ -202,18 +174,22 @@ struct ie31200_error_info { u16 errsts; u16 errsts2; u64 eccerrlog[IE31200_CHANNELS]; + u64 erraddr; }; static const struct ie31200_dev_info ie31200_devs[] = { [IE31200] = { .ctl_name = "IE31200" }, + [IE31200_1] = { + .ctl_name = "IE31200_1" + }, }; struct dimm_data { - u8 size; /* in multiples of 256MB, except Skylake is 1GB */ - u8 dual_rank : 1, - x16_width : 2; /* 0 means x8 width */ + u64 size; /* in bytes */ + u8 ranks; + enum dev_type dtype; }; static int how_many_channels(struct pci_dev *pdev) @@ -251,29 +227,54 @@ static bool ecc_capable(struct pci_dev *pdev) return true; } -static int eccerrlog_row(u64 log) -{ - return ((log & IE31200_ECCERRLOG_RANK_BITS) >> - IE31200_ECCERRLOG_RANK_SHIFT); -} +#define mci_to_pci_dev(mci) (((struct ie31200_priv *)(mci)->pvt_info)->pdev) static void ie31200_clear_error_info(struct mem_ctl_info *mci) { + struct ie31200_priv *priv = mci->pvt_info; + struct res_config *cfg = priv->cfg; + + /* + * The PCI ERRSTS register is deprecated. Write the MSR to clear + * the ECC error log registers in all memory controllers. + */ + if (cfg->msr_clear_eccerrlog_offset) { + if (wrmsr_safe(cfg->msr_clear_eccerrlog_offset, + cfg->reg_eccerrlog_ce_mask | + cfg->reg_eccerrlog_ce_ovfl_mask | + cfg->reg_eccerrlog_ue_mask | + cfg->reg_eccerrlog_ue_ovfl_mask, 0) < 0) + ie31200_printk(KERN_ERR, "Failed to wrmsr.\n"); + + return; + } + /* * Clear any error bits. * (Yes, we really clear bits by writing 1 to them.) */ - pci_write_bits16(to_pci_dev(mci->pdev), IE31200_ERRSTS, + pci_write_bits16(mci_to_pci_dev(mci), IE31200_ERRSTS, IE31200_ERRSTS_BITS, IE31200_ERRSTS_BITS); } static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci, struct ie31200_error_info *info) { - struct pci_dev *pdev; + struct pci_dev *pdev = mci_to_pci_dev(mci); struct ie31200_priv *priv = mci->pvt_info; - pdev = to_pci_dev(mci->pdev); + /* + * The PCI ERRSTS register is deprecated, directly read the + * MMIO-mapped ECC error log registers. + */ + if (priv->cfg->msr_clear_eccerrlog_offset) { + info->eccerrlog[0] = lo_hi_readq(priv->c0errlog); + if (nr_channels == 2) + info->eccerrlog[1] = lo_hi_readq(priv->c1errlog); + + ie31200_clear_error_info(mci); + return; + } /* * This is a mess because there is no atomic way to read all the @@ -309,46 +310,56 @@ static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci, static void ie31200_process_error_info(struct mem_ctl_info *mci, struct ie31200_error_info *info) { + struct ie31200_priv *priv = mci->pvt_info; + struct res_config *cfg = priv->cfg; int channel; u64 log; - if (!(info->errsts & IE31200_ERRSTS_BITS)) - return; + if (!cfg->msr_clear_eccerrlog_offset) { + if (!(info->errsts & IE31200_ERRSTS_BITS)) + return; - if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) { - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, - -1, -1, -1, "UE overwrote CE", ""); - info->errsts = info->errsts2; + if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) { + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, + -1, -1, -1, "UE overwrote CE", ""); + info->errsts = info->errsts2; + } } for (channel = 0; channel < nr_channels; channel++) { log = info->eccerrlog[channel]; - if (log & IE31200_ECCERRLOG_UE) { + if (log & cfg->reg_eccerrlog_ue_mask) { edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, - 0, 0, 0, - eccerrlog_row(log), + info->erraddr >> PAGE_SHIFT, 0, 0, + field_get(cfg->reg_eccerrlog_rank_mask, log), channel, -1, "ie31200 UE", ""); - } else if (log & IE31200_ECCERRLOG_CE) { + } else if (log & cfg->reg_eccerrlog_ce_mask) { edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, - 0, 0, - IE31200_ECCERRLOG_SYNDROME(log), - eccerrlog_row(log), + info->erraddr >> PAGE_SHIFT, 0, + field_get(cfg->reg_eccerrlog_syndrome_mask, log), + field_get(cfg->reg_eccerrlog_rank_mask, log), channel, -1, "ie31200 CE", ""); } } } -static void ie31200_check(struct mem_ctl_info *mci) +static void __ie31200_check(struct mem_ctl_info *mci, struct mce *mce) { struct ie31200_error_info info; + info.erraddr = mce ? mce->addr : 0; ie31200_get_and_clear_error_info(mci, &info); ie31200_process_error_info(mci, &info); } -static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) +static void ie31200_check(struct mem_ctl_info *mci) +{ + __ie31200_check(mci, NULL); +} + +static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev, struct res_config *cfg, int mc) { union { u64 mchbar; @@ -361,7 +372,8 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) pci_read_config_dword(pdev, IE31200_MCHBAR_LOW, &u.mchbar_low); pci_read_config_dword(pdev, IE31200_MCHBAR_HIGH, &u.mchbar_high); - u.mchbar &= IE31200_MCHBAR_MASK; + u.mchbar &= cfg->reg_mchbar_mask; + u.mchbar += cfg->reg_mchbar_window_size * mc; if (u.mchbar != (resource_size_t)u.mchbar) { ie31200_printk(KERN_ERR, "mmio space beyond accessible range (0x%llx)\n", @@ -369,7 +381,7 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) return NULL; } - window = ioremap(u.mchbar, IE31200_MMR_WINDOW_SIZE); + window = ioremap(u.mchbar, cfg->reg_mchbar_window_size); if (!window) ie31200_printk(KERN_ERR, "Cannot map mmio space at 0x%llx\n", (unsigned long long)u.mchbar); @@ -377,155 +389,108 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) return window; } -static void __skl_populate_dimm_info(struct dimm_data *dd, u32 addr_decode, - int chan) +static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int dimm, + struct res_config *cfg) { - dd->size = (addr_decode >> (chan << 4)) & IE31200_MAD_DIMM_SIZE; - dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK_SKL << (chan << 4))) ? 1 : 0; - dd->x16_width = ((addr_decode & (IE31200_MAD_DIMM_A_WIDTH_SKL << (chan << 4))) >> - (IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT + (chan << 4))); + dd->size = field_get(cfg->reg_mad_dimm_size_mask[dimm], addr_decode) * cfg->reg_mad_dimm_size_granularity; + dd->ranks = field_get(cfg->reg_mad_dimm_rank_mask[dimm], addr_decode) + 1; + dd->dtype = field_get(cfg->reg_mad_dimm_width_mask[dimm], addr_decode) + DEV_X8; } -static void __populate_dimm_info(struct dimm_data *dd, u32 addr_decode, - int chan) +static void ie31200_get_dimm_config(struct mem_ctl_info *mci, void __iomem *window, + struct res_config *cfg, int mc) { - dd->size = (addr_decode >> (chan << 3)) & IE31200_MAD_DIMM_SIZE; - dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK << chan)) ? 1 : 0; - dd->x16_width = (addr_decode & (IE31200_MAD_DIMM_A_WIDTH << chan)) ? 1 : 0; -} + struct dimm_data dimm_info; + struct dimm_info *dimm; + unsigned long nr_pages; + u32 addr_decode; + int i, j, k; -static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int chan, - bool skl) -{ - if (skl) - __skl_populate_dimm_info(dd, addr_decode, chan); - else - __populate_dimm_info(dd, addr_decode, chan); -} + for (i = 0; i < IE31200_CHANNELS; i++) { + addr_decode = readl(window + cfg->reg_mad_dimm_offset[i]); + edac_dbg(0, "addr_decode: 0x%x\n", addr_decode); + for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) { + populate_dimm_info(&dimm_info, addr_decode, j, cfg); + edac_dbg(0, "mc: %d, channel: %d, dimm: %d, size: %lld MiB, ranks: %d, DRAM chip type: %d\n", + mc, i, j, dimm_info.size >> 20, + dimm_info.ranks, + dimm_info.dtype); -static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) + nr_pages = MiB_TO_PAGES(dimm_info.size >> 20); + if (nr_pages == 0) + continue; + + nr_pages = nr_pages / dimm_info.ranks; + for (k = 0; k < dimm_info.ranks; k++) { + dimm = edac_get_dimm(mci, (j * dimm_info.ranks) + k, i, 0); + dimm->nr_pages = nr_pages; + edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); + dimm->grain = 8; /* just a guess */ + dimm->mtype = cfg->mtype; + dimm->dtype = dimm_info.dtype; + dimm->edac_mode = EDAC_UNKNOWN; + } + } + } +} + +static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg, int mc) { - int i, j, ret; - struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; - struct dimm_data dimm_info[IE31200_CHANNELS][IE31200_DIMMS_PER_CHANNEL]; - void __iomem *window; struct ie31200_priv *priv; - u32 addr_decode, mad_offset; - - /* - * Kaby Lake, Coffee Lake seem to work like Skylake. Please re-visit - * this logic when adding new CPU support. - */ - bool skl = DEVICE_ID_SKYLAKE_OR_LATER(pdev->device); - - edac_dbg(0, "MC:\n"); - - if (!ecc_capable(pdev)) { - ie31200_printk(KERN_INFO, "No ECC support\n"); - return -ENODEV; - } + struct mem_ctl_info *mci; + void __iomem *window; + int ret; nr_channels = how_many_channels(pdev); layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; - layers[0].size = IE31200_DIMMS; + layers[0].size = IE31200_RANKS_PER_CHANNEL; layers[0].is_virt_csrow = true; layers[1].type = EDAC_MC_LAYER_CHANNEL; layers[1].size = nr_channels; layers[1].is_virt_csrow = false; - mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, + mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, sizeof(struct ie31200_priv)); if (!mci) return -ENOMEM; - window = ie31200_map_mchbar(pdev); + window = ie31200_map_mchbar(pdev, cfg, mc); if (!window) { ret = -ENODEV; goto fail_free; } edac_dbg(3, "MC: init mci\n"); - mci->pdev = &pdev->dev; - if (skl) - mci->mtype_cap = MEM_FLAG_DDR4; - else - mci->mtype_cap = MEM_FLAG_DDR3; + mci->mtype_cap = BIT(cfg->mtype); mci->edac_ctl_cap = EDAC_FLAG_SECDED; mci->edac_cap = EDAC_FLAG_SECDED; mci->mod_name = EDAC_MOD_STR; - mci->ctl_name = ie31200_devs[dev_idx].ctl_name; + mci->ctl_name = ie31200_devs[mc].ctl_name; mci->dev_name = pci_name(pdev); - mci->edac_check = ie31200_check; + mci->edac_check = cfg->cmci ? NULL : ie31200_check; mci->ctl_page_to_phys = NULL; priv = mci->pvt_info; priv->window = window; - if (skl) { - priv->c0errlog = window + IE31200_C0ECCERRLOG_SKL; - priv->c1errlog = window + IE31200_C1ECCERRLOG_SKL; - mad_offset = IE31200_MAD_DIMM_0_OFFSET_SKL; - } else { - priv->c0errlog = window + IE31200_C0ECCERRLOG; - priv->c1errlog = window + IE31200_C1ECCERRLOG; - mad_offset = IE31200_MAD_DIMM_0_OFFSET; - } - - /* populate DIMM info */ - for (i = 0; i < IE31200_CHANNELS; i++) { - addr_decode = readl(window + mad_offset + - (i * 4)); - edac_dbg(0, "addr_decode: 0x%x\n", addr_decode); - for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) { - populate_dimm_info(&dimm_info[i][j], addr_decode, j, - skl); - edac_dbg(0, "size: 0x%x, rank: %d, width: %d\n", - dimm_info[i][j].size, - dimm_info[i][j].dual_rank, - dimm_info[i][j].x16_width); - } - } - + priv->c0errlog = window + cfg->reg_eccerrlog_offset[0]; + priv->c1errlog = window + cfg->reg_eccerrlog_offset[1]; + priv->cfg = cfg; + priv->mci = mci; + priv->pdev = pdev; + device_initialize(&priv->dev); /* - * The dram rank boundary (DRB) reg values are boundary addresses - * for each DRAM rank with a granularity of 64MB. DRB regs are - * cumulative; the last one will contain the total memory - * contained in all ranks. + * The EDAC core uses mci->pdev (pointer to the structure device) + * as the memory controller ID. The SoCs attach one or more memory + * controllers to a single pci_dev (a single pci_dev->dev can + * correspond to multiple memory controllers). + * + * To make mci->pdev unique, assign pci_dev->dev to mci->pdev + * for the first memory controller and assign a unique priv->dev + * to mci->pdev for each additional memory controller. */ - for (i = 0; i < IE31200_DIMMS_PER_CHANNEL; i++) { - for (j = 0; j < IE31200_CHANNELS; j++) { - struct dimm_info *dimm; - unsigned long nr_pages; - - nr_pages = IE31200_PAGES(dimm_info[j][i].size, skl); - if (nr_pages == 0) - continue; - - if (dimm_info[j][i].dual_rank) { - nr_pages = nr_pages / 2; - dimm = edac_get_dimm(mci, (i * 2) + 1, j, 0); - dimm->nr_pages = nr_pages; - edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); - dimm->grain = 8; /* just a guess */ - if (skl) - dimm->mtype = MEM_DDR4; - else - dimm->mtype = MEM_DDR3; - dimm->dtype = DEV_UNKNOWN; - dimm->edac_mode = EDAC_UNKNOWN; - } - dimm = edac_get_dimm(mci, i * 2, j, 0); - dimm->nr_pages = nr_pages; - edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); - dimm->grain = 8; /* same guess */ - if (skl) - dimm->mtype = MEM_DDR4; - else - dimm->mtype = MEM_DDR3; - dimm->dtype = DEV_UNKNOWN; - dimm->edac_mode = EDAC_UNKNOWN; - } - } + mci->pdev = mc ? &priv->dev : &pdev->dev; + ie31200_get_dimm_config(mci, window, cfg, mc); ie31200_clear_error_info(mci); if (edac_mc_add_mc(mci)) { @@ -534,16 +499,115 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) goto fail_unmap; } - /* get this far and it's successful */ - edac_dbg(3, "MC: success\n"); + ie31200_pvt.priv[mc] = priv; return 0; - fail_unmap: iounmap(window); - fail_free: edac_mc_free(mci); + return ret; +} + +static void mce_check(struct mce *mce) +{ + struct ie31200_priv *priv; + int i; + + for (i = 0; i < IE31200_IMC_NUM; i++) { + priv = ie31200_pvt.priv[i]; + if (!priv) + continue; + + __ie31200_check(priv->mci, mce); + } +} + +static int mce_handler(struct notifier_block *nb, unsigned long val, void *data) +{ + struct mce *mce = (struct mce *)data; + char *type; + + if (mce->kflags & MCE_HANDLED_CEC) + return NOTIFY_DONE; + + /* + * Ignore unless this is a memory related error. + * Don't check MCI_STATUS_ADDRV since it's not set on some CPUs. + */ + if ((mce->status & 0xefff) >> 7 != 1) + return NOTIFY_DONE; + + type = mce->mcgstatus & MCG_STATUS_MCIP ? "Exception" : "Event"; + + edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n", + mce->extcpu, type, mce->mcgstatus, + mce->bank, mce->status); + edac_dbg(0, "TSC 0x%llx\n", mce->tsc); + edac_dbg(0, "ADDR 0x%llx\n", mce->addr); + edac_dbg(0, "MISC 0x%llx\n", mce->misc); + edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n", + mce->cpuvendor, mce->cpuid, mce->time, + mce->socketid, mce->apicid); + + mce_check(mce); + mce->kflags |= MCE_HANDLED_EDAC; + + return NOTIFY_DONE; +} + +static struct notifier_block ie31200_mce_dec = { + .notifier_call = mce_handler, + .priority = MCE_PRIO_EDAC, +}; + +static void ie31200_unregister_mcis(void) +{ + struct ie31200_priv *priv; + struct mem_ctl_info *mci; + int i; + + for (i = 0; i < IE31200_IMC_NUM; i++) { + priv = ie31200_pvt.priv[i]; + if (!priv) + continue; + mci = priv->mci; + edac_mc_del_mc(mci->pdev); + iounmap(priv->window); + edac_mc_free(mci); + } +} + +static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) +{ + int i, ret; + + edac_dbg(0, "MC:\n"); + + if (!ecc_capable(pdev)) { + ie31200_printk(KERN_INFO, "No ECC support\n"); + return -ENODEV; + } + + for (i = 0; i < cfg->imc_num; i++) { + ret = ie31200_register_mci(pdev, cfg, i); + if (ret) + goto fail_register; + } + + if (cfg->cmci) { + mce_register_decode_chain(&ie31200_mce_dec); + edac_op_state = EDAC_OPSTATE_INT; + } else { + edac_op_state = EDAC_OPSTATE_POLL; + } + + /* get this far and it's successful. */ + edac_dbg(3, "MC: success\n"); + return 0; + +fail_register: + ie31200_unregister_mcis(); return ret; } @@ -555,7 +619,7 @@ static int ie31200_init_one(struct pci_dev *pdev, edac_dbg(0, "MC:\n"); if (pci_enable_device(pdev) < 0) return -EIO; - rc = ie31200_probe1(pdev, ent->driver_data); + rc = ie31200_probe1(pdev, (struct res_config *)ent->driver_data); if (rc == 0 && !mci_pdev) mci_pdev = pci_dev_get(pdev); @@ -564,43 +628,112 @@ static int ie31200_init_one(struct pci_dev *pdev, static void ie31200_remove_one(struct pci_dev *pdev) { - struct mem_ctl_info *mci; - struct ie31200_priv *priv; + struct ie31200_priv *priv = ie31200_pvt.priv[0]; edac_dbg(0, "\n"); pci_dev_put(mci_pdev); mci_pdev = NULL; - mci = edac_mc_del_mc(&pdev->dev); - if (!mci) - return; - priv = mci->pvt_info; - iounmap(priv->window); - edac_mc_free(mci); + if (priv->cfg->cmci) + mce_unregister_decode_chain(&ie31200_mce_dec); + ie31200_unregister_mcis(); } +static struct res_config snb_cfg = { + .mtype = MEM_DDR3, + .imc_num = 1, + .reg_mchbar_mask = GENMASK_ULL(38, 15), + .reg_mchbar_window_size = BIT_ULL(15), + .reg_eccerrlog_offset[0] = 0x40c8, + .reg_eccerrlog_offset[1] = 0x44c8, + .reg_eccerrlog_ce_mask = BIT_ULL(0), + .reg_eccerrlog_ue_mask = BIT_ULL(1), + .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27), + .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16), + .reg_mad_dimm_size_granularity = BIT_ULL(28), + .reg_mad_dimm_offset[0] = 0x5004, + .reg_mad_dimm_offset[1] = 0x5008, + .reg_mad_dimm_size_mask[0] = GENMASK(7, 0), + .reg_mad_dimm_size_mask[1] = GENMASK(15, 8), + .reg_mad_dimm_rank_mask[0] = BIT(17), + .reg_mad_dimm_rank_mask[1] = BIT(18), + .reg_mad_dimm_width_mask[0] = BIT(19), + .reg_mad_dimm_width_mask[1] = BIT(20), +}; + +static struct res_config skl_cfg = { + .mtype = MEM_DDR4, + .imc_num = 1, + .reg_mchbar_mask = GENMASK_ULL(38, 15), + .reg_mchbar_window_size = BIT_ULL(15), + .reg_eccerrlog_offset[0] = 0x4048, + .reg_eccerrlog_offset[1] = 0x4448, + .reg_eccerrlog_ce_mask = BIT_ULL(0), + .reg_eccerrlog_ue_mask = BIT_ULL(1), + .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27), + .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16), + .reg_mad_dimm_size_granularity = BIT_ULL(30), + .reg_mad_dimm_offset[0] = 0x500c, + .reg_mad_dimm_offset[1] = 0x5010, + .reg_mad_dimm_size_mask[0] = GENMASK(5, 0), + .reg_mad_dimm_size_mask[1] = GENMASK(21, 16), + .reg_mad_dimm_rank_mask[0] = BIT(10), + .reg_mad_dimm_rank_mask[1] = BIT(26), + .reg_mad_dimm_width_mask[0] = GENMASK(9, 8), + .reg_mad_dimm_width_mask[1] = GENMASK(25, 24), +}; + +struct res_config rpl_s_cfg = { + .mtype = MEM_DDR5, + .cmci = true, + .imc_num = 2, + .reg_mchbar_mask = GENMASK_ULL(41, 17), + .reg_mchbar_window_size = BIT_ULL(16), + .reg_eccerrlog_offset[0] = 0xe048, + .reg_eccerrlog_offset[1] = 0xe848, + .reg_eccerrlog_ce_mask = BIT_ULL(0), + .reg_eccerrlog_ce_ovfl_mask = BIT_ULL(1), + .reg_eccerrlog_ue_mask = BIT_ULL(2), + .reg_eccerrlog_ue_ovfl_mask = BIT_ULL(3), + .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27), + .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16), + .msr_clear_eccerrlog_offset = 0x791, + .reg_mad_dimm_offset[0] = 0xd80c, + .reg_mad_dimm_offset[1] = 0xd810, + .reg_mad_dimm_size_granularity = BIT_ULL(29), + .reg_mad_dimm_size_mask[0] = GENMASK(6, 0), + .reg_mad_dimm_size_mask[1] = GENMASK(22, 16), + .reg_mad_dimm_rank_mask[0] = GENMASK(10, 9), + .reg_mad_dimm_rank_mask[1] = GENMASK(27, 26), + .reg_mad_dimm_width_mask[0] = GENMASK(8, 7), + .reg_mad_dimm_width_mask[1] = GENMASK(25, 24), +}; + static const struct pci_device_id ie31200_pci_tbl[] = { - { PCI_VEND_DEV(INTEL, IE31200_HB_1), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_2), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_3), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_4), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_5), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_6), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_10), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_11), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_12), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_1), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_2), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_3), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_4), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_5), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_6), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_10), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_1), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_2), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_3), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_4), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_5), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_6), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_7), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_8), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_9), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_10), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_11), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_12), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_1), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_2), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_3), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_4), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_5), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_6), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_7), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_8), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3), (kernel_ulong_t)&rpl_s_cfg}, { 0, } /* 0 terminated list. */ }; MODULE_DEVICE_TABLE(pci, ie31200_pci_tbl); @@ -617,12 +750,10 @@ static int __init ie31200_init(void) int pci_rc, i; edac_dbg(3, "MC:\n"); - /* Ensure that the OPSTATE is set correctly for POLL or NMI */ - opstate_init(); pci_rc = pci_register_driver(&ie31200_driver); if (pci_rc < 0) - goto fail0; + return pci_rc; if (!mci_pdev) { ie31200_registered = 0; @@ -633,11 +764,13 @@ static int __init ie31200_init(void) if (mci_pdev) break; } + if (!mci_pdev) { edac_dbg(0, "ie31200 pci_get_device fail\n"); pci_rc = -ENODEV; - goto fail1; + goto fail0; } + pci_rc = ie31200_init_one(mci_pdev, &ie31200_pci_tbl[i]); if (pci_rc < 0) { edac_dbg(0, "ie31200 init fail\n"); @@ -645,12 +778,12 @@ static int __init ie31200_init(void) goto fail1; } } - return 0; + return 0; fail1: - pci_unregister_driver(&ie31200_driver); -fail0: pci_dev_put(mci_pdev); +fail0: + pci_unregister_driver(&ie31200_driver); return pci_rc; } diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c index fdf3a84fe698..5807517ee32d 100644 --- a/drivers/edac/igen6_edac.c +++ b/drivers/edac/igen6_edac.c @@ -125,7 +125,7 @@ #define MEM_SLICE_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6) #define MEM_SLICE_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26) -static struct res_config { +static const struct res_config { bool machine_check; int num_imc; u32 imc_base; @@ -472,7 +472,7 @@ static u64 rpl_p_err_addr(u64 ecclog) return ECC_ERROR_LOG_ADDR45(ecclog); } -static struct res_config ehl_cfg = { +static const struct res_config ehl_cfg = { .num_imc = 1, .imc_base = 0x5000, .ibecc_base = 0xdc00, @@ -482,7 +482,7 @@ static struct res_config ehl_cfg = { .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr, }; -static struct res_config icl_cfg = { +static const struct res_config icl_cfg = { .num_imc = 1, .imc_base = 0x5000, .ibecc_base = 0xd800, @@ -492,7 +492,7 @@ static struct res_config icl_cfg = { .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr, }; -static struct res_config tgl_cfg = { +static const struct res_config tgl_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0x5000, @@ -506,7 +506,7 @@ static struct res_config tgl_cfg = { .err_addr_to_imc_addr = tgl_err_addr_to_imc_addr, }; -static struct res_config adl_cfg = { +static const struct res_config adl_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0xd800, @@ -517,7 +517,7 @@ static struct res_config adl_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static struct res_config adl_n_cfg = { +static const struct res_config adl_n_cfg = { .machine_check = true, .num_imc = 1, .imc_base = 0xd800, @@ -528,7 +528,7 @@ static struct res_config adl_n_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static struct res_config rpl_p_cfg = { +static const struct res_config rpl_p_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0xd800, @@ -540,7 +540,7 @@ static struct res_config rpl_p_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static struct res_config mtl_ps_cfg = { +static const struct res_config mtl_ps_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0xd800, @@ -551,7 +551,7 @@ static struct res_config mtl_ps_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static struct res_config mtl_p_cfg = { +static const struct res_config mtl_p_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0xd800, @@ -785,13 +785,22 @@ static u64 ecclog_read_and_clear(struct igen6_imc *imc) { u64 ecclog = readq(imc->window + ECC_ERROR_LOG_OFFSET); - if (ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)) { - /* Clear CE/UE bits by writing 1s */ - writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET); - return ecclog; - } + /* + * Quirk: The ECC_ERROR_LOG register of certain SoCs may contain + * the invalid value ~0. This will result in a flood of invalid + * error reports in polling mode. Skip it. + */ + if (ecclog == ~0) + return 0; - return 0; + /* Neither a CE nor a UE. Skip it.*/ + if (!(ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE))) + return 0; + + /* Clear CE/UE bits by writing 1s */ + writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET); + + return ecclog; } static void errsts_clear(struct igen6_imc *imc) @@ -1374,7 +1383,7 @@ static void unregister_err_handler(void) unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME); } -static void opstate_set(struct res_config *cfg, const struct pci_device_id *ent) +static void opstate_set(const struct res_config *cfg, const struct pci_device_id *ent) { /* * Quirk: Certain SoCs' error reporting interrupts don't work. diff --git a/drivers/edac/mem_repair.c b/drivers/edac/mem_repair.c new file mode 100755 index 000000000000..3b1a845457b0 --- /dev/null +++ b/drivers/edac/mem_repair.c @@ -0,0 +1,359 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * The generic EDAC memory repair driver is designed to control the memory + * devices with memory repair features, such as Post Package Repair (PPR), + * memory sparing etc. The common sysfs memory repair interface abstracts + * the control of various arbitrary memory repair functionalities into a + * unified set of functions. + * + * Copyright (c) 2024-2025 HiSilicon Limited. + */ + +#include + +enum edac_mem_repair_attributes { + MR_TYPE, + MR_PERSIST_MODE, + MR_SAFE_IN_USE, + MR_HPA, + MR_MIN_HPA, + MR_MAX_HPA, + MR_DPA, + MR_MIN_DPA, + MR_MAX_DPA, + MR_NIBBLE_MASK, + MR_BANK_GROUP, + MR_BANK, + MR_RANK, + MR_ROW, + MR_COLUMN, + MR_CHANNEL, + MR_SUB_CHANNEL, + MEM_DO_REPAIR, + MR_MAX_ATTRS +}; + +struct edac_mem_repair_dev_attr { + struct device_attribute dev_attr; + u8 instance; +}; + +struct edac_mem_repair_context { + char name[EDAC_FEAT_NAME_LEN]; + struct edac_mem_repair_dev_attr mem_repair_dev_attr[MR_MAX_ATTRS]; + struct attribute *mem_repair_attrs[MR_MAX_ATTRS + 1]; + struct attribute_group group; +}; + +#define TO_MR_DEV_ATTR(_dev_attr) \ + container_of(_dev_attr, struct edac_mem_repair_dev_attr, dev_attr) + +#define MR_ATTR_SHOW(attrib, cb, type, format) \ +static ssize_t attrib##_show(struct device *ras_feat_dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + u8 inst = TO_MR_DEV_ATTR(attr)->instance; \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_mem_repair_ops *ops = \ + ctx->mem_repair[inst].mem_repair_ops; \ + type data; \ + int ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->mem_repair[inst].private, \ + &data); \ + if (ret) \ + return ret; \ + \ + return sysfs_emit(buf, format, data); \ +} + +MR_ATTR_SHOW(repair_type, get_repair_type, const char *, "%s\n") +MR_ATTR_SHOW(persist_mode, get_persist_mode, bool, "%u\n") +MR_ATTR_SHOW(repair_safe_when_in_use, get_repair_safe_when_in_use, bool, "%u\n") +MR_ATTR_SHOW(hpa, get_hpa, u64, "0x%llx\n") +MR_ATTR_SHOW(min_hpa, get_min_hpa, u64, "0x%llx\n") +MR_ATTR_SHOW(max_hpa, get_max_hpa, u64, "0x%llx\n") +MR_ATTR_SHOW(dpa, get_dpa, u64, "0x%llx\n") +MR_ATTR_SHOW(min_dpa, get_min_dpa, u64, "0x%llx\n") +MR_ATTR_SHOW(max_dpa, get_max_dpa, u64, "0x%llx\n") +MR_ATTR_SHOW(nibble_mask, get_nibble_mask, u32, "0x%x\n") +MR_ATTR_SHOW(bank_group, get_bank_group, u32, "%u\n") +MR_ATTR_SHOW(bank, get_bank, u32, "%u\n") +MR_ATTR_SHOW(rank, get_rank, u32, "%u\n") +MR_ATTR_SHOW(row, get_row, u32, "0x%x\n") +MR_ATTR_SHOW(column, get_column, u32, "%u\n") +MR_ATTR_SHOW(channel, get_channel, u32, "%u\n") +MR_ATTR_SHOW(sub_channel, get_sub_channel, u32, "%u\n") + +#define MR_ATTR_STORE(attrib, cb, type, conv_func) \ +static ssize_t attrib##_store(struct device *ras_feat_dev, \ + struct device_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + u8 inst = TO_MR_DEV_ATTR(attr)->instance; \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_mem_repair_ops *ops = \ + ctx->mem_repair[inst].mem_repair_ops; \ + type data; \ + int ret; \ + \ + ret = conv_func(buf, 0, &data); \ + if (ret < 0) \ + return ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->mem_repair[inst].private, \ + data); \ + if (ret) \ + return ret; \ + \ + return len; \ +} + +MR_ATTR_STORE(persist_mode, set_persist_mode, unsigned long, kstrtoul) +MR_ATTR_STORE(hpa, set_hpa, u64, kstrtou64) +MR_ATTR_STORE(dpa, set_dpa, u64, kstrtou64) +MR_ATTR_STORE(nibble_mask, set_nibble_mask, unsigned long, kstrtoul) +MR_ATTR_STORE(bank_group, set_bank_group, unsigned long, kstrtoul) +MR_ATTR_STORE(bank, set_bank, unsigned long, kstrtoul) +MR_ATTR_STORE(rank, set_rank, unsigned long, kstrtoul) +MR_ATTR_STORE(row, set_row, unsigned long, kstrtoul) +MR_ATTR_STORE(column, set_column, unsigned long, kstrtoul) +MR_ATTR_STORE(channel, set_channel, unsigned long, kstrtoul) +MR_ATTR_STORE(sub_channel, set_sub_channel, unsigned long, kstrtoul) + +#define MR_DO_OP(attrib, cb) \ +static ssize_t attrib##_store(struct device *ras_feat_dev, \ + struct device_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + u8 inst = TO_MR_DEV_ATTR(attr)->instance; \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_mem_repair_ops *ops = ctx->mem_repair[inst].mem_repair_ops; \ + unsigned long data; \ + int ret; \ + \ + ret = kstrtoul(buf, 0, &data); \ + if (ret < 0) \ + return ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->mem_repair[inst].private, data); \ + if (ret) \ + return ret; \ + \ + return len; \ +} + +MR_DO_OP(repair, do_repair) + +static umode_t mem_repair_attr_visible(struct kobject *kobj, struct attribute *a, int attr_id) +{ + struct device *ras_feat_dev = kobj_to_dev(kobj); + struct device_attribute *dev_attr = container_of(a, struct device_attribute, attr); + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); + u8 inst = TO_MR_DEV_ATTR(dev_attr)->instance; + const struct edac_mem_repair_ops *ops = ctx->mem_repair[inst].mem_repair_ops; + + switch (attr_id) { + case MR_TYPE: + if (ops->get_repair_type) + return a->mode; + break; + case MR_PERSIST_MODE: + if (ops->get_persist_mode) { + if (ops->set_persist_mode) + return a->mode; + else + return 0444; + } + break; + case MR_SAFE_IN_USE: + if (ops->get_repair_safe_when_in_use) + return a->mode; + break; + case MR_HPA: + if (ops->get_hpa) { + if (ops->set_hpa) + return a->mode; + else + return 0444; + } + break; + case MR_MIN_HPA: + if (ops->get_min_hpa) + return a->mode; + break; + case MR_MAX_HPA: + if (ops->get_max_hpa) + return a->mode; + break; + case MR_DPA: + if (ops->get_dpa) { + if (ops->set_dpa) + return a->mode; + else + return 0444; + } + break; + case MR_MIN_DPA: + if (ops->get_min_dpa) + return a->mode; + break; + case MR_MAX_DPA: + if (ops->get_max_dpa) + return a->mode; + break; + case MR_NIBBLE_MASK: + if (ops->get_nibble_mask) { + if (ops->set_nibble_mask) + return a->mode; + else + return 0444; + } + break; + case MR_BANK_GROUP: + if (ops->get_bank_group) { + if (ops->set_bank_group) + return a->mode; + else + return 0444; + } + break; + case MR_BANK: + if (ops->get_bank) { + if (ops->set_bank) + return a->mode; + else + return 0444; + } + break; + case MR_RANK: + if (ops->get_rank) { + if (ops->set_rank) + return a->mode; + else + return 0444; + } + break; + case MR_ROW: + if (ops->get_row) { + if (ops->set_row) + return a->mode; + else + return 0444; + } + break; + case MR_COLUMN: + if (ops->get_column) { + if (ops->set_column) + return a->mode; + else + return 0444; + } + break; + case MR_CHANNEL: + if (ops->get_channel) { + if (ops->set_channel) + return a->mode; + else + return 0444; + } + break; + case MR_SUB_CHANNEL: + if (ops->get_sub_channel) { + if (ops->set_sub_channel) + return a->mode; + else + return 0444; + } + break; + case MEM_DO_REPAIR: + if (ops->do_repair) + return a->mode; + break; + default: + break; + } + + return 0; +} + +#define MR_ATTR_RO(_name, _instance) \ + ((struct edac_mem_repair_dev_attr) { .dev_attr = __ATTR_RO(_name), \ + .instance = _instance }) + +#define MR_ATTR_WO(_name, _instance) \ + ((struct edac_mem_repair_dev_attr) { .dev_attr = __ATTR_WO(_name), \ + .instance = _instance }) + +#define MR_ATTR_RW(_name, _instance) \ + ((struct edac_mem_repair_dev_attr) { .dev_attr = __ATTR_RW(_name), \ + .instance = _instance }) + +static int mem_repair_create_desc(struct device *dev, + const struct attribute_group **attr_groups, + u8 instance) +{ + struct edac_mem_repair_context *ctx; + struct attribute_group *group; + int i; + struct edac_mem_repair_dev_attr dev_attr[] = { + [MR_TYPE] = MR_ATTR_RO(repair_type, instance), + [MR_PERSIST_MODE] = MR_ATTR_RW(persist_mode, instance), + [MR_SAFE_IN_USE] = MR_ATTR_RO(repair_safe_when_in_use, instance), + [MR_HPA] = MR_ATTR_RW(hpa, instance), + [MR_MIN_HPA] = MR_ATTR_RO(min_hpa, instance), + [MR_MAX_HPA] = MR_ATTR_RO(max_hpa, instance), + [MR_DPA] = MR_ATTR_RW(dpa, instance), + [MR_MIN_DPA] = MR_ATTR_RO(min_dpa, instance), + [MR_MAX_DPA] = MR_ATTR_RO(max_dpa, instance), + [MR_NIBBLE_MASK] = MR_ATTR_RW(nibble_mask, instance), + [MR_BANK_GROUP] = MR_ATTR_RW(bank_group, instance), + [MR_BANK] = MR_ATTR_RW(bank, instance), + [MR_RANK] = MR_ATTR_RW(rank, instance), + [MR_ROW] = MR_ATTR_RW(row, instance), + [MR_COLUMN] = MR_ATTR_RW(column, instance), + [MR_CHANNEL] = MR_ATTR_RW(channel, instance), + [MR_SUB_CHANNEL] = MR_ATTR_RW(sub_channel, instance), + [MEM_DO_REPAIR] = MR_ATTR_WO(repair, instance) + }; + + ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + for (i = 0; i < MR_MAX_ATTRS; i++) { + memcpy(&ctx->mem_repair_dev_attr[i], + &dev_attr[i], sizeof(dev_attr[i])); + ctx->mem_repair_attrs[i] = + &ctx->mem_repair_dev_attr[i].dev_attr.attr; + } + + sprintf(ctx->name, "%s%d", "mem_repair", instance); + group = &ctx->group; + group->name = ctx->name; + group->attrs = ctx->mem_repair_attrs; + group->is_visible = mem_repair_attr_visible; + attr_groups[0] = group; + + return 0; +} + +/** + * edac_mem_repair_get_desc - get EDAC memory repair descriptors + * @dev: client device with memory repair feature + * @attr_groups: pointer to attribute group container + * @instance: device's memory repair instance number. + * + * Return: + * * %0 - Success. + * * %-EINVAL - Invalid parameters passed. + * * %-ENOMEM - Dynamic memory allocation failed. + */ +int edac_mem_repair_get_desc(struct device *dev, + const struct attribute_group **attr_groups, u8 instance) +{ + if (!dev || !attr_groups) + return -EINVAL; + + return mem_repair_create_desc(dev, attr_groups, instance); +} diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c index f93f2f2b1cf2..af14c8a3279f 100644 --- a/drivers/edac/pnd2_edac.c +++ b/drivers/edac/pnd2_edac.c @@ -372,7 +372,7 @@ static int gen_asym_mask(struct b_cr_slice_channel_hash *p, struct b_cr_asym_mem_region1_mchbar *as1, struct b_cr_asym_2way_mem_region_mchbar *as2way) { - const int intlv[] = { 0x5, 0xA, 0x3, 0xC }; + static const int intlv[] = { 0x5, 0xA, 0x3, 0xC }; int mask = 0; if (as2way->asym_2way_interleave_enable) @@ -489,7 +489,7 @@ static int dnv_get_registers(void) */ static int get_registers(void) { - const int intlv[] = { 10, 11, 12, 12 }; + static const int intlv[] = { 10, 11, 12, 12 }; if (RD_REG(&tolud, b_cr_tolud_pci) || RD_REG(&touud_lo, b_cr_touud_lo_pci) || diff --git a/drivers/edac/scrub.c b/drivers/edac/scrub.c new file mode 100755 index 000000000000..e421d3ebd959 --- /dev/null +++ b/drivers/edac/scrub.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * The generic EDAC scrub driver controls the memory scrubbers in the + * system. The common sysfs scrub interface abstracts the control of + * various arbitrary scrubbing functionalities into a unified set of + * functions. + * + * Copyright (c) 2024-2025 HiSilicon Limited. + */ + +#include + +enum edac_scrub_attributes { + SCRUB_ADDRESS, + SCRUB_SIZE, + SCRUB_ENABLE_BACKGROUND, + SCRUB_MIN_CYCLE_DURATION, + SCRUB_MAX_CYCLE_DURATION, + SCRUB_CUR_CYCLE_DURATION, + SCRUB_MAX_ATTRS +}; + +struct edac_scrub_dev_attr { + struct device_attribute dev_attr; + u8 instance; +}; + +struct edac_scrub_context { + char name[EDAC_FEAT_NAME_LEN]; + struct edac_scrub_dev_attr scrub_dev_attr[SCRUB_MAX_ATTRS]; + struct attribute *scrub_attrs[SCRUB_MAX_ATTRS + 1]; + struct attribute_group group; +}; + +#define TO_SCRUB_DEV_ATTR(_dev_attr) \ + container_of(_dev_attr, struct edac_scrub_dev_attr, dev_attr) + +#define EDAC_SCRUB_ATTR_SHOW(attrib, cb, type, format) \ +static ssize_t attrib##_show(struct device *ras_feat_dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + u8 inst = TO_SCRUB_DEV_ATTR(attr)->instance; \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_scrub_ops *ops = ctx->scrub[inst].scrub_ops; \ + type data; \ + int ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->scrub[inst].private, &data); \ + if (ret) \ + return ret; \ + \ + return sysfs_emit(buf, format, data); \ +} + +EDAC_SCRUB_ATTR_SHOW(addr, read_addr, u64, "0x%llx\n") +EDAC_SCRUB_ATTR_SHOW(size, read_size, u64, "0x%llx\n") +EDAC_SCRUB_ATTR_SHOW(enable_background, get_enabled_bg, bool, "%u\n") +EDAC_SCRUB_ATTR_SHOW(min_cycle_duration, get_min_cycle, u32, "%u\n") +EDAC_SCRUB_ATTR_SHOW(max_cycle_duration, get_max_cycle, u32, "%u\n") +EDAC_SCRUB_ATTR_SHOW(current_cycle_duration, get_cycle_duration, u32, "%u\n") + +#define EDAC_SCRUB_ATTR_STORE(attrib, cb, type, conv_func) \ +static ssize_t attrib##_store(struct device *ras_feat_dev, \ + struct device_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + u8 inst = TO_SCRUB_DEV_ATTR(attr)->instance; \ + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \ + const struct edac_scrub_ops *ops = ctx->scrub[inst].scrub_ops; \ + type data; \ + int ret; \ + \ + ret = conv_func(buf, 0, &data); \ + if (ret < 0) \ + return ret; \ + \ + ret = ops->cb(ras_feat_dev->parent, ctx->scrub[inst].private, data); \ + if (ret) \ + return ret; \ + \ + return len; \ +} + +EDAC_SCRUB_ATTR_STORE(addr, write_addr, u64, kstrtou64) +EDAC_SCRUB_ATTR_STORE(size, write_size, u64, kstrtou64) +EDAC_SCRUB_ATTR_STORE(enable_background, set_enabled_bg, unsigned long, kstrtoul) +EDAC_SCRUB_ATTR_STORE(current_cycle_duration, set_cycle_duration, unsigned long, kstrtoul) + +static umode_t scrub_attr_visible(struct kobject *kobj, struct attribute *a, int attr_id) +{ + struct device *ras_feat_dev = kobj_to_dev(kobj); + struct device_attribute *dev_attr = container_of(a, struct device_attribute, attr); + u8 inst = TO_SCRUB_DEV_ATTR(dev_attr)->instance; + struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); + const struct edac_scrub_ops *ops = ctx->scrub[inst].scrub_ops; + + switch (attr_id) { + case SCRUB_ADDRESS: + if (ops->read_addr) { + if (ops->write_addr) + return a->mode; + else + return 0444; + } + break; + case SCRUB_SIZE: + if (ops->read_size) { + if (ops->write_size) + return a->mode; + else + return 0444; + } + break; + case SCRUB_ENABLE_BACKGROUND: + if (ops->get_enabled_bg) { + if (ops->set_enabled_bg) + return a->mode; + else + return 0444; + } + break; + case SCRUB_MIN_CYCLE_DURATION: + if (ops->get_min_cycle) + return a->mode; + break; + case SCRUB_MAX_CYCLE_DURATION: + if (ops->get_max_cycle) + return a->mode; + break; + case SCRUB_CUR_CYCLE_DURATION: + if (ops->get_cycle_duration) { + if (ops->set_cycle_duration) + return a->mode; + else + return 0444; + } + break; + default: + break; + } + + return 0; +} + +#define EDAC_SCRUB_ATTR_RO(_name, _instance) \ + ((struct edac_scrub_dev_attr) { .dev_attr = __ATTR_RO(_name), \ + .instance = _instance }) + +#define EDAC_SCRUB_ATTR_WO(_name, _instance) \ + ((struct edac_scrub_dev_attr) { .dev_attr = __ATTR_WO(_name), \ + .instance = _instance }) + +#define EDAC_SCRUB_ATTR_RW(_name, _instance) \ + ((struct edac_scrub_dev_attr) { .dev_attr = __ATTR_RW(_name), \ + .instance = _instance }) + +static int scrub_create_desc(struct device *scrub_dev, + const struct attribute_group **attr_groups, u8 instance) +{ + struct edac_scrub_context *scrub_ctx; + struct attribute_group *group; + int i; + struct edac_scrub_dev_attr dev_attr[] = { + [SCRUB_ADDRESS] = EDAC_SCRUB_ATTR_RW(addr, instance), + [SCRUB_SIZE] = EDAC_SCRUB_ATTR_RW(size, instance), + [SCRUB_ENABLE_BACKGROUND] = EDAC_SCRUB_ATTR_RW(enable_background, instance), + [SCRUB_MIN_CYCLE_DURATION] = EDAC_SCRUB_ATTR_RO(min_cycle_duration, instance), + [SCRUB_MAX_CYCLE_DURATION] = EDAC_SCRUB_ATTR_RO(max_cycle_duration, instance), + [SCRUB_CUR_CYCLE_DURATION] = EDAC_SCRUB_ATTR_RW(current_cycle_duration, instance) + }; + + scrub_ctx = devm_kzalloc(scrub_dev, sizeof(*scrub_ctx), GFP_KERNEL); + if (!scrub_ctx) + return -ENOMEM; + + group = &scrub_ctx->group; + for (i = 0; i < SCRUB_MAX_ATTRS; i++) { + memcpy(&scrub_ctx->scrub_dev_attr[i], &dev_attr[i], sizeof(dev_attr[i])); + scrub_ctx->scrub_attrs[i] = &scrub_ctx->scrub_dev_attr[i].dev_attr.attr; + } + sprintf(scrub_ctx->name, "%s%d", "scrub", instance); + group->name = scrub_ctx->name; + group->attrs = scrub_ctx->scrub_attrs; + group->is_visible = scrub_attr_visible; + + attr_groups[0] = group; + + return 0; +} + +/** + * edac_scrub_get_desc - get EDAC scrub descriptors + * @scrub_dev: client device, with scrub support + * @attr_groups: pointer to attribute group container + * @instance: device's scrub instance number. + * + * Return: + * * %0 - Success. + * * %-EINVAL - Invalid parameters passed. + * * %-ENOMEM - Dynamic memory allocation failed. + */ +int edac_scrub_get_desc(struct device *scrub_dev, + const struct attribute_group **attr_groups, u8 instance) +{ + if (!scrub_dev || !attr_groups) + return -EINVAL; + + return scrub_create_desc(scrub_dev, attr_groups, instance); +} diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index f7bd930e058f..fa5b442b1844 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -121,6 +121,35 @@ void skx_adxl_put(void) } EXPORT_SYMBOL_GPL(skx_adxl_put); +static void skx_init_mc_mapping(struct skx_dev *d) +{ + /* + * By default, the BIOS presents all memory controllers within each + * socket to the EDAC driver. The physical indices are the same as + * the logical indices of the memory controllers enumerated by the + * EDAC driver. + */ + for (int i = 0; i < NUM_IMC; i++) + d->mc_mapping[i] = i; +} + +void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc) +{ + edac_dbg(0, "Set the mapping of mc phy idx to logical idx: %02d -> %02d\n", + pmc, lmc); + + d->mc_mapping[pmc] = lmc; +} +EXPORT_SYMBOL_GPL(skx_set_mc_mapping); + +static u8 skx_get_mc_mapping(struct skx_dev *d, u8 pmc) +{ + edac_dbg(0, "Get the mapping of mc phy idx to logical idx: %02d -> %02d\n", + pmc, d->mc_mapping[pmc]); + + return d->mc_mapping[pmc]; +} + static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src) { struct skx_dev *d; @@ -188,6 +217,8 @@ static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src) return false; } + res->imc = skx_get_mc_mapping(d, res->imc); + for (i = 0; i < adxl_component_count; i++) { if (adxl_values[i] == ~0x0ull) continue; @@ -326,6 +357,8 @@ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list) d->bus[0], d->bus[1], d->bus[2], d->bus[3]); list_add_tail(&d->list, &dev_edac_list); prev = pdev; + + skx_init_mc_mapping(d); } if (list) diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index b0845bdd4516..ca5408803f87 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -93,6 +93,16 @@ struct skx_dev { struct pci_dev *uracu; /* for i10nm CPU */ struct pci_dev *pcu_cr3; /* for HBM memory detection */ u32 mcroute; + /* + * Some server BIOS may hide certain memory controllers, and the + * EDAC driver skips those hidden memory controllers. However, the + * ADXL still decodes memory error address using physical memory + * controller indices. The mapping table is used to convert the + * physical indices (reported by ADXL) to the logical indices + * (used the EDAC driver) of present memory controllers during the + * error handling process. + */ + u8 mc_mapping[NUM_IMC]; struct skx_imc { struct mem_ctl_info *mci; struct pci_dev *mdev; /* for i10nm CPU */ @@ -242,6 +252,7 @@ void skx_adxl_put(void); void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log); void skx_set_mem_cfg(bool mem_cfg_2lm); void skx_set_res_cfg(struct res_config *cfg); +void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc); int skx_get_src_id(struct skx_dev *d, int off, u8 *id); diff --git a/drivers/edac/xgene_edac.c b/drivers/edac/xgene_edac.c index 699c7d29d80c..9955396c9a52 100644 --- a/drivers/edac/xgene_edac.c +++ b/drivers/edac/xgene_edac.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "edac_module.h" @@ -1407,7 +1408,7 @@ static void xgene_edac_iob_gic_report(struct edac_device_ctl_info *edac_dev) dev_err(edac_dev->dev, "Multiple XGIC write size error\n"); info = readl(ctx->dev_csr + XGICTRANSERRREQINFO); dev_err(edac_dev->dev, "XGIC %s access @ 0x%08X (0x%08X)\n", - info & REQTYPE_MASK ? "read" : "write", ERRADDR_RD(info), + str_read_write(info & REQTYPE_MASK), ERRADDR_RD(info), info); writel(reg, ctx->dev_csr + XGICTRANSERRINTSTS); @@ -1489,19 +1490,19 @@ static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev) if (reg & AGENT_OFFLINE_ERR_MASK) dev_err(edac_dev->dev, "IOB bus %s access to offline agent error\n", - write ? "write" : "read"); + str_write_read(write)); if (reg & UNIMPL_RBPAGE_ERR_MASK) dev_err(edac_dev->dev, "IOB bus %s access to unimplemented page error\n", - write ? "write" : "read"); + str_write_read(write)); if (reg & WORD_ALIGNED_ERR_MASK) dev_err(edac_dev->dev, "IOB bus %s word aligned access error\n", - write ? "write" : "read"); + str_write_read(write)); if (reg & PAGE_ACCESS_ERR_MASK) dev_err(edac_dev->dev, "IOB bus %s to page out of range access error\n", - write ? "write" : "read"); + str_write_read(write)); if (regmap_write(ctx->edac->rb_map, RBEIR, 0)) return; if (regmap_write(ctx->edac->rb_map, RBCSR, 0)) @@ -1560,7 +1561,7 @@ static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev) err_addr_lo = readl(ctx->dev_csr + IOBBATRANSERRREQINFOL); err_addr_hi = readl(ctx->dev_csr + IOBBATRANSERRREQINFOH); dev_err(edac_dev->dev, "IOB BA %s access at 0x%02X.%08X (0x%08X)\n", - REQTYPE_F2_RD(err_addr_hi) ? "read" : "write", + str_read_write(REQTYPE_F2_RD(err_addr_hi)), ERRADDRH_F2_RD(err_addr_hi), err_addr_lo, err_addr_hi); if (reg & WRERR_RESP_MASK) dev_err(edac_dev->dev, "IOB BA requestor ID 0x%08X\n", @@ -1611,7 +1612,7 @@ static void xgene_edac_pa_report(struct edac_device_ctl_info *edac_dev) dev_err(edac_dev->dev, "%sAXI slave 0 illegal %s access @ 0x%02X.%08X (0x%08X)\n", reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "", - REQTYPE_RD(err_addr_hi) ? "read" : "write", + str_read_write(REQTYPE_RD(err_addr_hi)), ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi); writel(reg, ctx->dev_csr + IOBAXIS0TRANSERRINTSTS); @@ -1625,7 +1626,7 @@ static void xgene_edac_pa_report(struct edac_device_ctl_info *edac_dev) dev_err(edac_dev->dev, "%sAXI slave 1 illegal %s access @ 0x%02X.%08X (0x%08X)\n", reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "", - REQTYPE_RD(err_addr_hi) ? "read" : "write", + str_read_write(REQTYPE_RD(err_addr_hi)), ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi); writel(reg, ctx->dev_csr + IOBAXIS1TRANSERRINTSTS); } diff --git a/include/linux/edac.h b/include/linux/edac.h index b4ee8961e623..451f9c152c99 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -661,4 +661,219 @@ static inline struct dimm_info *edac_get_dimm(struct mem_ctl_info *mci, return mci->dimms[index]; } + +#define EDAC_FEAT_NAME_LEN 128 + +/* RAS feature type */ +enum edac_dev_feat { + RAS_FEAT_SCRUB, + RAS_FEAT_ECS, + RAS_FEAT_MEM_REPAIR, + RAS_FEAT_MAX +}; + +/** + * struct edac_scrub_ops - scrub device operations (all elements optional) + * @read_addr: read base address of scrubbing range. + * @read_size: read offset of scrubbing range. + * @write_addr: set base address of the scrubbing range. + * @write_size: set offset of the scrubbing range. + * @get_enabled_bg: check if currently performing background scrub. + * @set_enabled_bg: start or stop a bg-scrub. + * @get_min_cycle: get minimum supported scrub cycle duration in seconds. + * @get_max_cycle: get maximum supported scrub cycle duration in seconds. + * @get_cycle_duration: get current scrub cycle duration in seconds. + * @set_cycle_duration: set current scrub cycle duration in seconds. + */ +struct edac_scrub_ops { + int (*read_addr)(struct device *dev, void *drv_data, u64 *base); + int (*read_size)(struct device *dev, void *drv_data, u64 *size); + int (*write_addr)(struct device *dev, void *drv_data, u64 base); + int (*write_size)(struct device *dev, void *drv_data, u64 size); + int (*get_enabled_bg)(struct device *dev, void *drv_data, bool *enable); + int (*set_enabled_bg)(struct device *dev, void *drv_data, bool enable); + int (*get_min_cycle)(struct device *dev, void *drv_data, u32 *min); + int (*get_max_cycle)(struct device *dev, void *drv_data, u32 *max); + int (*get_cycle_duration)(struct device *dev, void *drv_data, u32 *cycle); + int (*set_cycle_duration)(struct device *dev, void *drv_data, u32 cycle); +}; + +#if IS_ENABLED(CONFIG_EDAC_SCRUB) +int edac_scrub_get_desc(struct device *scrub_dev, + const struct attribute_group **attr_groups, + u8 instance); +#else +static inline int edac_scrub_get_desc(struct device *scrub_dev, + const struct attribute_group **attr_groups, + u8 instance) +{ return -EOPNOTSUPP; } +#endif /* CONFIG_EDAC_SCRUB */ + +/** + * struct edac_ecs_ops - ECS device operations (all elements optional) + * @get_log_entry_type: read the log entry type value. + * @set_log_entry_type: set the log entry type value. + * @get_mode: read the mode value. + * @set_mode: set the mode value. + * @reset: reset the ECS counter. + * @get_threshold: read the threshold count per gigabits of memory cells. + * @set_threshold: set the threshold count per gigabits of memory cells. + */ +struct edac_ecs_ops { + int (*get_log_entry_type)(struct device *dev, void *drv_data, int fru_id, u32 *val); + int (*set_log_entry_type)(struct device *dev, void *drv_data, int fru_id, u32 val); + int (*get_mode)(struct device *dev, void *drv_data, int fru_id, u32 *val); + int (*set_mode)(struct device *dev, void *drv_data, int fru_id, u32 val); + int (*reset)(struct device *dev, void *drv_data, int fru_id, u32 val); + int (*get_threshold)(struct device *dev, void *drv_data, int fru_id, u32 *threshold); + int (*set_threshold)(struct device *dev, void *drv_data, int fru_id, u32 threshold); +}; + +struct edac_ecs_ex_info { + u16 num_media_frus; +}; + +#if IS_ENABLED(CONFIG_EDAC_ECS) +int edac_ecs_get_desc(struct device *ecs_dev, + const struct attribute_group **attr_groups, + u16 num_media_frus); +#else +static inline int edac_ecs_get_desc(struct device *ecs_dev, + const struct attribute_group **attr_groups, + u16 num_media_frus) +{ return -EOPNOTSUPP; } +#endif /* CONFIG_EDAC_ECS */ + +enum edac_mem_repair_type { + EDAC_REPAIR_MAX +}; + +enum edac_mem_repair_cmd { + EDAC_DO_MEM_REPAIR = 1, +}; + +/** + * struct edac_mem_repair_ops - memory repair operations + * (all elements are optional except do_repair, set_hpa/set_dpa) + * @get_repair_type: get the memory repair type, listed in + * enum edac_mem_repair_function. + * @get_persist_mode: get the current persist mode. + * false - Soft repair type (temporary repair). + * true - Hard memory repair type (permanent repair). + * @set_persist_mode: set the persist mode of the memory repair instance. + * @get_repair_safe_when_in_use: get whether memory media is accessible and + * data is retained during repair operation. + * @get_hpa: get current host physical address (HPA) of memory to repair. + * @set_hpa: set host physical address (HPA) of memory to repair. + * @get_min_hpa: get the minimum supported host physical address (HPA). + * @get_max_hpa: get the maximum supported host physical address (HPA). + * @get_dpa: get current device physical address (DPA) of memory to repair. + * @set_dpa: set device physical address (DPA) of memory to repair. + * In some states of system configuration (e.g. before address decoders + * have been configured), memory devices (e.g. CXL) may not have an active + * mapping in the host physical address map. As such, the memory + * to repair must be identified by a device specific physical addressing + * scheme using a device physical address(DPA). The DPA and other control + * attributes to use for the repair operations will be presented in related + * error records. + * @get_min_dpa: get the minimum supported device physical address (DPA). + * @get_max_dpa: get the maximum supported device physical address (DPA). + * @get_nibble_mask: get current nibble mask of memory to repair. + * @set_nibble_mask: set nibble mask of memory to repair. + * @get_bank_group: get current bank group of memory to repair. + * @set_bank_group: set bank group of memory to repair. + * @get_bank: get current bank of memory to repair. + * @set_bank: set bank of memory to repair. + * @get_rank: get current rank of memory to repair. + * @set_rank: set rank of memory to repair. + * @get_row: get current row of memory to repair. + * @set_row: set row of memory to repair. + * @get_column: get current column of memory to repair. + * @set_column: set column of memory to repair. + * @get_channel: get current channel of memory to repair. + * @set_channel: set channel of memory to repair. + * @get_sub_channel: get current subchannel of memory to repair. + * @set_sub_channel: set subchannel of memory to repair. + * @do_repair: Issue memory repair operation for the HPA/DPA and + * other control attributes set for the memory to repair. + * + * All elements are optional except do_repair and at least one of set_hpa/set_dpa. + */ +struct edac_mem_repair_ops { + int (*get_repair_type)(struct device *dev, void *drv_data, const char **type); + int (*get_persist_mode)(struct device *dev, void *drv_data, bool *persist); + int (*set_persist_mode)(struct device *dev, void *drv_data, bool persist); + int (*get_repair_safe_when_in_use)(struct device *dev, void *drv_data, bool *safe); + int (*get_hpa)(struct device *dev, void *drv_data, u64 *hpa); + int (*set_hpa)(struct device *dev, void *drv_data, u64 hpa); + int (*get_min_hpa)(struct device *dev, void *drv_data, u64 *hpa); + int (*get_max_hpa)(struct device *dev, void *drv_data, u64 *hpa); + int (*get_dpa)(struct device *dev, void *drv_data, u64 *dpa); + int (*set_dpa)(struct device *dev, void *drv_data, u64 dpa); + int (*get_min_dpa)(struct device *dev, void *drv_data, u64 *dpa); + int (*get_max_dpa)(struct device *dev, void *drv_data, u64 *dpa); + int (*get_nibble_mask)(struct device *dev, void *drv_data, u32 *val); + int (*set_nibble_mask)(struct device *dev, void *drv_data, u32 val); + int (*get_bank_group)(struct device *dev, void *drv_data, u32 *val); + int (*set_bank_group)(struct device *dev, void *drv_data, u32 val); + int (*get_bank)(struct device *dev, void *drv_data, u32 *val); + int (*set_bank)(struct device *dev, void *drv_data, u32 val); + int (*get_rank)(struct device *dev, void *drv_data, u32 *val); + int (*set_rank)(struct device *dev, void *drv_data, u32 val); + int (*get_row)(struct device *dev, void *drv_data, u32 *val); + int (*set_row)(struct device *dev, void *drv_data, u32 val); + int (*get_column)(struct device *dev, void *drv_data, u32 *val); + int (*set_column)(struct device *dev, void *drv_data, u32 val); + int (*get_channel)(struct device *dev, void *drv_data, u32 *val); + int (*set_channel)(struct device *dev, void *drv_data, u32 val); + int (*get_sub_channel)(struct device *dev, void *drv_data, u32 *val); + int (*set_sub_channel)(struct device *dev, void *drv_data, u32 val); + int (*do_repair)(struct device *dev, void *drv_data, u32 val); +}; + +#if IS_ENABLED(CONFIG_EDAC_MEM_REPAIR) +int edac_mem_repair_get_desc(struct device *dev, + const struct attribute_group **attr_groups, + u8 instance); +#else +static inline int edac_mem_repair_get_desc(struct device *dev, + const struct attribute_group **attr_groups, + u8 instance) +{ return -EOPNOTSUPP; } +#endif /* CONFIG_EDAC_MEM_REPAIR */ + +/* EDAC device feature information structure */ +struct edac_dev_data { + union { + const struct edac_scrub_ops *scrub_ops; + const struct edac_ecs_ops *ecs_ops; + const struct edac_mem_repair_ops *mem_repair_ops; + }; + u8 instance; + void *private; +}; + +struct edac_dev_feat_ctx { + struct device dev; + void *private; + struct edac_dev_data *scrub; + struct edac_dev_data ecs; + struct edac_dev_data *mem_repair; +}; + +struct edac_dev_feature { + enum edac_dev_feat ft_type; + u8 instance; + union { + const struct edac_scrub_ops *scrub_ops; + const struct edac_ecs_ops *ecs_ops; + const struct edac_mem_repair_ops *mem_repair_ops; + }; + void *ctx; + struct edac_ecs_ex_info ecs_info; +}; + +int edac_dev_register(struct device *parent, char *dev_name, + void *parent_pvt_data, int num_features, + const struct edac_dev_feature *ras_features); #endif /* _LINUX_EDAC_H_ */