Skip to content

Commit

Permalink
ACPI, APEI, CPER: Add UEFI 2.4 support for memory error
Browse files Browse the repository at this point in the history
In the latest UEFI spec (2.4 at the time of writing), the memory error
definition for CPER (UEFI 2.4, Appendix N: Common Platform Error Record)
adds some new fields. These fields help to map a memory error
to an actual DIMM location.

Original-author: Tony Luck <tony.luck@intel.com>
Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
Acked-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
  • Loading branch information
Chen, Gong authored and Tony Luck committed Oct 23, 2013
1 parent dd6dad4 commit 147de14
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 12 deletions.
3 changes: 1 addition & 2 deletions arch/x86/kernel/cpu/mcheck/mce-apei.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err)
struct mce m;

/* Only corrected MC is reported */
if (!corrected || !(mem_err->validation_bits &
CPER_MEM_VALID_PHYSICAL_ADDRESS))
if (!corrected || !(mem_err->validation_bits & CPER_MEM_VALID_PA))
return;

mce_setup(&m);
Expand Down
7 changes: 4 additions & 3 deletions drivers/acpi/apei/cper.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
* various tables, such as ERST, BERT and HEST etc.
*
* For more information about CPER, please refer to Appendix N of UEFI
* Specification version 2.3.
* Specification version 2.4.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
Expand Down Expand Up @@ -191,16 +191,17 @@ static const char *cper_mem_err_type_strs[] = {
"memory sparing",
"scrub corrected error",
"scrub uncorrected error",
"physical memory map-out event",
};

static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
{
if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)
if (mem->validation_bits & CPER_MEM_VALID_PA)
printk("%s""physical_address: 0x%016llx\n",
pfx, mem->physical_addr);
if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK)
if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
printk("%s""physical_address_mask: 0x%016llx\n",
pfx, mem->physical_addr_mask);
if (mem->validation_bits & CPER_MEM_VALID_NODE)
Expand Down
4 changes: 2 additions & 2 deletions drivers/acpi/apei/ghes.c
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ static void ghes_handle_memory_failure(struct acpi_generic_data *gdata, int sev)

if (sec_sev == GHES_SEV_CORRECTED &&
(gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED) &&
(mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)) {
(mem_err->validation_bits & CPER_MEM_VALID_PA)) {
pfn = mem_err->physical_addr >> PAGE_SHIFT;
if (pfn_valid(pfn))
memory_failure_queue(pfn, 0, MF_SOFT_OFFLINE);
Expand All @@ -430,7 +430,7 @@ static void ghes_handle_memory_failure(struct acpi_generic_data *gdata, int sev)
}
if (sev == GHES_SEV_RECOVERABLE &&
sec_sev == GHES_SEV_RECOVERABLE &&
mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
mem_err->validation_bits & CPER_MEM_VALID_PA) {
pfn = mem_err->physical_addr >> PAGE_SHIFT;
memory_failure_queue(pfn, 0, 0);
}
Expand Down
5 changes: 2 additions & 3 deletions drivers/edac/ghes_edac.c
Original file line number Diff line number Diff line change
Expand Up @@ -297,15 +297,14 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
}

/* Error address */
if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
if (mem_err->validation_bits & CPER_MEM_VALID_PA) {
e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT;
e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK;
}

/* Error grain */
if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK) {
if (mem_err->validation_bits & CPER_MEM_VALID_PA_MASK)
e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK);
}

/* Memory error location, mapped on e->location */
p = e->location;
Expand Down
11 changes: 9 additions & 2 deletions include/linux/cper.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,8 +218,8 @@ enum {
#define CPER_PROC_VALID_IP 0x1000

#define CPER_MEM_VALID_ERROR_STATUS 0x0001
#define CPER_MEM_VALID_PHYSICAL_ADDRESS 0x0002
#define CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK 0x0004
#define CPER_MEM_VALID_PA 0x0002
#define CPER_MEM_VALID_PA_MASK 0x0004
#define CPER_MEM_VALID_NODE 0x0008
#define CPER_MEM_VALID_CARD 0x0010
#define CPER_MEM_VALID_MODULE 0x0020
Expand All @@ -232,6 +232,9 @@ enum {
#define CPER_MEM_VALID_RESPONDER_ID 0x1000
#define CPER_MEM_VALID_TARGET_ID 0x2000
#define CPER_MEM_VALID_ERROR_TYPE 0x4000
#define CPER_MEM_VALID_RANK_NUMBER 0x8000
#define CPER_MEM_VALID_CARD_HANDLE 0x10000
#define CPER_MEM_VALID_MODULE_HANDLE 0x20000

#define CPER_PCIE_VALID_PORT_TYPE 0x0001
#define CPER_PCIE_VALID_VERSION 0x0002
Expand Down Expand Up @@ -347,6 +350,10 @@ struct cper_sec_mem_err {
__u64 responder_id;
__u64 target_id;
__u8 error_type;
__u8 reserved;
__u16 rank;
__u16 mem_array_handle; /* card handle in UEFI 2.4 */
__u16 mem_dev_handle; /* module handle in UEFI 2.4 */
};

struct cper_sec_pcie {
Expand Down

0 comments on commit 147de14

Please sign in to comment.