Skip to content

Commit

Permalink
Merge branch 'release' of git://git.kernel.org/pub/scm/linux/kernel/g…
Browse files Browse the repository at this point in the history
…it/lenb/linux-acpi-2.6

* 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6: (27 commits)
  ACPI: Don't let acpi_pad needlessly mark TSC unstable
  drivers/acpi/sleep.h: Checkpatch cleanup
  ACPI: Minor cleanup eliminating redundant PMTIMER_TICKS to NS conversion
  ACPI: delete unused c-state promotion/demotion data strucutures
  ACPI: video: fix acpi_backlight=video
  ACPI: EC: Use kmemdup
  drivers/acpi: use kasprintf
  ACPI, APEI, EINJ injection parameters support
  Add x64 support to debugfs
  ACPI, APEI, Use ERST for persistent storage of MCE
  ACPI, APEI, Error Record Serialization Table (ERST) support
  ACPI, APEI, Generic Hardware Error Source memory error support
  ACPI, APEI, UEFI Common Platform Error Record (CPER) header
  Unified UUID/GUID definition
  ACPI Hardware Error Device (PNP0C33) support
  ACPI, APEI, PCIE AER, use general HEST table parsing in AER firmware_first setup
  ACPI, APEI, Document for APEI
  ACPI, APEI, EINJ support
  ACPI, APEI, HEST table parsing
  ACPI, APEI, APEI supporting infrastructure
  ...
  • Loading branch information
Linus Torvalds committed May 28, 2010
2 parents d372e7f + d3b3833 commit 9a90e09
Show file tree
Hide file tree
Showing 59 changed files with 4,472 additions and 471 deletions.
59 changes: 59 additions & 0 deletions Documentation/acpi/apei/einj.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
APEI Error INJection
~~~~~~~~~~~~~~~~~~~~

EINJ provides a hardware error injection mechanism
It is very useful for debugging and testing of other APEI and RAS features.

To use EINJ, make sure the following are enabled in your kernel
configuration:

CONFIG_DEBUG_FS
CONFIG_ACPI_APEI
CONFIG_ACPI_APEI_EINJ

The user interface of EINJ is debug file system, under the
directory apei/einj. The following files are provided.

- available_error_type
Reading this file returns the error injection capability of the
platform, that is, which error types are supported. The error type
definition is as follow, the left field is the error type value, the
right field is error description.

0x00000001 Processor Correctable
0x00000002 Processor Uncorrectable non-fatal
0x00000004 Processor Uncorrectable fatal
0x00000008 Memory Correctable
0x00000010 Memory Uncorrectable non-fatal
0x00000020 Memory Uncorrectable fatal
0x00000040 PCI Express Correctable
0x00000080 PCI Express Uncorrectable fatal
0x00000100 PCI Express Uncorrectable non-fatal
0x00000200 Platform Correctable
0x00000400 Platform Uncorrectable non-fatal
0x00000800 Platform Uncorrectable fatal

The format of file contents are as above, except there are only the
available error type lines.

- error_type
This file is used to set the error type value. The error type value
is defined in "available_error_type" description.

- error_inject
Write any integer to this file to trigger the error
injection. Before this, please specify all necessary error
parameters.

- param1
This file is used to set the first error parameter value. Effect of
parameter depends on error_type specified. For memory error, this is
physical memory address.

- param2
This file is used to set the second error parameter value. Effect of
parameter depends on error_type specified. For memory error, this is
physical memory address mask.

For more information about EINJ, please refer to ACPI specification
version 4.0, section 17.5.
12 changes: 10 additions & 2 deletions Documentation/kernel-parameters.txt
Original file line number Diff line number Diff line change
Expand Up @@ -145,11 +145,10 @@ and is between 256 and 4096 characters. It is defined in the file

acpi= [HW,ACPI,X86]
Advanced Configuration and Power Interface
Format: { force | off | ht | strict | noirq | rsdt }
Format: { force | off | strict | noirq | rsdt }
force -- enable ACPI if default was off
off -- disable ACPI if default was on
noirq -- do not use ACPI for IRQ routing
ht -- run only enough ACPI to enable Hyper Threading
strict -- Be less tolerant of platforms that are not
strictly ACPI specification compliant.
rsdt -- prefer RSDT over (default) XSDT
Expand Down Expand Up @@ -758,6 +757,10 @@ and is between 256 and 4096 characters. It is defined in the file
Default value is 0.
Value can be changed at runtime via /selinux/enforce.

erst_disable [ACPI]
Disable Error Record Serialization Table (ERST)
support.

ether= [HW,NET] Ethernet cards parameters
This option is obsoleted by the "netdev=" option, which
has equivalent usage. See its documentation for details.
Expand Down Expand Up @@ -852,6 +855,11 @@ and is between 256 and 4096 characters. It is defined in the file
hd= [EIDE] (E)IDE hard drive subsystem geometry
Format: <cyl>,<head>,<sect>

hest_disable [ACPI]
Disable Hardware Error Source Table (HEST) support;
corresponding firmware-first mode error processing
logic will be disabled.

highmem=nn[KMG] [KNL,BOOT] forces the highmem zone to have an exact
size of <nn>. This works even on boxes that have no
highmem otherwise. This also works to reduce highmem
Expand Down
1 change: 0 additions & 1 deletion arch/ia64/include/asm/acpi.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ ia64_acpi_release_global_lock (unsigned int *lock)
#define acpi_noirq 0 /* ACPI always enabled on IA64 */
#define acpi_pci_disabled 0 /* ACPI PCI always enabled on IA64 */
#define acpi_strict 1 /* no ACPI spec workarounds on IA64 */
#define acpi_ht 0 /* no HT-only mode on IA64 */
#endif
#define acpi_processor_cstate_check(x) (x) /* no idle limits on IA64 :) */
static inline void disable_acpi(void) { }
Expand Down
5 changes: 4 additions & 1 deletion arch/ia64/pci/pci.c
Original file line number Diff line number Diff line change
Expand Up @@ -335,8 +335,11 @@ pcibios_setup_root_windows(struct pci_bus *bus, struct pci_controller *ctrl)
}

struct pci_bus * __devinit
pci_acpi_scan_root(struct acpi_device *device, int domain, int bus)
pci_acpi_scan_root(struct acpi_pci_root *root)
{
struct acpi_device *device = root->device;
int domain = root->segment;
int bus = root->secondary.start;
struct pci_controller *controller;
unsigned int windows = 0;
struct pci_bus *pbus;
Expand Down
2 changes: 0 additions & 2 deletions arch/x86/include/asm/acpi.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ extern int acpi_ioapic;
extern int acpi_noirq;
extern int acpi_strict;
extern int acpi_disabled;
extern int acpi_ht;
extern int acpi_pci_disabled;
extern int acpi_skip_timer_override;
extern int acpi_use_timer_override;
Expand All @@ -97,7 +96,6 @@ void acpi_pic_sci_set_trigger(unsigned int, u16);
static inline void disable_acpi(void)
{
acpi_disabled = 1;
acpi_ht = 0;
acpi_pci_disabled = 1;
acpi_noirq = 1;
}
Expand Down
8 changes: 8 additions & 0 deletions arch/x86/include/asm/mce.h
Original file line number Diff line number Diff line change
Expand Up @@ -225,5 +225,13 @@ extern void mcheck_intel_therm_init(void);
static inline void mcheck_intel_therm_init(void) { }
#endif

/*
* Used by APEI to report memory error via /dev/mcelog
*/

struct cper_sec_mem_err;
extern void apei_mce_report_mem_error(int corrected,
struct cper_sec_mem_err *mem_err);

#endif /* __KERNEL__ */
#endif /* _ASM_X86_MCE_H */
19 changes: 3 additions & 16 deletions arch/x86/kernel/acpi/boot.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ EXPORT_SYMBOL(acpi_disabled);
int acpi_noirq; /* skip ACPI IRQ initialization */
int acpi_pci_disabled; /* skip ACPI PCI scan and IRQ initialization */
EXPORT_SYMBOL(acpi_pci_disabled);
int acpi_ht __initdata = 1; /* enable HT */

int acpi_lapic;
int acpi_ioapic;
Expand Down Expand Up @@ -1501,9 +1500,8 @@ void __init acpi_boot_table_init(void)

/*
* If acpi_disabled, bail out
* One exception: acpi=ht continues far enough to enumerate LAPICs
*/
if (acpi_disabled && !acpi_ht)
if (acpi_disabled)
return;

/*
Expand Down Expand Up @@ -1534,9 +1532,8 @@ int __init early_acpi_boot_init(void)
{
/*
* If acpi_disabled, bail out
* One exception: acpi=ht continues far enough to enumerate LAPICs
*/
if (acpi_disabled && !acpi_ht)
if (acpi_disabled)
return 1;

/*
Expand All @@ -1554,9 +1551,8 @@ int __init acpi_boot_init(void)

/*
* If acpi_disabled, bail out
* One exception: acpi=ht continues far enough to enumerate LAPICs
*/
if (acpi_disabled && !acpi_ht)
if (acpi_disabled)
return 1;

acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf);
Expand Down Expand Up @@ -1591,21 +1587,12 @@ static int __init parse_acpi(char *arg)
/* acpi=force to over-ride black-list */
else if (strcmp(arg, "force") == 0) {
acpi_force = 1;
acpi_ht = 1;
acpi_disabled = 0;
}
/* acpi=strict disables out-of-spec workarounds */
else if (strcmp(arg, "strict") == 0) {
acpi_strict = 1;
}
/* Limit ACPI just to boot-time to enable HT */
else if (strcmp(arg, "ht") == 0) {
if (!acpi_force) {
printk(KERN_WARNING "acpi=ht will be removed in Linux-2.6.35\n");
disable_acpi();
}
acpi_ht = 1;
}
/* acpi=rsdt use RSDT instead of XSDT */
else if (strcmp(arg, "rsdt") == 0) {
acpi_rsdt_forced = 1;
Expand Down
2 changes: 0 additions & 2 deletions arch/x86/kernel/acpi/sleep.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,6 @@ static int __init acpi_sleep_setup(char *str)
#endif
if (strncmp(str, "old_ordering", 12) == 0)
acpi_old_suspend_ordering();
if (strncmp(str, "sci_force_enable", 16) == 0)
acpi_set_sci_en_on_resume();
str = strchr(str, ',');
if (str != NULL)
str += strspn(str, ", \t");
Expand Down
2 changes: 2 additions & 0 deletions arch/x86/kernel/cpu/mcheck/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@ obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o

obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o

obj-$(CONFIG_ACPI_APEI) += mce-apei.o
138 changes: 138 additions & 0 deletions arch/x86/kernel/cpu/mcheck/mce-apei.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
/*
* Bridge between MCE and APEI
*
* On some machine, corrected memory errors are reported via APEI
* generic hardware error source (GHES) instead of corrected Machine
* Check. These corrected memory errors can be reported to user space
* through /dev/mcelog via faking a corrected Machine Check, so that
* the error memory page can be offlined by /sbin/mcelog if the error
* count for one page is beyond the threshold.
*
* For fatal MCE, save MCE record into persistent storage via ERST, so
* that the MCE record can be logged after reboot via ERST.
*
* Copyright 2010 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/

#include <linux/kernel.h>
#include <linux/acpi.h>
#include <linux/cper.h>
#include <acpi/apei.h>
#include <asm/mce.h>

#include "mce-internal.h"

void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err)
{
struct mce m;

/* Only corrected MC is reported */
if (!corrected)
return;

mce_setup(&m);
m.bank = 1;
/* Fake a memory read corrected error with unknown channel */
m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f;
m.addr = mem_err->physical_addr;
mce_log(&m);
mce_notify_irq();
}
EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);

#define CPER_CREATOR_MCE \
UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
0x64, 0x90, 0xb8, 0x9d)
#define CPER_SECTION_TYPE_MCE \
UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
0x04, 0x4a, 0x38, 0xfc)

/*
* CPER specification (in UEFI specification 2.3 appendix N) requires
* byte-packed.
*/
struct cper_mce_record {
struct cper_record_header hdr;
struct cper_section_descriptor sec_hdr;
struct mce mce;
} __packed;

int apei_write_mce(struct mce *m)
{
struct cper_mce_record rcd;

memset(&rcd, 0, sizeof(rcd));
memcpy(rcd.hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
rcd.hdr.revision = CPER_RECORD_REV;
rcd.hdr.signature_end = CPER_SIG_END;
rcd.hdr.section_count = 1;
rcd.hdr.error_severity = CPER_SER_FATAL;
/* timestamp, platform_id, partition_id are all invalid */
rcd.hdr.validation_bits = 0;
rcd.hdr.record_length = sizeof(rcd);
rcd.hdr.creator_id = CPER_CREATOR_MCE;
rcd.hdr.notification_type = CPER_NOTIFY_MCE;
rcd.hdr.record_id = cper_next_record_id();
rcd.hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR;

rcd.sec_hdr.section_offset = (void *)&rcd.mce - (void *)&rcd;
rcd.sec_hdr.section_length = sizeof(rcd.mce);
rcd.sec_hdr.revision = CPER_SEC_REV;
/* fru_id and fru_text is invalid */
rcd.sec_hdr.validation_bits = 0;
rcd.sec_hdr.flags = CPER_SEC_PRIMARY;
rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE;
rcd.sec_hdr.section_severity = CPER_SER_FATAL;

memcpy(&rcd.mce, m, sizeof(*m));

return erst_write(&rcd.hdr);
}

ssize_t apei_read_mce(struct mce *m, u64 *record_id)
{
struct cper_mce_record rcd;
ssize_t len;

len = erst_read_next(&rcd.hdr, sizeof(rcd));
if (len <= 0)
return len;
/* Can not skip other records in storage via ERST unless clear them */
else if (len != sizeof(rcd) ||
uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) {
if (printk_ratelimit())
pr_warning(
"MCE-APEI: Can not skip the unknown record in ERST");
return -EIO;
}

memcpy(m, &rcd.mce, sizeof(*m));
*record_id = rcd.hdr.record_id;

return sizeof(*m);
}

/* Check whether there is record in ERST */
int apei_check_mce(void)
{
return erst_get_record_count();
}

int apei_clear_mce(u64 record_id)
{
return erst_clear(record_id);
}
23 changes: 23 additions & 0 deletions arch/x86/kernel/cpu/mcheck/mce-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,26 @@ extern int mce_ser;

extern struct mce_bank *mce_banks;

#ifdef CONFIG_ACPI_APEI
int apei_write_mce(struct mce *m);
ssize_t apei_read_mce(struct mce *m, u64 *record_id);
int apei_check_mce(void);
int apei_clear_mce(u64 record_id);
#else
static inline int apei_write_mce(struct mce *m)
{
return -EINVAL;
}
static inline ssize_t apei_read_mce(struct mce *m, u64 *record_id)
{
return 0;
}
static inline int apei_check_mce(void)
{
return 0;
}
static inline int apei_clear_mce(u64 record_id)
{
return -EINVAL;
}
#endif
Loading

0 comments on commit 9a90e09

Please sign in to comment.