Skip to content

Commit

Permalink
Merge branch 'x86/mce2' into x86/core
Browse files Browse the repository at this point in the history
  • Loading branch information
Ingo Molnar committed Mar 5, 2009
2 parents a1413c8 + 73af76d commit caab36b
Show file tree
Hide file tree
Showing 12 changed files with 709 additions and 172 deletions.
5 changes: 5 additions & 0 deletions arch/x86/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -783,6 +783,11 @@ config X86_MCE_AMD
Additional support for AMD specific MCE features such as
the DRAM Error Threshold.

config X86_MCE_THRESHOLD
depends on X86_MCE_AMD || X86_MCE_INTEL
bool
default y

config X86_MCE_NONFATAL
tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4"
depends on X86_32 && X86_MCE
Expand Down
1 change: 1 addition & 0 deletions arch/x86/include/asm/apicdef.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
#define APIC_ESR_SENDILL 0x00020
#define APIC_ESR_RECVILL 0x00040
#define APIC_ESR_ILLREGA 0x00080
#define APIC_LVTCMCI 0x2f0
#define APIC_ICR 0x300
#define APIC_DEST_SELF 0x40000
#define APIC_DEST_ALLINC 0x80000
Expand Down
35 changes: 32 additions & 3 deletions arch/x86/include/asm/mce.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
*/

#define MCG_CTL_P (1UL<<8) /* MCG_CAP register available */
#define MCG_EXT_P (1ULL<<9) /* Extended registers available */
#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */

#define MCG_STATUS_RIPV (1UL<<0) /* restart ip valid */
#define MCG_STATUS_EIPV (1UL<<1) /* ip points to correct instruction */
Expand Down Expand Up @@ -90,14 +92,29 @@ extern int mce_disabled;

#include <asm/atomic.h>

void mce_setup(struct mce *m);
void mce_log(struct mce *m);
DECLARE_PER_CPU(struct sys_device, device_mce);
extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);

/*
* To support more than 128 would need to escape the predefined
* Linux defined extended banks first.
*/
#define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1)

#ifdef CONFIG_X86_MCE_INTEL
void mce_intel_feature_init(struct cpuinfo_x86 *c);
void cmci_clear(void);
void cmci_reenable(void);
void cmci_rediscover(int dying);
void cmci_recheck(void);
#else
static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { }
static inline void cmci_clear(void) {}
static inline void cmci_reenable(void) {}
static inline void cmci_rediscover(int dying) {}
static inline void cmci_recheck(void) {}
#endif

#ifdef CONFIG_X86_MCE_AMD
Expand All @@ -106,11 +123,23 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c);
static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
#endif

void mce_log_therm_throt_event(unsigned int cpu, __u64 status);
extern int mce_available(struct cpuinfo_x86 *c);

void mce_log_therm_throt_event(__u64 status);

extern atomic_t mce_entry;

extern void do_machine_check(struct pt_regs *, long);

typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);

enum mcp_flags {
MCP_TIMESTAMP = (1 << 0), /* log time stamp */
MCP_UC = (1 << 1), /* log uncorrected errors */
};
extern void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);

extern int mce_notify_user(void);

#endif /* !CONFIG_X86_32 */
Expand All @@ -120,8 +149,8 @@ extern void mcheck_init(struct cpuinfo_x86 *c);
#else
#define mcheck_init(c) do { } while (0)
#endif
extern void stop_mce(void);
extern void restart_mce(void);

extern void (*mce_threshold_vector)(void);

#endif /* __KERNEL__ */
#endif /* _ASM_X86_MCE_H */
5 changes: 5 additions & 0 deletions arch/x86/include/asm/msr-index.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@
#define MSR_IA32_MC0_ADDR 0x00000402
#define MSR_IA32_MC0_MISC 0x00000403

/* These are consecutive and not in the normal 4er MCE bank block */
#define MSR_IA32_MC0_CTL2 0x00000280
#define CMCI_EN (1ULL << 30)
#define CMCI_THRESHOLD_MASK 0xffffULL

#define MSR_P6_PERFCTR0 0x000000c1
#define MSR_P6_PERFCTR1 0x000000c2
#define MSR_P6_EVNTSEL0 0x00000186
Expand Down
17 changes: 11 additions & 6 deletions arch/x86/kernel/alternative.c
Original file line number Diff line number Diff line change
Expand Up @@ -414,9 +414,17 @@ void __init alternative_instructions(void)
that might execute the to be patched code.
Other CPUs are not running. */
stop_nmi();
#ifdef CONFIG_X86_MCE
stop_mce();
#endif

/*
* Don't stop machine check exceptions while patching.
* MCEs only happen when something got corrupted and in this
* case we must do something about the corruption.
* Ignoring it is worse than a unlikely patching race.
* Also machine checks tend to be broadcast and if one CPU
* goes into machine check the others follow quickly, so we don't
* expect a machine check to cause undue problems during to code
* patching.
*/

apply_alternatives(__alt_instructions, __alt_instructions_end);

Expand Down Expand Up @@ -456,9 +464,6 @@ void __init alternative_instructions(void)
(unsigned long)__smp_locks_end);

restart_nmi();
#ifdef CONFIG_X86_MCE
restart_mce();
#endif
}

/**
Expand Down
15 changes: 15 additions & 0 deletions arch/x86/kernel/apic/apic.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#include <asm/idle.h>
#include <asm/mtrr.h>
#include <asm/smp.h>
#include <asm/mce.h>

unsigned int num_processors;

Expand Down Expand Up @@ -842,6 +843,14 @@ void clear_local_APIC(void)
apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
}
#endif
#ifdef CONFIG_X86_MCE_INTEL
if (maxlvt >= 6) {
v = apic_read(APIC_LVTCMCI);
if (!(v & APIC_LVT_MASKED))
apic_write(APIC_LVTCMCI, v | APIC_LVT_MASKED);
}
#endif

/*
* Clean APIC state for other OSs:
*/
Expand Down Expand Up @@ -1241,6 +1250,12 @@ void __cpuinit setup_local_APIC(void)
apic_write(APIC_LVT1, value);

preempt_enable();

#ifdef CONFIG_X86_MCE_INTEL
/* Recheck CMCI information after local APIC is up on CPU #0 */
if (smp_processor_id() == 0)
cmci_recheck();
#endif
}

void __cpuinit end_local_APIC_setup(void)
Expand Down
1 change: 1 addition & 0 deletions arch/x86/kernel/cpu/mcheck/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ obj-$(CONFIG_X86_32) += k7.o p4.o p5.o p6.o winchip.o
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o
obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o
obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
14 changes: 0 additions & 14 deletions arch/x86/kernel/cpu/mcheck/mce_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,20 +60,6 @@ void mcheck_init(struct cpuinfo_x86 *c)
}
}

static unsigned long old_cr4 __initdata;

void __init stop_mce(void)
{
old_cr4 = read_cr4();
clear_in_cr4(X86_CR4_MCE);
}

void __init restart_mce(void)
{
if (old_cr4 & X86_CR4_MCE)
set_in_cr4(X86_CR4_MCE);
}

static int __init mcheck_disable(char *str)
{
mce_disabled = 1;
Expand Down
Loading

0 comments on commit caab36b

Please sign in to comment.