---
r: 148933
b: refs/heads/master
c: 9b1beaf
h: refs/heads/master
i:
  148931: 21054c8
v: v3
Andi Kleen authored and H. Peter Anvin committed Jun 3, 2009
1 parent 24e19e9 commit 7920cac
Showing 4 changed files with 136 additions and 2 deletions.
2 changes: 1 addition & 1 deletion [refs]
@@ -1,2 +1,2 @@
---
-refs/heads/master: 8fa8dd9e3aafb7b440b7d54219891615abc6390e
+refs/heads/master: 9b1beaf2b551a8a1604f104025b24e9c535c8963
1 change: 1 addition & 0 deletions trunk/arch/x86/include/asm/mce.h
@@ -160,6 +160,7 @@ enum mcp_flags {
void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);

int mce_notify_irq(void);
void mce_notify_process(void);

DECLARE_PER_CPU(struct mce, injectm);
extern struct file_operations mce_chrdev_ops;
133 changes: 133 additions & 0 deletions trunk/arch/x86/kernel/cpu/mcheck/mce.c
@@ -33,6 +33,7 @@
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/fs.h>
#include <linux/mm.h>

#include <asm/processor.h>
#include <asm/hw_irq.h>
@@ -105,6 +106,8 @@ static inline int skip_bank_init(int i)
return i < BITS_PER_LONG && test_bit(i, &dont_init_banks);
}

static DEFINE_PER_CPU(struct work_struct, mce_work);

/* Do initial initialization of a struct mce */
void mce_setup(struct mce *m)
{
@@ -312,13 +315,77 @@ static void mce_wrmsrl(u32 msr, u64 v)
wrmsrl(msr, v);
}

/*
* Simple lockless ring to communicate PFNs from the exception handler to the
* process context work function. This is vastly simplified because there's
* only a single reader and a single writer.
*/
#define MCE_RING_SIZE 16 /* we use one entry less */

struct mce_ring {
unsigned short start;
unsigned short end;
unsigned long ring[MCE_RING_SIZE];
};
static DEFINE_PER_CPU(struct mce_ring, mce_ring);

/* Runs with CPU affinity in workqueue */
static int mce_ring_empty(void)
{
struct mce_ring *r = &__get_cpu_var(mce_ring);

return r->start == r->end;
}

static int mce_ring_get(unsigned long *pfn)
{
struct mce_ring *r;
int ret = 0;

*pfn = 0;
get_cpu();
r = &__get_cpu_var(mce_ring);
if (r->start == r->end)
goto out;
*pfn = r->ring[r->start];
r->start = (r->start + 1) % MCE_RING_SIZE;
ret = 1;
out:
put_cpu();
return ret;
}

/* Always runs in MCE context with preempt off */
static int mce_ring_add(unsigned long pfn)
{
struct mce_ring *r = &__get_cpu_var(mce_ring);
unsigned next;

next = (r->end + 1) % MCE_RING_SIZE;
if (next == r->start)
return -1;
r->ring[r->end] = pfn;
wmb();
r->end = next;
return 0;
}

int mce_available(struct cpuinfo_x86 *c)
{
if (mce_disabled)
return 0;
return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
}

static void mce_schedule_work(void)
{
if (!mce_ring_empty()) {
struct work_struct *work = &__get_cpu_var(mce_work);
if (!work_pending(work))
schedule_work(work);
}
}

/*
* Get the address of the instruction at the time of the machine check
* error.
@@ -349,6 +416,7 @@ asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs)
exit_idle();
irq_enter();
mce_notify_irq();
mce_schedule_work();
irq_exit();
}
#endif
@@ -357,6 +425,13 @@ static void mce_report_event(struct pt_regs *regs)
{
if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) {
mce_notify_irq();
/*
* Triggering the work queue here is just an insurance
* policy in case the syscall exit notify handler
* doesn't run soon enough or ends up running on the
* wrong CPU (can happen when audit sleeps)
*/
mce_schedule_work();
return;
}

@@ -731,6 +806,23 @@ static int mce_end(int order)
return ret;
}

/*
* Check if the address reported by the CPU is in a format we can parse.
* It would be possible to add code for most other cases, but all would
* be somewhat complicated (e.g. segment offset would require an instruction
* parser). So only support physical addresses up to page granularity for now.
*/
static int mce_usable_address(struct mce *m)
{
if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV))
return 0;
if ((m->misc & 0x3f) > PAGE_SHIFT)
return 0;
if (((m->misc >> 6) & 7) != MCM_ADDR_PHYS)
return 0;
return 1;
}
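
mce_usable_address() decodes two fields of MCi_MISC: bits 5:0 give the least significant valid bit of the address in MCi_ADDR (its granularity), and bits 8:6 give the address mode, which must be "physical" for the page to be poisonable. A small stand-alone sketch of that decode follows; the constant values are assumptions meant to mirror their asm/mce.h definitions.

#include <stdio.h>

#define PAGE_SHIFT      12      /* 4 KiB pages on x86 */
#define MCM_ADDR_PHYS   2       /* assumed value of the "physical address" mode */

int main(void)
{
        /* example MCi_MISC value: physical address, page granularity */
        unsigned long long misc = (MCM_ADDR_PHYS << 6) | PAGE_SHIFT;

        unsigned lsb  = misc & 0x3f;            /* least significant valid address bit */
        unsigned mode = (misc >> 6) & 7;        /* address mode field */

        printf("lsb=%u mode=%u usable=%d\n", lsb, mode,
               lsb <= PAGE_SHIFT && mode == MCM_ADDR_PHYS);
        return 0;
}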

static void mce_clear_state(unsigned long *toclear)
{
int i;
@@ -865,6 +957,16 @@ void do_machine_check(struct pt_regs *regs, long error_code)
if (m.status & MCI_STATUS_ADDRV)
m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4);

/*
* Action optional error. Queue address for later processing.
* When the ring overflows we just ignore the AO error.
* RED-PEN add some logging mechanism when
* mce_usable_address or mce_ring_add fails.
* RED-PEN don't ignore overflow for tolerant == 0
*/
if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
mce_ring_add(m.addr >> PAGE_SHIFT);
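
Only the page frame number is queued: the low PAGE_SHIFT bits of MCi_ADDR are discarded because poisoning is done at page granularity. A trivial stand-alone illustration (the address value is made up):

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
        unsigned long long addr = 0x12345678ULL;        /* example MCi_ADDR */
        unsigned long pfn = addr >> PAGE_SHIFT;

        printf("addr %#llx -> pfn %#lx (page starts at %#llx)\n",
               addr, pfn, (unsigned long long)pfn << PAGE_SHIFT);
        return 0;
}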

mce_get_rip(&m, regs);
mce_log(&m);

@@ -916,6 +1018,36 @@ void do_machine_check(struct pt_regs *regs, long error_code)
}
EXPORT_SYMBOL_GPL(do_machine_check);

/* dummy to break dependency. actual code is in mm/memory-failure.c */
void __attribute__((weak)) memory_failure(unsigned long pfn, int vector)
{
printk(KERN_ERR "Action optional memory failure at %lx ignored\n", pfn);
}
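
The weak attribute lets this file reference memory_failure() even when the real implementation in mm/memory-failure.c is not built in: at link time a non-weak definition, if present, silently replaces the weak stub. A tiny two-file illustration of the mechanism; the file names and handler are made up for the example.

/* stub.c -- weak fallback, analogous to the dummy above */
#include <stdio.h>

void __attribute__((weak)) report_pfn(unsigned long pfn)
{
        printf("no real handler, pfn %#lx ignored\n", pfn);
}

int main(void)
{
        report_pfn(0x1234);
        return 0;
}

/* real.c -- optional strong definition; if compiled and linked in
 * (cc stub.c real.c), the linker picks it over the weak stub:
 *
 *      #include <stdio.h>
 *      void report_pfn(unsigned long pfn)
 *      {
 *              printf("handling pfn %#lx\n", pfn);
 *      }
 */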

/*
* Called after mce notification in process context. This code
* is allowed to sleep. Call the high level VM handler to process
* any corrupted pages.
* Assume that the work queue code only calls this one at a time
* per CPU.
* Note we don't disable preemption, so this code might run on the wrong
* CPU. In this case the event is picked up by the scheduled work queue.
* This is merely a fast path to expedite processing in some common
* cases.
*/
void mce_notify_process(void)
{
unsigned long pfn;
mce_notify_irq();
while (mce_ring_get(&pfn))
memory_failure(pfn, MCE_VECTOR);
}

static void mce_process_work(struct work_struct *dummy)
{
mce_notify_process();
}

#ifdef CONFIG_X86_MCE_INTEL
/***
* mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
@@ -1204,6 +1336,7 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
mce_init();
mce_cpu_features(c);
mce_init_timer();
INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
}

/*
2 changes: 1 addition & 1 deletion trunk/arch/x86/kernel/signal.c
@@ -860,7 +860,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
#ifdef CONFIG_X86_NEW_MCE
/* notify userspace of pending MCEs */
if (thread_info_flags & _TIF_MCE_NOTIFY)
-mce_notify_irq();
+mce_notify_process();
#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */

/* deal with pending signal delivery */
