Skip to content

Commit

Permalink
x86/mce: Reenable CMCI banks when swiching back to interrupt mode
Browse files Browse the repository at this point in the history
Zhang Liguang reported the following issue:

1) System detects a CMCI storm on the current CPU.

2) Kernel disables the CMCI interrupt on banks owned by the
   current CPU and switches to poll mode

3) After the CMCI storm subsides, kernel switches back to
   interrupt mode

4) We expect the system to reenable the CMCI interrupt on banks
   owned by the current CPU

   mce_intel_adjust_timer
   |-> cmci_reenable
       |-> cmci_discover     # owned banks are ignored here

  static void cmci_discover(int banks)
	...
	for (i = 0; i < banks; i++) {
		...
		if (test_bit(i, owned))	# ownd banks is ignore here
			continue;

So convert cmci_storm_disable_banks() to
cmci_toggle_interrupt_mode() which controls whether to enable or
disable CMCI interrupts with its argument.

NB: We cannot clear the owned bit because the banks won't be
polled, otherwise. See:

  27f6c57 ("x86, CMCI: Add proper detection of end of CMCI storms")

for more info.

Reported-by: Zhang Liguang <zhangliguang@huawei.com>
Signed-off-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: <stable@vger.kernel.org> # v3.15+
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: huawei.libin@huawei.com
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: rui.xiang@huawei.com
Link: http://lkml.kernel.org/r/1439396985-12812-10-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Xie XiuQi authored and Ingo Molnar committed Aug 13, 2015
1 parent 8838eb6 commit 1b48465
Showing 1 changed file with 23 additions and 18 deletions.
41 changes: 23 additions & 18 deletions arch/x86/kernel/cpu/mcheck/mce_intel.c
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,27 @@ void mce_intel_hcpu_update(unsigned long cpu)
per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
}

static void cmci_toggle_interrupt_mode(bool on)
{
unsigned long flags, *owned;
int bank;
u64 val;

raw_spin_lock_irqsave(&cmci_discover_lock, flags);
owned = this_cpu_ptr(mce_banks_owned);
for_each_set_bit(bank, owned, MAX_NR_BANKS) {
rdmsrl(MSR_IA32_MCx_CTL2(bank), val);

if (on)
val |= MCI_CTL2_CMCI_EN;
else
val &= ~MCI_CTL2_CMCI_EN;

wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
}
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

unsigned long cmci_intel_adjust_timer(unsigned long interval)
{
if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
Expand Down Expand Up @@ -175,7 +196,7 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval)
*/
if (!atomic_read(&cmci_storm_on_cpus)) {
__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
cmci_reenable();
cmci_toggle_interrupt_mode(true);
cmci_recheck();
}
return CMCI_POLL_INTERVAL;
Expand All @@ -186,22 +207,6 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval)
}
}

static void cmci_storm_disable_banks(void)
{
unsigned long flags, *owned;
int bank;
u64 val;

raw_spin_lock_irqsave(&cmci_discover_lock, flags);
owned = this_cpu_ptr(mce_banks_owned);
for_each_set_bit(bank, owned, MAX_NR_BANKS) {
rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
val &= ~MCI_CTL2_CMCI_EN;
wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
}
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

static bool cmci_storm_detect(void)
{
unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
Expand All @@ -223,7 +228,7 @@ static bool cmci_storm_detect(void)
if (cnt <= CMCI_STORM_THRESHOLD)
return false;

cmci_storm_disable_banks();
cmci_toggle_interrupt_mode(false);
__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
r = atomic_add_return(1, &cmci_storm_on_cpus);
mce_timer_kick(CMCI_STORM_INTERVAL);
Expand Down

0 comments on commit 1b48465

Please sign in to comment.