Skip to content

Commit

Permalink
Blackfin: implement nmi_watchdog for SMP on BF561
Browse files Browse the repository at this point in the history
Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
  • Loading branch information
Graf Yang authored and Mike Frysinger committed Mar 9, 2010
1 parent 726e965 commit 60ffdb3
Show file tree
Hide file tree
Showing 8 changed files with 361 additions and 1 deletion.
9 changes: 9 additions & 0 deletions arch/blackfin/Kconfig.debug
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,15 @@ config EARLY_PRINTK
all of this lives in the init section and is thrown away after the
kernel boots completely.

config NMI_WATCHDOG
bool "Enable NMI watchdog to help debugging lockup on SMP"
default n
depends on (SMP && !BFIN_SCRATCH_REG_RETN)
help
If any CPU in the system does not execute the periodic local timer
interrupt for more than 5 seconds, then the NMI handler dumps debug
information. This information can be used to debug the lockup.

config CPLB_INFO
bool "Display the CPLB information"
help
Expand Down
4 changes: 4 additions & 0 deletions arch/blackfin/include/asm/irq.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,8 @@

#include <asm-generic/irq.h>

#ifdef CONFIG_NMI_WATCHDOG
# define ARCH_HAS_NMI_WATCHDOG
#endif

#endif /* _BFIN_IRQ_H_ */
12 changes: 12 additions & 0 deletions arch/blackfin/include/asm/nmi.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/*
* Copyright 2010 Analog Devices Inc.
*
* Licensed under the GPL-2
*/

#ifndef _BFIN_NMI_H_
#define _BFIN_NMI_H_

#include <linux/nmi.h>

#endif
1 change: 1 addition & 0 deletions arch/blackfin/include/asm/smp.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ extern char coreb_trampoline_start, coreb_trampoline_end;
/* Holds a single lock word. */
struct corelock_slot {
int lock;
};
/*
 * The one shared instance. nmi.c copies it aside and zeroes ->lock while
 * the NMI handler is dumping, then copies the saved value back.
 */
extern struct corelock_slot corelock;

void smp_icache_flush_range_others(unsigned long start,
unsigned long end);
Expand Down
1 change: 1 addition & 0 deletions arch/blackfin/kernel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ obj-$(CONFIG_CPLB_INFO) += cplbinfo.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_KGDB_TESTS) += kgdb_test.o
obj-$(CONFIG_NMI_WATCHDOG) += nmi.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_EARLY_PRINTK) += shadow_console.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
Expand Down
313 changes: 313 additions & 0 deletions arch/blackfin/kernel/nmi.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,313 @@
/*
* Blackfin nmi_watchdog Driver
*
* Originally based on bfin_wdt.c
* Copyright 2010-2010 Analog Devices Inc.
* Graf Yang <graf.yang@analog.com>
*
* Enter bugs at http://blackfin.uclinux.org/
*
* Licensed under the GPL-2 or later.
*/

#include <linux/bitops.h>
#include <linux/hardirq.h>
#include <linux/sysdev.h>
#include <linux/pm.h>
#include <linux/nmi.h>
#include <linux/smp.h>
#include <linux/timer.h>
#include <asm/blackfin.h>
#include <asm/atomic.h>
#include <asm/cacheflush.h>

/* Bit in WDOG_CTL that indicates watchdog has expired (WDR0) */
#define WDOG_EXPIRED 0x8000

/*
 * Masks for WDEV field in WDOG_CTL register.
 * This driver uses ICTL_NMI when arming a watchdog and ICTL_NONE
 * (event generation disabled) when clearing one.
 */
#define ICTL_RESET 0x0
#define ICTL_NMI 0x2
#define ICTL_GPI 0x4
#define ICTL_NONE 0x6
#define ICTL_MASK 0x6

/*
 * Masks for WDEN field in WDOG_CTL register.
 * nmi_wdt_running() treats any WDEN value other than WDEN_DISABLE as
 * "running".
 */
#define WDEN_MASK 0x0FF0
#define WDEN_ENABLE 0x0000
#define WDEN_DISABLE 0x0AD0

#define DRV_NAME "nmi-wdt"

#define NMI_WDT_TIMEOUT 5 /* 5 seconds */
/* Period of the software check timer; shorter than NMI_WDT_TIMEOUT. */
#define NMI_CHECK_TIMEOUT (4 * HZ) /* 4 seconds in jiffies */
/* CPU that takes the "CoreB" path in do_nmi(). */
static int nmi_wdt_cpu = 1;

/* Timeout in seconds; updated by nmi_wdt_set_timeout(). */
static unsigned int timeout = NMI_WDT_TIMEOUT;
/* Set at init once WDOGB is started; checked by resume and sysfs setup. */
static int nmi_active;

/* WDOGA registers saved by send_corea_nmi() for restore_corea_nmi(). */
static unsigned short wdoga_ctl;
static unsigned int wdoga_cnt;
/* Copy of corelock taken by save_corelock(). */
static struct corelock_slot saved_corelock;
/*
 * Per-CPU flags: set by touch_nmi_watchdog(), tested and cleared by
 * check_nmi_wdt_touched().
 */
static atomic_t nmi_touched[NR_CPUS];
/* Timer that runs nmi_wdt_timer() every NMI_CHECK_TIMEOUT jiffies. */
static struct timer_list ntimer;

/*
 * Bit numbers within nmi_event, used by do_nmi() to sequence the two
 * cores through the NMI handler.
 */
enum {
COREA_ENTER_NMI = 0,
COREA_EXIT_NMI,
COREB_EXIT_NMI,

NMI_EVENT_NR,
};
/*
 * Event bitmap shared by both cores' NMI handlers; placed in the .l2.bss
 * section.
 * NOTE(review): presumably L2 is used so both cores see the same copy
 * without cache maintenance -- confirm.
 */
static unsigned long nmi_event __attribute__ ((__section__(".l2.bss")));

/*
 * Raise @event (one of the COREx_*_NMI bit numbers) in nmi_event.
 * We are in NMI context, so the non-atomic bit op is safe.
 */
static inline void set_nmi_event(int event)
{
__set_bit(event, &nmi_event);
}

/*
 * Busy-wait until @event is raised in nmi_event, then consume it by
 * clearing the bit again. There is no timeout: this spins until the other
 * core calls set_nmi_event() for the same bit.
 */
static inline void wait_nmi_event(int event)
{
	for (;;) {
		if (test_bit(event, &nmi_event))
			break;
		barrier();
	}

	__clear_bit(event, &nmi_event);
}

/*
 * Save the current WDOGA control and count registers, then reprogram WDOGA
 * with a zero count and NMI event generation. do_nmi() uses this to
 * trigger an NMI on CoreA; restore_corea_nmi() puts the saved values back.
 */
static inline void send_corea_nmi(void)
{
wdoga_ctl = bfin_read_WDOGA_CTL();
wdoga_cnt = bfin_read_WDOGA_CNT();

/* disable, load a zero count, then re-enable with NMI as the event */
bfin_write_WDOGA_CTL(WDEN_DISABLE);
bfin_write_WDOGA_CNT(0);
bfin_write_WDOGA_CTL(WDEN_ENABLE | ICTL_NMI);
}

/*
 * Undo send_corea_nmi(): disable WDOGA, write WDOG_EXPIRED with event
 * generation off (the same value nmi_wdt_clear() writes to WDOGB), then
 * restore the count and control values saved earlier.
 */
static inline void restore_corea_nmi(void)
{
bfin_write_WDOGA_CTL(WDEN_DISABLE);
bfin_write_WDOGA_CTL(WDOG_EXPIRED | WDEN_DISABLE | ICTL_NONE);

/* count first, control last, using the values saved by send_corea_nmi() */
bfin_write_WDOGA_CNT(wdoga_cnt);
bfin_write_WDOGA_CTL(wdoga_ctl);
}

/*
 * Copy the global corelock into saved_corelock and then zero its lock
 * word. Called from do_nmi() before the dump; restore_corelock() puts the
 * saved value back.
 */
static inline void save_corelock(void)
{
saved_corelock = corelock;
corelock.lock = 0;
}

/* Put back the corelock value captured by save_corelock(). */
static inline void restore_corelock(void)
{
corelock = saved_corelock;
}


/*
 * Service the WDOGB watchdog by writing to WDOGB_STAT; do_nmi() describes
 * this as reloading WDOG_STAT.
 * NOTE(review): presumably the write reloads the counter from WDOGB_CNT
 * regardless of the value written -- confirm against the hardware manual.
 */
static inline void nmi_wdt_keepalive(void)
{
bfin_write_WDOGB_STAT(0);
}

/* Disable the WDOGB watchdog by writing WDEN_DISABLE to its control register. */
static inline void nmi_wdt_stop(void)
{
bfin_write_WDOGB_CTL(WDEN_DISABLE);
}

/*
 * Acknowledge a WDOGB expiry.
 * Before calling this function, you must stop the WDT (nmi_wdt_stop()).
 */
static inline void nmi_wdt_clear(void)
{
/* clear the expired bit (WDOG_EXPIRED), disable event generation */
bfin_write_WDOGB_CTL(WDOG_EXPIRED | WDEN_DISABLE | ICTL_NONE);
}

/* Enable the WDOGB watchdog with NMI selected as its event. */
static inline void nmi_wdt_start(void)
{
bfin_write_WDOGB_CTL(WDEN_ENABLE | ICTL_NMI);
}

/*
 * Report whether WDOGB is currently enabled.
 *
 * Returns non-zero when the WDEN field of WDOGB_CTL holds anything other
 * than WDEN_DISABLE, zero otherwise.
 */
static inline int nmi_wdt_running(void)
{
	unsigned int wden = bfin_read_WDOGB_CTL() & WDEN_MASK;

	return wden != WDEN_DISABLE;
}

static inline int nmi_wdt_set_timeout(unsigned long t)
{
u32 cnt, max_t, sclk;
int run;

sclk = get_sclk();
max_t = -1 / sclk;
cnt = t * sclk;
if (t > max_t) {
pr_warning("NMI: timeout value is too large\n");
return -EINVAL;
}

run = nmi_wdt_running();
nmi_wdt_stop();
bfin_write_WDOGB_CNT(cnt);
if (run)
nmi_wdt_start();

timeout = t;

return 0;
}

/*
 * Check whether every online CPU has called touch_nmi_watchdog() since the
 * previous check.
 *
 * The calling CPU's flag is examined first, then each other online CPU's.
 * Every flag found set is cleared as it is visited; the scan stops at the
 * first flag found clear, leaving later flags untouched.
 *
 * Returns 1 if all flags were set, 0 otherwise.
 */
int check_nmi_wdt_touched(void)
{
	unsigned int self = smp_processor_id();
	unsigned int other;
	cpumask_t others = cpu_online_map;

	if (atomic_read(&nmi_touched[self]) == 0)
		return 0;

	atomic_set(&nmi_touched[self], 0);

	cpu_clear(self, others);
	for_each_cpu_mask(other, others) {
		atomic_t *flag = &nmi_touched[other];
		unsigned long addr = (unsigned long)flag;

		/*
		 * NOTE(review): start == end here; presumably this still
		 * invalidates the cache line holding the flag -- confirm
		 * against invalidate_dcache_range().
		 */
		invalidate_dcache_range(addr, addr);
		if (atomic_read(flag) == 0)
			return 0;
		atomic_set(flag, 0);
	}

	return 1;
}

/*
 * Periodic timer callback (@data is unused).
 *
 * Services the hardware watchdog only when every online CPU has touched
 * it since the last run, then re-arms itself NMI_CHECK_TIMEOUT jiffies
 * from now.
 */
static void nmi_wdt_timer(unsigned long data)
{
	int all_cpus_alive = check_nmi_wdt_touched();

	if (all_cpus_alive)
		nmi_wdt_keepalive();

	mod_timer(&ntimer, jiffies + NMI_CHECK_TIMEOUT);
}

/*
 * Program and start the WDOGB NMI watchdog, then start the periodic timer
 * that services it.
 *
 * If the configured timeout cannot be programmed, the watchdog is left
 * stopped and nmi_active stays 0, so the PM/sysfs code stays inert too.
 *
 * Returns 0 on success or the error from nmi_wdt_set_timeout().
 */
static int __init init_nmi_wdt(void)
{
	int ret;

	/* do not arm the watchdog with a count that was never programmed */
	ret = nmi_wdt_set_timeout(timeout);
	if (ret)
		return ret;

	nmi_wdt_start();
	nmi_active = 1;

	init_timer(&ntimer);
	ntimer.function = nmi_wdt_timer;
	ntimer.expires = jiffies + NMI_CHECK_TIMEOUT;
	add_timer(&ntimer);

	/* timeout is unsigned int, hence %u */
	pr_info("nmi_wdt: initialized: timeout=%u sec\n", timeout);
	return 0;
}
device_initcall(init_nmi_wdt);

/*
 * Mark the calling CPU as alive for the next check_nmi_wdt_touched() pass
 * by setting its nmi_touched flag.
 */
void touch_nmi_watchdog(void)
{
	unsigned int cpu = smp_processor_id();

	atomic_set(&nmi_touched[cpu], 1);
}

/* Suspend/resume support */
#ifdef CONFIG_PM
/*
 * sysdev suspend hook: stop WDOGB. @dev and @state are unused.
 * Always returns 0.
 */
static int nmi_wdt_suspend(struct sys_device *dev, pm_message_t state)
{
nmi_wdt_stop();
return 0;
}

/*
 * sysdev resume hook: restart WDOGB, but only if it was started at init
 * (nmi_active set). @dev is unused. Always returns 0.
 */
static int nmi_wdt_resume(struct sys_device *dev)
{
	if (!nmi_active)
		return 0;

	nmi_wdt_start();
	return 0;
}

/* sysdev class that carries the suspend/resume hooks for the NMI watchdog. */
static struct sysdev_class nmi_sysclass = {
.name = DRV_NAME,
.resume = nmi_wdt_resume,
.suspend = nmi_wdt_suspend,
};

/* The single device instance registered under nmi_sysclass. */
static struct sys_device device_nmi_wdt = {
.id = 0,
.cls = &nmi_sysclass,
};

/*
 * Register the sysdev class and device that provide suspend/resume
 * handling. Does nothing when the watchdog was not started (nmi_active
 * clear).
 *
 * Returns 0 on success or when skipped, otherwise the first registration
 * error.
 */
static int __init init_nmi_wdt_sysfs(void)
{
	int ret;

	if (!nmi_active)
		return 0;

	ret = sysdev_class_register(&nmi_sysclass);
	if (ret)
		return ret;

	return sysdev_register(&device_nmi_wdt);
}
late_initcall(init_nmi_wdt_sysfs);

#endif /* CONFIG_PM */


/*
 * NMI handler, entered on both cores when the WDOGB watchdog expires.
 *
 * CoreB (cpu == nmi_wdt_cpu) takes the NMI first, forces an NMI on CoreA
 * through WDOGA, and waits while CoreA dumps its state. CoreB then dumps
 * its own state. The cores hand off to each other through the nmi_event
 * bits:
 *
 *   CoreB: trigger CoreA, wait COREA_ENTER_NMI, restore WDOGA,
 *          save/clear corelock, wait COREA_EXIT_NMI, dump,
 *          restore corelock, set COREB_EXIT_NMI
 *   CoreA: set COREA_ENTER_NMI, dump, set COREA_EXIT_NMI,
 *          wait COREB_EXIT_NMI
 *
 * @fp: register frame at the point the NMI was taken; passed to the dump
 *      helpers.
 */
asmlinkage notrace void do_nmi(struct pt_regs *fp)
{
unsigned int cpu = smp_processor_id();
nmi_enter();

cpu_pda[cpu].__nmi_count += 1;

if (cpu == nmi_wdt_cpu) {
/* CoreB goes here first */

/* reload the WDOG_STAT */
nmi_wdt_keepalive();

/* clear nmi interrupt for CoreB */
nmi_wdt_stop();
nmi_wdt_clear();

/* trigger NMI interrupt of CoreA */
send_corea_nmi();

/* wait for CoreA to enter NMI */
wait_nmi_event(COREA_ENTER_NMI);

/* recover WDOGA's settings */
restore_corea_nmi();

save_corelock();

/* corelock is saved and cleared, CoreA is dumping its messages */

wait_nmi_event(COREA_EXIT_NMI);
} else {
/* OK, CoreA entered NMI */
set_nmi_event(COREA_ENTER_NMI);
}

/* each core dumps its own state; CoreA gets here first, CoreB after it */
pr_emerg("\nNMI Watchdog detected LOCKUP, dump for CPU %d\n", cpu);
dump_bfin_process(fp);
dump_bfin_mem(fp);
show_regs(fp);
dump_bfin_trace_buffer();
show_stack(current, (unsigned long *)fp);

if (cpu == nmi_wdt_cpu) {
pr_emerg("This fault is not recoverable, sorry!\n");

/* both dumps are finished (CoreA's was awaited above), restore the corelock */
restore_corelock();

/* release CoreA, which is waiting for this event */
set_nmi_event(COREB_EXIT_NMI);
} else {
/* CoreA dump finished, notify CoreB that we are done */
set_nmi_event(COREA_EXIT_NMI);

/* wait for CoreB to finish its own dump */
wait_nmi_event(COREB_EXIT_NMI);
}

nmi_exit();
}
4 changes: 4 additions & 0 deletions arch/blackfin/kernel/time-ts.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <asm/blackfin.h>
#include <asm/time.h>
#include <asm/gptimers.h>
#include <asm/nmi.h>

/* Accelerators for sched_clock()
* convert from cycles(64bits) => nanoseconds (64bits)
Expand Down Expand Up @@ -309,6 +310,9 @@ irqreturn_t bfin_coretmr_interrupt(int irq, void *dev_id)

smp_mb();
evt->event_handler(evt);

touch_nmi_watchdog();

return IRQ_HANDLED;
}

Expand Down
Loading

0 comments on commit 60ffdb3

Please sign in to comment.