Skip to content

Commit

Permalink
ipmi_watchdog: restore settings when BMC reset
Browse files Browse the repository at this point in the history
If the BMC gets reset, it will return 0x80 response errors.

In less than a week
# grep "Error 80 on cmd 22" /var/log/kernel |wc -l
378681

In this case, it is probably a good idea to restore the IPMI settings.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
Tested-by: Arkadiusz Miśkiewicz <a.miskiewicz@gmail.com>
Reported-by: Arkadiusz Miśkiewicz <a.miskiewicz@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
Corey Minyard authored and Linus Torvalds committed Dec 20, 2011
1 parent ff05b6f commit b75d91f
Showing 1 changed file with 38 additions and 3 deletions.
41 changes: 38 additions & 3 deletions drivers/char/ipmi/ipmi_watchdog.c
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@
#define IPMI_WDOG_SET_TIMER 0x24
#define IPMI_WDOG_GET_TIMER 0x25

#define IPMI_WDOG_TIMER_NOT_INIT_RESP 0x80

/* These are here until the real ones get into the watchdog.h interface. */
#ifndef WDIOC_GETTIMEOUT
#define WDIOC_GETTIMEOUT _IOW(WATCHDOG_IOCTL_BASE, 20, int)
Expand Down Expand Up @@ -596,6 +598,7 @@ static int ipmi_heartbeat(void)
struct kernel_ipmi_msg msg;
int rv;
struct ipmi_system_interface_addr addr;
int timeout_retries = 0;

if (ipmi_ignore_heartbeat)
return 0;
Expand All @@ -616,6 +619,7 @@ static int ipmi_heartbeat(void)

mutex_lock(&heartbeat_lock);

restart:
atomic_set(&heartbeat_tofree, 2);

/*
Expand Down Expand Up @@ -653,7 +657,33 @@ static int ipmi_heartbeat(void)
/* Wait for the heartbeat to be sent. */
wait_for_completion(&heartbeat_wait);

if (heartbeat_recv_msg.msg.data[0] != 0) {
if (heartbeat_recv_msg.msg.data[0] == IPMI_WDOG_TIMER_NOT_INIT_RESP) {
timeout_retries++;
if (timeout_retries > 3) {
printk(KERN_ERR PFX ": Unable to restore the IPMI"
" watchdog's settings, giving up.\n");
rv = -EIO;
goto out_unlock;
}

/*
* The timer was not initialized, that means the BMC was
* probably reset and lost the watchdog information. Attempt
* to restore the timer's info. Note that we still hold
* the heartbeat lock, to keep a heartbeat from happening
* in this process, so must say no heartbeat to avoid a
* deadlock on this mutex.
*/
rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
if (rv) {
printk(KERN_ERR PFX ": Unable to send the command to"
" set the watchdog's settings, giving up.\n");
goto out_unlock;
}

/* We might need a new heartbeat, so do it now */
goto restart;
} else if (heartbeat_recv_msg.msg.data[0] != 0) {
/*
* Got an error in the heartbeat response. It was already
* reported in ipmi_wdog_msg_handler, but we should return
Expand All @@ -662,6 +692,7 @@ static int ipmi_heartbeat(void)
rv = -EINVAL;
}

out_unlock:
mutex_unlock(&heartbeat_lock);

return rv;
Expand Down Expand Up @@ -922,11 +953,15 @@ static struct miscdevice ipmi_wdog_miscdev = {
static void ipmi_wdog_msg_handler(struct ipmi_recv_msg *msg,
void *handler_data)
{
if (msg->msg.data[0] != 0) {
if (msg->msg.cmd == IPMI_WDOG_RESET_TIMER &&
msg->msg.data[0] == IPMI_WDOG_TIMER_NOT_INIT_RESP)
printk(KERN_INFO PFX "response: The IPMI controller appears"
" to have been reset, will attempt to reinitialize"
" the watchdog timer\n");
else if (msg->msg.data[0] != 0)
printk(KERN_ERR PFX "response: Error %x on cmd %x\n",
msg->msg.data[0],
msg->msg.cmd);
}

ipmi_free_recv_msg(msg);
}
Expand Down

0 comments on commit b75d91f

Please sign in to comment.