Skip to content

Commit

Permalink
[SCSI] mpt2sas: Support for stopping driver when Firmware encounters
Browse files Browse the repository at this point in the history
Added command line option and shost sysfs attribute called
mpt2sas_fwfault_debug. When enduser writes a "1" to this parameter, this
will enable support in the driver for debugging firmware timeout related
issues.  This handling was added in three areas (a) scsi error handling
callback called task_abort, (b) IOCTL interface, and (c) other timeouts that
result in diag resets, such as manufacturing config pages.  When this
support is enabled, the driver will provide dump_stack to console, halt
controller firmware, and panic driver. The end user probably would want to
setup serial console redirection so the dump stack can be seen.

Here are the three methods for enable this support:

(a) # insmod mpt2sas.ko mpt2sas_fwfault_debug=1
(b) # echo 1 > /sys/module/mpt2sas/parameters/mpt2sas_fwfault_debug
(c) # echo 1 > /sys/class/scsi_host/host#/fwfault_debug  (where # is
the host number)

Signed-off-by: Kashyap Desai <kashyap.desai@lsi.com>
Signed-off-by: Eric Moore <Eric.moore@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
  • Loading branch information
Kashyap, Desai authored and James Bottomley committed Oct 29, 2009
1 parent 9fec5f9 commit fa7f316
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 16 deletions.
88 changes: 74 additions & 14 deletions drivers/scsi/mpt2sas/mpt2sas_base.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,32 @@ static int msix_disable = -1;
module_param(msix_disable, int, 0);
MODULE_PARM_DESC(msix_disable, " disable msix routed interrupts (default=0)");

int mpt2sas_fwfault_debug;
MODULE_PARM_DESC(mpt2sas_fwfault_debug, " enable detection of firmware fault "
"and halt firmware - (default=0)");

/**
* _scsih_set_fwfault_debug - global setting of ioc->fwfault_debug.
*
*/
static int
_scsih_set_fwfault_debug(const char *val, struct kernel_param *kp)
{
int ret = param_set_int(val, kp);
struct MPT2SAS_ADAPTER *ioc;

if (ret)
return ret;

printk(KERN_INFO "setting logging_level(0x%08x)\n",
mpt2sas_fwfault_debug);
list_for_each_entry(ioc, &mpt2sas_ioc_list, list)
ioc->fwfault_debug = mpt2sas_fwfault_debug;
return 0;
}
module_param_call(mpt2sas_fwfault_debug, _scsih_set_fwfault_debug,
param_get_int, &mpt2sas_fwfault_debug, 0644);

/**
* _base_fault_reset_work - workq handling ioc fault conditions
* @work: input argument, used to derive ioc
Expand Down Expand Up @@ -177,6 +203,51 @@ mpt2sas_base_stop_watchdog(struct MPT2SAS_ADAPTER *ioc)
}
}

/**
* mpt2sas_base_fault_info - verbose translation of firmware FAULT code
* @ioc: per adapter object
* @fault_code: fault code
*
* Return nothing.
*/
void
mpt2sas_base_fault_info(struct MPT2SAS_ADAPTER *ioc , u16 fault_code)
{
printk(MPT2SAS_ERR_FMT "fault_state(0x%04x)!\n",
ioc->name, fault_code);
}

/**
* mpt2sas_halt_firmware - halt's mpt controller firmware
* @ioc: per adapter object
*
* For debugging timeout related issues. Writing 0xCOFFEE00
* to the doorbell register will halt controller firmware. With
* the purpose to stop both driver and firmware, the enduser can
* obtain a ring buffer from controller UART.
*/
void
mpt2sas_halt_firmware(struct MPT2SAS_ADAPTER *ioc)
{
u32 doorbell;

if (!ioc->fwfault_debug)
return;

dump_stack();

doorbell = readl(&ioc->chip->Doorbell);
if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT)
mpt2sas_base_fault_info(ioc , doorbell);
else {
writel(0xC0FFEE00, &ioc->chip->Doorbell);
printk(MPT2SAS_ERR_FMT "Firmware is halted due to command "
"timeout\n", ioc->name);
}

panic("panic in %s\n", __func__);
}

#ifdef CONFIG_SCSI_MPT2SAS_LOGGING
/**
* _base_sas_ioc_info - verbose translation of the ioc status
Expand Down Expand Up @@ -525,20 +596,6 @@ _base_sas_log_info(struct MPT2SAS_ADAPTER *ioc , u32 log_info)
sas_loginfo.dw.subcode);
}

/**
* mpt2sas_base_fault_info - verbose translation of firmware FAULT code
* @ioc: pointer to scsi command object
* @fault_code: fault code
*
* Return nothing.
*/
void
mpt2sas_base_fault_info(struct MPT2SAS_ADAPTER *ioc , u16 fault_code)
{
printk(MPT2SAS_ERR_FMT "fault_state(0x%04x)!\n",
ioc->name, fault_code);
}

/**
* _base_display_reply_info -
* @ioc: pointer to scsi command object
Expand Down Expand Up @@ -3684,6 +3741,9 @@ mpt2sas_base_hard_reset_handler(struct MPT2SAS_ADAPTER *ioc, int sleep_flag,
dtmprintk(ioc, printk(MPT2SAS_DEBUG_FMT "%s: enter\n", ioc->name,
__func__));

if (mpt2sas_fwfault_debug)
mpt2sas_halt_firmware(ioc);

spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags);
if (ioc->shost_recovery) {
spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags);
Expand Down
4 changes: 4 additions & 0 deletions drivers/scsi/mpt2sas/mpt2sas_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,7 @@ typedef void (*MPT_ADD_SGE)(void *paddr, u32 flags_length, dma_addr_t dma_addr);
* @chip_phys: physical addrss prior to mapping
* @pio_chip: I/O mapped register space
* @logging_level: see mpt2sas_debug.h
* @fwfault_debug: debuging FW timeouts
* @ir_firmware: IR firmware present
* @bars: bitmask of BAR's that must be configured
* @mask_interrupts: ignore interrupt
Expand Down Expand Up @@ -587,6 +588,7 @@ struct MPT2SAS_ADAPTER {
unsigned long chip_phys;
unsigned long pio_chip;
int logging_level;
int fwfault_debug;
u8 ir_firmware;
int bars;
u8 mask_interrupts;
Expand Down Expand Up @@ -803,6 +805,8 @@ int mpt2sas_base_scsi_enclosure_processor(struct MPT2SAS_ADAPTER *ioc,
Mpi2SepReply_t *mpi_reply, Mpi2SepRequest_t *mpi_request);
void mpt2sas_base_validate_event_type(struct MPT2SAS_ADAPTER *ioc, u32 *event_type);

void mpt2sas_halt_firmware(struct MPT2SAS_ADAPTER *ioc);

/* scsih shared API */
u8 mpt2sas_scsih_event_callback(struct MPT2SAS_ADAPTER *ioc, u8 msix_index,
u32 reply);
Expand Down
41 changes: 39 additions & 2 deletions drivers/scsi/mpt2sas/mpt2sas_ctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -896,6 +896,7 @@ _ctl_do_mpt_command(struct MPT2SAS_ADAPTER *ioc,
printk(MPT2SAS_INFO_FMT "issue target reset: handle "
"= (0x%04x)\n", ioc->name,
mpi_request->FunctionDependent1);
mpt2sas_halt_firmware(ioc);
mutex_lock(&ioc->tm_cmds.mutex);
mpt2sas_scsih_issue_tm(ioc,
mpi_request->FunctionDependent1, 0,
Expand Down Expand Up @@ -2474,6 +2475,43 @@ _ctl_logging_level_store(struct device *cdev, struct device_attribute *attr,
static DEVICE_ATTR(logging_level, S_IRUGO | S_IWUSR,
_ctl_logging_level_show, _ctl_logging_level_store);

/* device attributes */
/*
* _ctl_fwfault_debug_show - show/store fwfault_debug
* @cdev - pointer to embedded class device
* @buf - the buffer returned
*
* mpt2sas_fwfault_debug is command line option
* A sysfs 'read/write' shost attribute.
*/
static ssize_t
_ctl_fwfault_debug_show(struct device *cdev,
struct device_attribute *attr, char *buf)
{
struct Scsi_Host *shost = class_to_shost(cdev);
struct MPT2SAS_ADAPTER *ioc = shost_priv(shost);

return snprintf(buf, PAGE_SIZE, "%d\n", ioc->fwfault_debug);
}
static ssize_t
_ctl_fwfault_debug_store(struct device *cdev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct Scsi_Host *shost = class_to_shost(cdev);
struct MPT2SAS_ADAPTER *ioc = shost_priv(shost);
int val = 0;

if (sscanf(buf, "%d", &val) != 1)
return -EINVAL;

ioc->fwfault_debug = val;
printk(MPT2SAS_INFO_FMT "fwfault_debug=%d\n", ioc->name,
ioc->fwfault_debug);
return strlen(buf);
}
static DEVICE_ATTR(fwfault_debug, S_IRUGO | S_IWUSR,
_ctl_fwfault_debug_show, _ctl_fwfault_debug_store);

struct device_attribute *mpt2sas_host_attrs[] = {
&dev_attr_version_fw,
&dev_attr_version_bios,
Expand All @@ -2487,13 +2525,12 @@ struct device_attribute *mpt2sas_host_attrs[] = {
&dev_attr_io_delay,
&dev_attr_device_delay,
&dev_attr_logging_level,
&dev_attr_fwfault_debug,
&dev_attr_fw_queue_depth,
&dev_attr_host_sas_address,
NULL,
};

/* device attributes */

/**
* _ctl_device_sas_address_show - sas address
* @cdev - pointer to embedded class device
Expand Down
2 changes: 2 additions & 0 deletions drivers/scsi/mpt2sas/mpt2sas_scsih.c
Original file line number Diff line number Diff line change
Expand Up @@ -1929,6 +1929,8 @@ _scsih_abort(struct scsi_cmnd *scmd)
goto out;
}

mpt2sas_halt_firmware(ioc);

mutex_lock(&ioc->tm_cmds.mutex);
handle = sas_device_priv_data->sas_target->handle;
mpt2sas_scsih_issue_tm(ioc, handle, sas_device_priv_data->lun,
Expand Down

0 comments on commit fa7f316

Please sign in to comment.