Skip to content

Commit

Permalink
[SCSI] libsas: Handle SCSI commands that complete with failure codes
Browse files Browse the repository at this point in the history
This patch moves the code that handles SAS failures out of the main EH
function and into a separate function.  It also detects commands that have
no sas_task (i.e. they completed, but with error data) and sends them into
scsi_error for processing.  This allows us to handle SCSI errors (and
enables auto-spinup as a side effect) instead of dropping them on the
floor and falling into an infinite loop.  It also requires the
implementation of a device reset function, which the SAS failure code has
been modified to employ for REQ_DEVICE_RESET.

Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
  • Loading branch information
Darrick J. Wong authored and James Bottomley committed Jan 27, 2007
1 parent dca84e4 commit ad68923
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 44 deletions.
148 changes: 104 additions & 44 deletions drivers/scsi/libsas/sas_scsi_host.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include <scsi/scsi_transport_sas.h>
#include "../scsi_sas_internal.h"
#include "../scsi_transport_api.h"
#include "../scsi_priv.h"

#include <linux/err.h>
#include <linux/blkdev.h>
Expand Down Expand Up @@ -396,54 +397,80 @@ static int sas_recover_I_T(struct domain_device *dev)
return res;
}

static int eh_reset_phy_helper(struct sas_phy *phy)
/* Find the sas_phy that's attached to this device */
struct sas_phy *find_local_sas_phy(struct domain_device *dev)
{
int tmf_resp;
struct domain_device *pdev = dev->parent;
struct ex_phy *exphy = NULL;
int i;

/* Directly attached device */
if (!pdev)
return dev->port->phy;

tmf_resp = sas_phy_reset(phy, 1);
if (tmf_resp)
SAS_DPRINTK("Hard reset of phy %d failed 0x%x\n",
phy->identify.phy_identifier,
tmf_resp);
/* Otherwise look in the expander */
for (i = 0; i < pdev->ex_dev.num_phys; i++)
if (!memcmp(dev->sas_addr,
pdev->ex_dev.ex_phy[i].attached_sas_addr,
SAS_ADDR_SIZE)) {
exphy = &pdev->ex_dev.ex_phy[i];
break;
}

return tmf_resp;
BUG_ON(!exphy);
return exphy->phy;
}

void sas_scsi_recover_host(struct Scsi_Host *shost)
/**
 * sas_eh_device_reset_handler - attempt to reset the device a command targets
 * @cmd: SCSI command whose domain device should be reset
 *
 * Looks up the local sas_phy attached to the command's domain device and
 * calls sas_phy_reset() on it with the second argument set to 1.
 *
 * Returns SUCCESS when the reset result is TMF_RESP_FUNC_SUCC or
 * TMF_RESP_FUNC_COMPLETE, and FAILED for any other result.
 */
int sas_eh_device_reset_handler(struct scsi_cmnd *cmd)
{
	struct domain_device *dev = cmd_to_domain_dev(cmd);
	struct sas_phy *phy = find_local_sas_phy(dev);
	int res;

	res = sas_phy_reset(phy, 1);
	if (res == TMF_RESP_FUNC_SUCC || res == TMF_RESP_FUNC_COMPLETE)
		return SUCCESS;

	/*
	 * Log only results that are actually reported as a failure, so a
	 * non-zero success code is never announced as a failed reset.
	 */
	SAS_DPRINTK("Device reset of %s failed 0x%x\n",
		    phy->dev.kobj.k_name,
		    res);

	return FAILED;
}

/*
 * Reset the device behind @cmd through the host template's
 * eh_device_reset_handler hook.
 *
 * Returns whatever the hook returns, or FAILED when the host template
 * does not provide one.
 */
static int try_to_reset_cmd_device(struct Scsi_Host *shost,
				   struct scsi_cmnd *cmd)
{
	if (shost->hostt->eh_device_reset_handler)
		return shost->hostt->eh_device_reset_handler(cmd);

	/* No device reset hook registered for this host. */
	return FAILED;
}

static int sas_eh_handle_sas_errors(struct Scsi_Host *shost,
struct list_head *work_q,
struct list_head *done_q)
{
struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost);
unsigned long flags;
LIST_HEAD(error_q);
struct scsi_cmnd *cmd, *n;
enum task_disposition res = TASK_IS_DONE;
int tmf_resp, need_reset;
struct sas_internal *i = to_sas_internal(shost->transportt);
struct sas_phy *task_sas_phy = NULL;

spin_lock_irqsave(shost->host_lock, flags);
list_splice_init(&shost->eh_cmd_q, &error_q);
spin_unlock_irqrestore(shost->host_lock, flags);

SAS_DPRINTK("Enter %s\n", __FUNCTION__);
unsigned long flags;
struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost);

/* All tasks on this list were marked SAS_TASK_STATE_ABORTED
* by sas_scsi_timed_out() callback.
*/
Again:
SAS_DPRINTK("going over list...\n");
list_for_each_entry_safe(cmd, n, &error_q, eh_entry) {
list_for_each_entry_safe(cmd, n, work_q, eh_entry) {
struct sas_task *task = TO_SAS_TASK(cmd);
list_del_init(&cmd->eh_entry);

if (!task) {
SAS_DPRINTK("%s: taskless cmd?!\n", __FUNCTION__);
if (!task)
continue;
}

list_del_init(&cmd->eh_entry);

spin_lock_irqsave(&task->task_state_lock, flags);
need_reset = task->task_state_flags & SAS_TASK_NEED_DEV_RESET;
if (need_reset)
task_sas_phy = task->dev->port->phy;
spin_unlock_irqrestore(&task->task_state_lock, flags);

SAS_DPRINTK("trying to find task 0x%p\n", task);
Expand All @@ -457,14 +484,14 @@ void sas_scsi_recover_host(struct Scsi_Host *shost)
task);
task->task_done(task);
if (need_reset)
eh_reset_phy_helper(task_sas_phy);
try_to_reset_cmd_device(shost, cmd);
continue;
case TASK_IS_ABORTED:
SAS_DPRINTK("%s: task 0x%p is aborted\n",
__FUNCTION__, task);
task->task_done(task);
if (need_reset)
eh_reset_phy_helper(task_sas_phy);
try_to_reset_cmd_device(shost, cmd);
continue;
case TASK_IS_AT_LU:
SAS_DPRINTK("task 0x%p is at LU: lu recover\n", task);
Expand All @@ -476,8 +503,8 @@ void sas_scsi_recover_host(struct Scsi_Host *shost)
cmd->device->lun);
task->task_done(task);
if (need_reset)
eh_reset_phy_helper(task_sas_phy);
sas_scsi_clear_queue_lu(&error_q, cmd);
try_to_reset_cmd_device(shost, cmd);
sas_scsi_clear_queue_lu(work_q, cmd);
goto Again;
}
/* fallthrough */
Expand All @@ -491,8 +518,8 @@ void sas_scsi_recover_host(struct Scsi_Host *shost)
SAS_ADDR(task->dev->sas_addr));
task->task_done(task);
if (need_reset)
eh_reset_phy_helper(task_sas_phy);
sas_scsi_clear_queue_I_T(&error_q, task->dev);
try_to_reset_cmd_device(shost, cmd);
sas_scsi_clear_queue_I_T(work_q, task->dev);
goto Again;
}
/* Hammer time :-) */
Expand All @@ -506,8 +533,8 @@ void sas_scsi_recover_host(struct Scsi_Host *shost)
"succeeded\n", port->id);
task->task_done(task);
if (need_reset)
eh_reset_phy_helper(task_sas_phy);
sas_scsi_clear_queue_port(&error_q,
try_to_reset_cmd_device(shost, cmd);
sas_scsi_clear_queue_port(work_q,
port);
goto Again;
}
Expand All @@ -520,7 +547,7 @@ void sas_scsi_recover_host(struct Scsi_Host *shost)
"succeeded\n");
task->task_done(task);
if (need_reset)
eh_reset_phy_helper(task_sas_phy);
try_to_reset_cmd_device(shost, cmd);
goto out;
}
}
Expand All @@ -535,21 +562,53 @@ void sas_scsi_recover_host(struct Scsi_Host *shost)

task->task_done(task);
if (need_reset)
eh_reset_phy_helper(task_sas_phy);
try_to_reset_cmd_device(shost, cmd);
goto clear_q;
}
}
out:
scsi_eh_flush_done_q(&ha->eh_done_q);
SAS_DPRINTK("--- Exit %s\n", __FUNCTION__);
return;
return list_empty(work_q);
clear_q:
SAS_DPRINTK("--- Exit %s -- clear_q\n", __FUNCTION__);
list_for_each_entry_safe(cmd, n, &error_q, eh_entry) {
list_for_each_entry_safe(cmd, n, work_q, eh_entry) {
struct sas_task *task = TO_SAS_TASK(cmd);
list_del_init(&cmd->eh_entry);
task->task_done(task);
}
return list_empty(work_q);
}

/*
 * Error-handling entry point for a SAS host: take over the commands queued
 * on the host's eh_cmd_q, run SAS-level recovery on them, then hand whatever
 * is left to the SCSI error-handling helpers, and finally flush the HA's
 * done queue.
 */
void sas_scsi_recover_host(struct Scsi_Host *shost)
{
	LIST_HEAD(eh_work_q);
	struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost);
	unsigned long flags;
	int work_q_drained;

	/* Move the pending commands onto a private list under host_lock. */
	spin_lock_irqsave(shost->host_lock, flags);
	list_splice_init(&shost->eh_cmd_q, &eh_work_q);
	spin_unlock_irqrestore(shost->host_lock, flags);

	SAS_DPRINTK("Enter %s\n", __FUNCTION__);

	/*
	 * Deal with commands that still have SAS tasks (i.e. they didn't
	 * complete via the normal sas_task completion mechanism).  The
	 * helper returns non-zero once the work queue is empty.
	 */
	work_q_drained = sas_eh_handle_sas_errors(shost, &eh_work_q,
						  &ha->eh_done_q);

	if (!work_q_drained) {
		/*
		 * Now deal with SCSI commands that completed ok but have an
		 * error code (and hopefully sense data) attached.  This is
		 * roughly what scsi_unjam_host does, but we skip
		 * scsi_eh_abort_cmds because any command we see here has no
		 * sas_task and is thus unknown to the HA.
		 */
		if (!scsi_eh_get_sense(&eh_work_q, &ha->eh_done_q))
			scsi_eh_ready_devs(shost, &eh_work_q, &ha->eh_done_q);
	}

	scsi_eh_flush_done_q(&ha->eh_done_q);
	SAS_DPRINTK("--- Exit %s\n", __FUNCTION__);
}

enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd)
Expand Down Expand Up @@ -914,3 +973,4 @@ EXPORT_SYMBOL_GPL(__sas_task_abort);
EXPORT_SYMBOL_GPL(sas_task_abort);
EXPORT_SYMBOL_GPL(sas_phy_reset);
EXPORT_SYMBOL_GPL(sas_phy_enable);
EXPORT_SYMBOL_GPL(sas_eh_device_reset_handler);
1 change: 1 addition & 0 deletions include/scsi/libsas.h
Original file line number Diff line number Diff line change
Expand Up @@ -660,5 +660,6 @@ void sas_init_dev(struct domain_device *);

void sas_task_abort(struct sas_task *);
int __sas_task_abort(struct sas_task *);
int sas_eh_device_reset_handler(struct scsi_cmnd *cmd);

#endif /* _SASLIB_H_ */

0 comments on commit ad68923

Please sign in to comment.