Skip to content

Commit

Permalink
scsi: qla2xxx: Fix MPI failure AEN (8200) handling
Browse files Browse the repository at this point in the history
Today, upon an MPI failure AEN, a regular firmware dump is taken in addition
to the MPI dump, followed by a chip reset. This is disruptive to I/O and is
not required. Make the firmware dump collection, and the chip reset that
follows it, optional (not done by default).

With this change, the firmware dump buffer and the MPI dump buffer are
independent of each other, and each can hold a dump that was taken at a
different time for a different issue. The MPI dump is saved in a
separate buffer and is retrieved differently from the firmware dump.

To collect a full dump on an MPI failure AEN, a module parameter is
introduced:
    ql2xfulldump_on_mpifail (default: 0)

Link: https://lore.kernel.org/r/20200331104015.24868-2-njavali@marvell.com
Reported-by: kbuild test robot <lkp@intel.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Arun Easi <aeasi@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
  • Loading branch information
Arun Easi authored and Martin K. Petersen committed Apr 24, 2020
1 parent e304142 commit cbb01c2
Show file tree
Hide file tree
Showing 7 changed files with 186 additions and 43 deletions.
30 changes: 27 additions & 3 deletions drivers/scsi/qla2xxx/qla_attr.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ qla2x00_sysfs_read_fw_dump(struct file *filp, struct kobject *kobj,
struct qla_hw_data *ha = vha->hw;
int rval = 0;

if (!(ha->fw_dump_reading || ha->mctp_dump_reading))
if (!(ha->fw_dump_reading || ha->mctp_dump_reading ||
ha->mpi_fw_dump_reading))
return 0;

mutex_lock(&ha->optrom_mutex);
Expand All @@ -42,6 +43,10 @@ qla2x00_sysfs_read_fw_dump(struct file *filp, struct kobject *kobj,
} else if (ha->mctp_dumped && ha->mctp_dump_reading) {
rval = memory_read_from_buffer(buf, count, &off, ha->mctp_dump,
MCTP_DUMP_SIZE);
} else if (ha->mpi_fw_dumped && ha->mpi_fw_dump_reading) {
rval = memory_read_from_buffer(buf, count, &off,
ha->mpi_fw_dump,
ha->mpi_fw_dump_len);
} else if (ha->fw_dump_reading) {
rval = memory_read_from_buffer(buf, count, &off, ha->fw_dump,
ha->fw_dump_len);
Expand Down Expand Up @@ -103,7 +108,6 @@ qla2x00_sysfs_write_fw_dump(struct file *filp, struct kobject *kobj,
qla82xx_set_reset_owner(vha);
qla8044_idc_unlock(ha);
} else {
ha->fw_dump_mpi = 1;
qla2x00_system_error(vha);
}
break;
Expand Down Expand Up @@ -137,6 +141,22 @@ qla2x00_sysfs_write_fw_dump(struct file *filp, struct kobject *kobj,
vha->host_no);
}
break;
case 8:
if (!ha->mpi_fw_dump_reading)
break;
ql_log(ql_log_info, vha, 0x70e7,
"MPI firmware dump cleared on (%ld).\n", vha->host_no);
ha->mpi_fw_dump_reading = 0;
ha->mpi_fw_dumped = 0;
break;
case 9:
if (ha->mpi_fw_dumped && !ha->mpi_fw_dump_reading) {
ha->mpi_fw_dump_reading = 1;
ql_log(ql_log_info, vha, 0x70e8,
"Raw MPI firmware dump ready for read on (%ld).\n",
vha->host_no);
}
break;
}
return count;
}
Expand Down Expand Up @@ -706,7 +726,8 @@ qla2x00_sysfs_write_reset(struct file *filp, struct kobject *kobj,
scsi_unblock_requests(vha->host);
break;
case 0x2025d:
if (!IS_QLA81XX(ha) && !IS_QLA83XX(ha))
if (!IS_QLA81XX(ha) && !IS_QLA83XX(ha) &&
!IS_QLA27XX(ha) && !IS_QLA28XX(ha))
return -EPERM;

ql_log(ql_log_info, vha, 0x706f,
Expand All @@ -724,6 +745,8 @@ qla2x00_sysfs_write_reset(struct file *filp, struct kobject *kobj,
qla83xx_idc_audit(vha, IDC_AUDIT_TIMESTAMP);
qla83xx_idc_unlock(vha, 0);
break;
} else if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
qla27xx_reset_mpi(vha);
} else {
/* Make sure FC side is not in reset */
WARN_ON_ONCE(qla2x00_wait_for_hba_online(vha) !=
Expand All @@ -737,6 +760,7 @@ qla2x00_sysfs_write_reset(struct file *filp, struct kobject *kobj,
scsi_unblock_requests(vha->host);
break;
}
break;
case 0x2025e:
if (!IS_P3P_TYPE(ha) || vha != base_vha) {
ql_log(ql_log_info, vha, 0x7071,
Expand Down
13 changes: 12 additions & 1 deletion drivers/scsi/qla2xxx/qla_def.h
Original file line number Diff line number Diff line change
Expand Up @@ -3223,6 +3223,7 @@ struct isp_operations {
uint32_t);

void (*fw_dump) (struct scsi_qla_host *, int);
void (*mpi_fw_dump)(struct scsi_qla_host *, int);

int (*beacon_on) (struct scsi_qla_host *);
int (*beacon_off) (struct scsi_qla_host *);
Expand Down Expand Up @@ -3748,6 +3749,11 @@ struct qlt_hw_data {

#define LEAK_EXCHG_THRESH_HOLD_PERCENT 75 /* 75 percent */

/*
 * Per-adapter statistics, embedded in struct qla_hw_data as the "stat"
 * member.
 */
struct qla_hw_data_stat {
/* NOTE(review): presumably the number of firmware dumps taken - confirm against the code that updates it. */
u32 num_fw_dump;
/* NOTE(review): presumably the number of MPI resets performed - confirm against the code that updates it. */
u32 num_mpi_reset;
};

/*
* Qlogic host adapter specific data structure.
*/
Expand Down Expand Up @@ -4230,7 +4236,6 @@ struct qla_hw_data {
uint32_t fw_dump_len;
u32 fw_dump_alloc_len;
bool fw_dumped;
bool fw_dump_mpi;
unsigned long fw_dump_cap_flags;
#define RISC_PAUSE_CMPL 0
#define DMA_SHUTDOWN_CMPL 1
Expand All @@ -4241,6 +4246,10 @@ struct qla_hw_data {
#define ISP_MBX_RDY 6
#define ISP_SOFT_RESET_CMPL 7
int fw_dump_reading;
void *mpi_fw_dump;
u32 mpi_fw_dump_len;
int mpi_fw_dump_reading:1;
int mpi_fw_dumped:1;
int prev_minidump_failed;
dma_addr_t eft_dma;
void *eft;
Expand Down Expand Up @@ -4454,6 +4463,8 @@ struct qla_hw_data {
uint16_t last_zio_threshold;

#define DEFAULT_ZIO_THRESHOLD 5

struct qla_hw_data_stat stat;
};

struct active_regions {
Expand Down
3 changes: 3 additions & 0 deletions drivers/scsi/qla2xxx/qla_gbl.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ extern int ql2xenablemsix;
extern int qla2xuseresexchforels;
extern int ql2xexlogins;
extern int ql2xdifbundlinginternalbuffers;
extern int ql2xfulldump_on_mpifail;

extern int qla2x00_loop_reset(scsi_qla_host_t *);
extern void qla2x00_abort_all_cmds(scsi_qla_host_t *, int);
Expand Down Expand Up @@ -645,6 +646,7 @@ extern void qla82xx_fw_dump(scsi_qla_host_t *, int);
extern void qla8044_fw_dump(scsi_qla_host_t *, int);

extern void qla27xx_fwdump(scsi_qla_host_t *, int);
extern void qla27xx_mpi_fwdump(scsi_qla_host_t *, int);
extern ulong qla27xx_fwdt_calculate_dump_size(struct scsi_qla_host *, void *);
extern int qla27xx_fwdt_template_valid(void *);
extern ulong qla27xx_fwdt_template_size(void *);
Expand Down Expand Up @@ -933,5 +935,6 @@ extern void qla24xx_process_purex_list(struct purex_list *);

/* nvme.c */
void qla_nvme_unregister_remote_port(struct fc_port *fcport);
void qla27xx_reset_mpi(scsi_qla_host_t *vha);
void qla_handle_els_plogi_done(scsi_qla_host_t *vha, struct event_arg *ea);
#endif /* _QLA_GBL_H */
2 changes: 2 additions & 0 deletions drivers/scsi/qla2xxx/qla_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -3339,6 +3339,8 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha)
dump_size / 1024);

if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
ha->mpi_fw_dump = (char *)fw_dump +
ha->fwdt[1].dump_size;
mutex_unlock(&ha->optrom_mutex);
return;
}
Expand Down
54 changes: 37 additions & 17 deletions drivers/scsi/qla2xxx/qla_isr.c
Original file line number Diff line number Diff line change
Expand Up @@ -757,6 +757,39 @@ qla2x00_find_fcport_by_nportid(scsi_qla_host_t *vha, port_id_t *id,
return NULL;
}

/*
 * qla27xx_handle_8200_aen() - Handle an MPI failure AEN (8200h).
 * @vha: SCSI driver HA context
 * @mb: mailbox values delivered with the AEN; mb[0]..mb[3] are read
 *
 * Shall be called only on supported adapters (the isp_ops->mpi_fw_dump
 * hook is invoked unconditionally once a dump is required).
 *
 * BIT_8 of mb[1] decides whether any action is taken. When it is clear the
 * event is only logged. When it is set an MPI firmware dump is captured;
 * additionally, if the ql2xfulldump_on_mpifail module parameter is non-zero,
 * a regular firmware dump is captured first and an ISP abort is scheduled
 * through the DPC thread.
 */
static void
qla27xx_handle_8200_aen(scsi_qla_host_t *vha, uint16_t *mb)
{
	struct qla_hw_data *ha = vha->hw;
	bool reset_isp_needed = false;

	/*
	 * Report the same bit that gates the handling below.  mb[0] carries
	 * the AEN code itself, so testing BIT_8 there would always log
	 * "not needed" regardless of what the firmware asked for.
	 */
	ql_log(ql_log_warn, vha, 0x02f0,
	       "MPI Heartbeat stop. MPI reset is%s needed. "
	       "MB0[%xh] MB1[%xh] MB2[%xh] MB3[%xh]\n",
	       mb[1] & BIT_8 ? "" : " not",
	       mb[0], mb[1], mb[2], mb[3]);

	if ((mb[1] & BIT_8) == 0)
		return;

	ql_log(ql_log_warn, vha, 0x02f1,
	       "MPI Heartbeat stop. FW dump needed\n");

	/* The full firmware dump (and the reset that follows) is opt-in. */
	if (ql2xfulldump_on_mpifail) {
		ha->isp_ops->fw_dump(vha, 1);
		reset_isp_needed = true;
	}

	ha->isp_ops->mpi_fw_dump(vha, 1);

	if (reset_isp_needed) {
		ha->flags.fw_init_done = 0;
		set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
		qla2xxx_wake_dpc(vha);
	}
}

/**
* qla2x00_async_event() - Process asynchronous events.
* @vha: SCSI driver HA context
Expand Down Expand Up @@ -872,9 +905,9 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb)
"ISP System Error - mbx1=%xh mbx2=%xh mbx3=%xh.\n ",
mb[1], mb[2], mb[3]);

ha->fw_dump_mpi =
(IS_QLA27XX(ha) || IS_QLA28XX(ha)) &&
RD_REG_WORD(&reg24->mailbox7) & BIT_8;
if ((IS_QLA27XX(ha) || IS_QLA28XX(ha)) &&
RD_REG_WORD(&reg24->mailbox7) & BIT_8)
ha->isp_ops->mpi_fw_dump(vha, 1);
ha->isp_ops->fw_dump(vha, 1);
ha->flags.fw_init_done = 0;
QLA_FW_STOPPED(ha);
Expand Down Expand Up @@ -1375,20 +1408,7 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb)

case MBA_IDC_AEN:
if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
ha->flags.fw_init_done = 0;
ql_log(ql_log_warn, vha, 0xffff,
"MPI Heartbeat stop. Chip reset needed. MB0[%xh] MB1[%xh] MB2[%xh] MB3[%xh]\n",
mb[0], mb[1], mb[2], mb[3]);

if ((mb[1] & BIT_8) ||
(mb[2] & BIT_8)) {
ql_log(ql_log_warn, vha, 0xd013,
"MPI Heartbeat stop. FW dump needed\n");
ha->fw_dump_mpi = 1;
ha->isp_ops->fw_dump(vha, 1);
}
set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
qla2xxx_wake_dpc(vha);
qla27xx_handle_8200_aen(vha, mb);
} else if (IS_QLA83XX(ha)) {
mb[4] = RD_REG_WORD(&reg24->mailbox4);
mb[5] = RD_REG_WORD(&reg24->mailbox5);
Expand Down
6 changes: 6 additions & 0 deletions drivers/scsi/qla2xxx/qla_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ static int apidev_major;
*/
struct kmem_cache *srb_cachep;

/*
 * Module parameter: take a full firmware dump on an MPI failure AEN.
 *
 * Read by qla27xx_handle_8200_aen(): when non-zero, a regular firmware dump
 * is captured through isp_ops->fw_dump and an ISP abort is scheduled, in
 * addition to the MPI dump.  No initializer is given, so as a file-scope
 * int it defaults to 0 (MPI dump only).  Registered with permissions
 * S_IRUGO | S_IWUSR.
 */
int ql2xfulldump_on_mpifail;
module_param(ql2xfulldump_on_mpifail, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(ql2xfulldump_on_mpifail,
"Set this to take full dump on MPI hang.");

/*
* CT6 CTX allocation cache
*/
Expand Down Expand Up @@ -2518,6 +2523,7 @@ static struct isp_operations qla27xx_isp_ops = {
.read_nvram = NULL,
.write_nvram = NULL,
.fw_dump = qla27xx_fwdump,
.mpi_fw_dump = qla27xx_mpi_fwdump,
.beacon_on = qla24xx_beacon_on,
.beacon_off = qla24xx_beacon_off,
.beacon_blink = qla83xx_beacon_blink,
Expand Down
Loading

0 comments on commit cbb01c2

Please sign in to comment.