From f10f582d28220f50099d3f561116256267821429 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 8 Feb 2022 12:54:48 -0600 Subject: [PATCH 1/3] scsi: qedi: Fix ABBA deadlock in qedi_process_tmf_resp() and qedi_process_cmd_cleanup_resp() This fixes a deadlock added with commit b40f3894e39e ("scsi: qedi: Complete TMF works before disconnect") Bug description from Jia-Ju Bai: qedi_process_tmf_resp() spin_lock(&session->back_lock); --> Line 201 (Lock A) spin_lock(&qedi_conn->tmf_work_lock); --> Line 230 (Lock B) qedi_process_cmd_cleanup_resp() spin_lock_bh(&qedi_conn->tmf_work_lock); --> Line 752 (Lock B) spin_lock_bh(&conn->session->back_lock); --> Line 784 (Lock A) When qedi_process_tmf_resp() and qedi_process_cmd_cleanup_resp() are concurrently executed, the deadlock can occur. This patch fixes the deadlock by not holding the tmf_work_lock in qedi_process_cmd_cleanup_resp while holding the back_lock. The tmf_work_lock is only needed while we remove the tmf_work from the work_list. Link: https://lore.kernel.org/r/20220208185448.6206-1-michael.christie@oracle.com Fixes: b40f3894e39e ("scsi: qedi: Complete TMF works before disconnect") Cc: Manish Rangankar Cc: Nilesh Javali Reported-by: TOTE Robot Reported-by: Jia-Ju Bai Signed-off-by: Mike Christie Signed-off-by: Martin K. 
Petersen --- drivers/scsi/qedi/qedi_fw.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c index 5916ed7662d56..4eb89aa4a39dc 100644 --- a/drivers/scsi/qedi/qedi_fw.c +++ b/drivers/scsi/qedi/qedi_fw.c @@ -771,11 +771,10 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi, qedi_cmd->list_tmf_work = NULL; } } + spin_unlock_bh(&qedi_conn->tmf_work_lock); - if (!found) { - spin_unlock_bh(&qedi_conn->tmf_work_lock); + if (!found) goto check_cleanup_reqs; - } QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, "TMF work, cqe->tid=0x%x, tmf flags=0x%x, cid=0x%x\n", @@ -806,7 +805,6 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi, qedi_cmd->state = CLEANUP_RECV; unlock: spin_unlock_bh(&conn->session->back_lock); - spin_unlock_bh(&qedi_conn->tmf_work_lock); wake_up_interruptible(&qedi_conn->wait_queue); return; From 7f4c5a26f735dea4bbc0eb8eb9da99cda95a8563 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 12 Feb 2022 08:31:20 -0800 Subject: [PATCH 2/3] scsi: lpfc: Fix pt2pt NVMe PRLI reject LOGO loop When connected point to point, the driver does not know the FC4s supported by the other end. In Fabrics, it can query the nameserver. Thus the driver must send PRLIs for the FC4s it supports and enable support based on the acc(ept) or rej(ect) of the respective FC4 PRLI. Currently the driver supports SCSI and NVMe PRLIs. Unfortunately, although the behavior is per standard, many devices have come to expect only SCSI PRLIs. In this particular example, the NVMe PRLI is properly RJT'd but the target decided that it must LOGO after seeing the unexpected NVMe PRLI. The LOGO causes the sequence to restart and login is now in an infinite failure loop. Fix the problem by having the driver, on a pt2pt link, remember NVMe PRLI accept or reject status across logout as long as the link stays "up". 
When retrying login, if the prior NVMe PRLI was rejected, it will not be sent on the next login. Link: https://lore.kernel.org/r/20220212163120.15385-1-jsmart2021@gmail.com Cc: <stable@vger.kernel.org> # v5.4+ Reviewed-by: Ewan D. Milne Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 1 + drivers/scsi/lpfc/lpfc_attr.c | 3 +++ drivers/scsi/lpfc/lpfc_els.c | 20 +++++++++++++++++++- drivers/scsi/lpfc/lpfc_nportdisc.c | 5 +++-- 4 files changed, 26 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index a1e0a106c132b..98cabe09c0404 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -592,6 +592,7 @@ struct lpfc_vport { #define FC_VPORT_LOGO_RCVD 0x200 /* LOGO received on vport */ #define FC_RSCN_DISCOVERY 0x400 /* Auth all devices after RSCN */ #define FC_LOGO_RCVD_DID_CHNG 0x800 /* FDISC on phys port detect DID chng*/ +#define FC_PT2PT_NO_NVME 0x1000 /* Don't send NVME PRLI */ #define FC_SCSI_SCAN_TMO 0x4000 /* scsi scan timer running */ #define FC_ABORT_DISCOVERY 0x8000 /* we want to abort discovery */ #define FC_NDISC_ACTIVE 0x10000 /* NPort discovery active */ diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index bac78fbce8d6e..fa8415259cb8a 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -1315,6 +1315,9 @@ lpfc_issue_lip(struct Scsi_Host *shost) pmboxq->u.mb.mbxCommand = MBX_DOWN_LINK; pmboxq->u.mb.mbxOwner = OWN_HOST; + if ((vport->fc_flag & FC_PT2PT) && (vport->fc_flag & FC_PT2PT_NO_NVME)) + vport->fc_flag &= ~FC_PT2PT_NO_NVME; + mbxstatus = lpfc_sli_issue_mbox_wait(phba, pmboxq, LPFC_MBOX_TMO * 2); if ((mbxstatus == MBX_SUCCESS) && diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index db5ccae1b63da..f936833c99099 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -1072,7 +1072,8 @@ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* FLOGI failed, 
so there is no fabric */ spin_lock_irq(shost->host_lock); - vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP); + vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP | + FC_PT2PT_NO_NVME); spin_unlock_irq(shost->host_lock); /* If private loop, then allow max outstanding els to be @@ -4607,6 +4608,23 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* Added for Vendor specifc support * Just keep retrying for these Rsn / Exp codes */ + if ((vport->fc_flag & FC_PT2PT) && + cmd == ELS_CMD_NVMEPRLI) { + switch (stat.un.b.lsRjtRsnCode) { + case LSRJT_UNABLE_TPC: + case LSRJT_INVALID_CMD: + case LSRJT_LOGICAL_ERR: + case LSRJT_CMD_UNSUPPORTED: + lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS, + "0168 NVME PRLI LS_RJT " + "reason %x port doesn't " + "support NVME, disabling NVME\n", + stat.un.b.lsRjtRsnCode); + retry = 0; + vport->fc_flag |= FC_PT2PT_NO_NVME; + goto out_retry; + } + } switch (stat.un.b.lsRjtRsnCode) { case LSRJT_UNABLE_TPC: /* The driver has a VALID PLOGI but the rport has diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 7d717a4ac14d1..fdf5e777bf113 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -1961,8 +1961,9 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport, * is configured try it. 
*/ ndlp->nlp_fc4_type |= NLP_FC4_FCP; - if ((vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || - (vport->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { + if ((!(vport->fc_flag & FC_PT2PT_NO_NVME)) && + (vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH || + vport->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { ndlp->nlp_fc4_type |= NLP_FC4_NVME; /* We need to update the localport also */ lpfc_nvme_update_localport(vport); From 10af115646171afc0217177d6eae92917b785897 Mon Sep 17 00:00:00 2001 From: Jinyoung Choi Date: Mon, 14 Feb 2022 19:33:52 +0900 Subject: [PATCH 3/3] scsi: ufs: core: Fix divide by zero in ufshcd_map_queues() Before calling blk_mq_map_queues(), the mq_map and nr_queues belonging to struct blk_mq_queue_map must have a valid value. If nr_queues is set to 0, the system may encounter a divide by zero depending on the type of architecture. blk_mq_map_queues() -> queue_index() Link: https://lore.kernel.org/r/1891546521.01644873481638.JavaMail.epsvc@epcpadp4 Reviewed-by: Bart Van Assche Signed-off-by: Jinyoung Choi Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 50b12d60dc1b2..9349557b8a01b 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -2681,7 +2681,7 @@ static int ufshcd_map_queues(struct Scsi_Host *shost) break; case HCTX_TYPE_READ: map->nr_queues = 0; - break; + continue; default: WARN_ON_ONCE(true); }