From 8ccd6926db7dd865dd6eabe3c25ae2696bffc07e Mon Sep 17 00:00:00 2001 From: Dick Kennedy Date: Mon, 3 Aug 2020 14:02:24 -0700 Subject: [PATCH] scsi: lpfc: Fix RSCN timeout due to incorrect gidft counter In configs with a large number of initiators in the same zone (>250), RSCN timeouts are seen when creating or deleting vports: lpfc 0000:07:00.1: 5:(0):0231 RSCN timeout Data: x0 x3 During RSCN processing, the driver issues a GID_FT command to the nameserver. A counter for the number of simultaneous GID_FT commands is maintained (an unsigned value). The counter is incremented when the GID_FT is issued. If the GID_FT command fails for some reason, the driver retries the GID_FT from the completion callback. But the counter was decremented before the retry was issued. When the second GID_FT completes, the callback again tries to decrement the counter, possibly wrapping to a very large non-zero value, which causes the RSCN cleanup code to not execute. This results in the RSCN timeout failure. Do not decrement the counter on a retry. Also add defensive checks to ensure the counter is not decremented if already zero. Link: https://lore.kernel.org/r/20200803210229.23063-4-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_ct.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index dd9f2bf54edd4..ef2015fad2d59 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -713,7 +713,8 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* This is a GID_FT completing so the gidft_inp counter was * incremented before the GID_FT was issued to the wire.
*/ - vport->gidft_inp--; + if (vport->gidft_inp) + vport->gidft_inp--; /* * Skip processing the NS response @@ -741,11 +742,14 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, goto out; /* CT command is being retried */ - vport->gidft_inp--; rc = lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, vport->fc_ns_retry, type); if (rc == 0) goto out; + else { /* Unable to send NS cmd */ + if (vport->gidft_inp) + vport->gidft_inp--; + } } if (vport->fc_flag & FC_RSCN_MODE) lpfc_els_flush_rscn(vport); @@ -825,7 +829,8 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, (uint32_t) CTrsp->ReasonCode, (uint32_t) CTrsp->Explanation); } - vport->gidft_inp--; + if (vport->gidft_inp) + vport->gidft_inp--; } lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, @@ -918,7 +923,8 @@ lpfc_cmpl_ct_cmd_gid_pt(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* This is a GID_PT completing so the gidft_inp counter was * incremented before the GID_PT was issued to the wire. */ - vport->gidft_inp--; + if (vport->gidft_inp) + vport->gidft_inp--; /* * Skip processing the NS response @@ -942,11 +948,14 @@ lpfc_cmpl_ct_cmd_gid_pt(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, vport->fc_ns_retry++; /* CT command is being retried */ - vport->gidft_inp--; rc = lpfc_ns_cmd(vport, SLI_CTNS_GID_PT, vport->fc_ns_retry, GID_PT_N_PORT); if (rc == 0) goto out; + else { /* Unable to send NS cmd */ + if (vport->gidft_inp) + vport->gidft_inp--; + } } if (vport->fc_flag & FC_RSCN_MODE) lpfc_els_flush_rscn(vport); @@ -1027,7 +1036,8 @@ lpfc_cmpl_ct_cmd_gid_pt(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, (uint32_t)CTrsp->ReasonCode, (uint32_t)CTrsp->Explanation); } - vport->gidft_inp--; + if (vport->gidft_inp) + vport->gidft_inp--; } lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,