Skip to content

Commit

Permalink
RDMA/bnxt_re: handle command completions after driver detect a timedout
Browse files Browse the repository at this point in the history
If calling context detect command timeout, associated memory stored on
stack will not be valid. If firmware complete the same command later,
this causes incorrect memory access by driver.

Added is_waiter_alive to handle delayed completion by firmware.
is_waiter_alive is set and reset under command queue lock.

Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Link: https://lore.kernel.org/r/1686308514-11996-11-git-send-email-selvin.xavier@broadcom.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
  • Loading branch information
Kashyap Desai authored and Leon Romanovsky committed Jun 12, 2023
1 parent 354f5bd commit 691eb7c
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 26 deletions.
59 changes: 33 additions & 26 deletions drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
{
struct bnxt_qplib_cmdq_ctx *cmdq;
u16 cbit;
int ret;

cmdq = &rcfw->cmdq;
cbit = cookie % rcfw->cmdq_depth;
Expand All @@ -115,9 +114,9 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
return bnxt_qplib_map_rc(opcode);

/* Non zero means command completed */
ret = wait_event_timeout(cmdq->waitq,
!test_bit(cbit, cmdq->cmdq_bitmap),
msecs_to_jiffies(10000));
wait_event_timeout(cmdq->waitq,
!test_bit(cbit, cmdq->cmdq_bitmap),
msecs_to_jiffies(10000));

if (!test_bit(cbit, cmdq->cmdq_bitmap))
return 0;
Expand Down Expand Up @@ -170,7 +169,7 @@ static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
static int __send_message(struct bnxt_qplib_rcfw *rcfw,
struct bnxt_qplib_cmdqmsg *msg)
{
u32 bsize, opcode, free_slots, required_slots;
u32 bsize, free_slots, required_slots;
struct bnxt_qplib_cmdq_ctx *cmdq;
struct bnxt_qplib_crsqe *crsqe;
struct bnxt_qplib_cmdqe *cmdqe;
Expand All @@ -185,8 +184,6 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw,
hwq = &cmdq->hwq;
pdev = rcfw->pdev;

opcode = __get_cmdq_base_opcode(msg->req, msg->req_sz);

if (test_bit(FIRMWARE_TIMED_OUT, &cmdq->flags))
return -ETIMEDOUT;

Expand Down Expand Up @@ -216,6 +213,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw,
crsqe->free_slots = free_slots;
crsqe->resp = (struct creq_qp_event *)msg->resp;
crsqe->resp->cookie = cpu_to_le16(cookie);
crsqe->is_waiter_alive = true;
crsqe->req_size = __get_cmdq_base_cmd_size(msg->req, msg->req_sz);
if (__get_cmdq_base_resp_size(msg->req, msg->req_sz) && msg->sb) {
struct bnxt_qplib_rcfw_sbuf *sbuf = msg->sb;
Expand Down Expand Up @@ -347,7 +345,9 @@ static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
struct bnxt_qplib_cmdqmsg *msg)
{
struct creq_qp_event *evnt = (struct creq_qp_event *)msg->resp;
u16 cookie;
struct bnxt_qplib_crsqe *crsqe;
unsigned long flags;
u16 cookie, cbit;
int rc = 0;
u8 opcode;

Expand All @@ -363,6 +363,7 @@ static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,

cookie = le16_to_cpu(__get_cmdq_base_cookie(msg->req, msg->req_sz))
& RCFW_MAX_COOKIE_VALUE;
cbit = cookie % rcfw->cmdq_depth;

if (msg->block)
rc = __block_for_resp(rcfw, cookie, opcode);
Expand All @@ -378,6 +379,14 @@ static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
return rc;
}

if (rc) {
spin_lock_irqsave(&rcfw->cmdq.hwq.lock, flags);
crsqe = &rcfw->crsqe_tbl[cbit];
crsqe->is_waiter_alive = false;
spin_unlock_irqrestore(&rcfw->cmdq.hwq.lock, flags);
return -ETIMEDOUT;
}

if (evnt->status) {
/* failed with status */
dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x status %#x\n",
Expand Down Expand Up @@ -480,15 +489,15 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
struct creq_qp_error_notification *err_event;
struct bnxt_qplib_hwq *hwq = &rcfw->cmdq.hwq;
struct bnxt_qplib_crsqe *crsqe;
u32 qp_id, tbl_indx, req_size;
struct bnxt_qplib_qp *qp;
u16 cbit, blocked = 0;
bool is_waiter_alive;
struct pci_dev *pdev;
unsigned long flags;
u32 wait_cmds = 0;
__le16 mcookie;
u16 cookie;
int rc = 0;
u32 qp_id, tbl_indx;

pdev = rcfw->pdev;
switch (qp_event->event) {
Expand Down Expand Up @@ -520,31 +529,29 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
spin_lock_irqsave_nested(&hwq->lock, flags,
SINGLE_DEPTH_NESTING);
cookie = le16_to_cpu(qp_event->cookie);
mcookie = qp_event->cookie;
blocked = cookie & RCFW_CMD_IS_BLOCKING;
cookie &= RCFW_MAX_COOKIE_VALUE;
cbit = cookie % rcfw->cmdq_depth;
crsqe = &rcfw->crsqe_tbl[cbit];
if (crsqe->resp &&
crsqe->resp->cookie == mcookie) {
memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
crsqe->resp = NULL;
} else {
if (crsqe->resp && crsqe->resp->cookie)
dev_err(&pdev->dev,
"CMD %s cookie sent=%#x, recd=%#x\n",
crsqe->resp ? "mismatch" : "collision",
crsqe->resp ? crsqe->resp->cookie : 0,
mcookie);
}
if (!test_and_clear_bit(cbit, rcfw->cmdq.cmdq_bitmap))
dev_warn(&pdev->dev,
"CMD bit %d was not requested\n", cbit);
hwq->cons += crsqe->req_size;

if (crsqe->is_waiter_alive) {
if (crsqe->resp)
memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
if (!blocked)
wait_cmds++;
}

req_size = crsqe->req_size;
is_waiter_alive = crsqe->is_waiter_alive;

crsqe->req_size = 0;
if (!is_waiter_alive)
crsqe->resp = NULL;

if (!blocked)
wait_cmds++;
hwq->cons += req_size;
spin_unlock_irqrestore(&hwq->lock, flags);
}
*num_wait += wait_cmds;
Expand Down
1 change: 1 addition & 0 deletions drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ struct bnxt_qplib_crsqe {
u32 req_size;
/* Free slots at the time of submission */
u32 free_slots;
bool is_waiter_alive;
};

struct bnxt_qplib_rcfw_sbuf {
Expand Down

0 comments on commit 691eb7c

Please sign in to comment.