Skip to content

Commit

Permalink
RDMA/qedr: SRQ bug fixes
Browse files Browse the repository at this point in the history
QPs that share the same SRQ but work on different CQs and run in parallel
on different CPUs could race when maintaining the SRQ consumer
count, leading to the FW running out of SRQs. Update the consumer count
atomically. Also make sure the wqe_prod is updated after the sge_prod, as
required by the FW.

Fixes: 3491c9e ("qedr: Add support for kernel mode SRQ's")
Link: https://lore.kernel.org/r/20200708195526.31040-1-ybason@marvell.com
Signed-off-by: Michal Kalderon <mkalderon@marvell.com>
Signed-off-by: Yuval Basson <ybason@marvell.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
  • Loading branch information
Yuval Basson authored and Jason Gunthorpe committed Jul 16, 2020
1 parent 317000b commit acca72e
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 14 deletions.
4 changes: 2 additions & 2 deletions drivers/infiniband/hw/qedr/qedr.h
Original file line number Diff line number Diff line change
Expand Up @@ -344,10 +344,10 @@ struct qedr_srq_hwq_info {
u32 wqe_prod;
u32 sge_prod;
u32 wr_prod_cnt;
u32 wr_cons_cnt;
atomic_t wr_cons_cnt;
u32 num_elems;

u32 *virt_prod_pair_addr;
struct rdma_srq_producers *virt_prod_pair_addr;
dma_addr_t phy_prod_pair_addr;
};

Expand Down
22 changes: 10 additions & 12 deletions drivers/infiniband/hw/qedr/verbs.c
Original file line number Diff line number Diff line change
Expand Up @@ -3686,7 +3686,7 @@ static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq)
* count and consumer count and subtract it from max
* work request supported so that we get elements left.
*/
used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt;
used = hw_srq->wr_prod_cnt - (u32)atomic_read(&hw_srq->wr_cons_cnt);

return hw_srq->max_wr - used;
}
Expand All @@ -3701,7 +3701,6 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
unsigned long flags;
int status = 0;
u32 num_sge;
u32 offset;

spin_lock_irqsave(&srq->lock, flags);

Expand All @@ -3714,7 +3713,8 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
if (!qedr_srq_elem_left(hw_srq) ||
wr->num_sge > srq->hw_srq.max_sges) {
DP_ERR(dev, "Can't post WR (%d,%d) || (%d > %d)\n",
hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt,
hw_srq->wr_prod_cnt,
atomic_read(&hw_srq->wr_cons_cnt),
wr->num_sge, srq->hw_srq.max_sges);
status = -ENOMEM;
*bad_wr = wr;
Expand Down Expand Up @@ -3748,22 +3748,20 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
hw_srq->sge_prod++;
}

/* Flush WQE and SGE information before
/* Update WQE and SGE information before
* updating producer.
*/
wmb();
dma_wmb();

/* SRQ producer is 8 bytes. Need to update SGE producer index
* in first 4 bytes and need to update WQE producer in
* next 4 bytes.
*/
*srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod;
offset = offsetof(struct rdma_srq_producers, wqe_prod);
*((u8 *)srq->hw_srq.virt_prod_pair_addr + offset) =
hw_srq->wqe_prod;
srq->hw_srq.virt_prod_pair_addr->sge_prod = hw_srq->sge_prod;
/* Make sure sge producer is updated first */
dma_wmb();
srq->hw_srq.virt_prod_pair_addr->wqe_prod = hw_srq->wqe_prod;

/* Flush producer after updating it. */
wmb();
wr = wr->next;
}

Expand Down Expand Up @@ -4182,7 +4180,7 @@ static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp,
} else {
__process_resp_one(dev, qp, cq, wc, resp, wr_id);
}
srq->hw_srq.wr_cons_cnt++;
atomic_inc(&srq->hw_srq.wr_cons_cnt);

return 1;
}
Expand Down

0 comments on commit acca72e

Please sign in to comment.