rdma/cxgb4: Add support for kernel mode SRQ's

This patch implements the srq specific verbs such as create/destroy/modify and post_srq_recv. And adds srq specific structures and defines to t4.h and uapi. Also updates the cq poll logic to deal with completions that are associated with the SRQ's. This patch also handles kernel mode SRQ_LIMIT events as well as flushed SRQ buffers Signed-off-by: Raju Rangoju <rajur@chelsio.com> Reviewed-by: Steve Wise <swise@opengridcomputing.com> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
mariux64 · Jul 26, 2018 · 6a0b617 · 6a0b617
1 parent 7fc7a7c
commit 6a0b617
Show file tree

Hide file tree

Showing 8 changed files with 929 additions and 167 deletions.
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
@@ -1853,10 +1853,34 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
 	return 0;
 }
 
+static void complete_cached_srq_buffers(struct c4iw_ep *ep, u32 srqidx_status)
+{
+	enum chip_type adapter_type;
+	u32 srqidx;
+	u8 status;
+
+	adapter_type = ep->com.dev->rdev.lldi.adapter_type;
+	status = ABORT_RSS_STATUS_G(be32_to_cpu(srqidx_status));
+	srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(srqidx_status));
+
+	/*
+	 * If this TCB had a srq buffer cached, then we must complete
+	 * it. For user mode, that means saving the srqidx in the
+	 * user/kernel status page for this qp.  For kernel mode, just
+	 * synthesize the CQE now.
+	 */
+	if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T5 && srqidx) {
+		if (ep->com.qp->ibqp.uobject)
+			t4_set_wq_in_error(&ep->com.qp->wq, srqidx);
+		else
+			c4iw_flush_srqidx(ep->com.qp, srqidx);
+	}
+}
+
 static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
 {
 	struct c4iw_ep *ep;
-	struct cpl_abort_rpl_rss *rpl = cplhdr(skb);
+	struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb);
 	int release = 0;
 	unsigned int tid = GET_TID(rpl);
 
@@ -1865,6 +1889,9 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
 		pr_warn("Abort rpl to freed endpoint\n");
 		return 0;
 	}
+
+	complete_cached_srq_buffers(ep, rpl->srqidx_status);
+
 	pr_debug("ep %p tid %u\n", ep, ep->hwtid);
 	mutex_lock(&ep->com.mutex);
 	switch (ep->com.state) {
@@ -2719,28 +2746,35 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
 
 static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 {
-	struct cpl_abort_req_rss *req = cplhdr(skb);
+	struct cpl_abort_req_rss6 *req = cplhdr(skb);
 	struct c4iw_ep *ep;
 	struct sk_buff *rpl_skb;
 	struct c4iw_qp_attributes attrs;
 	int ret;
 	int release = 0;
 	unsigned int tid = GET_TID(req);
+	u8 status;
+
 	u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
 
 	ep = get_ep_from_tid(dev, tid);
 	if (!ep)
 		return 0;
 
-	if (cxgb_is_neg_adv(req->status)) {
+	status = ABORT_RSS_STATUS_G(be32_to_cpu(req->srqidx_status));
+
+	if (cxgb_is_neg_adv(status)) {
 		pr_debug("Negative advice on abort- tid %u status %d (%s)\n",
-			 ep->hwtid, req->status, neg_adv_str(req->status));
+			 ep->hwtid, status, neg_adv_str(status));
 		ep->stats.abort_neg_adv++;
 		mutex_lock(&dev->rdev.stats.lock);
 		dev->rdev.stats.neg_adv++;
 		mutex_unlock(&dev->rdev.stats.lock);
 		goto deref_ep;
 	}
+
+	complete_cached_srq_buffers(ep, req->srqidx_status);
+
 	pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid,
 		 ep->com.state);
 	set_bit(PEER_ABORT, &ep->com.history);

diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
@@ -182,7 +182,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 	return ret;
 }
 
-static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
+static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq, u32 srqidx)
 {
 	struct t4_cqe cqe;
 
@@ -195,6 +195,8 @@ static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
 				 CQE_SWCQE_V(1) |
 				 CQE_QPID_V(wq->sq.qid));
 	cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
+	if (srqidx)
+		cqe.u.srcqe.abs_rqe_idx = cpu_to_be32(srqidx);
 	cq->sw_queue[cq->sw_pidx] = cqe;
 	t4_swcq_produce(cq);
 }
@@ -207,7 +209,7 @@ int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count)
 	pr_debug("wq %p cq %p rq.in_use %u skip count %u\n",
 		 wq, cq, wq->rq.in_use, count);
 	while (in_use--) {
-		insert_recv_cqe(wq, cq);
+		insert_recv_cqe(wq, cq, 0);
 		flushed++;
 	}
 	return flushed;
@@ -458,6 +460,72 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
 	pr_debug("cq %p count %d\n", cq, *count);
 }
 
+static void post_pending_srq_wrs(struct t4_srq *srq)
+{
+	struct t4_srq_pending_wr *pwr;
+	u16 idx = 0;
+
+	while (srq->pending_in_use) {
+		pwr = &srq->pending_wrs[srq->pending_cidx];
+		srq->sw_rq[srq->pidx].wr_id = pwr->wr_id;
+		srq->sw_rq[srq->pidx].valid = 1;
+
+		pr_debug("%s posting pending cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n",
+			 __func__,
+			 srq->cidx, srq->pidx, srq->wq_pidx,
+			 srq->in_use, srq->size,
+			 (unsigned long long)pwr->wr_id);
+
+		c4iw_copy_wr_to_srq(srq, &pwr->wqe, pwr->len16);
+		t4_srq_consume_pending_wr(srq);
+		t4_srq_produce(srq, pwr->len16);
+		idx += DIV_ROUND_UP(pwr->len16 * 16, T4_EQ_ENTRY_SIZE);
+	}
+
+	if (idx) {
+		t4_ring_srq_db(srq, idx, pwr->len16, &pwr->wqe);
+		srq->queue[srq->size].status.host_wq_pidx =
+			srq->wq_pidx;
+	}
+}
+
+static u64 reap_srq_cqe(struct t4_cqe *hw_cqe, struct t4_srq *srq)
+{
+	int rel_idx = CQE_ABS_RQE_IDX(hw_cqe) - srq->rqt_abs_idx;
+	u64 wr_id;
+
+	srq->sw_rq[rel_idx].valid = 0;
+	wr_id = srq->sw_rq[rel_idx].wr_id;
+
+	if (rel_idx == srq->cidx) {
+		pr_debug("%s in order cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n",
+			 __func__, rel_idx, srq->cidx, srq->pidx,
+			 srq->wq_pidx, srq->in_use, srq->size,
+			 (unsigned long long)srq->sw_rq[rel_idx].wr_id);
+		t4_srq_consume(srq);
+		while (srq->ooo_count && !srq->sw_rq[srq->cidx].valid) {
+			pr_debug("%s eat ooo cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n",
+				 __func__, srq->cidx, srq->pidx,
+				 srq->wq_pidx, srq->in_use,
+				 srq->size, srq->ooo_count,
+				 (unsigned long long)
+				 srq->sw_rq[srq->cidx].wr_id);
+			t4_srq_consume_ooo(srq);
+		}
+		if (srq->ooo_count == 0 && srq->pending_in_use)
+			post_pending_srq_wrs(srq);
+	} else {
+		pr_debug("%s ooo cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n",
+			 __func__, rel_idx, srq->cidx,
+			 srq->pidx, srq->wq_pidx,
+			 srq->in_use, srq->size,
+			 srq->ooo_count,
+			 (unsigned long long)srq->sw_rq[rel_idx].wr_id);
+		t4_srq_produce_ooo(srq);
+	}
+	return wr_id;
+}
+
 /*
  * poll_cq
  *
@@ -475,7 +543,8 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
  *    -EOVERFLOW    CQ overflow detected.
  */
 static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
-		   u8 *cqe_flushed, u64 *cookie, u32 *credit)
+		   u8 *cqe_flushed, u64 *cookie, u32 *credit,
+		   struct t4_srq *srq)
 {
 	int ret = 0;
 	struct t4_cqe *hw_cqe, read_cqe;
@@ -540,7 +609,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
 		 */
 		if (CQE_TYPE(hw_cqe) == 1) {
 			if (CQE_STATUS(hw_cqe))
-				t4_set_wq_in_error(wq);
+				t4_set_wq_in_error(wq, 0);
 			ret = -EAGAIN;
 			goto skip_cqe;
 		}
@@ -551,7 +620,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
 		 */
 		if (CQE_WRID_STAG(hw_cqe) == 1) {
 			if (CQE_STATUS(hw_cqe))
-				t4_set_wq_in_error(wq);
+				t4_set_wq_in_error(wq, 0);
 			ret = -EAGAIN;
 			goto skip_cqe;
 		}
@@ -576,7 +645,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
 
 	if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
 		*cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH);
-		t4_set_wq_in_error(wq);
+		t4_set_wq_in_error(wq, 0);
 	}
 
 	/*
@@ -590,15 +659,9 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
 		 * then we complete this with T4_ERR_MSN and mark the wq in
 		 * error.
 		 */
-
-		if (t4_rq_empty(wq)) {
-			t4_set_wq_in_error(wq);
-			ret = -EAGAIN;
-			goto skip_cqe;
-		}
 		if (unlikely(!CQE_STATUS(hw_cqe) &&
 			     CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) {
-			t4_set_wq_in_error(wq);
+			t4_set_wq_in_error(wq, 0);
 			hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN));
 		}
 		goto proc_cqe;
@@ -657,11 +720,16 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
 			c4iw_log_wr_stats(wq, hw_cqe);
 		t4_sq_consume(wq);
 	} else {
-		pr_debug("completing rq idx %u\n", wq->rq.cidx);
-		*cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
-		if (c4iw_wr_log)
-			c4iw_log_wr_stats(wq, hw_cqe);
-		t4_rq_consume(wq);
+		if (!srq) {
+			pr_debug("completing rq idx %u\n", wq->rq.cidx);
+			*cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
+			if (c4iw_wr_log)
+				c4iw_log_wr_stats(wq, hw_cqe);
+			t4_rq_consume(wq);
+		} else {
+			*cookie = reap_srq_cqe(hw_cqe, srq);
+		}
+		wq->rq.msn++;
 		goto skip_cqe;
 	}
 
@@ -685,7 +753,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
 }
 
 static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp,
-			      struct ib_wc *wc)
+			      struct ib_wc *wc, struct c4iw_srq *srq)
 {
 	struct t4_cqe uninitialized_var(cqe);
 	struct t4_wq *wq = qhp ? &qhp->wq : NULL;
@@ -694,7 +762,8 @@ static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp,
 	u64 cookie = 0;
 	int ret;
 
-	ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit);
+	ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit,
+		      srq ? &srq->wq : NULL);
 	if (ret)
 		goto out;
 
@@ -703,6 +772,13 @@ static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp,
 	wc->vendor_err = CQE_STATUS(&cqe);
 	wc->wc_flags = 0;
 
+	/*
+	 * Simulate a SRQ_LIMIT_REACHED HW notification if required.
+	 */
+	if (srq && !(srq->flags & T4_SRQ_LIMIT_SUPPORT) && srq->armed &&
+	    srq->wq.in_use < srq->srq_limit)
+		c4iw_dispatch_srq_limit_reached_event(srq);
+
 	pr_debug("qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x lo 0x%x cookie 0x%llx\n",
 		 CQE_QPID(&cqe),
 		 CQE_TYPE(&cqe), CQE_OPCODE(&cqe),
@@ -828,6 +904,7 @@ static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp,
  */
 static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
 {
+	struct c4iw_srq *srq = NULL;
 	struct c4iw_qp *qhp = NULL;
 	struct t4_cqe *rd_cqe;
 	int ret;
@@ -840,10 +917,15 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
 	qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe));
 	if (qhp) {
 		spin_lock(&qhp->lock);
-		ret = __c4iw_poll_cq_one(chp, qhp, wc);
+		srq = qhp->srq;
+		if (srq)
+			spin_lock(&srq->lock);
+		ret = __c4iw_poll_cq_one(chp, qhp, wc, srq);
 		spin_unlock(&qhp->lock);
+		if (srq)
+			spin_unlock(&srq->lock);
 	} else {
-		ret = __c4iw_poll_cq_one(chp, NULL, wc);
+		ret = __c4iw_poll_cq_one(chp, NULL, wc, NULL);
 	}
 	return ret;
 }
@@ -1078,3 +1160,19 @@ int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
 	spin_unlock_irqrestore(&chp->lock, flag);
 	return ret;
 }
+
+void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx)
+{
+	struct c4iw_cq *rchp = to_c4iw_cq(qhp->ibqp.recv_cq);
+	unsigned long flag;
+
+	/* locking heirarchy: cq lock first, then qp lock. */
+	spin_lock_irqsave(&rchp->lock, flag);
+	spin_lock(&qhp->lock);
+
+	/* create a SRQ RECV CQE for srqidx */
+	insert_recv_cqe(&qhp->wq, &rchp->cq, srqidx);
+
+	spin_unlock(&qhp->lock);
+	spin_unlock_irqrestore(&rchp->lock, flag);
+}