Skip to content

Commit

Permalink
IB/hfi1: Add interlock between TID RDMA WRITE and other requests
Browse files Browse the repository at this point in the history
This locking mechanism is designed to prevent various memory corruption
scenarios from occurring when requests are pipelined, especially when
RDMA WRITE requests are interleaved with TID RDMA READ requests:
1. READ-AFTER-READ;
2. READ-AFTER-WRITE;
3. WRITE-AFTER-READ;
4. WRITE-AFTER-WRITE.
When memory corruption is likely, a request will be held back until
previous requests have been completed.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
  • Loading branch information
Kaike Wan authored and Doug Ledford committed Feb 5, 2019
1 parent 3c6cb20 commit c6c2311
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 2 deletions.
6 changes: 6 additions & 0 deletions drivers/infiniband/hw/hfi1/rc.c
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,12 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
}

e = &qp->s_ack_queue[qp->s_tail_ack_queue];
/* Check for tid write fence */
if ((qpriv->s_flags & HFI1_R_TID_WAIT_INTERLCK) ||
hfi1_tid_rdma_ack_interlock(qp, e)) {
iowait_set_flag(&qpriv->s_iowait, IOWAIT_PENDING_IB);
goto bail;
}
if (e->opcode == OP(RDMA_READ_REQUEST)) {
/*
* If a RDMA read response is being resent and
Expand Down
46 changes: 44 additions & 2 deletions drivers/infiniband/hw/hfi1/tid_rdma.c
Original file line number Diff line number Diff line change
Expand Up @@ -2179,6 +2179,7 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet,
req->state = TID_REQUEST_RESEND;
req->cur_seg = req->comp_seg;
}
qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;
}
/* Re-process old requests.*/
if (qp->s_acked_ack_queue == qp->s_tail_ack_queue)
Expand Down Expand Up @@ -3229,6 +3230,7 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
struct rvt_swqe *prev;
struct hfi1_qp_priv *priv = qp->priv;
u32 s_prev;
struct tid_rdma_request *req;

s_prev = (qp->s_cur == 0 ? qp->s_size : qp->s_cur) - 1;
prev = rvt_get_swqe_ptr(qp, s_prev);
Expand All @@ -3240,14 +3242,28 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
case IB_WR_RDMA_WRITE:
switch (prev->wr.opcode) {
case IB_WR_TID_RDMA_WRITE:
req = wqe_to_tid_req(prev);
if (req->ack_seg != req->total_segs)
goto interlock;
default:
break;
}
case IB_WR_RDMA_READ:
break;
if (prev->wr.opcode != IB_WR_TID_RDMA_WRITE)
break;
/* fall through */
case IB_WR_TID_RDMA_READ:
switch (prev->wr.opcode) {
case IB_WR_RDMA_READ:
if (qp->s_acked != qp->s_cur)
goto interlock;
break;
case IB_WR_TID_RDMA_WRITE:
req = wqe_to_tid_req(prev);
if (req->ack_seg != req->total_segs)
goto interlock;
default:
break;
}
Expand Down Expand Up @@ -5157,7 +5173,9 @@ static int make_tid_rdma_ack(struct rvt_qp *qp,
e = &qp->s_ack_queue[qpriv->r_tid_ack];
req = ack_to_tid_req(e);
flow = req->acked_tail;
}
} else if (req->ack_seg == req->total_segs &&
qpriv->s_flags & HFI1_R_TID_WAIT_INTERLCK)
qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;

hwords += hfi1_build_tid_rdma_write_ack(qp, e, ohdr, flow, &bth1,
&bth2);
Expand Down Expand Up @@ -5310,3 +5328,27 @@ bool hfi1_schedule_tid_send(struct rvt_qp *qp)
IOWAIT_PENDING_TID);
return false;
}

/**
 * hfi1_tid_rdma_ack_interlock - check the responder-side interlock
 * @qp: the QP whose ack queue is being processed
 * @e: the ack queue entry about to be processed
 *
 * Looks at the ack queue entry immediately before qp->s_tail_ack_queue
 * (wrapping from index 0 to rvt_size_atomic()).  If @e is a TID RDMA READ
 * or RDMA READ request and that previous entry is a TID RDMA WRITE request
 * whose ack_seg has not yet reached total_segs, HFI1_R_TID_WAIT_INTERLCK
 * is set in the QP private s_flags.
 *
 * Return: true if @e must be held back, false otherwise.
 */
bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e)
{
	struct hfi1_qp_priv *qpriv = qp->priv;
	struct hfi1_ibdev *ibdev = to_idev(qp->ibqp.device);
	struct rvt_ack_entry *prev_e;
	struct tid_rdma_request *prev_req;
	u32 prev_idx;

	/* Step back one slot in the ack queue, wrapping at index 0. */
	if (qp->s_tail_ack_queue == 0)
		prev_idx = rvt_size_atomic(&ibdev->rdi);
	else
		prev_idx = qp->s_tail_ack_queue - 1;
	prev_e = &qp->s_ack_queue[prev_idx];

	/* Only read-type requests are subject to this interlock. */
	if (e->opcode != TID_OP(READ_REQ) &&
	    e->opcode != OP(RDMA_READ_REQUEST))
		return false;

	/* ...and only when they directly follow a TID RDMA WRITE request. */
	if (prev_e->opcode != TID_OP(WRITE_REQ))
		return false;

	/* All segments of the previous write acked: nothing to wait for. */
	prev_req = ack_to_tid_req(prev_e);
	if (prev_req->ack_seg == prev_req->total_segs)
		return false;

	qpriv->s_flags |= HFI1_R_TID_WAIT_INTERLCK;
	return true;
}
9 changes: 9 additions & 0 deletions drivers/infiniband/hw/hfi1/tid_rdma.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,23 @@
* s_flags, there are no collisions.
*
* HFI1_S_TID_WAIT_INTERLCK - QP is waiting for requester interlock
* HFI1_R_TID_WAIT_INTERLCK - QP is waiting for responder interlock
*/
#define HFI1_S_TID_BUSY_SET BIT(0)
/* BIT(1) reserved for RVT_S_BUSY. */
#define HFI1_R_TID_RSC_TIMER BIT(2)
/* BIT(3) reserved for RVT_S_RESP_PENDING. */
/* BIT(4) reserved for RVT_S_ACK_PENDING. */
/* Requester side: QP is waiting for the requester interlock. */
#define HFI1_S_TID_WAIT_INTERLCK BIT(5)
/*
 * Responder side: QP is waiting for the responder interlock.
 * Set by hfi1_tid_rdma_ack_interlock(); cleared in make_tid_rdma_ack()
 * once req->ack_seg == req->total_segs, and in tid_rdma_rcv_error().
 */
#define HFI1_R_TID_WAIT_INTERLCK BIT(6)
/* BIT(7) - BIT(15) reserved for RVT_S_WAIT_*. */
/* BIT(16) reserved for RVT_S_SEND_ONE */
#define HFI1_S_TID_RETRY_TIMER BIT(17)
/* BIT(18) reserved for RVT_S_ECN. */
#define HFI1_R_TID_SW_PSN BIT(19)
/* BIT(26) reserved for HFI1_S_WAIT_HALT */
/* BIT(27) reserved for HFI1_S_WAIT_TID_RESP */
/* BIT(28) reserved for HFI1_S_WAIT_TID_SPACE */

/*
* Unlike regular IB RDMA VERBS, which do not require an entry
Expand Down Expand Up @@ -309,4 +316,6 @@ void _hfi1_do_tid_send(struct work_struct *work);

bool hfi1_schedule_tid_send(struct rvt_qp *qp);

bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e);

#endif /* HFI1_TID_RDMA_H */

0 comments on commit c6c2311

Please sign in to comment.