Skip to content

Commit

Permalink
RDMA/cxgb4: Support variable sized work requests
Browse files Browse the repository at this point in the history
T4 EQ entries are in multiples of 64 bytes.  Currently the RDMA SQ and
RQ use fixed sized entries composed of 4 EQ entries for the SQ and 2
EQ entries for the RQ.  For optimial latency with small IO, we need to
change this so the HW only needs to DMA the EQ entries actually used
by a given work request.

Implementation:

- add wq_pidx counter to track where we are in the EQ.  cidx/pidx are
  used for the sw sq/rq tracking and flow control.

- the variable part of work requests is the SGL.  Add new functions to
  build the SGL and/or immediate data directly in the EQ memory
  wrapping when needed.

- adjust the min burst size for the EQ contexts to 64B.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
  • Loading branch information
Steve Wise authored and Roland Dreier committed Jul 21, 2010
1 parent d3c814e commit d37ac31
Show file tree
Hide file tree
Showing 2 changed files with 130 additions and 122 deletions.
220 changes: 115 additions & 105 deletions drivers/infiniband/hw/cxgb4/qp.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
res->u.sqrq.dcaen_to_eqsize = cpu_to_be32(
V_FW_RI_RES_WR_DCAEN(0) |
V_FW_RI_RES_WR_DCACPU(0) |
V_FW_RI_RES_WR_FBMIN(3) |
V_FW_RI_RES_WR_FBMIN(2) |
V_FW_RI_RES_WR_FBMAX(3) |
V_FW_RI_RES_WR_CIDXFTHRESHO(0) |
V_FW_RI_RES_WR_CIDXFTHRESH(0) |
Expand All @@ -185,7 +185,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
res->u.sqrq.dcaen_to_eqsize = cpu_to_be32(
V_FW_RI_RES_WR_DCAEN(0) |
V_FW_RI_RES_WR_DCACPU(0) |
V_FW_RI_RES_WR_FBMIN(3) |
V_FW_RI_RES_WR_FBMIN(2) |
V_FW_RI_RES_WR_FBMAX(3) |
V_FW_RI_RES_WR_CIDXFTHRESHO(0) |
V_FW_RI_RES_WR_CIDXFTHRESH(0) |
Expand Down Expand Up @@ -235,12 +235,78 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
return -ENOMEM;
}

static int build_rdma_send(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,
struct ib_send_wr *wr, int max, u32 *plenp)
{
u8 *dstp, *srcp;
u32 plen = 0;
int i;
int rem, len;

dstp = (u8 *)immdp->data;
for (i = 0; i < wr->num_sge; i++) {
if ((plen + wr->sg_list[i].length) > max)
return -EMSGSIZE;
srcp = (u8 *)(unsigned long)wr->sg_list[i].addr;
plen += wr->sg_list[i].length;
rem = wr->sg_list[i].length;
while (rem) {
if (dstp == (u8 *)&sq->queue[sq->size])
dstp = (u8 *)sq->queue;
if (rem <= (u8 *)&sq->queue[sq->size] - dstp)
len = rem;
else
len = (u8 *)&sq->queue[sq->size] - dstp;
memcpy(dstp, srcp, len);
dstp += len;
srcp += len;
rem -= len;
}
}
immdp->op = FW_RI_DATA_IMMD;
immdp->r1 = 0;
immdp->r2 = 0;
immdp->immdlen = cpu_to_be32(plen);
*plenp = plen;
return 0;
}

static int build_isgl(__be64 *queue_start, __be64 *queue_end,
struct fw_ri_isgl *isglp, struct ib_sge *sg_list,
int num_sge, u32 *plenp)

{
int i;
u32 plen = 0;
__be64 *flitp = (__be64 *)isglp->sge;

for (i = 0; i < num_sge; i++) {
if ((plen + sg_list[i].length) < plen)
return -EMSGSIZE;
plen += sg_list[i].length;
*flitp = cpu_to_be64(((u64)sg_list[i].lkey << 32) |
sg_list[i].length);
if (++flitp == queue_end)
flitp = queue_start;
*flitp = cpu_to_be64(sg_list[i].addr);
if (++flitp == queue_end)
flitp = queue_start;
}
isglp->op = FW_RI_DATA_ISGL;
isglp->r1 = 0;
isglp->nsge = cpu_to_be16(num_sge);
isglp->r2 = 0;
if (plenp)
*plenp = plen;
return 0;
}

static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
struct ib_send_wr *wr, u8 *len16)
{
u32 plen;
int size;
u8 *datap;
int ret;

if (wr->num_sge > T4_MAX_SEND_SGE)
return -EINVAL;
Expand All @@ -267,43 +333,23 @@ static int build_rdma_send(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
default:
return -EINVAL;
}

plen = 0;
if (wr->num_sge) {
if (wr->send_flags & IB_SEND_INLINE) {
datap = (u8 *)wqe->send.u.immd_src[0].data;
for (i = 0; i < wr->num_sge; i++) {
if ((plen + wr->sg_list[i].length) >
T4_MAX_SEND_INLINE) {
return -EMSGSIZE;
}
plen += wr->sg_list[i].length;
memcpy(datap,
(void *)(unsigned long)wr->sg_list[i].addr,
wr->sg_list[i].length);
datap += wr->sg_list[i].length;
}
wqe->send.u.immd_src[0].op = FW_RI_DATA_IMMD;
wqe->send.u.immd_src[0].r1 = 0;
wqe->send.u.immd_src[0].r2 = 0;
wqe->send.u.immd_src[0].immdlen = cpu_to_be32(plen);
ret = build_immd(sq, wqe->send.u.immd_src, wr,
T4_MAX_SEND_INLINE, &plen);
if (ret)
return ret;
size = sizeof wqe->send + sizeof(struct fw_ri_immd) +
plen;
} else {
for (i = 0; i < wr->num_sge; i++) {
if ((plen + wr->sg_list[i].length) < plen)
return -EMSGSIZE;
plen += wr->sg_list[i].length;
wqe->send.u.isgl_src[0].sge[i].stag =
cpu_to_be32(wr->sg_list[i].lkey);
wqe->send.u.isgl_src[0].sge[i].len =
cpu_to_be32(wr->sg_list[i].length);
wqe->send.u.isgl_src[0].sge[i].to =
cpu_to_be64(wr->sg_list[i].addr);
}
wqe->send.u.isgl_src[0].op = FW_RI_DATA_ISGL;
wqe->send.u.isgl_src[0].r1 = 0;
wqe->send.u.isgl_src[0].nsge = cpu_to_be16(wr->num_sge);
wqe->send.u.isgl_src[0].r2 = 0;
ret = build_isgl((__be64 *)sq->queue,
(__be64 *)&sq->queue[sq->size],
wqe->send.u.isgl_src,
wr->sg_list, wr->num_sge, &plen);
if (ret)
return ret;
size = sizeof wqe->send + sizeof(struct fw_ri_isgl) +
wr->num_sge * sizeof(struct fw_ri_sge);
}
Expand All @@ -313,62 +359,40 @@ static int build_rdma_send(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
wqe->send.u.immd_src[0].r2 = 0;
wqe->send.u.immd_src[0].immdlen = 0;
size = sizeof wqe->send + sizeof(struct fw_ri_immd);
plen = 0;
}
*len16 = DIV_ROUND_UP(size, 16);
wqe->send.plen = cpu_to_be32(plen);
return 0;
}

static int build_rdma_write(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
struct ib_send_wr *wr, u8 *len16)
{
int i;
u32 plen;
int size;
u8 *datap;
int ret;

if (wr->num_sge > T4_MAX_WRITE_SGE)
if (wr->num_sge > T4_MAX_SEND_SGE)
return -EINVAL;
wqe->write.r2 = 0;
wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey);
wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr);
plen = 0;
if (wr->num_sge) {
if (wr->send_flags & IB_SEND_INLINE) {
datap = (u8 *)wqe->write.u.immd_src[0].data;
for (i = 0; i < wr->num_sge; i++) {
if ((plen + wr->sg_list[i].length) >
T4_MAX_WRITE_INLINE) {
return -EMSGSIZE;
}
plen += wr->sg_list[i].length;
memcpy(datap,
(void *)(unsigned long)wr->sg_list[i].addr,
wr->sg_list[i].length);
datap += wr->sg_list[i].length;
}
wqe->write.u.immd_src[0].op = FW_RI_DATA_IMMD;
wqe->write.u.immd_src[0].r1 = 0;
wqe->write.u.immd_src[0].r2 = 0;
wqe->write.u.immd_src[0].immdlen = cpu_to_be32(plen);
ret = build_immd(sq, wqe->write.u.immd_src, wr,
T4_MAX_WRITE_INLINE, &plen);
if (ret)
return ret;
size = sizeof wqe->write + sizeof(struct fw_ri_immd) +
plen;
} else {
for (i = 0; i < wr->num_sge; i++) {
if ((plen + wr->sg_list[i].length) < plen)
return -EMSGSIZE;
plen += wr->sg_list[i].length;
wqe->write.u.isgl_src[0].sge[i].stag =
cpu_to_be32(wr->sg_list[i].lkey);
wqe->write.u.isgl_src[0].sge[i].len =
cpu_to_be32(wr->sg_list[i].length);
wqe->write.u.isgl_src[0].sge[i].to =
cpu_to_be64(wr->sg_list[i].addr);
}
wqe->write.u.isgl_src[0].op = FW_RI_DATA_ISGL;
wqe->write.u.isgl_src[0].r1 = 0;
wqe->write.u.isgl_src[0].nsge =
cpu_to_be16(wr->num_sge);
wqe->write.u.isgl_src[0].r2 = 0;
ret = build_isgl((__be64 *)sq->queue,
(__be64 *)&sq->queue[sq->size],
wqe->write.u.isgl_src,
wr->sg_list, wr->num_sge, &plen);
if (ret)
return ret;
size = sizeof wqe->write + sizeof(struct fw_ri_isgl) +
wr->num_sge * sizeof(struct fw_ri_sge);
}
Expand All @@ -378,6 +402,7 @@ static int build_rdma_write(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
wqe->write.u.immd_src[0].r2 = 0;
wqe->write.u.immd_src[0].immdlen = 0;
size = sizeof wqe->write + sizeof(struct fw_ri_immd);
plen = 0;
}
*len16 = DIV_ROUND_UP(size, 16);
wqe->write.plen = cpu_to_be32(plen);
Expand Down Expand Up @@ -416,29 +441,13 @@ static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
struct ib_recv_wr *wr, u8 *len16)
{
int i;
int plen = 0;
int ret;

for (i = 0; i < wr->num_sge; i++) {
if ((plen + wr->sg_list[i].length) < plen)
return -EMSGSIZE;
plen += wr->sg_list[i].length;
wqe->recv.isgl.sge[i].stag =
cpu_to_be32(wr->sg_list[i].lkey);
wqe->recv.isgl.sge[i].len =
cpu_to_be32(wr->sg_list[i].length);
wqe->recv.isgl.sge[i].to =
cpu_to_be64(wr->sg_list[i].addr);
}
for (; i < T4_MAX_RECV_SGE; i++) {
wqe->recv.isgl.sge[i].stag = 0;
wqe->recv.isgl.sge[i].len = 0;
wqe->recv.isgl.sge[i].to = 0;
}
wqe->recv.isgl.op = FW_RI_DATA_ISGL;
wqe->recv.isgl.r1 = 0;
wqe->recv.isgl.nsge = cpu_to_be16(wr->num_sge);
wqe->recv.isgl.r2 = 0;
ret = build_isgl((__be64 *)qhp->wq.rq.queue,
(__be64 *)&qhp->wq.rq.queue[qhp->wq.rq.size],
&wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL);
if (ret)
return ret;
*len16 = DIV_ROUND_UP(sizeof wqe->recv +
wr->num_sge * sizeof(struct fw_ri_sge), 16);
return 0;
Expand Down Expand Up @@ -547,7 +556,9 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
*bad_wr = wr;
break;
}
wqe = &qhp->wq.sq.queue[qhp->wq.sq.pidx];
wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue +
qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE);

fw_flags = 0;
if (wr->send_flags & IB_SEND_SOLICITED)
fw_flags |= FW_RI_SOLICITED_EVENT_FLAG;
Expand All @@ -564,12 +575,12 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
swsqe->opcode = FW_RI_SEND;
else
swsqe->opcode = FW_RI_SEND_WITH_INV;
err = build_rdma_send(wqe, wr, &len16);
err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16);
break;
case IB_WR_RDMA_WRITE:
fw_opcode = FW_RI_RDMA_WRITE_WR;
swsqe->opcode = FW_RI_RDMA_WRITE;
err = build_rdma_write(wqe, wr, &len16);
err = build_rdma_write(&qhp->wq.sq, wqe, wr, &len16);
break;
case IB_WR_RDMA_READ:
case IB_WR_RDMA_READ_WITH_INV:
Expand Down Expand Up @@ -619,8 +630,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
swsqe->opcode, swsqe->read_len);
wr = wr->next;
num_wrs--;
t4_sq_produce(&qhp->wq);
idx++;
t4_sq_produce(&qhp->wq, len16);
idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
}
if (t4_wq_db_enabled(&qhp->wq))
t4_ring_sq_db(&qhp->wq, idx);
Expand Down Expand Up @@ -656,7 +667,9 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
*bad_wr = wr;
break;
}
wqe = &qhp->wq.rq.queue[qhp->wq.rq.pidx];
wqe = (union t4_recv_wr *)((u8 *)qhp->wq.rq.queue +
qhp->wq.rq.wq_pidx *
T4_EQ_ENTRY_SIZE);
if (num_wrs)
err = build_rdma_recv(qhp, wqe, wr, &len16);
else
Expand All @@ -675,15 +688,12 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
wqe->recv.r2[1] = 0;
wqe->recv.r2[2] = 0;
wqe->recv.len16 = len16;
if (len16 < 5)
wqe->flits[8] = 0;

PDBG("%s cookie 0x%llx pidx %u\n", __func__,
(unsigned long long) wr->wr_id, qhp->wq.rq.pidx);
t4_rq_produce(&qhp->wq);
t4_rq_produce(&qhp->wq, len16);
idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
wr = wr->next;
num_wrs--;
idx++;
}
if (t4_wq_db_enabled(&qhp->wq))
t4_ring_rq_db(&qhp->wq, idx);
Expand Down
Loading

0 comments on commit d37ac31

Please sign in to comment.