Skip to content

Commit

Permalink
IB/core: Add support for "send with invalidate" work requests
Browse files Browse the repository at this point in the history
Add a new IB_WR_SEND_WITH_INV send opcode that can be used to mark a
"send with invalidate" work request as defined in the iWARP verbs and
the InfiniBand base memory management extensions.  Also put "imm_data"
and a new "invalidate_rkey" member in a new "ex" union in struct
ib_send_wr. The invalidate_rkey member can be used to pass in an
R_Key/STag to be invalidated.  Add this new union to struct
ib_uverbs_send_wr.  Add code to copy the invalidate_rkey field in
ib_uverbs_post_send().

Fix up low-level drivers to deal with the change to struct ib_send_wr,
and just remove the imm_data initialization from net/sunrpc/xprtrdma/,
since that code never does any send with immediate operations.

Also, move the existing IB_DEVICE_SEND_W_INV flag to a new bit, since
the iWARP drivers currently in the tree set the bit.  The amso1100
driver at least will silently fail to honor the IB_SEND_INVALIDATE bit
if passed in as part of userspace send requests (since it does not
implement kernel bypass work request queueing).  Remove the flag from
all existing drivers that set it until we know which ones are OK.

The values chosen for the new flag is not consecutive to avoid clashing
with flags defined in the XRC patches, which are not merged yet but
which are already in use and are likely to be merged soon.

This resurrects a patch sent long ago by Mikkel Hagen <mhagen@iol.unh.edu>.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
  • Loading branch information
Roland Dreier committed Apr 17, 2008
1 parent e7eacd3 commit 0f39cf3
Show file tree
Hide file tree
Showing 15 changed files with 46 additions and 31 deletions.
13 changes: 11 additions & 2 deletions drivers/infiniband/core/uverbs_cmd.c
Original file line number Diff line number Diff line change
Expand Up @@ -1463,7 +1463,6 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
next->num_sge = user_wr->num_sge;
next->opcode = user_wr->opcode;
next->send_flags = user_wr->send_flags;
next->imm_data = (__be32 __force) user_wr->imm_data;

if (is_ud) {
next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah,
Expand All @@ -1476,14 +1475,24 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
} else {
switch (next->opcode) {
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
next->ex.imm_data =
(__be32 __force) user_wr->ex.imm_data;
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_READ:
next->wr.rdma.remote_addr =
user_wr->wr.rdma.remote_addr;
next->wr.rdma.rkey =
user_wr->wr.rdma.rkey;
break;
case IB_WR_SEND_WITH_IMM:
next->ex.imm_data =
(__be32 __force) user_wr->ex.imm_data;
break;
case IB_WR_SEND_WITH_INV:
next->ex.invalidate_rkey =
user_wr->ex.invalidate_rkey;
break;
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
next->wr.atomic.remote_addr =
Expand Down
2 changes: 1 addition & 1 deletion drivers/infiniband/hw/amso1100/c2_rnic.c
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ int __devinit c2_rnic_init(struct c2_dev *c2dev)
IB_DEVICE_CURR_QP_STATE_MOD |
IB_DEVICE_SYS_IMAGE_GUID |
IB_DEVICE_ZERO_STAG |
IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW);
IB_DEVICE_MEM_WINDOW);

/* Allocate the qptr_array */
c2dev->qptr_array = vmalloc(C2_MAX_CQS * sizeof(void *));
Expand Down
3 changes: 1 addition & 2 deletions drivers/infiniband/hw/cxgb3/iwch_provider.c
Original file line number Diff line number Diff line change
Expand Up @@ -1109,8 +1109,7 @@ int iwch_register_device(struct iwch_dev *dev)
memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
dev->ibdev.owner = THIS_MODULE;
dev->device_cap_flags =
(IB_DEVICE_ZERO_STAG |
IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW);
(IB_DEVICE_ZERO_STAG | IB_DEVICE_MEM_WINDOW);

dev->ibdev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
Expand Down
4 changes: 2 additions & 2 deletions drivers/infiniband/hw/cxgb3/iwch_qp.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
wqe->send.reserved[2] = 0;
if (wr->opcode == IB_WR_SEND_WITH_IMM) {
plen = 4;
wqe->send.sgl[0].stag = wr->imm_data;
wqe->send.sgl[0].stag = wr->ex.imm_data;
wqe->send.sgl[0].len = __constant_cpu_to_be32(0);
wqe->send.num_sgle = __constant_cpu_to_be32(0);
*flit_cnt = 5;
Expand Down Expand Up @@ -112,7 +112,7 @@ static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,

if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
plen = 4;
wqe->write.sgl[0].stag = wr->imm_data;
wqe->write.sgl[0].stag = wr->ex.imm_data;
wqe->write.sgl[0].len = __constant_cpu_to_be32(0);
wqe->write.num_sgle = __constant_cpu_to_be32(0);
*flit_cnt = 6;
Expand Down
2 changes: 1 addition & 1 deletion drivers/infiniband/hw/ehca/ehca_reqs.c
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
/* this might not work as long as HW does not support it */
wqe_p->immediate_data = be32_to_cpu(send_wr->imm_data);
wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data);
wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT;
}

Expand Down
8 changes: 4 additions & 4 deletions drivers/infiniband/hw/ipath/ipath_rc.c
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ int ipath_make_rc_req(struct ipath_qp *qp)
else {
qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.imm_data;
ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
Expand Down Expand Up @@ -346,7 +346,7 @@ int ipath_make_rc_req(struct ipath_qp *qp)
qp->s_state =
OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after RETH */
ohdr->u.rc.imm_data = wqe->wr.imm_data;
ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
Expand Down Expand Up @@ -490,7 +490,7 @@ int ipath_make_rc_req(struct ipath_qp *qp)
else {
qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.imm_data;
ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
Expand Down Expand Up @@ -526,7 +526,7 @@ int ipath_make_rc_req(struct ipath_qp *qp)
else {
qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.imm_data;
ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
Expand Down
4 changes: 2 additions & 2 deletions drivers/infiniband/hw/ipath/ipath_ruc.c
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
switch (wqe->wr.opcode) {
case IB_WR_SEND_WITH_IMM:
wc.wc_flags = IB_WC_WITH_IMM;
wc.imm_data = wqe->wr.imm_data;
wc.imm_data = wqe->wr.ex.imm_data;
/* FALLTHROUGH */
case IB_WR_SEND:
if (!ipath_get_rwqe(qp, 0)) {
Expand Down Expand Up @@ -339,7 +339,7 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
goto err;
}
wc.wc_flags = IB_WC_WITH_IMM;
wc.imm_data = wqe->wr.imm_data;
wc.imm_data = wqe->wr.ex.imm_data;
if (!ipath_get_rwqe(qp, 1))
goto rnr_nak;
/* FALLTHROUGH */
Expand Down
8 changes: 4 additions & 4 deletions drivers/infiniband/hw/ipath/ipath_uc.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ int ipath_make_uc_req(struct ipath_qp *qp)
qp->s_state =
OP(SEND_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.imm_data;
ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
Expand Down Expand Up @@ -123,7 +123,7 @@ int ipath_make_uc_req(struct ipath_qp *qp)
qp->s_state =
OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after the RETH */
ohdr->u.rc.imm_data = wqe->wr.imm_data;
ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
Expand Down Expand Up @@ -152,7 +152,7 @@ int ipath_make_uc_req(struct ipath_qp *qp)
else {
qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.imm_data;
ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
Expand All @@ -177,7 +177,7 @@ int ipath_make_uc_req(struct ipath_qp *qp)
qp->s_state =
OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.imm_data;
ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
Expand Down
4 changes: 2 additions & 2 deletions drivers/infiniband/hw/ipath/ipath_ud.c
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)

if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
wc.wc_flags = IB_WC_WITH_IMM;
wc.imm_data = swqe->wr.imm_data;
wc.imm_data = swqe->wr.ex.imm_data;
} else {
wc.wc_flags = 0;
wc.imm_data = 0;
Expand Down Expand Up @@ -327,7 +327,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
}
if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
qp->s_hdrwords++;
ohdr->u.ud.imm_data = wqe->wr.imm_data;
ohdr->u.ud.imm_data = wqe->wr.ex.imm_data;
bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
} else
bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
Expand Down
4 changes: 2 additions & 2 deletions drivers/infiniband/hw/mlx4/qp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1249,7 +1249,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
case IB_WR_SEND_WITH_IMM:
sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
sqp->ud_header.immediate_present = 1;
sqp->ud_header.immediate_data = wr->imm_data;
sqp->ud_header.immediate_data = wr->ex.imm_data;
break;
default:
return -EINVAL;
Expand Down Expand Up @@ -1492,7 +1492,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,

if (wr->opcode == IB_WR_SEND_WITH_IMM ||
wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
ctrl->imm = wr->imm_data;
ctrl->imm = wr->ex.imm_data;
else
ctrl->imm = 0;

Expand Down
6 changes: 3 additions & 3 deletions drivers/infiniband/hw/mthca/mthca_qp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1532,7 +1532,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
case IB_WR_SEND_WITH_IMM:
sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
sqp->ud_header.immediate_present = 1;
sqp->ud_header.immediate_data = wr->imm_data;
sqp->ud_header.immediate_data = wr->ex.imm_data;
break;
default:
return -EINVAL;
Expand Down Expand Up @@ -1679,7 +1679,7 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
cpu_to_be32(1);
if (wr->opcode == IB_WR_SEND_WITH_IMM ||
wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
((struct mthca_next_seg *) wqe)->imm = wr->imm_data;
((struct mthca_next_seg *) wqe)->imm = wr->ex.imm_data;

wqe += sizeof (struct mthca_next_seg);
size = sizeof (struct mthca_next_seg) / 16;
Expand Down Expand Up @@ -2020,7 +2020,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
cpu_to_be32(1);
if (wr->opcode == IB_WR_SEND_WITH_IMM ||
wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
((struct mthca_next_seg *) wqe)->imm = wr->imm_data;
((struct mthca_next_seg *) wqe)->imm = wr->ex.imm_data;

wqe += sizeof (struct mthca_next_seg);
size = sizeof (struct mthca_next_seg) / 16;
Expand Down
2 changes: 1 addition & 1 deletion drivers/infiniband/hw/nes/nes_hw.c
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
nesadapter->base_pd = 1;

nesadapter->device_cap_flags =
IB_DEVICE_ZERO_STAG | IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW;
IB_DEVICE_ZERO_STAG | IB_DEVICE_MEM_WINDOW;

nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter)
[(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]);
Expand Down
5 changes: 4 additions & 1 deletion include/rdma/ib_user_verbs.h
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,10 @@ struct ib_uverbs_send_wr {
__u32 num_sge;
__u32 opcode;
__u32 send_flags;
__u32 imm_data;
union {
__u32 imm_data;
__u32 invalidate_rkey;
} ex;
union {
struct {
__u64 remote_addr;
Expand Down
11 changes: 8 additions & 3 deletions include/rdma/ib_verbs.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ enum ib_device_cap_flags {
IB_DEVICE_SRQ_RESIZE = (1<<13),
IB_DEVICE_N_NOTIFY_CQ = (1<<14),
IB_DEVICE_ZERO_STAG = (1<<15),
IB_DEVICE_SEND_W_INV = (1<<16),
IB_DEVICE_RESERVED = (1<<16), /* old SEND_W_INV */
IB_DEVICE_MEM_WINDOW = (1<<17),
/*
* Devices should set IB_DEVICE_UD_IP_SUM if they support
Expand All @@ -105,6 +105,7 @@ enum ib_device_cap_flags {
*/
IB_DEVICE_UD_IP_CSUM = (1<<18),
IB_DEVICE_UD_TSO = (1<<19),
IB_DEVICE_SEND_W_INV = (1<<21),
};

enum ib_atomic_cap {
Expand Down Expand Up @@ -625,7 +626,8 @@ enum ib_wr_opcode {
IB_WR_RDMA_READ,
IB_WR_ATOMIC_CMP_AND_SWP,
IB_WR_ATOMIC_FETCH_AND_ADD,
IB_WR_LSO
IB_WR_LSO,
IB_WR_SEND_WITH_INV,
};

enum ib_send_flags {
Expand All @@ -649,7 +651,10 @@ struct ib_send_wr {
int num_sge;
enum ib_wr_opcode opcode;
int send_flags;
__be32 imm_data;
union {
__be32 imm_data;
u32 invalidate_rkey;
} ex;
union {
struct {
u64 remote_addr;
Expand Down
1 change: 0 additions & 1 deletion net/sunrpc/xprtrdma/verbs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1573,7 +1573,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
send_wr.sg_list = req->rl_send_iov;
send_wr.num_sge = req->rl_niovs;
send_wr.opcode = IB_WR_SEND;
send_wr.imm_data = 0;
if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */
ib_dma_sync_single_for_device(ia->ri_id->device,
req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
Expand Down

0 comments on commit 0f39cf3

Please sign in to comment.