Skip to content

Commit

Permalink
xprtrdma: Move Receive posting to Receive handler
Browse files Browse the repository at this point in the history
Receive completion and Reply handling are done by a BOUND
workqueue, meaning they run on only one CPU.

Posting receives is currently done in the send_request path, which
on large systems is typically done on a different CPU than the one
handling Receive completions. This results in movement of
Receive-related cachelines between the sending and receiving CPUs.

More importantly, it means that currently Receives are posted while
the transport's write lock is held, which is unnecessary and costly.

Finally, allocation of Receive buffers is performed on-demand in
the Receive completion handler. This helps guarantee that they are
allocated on the same NUMA node as the CPU that handles Receive
completions.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
  • Loading branch information
Chuck Lever authored and Anna Schumaker committed May 7, 2018
1 parent 0e0b854 commit 7c8d9e7
Show file tree
Hide file tree
Showing 6 changed files with 150 additions and 129 deletions.
40 changes: 35 additions & 5 deletions include/trace/events/rpcrdma.h
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,39 @@ TRACE_EVENT(xprtrdma_post_recv,
)
);

TRACE_EVENT(xprtrdma_post_recvs,
TP_PROTO(
const struct rpcrdma_xprt *r_xprt,
unsigned int count,
int status
),

TP_ARGS(r_xprt, count, status),

TP_STRUCT__entry(
__field(const void *, r_xprt)
__field(unsigned int, count)
__field(int, status)
__field(int, posted)
__string(addr, rpcrdma_addrstr(r_xprt))
__string(port, rpcrdma_portstr(r_xprt))
),

TP_fast_assign(
__entry->r_xprt = r_xprt;
__entry->count = count;
__entry->status = status;
__entry->posted = r_xprt->rx_buf.rb_posted_receives;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),

TP_printk("peer=[%s]:%s r_xprt=%p: %u new recvs, %d active (rc %d)",
__get_str(addr), __get_str(port), __entry->r_xprt,
__entry->count, __entry->posted, __entry->status
)
);

/**
** Completion events
**/
Expand Down Expand Up @@ -800,7 +833,6 @@ TRACE_EVENT(xprtrdma_allocate,
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(const void *, req)
__field(const void *, rep)
__field(size_t, callsize)
__field(size_t, rcvsize)
),
Expand All @@ -809,15 +841,13 @@ TRACE_EVENT(xprtrdma_allocate,
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
__entry->req = req;
__entry->rep = req ? req->rl_reply : NULL;
__entry->callsize = task->tk_rqstp->rq_callsize;
__entry->rcvsize = task->tk_rqstp->rq_rcvsize;
),

TP_printk("task:%u@%u req=%p rep=%p (%zu, %zu)",
TP_printk("task:%u@%u req=%p (%zu, %zu)",
__entry->task_id, __entry->client_id,
__entry->req, __entry->rep,
__entry->callsize, __entry->rcvsize
__entry->req, __entry->callsize, __entry->rcvsize
)
);

Expand Down
32 changes: 6 additions & 26 deletions net/sunrpc/xprtrdma/backchannel.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,23 +71,6 @@ static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt,
return -ENOMEM;
}

/* Allocate and add receive buffers to the rpcrdma_buffer's
* existing list of rep's. These are released when the
* transport is destroyed.
*/
static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
unsigned int count)
{
int rc = 0;

while (count--) {
rc = rpcrdma_create_rep(r_xprt);
if (rc)
break;
}
return rc;
}

/**
* xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests
* @xprt: transport associated with these backchannel resources
Expand Down Expand Up @@ -116,14 +99,6 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
if (rc)
goto out_free;

rc = rpcrdma_bc_setup_reps(r_xprt, reqs);
if (rc)
goto out_free;

rc = rpcrdma_ep_post_extra_recv(r_xprt, reqs);
if (rc)
goto out_free;

r_xprt->rx_buf.rb_bc_srv_max_requests = reqs;
request_module("svcrdma");
trace_xprtrdma_cb_setup(r_xprt, reqs);
Expand Down Expand Up @@ -228,6 +203,7 @@ int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
if (rc < 0)
goto failed_marshal;

rpcrdma_post_recvs(r_xprt, true);
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
goto drop_connection;
return 0;
Expand Down Expand Up @@ -268,10 +244,14 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs)
*/
void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
{
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpc_xprt *xprt = rqst->rq_xprt;

dprintk("RPC: %s: freeing rqst %p (req %p)\n",
__func__, rqst, rpcr_to_rdmar(rqst));
__func__, rqst, req);

rpcrdma_recv_buffer_put(req->rl_reply);
req->rl_reply = NULL;

spin_lock_bh(&xprt->bc_pa_lock);
list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
Expand Down
22 changes: 7 additions & 15 deletions net/sunrpc/xprtrdma/rpc_rdma.c
Original file line number Diff line number Diff line change
Expand Up @@ -1027,8 +1027,6 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)

out_short:
pr_warn("RPC/RDMA short backward direction call\n");
if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep))
xprt_disconnect_done(&r_xprt->rx_xprt);
return true;
}
#else /* CONFIG_SUNRPC_BACKCHANNEL */
Expand Down Expand Up @@ -1334,13 +1332,14 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
u32 credits;
__be32 *p;

--buf->rb_posted_receives;

if (rep->rr_hdrbuf.head[0].iov_len == 0)
goto out_badstatus;

/* Fixed transport header fields */
xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
rep->rr_hdrbuf.head[0].iov_base);

/* Fixed transport header fields */
p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
if (unlikely(!p))
goto out_shortreply;
Expand Down Expand Up @@ -1379,17 +1378,10 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)

trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);

rpcrdma_post_recvs(r_xprt, false);
queue_work(rpcrdma_receive_wq, &rep->rr_work);
return;

out_badstatus:
rpcrdma_recv_buffer_put(rep);
if (r_xprt->rx_ep.rep_connected == 1) {
r_xprt->rx_ep.rep_connected = -EIO;
rpcrdma_conn_func(&r_xprt->rx_ep);
}
return;

out_badversion:
trace_xprtrdma_reply_vers(rep);
goto repost;
Expand All @@ -1409,7 +1401,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
* receive buffer before returning.
*/
repost:
r_xprt->rx_stats.bad_reply_count++;
if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep))
rpcrdma_recv_buffer_put(rep);
rpcrdma_post_recvs(r_xprt, false);
out_badstatus:
rpcrdma_recv_buffer_put(rep);
}
3 changes: 0 additions & 3 deletions net/sunrpc/xprtrdma/transport.c
Original file line number Diff line number Diff line change
Expand Up @@ -722,9 +722,6 @@ xprt_rdma_send_request(struct rpc_task *task)
if (rc < 0)
goto failed_marshal;

if (req->rl_reply == NULL) /* e.g. reconnection */
rpcrdma_recv_buffer_get(req);

/* Must suppress retransmit to maintain credits */
if (rqst->rq_connect_cookie == xprt->connect_cookie)
goto drop_connection;
Expand Down
Loading

0 comments on commit 7c8d9e7

Please sign in to comment.