xprtrdma: Avoid deadlock when credit window is reset
Update the cwnd while processing the server's reply.  Otherwise the
next task on the xprt_sending queue is still subject to the old
credit window. Currently, no task is awoken if the old congestion
window is still exceeded, even if the new window is larger, and a
deadlock results.
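
To see the hang, consider how the generic sunrpc layer gates senders. The model below is a minimal, self-contained paraphrase of the window check in __xprt_get_cong() (net/sunrpc/xprt.c); the struct and function names are illustrative, not kernel code. A task may take a congestion slot only while cong stays below cwnd, and queued tasks are re-checked only when a slot is released or the window grows.

/* Minimal model of the sunrpc congestion gate (paraphrasing the check
 * in __xprt_get_cong(); simplified, illustrative names). */
#include <stdio.h>

#define RPC_CWNDSHIFT	8U
#define RPC_CWNDSCALE	(1U << RPC_CWNDSHIFT)

struct xprt_model {
	unsigned long cong;	/* slots currently held by requests */
	unsigned long cwnd;	/* current window: credits << RPC_CWNDSHIFT */
};

static int get_cong(struct xprt_model *xprt)
{
	if (xprt->cong + RPC_CWNDSCALE > xprt->cwnd)
		return 0;		/* window full: task stays queued */
	xprt->cong += RPC_CWNDSCALE;
	return 1;
}

int main(void)
{
	/* Reconnect scenario: cong still reflects the old, larger window,
	 * but cwnd has been reset to a single credit. */
	struct xprt_model xprt = { .cong = 4 * RPC_CWNDSCALE,
				   .cwnd = 1 * RPC_CWNDSCALE };
	printf("may send? %d\n", get_cong(&xprt));	/* prints 0: blocked */
	return 0;
}

In that state, nothing re-ran the check when the server's replies granted a bigger window, so every queued task slept forever.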

This is an issue during a transport reconnect. Servers don't
normally shrink the credit window, but the client does reset it to
1 when reconnecting so the server can safely grow it again.
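
In code terms, that client-side reset amounts to the following one-liner (illustrative: the exact statement and its location in the xprtrdma connect path are assumed here, but rb_credits is the real field, shown in the xprt_rdma.h diff below):

/* Illustrative: on (re)connect, fall back to a single credit so the
 * server can safely grow the window again through its replies. */
atomic_set(&r_xprt->rx_buf.rb_credits, 1);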

As a minor optimization, remove the hack of grabbing the initial
cwnd size (which happens to be RPC_CWNDSCALE) and using that value
as the congestion scaling factor. The scaling value is invariant,
and we are better off without the multiplication operation.
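
For reference, the constants involved, as defined in include/linux/sunrpc/xprt.h of this era:

#define RPC_CWNDSHIFT		(8U)
#define RPC_CWNDSCALE		(1U << RPC_CWNDSHIFT)
#define RPC_INITCWND		RPC_CWNDSCALE

Because RPC_CWNDSCALE is a compile-time power of two, credits * rb_cwndscale collapses to credits << RPC_CWNDSHIFT, which is exactly what the rpc_rdma.c hunk below uses.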

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

Chuck Lever authored and Anna Schumaker committed Jun 4, 2014
1 parent 4f4cf5a · commit e7ce710
Showing 3 changed files with 7 additions and 19 deletions.

net/sunrpc/xprtrdma/rpc_rdma.c: 6 additions, 0 deletions

@@ -716,6 +716,7 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 	__be32 *iptr;
 	int rdmalen, status;
+	unsigned long cwnd;
 
 	/* Check status. If bad, signal disconnect and return rep to pool */
 	if (rep->rr_len == ~0U) {
@@ -845,6 +846,11 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
 		break;
 	}
 
+	cwnd = xprt->cwnd;
+	xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT;
+	if (xprt->cwnd > cwnd)
+		xprt_release_rqst_cong(rqst->rq_task);
+
 	dprintk("RPC:       %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
 			__func__, xprt, rqst, status);
 	xprt_complete_rqst(rqst->rq_task, status);
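
The xprt_release_rqst_cong() call is what breaks the deadlock: it returns this request's congestion slot and wakes the next task parked on the xprt_sending queue, which now sees the enlarged window. Roughly, paraphrased from net/sunrpc/xprt.c of the same era (dprintk calls and locking detail omitted):

static void __xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
	if (!req->rq_cong)
		return;
	req->rq_cong = 0;
	xprt->cong -= RPC_CWNDSCALE;
	__xprt_lock_write_next_cong(xprt);	/* wake the next queued sender */
}

void xprt_release_rqst_cong(struct rpc_task *task)
{
	__xprt_put_cong(task->tk_xprt, task->tk_rqstp);
}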

net/sunrpc/xprtrdma/transport.c: 1 addition, 18 deletions

@@ -448,23 +448,6 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
 	}
 }
 
-static int
-xprt_rdma_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
-{
-	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
-	int credits = atomic_read(&r_xprt->rx_buf.rb_credits);
-
-	/* == RPC_CWNDSCALE @ init, but *after* setup */
-	if (r_xprt->rx_buf.rb_cwndscale == 0UL) {
-		r_xprt->rx_buf.rb_cwndscale = xprt->cwnd;
-		dprintk("RPC:       %s: cwndscale %lu\n", __func__,
-			r_xprt->rx_buf.rb_cwndscale);
-		BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0);
-	}
-	xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale;
-	return xprt_reserve_xprt_cong(xprt, task);
-}
-
 /*
  * The RDMA allocate/free functions need the task structure as a place
  * to hide the struct rpcrdma_req, which is necessary for the actual send/recv
@@ -686,7 +669,7 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
  */
 
 static struct rpc_xprt_ops xprt_rdma_procs = {
-	.reserve_xprt		= xprt_rdma_reserve_xprt,
+	.reserve_xprt		= xprt_reserve_xprt_cong,
 	.release_xprt		= xprt_release_xprt_cong,	/* sunrpc/xprt.c */
 	.alloc_slot		= xprt_alloc_slot,
 	.release_request	= xprt_release_rqst_cong,	/* ditto */

net/sunrpc/xprtrdma/xprt_rdma.h: 0 additions, 1 deletion

@@ -212,7 +212,6 @@ struct rpcrdma_req {
 struct rpcrdma_buffer {
 	spinlock_t	rb_lock;	/* protects indexes */
 	atomic_t	rb_credits;	/* most recent server credits */
-	unsigned long	rb_cwndscale;	/* cached framework rpc_cwndscale */
 	int		rb_max_requests;/* client max requests */
 	struct list_head rb_mws;	/* optional memory windows/fmrs/frmrs */
 	int		rb_send_index;
