Skip to content

Commit

Permalink
RPC/RDMA: harden connection logic against missing/late rdma_cm upcalls.
Browse files Browse the repository at this point in the history
Add defensive timeouts to wait_for_completion() calls in RDMA
address resolution, and make them interruptible. Fix the timeout
units to milliseconds (formerly jiffies) and move to private header.

Signed-off-by: Tom Talpey <talpey@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
  • Loading branch information
Tom Talpey authored and Trond Myklebust committed Oct 10, 2008
1 parent 1a95405 commit 5675add
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 7 deletions.
3 changes: 0 additions & 3 deletions include/linux/sunrpc/xprtrdma.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,6 @@

#define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */

-#define RDMA_RESOLVE_TIMEOUT (5*HZ) /* TBD 5 seconds */
-#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */

/* memory registration strategies */
#define RPCRDMA_PERSISTENT_REGISTRATION (1)

Expand Down
11 changes: 7 additions & 4 deletions net/sunrpc/xprtrdma/verbs.c
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
switch (event->event) {
case RDMA_CM_EVENT_ADDR_RESOLVED:
case RDMA_CM_EVENT_ROUTE_RESOLVED:
+ia->ri_async_rc = 0;
complete(&ia->ri_done);
break;
case RDMA_CM_EVENT_ADDR_ERROR:
Expand Down Expand Up @@ -363,26 +364,28 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
return id;
}

-ia->ri_async_rc = 0;
+ia->ri_async_rc = -ETIMEDOUT;
rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
if (rc) {
dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
__func__, rc);
goto out;
}
-wait_for_completion(&ia->ri_done);
+wait_for_completion_interruptible_timeout(&ia->ri_done,
+msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
rc = ia->ri_async_rc;
if (rc)
goto out;

-ia->ri_async_rc = 0;
+ia->ri_async_rc = -ETIMEDOUT;
rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
if (rc) {
dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
__func__, rc);
goto out;
}
-wait_for_completion(&ia->ri_done);
+wait_for_completion_interruptible_timeout(&ia->ri_done,
+msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
rc = ia->ri_async_rc;
if (rc)
goto out;
Expand Down
3 changes: 3 additions & 0 deletions net/sunrpc/xprtrdma/xprt_rdma.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@
#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */

+#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */
+#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */

/*
* Interface Adapter -- one per transport instance
*/
Expand Down

0 comments on commit 5675add

Please sign in to comment.