xprtrdma: Support unplugging an HCA from under an NFS mount
The device driver for the underlying physical device associated
with an RPC-over-RDMA transport can be removed while RPC-over-RDMA
transports are still in use (i.e., while NFS filesystems are still
mounted and active). The IB core performs a connection event upcall
to request that consumers free all RDMA resources associated with
a transport.

There may be pending RPCs when this occurs. Care must be taken to
release associated resources without leaving references that can
trigger a subsequent crash if a signal or soft timeout occurs. We
rely on the caller of the transport's ->close method to ensure that
the previous RPC task has invoked xprt_release but the transport
remains write-locked.

A DEVICE_REMOVE upcall forces a disconnect then sleeps. When ->close
is invoked, it destroys the transport's H/W resources, then wakes
the upcall, which completes and allows the core driver unload to
continue.
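
To make that ordering concrete, here is a minimal userspace sketch of the
handshake, with a POSIX condition variable standing in for the kernel's
wait_for_completion()/complete() pair. All names are illustrative, and the
direct call into close_transport() is a simplification: in the kernel the
forced disconnect propagates through the RPC core before ->close runs.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t remove_done = PTHREAD_COND_INITIALIZER;
static bool hw_resources_freed;

/* Models the DEVICE_REMOVE upcall: force a disconnect, then sleep
 * until ->close reports that the H/W resources are gone. */
static void *device_removal_upcall(void *arg)
{
	(void)arg;
	printf("upcall: forcing disconnect, waiting for ->close\n");
	pthread_mutex_lock(&lock);
	while (!hw_resources_freed)	/* models wait_for_completion() */
		pthread_cond_wait(&remove_done, &lock);
	pthread_mutex_unlock(&lock);
	printf("upcall: woken; core driver unload may continue\n");
	return NULL;
}

/* Models xprt_rdma_close() on the removal path: destroy the
 * transport's H/W resources, then wake the sleeping upcall. */
static void close_transport(void)
{
	printf("->close: divesting transport H/W resources\n");
	pthread_mutex_lock(&lock);
	hw_resources_freed = true;	/* models complete() */
	pthread_cond_signal(&remove_done);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	pthread_t upcall;

	pthread_create(&upcall, NULL, device_removal_upcall, NULL);
	close_transport();
	pthread_join(upcall, NULL);
	return 0;
}

Build with cc -pthread. Because the flag is checked under the mutex before
sleeping, the wake-up is safe regardless of which side runs first.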

BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=266
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Chuck Lever authored and Anna Schumaker committed Apr 25, 2017
1 parent 91a10c5 commit bebd031
Showing 3 changed files with 101 additions and 9 deletions.
net/sunrpc/xprtrdma/transport.c: 24 additions, 5 deletions
@@ -457,19 +457,33 @@ xprt_setup_rdma(struct xprt_create *args)
 	return ERR_PTR(rc);
 }
 
-/*
- * Close a connection, during shutdown or timeout/reconnect
+/**
+ * xprt_rdma_close - Close down RDMA connection
+ * @xprt: generic transport to be closed
+ *
+ * Called during transport shutdown, reconnect, or device
+ * removal. Caller holds the transport's write lock.
  */
 static void
 xprt_rdma_close(struct rpc_xprt *xprt)
 {
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+
+	dprintk("RPC:       %s: closing xprt %p\n", __func__, xprt);
 
-	dprintk("RPC:       %s: closing\n", __func__);
-	if (r_xprt->rx_ep.rep_connected > 0)
+	if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) {
+		xprt_clear_connected(xprt);
+		rpcrdma_ia_remove(ia);
+		return;
+	}
+	if (ep->rep_connected == -ENODEV)
+		return;
+	if (ep->rep_connected > 0)
 		xprt->reestablish_timeout = 0;
 	xprt_disconnect_done(xprt);
-	rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
+	rpcrdma_ep_disconnect(ep, ia);
 }
 
 static void
@@ -680,6 +694,8 @@ xprt_rdma_free(struct rpc_task *task)
  * xprt_rdma_send_request - marshal and send an RPC request
  * @task: RPC task with an RPC message in rq_snd_buf
  *
+ * Caller holds the transport's write lock.
+ *
  * Return values:
  *        0:	The request has been sent
  * ENOTCONN:	Caller needs to invoke connect logic then call again
@@ -706,6 +722,9 @@ xprt_rdma_send_request(struct rpc_task *task)
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 	int rc = 0;
 
+	if (!xprt_connected(xprt))
+		goto drop_connection;
+
 	/* On retransmit, remove any previously registered chunks */
 	if (unlikely(!list_empty(&req->rl_registered)))
 		r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false);
net/sunrpc/xprtrdma/verbs.c: 70 additions, 4 deletions
@@ -69,6 +69,8 @@
 /*
  * internal functions
  */
+static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf);
+static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
 
 static struct workqueue_struct *rpcrdma_receive_wq;

@@ -262,6 +264,21 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
 			__func__, ep);
 		complete(&ia->ri_done);
 		break;
+	case RDMA_CM_EVENT_DEVICE_REMOVAL:
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+		pr_info("rpcrdma: removing device for %pIS:%u\n",
+			sap, rpc_get_port(sap));
+#endif
+		set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
+		ep->rep_connected = -ENODEV;
+		xprt_force_disconnect(&xprt->rx_xprt);
+		wait_for_completion(&ia->ri_remove_done);
+
+		ia->ri_id = NULL;
+		ia->ri_pd = NULL;
+		ia->ri_device = NULL;
+		/* Return 1 to ensure the core destroys the id. */
+		return 1;
 	case RDMA_CM_EVENT_ESTABLISHED:
 		connstate = 1;
 		ib_query_qp(ia->ri_id->qp, attr,
@@ -291,9 +308,6 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
 		goto connected;
 	case RDMA_CM_EVENT_DISCONNECTED:
 		connstate = -ECONNABORTED;
-		goto connected;
-	case RDMA_CM_EVENT_DEVICE_REMOVAL:
-		connstate = -ENODEV;
 connected:
 		dprintk("RPC:       %s: %sconnected\n",
 			__func__, connstate > 0 ? "" : "dis");
@@ -346,6 +360,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
 	int rc;
 
 	init_completion(&ia->ri_done);
+	init_completion(&ia->ri_remove_done);
 
 	id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP,
 			    IB_QPT_RC);
@@ -468,6 +483,56 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr)
 	return rc;
 }
 
+/**
+ * rpcrdma_ia_remove - Handle device driver unload
+ * @ia: interface adapter being removed
+ *
+ * Divest transport H/W resources associated with this adapter,
+ * but allow it to be restored later.
+ */
+void
+rpcrdma_ia_remove(struct rpcrdma_ia *ia)
+{
+	struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
+						   rx_ia);
+	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+	struct rpcrdma_req *req;
+	struct rpcrdma_rep *rep;
+
+	cancel_delayed_work_sync(&buf->rb_refresh_worker);
+
+	/* This is similar to rpcrdma_ep_destroy, but:
+	 * - Don't cancel the connect worker.
+	 * - Don't call rpcrdma_ep_disconnect, which waits
+	 *   for another conn upcall, which will deadlock.
+	 * - rdma_disconnect is unneeded, the underlying
+	 *   connection is already gone.
+	 */
+	if (ia->ri_id->qp) {
+		ib_drain_qp(ia->ri_id->qp);
+		rdma_destroy_qp(ia->ri_id);
+		ia->ri_id->qp = NULL;
+	}
+	ib_free_cq(ep->rep_attr.recv_cq);
+	ib_free_cq(ep->rep_attr.send_cq);
+
+	/* The ULP is responsible for ensuring all DMA
+	 * mappings and MRs are gone.
+	 */
+	list_for_each_entry(rep, &buf->rb_recv_bufs, rr_list)
+		rpcrdma_dma_unmap_regbuf(rep->rr_rdmabuf);
+	list_for_each_entry(req, &buf->rb_allreqs, rl_all) {
+		rpcrdma_dma_unmap_regbuf(req->rl_rdmabuf);
+		rpcrdma_dma_unmap_regbuf(req->rl_sendbuf);
+		rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
+	}
+	rpcrdma_destroy_mrs(buf);
+
+	/* Allow waiters to continue */
+	complete(&ia->ri_remove_done);
+}
+
 /**
  * rpcrdma_ia_close - Clean up/close an IA.
  * @ia: interface adapter to close
@@ -1080,7 +1145,8 @@ rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)

 out_nomws:
 	dprintk("RPC:       %s: no MWs available\n", __func__);
-	schedule_delayed_work(&buf->rb_refresh_worker, 0);
+	if (r_xprt->rx_ep.rep_connected != -ENODEV)
+		schedule_delayed_work(&buf->rb_refresh_worker, 0);
 
 	/* Allow the reply handler and refresh worker to run */
 	cond_resched();
net/sunrpc/xprtrdma/xprt_rdma.h: 7 additions, 0 deletions
@@ -69,6 +69,7 @@ struct rpcrdma_ia {
 	struct rdma_cm_id	*ri_id;
 	struct ib_pd		*ri_pd;
 	struct completion	ri_done;
+	struct completion	ri_remove_done;
 	int			ri_async_rc;
 	unsigned int		ri_max_segs;
 	unsigned int		ri_max_frmr_depth;
@@ -78,10 +79,15 @@
 	bool			ri_reminv_expected;
 	bool			ri_implicit_roundup;
 	enum ib_mr_type		ri_mrtype;
+	unsigned long		ri_flags;
 	struct ib_qp_attr	ri_qp_attr;
 	struct ib_qp_init_attr	ri_qp_init_attr;
 };
 
+enum {
+	RPCRDMA_IAF_REMOVING = 0,
+};
+
 /*
  * RDMA Endpoint -- one per transport instance
  */
@@ -511,6 +517,7 @@ extern unsigned int xprt_rdma_memreg_strategy;
  * Interface Adapter calls - xprtrdma/verbs.c
  */
 int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr);
+void rpcrdma_ia_remove(struct rpcrdma_ia *ia);
 void rpcrdma_ia_close(struct rpcrdma_ia *);
 bool frwr_is_supported(struct rpcrdma_ia *);
 bool fmr_is_supported(struct rpcrdma_ia *);
