Skip to content

Commit

Permalink
xprtrdma: kmalloc rpcrdma_ep separate from rpcrdma_xprt
Browse files Browse the repository at this point in the history
Change the rpcrdma_xprt_disconnect() function so that it no longer
waits for the DISCONNECTED event.  This prevents blocking if the
remote is unresponsive.

In rpcrdma_xprt_disconnect(), the transport's rpcrdma_ep is
detached. Upon return from rpcrdma_xprt_disconnect(), the transport
(r_xprt) is ready immediately for a new connection.

The RDMA_CM_DEVICE_REMOVAL and RDMA_CM_DISCONNECTED events are now
handled almost identically.

However, because the lifetimes of rpcrdma_xprt structures and
rpcrdma_ep structures are now independent, creating an rpcrdma_ep
needs to take a module ref count. The ep now owns most of the
hardware resources for a transport.

Also, a kref is needed to ensure that rpcrdma_ep sticks around
long enough for the cm_event_handler to finish.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
  • Loading branch information
Chuck Lever authored and Anna Schumaker committed Mar 27, 2020
1 parent 745b734 commit e28ce90
Show file tree
Hide file tree
Showing 7 changed files with 143 additions and 191 deletions.
63 changes: 2 additions & 61 deletions include/trace/events/rpcrdma.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ DECLARE_EVENT_CLASS(xprtrdma_connect_class,
TP_fast_assign(
__entry->r_xprt = r_xprt;
__entry->rc = rc;
__entry->connect_status = r_xprt->rx_ep.re_connect_status;
__entry->connect_status = r_xprt->rx_ep->re_connect_status;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
Expand Down Expand Up @@ -342,37 +342,6 @@ DECLARE_EVENT_CLASS(xprtrdma_cb_event,
** Connection events
**/

TRACE_EVENT(xprtrdma_cm_event,
TP_PROTO(
const struct rpcrdma_xprt *r_xprt,
struct rdma_cm_event *event
),

TP_ARGS(r_xprt, event),

TP_STRUCT__entry(
__field(const void *, r_xprt)
__field(unsigned int, event)
__field(int, status)
__string(addr, rpcrdma_addrstr(r_xprt))
__string(port, rpcrdma_portstr(r_xprt))
),

TP_fast_assign(
__entry->r_xprt = r_xprt;
__entry->event = event->event;
__entry->status = event->status;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),

TP_printk("peer=[%s]:%s r_xprt=%p: %s (%u/%d)",
__get_str(addr), __get_str(port),
__entry->r_xprt, rdma_show_cm_event(__entry->event),
__entry->event, __entry->status
)
);

TRACE_EVENT(xprtrdma_inline_thresh,
TP_PROTO(
const struct rpcrdma_ep *ep
Expand Down Expand Up @@ -409,34 +378,6 @@ TRACE_EVENT(xprtrdma_inline_thresh,
)
);

TRACE_EVENT(xprtrdma_remove,
TP_PROTO(
const struct rpcrdma_ep *ep
),

TP_ARGS(ep),

TP_STRUCT__entry(
__array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
__array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
__string(name, ep->re_id->device->name)
),

TP_fast_assign(
const struct rdma_cm_id *id = ep->re_id;

memcpy(__entry->srcaddr, &id->route.addr.src_addr,
sizeof(struct sockaddr_in6));
memcpy(__entry->dstaddr, &id->route.addr.dst_addr,
sizeof(struct sockaddr_in6));
__assign_str(name, id->device->name);
),

TP_printk("%pISpc -> %pISpc device=%s",
__entry->srcaddr, __entry->dstaddr, __get_str(name)
)
);

DEFINE_CONN_EVENT(connect);
DEFINE_CONN_EVENT(disconnect);
DEFINE_CONN_EVENT(flush_dct);
Expand Down Expand Up @@ -831,7 +772,7 @@ TRACE_EVENT(xprtrdma_post_recvs,
__entry->r_xprt = r_xprt;
__entry->count = count;
__entry->status = status;
__entry->posted = r_xprt->rx_ep.re_receive_count;
__entry->posted = r_xprt->rx_ep->re_receive_count;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
Expand Down
4 changes: 2 additions & 2 deletions net/sunrpc/xprtrdma/backchannel.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
size_t maxmsg;

maxmsg = min_t(unsigned int, ep->re_inline_send, ep->re_inline_recv);
Expand Down Expand Up @@ -190,7 +190,7 @@ static struct rpc_rqst *rpcrdma_bc_rqst_get(struct rpcrdma_xprt *r_xprt)
if (xprt->bc_alloc_count >= RPCRDMA_BACKWARD_WRS)
return NULL;

size = min_t(size_t, r_xprt->rx_ep.re_inline_recv, PAGE_SIZE);
size = min_t(size_t, r_xprt->rx_ep->re_inline_recv, PAGE_SIZE);
req = rpcrdma_req_create(r_xprt, size, GFP_KERNEL);
if (!req)
return NULL;
Expand Down
12 changes: 6 additions & 6 deletions net/sunrpc/xprtrdma/frwr_ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ static void frwr_mr_recycle(struct rpcrdma_mr *mr)

if (mr->mr_dir != DMA_NONE) {
trace_xprtrdma_mr_unmap(mr);
ib_dma_unmap_sg(r_xprt->rx_ep.re_id->device,
ib_dma_unmap_sg(r_xprt->rx_ep->re_id->device,
mr->mr_sg, mr->mr_nents, mr->mr_dir);
mr->mr_dir = DMA_NONE;
}
Expand Down Expand Up @@ -115,7 +115,7 @@ void frwr_reset(struct rpcrdma_req *req)
*/
int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
{
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
unsigned int depth = ep->re_max_fr_depth;
struct scatterlist *sg;
struct ib_mr *frmr;
Expand Down Expand Up @@ -283,7 +283,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
int nsegs, bool writing, __be32 xid,
struct rpcrdma_mr *mr)
{
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
struct ib_reg_wr *reg_wr;
int i, n, dma_nents;
struct ib_mr *ibmr;
Expand Down Expand Up @@ -405,7 +405,7 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
post_wr = &frwr->fr_regwr.wr;
}

return ib_post_send(r_xprt->rx_ep.re_id->qp, post_wr, NULL);
return ib_post_send(r_xprt->rx_ep->re_id->qp, post_wr, NULL);
}

/**
Expand Down Expand Up @@ -535,7 +535,7 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* unless re_id->qp is a valid pointer.
*/
bad_wr = NULL;
rc = ib_post_send(r_xprt->rx_ep.re_id->qp, first, &bad_wr);
rc = ib_post_send(r_xprt->rx_ep->re_id->qp, first, &bad_wr);

/* The final LOCAL_INV WR in the chain is supposed to
* do the wake. If it was never posted, the wake will
Expand Down Expand Up @@ -640,7 +640,7 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* unless re_id->qp is a valid pointer.
*/
bad_wr = NULL;
rc = ib_post_send(r_xprt->rx_ep.re_id->qp, first, &bad_wr);
rc = ib_post_send(r_xprt->rx_ep->re_id->qp, first, &bad_wr);
if (!rc)
return;

Expand Down
17 changes: 9 additions & 8 deletions net/sunrpc/xprtrdma/rpc_rdma.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,10 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
struct rpc_rqst *rqst)
{
struct xdr_buf *xdr = &rqst->rq_snd_buf;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
unsigned int count, remaining, offset;

if (xdr->len > r_xprt->rx_ep.re_max_inline_send)
if (xdr->len > ep->re_max_inline_send)
return false;

if (xdr->page_len) {
Expand All @@ -144,7 +145,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
remaining -= min_t(unsigned int,
PAGE_SIZE - offset, remaining);
offset = 0;
if (++count > r_xprt->rx_ep.re_attr.cap.max_send_sge)
if (++count > ep->re_attr.cap.max_send_sge)
return false;
}
}
Expand All @@ -161,7 +162,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
struct rpc_rqst *rqst)
{
return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep.re_max_inline_recv;
return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep->re_max_inline_recv;
}

/* The client is required to provide a Reply chunk if the maximum
Expand All @@ -175,7 +176,7 @@ rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt,
const struct xdr_buf *buf = &rqst->rq_rcv_buf;

return (buf->head[0].iov_len + buf->tail[0].iov_len) <
r_xprt->rx_ep.re_max_inline_recv;
r_xprt->rx_ep->re_max_inline_recv;
}

/* Split @vec on page boundaries into SGEs. FMR registers pages, not
Expand Down Expand Up @@ -254,15 +255,15 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
/* When encoding a Read chunk, the tail iovec contains an
* XDR pad and may be omitted.
*/
if (type == rpcrdma_readch && r_xprt->rx_ep.re_implicit_roundup)
if (type == rpcrdma_readch && r_xprt->rx_ep->re_implicit_roundup)
goto out;

/* When encoding a Write chunk, some servers need to see an
* extra segment for non-XDR-aligned Write chunks. The upper
* layer provides space in the tail iovec that may be used
* for this purpose.
*/
if (type == rpcrdma_writech && r_xprt->rx_ep.re_implicit_roundup)
if (type == rpcrdma_writech && r_xprt->rx_ep->re_implicit_roundup)
goto out;

if (xdrbuf->tail[0].iov_len)
Expand Down Expand Up @@ -1475,8 +1476,8 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)

if (credits == 0)
credits = 1; /* don't deadlock */
else if (credits > r_xprt->rx_ep.re_max_requests)
credits = r_xprt->rx_ep.re_max_requests;
else if (credits > r_xprt->rx_ep->re_max_requests)
credits = r_xprt->rx_ep->re_max_requests;
if (buf->rb_credits != credits)
rpcrdma_update_cwnd(r_xprt, credits);
rpcrdma_post_recvs(r_xprt, false);
Expand Down
37 changes: 17 additions & 20 deletions net/sunrpc/xprtrdma/transport.c
Original file line number Diff line number Diff line change
Expand Up @@ -238,12 +238,12 @@ xprt_rdma_connect_worker(struct work_struct *work)
struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt,
rx_connect_worker.work);
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
int rc;

rc = rpcrdma_xprt_connect(r_xprt);
xprt_clear_connecting(xprt);
if (ep->re_connect_status > 0) {
if (r_xprt->rx_ep && r_xprt->rx_ep->re_connect_status > 0) {
xprt->connect_cookie++;
xprt->stat.connect_count++;
xprt->stat.connect_time += (long)jiffies -
xprt->stat.connect_start;
Expand All @@ -266,7 +266,7 @@ xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);

trace_xprtrdma_op_inject_dsc(r_xprt);
rdma_disconnect(r_xprt->rx_ep.re_id);
rdma_disconnect(r_xprt->rx_ep->re_id);
}

/**
Expand Down Expand Up @@ -316,10 +316,15 @@ xprt_setup_rdma(struct xprt_create *args)
if (args->addrlen > sizeof(xprt->addr))
return ERR_PTR(-EBADF);

if (!try_module_get(THIS_MODULE))
return ERR_PTR(-EIO);

xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0,
xprt_rdma_slot_table_entries);
if (!xprt)
if (!xprt) {
module_put(THIS_MODULE);
return ERR_PTR(-ENOMEM);
}

xprt->timeout = &xprt_rdma_default_timeout;
xprt->connect_timeout = xprt->timeout->to_initval;
Expand Down Expand Up @@ -348,11 +353,12 @@ xprt_setup_rdma(struct xprt_create *args)

new_xprt = rpcx_to_rdmax(xprt);
rc = rpcrdma_buffer_create(new_xprt);
if (rc)
goto out2;

if (!try_module_get(THIS_MODULE))
goto out4;
if (rc) {
xprt_rdma_free_addresses(xprt);
xprt_free(xprt);
module_put(THIS_MODULE);
return ERR_PTR(rc);
}

INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
xprt_rdma_connect_worker);
Expand All @@ -364,15 +370,6 @@ xprt_setup_rdma(struct xprt_create *args)
xprt->address_strings[RPC_DISPLAY_PORT]);
trace_xprtrdma_create(new_xprt);
return xprt;

out4:
rpcrdma_buffer_destroy(&new_xprt->rx_buf);
rc = -ENODEV;
out2:
trace_xprtrdma_op_destroy(new_xprt);
xprt_rdma_free_addresses(xprt);
xprt_free(xprt);
return ERR_PTR(rc);
}

/**
Expand Down Expand Up @@ -491,11 +488,11 @@ static void
xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
unsigned long delay;

delay = 0;
if (ep->re_connect_status != 0) {
if (ep && ep->re_connect_status != 0) {
delay = xprt_reconnect_delay(xprt);
xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO);
}
Expand Down
Loading

0 comments on commit e28ce90

Please sign in to comment.