Merge tag 'nfs-rdma-for-3.19' of git://git.linux-nfs.org/projects/anna/nfs-rdma into linux-next

Pull NFS client RDMA changes for 3.19 from Anna Schumaker:
 "NFS: Client side changes for RDMA

  These patches contain various bugfixes and cleanups for using NFS over RDMA,
  including better error handling and performance improvements from enabling pad
  optimization.

  Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>"

* tag 'nfs-rdma-for-3.19' of git://git.linux-nfs.org/projects/anna/nfs-rdma:
  xprtrdma: Display async errors
  xprtrdma: Enable pad optimization
  xprtrdma: Re-write rpcrdma_flush_cqs()
  xprtrdma: Refactor tasklet scheduling
  xprtrdma: unmap all FMRs during transport disconnect
  xprtrdma: Cap req_cqinit
  xprtrdma: Return an errno from rpcrdma_register_external()
Trond Myklebust committed Nov 26, 2014
2 parents 1702562 + 7ff11de commit ea52641
Showing 3 changed files with 107 additions and 17 deletions.
4 changes: 2 additions & 2 deletions net/sunrpc/xprtrdma/transport.c
@@ -73,7 +73,7 @@ static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_inline_write_padding;
 static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
-int xprt_rdma_pad_optimize = 0;
+int xprt_rdma_pad_optimize = 1;
 
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 
@@ -599,7 +599,7 @@ xprt_rdma_send_request(struct rpc_task *task)
 
 	if (req->rl_niovs == 0)
 		rc = rpcrdma_marshal_req(rqst);
-	else if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
+	else if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_ALLPHYSICAL)
 		rc = rpcrdma_marshal_chunks(rqst, 0);
 	if (rc < 0)
 		goto failed_marshal;
114 changes: 99 additions & 15 deletions net/sunrpc/xprtrdma/verbs.c
@@ -62,6 +62,7 @@
 #endif
 
 static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);
+static void rpcrdma_reset_fmrs(struct rpcrdma_ia *);
 
 /*
  * internal functions
@@ -105,13 +106,51 @@ rpcrdma_run_tasklet(unsigned long data)
 
 static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
 
+static const char * const async_event[] = {
+	"CQ error",
+	"QP fatal error",
+	"QP request error",
+	"QP access error",
+	"communication established",
+	"send queue drained",
+	"path migration successful",
+	"path mig error",
+	"device fatal error",
+	"port active",
+	"port error",
+	"LID change",
+	"P_key change",
+	"SM change",
+	"SRQ error",
+	"SRQ limit reached",
+	"last WQE reached",
+	"client reregister",
+	"GID change",
+};
+
+#define ASYNC_MSG(status)					\
+	((status) < ARRAY_SIZE(async_event) ?			\
+		async_event[(status)] : "unknown async error")
+
+static void
+rpcrdma_schedule_tasklet(struct list_head *sched_list)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
+	list_splice_tail(sched_list, &rpcrdma_tasklets_g);
+	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
+	tasklet_schedule(&rpcrdma_tasklet_g);
+}
+
 static void
 rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
 {
 	struct rpcrdma_ep *ep = context;
 
-	dprintk("RPC: %s: QP error %X on device %s ep %p\n",
-		__func__, event->event, event->device->name, context);
+	pr_err("RPC: %s: %s on device %s ep %p\n",
+	       __func__, ASYNC_MSG(event->event),
+	       event->device->name, context);
 	if (ep->rep_connected == 1) {
 		ep->rep_connected = -EIO;
 		ep->rep_func(ep);
@@ -124,8 +163,9 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
 {
 	struct rpcrdma_ep *ep = context;
 
-	dprintk("RPC: %s: CQ error %X on device %s ep %p\n",
-		__func__, event->event, event->device->name, context);
+	pr_err("RPC: %s: %s on device %s ep %p\n",
+	       __func__, ASYNC_MSG(event->event),
+	       event->device->name, context);
 	if (ep->rep_connected == 1) {
 		ep->rep_connected = -EIO;
 		ep->rep_func(ep);
@@ -243,7 +283,6 @@ rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
 	struct list_head sched_list;
 	struct ib_wc *wcs;
 	int budget, count, rc;
-	unsigned long flags;
 
 	INIT_LIST_HEAD(&sched_list);
 	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
@@ -261,10 +300,7 @@ rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
 	rc = 0;
 
 out_schedule:
-	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
-	list_splice_tail(&sched_list, &rpcrdma_tasklets_g);
-	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
-	tasklet_schedule(&rpcrdma_tasklet_g);
+	rpcrdma_schedule_tasklet(&sched_list);
 	return rc;
 }
 
@@ -309,8 +345,15 @@ rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
 static void
 rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
 {
-	rpcrdma_recvcq_upcall(ep->rep_attr.recv_cq, ep);
-	rpcrdma_sendcq_upcall(ep->rep_attr.send_cq, ep);
+	struct ib_wc wc;
+	LIST_HEAD(sched_list);
+
+	while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
+		rpcrdma_recvcq_process_wc(&wc, &sched_list);
+	if (!list_empty(&sched_list))
+		rpcrdma_schedule_tasklet(&sched_list);
+	while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
+		rpcrdma_sendcq_process_wc(&wc);
 }
 
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -733,7 +776,9 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 
 	/* set trigger for requesting send completion */
 	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
-	if (ep->rep_cqinit <= 2)
+	if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
+		ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
+	else if (ep->rep_cqinit <= 2)
 		ep->rep_cqinit = 0;
 	INIT_CQCOUNT(ep);
 	ep->rep_ia = ia;
@@ -866,8 +911,19 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
 		rpcrdma_ep_disconnect(ep, ia);
 		rpcrdma_flush_cqs(ep);
 
-		if (ia->ri_memreg_strategy == RPCRDMA_FRMR)
+		switch (ia->ri_memreg_strategy) {
+		case RPCRDMA_FRMR:
 			rpcrdma_reset_frmrs(ia);
+			break;
+		case RPCRDMA_MTHCAFMR:
+			rpcrdma_reset_fmrs(ia);
+			break;
+		case RPCRDMA_ALLPHYSICAL:
+			break;
+		default:
+			rc = -EIO;
+			goto out;
+		}
 
 		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
 		id = rpcrdma_create_id(xprt, ia,
@@ -1287,6 +1343,34 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
 	kfree(buf->rb_pool);
 }
 
+/* After a disconnect, unmap all FMRs.
+ *
+ * This is invoked only in the transport connect worker in order
+ * to serialize with rpcrdma_register_fmr_external().
+ */
+static void
+rpcrdma_reset_fmrs(struct rpcrdma_ia *ia)
+{
+	struct rpcrdma_xprt *r_xprt =
+				container_of(ia, struct rpcrdma_xprt, rx_ia);
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+	struct list_head *pos;
+	struct rpcrdma_mw *r;
+	LIST_HEAD(l);
+	int rc;
+
+	list_for_each(pos, &buf->rb_all) {
+		r = list_entry(pos, struct rpcrdma_mw, mw_all);
+
+		INIT_LIST_HEAD(&l);
+		list_add(&r->r.fmr->list, &l);
+		rc = ib_unmap_fmr(&l);
+		if (rc)
+			dprintk("RPC: %s: ib_unmap_fmr failed %i\n",
+				__func__, rc);
+	}
+}
+
 /* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
  * an unusable state. Find FRMRs in this state and dereg / reg
  * each. FRMRs that are VALID and attached to an rpcrdma_req are
@@ -1918,10 +2002,10 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
 		break;
 
 	default:
-		return -1;
+		return -EIO;
 	}
 	if (rc)
-		return -1;
+		return rc;
 
 	return nsegs;
 }
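Note on the FMR reset added above: rpcrdma_reset_fmrs() unmaps each FMR with its own ib_unmap_fmr() call. Since ib_unmap_fmr() accepts a list of FMRs, the unmaps could in principle be batched; the following is a hypothetical sketch of such a variant (not part of this series, the function name is illustrative):

static void
rpcrdma_reset_fmrs_batched(struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_mw *r;
	LIST_HEAD(l);
	int rc;

	/* Collect every FMR on one list, then issue a single unmap call. */
	list_for_each_entry(r, &buf->rb_all, mw_all)
		list_add_tail(&r->r.fmr->list, &l);

	rc = ib_unmap_fmr(&l);
	if (rc)
		dprintk("RPC: %s: ib_unmap_fmr failed %i\n", __func__, rc);
}

Unmapping one FMR at a time, as the merged patch does, keeps a single failed unmap from obscuring the outcome for the remaining regions.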
6 changes: 6 additions & 0 deletions net/sunrpc/xprtrdma/xprt_rdma.h
@@ -97,6 +97,12 @@ struct rpcrdma_ep {
 	struct ib_wc rep_recv_wcs[RPCRDMA_POLLSIZE];
 };
 
+/*
+ * Force a signaled SEND Work Request every so often,
+ * in case the provider needs to do some housekeeping.
+ */
+#define RPCRDMA_MAX_UNSIGNALED_SENDS	(32)
+
 #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
 #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
 
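For context on how this cap takes effect: the send path decrements rep_cqcount with DECR_CQCOUNT() and requests a send completion only when the counter is exhausted, roughly as in the existing rpcrdma_ep_post() logic sketched below (paraphrased; variable names approximate, not part of this commit):

	/* Post most sends unsignaled; request a completion once the
	 * counter runs out so the provider can retire send WRs. */
	if (DECR_CQCOUNT(ep) > 0)
		send_wr.send_flags = 0;
	else {
		INIT_CQCOUNT(ep);
		send_wr.send_flags = IB_SEND_SIGNALED;
	}

With rep_cqinit now capped at RPCRDMA_MAX_UNSIGNALED_SENDS, a completion is requested at least once every few dozen sends even on devices that advertise very deep send queues.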