NFS: Client side changes for RDMA

These patches contain various bugfixes and cleanups for using NFS over
RDMA, including better error handling and performance improvements by
using pad optimization.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2

iQIcBAABCAAGBQJUdPv2AAoJENfLVL+wpUDrJkMQAKjtPZHLMcj+eHm4f1ZKLJxy
GSrUZV21TU9tL0NVE/5An8US6hoLwHpNXsW8o+gHTAeGRyiCmIaNXGd1Ql/4PYRH
zfzdNXoaJAh1N5iXX11fF3gOWqx/SolqzO2xLDVETK/3lAvq0VwMYoMElBQB6qQW
8sN3z8yVuz/9Ia9oGIFhqu1B6dcKPHkQDMtmsElGxeEX/+9yEg4HUKx+kZDtV0Uj
8/JM8Jh1FKRCQT/P6INkRItdY5KaSJGFc43BkC/8lbugfxa5XCyu/m/qMr9FJsDV
nM6rwaiVcmR/mvD3fL82+Jg/M+P9VUHQ1/Az0sV9G+fEoHH/1Mey3LfMzNpUmf9v
bykrPRuzXkPPQgN1VnjSaF2RF+CWwV9Nme1VVXM/zj8gHX1mcmQF/wPRxDuLjCrt
EObAFsvHOwDTZZmYp9bG5kc6IvwvT8aeeVQMJ4q4PSGD3w8AtoIyJDn+Ee0LFD1K
Zw0oZpTJpI4t7DVxGBSdo2wZWuMU/UKqGqGtGJ+ljXfTRuuq968Q5j5ujaA9vf0v
C9igYTU8hq4teMzhZrfR1jtTWoSS+5zamb1KtvAZy8gsht2PQVgE9xka2k/AV8uE
ul/w5HU4OV+QIrHNbiu7BE8B2Ags6smpdHMqn9fqLBwvG+JEwbWqk1zeTsajxzq+
hkvKkkMq6JjDbsDf96Yk
=YIru
-----END PGP SIGNATURE-----

Merge tag 'nfs-rdma-for-3.19' of git://git.linux-nfs.org/projects/anna/nfs-rdma into linux-next

Pull NFS client RDMA changes for 3.19 from Anna Schumaker:

"NFS: Client side changes for RDMA

 These patches contain various bugfixes and cleanups for using NFS over
 RDMA, including better error handling and performance improvements by
 using pad optimization.

 Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>"

* tag 'nfs-rdma-for-3.19' of git://git.linux-nfs.org/projects/anna/nfs-rdma:
  xprtrdma: Display async errors
  xprtrdma: Enable pad optimization
  xprtrdma: Re-write rpcrdma_flush_cqs()
  xprtrdma: Refactor tasklet scheduling
  xprtrdma: unmap all FMRs during transport disconnect
  xprtrdma: Cap req_cqinit
  xprtrdma: Return an errno from rpcrdma_register_external()
commit ea5264138d
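
The "xprtrdma: Display async errors" patch below replaces raw hex event codes in the QP and CQ async error upcalls with human-readable names, looked up from a string table with a bounds check (the ASYNC_MSG() macro). A minimal standalone sketch of that lookup pattern follows; it is plain userspace C with a deliberately shortened table and made-up test values, not the kernel code itself:

#include <stdio.h>

/* Illustrative subset of the event-name table the patch adds in verbs.c;
 * the real table lists a name for each IB async event code it handles.
 */
static const char * const async_event[] = {
	"CQ error",
	"QP fatal error",
	"QP request error",
	"QP access error",
};

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

/* Same guard as the patch's ASYNC_MSG(): out-of-range event codes fall
 * back to a generic string instead of indexing past the end of the table.
 */
#define ASYNC_MSG(status) \
	((status) < ARRAY_SIZE(async_event) ? \
		async_event[(status)] : "unknown async error")

int main(void)
{
	unsigned int codes[] = { 1, 3, 42 };	/* 42 is deliberately out of range */

	for (unsigned int i = 0; i < ARRAY_SIZE(codes); i++)
		printf("event %u: %s\n", codes[i], ASYNC_MSG(codes[i]));
	return 0;
}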
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -73,7 +73,7 @@ static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_inline_write_padding;
 static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
-int xprt_rdma_pad_optimize = 0;
+int xprt_rdma_pad_optimize = 1;
 
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 
@@ -599,7 +599,7 @@ xprt_rdma_send_request(struct rpc_task *task)
 
 	if (req->rl_niovs == 0)
 		rc = rpcrdma_marshal_req(rqst);
-	else if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
+	else if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_ALLPHYSICAL)
 		rc = rpcrdma_marshal_chunks(rqst, 0);
 	if (rc < 0)
 		goto failed_marshal;
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -62,6 +62,7 @@
 #endif
 
 static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);
+static void rpcrdma_reset_fmrs(struct rpcrdma_ia *);
 
 /*
  * internal functions
@@ -105,13 +106,51 @@ rpcrdma_run_tasklet(unsigned long data)
 
 static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
 
+static const char * const async_event[] = {
+	"CQ error",
+	"QP fatal error",
+	"QP request error",
+	"QP access error",
+	"communication established",
+	"send queue drained",
+	"path migration successful",
+	"path mig error",
+	"device fatal error",
+	"port active",
+	"port error",
+	"LID change",
+	"P_key change",
+	"SM change",
+	"SRQ error",
+	"SRQ limit reached",
+	"last WQE reached",
+	"client reregister",
+	"GID change",
+};
+
+#define ASYNC_MSG(status)					\
+	((status) < ARRAY_SIZE(async_event) ?			\
+		async_event[(status)] : "unknown async error")
+
+static void
+rpcrdma_schedule_tasklet(struct list_head *sched_list)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
+	list_splice_tail(sched_list, &rpcrdma_tasklets_g);
+	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
+	tasklet_schedule(&rpcrdma_tasklet_g);
+}
+
 static void
 rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
 {
 	struct rpcrdma_ep *ep = context;
 
-	dprintk("RPC: %s: QP error %X on device %s ep %p\n",
-		__func__, event->event, event->device->name, context);
+	pr_err("RPC: %s: %s on device %s ep %p\n",
+	       __func__, ASYNC_MSG(event->event),
+	       event->device->name, context);
 	if (ep->rep_connected == 1) {
 		ep->rep_connected = -EIO;
 		ep->rep_func(ep);
@@ -124,8 +163,9 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
 {
 	struct rpcrdma_ep *ep = context;
 
-	dprintk("RPC: %s: CQ error %X on device %s ep %p\n",
-		__func__, event->event, event->device->name, context);
+	pr_err("RPC: %s: %s on device %s ep %p\n",
+	       __func__, ASYNC_MSG(event->event),
+	       event->device->name, context);
 	if (ep->rep_connected == 1) {
 		ep->rep_connected = -EIO;
 		ep->rep_func(ep);
@@ -243,7 +283,6 @@ rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
 	struct list_head sched_list;
 	struct ib_wc *wcs;
 	int budget, count, rc;
-	unsigned long flags;
 
 	INIT_LIST_HEAD(&sched_list);
 	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
@@ -261,10 +300,7 @@ rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
 	rc = 0;
 
 out_schedule:
-	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
-	list_splice_tail(&sched_list, &rpcrdma_tasklets_g);
-	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
-	tasklet_schedule(&rpcrdma_tasklet_g);
+	rpcrdma_schedule_tasklet(&sched_list);
 	return rc;
 }
 
@@ -309,8 +345,15 @@ rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
 static void
 rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
 {
-	rpcrdma_recvcq_upcall(ep->rep_attr.recv_cq, ep);
-	rpcrdma_sendcq_upcall(ep->rep_attr.send_cq, ep);
+	struct ib_wc wc;
+	LIST_HEAD(sched_list);
+
+	while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
+		rpcrdma_recvcq_process_wc(&wc, &sched_list);
+	if (!list_empty(&sched_list))
+		rpcrdma_schedule_tasklet(&sched_list);
+	while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
+		rpcrdma_sendcq_process_wc(&wc);
 }
 
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
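
The rewritten rpcrdma_flush_cqs() above drains each completion queue directly: it calls ib_poll_cq() for one work completion at a time until the queue reports empty, handing receive completions to the tasklet scheduler and processing send completions inline, instead of re-invoking the interrupt upcalls. A rough standalone analogue of that drain-by-polling shape, using a toy queue with invented names (illustration only, not kernel code):

#include <stdio.h>

/* Toy completion queue, invented for this sketch only. */
struct toy_cq {
	int entries[8];
	int head;
	int tail;
};

/* Analogue of ib_poll_cq(cq, 1, &wc): return at most one completion,
 * 0 when the queue is empty.
 */
static int toy_poll_cq(struct toy_cq *q, int *wc)
{
	if (q->head == q->tail)
		return 0;
	*wc = q->entries[q->head++];
	return 1;
}

int main(void)
{
	struct toy_cq recv_cq = { .entries = { 10, 11, 12 }, .head = 0, .tail = 3 };
	int wc;

	/* Same shape as the rewritten rpcrdma_flush_cqs(): keep polling until
	 * no completions remain, handling each one as it is returned.
	 */
	while (toy_poll_cq(&recv_cq, &wc) > 0)
		printf("flushed completion %d\n", wc);
	return 0;
}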
@@ -733,7 +776,9 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 
 	/* set trigger for requesting send completion */
 	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
-	if (ep->rep_cqinit <= 2)
+	if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
+		ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
+	else if (ep->rep_cqinit <= 2)
 		ep->rep_cqinit = 0;
 	INIT_CQCOUNT(ep);
 	ep->rep_ia = ia;
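
The "Cap req_cqinit" change above bounds how many SEND Work Requests may go unsignaled: rep_cqinit starts at max_send_wr/2 - 1 and is now clamped to RPCRDMA_MAX_UNSIGNALED_SENDS (32), so a send completion is requested at least every 32 Work Requests even on deep send queues. For example, max_send_wr = 1024 would have given 511 before the patch and gives 32 after it. A standalone sketch of the clamp arithmetic (the sample queue depths are made up):

#include <stdio.h>

#define RPCRDMA_MAX_UNSIGNALED_SENDS	(32)

/* Mirrors the rep_cqinit computation in rpcrdma_ep_create() after the patch. */
static unsigned int cqinit(unsigned int max_send_wr)
{
	unsigned int v = max_send_wr / 2 - 1;

	if (v > RPCRDMA_MAX_UNSIGNALED_SENDS)
		v = RPCRDMA_MAX_UNSIGNALED_SENDS;	/* new upper bound */
	else if (v <= 2)
		v = 0;					/* tiny queue: signal every send */
	return v;
}

int main(void)
{
	unsigned int sizes[] = { 8, 64, 1024 };	/* hypothetical send queue depths */

	for (int i = 0; i < 3; i++)
		printf("max_send_wr=%u -> rep_cqinit=%u\n", sizes[i], cqinit(sizes[i]));
	return 0;
}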
@@ -866,8 +911,19 @@ retry:
 		rpcrdma_ep_disconnect(ep, ia);
 		rpcrdma_flush_cqs(ep);
 
-		if (ia->ri_memreg_strategy == RPCRDMA_FRMR)
+		switch (ia->ri_memreg_strategy) {
+		case RPCRDMA_FRMR:
 			rpcrdma_reset_frmrs(ia);
+			break;
+		case RPCRDMA_MTHCAFMR:
+			rpcrdma_reset_fmrs(ia);
+			break;
+		case RPCRDMA_ALLPHYSICAL:
+			break;
+		default:
+			rc = -EIO;
+			goto out;
+		}
 
 		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
 		id = rpcrdma_create_id(xprt, ia,
@@ -1287,6 +1343,34 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
 	kfree(buf->rb_pool);
 }
 
+/* After a disconnect, unmap all FMRs.
+ *
+ * This is invoked only in the transport connect worker in order
+ * to serialize with rpcrdma_register_fmr_external().
+ */
+static void
+rpcrdma_reset_fmrs(struct rpcrdma_ia *ia)
+{
+	struct rpcrdma_xprt *r_xprt =
+				container_of(ia, struct rpcrdma_xprt, rx_ia);
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+	struct list_head *pos;
+	struct rpcrdma_mw *r;
+	LIST_HEAD(l);
+	int rc;
+
+	list_for_each(pos, &buf->rb_all) {
+		r = list_entry(pos, struct rpcrdma_mw, mw_all);
+
+		INIT_LIST_HEAD(&l);
+		list_add(&r->r.fmr->list, &l);
+		rc = ib_unmap_fmr(&l);
+		if (rc)
+			dprintk("RPC: %s: ib_unmap_fmr failed %i\n",
+				__func__, rc);
+	}
+}
+
 /* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
  * an unusable state. Find FRMRs in this state and dereg / reg
  * each. FRMRs that are VALID and attached to an rpcrdma_req are
@@ -1918,10 +2002,10 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
 		break;
 
 	default:
-		return -1;
+		return -EIO;
 	}
 	if (rc)
-		return -1;
+		return rc;
 
 	return nsegs;
 }
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -97,6 +97,12 @@ struct rpcrdma_ep {
 	struct ib_wc rep_recv_wcs[RPCRDMA_POLLSIZE];
 };
 
+/*
+ * Force a signaled SEND Work Request every so often,
+ * in case the provider needs to do some housekeeping.
+ */
+#define RPCRDMA_MAX_UNSIGNALED_SENDS	(32)
+
 #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
 #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
 