linux/net/sunrpc/xprtrdma/frwr_ops.c
Linus Torvalds 048ccca8c1 Initial roundup of 4.5 merge window patches
- Remove usage of ib_query_device and instead store attributes in
   ib_device struct
 - Move iopoll out of block and into lib, rename to irqpoll, and use
   in several places in the rdma stack as our new completion queue
   polling library mechanism.  Update the other block drivers that
   already used iopoll to use the new mechanism too.
 - Replace the per-entry GID table locks with a single GID table lock
 - IPoIB multicast cleanup
 - Cleanups to the IB MR facility
 - Add support for 64bit extended IB counters
 - Fix for netlink oops while parsing RDMA nl messages
 - RoCEv2 support for the core IB code
 - mlx4 RoCEv2 support
 - mlx5 RoCEv2 support
 - Cross Channel support for mlx5
 - Timestamp support for mlx5
 - Atomic support for mlx5
 - Raw QP support for mlx5
 - MAINTAINERS update for mlx4/mlx5
 - Misc ocrdma, qib, nes, usNIC, cxgb3, cxgb4, mlx4, mlx5 updates
 - Add support for remote invalidate to the iSER driver (pushed through the
   RDMA tree due to dependencies, acknowledged by nab)
 - Update to NFSoRDMA (pushed through the RDMA tree due to dependencies,
   acknowledged by Bruce)
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJWoSygAAoJELgmozMOVy/dDjsP/2vbTda2MvQfkfkGEZBQdJSg
 095RN0gQgCJdg78lAl8yuaK8r4VN/7uefpDtFdudH1I/Pei7X0wxN9R1UzFNG4KR
 AD53lz92IVPs15328SbPR2kvNWISR9aBFQo3rlElq3Grqlp0EMn2Ou1vtu87rekF
 aMllxr8Nl0uZhP+eWusOsYpJUUtwirLgRnrAyfqo2UxZh/TMIroT0TCx1KXjVcAg
 dhDARiZAdu3OgSc6OsWqmH+DELEq6dFVA5F+DDBGAb8bFZqlJc7cuMHWInwNsNXT
 so4bnEQ835alTbsdYtqs5DUNS8heJTAJP4Uz0ehkTh/uNCcvnKeUTw1c2P/lXI1k
 7s33gMM+0FXj0swMBw0kKwAF2d9Hhus9UAN7NwjBuOyHcjGRd5q7SAnfWkvKx000
 s9jVW19slb2I38gB58nhjOh8s+vXUArgxnV1+kTia1+bJSR5swvVoWRicRXdF0vh
 TvLX/BjbSIU73g1TnnLNYoBTV3ybFKQ6bVdQW7fzSTDs54dsI1vvdHXi3bYZCpnL
 HVwQTZRfEzkvb0AdKbcvf8p/TlaAHem3ODqtO1eHvO4if1QJBSn+SptTEeJVYYdK
 n4B3l/dMoBH4JXJUmEHB9jwAvYOpv/YLAFIvdL7NFwbqGNsC3nfXFcmkVORB1W3B
 KEMcM2we4bz+uyKMjEAD
 =5oO7
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

Pull rdma updates from Doug Ledford:
 "Initial roundup of 4.5 merge window patches

   - Remove usage of ib_query_device and instead store attributes in
     ib_device struct

   - Move iopoll out of block and into lib, rename to irqpoll, and use
     in several places in the rdma stack as our new completion queue
     polling library mechanism.  Update the other block drivers that
     already used iopoll to use the new mechanism too.

   - Replace the per-entry GID table locks with a single GID table lock

   - IPoIB multicast cleanup

   - Cleanups to the IB MR facility

   - Add support for 64bit extended IB counters

   - Fix for netlink oops while parsing RDMA nl messages

   - RoCEv2 support for the core IB code

   - mlx4 RoCEv2 support

   - mlx5 RoCEv2 support

   - Cross Channel support for mlx5

   - Timestamp support for mlx5

   - Atomic support for mlx5

   - Raw QP support for mlx5

   - MAINTAINERS update for mlx4/mlx5

   - Misc ocrdma, qib, nes, usNIC, cxgb3, cxgb4, mlx4, mlx5 updates

   - Add support for remote invalidate to the iSER driver (pushed
     through the RDMA tree due to dependencies, acknowledged by nab)

   - Update to NFSoRDMA (pushed through the RDMA tree due to
     dependencies, acknowledged by Bruce)"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (169 commits)
  IB/mlx5: Unify CQ create flags check
  IB/mlx5: Expose Raw Packet QP to user space consumers
  {IB, net}/mlx5: Move the modify QP operation table to mlx5_ib
  IB/mlx5: Support setting Ethernet priority for Raw Packet QPs
  IB/mlx5: Add Raw Packet QP query functionality
  IB/mlx5: Add create and destroy functionality for Raw Packet QP
  IB/mlx5: Refactor mlx5_ib_qp to accommodate other QP types
  IB/mlx5: Allocate a Transport Domain for each ucontext
  net/mlx5_core: Warn on unsupported events of QP/RQ/SQ
  net/mlx5_core: Add RQ and SQ event handling
  net/mlx5_core: Export transport objects
  IB/mlx5: Expose CQE version to user-space
  IB/mlx5: Add CQE version 1 support to user QPs and SRQs
  IB/mlx5: Fix data validation in mlx5_ib_alloc_ucontext
  IB/sa: Fix netlink local service GFP crash
  IB/srpt: Remove redundant wc array
  IB/qib: Improve ipoib UD performance
  IB/mlx4: Advertise RoCE v2 support
  IB/mlx4: Create and use another QP1 for RoCEv2
  IB/mlx4: Enable send of RoCE QP1 packets with IP/UDP headers
  ...
2016-01-23 18:45:06 -08:00

610 lines
16 KiB
C

/*
* Copyright (c) 2015 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*/
/* Lightweight memory registration using Fast Registration Work
* Requests (FRWR). Also referred to sometimes as FRMR mode.
*
* FRWR features ordered asynchronous registration and deregistration
* of arbitrarily sized memory regions. This is the fastest and safest
* but most complex memory registration mode.
*/
/* Normal operation
*
* A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
* Work Request (frmr_op_map). When the RDMA operation is finished, this
* Memory Region is invalidated using a LOCAL_INV Work Request
* (frmr_op_unmap).
*
* Typically these Work Requests are not signaled, and neither are RDMA
* SEND Work Requests (with the exception of signaling occasionally to
* prevent provider work queue overflows). This greatly reduces HCA
* interrupt workload.
*
* As an optimization, frwr_op_unmap marks MRs INVALID before the
* LOCAL_INV WR is posted. If posting succeeds, the MR is placed on
* rb_mws immediately so that no work (like managing a linked list
* under a spinlock) is needed in the completion upcall.
*
* But this means that frwr_op_map() can occasionally encounter an MR
* that is INVALID but the LOCAL_INV WR has not completed. Work Queue
* ordering prevents a subsequent FAST_REG WR from executing against
* that MR while it is still being invalidated.
*/
/* Transport recovery
*
* ->op_map and the transport connect worker cannot run at the same
* time, but ->op_unmap can fire while the transport connect worker
* is running. Thus MR recovery is handled in ->op_map, to guarantee
* that recovered MRs are owned by a sending RPC, and not one where
* ->op_unmap could fire at the same time transport reconnect is
* being done.
*
* When the underlying transport disconnects, MRs are left in one of
* three states:
*
* INVALID: The MR was not in use before the QP entered ERROR state.
* (Or, the LOCAL_INV WR has not completed or flushed yet).
*
* STALE: The MR was being registered or unregistered when the QP
* entered ERROR state, and the pending WR was flushed.
*
* VALID: The MR was registered before the QP entered ERROR state.
*
* When frwr_op_map encounters STALE and VALID MRs, they are recovered
* with ib_dereg_mr and then are re-initialized. Beause MR recovery
* allocates fresh resources, it is deferred to a workqueue, and the
* recovered MRs are placed back on the rb_mws list when recovery is
* complete. frwr_op_map allocates another MR for the current RPC while
* the broken MR is reset.
*
* To ensure that frwr_op_map doesn't encounter an MR that is marked
* INVALID but that is about to be flushed due to a previous transport
* disconnect, the transport connect worker attempts to drain all
* pending send queue WRs before the transport is reconnected.
*/
#include "xprt_rdma.h"
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
static struct workqueue_struct *frwr_recovery_wq;
#define FRWR_RECOVERY_WQ_FLAGS (WQ_UNBOUND | WQ_MEM_RECLAIM)
int
frwr_alloc_recovery_wq(void)
{
frwr_recovery_wq = alloc_workqueue("frwr_recovery",
FRWR_RECOVERY_WQ_FLAGS, 0);
return !frwr_recovery_wq ? -ENOMEM : 0;
}
void
frwr_destroy_recovery_wq(void)
{
struct workqueue_struct *wq;
if (!frwr_recovery_wq)
return;
wq = frwr_recovery_wq;
frwr_recovery_wq = NULL;
destroy_workqueue(wq);
}
/* Deferred reset of a single FRMR. Generate a fresh rkey by
* replacing the MR.
*
* There's no recovery if this fails. The FRMR is abandoned, but
* remains in rb_all. It will be cleaned up when the transport is
* destroyed.
*/
static void
__frwr_recovery_worker(struct work_struct *work)
{
struct rpcrdma_mw *r = container_of(work, struct rpcrdma_mw,
r.frmr.fr_work);
struct rpcrdma_xprt *r_xprt = r->r.frmr.fr_xprt;
unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
if (ib_dereg_mr(r->r.frmr.fr_mr))
goto out_fail;
r->r.frmr.fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth);
if (IS_ERR(r->r.frmr.fr_mr))
goto out_fail;
dprintk("RPC: %s: recovered FRMR %p\n", __func__, r);
r->r.frmr.fr_state = FRMR_IS_INVALID;
rpcrdma_put_mw(r_xprt, r);
return;
out_fail:
pr_warn("RPC: %s: FRMR %p unrecovered\n",
__func__, r);
}
/* A broken MR was discovered in a context that can't sleep.
* Defer recovery to the recovery worker.
*/
static void
__frwr_queue_recovery(struct rpcrdma_mw *r)
{
INIT_WORK(&r->r.frmr.fr_work, __frwr_recovery_worker);
queue_work(frwr_recovery_wq, &r->r.frmr.fr_work);
}
static int
__frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device,
unsigned int depth)
{
struct rpcrdma_frmr *f = &r->r.frmr;
int rc;
f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth);
if (IS_ERR(f->fr_mr))
goto out_mr_err;
f->sg = kcalloc(depth, sizeof(*f->sg), GFP_KERNEL);
if (!f->sg)
goto out_list_err;
sg_init_table(f->sg, depth);
return 0;
out_mr_err:
rc = PTR_ERR(f->fr_mr);
dprintk("RPC: %s: ib_alloc_mr status %i\n",
__func__, rc);
return rc;
out_list_err:
rc = -ENOMEM;
dprintk("RPC: %s: sg allocation failure\n",
__func__);
ib_dereg_mr(f->fr_mr);
return rc;
}
static void
__frwr_release(struct rpcrdma_mw *r)
{
int rc;
rc = ib_dereg_mr(r->r.frmr.fr_mr);
if (rc)
dprintk("RPC: %s: ib_dereg_mr status %i\n",
__func__, rc);
kfree(r->r.frmr.sg);
}
static int
frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
struct rpcrdma_create_data_internal *cdata)
{
int depth, delta;
ia->ri_max_frmr_depth =
min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
ia->ri_device->attrs.max_fast_reg_page_list_len);
dprintk("RPC: %s: device's max FR page list len = %u\n",
__func__, ia->ri_max_frmr_depth);
/* Add room for frmr register and invalidate WRs.
* 1. FRMR reg WR for head
* 2. FRMR invalidate WR for head
* 3. N FRMR reg WRs for pagelist
* 4. N FRMR invalidate WRs for pagelist
* 5. FRMR reg WR for tail
* 6. FRMR invalidate WR for tail
* 7. The RDMA_SEND WR
*/
depth = 7;
/* Calculate N if the device max FRMR depth is smaller than
* RPCRDMA_MAX_DATA_SEGS.
*/
if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth;
do {
depth += 2; /* FRMR reg + invalidate */
delta -= ia->ri_max_frmr_depth;
} while (delta > 0);
}
ep->rep_attr.cap.max_send_wr *= depth;
if (ep->rep_attr.cap.max_send_wr > ia->ri_device->attrs.max_qp_wr) {
cdata->max_requests = ia->ri_device->attrs.max_qp_wr / depth;
if (!cdata->max_requests)
return -EINVAL;
ep->rep_attr.cap.max_send_wr = cdata->max_requests *
depth;
}
return 0;
}
/* FRWR mode conveys a list of pages per chunk segment. The
* maximum length of that list is the FRWR page list depth.
*/
static size_t
frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
}
/* If FAST_REG or LOCAL_INV failed, indicate the frmr needs
* to be reset.
*
* WARNING: Only wr_id and status are reliable at this point
*/
static void
__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_mw *r)
{
if (likely(wc->status == IB_WC_SUCCESS))
return;
/* WARNING: Only wr_id and status are reliable at this point */
r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
if (wc->status == IB_WC_WR_FLUSH_ERR)
dprintk("RPC: %s: frmr %p flushed\n", __func__, r);
else
pr_warn("RPC: %s: frmr %p error, status %s (%d)\n",
__func__, r, ib_wc_status_msg(wc->status), wc->status);
r->r.frmr.fr_state = FRMR_IS_STALE;
}
static void
frwr_sendcompletion(struct ib_wc *wc)
{
struct rpcrdma_mw *r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
struct rpcrdma_frmr *f = &r->r.frmr;
if (unlikely(wc->status != IB_WC_SUCCESS))
__frwr_sendcompletion_flush(wc, r);
if (f->fr_waiter)
complete(&f->fr_linv_done);
}
static int
frwr_op_init(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct ib_device *device = r_xprt->rx_ia.ri_device;
unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
int i;
spin_lock_init(&buf->rb_mwlock);
INIT_LIST_HEAD(&buf->rb_mws);
INIT_LIST_HEAD(&buf->rb_all);
i = max_t(int, RPCRDMA_MAX_DATA_SEGS / depth, 1);
i += 2; /* head + tail */
i *= buf->rb_max_requests; /* one set for each RPC slot */
dprintk("RPC: %s: initalizing %d FRMRs\n", __func__, i);
while (i--) {
struct rpcrdma_mw *r;
int rc;
r = kzalloc(sizeof(*r), GFP_KERNEL);
if (!r)
return -ENOMEM;
rc = __frwr_init(r, pd, device, depth);
if (rc) {
kfree(r);
return rc;
}
list_add(&r->mw_list, &buf->rb_mws);
list_add(&r->mw_all, &buf->rb_all);
r->mw_sendcompletion = frwr_sendcompletion;
r->r.frmr.fr_xprt = r_xprt;
}
return 0;
}
/* Post a FAST_REG Work Request to register a memory region
* for remote access via RDMA READ or RDMA WRITE.
*/
static int
frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
int nsegs, bool writing)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct ib_device *device = ia->ri_device;
enum dma_data_direction direction = rpcrdma_data_dir(writing);
struct rpcrdma_mr_seg *seg1 = seg;
struct rpcrdma_mw *mw;
struct rpcrdma_frmr *frmr;
struct ib_mr *mr;
struct ib_reg_wr *reg_wr;
struct ib_send_wr *bad_wr;
int rc, i, n, dma_nents;
u8 key;
mw = seg1->rl_mw;
seg1->rl_mw = NULL;
do {
if (mw)
__frwr_queue_recovery(mw);
mw = rpcrdma_get_mw(r_xprt);
if (!mw)
return -ENOMEM;
} while (mw->r.frmr.fr_state != FRMR_IS_INVALID);
frmr = &mw->r.frmr;
frmr->fr_state = FRMR_IS_VALID;
frmr->fr_waiter = false;
mr = frmr->fr_mr;
reg_wr = &frmr->fr_regwr;
if (nsegs > ia->ri_max_frmr_depth)
nsegs = ia->ri_max_frmr_depth;
for (i = 0; i < nsegs;) {
if (seg->mr_page)
sg_set_page(&frmr->sg[i],
seg->mr_page,
seg->mr_len,
offset_in_page(seg->mr_offset));
else
sg_set_buf(&frmr->sg[i], seg->mr_offset,
seg->mr_len);
++seg;
++i;
/* Check for holes */
if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break;
}
frmr->sg_nents = i;
dma_nents = ib_dma_map_sg(device, frmr->sg, frmr->sg_nents, direction);
if (!dma_nents) {
pr_err("RPC: %s: failed to dma map sg %p sg_nents %u\n",
__func__, frmr->sg, frmr->sg_nents);
return -ENOMEM;
}
n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, PAGE_SIZE);
if (unlikely(n != frmr->sg_nents)) {
pr_err("RPC: %s: failed to map mr %p (%u/%u)\n",
__func__, frmr->fr_mr, n, frmr->sg_nents);
rc = n < 0 ? n : -EINVAL;
goto out_senderr;
}
dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n",
__func__, mw, frmr->sg_nents, mr->length);
key = (u8)(mr->rkey & 0x000000FF);
ib_update_fast_reg_key(mr, ++key);
reg_wr->wr.next = NULL;
reg_wr->wr.opcode = IB_WR_REG_MR;
reg_wr->wr.wr_id = (uintptr_t)mw;
reg_wr->wr.num_sge = 0;
reg_wr->wr.send_flags = 0;
reg_wr->mr = mr;
reg_wr->key = mr->rkey;
reg_wr->access = writing ?
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ;
DECR_CQCOUNT(&r_xprt->rx_ep);
rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
if (rc)
goto out_senderr;
seg1->mr_dir = direction;
seg1->rl_mw = mw;
seg1->mr_rkey = mr->rkey;
seg1->mr_base = mr->iova;
seg1->mr_nsegs = frmr->sg_nents;
seg1->mr_len = mr->length;
return frmr->sg_nents;
out_senderr:
dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc);
ib_dma_unmap_sg(device, frmr->sg, dma_nents, direction);
__frwr_queue_recovery(mw);
return rc;
}
static struct ib_send_wr *
__frwr_prepare_linv_wr(struct rpcrdma_mr_seg *seg)
{
struct rpcrdma_mw *mw = seg->rl_mw;
struct rpcrdma_frmr *f = &mw->r.frmr;
struct ib_send_wr *invalidate_wr;
f->fr_waiter = false;
f->fr_state = FRMR_IS_INVALID;
invalidate_wr = &f->fr_invwr;
memset(invalidate_wr, 0, sizeof(*invalidate_wr));
invalidate_wr->wr_id = (unsigned long)(void *)mw;
invalidate_wr->opcode = IB_WR_LOCAL_INV;
invalidate_wr->ex.invalidate_rkey = f->fr_mr->rkey;
return invalidate_wr;
}
static void
__frwr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
int rc)
{
struct ib_device *device = r_xprt->rx_ia.ri_device;
struct rpcrdma_mw *mw = seg->rl_mw;
struct rpcrdma_frmr *f = &mw->r.frmr;
seg->rl_mw = NULL;
ib_dma_unmap_sg(device, f->sg, f->sg_nents, seg->mr_dir);
if (!rc)
rpcrdma_put_mw(r_xprt, mw);
else
__frwr_queue_recovery(mw);
}
/* Invalidate all memory regions that were registered for "req".
*
* Sleeps until it is safe for the host CPU to access the
* previously mapped memory regions.
*/
static void
frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
struct ib_send_wr *invalidate_wrs, *pos, *prev, *bad_wr;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_mr_seg *seg;
unsigned int i, nchunks;
struct rpcrdma_frmr *f;
int rc;
dprintk("RPC: %s: req %p\n", __func__, req);
/* ORDER: Invalidate all of the req's MRs first
*
* Chain the LOCAL_INV Work Requests and post them with
* a single ib_post_send() call.
*/
invalidate_wrs = pos = prev = NULL;
seg = NULL;
for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
seg = &req->rl_segments[i];
pos = __frwr_prepare_linv_wr(seg);
if (!invalidate_wrs)
invalidate_wrs = pos;
else
prev->next = pos;
prev = pos;
i += seg->mr_nsegs;
}
f = &seg->rl_mw->r.frmr;
/* Strong send queue ordering guarantees that when the
* last WR in the chain completes, all WRs in the chain
* are complete.
*/
f->fr_invwr.send_flags = IB_SEND_SIGNALED;
f->fr_waiter = true;
init_completion(&f->fr_linv_done);
INIT_CQCOUNT(&r_xprt->rx_ep);
/* Transport disconnect drains the receive CQ before it
* replaces the QP. The RPC reply handler won't call us
* unless ri_id->qp is a valid pointer.
*/
rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr);
if (rc)
pr_warn("%s: ib_post_send failed %i\n", __func__, rc);
wait_for_completion(&f->fr_linv_done);
/* ORDER: Now DMA unmap all of the req's MRs, and return
* them to the free MW list.
*/
for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
seg = &req->rl_segments[i];
__frwr_dma_unmap(r_xprt, seg, rc);
i += seg->mr_nsegs;
seg->mr_nsegs = 0;
}
req->rl_nchunks = 0;
}
/* Post a LOCAL_INV Work Request to prevent further remote access
* via RDMA READ or RDMA WRITE.
*/
static int
frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
{
struct rpcrdma_mr_seg *seg1 = seg;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_mw *mw = seg1->rl_mw;
struct rpcrdma_frmr *frmr = &mw->r.frmr;
struct ib_send_wr *invalidate_wr, *bad_wr;
int rc, nsegs = seg->mr_nsegs;
dprintk("RPC: %s: FRMR %p\n", __func__, mw);
seg1->rl_mw = NULL;
frmr->fr_state = FRMR_IS_INVALID;
invalidate_wr = &mw->r.frmr.fr_invwr;
memset(invalidate_wr, 0, sizeof(*invalidate_wr));
invalidate_wr->wr_id = (uintptr_t)mw;
invalidate_wr->opcode = IB_WR_LOCAL_INV;
invalidate_wr->ex.invalidate_rkey = frmr->fr_mr->rkey;
DECR_CQCOUNT(&r_xprt->rx_ep);
ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir);
read_lock(&ia->ri_qplock);
rc = ib_post_send(ia->ri_id->qp, invalidate_wr, &bad_wr);
read_unlock(&ia->ri_qplock);
if (rc)
goto out_err;
rpcrdma_put_mw(r_xprt, mw);
return nsegs;
out_err:
dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc);
__frwr_queue_recovery(mw);
return nsegs;
}
static void
frwr_op_destroy(struct rpcrdma_buffer *buf)
{
struct rpcrdma_mw *r;
/* Ensure stale MWs for "buf" are no longer in flight */
flush_workqueue(frwr_recovery_wq);
while (!list_empty(&buf->rb_all)) {
r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
list_del(&r->mw_all);
__frwr_release(r);
kfree(r);
}
}
const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
.ro_map = frwr_op_map,
.ro_unmap_sync = frwr_op_unmap_sync,
.ro_unmap = frwr_op_unmap,
.ro_open = frwr_op_open,
.ro_maxpages = frwr_op_maxpages,
.ro_init = frwr_op_init,
.ro_destroy = frwr_op_destroy,
.ro_displayname = "frwr",
};