IB/rdmavt: Compress adjacent SGEs in rvt_lkey_ok()

SGEs that are contiguous needlessly consume driver-dependent TX resources.

The lkey validation logic is enhanced to compress adjacent SGEs into a
single SGE in the send WQE when consecutive addresses are detected.

The lkey validation API used to return 1 (success) or 0 (fail).

The return value is now an -errno, 0 (compressed), or 1 (uncompressed). An
additional argument is added to pass the last SGE written so adjacent
entries can be detected and compressed.

Loopback callers always pass NULL for last_sge since the optimization is of
little benefit in that situation.
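
A minimal user-space sketch of the new caller-side contract follows; it is not the rdmavt code, and demo_sge and demo_lkey_ok() are made-up stand-ins. It only shows how a caller consumes the tri-state return value (< 0 error, 0 compressed into the previous slot, 1 new slot used) while tracking the last stored SGE, as rvt_post_one_wr() does in the diff below.

/*
 * Hypothetical illustration of the incr = rvt_lkey_ok(...) pattern.
 * Build with: cc -o sge_demo sge_demo.c
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

struct demo_sge {
	uint64_t addr;
	uint32_t length;
	uint32_t lkey;
};

/* Fold @sge into @last if it starts exactly where @last ends (same lkey). */
static int demo_lkey_ok(struct demo_sge *slot, struct demo_sge *last,
			const struct demo_sge *sge)
{
	if (sge->length == 0)
		return -EINVAL;
	if (last && last->lkey == sge->lkey &&
	    last->addr + last->length == sge->addr) {
		last->length += sge->length;	/* compressed: no new slot */
		return 0;
	}
	*slot = *sge;				/* uncompressed: new slot */
	return 1;
}

int main(void)
{
	const struct demo_sge wr[] = {
		{ 0x1000, 256, 7 }, { 0x1100, 256, 7 },	/* adjacent pair */
		{ 0x9000, 128, 7 },			/* not adjacent */
	};
	struct demo_sge wqe[3], *last = NULL;
	int i, j = 0, incr;

	for (i = 0; i < 3; i++) {
		incr = demo_lkey_ok(&wqe[j], last, &wr[i]);
		if (incr < 0)
			return 1;		/* caller unwinds here */
		if (incr)
			last = &wqe[j];		/* remember last stored SGE */
		j += incr;
	}
	printf("%d wr entries compressed into %d wqe entries\n", i, j);
	return 0;
}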

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Brian Welty <brian.welty@intel.com>
Signed-off-by: Venkata Sandeep Dhanalakota <venkata.s.dhanalakota@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
commit 14fe13fcd3 (parent 9039746cdf)
Author: Mike Marciniszyn, 2017-05-12 09:20:31 -07:00; committed by Doug Ledford
7 changed files with 130 additions and 24 deletions


@@ -75,7 +75,7 @@ static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
 			continue;
 		/* Check LKEY */
 		if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
-				 &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
+				 NULL, &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
 			goto bad_lkey;
 		qp->r_len += wqe->sg_list[i].length;
 		j++;


@@ -59,7 +59,7 @@ static int qib_init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
 			continue;
 		/* Check LKEY */
 		if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
-				 &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
+				 NULL, &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
 			goto bad_lkey;
 		qp->r_len += wqe->sg_list[i].length;
 		j++;


@@ -777,24 +777,55 @@ out:
 	return ret;
 }
 
+/**
+ * rvt_sge_adjacent - is isge compressible
+ * @isge: outgoing internal SGE
+ * @last_sge: last outgoing SGE written
+ * @sge: SGE to check
+ *
+ * If adjacent will update last_sge to add length.
+ *
+ * Return: true if isge is adjacent to last sge
+ */
+static inline bool rvt_sge_adjacent(struct rvt_sge *isge,
+				    struct rvt_sge *last_sge,
+				    struct ib_sge *sge)
+{
+	if (last_sge && sge->lkey == last_sge->mr->lkey &&
+	    ((uint64_t)(last_sge->vaddr + last_sge->length) == sge->addr)) {
+		if (sge->lkey) {
+			if (unlikely((sge->addr - last_sge->mr->user_base +
+			      sge->length > last_sge->mr->length)))
+				return false; /* overrun, caller will catch */
+		} else {
+			last_sge->length += sge->length;
+		}
+		last_sge->sge_length += sge->length;
+		trace_rvt_sge_adjacent(last_sge, sge);
+		return true;
+	}
+	return false;
+}
+
 /**
  * rvt_lkey_ok - check IB SGE for validity and initialize
  * @rkt: table containing lkey to check SGE against
  * @pd: protection domain
  * @isge: outgoing internal SGE
+ * @last_sge: last outgoing SGE written
  * @sge: SGE to check
  * @acc: access flags
  *
  * Check the IB SGE for validity and initialize our internal version
  * of it.
  *
- * Return: 1 if valid and successful, otherwise returns 0.
+ * Increments the reference count when a new sge is stored.
  *
- * increments the reference count upon success
- *
+ * Return: 0 if compressed, 1 if added, otherwise returns -errno.
  */
 int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
-		struct rvt_sge *isge, struct ib_sge *sge, int acc)
+		struct rvt_sge *isge, struct rvt_sge *last_sge,
+		struct ib_sge *sge, int acc)
 {
 	struct rvt_mregion *mr;
 	unsigned n, m;
@@ -804,12 +835,14 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
 	 * We use LKEY == zero for kernel virtual addresses
 	 * (see rvt_get_dma_mr() and dma_virt_ops).
 	 */
-	rcu_read_lock();
 	if (sge->lkey == 0) {
 		struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
 
 		if (pd->user)
-			goto bail;
+			return -EINVAL;
+		if (rvt_sge_adjacent(isge, last_sge, sge))
+			return 0;
+		rcu_read_lock();
 		mr = rcu_dereference(dev->dma_mr);
 		if (!mr)
 			goto bail;
@@ -824,6 +857,9 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
 		isge->n = 0;
 		goto ok;
 	}
+	if (rvt_sge_adjacent(isge, last_sge, sge))
+		return 0;
+	rcu_read_lock();
 	mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
 	if (!mr)
 		goto bail;
@@ -874,12 +910,13 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
 	isge->m = m;
 	isge->n = n;
 ok:
+	trace_rvt_sge_new(isge, sge);
 	return 1;
 
 bail_unref:
 	rvt_put_mr(mr);
 bail:
 	rcu_read_unlock();
-	return 0;
+	return -EINVAL;
 }
 EXPORT_SYMBOL(rvt_lkey_ok);
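
The overrun guard in rvt_sge_adjacent() above only matters for user lkeys: a folded SGE must still end inside the registered region. The sketch below is an illustrative user-space rendering of that bounds check; demo_mr and the numbers are made up, only the field names mirror rvt_mregion.

/*
 * Hypothetical illustration of the user-MR overrun check.
 * Build with: cc -o overrun_demo overrun_demo.c
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct demo_mr {
	uint64_t user_base;	/* user virtual address the MR starts at */
	uint64_t length;	/* registered length in bytes */
};

static bool would_overrun(const struct demo_mr *mr, uint64_t addr, uint32_t len)
{
	/* offset of the SGE within the MR plus its length must not exceed the MR */
	return addr - mr->user_base + len > mr->length;
}

int main(void)
{
	struct demo_mr mr = { .user_base = 0x10000, .length = 4096 };

	/* 0x10F00 + 256 ends exactly at the end of the MR: compression allowed */
	printf("%d\n", would_overrun(&mr, 0x10F00, 256));	/* prints 0 */
	/* one byte further overruns the region: compression is refused */
	printf("%d\n", would_overrun(&mr, 0x10F00, 257));	/* prints 1 */
	return 0;
}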


@@ -1646,7 +1646,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
 	struct rvt_pd *pd;
 	struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
 	u8 log_pmtu;
-	int ret;
+	int ret, incr;
 	size_t cplen;
 	bool reserved_op;
 	int local_ops_delayed = 0;
@@ -1719,22 +1719,23 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
 	wqe->length = 0;
 	j = 0;
 	if (wr->num_sge) {
+		struct rvt_sge *last_sge = NULL;
+
 		acc = wr->opcode >= IB_WR_RDMA_READ ?
 			IB_ACCESS_LOCAL_WRITE : 0;
 		for (i = 0; i < wr->num_sge; i++) {
 			u32 length = wr->sg_list[i].length;
-			int ok;
 
 			if (length == 0)
 				continue;
-			ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j],
-					 &wr->sg_list[i], acc);
-			if (!ok) {
-				ret = -EINVAL;
-				goto bail_inval_free;
-			}
+			incr = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], last_sge,
+					   &wr->sg_list[i], acc);
+			if (unlikely(incr < 0))
+				goto bail_lkey_error;
 			wqe->length += length;
-			j++;
+			if (incr)
+				last_sge = &wqe->sg_list[j];
+			j += incr;
 		}
 		wqe->wr.num_sge = j;
 	}
@@ -1781,12 +1782,14 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
 		wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED;
 		qp->s_avail--;
 	}
-	trace_rvt_post_one_wr(qp, wqe);
+	trace_rvt_post_one_wr(qp, wqe, wr->num_sge);
 	smp_wmb(); /* see request builders */
 	qp->s_head = next;
 
 	return 0;
 
+bail_lkey_error:
+	ret = incr;
 bail_inval_free:
 	/* release mr holds */
 	while (j) {


@@ -103,6 +103,68 @@ DEFINE_EVENT(
 	TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len),
 	TP_ARGS(mr, m, n, v, len));
 
+DECLARE_EVENT_CLASS(
+	rvt_sge_template,
+	TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge),
+	TP_ARGS(sge, isge),
+	TP_STRUCT__entry(
+		RDI_DEV_ENTRY(ib_to_rvt(sge->mr->pd->device))
+		__field(struct rvt_mregion *, mr)
+		__field(struct rvt_sge *, sge)
+		__field(struct ib_sge *, isge)
+		__field(void *, vaddr)
+		__field(u64, ivaddr)
+		__field(u32, lkey)
+		__field(u32, sge_length)
+		__field(u32, length)
+		__field(u32, ilength)
+		__field(int, user)
+		__field(u16, m)
+		__field(u16, n)
+	),
+	TP_fast_assign(
+		RDI_DEV_ASSIGN(ib_to_rvt(sge->mr->pd->device));
+		__entry->mr = sge->mr;
+		__entry->sge = sge;
+		__entry->isge = isge;
+		__entry->vaddr = sge->vaddr;
+		__entry->ivaddr = isge->addr;
+		__entry->lkey = sge->mr->lkey;
+		__entry->sge_length = sge->sge_length;
+		__entry->length = sge->length;
+		__entry->ilength = isge->length;
+		__entry->m = sge->m;
+		__entry->n = sge->n;
+		__entry->user = ibpd_to_rvtpd(sge->mr->pd)->user;
+	),
+	TP_printk(
+		"[%s] mr %p sge %p isge %p vaddr %p ivaddr %llx lkey %x sge_length %u length %u ilength %u m %u n %u user %u",
+		__get_str(dev),
+		__entry->mr,
+		__entry->sge,
+		__entry->isge,
+		__entry->vaddr,
+		__entry->ivaddr,
+		__entry->lkey,
+		__entry->sge_length,
+		__entry->length,
+		__entry->ilength,
+		__entry->m,
+		__entry->n,
+		__entry->user
+	)
+);
+
+DEFINE_EVENT(
+	rvt_sge_template, rvt_sge_adjacent,
+	TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge),
+	TP_ARGS(sge, isge));
+
+DEFINE_EVENT(
+	rvt_sge_template, rvt_sge_new,
+	TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge),
+	TP_ARGS(sge, isge));
+
 #endif /* __RVT_TRACE_MR_H */
 #undef TRACE_INCLUDE_PATH


@@ -84,12 +84,12 @@ __print_symbolic(opcode, \
 	wr_opcode_name(RESERVED10))
 
 #define POS_PRN \
-"[%s] wqe %p wr_id %llx send_flags %x qpn %x qpt %u psn %x lpsn %x ssn %x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u pid %u num_sge %u"
+"[%s] wqe %p wr_id %llx send_flags %x qpn %x qpt %u psn %x lpsn %x ssn %x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u pid %u num_sge %u wr_num_sge %u"
 
 TRACE_EVENT(
 	rvt_post_one_wr,
-	TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe),
-	TP_ARGS(qp, wqe),
+	TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe, int wr_num_sge),
+	TP_ARGS(qp, wqe, wr_num_sge),
 	TP_STRUCT__entry(
 		RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
 		__field(u64, wr_id)
@@ -108,6 +108,7 @@ TRACE_EVENT(
 		__field(int, send_flags)
 		__field(pid_t, pid)
 		__field(int, num_sge)
+		__field(int, wr_num_sge)
 	),
 	TP_fast_assign(
 		RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
@@ -127,6 +128,7 @@ TRACE_EVENT(
 		__entry->ssn = wqe->ssn;
 		__entry->send_flags = wqe->wr.send_flags;
 		__entry->num_sge = wqe->wr.num_sge;
+		__entry->wr_num_sge = wr_num_sge;
 	),
 	TP_printk(
 		POS_PRN,
@@ -146,7 +148,8 @@ TRACE_EVENT(
 		__entry->head,
 		__entry->last,
 		__entry->pid,
-		__entry->num_sge
+		__entry->num_sge,
+		__entry->wr_num_sge
 	)
 );


@@ -515,7 +515,8 @@ int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey);
 int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
 		u32 len, u64 vaddr, u32 rkey, int acc);
 int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
-		struct rvt_sge *isge, struct ib_sge *sge, int acc);
+		struct rvt_sge *isge, struct rvt_sge *last_sge,
+		struct ib_sge *sge, int acc);
 struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid,
 				 u16 lid);