RDMA/bnxt_re: Handle variable WQE support for user applications

The user library calculates the number of slots required by user applications
and can pass that count to the driver. The driver can then use this value to
program the hardware directly. This mechanism is currently used only for the
newly introduced variable-size WQEs.

Extend the bnxt_re_qp_req structure to pass the Send Queue slot count.
Reorganize the code to get the sq_slots before initializing the Send Queue
attributes.
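
As an illustration of the contract this creates, a user-space provider could
fill the request as below. This is a minimal sketch, assuming the library has
already laid out its send queue in 16-byte sq_sge slots; compute_sq_slots()
and the local variables are hypothetical, not part of this patch or of
rdma-core:

    #include <stdint.h>
    #include <rdma/bnxt_re-abi.h>

    /* Hypothetical helper: number of 16-byte slots the library allocated. */
    extern uint32_t compute_sq_slots(uint32_t max_send_wr, uint32_t max_send_sge);

    static void fill_qp_req(struct bnxt_re_qp_req *req, void *sq_va, void *rq_va,
                            void *qp, uint32_t max_send_wr, uint32_t max_send_sge)
    {
            req->qpsva = (uintptr_t)sq_va;     /* send queue buffer */
            req->qprva = (uintptr_t)rq_va;     /* receive queue buffer */
            req->qp_handle = (uintptr_t)qp;
            /* Announce that sq_slots below carries a valid value. */
            req->comp_mask = BNXT_RE_QP_REQ_MASK_VAR_WQE_SQ_SLOTS;
            req->sq_slots = compute_sq_slots(max_send_wr, max_send_sge);
    }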

Link: https://patch.msgid.link/r/1724042847-1481-5-git-send-email-selvin.xavier@broadcom.com
Signed-off-by: Hongguang Gao <hongguang.gao@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
commit d8ea645d69 (parent b930d0bac9)
Author:     Selvin Xavier <selvin.xavier@broadcom.com>
AuthorDate: 2024-08-18 21:47:26 -07:00
Commit:     Jason Gunthorpe <jgg@nvidia.com>

 3 files changed, 83 insertions(+), 47 deletions(-)

--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c

@@ -1017,20 +1017,15 @@ static int bnxt_re_setup_swqe_size(struct bnxt_re_qp *qp,
 }
 static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
-				struct bnxt_re_qp *qp, struct ib_udata *udata)
+				struct bnxt_re_qp *qp, struct bnxt_re_ucontext *cntx,
+				struct bnxt_re_qp_req *ureq)
 {
 	struct bnxt_qplib_qp *qplib_qp;
-	struct bnxt_re_ucontext *cntx;
-	struct bnxt_re_qp_req ureq;
 	int bytes = 0, psn_sz;
 	struct ib_umem *umem;
 	int psn_nume;
 	qplib_qp = &qp->qplib_qp;
-	cntx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext,
-					 ib_uctx);
-	if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
-		return -EFAULT;
 	bytes = (qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size);
 	/* Consider mapping PSN search memory only for RC QPs. */
@@ -1038,17 +1033,20 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
 		psn_sz = bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ?
 			  sizeof(struct sq_psn_search_ext) :
 			  sizeof(struct sq_psn_search);
-		psn_nume = (qplib_qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ?
-			    qplib_qp->sq.max_wqe :
-			    ((qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size) /
-			     sizeof(struct bnxt_qplib_sge));
+		if (cntx && bnxt_re_is_var_size_supported(rdev, cntx)) {
+			psn_nume = ureq->sq_slots;
+		} else {
+			psn_nume = (qplib_qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ?
+				    qplib_qp->sq.max_wqe :
+				    ((qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size) /
+				     sizeof(struct bnxt_qplib_sge));
+		}
 		if (_is_host_msn_table(rdev->qplib_res.dattr->dev_cap_flags2))
 			psn_nume = roundup_pow_of_two(psn_nume);
 		bytes += (psn_nume * psn_sz);
 	}
 	bytes = PAGE_ALIGN(bytes);
-	umem = ib_umem_get(&rdev->ibdev, ureq.qpsva, bytes,
+	umem = ib_umem_get(&rdev->ibdev, ureq->qpsva, bytes,
 			   IB_ACCESS_LOCAL_WRITE);
 	if (IS_ERR(umem))
 		return PTR_ERR(umem);
@@ -1057,12 +1055,12 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
 	qplib_qp->sq.sg_info.umem = umem;
 	qplib_qp->sq.sg_info.pgsize = PAGE_SIZE;
 	qplib_qp->sq.sg_info.pgshft = PAGE_SHIFT;
-	qplib_qp->qp_handle = ureq.qp_handle;
+	qplib_qp->qp_handle = ureq->qp_handle;
 	if (!qp->qplib_qp.srq) {
 		bytes = (qplib_qp->rq.max_wqe * qplib_qp->rq.wqe_size);
 		bytes = PAGE_ALIGN(bytes);
-		umem = ib_umem_get(&rdev->ibdev, ureq.qprva, bytes,
+		umem = ib_umem_get(&rdev->ibdev, ureq->qprva, bytes,
 				   IB_ACCESS_LOCAL_WRITE);
 		if (IS_ERR(umem))
 			goto rqfail;
@@ -1261,14 +1259,15 @@ static void bnxt_re_adjust_gsi_rq_attr(struct bnxt_re_qp *qp)
 static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp,
 				struct ib_qp_init_attr *init_attr,
-				struct bnxt_re_ucontext *uctx)
+				struct bnxt_re_ucontext *uctx,
+				struct bnxt_re_qp_req *ureq)
 {
 	struct bnxt_qplib_dev_attr *dev_attr;
 	struct bnxt_qplib_qp *qplqp;
 	struct bnxt_re_dev *rdev;
 	struct bnxt_qplib_q *sq;
+	int diff = 0;
 	int entries;
-	int diff;
 	int rc;
 	rdev = qp->rdev;
@@ -1277,22 +1276,28 @@ static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp,
 	dev_attr = &rdev->dev_attr;
 	sq->max_sge = init_attr->cap.max_send_sge;
-	if (sq->max_sge > dev_attr->max_qp_sges) {
-		sq->max_sge = dev_attr->max_qp_sges;
-		init_attr->cap.max_send_sge = sq->max_sge;
-	}
-	rc = bnxt_re_setup_swqe_size(qp, init_attr);
-	if (rc)
-		return rc;
 	entries = init_attr->cap.max_send_wr;
-	/* Allocate 128 + 1 more than what's provided */
-	diff = (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) ?
-	       0 : BNXT_QPLIB_RESERVED_QP_WRS;
-	entries = bnxt_re_init_depth(entries + diff + 1, uctx);
-	sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + diff + 1);
-	sq->max_sw_wqe = bnxt_qplib_get_depth(sq, qplqp->wqe_mode, true);
+	if (uctx && qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) {
+		sq->max_wqe = ureq->sq_slots;
+		sq->max_sw_wqe = ureq->sq_slots;
+		sq->wqe_size = sizeof(struct sq_sge);
+	} else {
+		if (sq->max_sge > dev_attr->max_qp_sges) {
+			sq->max_sge = dev_attr->max_qp_sges;
+			init_attr->cap.max_send_sge = sq->max_sge;
+		}
+		rc = bnxt_re_setup_swqe_size(qp, init_attr);
+		if (rc)
+			return rc;
+		/* Allocate 128 + 1 more than what's provided */
+		diff = (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) ?
+			0 : BNXT_QPLIB_RESERVED_QP_WRS;
+		entries = bnxt_re_init_depth(entries + diff + 1, uctx);
+		sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + diff + 1);
+		sq->max_sw_wqe = bnxt_qplib_get_depth(sq, qplqp->wqe_mode, true);
+	}
 	sq->q_full_delta = diff + 1;
 	/*
 	 * Reserving one slot for Phantom WQE. Application can
@@ -1355,10 +1360,10 @@ out:
 static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd,
 				struct ib_qp_init_attr *init_attr,
-				struct ib_udata *udata)
+				struct bnxt_re_ucontext *uctx,
+				struct bnxt_re_qp_req *ureq)
 {
 	struct bnxt_qplib_dev_attr *dev_attr;
-	struct bnxt_re_ucontext *uctx;
 	struct bnxt_qplib_qp *qplqp;
 	struct bnxt_re_dev *rdev;
 	struct bnxt_re_cq *cq;
@@ -1368,7 +1373,6 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd,
 	qplqp = &qp->qplib_qp;
 	dev_attr = &rdev->dev_attr;
-	uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx);
 	/* Setup misc params */
 	ether_addr_copy(qplqp->smac, rdev->netdev->dev_addr);
 	qplqp->pd = &pd->qplib_pd;
@@ -1381,8 +1385,7 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd,
 		goto out;
 	}
 	qplqp->type = (u8)qptype;
-	qplqp->wqe_mode = rdev->chip_ctx->modes.wqe_mode;
+	qplqp->wqe_mode = bnxt_re_is_var_size_supported(rdev, uctx);
 	if (init_attr->qp_type == IB_QPT_RC) {
 		qplqp->max_rd_atomic = dev_attr->max_qp_rd_atom;
 		qplqp->max_dest_rd_atomic = dev_attr->max_qp_init_rd_atom;
@@ -1417,14 +1420,14 @@
 		bnxt_re_adjust_gsi_rq_attr(qp);
 	/* Setup SQ */
-	rc = bnxt_re_init_sq_attr(qp, init_attr, uctx);
+	rc = bnxt_re_init_sq_attr(qp, init_attr, uctx, ureq);
 	if (rc)
 		goto out;
 	if (init_attr->qp_type == IB_QPT_GSI)
 		bnxt_re_adjust_gsi_sq_attr(qp, init_attr, uctx);
-	if (udata) /* This will update DPI and qp_handle */
-		rc = bnxt_re_init_user_qp(rdev, pd, qp, udata);
+	if (uctx) /* This will update DPI and qp_handle */
+		rc = bnxt_re_init_user_qp(rdev, pd, qp, uctx, ureq);
 out:
 	return rc;
 }
@@ -1525,14 +1528,27 @@ static bool bnxt_re_test_qp_limits(struct bnxt_re_dev *rdev,
 int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr,
 		      struct ib_udata *udata)
 {
-	struct ib_pd *ib_pd = ib_qp->pd;
-	struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
-	struct bnxt_re_dev *rdev = pd->rdev;
-	struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
-	struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+	struct bnxt_qplib_dev_attr *dev_attr;
+	struct bnxt_re_ucontext *uctx;
+	struct bnxt_re_qp_req ureq;
+	struct bnxt_re_dev *rdev;
+	struct bnxt_re_pd *pd;
+	struct bnxt_re_qp *qp;
+	struct ib_pd *ib_pd;
 	u32 active_qps;
 	int rc;
+	ib_pd = ib_qp->pd;
+	pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+	rdev = pd->rdev;
+	dev_attr = &rdev->dev_attr;
+	qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+	uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx);
+	if (udata)
+		if (ib_copy_from_udata(&ureq, udata, min(udata->inlen, sizeof(ureq))))
+			return -EFAULT;
 	rc = bnxt_re_test_qp_limits(rdev, qp_init_attr, dev_attr);
 	if (!rc) {
 		rc = -EINVAL;
@@ -1540,7 +1556,7 @@ int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr,
 	}
 	qp->rdev = rdev;
-	rc = bnxt_re_init_qp_attr(qp, pd, qp_init_attr, udata);
+	rc = bnxt_re_init_qp_attr(qp, pd, qp_init_attr, uctx, &ureq);
 	if (rc)
 		goto fail;
@@ -4215,7 +4231,7 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata)
 			goto cfail;
 		if (ureq.comp_mask & BNXT_RE_COMP_MASK_REQ_UCNTX_POW2_SUPPORT) {
 			resp.comp_mask |= BNXT_RE_UCNTX_CMASK_POW2_DISABLED;
-			uctx->cmask |= BNXT_RE_UCNTX_CMASK_POW2_DISABLED;
+			uctx->cmask |= BNXT_RE_UCNTX_CAP_POW2_DISABLED;
 		}
 	}
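
One detail worth calling out in the bnxt_re_create_qp() hunk above: the copy
now reads min(udata->inlen, sizeof(ureq)) bytes, so an older user library that
passes the shorter, pre-comp_mask bnxt_re_qp_req keeps working; the kernel
consumes only as many bytes as user space actually supplied. A minimal sketch
of the pattern, with a defensive zero-initialization added (the patch itself
leaves ureq uninitialized, which is acceptable because the new fields are
consulted only when the new capability was negotiated):

    /* Sketch of the compat-copy pattern used above. */
    struct bnxt_re_qp_req ureq = {};  /* unset tail fields read back as zero */

    if (udata &&
        ib_copy_from_udata(&ureq, udata, min(udata->inlen, sizeof(ureq))))
            return -EFAULT;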

--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h

@@ -171,12 +171,26 @@ static inline u16 bnxt_re_get_rwqe_size(int nsge)
 	return sizeof(struct rq_wqe_hdr) + (nsge * sizeof(struct sq_sge));
 }
+enum {
+	BNXT_RE_UCNTX_CAP_POW2_DISABLED = 0x1ULL,
+	BNXT_RE_UCNTX_CAP_VAR_WQE_ENABLED = 0x2ULL,
+};
 static inline u32 bnxt_re_init_depth(u32 ent, struct bnxt_re_ucontext *uctx)
 {
-	return uctx ? (uctx->cmask & BNXT_RE_UCNTX_CMASK_POW2_DISABLED) ?
+	return uctx ? (uctx->cmask & BNXT_RE_UCNTX_CAP_POW2_DISABLED) ?
 		ent : roundup_pow_of_two(ent) : ent;
 }
+static inline bool bnxt_re_is_var_size_supported(struct bnxt_re_dev *rdev,
+						 struct bnxt_re_ucontext *uctx)
+{
+	if (uctx)
+		return uctx->cmask & BNXT_RE_UCNTX_CAP_VAR_WQE_ENABLED;
+	else
+		return rdev->chip_ctx->modes.wqe_mode;
+}
 int bnxt_re_query_device(struct ib_device *ibdev,
 			 struct ib_device_attr *ib_attr,
 			 struct ib_udata *udata);
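
One subtlety: in ib_verbs.c above, the bool result of
bnxt_re_is_var_size_supported() is assigned straight to qplqp->wqe_mode. That
only works because the qplib constants encode static mode as 0 and variable
mode as 1 (the exact values below are taken from the driver's qplib headers
and should be treated as an assumption of this sketch):

    /* Assumed qplib encoding: static WQEs are 0, variable WQEs are 1. */
    #define BNXT_QPLIB_WQE_MODE_STATIC      0x00
    #define BNXT_QPLIB_WQE_MODE_VARIABLE    0x01

    /* So: false -> BNXT_QPLIB_WQE_MODE_STATIC, true -> BNXT_QPLIB_WQE_MODE_VARIABLE. */
    qplqp->wqe_mode = bnxt_re_is_var_size_supported(rdev, uctx);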

--- a/include/uapi/rdma/bnxt_re-abi.h
+++ b/include/uapi/rdma/bnxt_re-abi.h

@@ -118,10 +118,16 @@ struct bnxt_re_resize_cq_req {
 	__aligned_u64 cq_va;
 };
+enum bnxt_re_qp_mask {
+	BNXT_RE_QP_REQ_MASK_VAR_WQE_SQ_SLOTS = 0x1,
+};
 struct bnxt_re_qp_req {
 	__aligned_u64 qpsva;
 	__aligned_u64 qprva;
 	__aligned_u64 qp_handle;
+	__aligned_u64 comp_mask;
+	__u32 sq_slots;
 };
 struct bnxt_re_qp_resp {
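
Both new fields extend bnxt_re_qp_req at the tail, so older kernels ignore
them and older libraries never send them. By the usual comp_mask convention a
consumer should trust sq_slots only when the mask bit announces it; an
illustrative check of that convention (this patch gates on the ucontext
capability instead, so the test below is a sketch, not the driver's code):

    /* Illustrative only: honour sq_slots when the library announced it. */
    if (ureq.comp_mask & BNXT_RE_QP_REQ_MASK_VAR_WQE_SQ_SLOTS)
            sq_slots = ureq.sq_slots;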