RDMA/mlx5: Reorganize mlx5_ib_reg_user_mr()

This function handles the ODP and regular MR flows all mushed together, even
though the two flows are quite different. Split them into two dedicated
functions.

Link: https://lore.kernel.org/r/20201130075839.278575-5-leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Jason Gunthorpe 2020-11-30 09:58:38 +02:00
parent 6e0954b11c
commit 38f8ff5b44
3 changed files with 140 additions and 129 deletions
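For orientation, the sketch below models the control flow this patch ends up with: mlx5_ib_reg_user_mr() becomes a thin dispatcher that routes IB_ACCESS_ON_DEMAND registrations to create_user_odp_mr() and everything else to create_real_mr(). It is a standalone userspace toy, not the kernel code: the struct ib_mr stub, the simplified signatures, and the stub bodies are invented for illustration, and in the actual patch the non-ODP path obtains its umem with ib_umem_get() inside mlx5_ib_reg_user_mr() before calling create_real_mr().

/*
 * Userspace toy model of the post-patch control flow; not the kernel code.
 * The real functions take struct ib_pd / struct ib_udata and return
 * ERR_PTR() values on failure.
 */
#include <stdint.h>
#include <stdio.h>

#define IB_ACCESS_ON_DEMAND (1 << 6)	/* same bit as the rdma uapi flag */

struct ib_mr { const char *created_by; };	/* stand-in for the kernel type */

/* ODP flow: implicit-MR special case, ib_umem_odp_get(), pagefault prefill. */
static struct ib_mr *create_user_odp_mr(uint64_t start, uint64_t length,
					uint64_t iova, int access_flags)
{
	static struct ib_mr mr = { "create_user_odp_mr" };

	(void)start; (void)length; (void)iova; (void)access_flags;
	return &mr;
}

/* Regular flow: pinned umem, cacheable MR or slow-path reg_create(). */
static struct ib_mr *create_real_mr(uint64_t start, uint64_t length,
				    uint64_t iova, int access_flags)
{
	static struct ib_mr mr = { "create_real_mr" };

	(void)start; (void)length; (void)iova; (void)access_flags;
	return &mr;
}

/* After the split, the entry point only dispatches on IB_ACCESS_ON_DEMAND. */
static struct ib_mr *mlx5_ib_reg_user_mr(uint64_t start, uint64_t length,
					 uint64_t iova, int access_flags)
{
	if (access_flags & IB_ACCESS_ON_DEMAND)
		return create_user_odp_mr(start, length, iova, access_flags);
	return create_real_mr(start, length, iova, access_flags);
}

int main(void)
{
	printf("%s\n", mlx5_ib_reg_user_mr(0, 4096, 0, 0)->created_by);
	printf("%s\n",
	       mlx5_ib_reg_user_mr(0, 4096, 0, IB_ACCESS_ON_DEMAND)->created_by);
	return 0;
}

The point of the split, as the diff below shows, is that each helper owns its own setup and error handling: the ODP side does the implicit-MR check, ib_umem_odp_get(), the odp_mkeys xarray insertion and mlx5_ib_init_odp_mr(), while the regular side picks between alloc_cacheable_mr() and the slow-path reg_create(), instead of both flows being threaded through one function with flags.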

drivers/infiniband/hw/mlx5/mlx5_ib.h

@@ -1340,7 +1340,7 @@ void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
enum ib_uverbs_advise_mr_advice advice,
u32 flags, struct ib_sge *sg_list, u32 num_sge);
int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable);
int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
{
@@ -1362,7 +1362,7 @@ mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
{
return -EOPNOTSUPP;
}
static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable)
static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr)
{
return -EOPNOTSUPP;
}

drivers/infiniband/hw/mlx5/mr.c

@@ -56,6 +56,10 @@ enum {
static void
create_mkey_callback(int status, struct mlx5_async_work *context);
static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
struct ib_umem *umem, u64 iova,
int access_flags, unsigned int page_size,
bool populate);
static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
struct ib_pd *pd)
@@ -875,32 +879,6 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev)
return MLX5_MAX_UMR_SHIFT;
}
static struct ib_umem *mr_umem_get(struct mlx5_ib_dev *dev, u64 start,
u64 length, int access_flags)
{
struct ib_umem *u;
if (access_flags & IB_ACCESS_ON_DEMAND) {
struct ib_umem_odp *odp;
odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
&mlx5_mn_ops);
if (IS_ERR(odp)) {
mlx5_ib_dbg(dev, "umem get failed (%ld)\n",
PTR_ERR(odp));
return ERR_CAST(odp);
}
return &odp->umem;
}
u = ib_umem_get(&dev->ib_dev, start, length, access_flags);
if (IS_ERR(u)) {
mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u));
return u;
}
return u;
}
static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct mlx5_ib_umr_context *context =
@@ -957,9 +935,18 @@ static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
return &cache->ent[order];
}
static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd,
struct ib_umem *umem, u64 iova,
int access_flags)
static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
u64 length, int access_flags)
{
mr->ibmr.lkey = mr->mmkey.key;
mr->ibmr.rkey = mr->mmkey.key;
mr->ibmr.length = length;
mr->access_flags = access_flags;
}
static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
struct ib_umem *umem, u64 iova,
int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_cache_ent *ent;
@@ -971,16 +958,26 @@ static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd,
return ERR_PTR(-EINVAL);
ent = mr_cache_ent_from_order(
dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size)));
if (!ent)
return ERR_PTR(-E2BIG);
/* Matches access in alloc_cache_mr() */
if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
return ERR_PTR(-EOPNOTSUPP);
/*
* Matches access in alloc_cache_mr(). If the MR can't come from the
* cache then synchronously create an uncached one.
*/
if (!ent || ent->limit == 0 ||
!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) {
mutex_lock(&dev->slow_path_mutex);
mr = reg_create(NULL, pd, umem, iova, access_flags, page_size,
false);
mutex_unlock(&dev->slow_path_mutex);
return mr;
}
mr = get_cache_mr(ent);
if (!mr) {
mr = create_cache_mr(ent);
/*
* The above already tried to do the same stuff as reg_create(),
* no reason to try it again.
*/
if (IS_ERR(mr))
return mr;
}
@@ -993,6 +990,8 @@ static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd,
mr->mmkey.size = umem->length;
mr->mmkey.pd = to_mpd(pd)->pdn;
mr->page_shift = order_base_2(page_size);
mr->umem = umem;
set_mr_fields(dev, mr, umem->length, access_flags);
return mr;
}
@@ -1279,10 +1278,10 @@ err:
*/
static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
struct ib_umem *umem, u64 iova,
int access_flags, bool populate)
int access_flags, unsigned int page_size,
bool populate)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
unsigned int page_size;
struct mlx5_ib_mr *mr;
__be64 *pas;
void *mkc;
@@ -1291,11 +1290,12 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
int err;
bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
page_size =
mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova);
if (WARN_ON(!page_size))
return ERR_PTR(-EINVAL);
if (!page_size) {
page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size,
0, iova);
if (!page_size)
return ERR_PTR(-EINVAL);
}
mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
@@ -1352,6 +1352,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
mr->mmkey.type = MLX5_MKEY_MR;
mr->desc_size = sizeof(struct mlx5_mtt);
mr->dev = dev;
mr->umem = umem;
set_mr_fields(dev, mr, umem->length, access_flags);
kvfree(in);
mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
@@ -1368,15 +1370,6 @@ err_1:
return ERR_PTR(err);
}
static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
u64 length, int access_flags)
{
mr->ibmr.lkey = mr->mmkey.key;
mr->ibmr.rkey = mr->mmkey.key;
mr->ibmr.length = length;
mr->access_flags = access_flags;
}
static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
u64 length, int acc, int mode)
{
@@ -1471,70 +1464,32 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
attr->access_flags, mode);
}
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata)
static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
u64 iova, int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr = NULL;
bool xlt_with_umr;
struct ib_umem *umem;
int err;
if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
return ERR_PTR(-EOPNOTSUPP);
mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
start, virt_addr, length, access_flags);
xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, length);
/* ODP requires xlt update via umr to work. */
if (!xlt_with_umr && (access_flags & IB_ACCESS_ON_DEMAND))
return ERR_PTR(-EINVAL);
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start &&
length == U64_MAX) {
if (virt_addr != start)
return ERR_PTR(-EINVAL);
if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
return ERR_PTR(-EINVAL);
mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags);
if (IS_ERR(mr))
return ERR_CAST(mr);
return &mr->ibmr;
}
umem = mr_umem_get(dev, start, length, access_flags);
if (IS_ERR(umem))
return ERR_CAST(umem);
xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, umem->length);
if (xlt_with_umr) {
mr = alloc_mr_from_cache(pd, umem, virt_addr, access_flags);
if (IS_ERR(mr))
mr = NULL;
}
if (!mr) {
mr = alloc_cacheable_mr(pd, umem, iova, access_flags);
} else {
mutex_lock(&dev->slow_path_mutex);
mr = reg_create(NULL, pd, umem, virt_addr, access_flags,
!xlt_with_umr);
mr = reg_create(NULL, pd, umem, iova, access_flags, 0, true);
mutex_unlock(&dev->slow_path_mutex);
}
if (IS_ERR(mr)) {
err = PTR_ERR(mr);
goto error;
ib_umem_release(umem);
return ERR_CAST(mr);
}
mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
mr->umem = umem;
atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages);
set_mr_fields(dev, mr, length, access_flags);
atomic_add(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
if (xlt_with_umr && !(access_flags & IB_ACCESS_ON_DEMAND)) {
if (xlt_with_umr) {
/*
* If the MR was created with reg_create then it will be
* configured properly but left disabled. It is safe to go ahead
@@ -1546,32 +1501,88 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return ERR_PTR(err);
}
}
return &mr->ibmr;
}
if (is_odp_mr(mr)) {
to_ib_umem_odp(mr->umem)->private = mr;
init_waitqueue_head(&mr->q_deferred_work);
atomic_set(&mr->num_deferred_work, 0);
err = xa_err(xa_store(&dev->odp_mkeys,
mlx5_base_mkey(mr->mmkey.key), &mr->mmkey,
GFP_KERNEL));
if (err) {
dereg_mr(dev, mr);
return ERR_PTR(err);
}
static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
u64 iova, int access_flags,
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct ib_umem_odp *odp;
struct mlx5_ib_mr *mr;
int err;
err = mlx5_ib_init_odp_mr(mr, xlt_with_umr);
if (err) {
dereg_mr(dev, mr);
return ERR_PTR(err);
}
if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
return ERR_PTR(-EOPNOTSUPP);
if (!start && length == U64_MAX) {
if (iova != 0)
return ERR_PTR(-EINVAL);
if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
return ERR_PTR(-EINVAL);
mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags);
if (IS_ERR(mr))
return ERR_CAST(mr);
return &mr->ibmr;
}
/* ODP requires xlt update via umr to work. */
if (!mlx5_ib_can_load_pas_with_umr(dev, length))
return ERR_PTR(-EINVAL);
odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
&mlx5_mn_ops);
if (IS_ERR(odp))
return ERR_CAST(odp);
mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags);
if (IS_ERR(mr)) {
ib_umem_release(&odp->umem);
return ERR_CAST(mr);
}
odp->private = mr;
init_waitqueue_head(&mr->q_deferred_work);
atomic_set(&mr->num_deferred_work, 0);
err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
&mr->mmkey, GFP_KERNEL));
if (err)
goto err_dereg_mr;
err = mlx5_ib_init_odp_mr(mr);
if (err)
goto err_dereg_mr;
return &mr->ibmr;
error:
ib_umem_release(umem);
err_dereg_mr:
dereg_mr(dev, mr);
return ERR_PTR(err);
}
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 iova, int access_flags,
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct ib_umem *umem;
if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
return ERR_PTR(-EOPNOTSUPP);
mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
start, iova, length, access_flags);
if (access_flags & IB_ACCESS_ON_DEMAND)
return create_user_odp_mr(pd, start, length, iova, access_flags,
udata);
umem = ib_umem_get(&dev->ib_dev, start, length, access_flags);
if (IS_ERR(umem))
return ERR_CAST(umem);
return create_real_mr(pd, umem, iova, access_flags);
}
/**
* mlx5_mr_cache_invalidate - Fence all DMA on the MR
* @mr: The MR to fence
@@ -1661,7 +1672,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
atomic_sub(ib_umem_num_pages(mr->umem),
&dev->mdev->priv.reg_pages);
ib_umem_release(mr->umem);
mr->umem = mr_umem_get(dev, addr, len, access_flags);
mr->umem = ib_umem_get(&dev->ib_dev, addr, len, access_flags);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
mr->umem = NULL;
@@ -1685,7 +1696,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
if (err)
goto err;
mr = reg_create(ib_mr, pd, mr->umem, addr, access_flags, true);
mr = reg_create(ib_mr, pd, mr->umem, addr, access_flags, 0, true);
if (IS_ERR(mr)) {
err = PTR_ERR(mr);
mr = to_mmr(ib_mr);

drivers/infiniband/hw/mlx5/odp.c

@@ -536,6 +536,10 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
struct mlx5_ib_mr *imr;
int err;
if (!mlx5_ib_can_load_pas_with_umr(dev,
MLX5_IMR_MTT_ENTRIES * PAGE_SIZE))
return ERR_PTR(-EOPNOTSUPP);
umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags);
if (IS_ERR(umem_odp))
return ERR_CAST(umem_odp);
@@ -831,17 +835,13 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
flags);
}
int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable)
int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr)
{
u32 flags = MLX5_PF_FLAGS_SNAPSHOT;
int ret;
if (enable)
flags |= MLX5_PF_FLAGS_ENABLE;
ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem),
mr->umem->address, mr->umem->length, NULL,
flags);
ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem), mr->umem->address,
mr->umem->length, NULL,
MLX5_PF_FLAGS_SNAPSHOT | MLX5_PF_FLAGS_ENABLE);
return ret >= 0 ? 0 : ret;
}