Mellanox specific updates for 4.11 merge window

Because the Mellanox code required being based on a net-next tree,
 I kept it separate from the remainder of the RDMA stack submission
 that is based on 4.10-rc3.
 
 This branch contains:
 
 - Various mlx4 and mlx5 fixes and minor changes
 - Support for adding a tag match rule to flow specs
 - Support for cvlan offload operation for raw ethernet QPs
 - A change to the core IB code to recognize raw eth capabilities and
   enumerate them (touches non-Mellanox code)
 - Implicit On-Demand Paging memory registration support
 -----BEGIN PGP SIGNATURE-----
 
 iQIcBAABAgAGBQJYrx+WAAoJELgmozMOVy/du70P/1kpW2xY9Le04c3K7na2XOYl
 AUVIDrW/8Go63tpOaM7jBT3k4GlwVFr3IOmBpS24KbW/THxjhyUeP5L5+z2x+go+
 jkQOgtPWWEHr5zP3MzsNyB8fDx1YQOnJwEXxybQRW/cbw4CLjnhP+ezd6FdV/3Yy
 pPEqDVlAErzvNweG+n2r1pjcUbR8uneC3inyMLnyzUBz4CHKmC8fgD3/qJIM+DNb
 gtFT5xHFIXKCigWdQ/EwsTDcHub43V8OXlI5sO7loG6vToOUATMkjI4oOUNhDmYS
 X7XLN3yRK9QHEfb5kutXIZEWzTGh7LiFtUYGaNNYqqzDfSiMRc9NC5kTOfplEXDV
 Uo+AGb6Fh1zYIOzNk7o+tazIv3LaLv6+Fcm+9bbe0VUIqasaylsePqaTwMuIzx/I
 xP5nitmd5lbYo8WdlasVdG6mH1DlJEUbU30v4DpmTpxCP6jGpog7lexyGyF3TgzS
 NhnG0IiIClWh3WQ2/GdsFK/obIdFkpLeASli1hwD81vzPfly9zc2YpgqydZI3WCr
 q6hTXYnANcP6+eciCpQPO7giRdXdiKey08Uoq/2jxb7Qbm4daG6UwopjvH9/lm1F
 m6UDaDvzNYm+Rx+bL/+KSx9JO9+fJB1L51yCmvLGpWi6yJI4ZTfanHNMBsCua46N
 Kev/DSpIAzX1WOBkte+a
 =rspQ
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

Pull Mellanox rdma updates from Doug Ledford:
 "Mellanox specific updates for 4.11 merge window

  Because the Mellanox code required being based on a net-next tree, I
  kept it separate from the remainder of the RDMA stack submission that
  is based on 4.10-rc3.

  This branch contains:

   - Various mlx4 and mlx5 fixes and minor changes

   - Support for adding a tag match rule to flow specs

   - Support for cvlan offload operation for raw ethernet QPs

   - A change to the core IB code to recognize raw eth capabilities and
     enumerate them (touches non-Mellanox code)

   - Implicit On-Demand Paging memory registration support"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (40 commits)
  IB/mlx5: Fix configuration of port capabilities
  IB/mlx4: Take source GID by index from HW GID table
  IB/mlx5: Fix blue flame buffer size calculation
  IB/mlx4: Remove unused variable from function declaration
  IB: Query ports via the core instead of direct into the driver
  IB: Add protocol for USNIC
  IB/mlx4: Support raw packet protocol
  IB/mlx5: Support raw packet protocol
  IB/core: Add raw packet protocol
  IB/mlx5: Add implicit MR support
  IB/mlx5: Expose MR cache for mlx5_ib
  IB/mlx5: Add null_mkey access
  IB/umem: Indicate that process is being terminated
  IB/umem: Update on demand page (ODP) support
  IB/core: Add implicit MR flag
  IB/mlx5: Support creation of a WQ with scatter FCS offload
  IB/mlx5: Enable QP creation with cvlan offload
  IB/mlx5: Enable WQ creation and modification with cvlan offload
  IB/mlx5: Expose vlan offloads capabilities
  IB/uverbs: Enable QP creation with cvlan offload
  ...
This commit is contained in:
Linus Torvalds 2017-02-23 11:27:49 -08:00
commit af17fe7a63
42 changed files with 1418 additions and 271 deletions

View File

@ -99,9 +99,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (dmasync)
dma_attrs |= DMA_ATTR_WRITE_BARRIER;
if (!size)
return ERR_PTR(-EINVAL);
/*
* If the combination of the addr and size requested for this memory
* region causes an integer overflow, return error.

View File

@ -239,6 +239,71 @@ static const struct mmu_notifier_ops ib_umem_notifiers = {
.invalidate_range_end = ib_umem_notifier_invalidate_range_end,
};
/*
 * ib_alloc_odp_umem - allocate an ODP umem and register it with @context
 * @context: user context whose umem_tree the new umem is inserted into
 * @addr:    start address recorded in the umem
 * @size:    length in bytes recorded in the umem; the page_list and dma_list
 *           arrays are sized for size >> PAGE_SHIFT entries
 *
 * Allocates the umem, its ODP tracking structure and the two per-page
 * arrays, then inserts the ODP data into the context's umem_tree while
 * holding umem_rwsem for writing.
 *
 * Returns the new umem on success, or ERR_PTR(-ENOMEM) if any of the
 * allocations fails (everything allocated so far is released).
 */
struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
				  unsigned long addr,
				  size_t size)
{
	struct ib_umem *umem;
	struct ib_umem_odp *odp_data;
	/*
	 * Keep the page count in a size_t: storing it in an int would
	 * truncate large sizes and could yield a negative or too-small
	 * count, and therefore wrongly sized page_list/dma_list arrays.
	 */
	size_t pages = size >> PAGE_SHIFT;
	int ret;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	umem->context = context;
	umem->length = size;
	umem->address = addr;
	umem->page_size = PAGE_SIZE;
	umem->writable = 1;

	odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
	if (!odp_data) {
		ret = -ENOMEM;
		goto out_umem;
	}
	odp_data->umem = umem;

	mutex_init(&odp_data->umem_mutex);
	init_completion(&odp_data->notifier_completion);

	/*
	 * pages is at most SIZE_MAX >> PAGE_SHIFT, so the products below
	 * cannot wrap as long as an array element is no larger than a page.
	 */
	odp_data->page_list = vzalloc(pages * sizeof(*odp_data->page_list));
	if (!odp_data->page_list) {
		ret = -ENOMEM;
		goto out_odp_data;
	}

	odp_data->dma_list = vzalloc(pages * sizeof(*odp_data->dma_list));
	if (!odp_data->dma_list) {
		ret = -ENOMEM;
		goto out_page_list;
	}

	down_write(&context->umem_rwsem);
	context->odp_mrs_count++;
	rbt_ib_umem_insert(&odp_data->interval_tree, &context->umem_tree);
	/*
	 * With a zero notifier_count the counters are marked active right
	 * away; otherwise the umem is queued on no_private_counters.
	 */
	if (likely(!atomic_read(&context->notifier_count)))
		odp_data->mn_counters_active = true;
	else
		list_add(&odp_data->no_private_counters,
			 &context->no_private_counters);
	up_write(&context->umem_rwsem);

	umem->odp_data = odp_data;

	return umem;

out_page_list:
	vfree(odp_data->page_list);
out_odp_data:
	kfree(odp_data);
out_umem:
	kfree(umem);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL(ib_alloc_odp_umem);
int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
{
int ret_val;
@ -270,18 +335,20 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
init_completion(&umem->odp_data->notifier_completion);
umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
if (ib_umem_num_pages(umem)) {
umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
sizeof(*umem->odp_data->page_list));
if (!umem->odp_data->page_list) {
ret_val = -ENOMEM;
goto out_odp_data;
}
if (!umem->odp_data->page_list) {
ret_val = -ENOMEM;
goto out_odp_data;
}
umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
sizeof(*umem->odp_data->dma_list));
if (!umem->odp_data->dma_list) {
ret_val = -ENOMEM;
goto out_page_list;
if (!umem->odp_data->dma_list) {
ret_val = -ENOMEM;
goto out_page_list;
}
}
/*
@ -466,6 +533,7 @@ static int ib_umem_odp_map_dma_single_page(
}
umem->odp_data->dma_list[page_index] = dma_addr | access_mask;
umem->odp_data->page_list[page_index] = page;
umem->npages++;
stored_page = 1;
} else if (umem->odp_data->page_list[page_index] == page) {
umem->odp_data->dma_list[page_index] |= access_mask;
@ -505,7 +573,8 @@ out:
* for failure.
* An -EAGAIN error code is returned when a concurrent mmu notifier prevents
* the function from completing its task.
*
* An -ENOENT error code indicates that userspace process is being terminated
* and mm was already destroyed.
* @umem: the umem to map and pin
* @user_virt: the address from which we need to map.
* @bcnt: the minimal number of bytes to pin and map. The mapping might be
@ -553,7 +622,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
owning_mm = get_task_mm(owning_process);
if (owning_mm == NULL) {
ret = -EINVAL;
ret = -ENOENT;
goto out_put_task;
}
@ -665,6 +734,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
put_page(page);
umem->odp_data->page_list[idx] = NULL;
umem->odp_data->dma_list[idx] = 0;
umem->npages--;
}
}
mutex_unlock(&umem->odp_data->umem_mutex);

View File

@ -78,17 +78,32 @@ int rbt_ib_umem_for_each_in_range(struct rb_root *root,
void *cookie)
{
int ret_val = 0;
struct umem_odp_node *node;
struct umem_odp_node *node, *next;
struct ib_umem_odp *umem;
if (unlikely(start == last))
return ret_val;
for (node = rbt_ib_umem_iter_first(root, start, last - 1); node;
node = rbt_ib_umem_iter_next(node, start, last - 1)) {
for (node = rbt_ib_umem_iter_first(root, start, last - 1);
node; node = next) {
next = rbt_ib_umem_iter_next(node, start, last - 1);
umem = container_of(node, struct ib_umem_odp, interval_tree);
ret_val = cb(umem->umem, start, last, cookie) || ret_val;
}
return ret_val;
}
EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range);
/*
 * rbt_ib_umem_lookup - find an ODP umem in @root for a given range
 * @root:   interval tree root to search
 * @addr:   first address of the range
 * @length: length of the range; the last address queried is
 *          addr + length - 1
 *
 * Returns the ib_umem_odp embedding the first node reported by
 * rbt_ib_umem_iter_first() for the range, or NULL when there is none.
 */
struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root *root,
				       u64 addr, u64 length)
{
	struct umem_odp_node *hit;

	hit = rbt_ib_umem_iter_first(root, addr, addr + length - 1);
	if (!hit)
		return NULL;

	return container_of(hit, struct ib_umem_odp, interval_tree);
}
EXPORT_SYMBOL(rbt_ib_umem_lookup);

View File

@ -228,6 +228,7 @@ struct ib_uverbs_flow_spec {
struct ib_uverbs_flow_spec_ipv4 ipv4;
struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
struct ib_uverbs_flow_spec_ipv6 ipv6;
struct ib_uverbs_flow_spec_action_tag flow_tag;
};
};

View File

@ -1891,7 +1891,8 @@ static int create_qp(struct ib_uverbs_file *file,
IB_QP_CREATE_CROSS_CHANNEL |
IB_QP_CREATE_MANAGED_SEND |
IB_QP_CREATE_MANAGED_RECV |
IB_QP_CREATE_SCATTER_FCS)) {
IB_QP_CREATE_SCATTER_FCS |
IB_QP_CREATE_CVLAN_STRIPPING)) {
ret = -EINVAL;
goto err_put;
}
@ -3143,6 +3144,25 @@ out_put:
return ret ? ret : in_len;
}
/*
 * Convert a user-space action flow spec into its kernel ib_flow_spec form.
 *
 * The spec type is copied first. Only IB_FLOW_SPEC_ACTION_TAG is accepted,
 * and only when the user-supplied size equals
 * sizeof(struct ib_uverbs_flow_spec_action_tag).
 *
 * Returns 0 on success, -EINVAL for any other type or a size mismatch.
 */
static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec,
				       union ib_flow_spec *ib_spec)
{
	ib_spec->type = kern_spec->type;

	/* The flow tag is the only action spec handled here. */
	if (ib_spec->type != IB_FLOW_SPEC_ACTION_TAG)
		return -EINVAL;

	if (kern_spec->flow_tag.size !=
	    sizeof(struct ib_uverbs_flow_spec_action_tag))
		return -EINVAL;

	ib_spec->flow_tag.size = sizeof(struct ib_flow_spec_action_tag);
	ib_spec->flow_tag.tag_id = kern_spec->flow_tag.tag_id;

	return 0;
}
static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec)
{
/* Returns user space filter size, includes padding */
@ -3167,8 +3187,8 @@ static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size,
return kern_filter_size;
}
static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
union ib_flow_spec *ib_spec)
static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
union ib_flow_spec *ib_spec)
{
ssize_t actual_filter_sz;
ssize_t kern_filter_sz;
@ -3263,6 +3283,18 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
return 0;
}
/*
 * Convert one user-space flow spec to its kernel representation.
 *
 * A non-zero reserved field is rejected with -EINVAL. Spec types at or
 * above IB_FLOW_SPEC_ACTION_TAG are handed to the action converter, all
 * others to the filter converter; that converter's result is returned.
 */
static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
				union ib_flow_spec *ib_spec)
{
	if (kern_spec->reserved)
		return -EINVAL;

	return kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG ?
		kern_spec_to_ib_spec_action(kern_spec, ib_spec) :
		kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
}
int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
struct ib_udata *ucore,
@ -3325,6 +3357,9 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
wq_init_attr.wq_context = file;
wq_init_attr.wq_type = cmd.wq_type;
wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
if (ucore->inlen >= (offsetof(typeof(cmd), create_flags) +
sizeof(cmd.create_flags)))
wq_init_attr.create_flags = cmd.create_flags;
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
@ -3480,7 +3515,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
if (!cmd.attr_mask)
return -EINVAL;
if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE))
if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
return -EINVAL;
wq = idr_read_wq(cmd.wq_handle, file->ucontext);
@ -3489,6 +3524,10 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
wq_attr.curr_wq_state = cmd.curr_wq_state;
wq_attr.wq_state = cmd.wq_state;
if (cmd.attr_mask & IB_WQ_FLAGS) {
wq_attr.flags = cmd.flags;
wq_attr.flags_mask = cmd.flags_mask;
}
ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
put_wq_read(wq);
return ret;
@ -4323,6 +4362,12 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
resp.max_wq_type_rq = attr.max_wq_type_rq;
resp.response_length += sizeof(resp.max_wq_type_rq);
if (ucore->outlen < resp.response_length + sizeof(resp.raw_packet_caps))
goto end;
resp.raw_packet_caps = attr.raw_packet_caps;
resp.response_length += sizeof(resp.raw_packet_caps);
end:
err = ib_copy_to_udata(ucore, &resp, resp.response_length);
return err;

View File

@ -1133,7 +1133,7 @@ static int iwch_query_port(struct ib_device *ibdev,
dev = to_iwch_dev(ibdev);
netdev = dev->rdev.port_info.lldevs[port-1];
memset(props, 0, sizeof(struct ib_port_attr));
/* props being zeroed by the caller, avoid zeroing it here */
props->max_mtu = IB_MTU_4096;
props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
@ -1329,13 +1329,14 @@ static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
err = iwch_query_port(ibdev, port_num, &attr);
immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
return 0;
}

View File

@ -370,8 +370,7 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
dev = to_c4iw_dev(ibdev);
netdev = dev->rdev.lldi.ports[port-1];
memset(props, 0, sizeof(struct ib_port_attr));
/* props being zeroed by the caller, avoid zeroing it here */
props->max_mtu = IB_MTU_4096;
props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
@ -508,13 +507,14 @@ static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
err = c4iw_query_port(ibdev, port_num, &attr);
immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
return 0;
}

View File

@ -1302,6 +1302,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
u16 lid = ppd->lid;
/* props being zeroed by the caller, avoid zeroing it here */
props->lid = lid ? lid : 0;
props->lmc = ppd->lmc;
/* OPA logical states match IB logical states */

View File

@ -250,7 +250,7 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num,
assert(port_num > 0);
port = port_num - 1;
memset(props, 0, sizeof(*props));
/* props being zeroed by the caller, avoid zeroing it here */
props->max_mtu = hr_dev->caps.max_mtu;
props->gid_tbl_len = hr_dev->caps.gid_table_len[port];
@ -401,14 +401,15 @@ static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
struct ib_port_attr attr;
int ret;
ret = hns_roce_query_port(ib_dev, port_num, &attr);
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
ret = ib_query_port(ib_dev, port_num, &attr);
if (ret)
return ret;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;

View File

@ -97,8 +97,7 @@ static int i40iw_query_port(struct ib_device *ibdev,
struct i40iw_device *iwdev = to_iwdev(ibdev);
struct net_device *netdev = iwdev->netdev;
memset(props, 0, sizeof(*props));
/* props being zeroed by the caller, avoid zeroing it here */
props->max_mtu = IB_MTU_4096;
props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
@ -2497,14 +2496,15 @@ static int i40iw_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
err = i40iw_query_port(ibdev, port_num, &attr);
immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
return 0;
}

View File

@ -499,6 +499,7 @@ static int set_guid_rec(struct ib_device *ibdev,
struct list_head *head =
&dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
memset(&attr, 0, sizeof(attr));
err = __mlx4_ib_query_port(ibdev, port, &attr, 1);
if (err) {
pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n",

View File

@ -678,7 +678,7 @@ static u8 state_to_phys_state(enum ib_port_state state)
}
static int eth_link_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props, int netw_view)
struct ib_port_attr *props)
{
struct mlx4_ib_dev *mdev = to_mdev(ibdev);
@ -741,11 +741,11 @@ int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
{
int err;
memset(props, 0, sizeof *props);
/* props being zeroed by the caller, avoid zeroing it here */
err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
ib_link_query_port(ibdev, port, props, netw_view) :
eth_link_query_port(ibdev, port, props, netw_view);
eth_link_query_port(ibdev, port, props);
return err;
}
@ -1014,7 +1014,7 @@ static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
mutex_lock(&mdev->cap_mask_mutex);
err = mlx4_ib_query_port(ibdev, port, &attr);
err = ib_query_port(ibdev, port, &attr);
if (err)
goto out;
@ -2537,24 +2537,27 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
struct mlx4_ib_dev *mdev = to_mdev(ibdev);
int err;
err = mlx4_ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
} else {
if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
immutable->core_cap_flags |= RDMA_CORE_PORT_RAW_PACKET;
if (immutable->core_cap_flags & (RDMA_CORE_PORT_IBA_ROCE |
RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP))
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
}
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
return 0;
}

View File

@ -2420,11 +2420,31 @@ static u8 sl_to_vl(struct mlx4_ib_dev *dev, u8 sl, int port_num)
return vl;
}
/*
 * Read the GID and GID type stored at @index of the driver's per-port GID
 * table for @port_num (1-based).
 *
 * Both values are copied out under the iboe lock with interrupts disabled.
 *
 * Returns 0 on success, or -ENOENT when the copied GID equals zgid.
 */
static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8 port_num,
				int index, union ib_gid *gid,
				enum ib_gid_type *gid_type)
{
	struct mlx4_port_gid_table *table = &ibdev->iboe.gids[port_num - 1];
	unsigned long irq_flags;

	spin_lock_irqsave(&ibdev->iboe.lock, irq_flags);
	memcpy(gid, &table->gids[index].gid, sizeof(*gid));
	*gid_type = table->gids[index].gid_type;
	spin_unlock_irqrestore(&ibdev->iboe.lock, irq_flags);

	return memcmp(gid, &zgid, sizeof(*gid)) ? 0 : -ENOENT;
}
#define MLX4_ROCEV2_QP1_SPORT 0xC000
static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
struct ib_device *ib_dev = sqp->qp.ibqp.device;
struct mlx4_ib_dev *ibdev = to_mdev(ib_dev);
struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_ctrl_seg *ctrl = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
@ -2450,8 +2470,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
is_grh = mlx4_ib_ah_grh_present(ah);
if (is_eth) {
struct ib_gid_attr gid_attr;
enum ib_gid_type gid_type;
if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
/* When multi-function is enabled, the ib_core gid
* indexes don't necessarily match the hw ones, so
@ -2462,18 +2481,11 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
if (err)
return err;
} else {
err = ib_get_cached_gid(ib_dev,
be32_to_cpu(ah->av.ib.port_pd) >> 24,
ah->av.ib.gid_index, &sgid,
&gid_attr);
err = fill_gid_by_hw_index(ibdev, sqp->qp.port,
ah->av.ib.gid_index,
&sgid, &gid_type);
if (!err) {
if (gid_attr.ndev)
dev_put(gid_attr.ndev);
if (!memcmp(&sgid, &zgid, sizeof(sgid)))
err = -ENOENT;
}
if (!err) {
is_udp = gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
is_udp = gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
if (is_udp) {
if (ipv6_addr_v4mapped((struct in6_addr *)&sgid))
ip_version = 4;
@ -2951,21 +2963,17 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
if (sqp->roce_v2_gsi) {
struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah);
struct ib_gid_attr gid_attr;
enum ib_gid_type gid_type;
union ib_gid gid;
if (!ib_get_cached_gid(ibqp->device,
be32_to_cpu(ah->av.ib.port_pd) >> 24,
ah->av.ib.gid_index, &gid,
&gid_attr)) {
if (gid_attr.ndev)
dev_put(gid_attr.ndev);
qp = (gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
to_mqp(sqp->roce_v2_gsi) : qp;
} else {
if (!fill_gid_by_hw_index(mdev, sqp->qp.port,
ah->av.ib.gid_index,
&gid, &gid_type))
qp = (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
to_mqp(sqp->roce_v2_gsi) : qp;
else
pr_err("Failed to get gid at index %d. RoCEv2 will not work properly\n",
ah->av.ib.gid_index);
}
}
}

View File

@ -226,6 +226,7 @@ static int add_port_entries(struct mlx4_ib_dev *device, int port_num)
int ret = 0 ;
struct ib_port_attr attr;
memset(&attr, 0, sizeof(attr));
/* get the physical gid and pkey table sizes.*/
ret = __mlx4_ib_query_port(&device->ib_dev, port_num, &attr, 1);
if (ret)

View File

@ -1,4 +1,4 @@
obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o
mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o

View File

@ -0,0 +1,48 @@
/*
* Copyright (c) 2017, Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "cmd.h"
/*
 * Issue a QUERY_SPECIAL_CONTEXTS command and extract the null_mkey field.
 *
 * @dev:       device the command is executed on
 * @null_mkey: written with the null_mkey field of the command output;
 *             left untouched when the command fails
 *
 * Returns 0 on success or the error returned by mlx5_cmd_exec().
 */
int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey)
{
	u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {};
	u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {};
	int ret;

	MLX5_SET(query_special_contexts_in, in, opcode,
		 MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);

	ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
	if (ret)
		return ret;

	*null_mkey = MLX5_GET(query_special_contexts_out, out, null_mkey);
	return 0;
}

View File

@ -0,0 +1,40 @@
/*
* Copyright (c) 2017, Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MLX5_IB_CMD_H
#define MLX5_IB_CMD_H
#include <linux/kernel.h>
#include <linux/mlx5/driver.h>
int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
#endif /* MLX5_IB_CMD_H */

View File

@ -42,12 +42,24 @@ enum {
MLX5_IB_VENDOR_CLASS2 = 0xa
};
/*
 * Decide whether a MAD may be passed to the MAD_IFC command.
 *
 * MADs whose management class is neither SUBN_LID_ROUTED nor
 * SUBN_DIRECTED_ROUTE are always allowed. For those two classes the answer
 * is the has_smi capability recorded for @port_num (1-based).
 */
static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num,
			   struct ib_mad *in_mad)
{
	bool subn_class =
		in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
		in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE;

	if (!subn_class)
		return true;

	return dev->mdev->port_caps[port_num - 1].has_smi;
}
int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const void *in_mad, void *response_mad)
{
u8 op_modifier = 0;
if (!can_do_mad_ifc(dev, port, (struct ib_mad *)in_mad))
return -EPERM;
/* Key check traps can't be generated unless we have in_wc to
* tell us where to send the trap.
*/
@ -515,7 +527,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
if (!in_mad || !out_mad)
goto out;
memset(props, 0, sizeof(*props));
/* props being zeroed by the caller, avoid zeroing it here */
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;

View File

@ -65,10 +65,6 @@ MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRIVER_VERSION);
static int deprecated_prof_sel = 2;
module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");
static char mlx5_version[] =
DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
@ -175,7 +171,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
enum ib_mtu ndev_ib_mtu;
u16 qkey_viol_cntr;
memset(props, 0, sizeof(*props));
/* props being zeroed by the caller, avoid zeroing it here */
props->port_cap_flags |= IB_PORT_CM_SUP;
props->port_cap_flags |= IB_PORT_IP_BASED_GIDS;
@ -326,6 +322,27 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
}
/*
 * Fetch the GID type of the cached GID at @index on @port_num.
 *
 * @gid_type is written only on success.
 *
 * Returns 0 on success, the error from ib_get_cached_gid() if the lookup
 * fails, or -ENODEV when the returned attribute has no net device.
 */
int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
			   int index, enum ib_gid_type *gid_type)
{
	struct ib_gid_attr gid_attr;
	union ib_gid cached_gid;
	int err;

	err = ib_get_cached_gid(&dev->ib_dev, port_num, index,
				&cached_gid, &gid_attr);
	if (err)
		return err;

	if (!gid_attr.ndev)
		return -ENODEV;

	/* Drop the net device reference that came with the attribute. */
	dev_put(gid_attr.ndev);

	*gid_type = gid_attr.gid_type;

	return 0;
}
static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
@ -565,8 +582,15 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) {
if (MLX5_CAP_ETH(mdev, csum_cap))
if (MLX5_CAP_ETH(mdev, csum_cap)) {
/* Legacy bit to support old userspace libraries */
props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
props->raw_packet_caps |= IB_RAW_PACKET_CAP_IP_CSUM;
}
if (MLX5_CAP_ETH(dev->mdev, vlan_cap))
props->raw_packet_caps |=
IB_RAW_PACKET_CAP_CVLAN_STRIPPING;
if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
max_tso = MLX5_CAP_ETH(mdev, max_lso_cap);
@ -605,8 +629,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
MLX5_CAP_ETH(dev->mdev, scatter_fcs))
MLX5_CAP_ETH(dev->mdev, scatter_fcs)) {
/* Legacy bit to support old userspace libraries */
props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS;
props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
}
if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS))
props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
@ -831,7 +858,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
goto out;
}
memset(props, 0, sizeof(*props));
/* props being zeroed by the caller, avoid zeroing it here */
err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep);
if (err)
@ -969,6 +996,31 @@ static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
return err;
}
/*
 * Update port capability bits through the HCA vport context.
 *
 * @mask:  capability bits the caller wants to change
 * @value: new contents written to cap_mask1
 *
 * The current vport context is queried first; if @mask contains any bit
 * that is not set in the queried cap_mask1_perm, a warning is logged and
 * -EINVAL is returned. Otherwise cap_mask1 is set to @value and
 * cap_mask1_perm to @mask, and the context is written back.
 *
 * Returns 0 on success or the error from the query/modify call.
 */
static int set_port_caps_atomic(struct mlx5_ib_dev *dev, u8 port_num, u32 mask,
				u32 value)
{
	struct mlx5_hca_vport_context vport_ctx = {};
	int ret;

	ret = mlx5_query_hca_vport_context(dev->mdev, 0,
					   port_num, 0, &vport_ctx);
	if (ret)
		return ret;

	if (mask & ~vport_ctx.cap_mask1_perm) {
		mlx5_ib_warn(dev, "trying to change bitmask 0x%X but change supported 0x%X\n",
			     mask, vport_ctx.cap_mask1_perm);
		return -EINVAL;
	}

	vport_ctx.cap_mask1 = value;
	vport_ctx.cap_mask1_perm = mask;

	return mlx5_core_modify_hca_vport_context(dev->mdev, 0,
						  port_num, 0, &vport_ctx);
}
static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
struct ib_port_modify *props)
{
@ -976,10 +1028,20 @@ static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
struct ib_port_attr attr;
u32 tmp;
int err;
u32 change_mask;
u32 value;
bool is_ib = (mlx5_ib_port_link_layer(ibdev, port) ==
IB_LINK_LAYER_INFINIBAND);
if (MLX5_CAP_GEN(dev->mdev, ib_virt) && is_ib) {
change_mask = props->clr_port_cap_mask | props->set_port_cap_mask;
value = ~props->clr_port_cap_mask | props->set_port_cap_mask;
return set_port_caps_atomic(dev, port, change_mask, value);
}
mutex_lock(&dev->cap_mask_mutex);
err = mlx5_ib_query_port(ibdev, port, &attr);
err = ib_query_port(ibdev, port, &attr);
if (err)
goto out;
@ -1661,6 +1723,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
#define LAST_IPV6_FIELD traffic_class
#define LAST_TCP_UDP_FIELD src_port
#define LAST_TUNNEL_FIELD tunnel_id
#define LAST_FLOW_TAG_FIELD tag_id
/* Field is the last supported field */
#define FIELDS_NOT_SUPPORTED(filter, field)\
@ -1671,7 +1734,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
sizeof(filter.field))
static int parse_flow_attr(u32 *match_c, u32 *match_v,
const union ib_flow_spec *ib_spec)
const union ib_flow_spec *ib_spec, u32 *tag_id)
{
void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
misc_parameters);
@ -1695,7 +1758,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
case IB_FLOW_SPEC_ETH:
if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
return -ENOTSUPP;
return -EOPNOTSUPP;
ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
dmac_47_16),
@ -1743,7 +1806,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
break;
case IB_FLOW_SPEC_IPV4:
if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
return -ENOTSUPP;
return -EOPNOTSUPP;
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ethertype, 0xffff);
@ -1775,7 +1838,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
break;
case IB_FLOW_SPEC_IPV6:
if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
return -ENOTSUPP;
return -EOPNOTSUPP;
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ethertype, 0xffff);
@ -1816,7 +1879,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
case IB_FLOW_SPEC_TCP:
if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
LAST_TCP_UDP_FIELD))
return -ENOTSUPP;
return -EOPNOTSUPP;
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
0xff);
@ -1836,7 +1899,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
case IB_FLOW_SPEC_UDP:
if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
LAST_TCP_UDP_FIELD))
return -ENOTSUPP;
return -EOPNOTSUPP;
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
0xff);
@ -1856,13 +1919,22 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
case IB_FLOW_SPEC_VXLAN_TUNNEL:
if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
LAST_TUNNEL_FIELD))
return -ENOTSUPP;
return -EOPNOTSUPP;
MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
ntohl(ib_spec->tunnel.mask.tunnel_id));
MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
ntohl(ib_spec->tunnel.val.tunnel_id));
break;
case IB_FLOW_SPEC_ACTION_TAG:
if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
LAST_FLOW_TAG_FIELD))
return -EOPNOTSUPP;
if (ib_spec->flow_tag.tag_id >= BIT(24))
return -EINVAL;
*tag_id = ib_spec->flow_tag.tag_id;
break;
default:
return -EINVAL;
}
@ -2046,6 +2118,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
struct mlx5_flow_spec *spec;
const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
unsigned int spec_index;
u32 flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
int err = 0;
if (!is_valid_attr(flow_attr))
@ -2062,7 +2135,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
err = parse_flow_attr(spec->match_criteria,
spec->match_value, ib_flow);
spec->match_value, ib_flow, &flow_tag);
if (err < 0)
goto free;
@ -2072,7 +2145,16 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
if (flow_tag != MLX5_FS_DEFAULT_FLOW_TAG &&
(flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
flow_tag, flow_attr->type);
err = -EINVAL;
goto free;
}
flow_act.flow_tag = flow_tag;
handler->rule = mlx5_add_flow_rules(ft, spec,
&flow_act,
dst, 1);
@ -2542,6 +2624,35 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
ibdev->ib_active = false;
}
/*
 * Fill in the has_smi flag of every port in dev->mdev->port_caps.
 *
 * Each port starts out with has_smi cleared.  When the device's port_type
 * capability is MLX5_CAP_PORT_TYPE_IB, the flag is then either copied from
 * the queried HCA vport context (ib_virt capability set) or forced to true
 * (ib_virt capability clear).
 *
 * Returns 0 on success, or the error reported by
 * mlx5_query_hca_vport_context(); ports after the failing one are left
 * untouched in that case.
 */
static int set_has_smi_cap(struct mlx5_ib_dev *dev)
{
	struct mlx5_hca_vport_context vport_ctx;
	int port;
	int err;

	for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
		/* Default for anything that is not an IB port. */
		dev->mdev->port_caps[port - 1].has_smi = false;

		if (MLX5_CAP_GEN(dev->mdev, port_type) !=
		    MLX5_CAP_PORT_TYPE_IB)
			continue;

		if (!MLX5_CAP_GEN(dev->mdev, ib_virt)) {
			dev->mdev->port_caps[port - 1].has_smi = true;
			continue;
		}

		/* ib_virt is set: take the value from the vport context. */
		err = mlx5_query_hca_vport_context(dev->mdev, 0, port, 0,
						   &vport_ctx);
		if (err) {
			mlx5_ib_err(dev, "query_hca_vport_context for port=%d failed %d\n",
				    port, err);
			return err;
		}

		dev->mdev->port_caps[port - 1].has_smi = vport_ctx.has_smi;
	}

	return 0;
}
static void get_ext_port_caps(struct mlx5_ib_dev *dev)
{
int port;
@ -2566,6 +2677,10 @@ static int get_port_caps(struct mlx5_ib_dev *dev)
if (!dprops)
goto out;
err = set_has_smi_cap(dev);
if (err)
goto out;
err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw);
if (err) {
mlx5_ib_warn(dev, "query_device failed %d\n", err);
@ -2573,6 +2688,7 @@ static int get_port_caps(struct mlx5_ib_dev *dev)
}
for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
memset(pprops, 0, sizeof(*pprops));
err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
if (err) {
mlx5_ib_warn(dev, "query_port %d failed %d\n",
@ -2867,11 +2983,13 @@ static u32 get_core_cap_flags(struct ib_device *ibdev)
if (ll == IB_LINK_LAYER_INFINIBAND)
return RDMA_CORE_PORT_IBA_IB;
ret = RDMA_CORE_PORT_RAW_PACKET;
if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
return 0;
return ret;
if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
return 0;
return ret;
if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
ret |= RDMA_CORE_PORT_IBA_ROCE;
@ -2890,7 +3008,9 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
int err;
err = mlx5_ib_query_port(ibdev, port_num, &attr);
immutable->core_cap_flags = get_core_cap_flags(ibdev);
err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
@ -3011,13 +3131,102 @@ static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
mlx5_nic_vport_disable_roce(dev->mdev);
}
/*
 * Description of one queue counter exposed as a hardware statistic:
 * the name reported for it and its byte offset inside the
 * query_q_counter_out layout (as computed by MLX5_BYTE_OFF).
 */
struct mlx5_ib_q_counter {
const char *name;
size_t offset;
};
/*
 * Build a mlx5_ib_q_counter entry from a query_q_counter_out field name;
 * the stringified field name doubles as the statistic's name.
 */
#define INIT_Q_COUNTER(_name) \
{ .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
/* Counters that are always included in a port's counter list. */
static const struct mlx5_ib_q_counter basic_q_cnts[] = {
INIT_Q_COUNTER(rx_write_requests),
INIT_Q_COUNTER(rx_read_requests),
INIT_Q_COUNTER(rx_atomic_requests),
INIT_Q_COUNTER(out_of_buffer),
};
/* Counters included only when the out_of_seq_cnt capability is set. */
static const struct mlx5_ib_q_counter out_of_seq_q_cnts[] = {
INIT_Q_COUNTER(out_of_sequence),
};
/*
 * Counters included only when the retransmission_q_counters capability
 * is set.
 */
static const struct mlx5_ib_q_counter retrans_q_cnts[] = {
INIT_Q_COUNTER(duplicate_request),
INIT_Q_COUNTER(rnr_nak_retry_err),
INIT_Q_COUNTER(packet_seq_err),
INIT_Q_COUNTER(implied_nak_seq_err),
INIT_Q_COUNTER(local_ack_timeout_err),
};
static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
{
unsigned int i;
for (i = 0; i < dev->num_ports; i++)
for (i = 0; i < dev->num_ports; i++) {
mlx5_core_dealloc_q_counter(dev->mdev,
dev->port[i].q_cnt_id);
dev->port[i].q_cnts.set_id);
kfree(dev->port[i].q_cnts.names);
kfree(dev->port[i].q_cnts.offsets);
}
}
/*
 * Allocate the per-port name and offset arrays for the queue counters.
 *
 * The number of entries is the size of basic_q_cnts, plus out_of_seq_q_cnts
 * when the out_of_seq_cnt capability is set, plus retrans_q_cnts when the
 * retransmission_q_counters capability is set.
 *
 * @dev:     device whose capabilities select the counter groups
 * @names:   out: array of counter-name pointers (kcalloc'ed)
 * @offsets: out: array of counter byte offsets (kcalloc'ed)
 * @num:     out: number of entries in both arrays (written on success only)
 *
 * Returns 0 on success or -ENOMEM.  On failure nothing remains allocated
 * and *names never holds a stale pointer, so a later kfree() of the
 * caller's field cannot turn into a double free.
 */
static int __mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev,
				      const char ***names,
				      size_t **offsets,
				      u32 *num)
{
	u32 num_counters;

	num_counters = ARRAY_SIZE(basic_q_cnts);

	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
		num_counters += ARRAY_SIZE(out_of_seq_q_cnts);

	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
		num_counters += ARRAY_SIZE(retrans_q_cnts);

	*names = kcalloc(num_counters, sizeof(**names), GFP_KERNEL);
	if (!*names)
		return -ENOMEM;

	*offsets = kcalloc(num_counters, sizeof(**offsets), GFP_KERNEL);
	if (!*offsets)
		goto err_names;

	*num = num_counters;

	return 0;

err_names:
	kfree(*names);
	/* Do not leave a dangling pointer in the caller's structure. */
	*names = NULL;
	return -ENOMEM;
}
/*
 * Populate the name and offset arrays of a port's counter list.
 *
 * Entries are written back to back: first basic_q_cnts, then
 * out_of_seq_q_cnts if the out_of_seq_cnt capability is set, then
 * retrans_q_cnts if the retransmission_q_counters capability is set.
 * The arrays must have room for every selected group.
 */
static void mlx5_ib_fill_q_counters(struct mlx5_ib_dev *dev,
				    const char **names,
				    size_t *offsets)
{
	/* Write cursors that advance in lock step through both arrays. */
	const char **name_slot = names;
	size_t *offset_slot = offsets;
	int i;

	for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++) {
		*name_slot++ = basic_q_cnts[i].name;
		*offset_slot++ = basic_q_cnts[i].offset;
	}

	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
		for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++) {
			*name_slot++ = out_of_seq_q_cnts[i].name;
			*offset_slot++ = out_of_seq_q_cnts[i].offset;
		}
	}

	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
		for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++) {
			*name_slot++ = retrans_q_cnts[i].name;
			*offset_slot++ = retrans_q_cnts[i].offset;
		}
	}
}
static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
@ -3026,14 +3235,26 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
int ret;
for (i = 0; i < dev->num_ports; i++) {
struct mlx5_ib_port *port = &dev->port[i];
ret = mlx5_core_alloc_q_counter(dev->mdev,
&dev->port[i].q_cnt_id);
&port->q_cnts.set_id);
if (ret) {
mlx5_ib_warn(dev,
"couldn't allocate queue counter for port %d, err %d\n",
i + 1, ret);
goto dealloc_counters;
}
ret = __mlx5_ib_alloc_q_counters(dev,
&port->q_cnts.names,
&port->q_cnts.offsets,
&port->q_cnts.num_counters);
if (ret)
goto dealloc_counters;
mlx5_ib_fill_q_counters(dev, port->q_cnts.names,
port->q_cnts.offsets);
}
return 0;
@ -3041,62 +3262,39 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
dealloc_counters:
while (--i >= 0)
mlx5_core_dealloc_q_counter(dev->mdev,
dev->port[i].q_cnt_id);
dev->port[i].q_cnts.set_id);
return ret;
}
static const char * const names[] = {
"rx_write_requests",
"rx_read_requests",
"rx_atomic_requests",
"out_of_buffer",
"out_of_sequence",
"duplicate_request",
"rnr_nak_retry_err",
"packet_seq_err",
"implied_nak_seq_err",
"local_ack_timeout_err",
};
static const size_t stats_offsets[] = {
MLX5_BYTE_OFF(query_q_counter_out, rx_write_requests),
MLX5_BYTE_OFF(query_q_counter_out, rx_read_requests),
MLX5_BYTE_OFF(query_q_counter_out, rx_atomic_requests),
MLX5_BYTE_OFF(query_q_counter_out, out_of_buffer),
MLX5_BYTE_OFF(query_q_counter_out, out_of_sequence),
MLX5_BYTE_OFF(query_q_counter_out, duplicate_request),
MLX5_BYTE_OFF(query_q_counter_out, rnr_nak_retry_err),
MLX5_BYTE_OFF(query_q_counter_out, packet_seq_err),
MLX5_BYTE_OFF(query_q_counter_out, implied_nak_seq_err),
MLX5_BYTE_OFF(query_q_counter_out, local_ack_timeout_err),
};
static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
u8 port_num)
{
BUILD_BUG_ON(ARRAY_SIZE(names) != ARRAY_SIZE(stats_offsets));
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_port *port = &dev->port[port_num - 1];
/* We support only per port stats */
if (port_num == 0)
return NULL;
return rdma_alloc_hw_stats_struct(names, ARRAY_SIZE(names),
return rdma_alloc_hw_stats_struct(port->q_cnts.names,
port->q_cnts.num_counters,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
u8 port, int index)
u8 port_num, int index)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_port *port = &dev->port[port_num - 1];
int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
void *out;
__be32 val;
int ret;
int i;
if (!port || !stats)
if (!stats)
return -ENOSYS;
out = mlx5_vzalloc(outlen);
@ -3104,18 +3302,19 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
return -ENOMEM;
ret = mlx5_core_query_q_counter(dev->mdev,
dev->port[port - 1].q_cnt_id, 0,
port->q_cnts.set_id, 0,
out, outlen);
if (ret)
goto free;
for (i = 0; i < ARRAY_SIZE(names); i++) {
val = *(__be32 *)(out + stats_offsets[i]);
for (i = 0; i < port->q_cnts.num_counters; i++) {
val = *(__be32 *)(out + port->q_cnts.offsets[i]);
stats->value[i] = (u64)be32_to_cpu(val);
}
free:
kvfree(out);
return ARRAY_SIZE(names);
return port->q_cnts.num_counters;
}
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
@ -3267,8 +3466,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
}
if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats;
}
@ -3322,9 +3520,11 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
if (err)
goto err_rsrc;
err = mlx5_ib_alloc_q_counters(dev);
if (err)
goto err_odp;
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
err = mlx5_ib_alloc_q_counters(dev);
if (err)
goto err_odp;
}
dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
if (!dev->mdev->priv.uar)
@ -3373,7 +3573,8 @@ err_uar_page:
mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
err_q_cnt:
mlx5_ib_dealloc_q_counters(dev);
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
mlx5_ib_dealloc_q_counters(dev);
err_odp:
mlx5_ib_odp_remove_one(dev);
@ -3406,7 +3607,8 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
mlx5_put_uars_page(dev->mdev, mdev->priv.uar);
mlx5_ib_dealloc_q_counters(dev);
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
mlx5_ib_dealloc_q_counters(dev);
destroy_umrc_res(dev);
mlx5_ib_odp_remove_one(dev);
destroy_dev_resources(&dev->devr);
@ -3430,8 +3632,7 @@ static int __init mlx5_ib_init(void)
{
int err;
if (deprecated_prof_sel != 2)
pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
mlx5_ib_odp_init();
err = mlx5_register_interface(&mlx5_ib_interface);

View File

@ -202,6 +202,7 @@ struct mlx5_ib_flow_db {
#define MLX5_IB_UPD_XLT_ADDR BIT(3)
#define MLX5_IB_UPD_XLT_PD BIT(4)
#define MLX5_IB_UPD_XLT_ACCESS BIT(5)
#define MLX5_IB_UPD_XLT_INDIRECT BIT(6)
/* Private QP creation flags to be passed in ib_qp_init_attr.create_flags.
*
@ -220,6 +221,10 @@ struct wr_list {
u16 next;
};
/*
 * Bit flags describing a receive queue.
 * NOTE(review): presumably stored in the u32 flags field of
 * struct mlx5_ib_rq - confirm against the QP creation code.
 */
enum mlx5_ib_rq_flags {
MLX5_IB_RQ_CVLAN_STRIPPING = 1 << 0,
};
struct mlx5_ib_wq {
u64 *wrid;
u32 *wr_data;
@ -308,6 +313,7 @@ struct mlx5_ib_rq {
struct mlx5_db *doorbell;
u32 tirn;
u8 state;
u32 flags;
};
struct mlx5_ib_sq {
@ -392,6 +398,7 @@ enum mlx5_ib_qp_flags {
MLX5_IB_QP_SQPN_QP1 = 1 << 6,
MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7,
MLX5_IB_QP_RSS = 1 << 8,
MLX5_IB_QP_CVLAN_STRIPPING = 1 << 9,
};
struct mlx5_umr_wr {
@ -497,6 +504,10 @@ struct mlx5_ib_mr {
int live;
void *descs_alloc;
int access_flags; /* Needed for rereg MR */
struct mlx5_ib_mr *parent;
atomic_t num_leaf_free;
wait_queue_head_t q_leaf_free;
};
struct mlx5_ib_mw {
@ -535,6 +546,10 @@ struct mlx5_cache_ent {
struct dentry *dir;
char name[4];
u32 order;
u32 xlt;
u32 access_mode;
u32 page;
u32 size;
u32 cur;
u32 miss;
@ -549,6 +564,7 @@ struct mlx5_cache_ent {
struct work_struct work;
struct delayed_work dwork;
int pending;
struct completion compl;
};
struct mlx5_mr_cache {
@ -579,8 +595,15 @@ struct mlx5_ib_resources {
struct mutex mutex;
};
/*
 * Per-port queue counter bookkeeping.
 *
 * names and offsets are parallel, heap-allocated arrays of num_counters
 * entries each: the statistic name and the byte offset of its value in
 * the queried counter output.  set_id is the identifier obtained from
 * mlx5_core_alloc_q_counter() and used when querying and releasing the
 * counter set.
 */
struct mlx5_ib_q_counters {
const char **names;
size_t *offsets;
u32 num_counters;
u16 set_id;
};
struct mlx5_ib_port {
u16 q_cnt_id;
struct mlx5_ib_q_counters q_cnts;
};
struct mlx5_roce {
@ -619,6 +642,7 @@ struct mlx5_ib_dev {
* being used by a page fault handler.
*/
struct srcu_struct mr_srcu;
u32 null_mkey;
#endif
struct mlx5_ib_flow_db flow_db;
/* protect resources needed as part of reset flow */
@ -771,6 +795,9 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
int mlx5_ib_dealloc_mw(struct ib_mw *mw);
int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags);
struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
int access_flags);
void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
u64 length, u64 virt_addr, int access_flags,
struct ib_pd *pd, struct ib_udata *udata);
@ -824,7 +851,9 @@ void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry);
void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
struct ib_mr_status *mr_status);
struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
@ -848,6 +877,9 @@ int __init mlx5_ib_odp_init(void);
void mlx5_ib_odp_cleanup(void);
void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
unsigned long end);
void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent);
void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
size_t nentries, struct mlx5_ib_mr *mr, int flags);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
{
@ -855,9 +887,13 @@ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
}
static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {}
static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {}
static inline int mlx5_ib_odp_init(void) { return 0; }
static inline void mlx5_ib_odp_cleanup(void) {}
static inline void mlx5_ib_odp_cleanup(void) {}
static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {}
static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
size_t nentries, struct mlx5_ib_mr *mr,
int flags) {}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
@ -872,6 +908,8 @@ int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
int index);
int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
int index, enum ib_gid_type *gid_type);
/* GSI QP helper functions */
struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,

View File

@ -49,6 +49,7 @@ enum {
static int clean_mr(struct mlx5_ib_mr *mr);
static int use_umr(struct mlx5_ib_dev *dev, int order);
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
@ -149,6 +150,9 @@ static void reg_mr_callback(int status, void *context)
if (err)
pr_err("Error inserting to mkey tree. 0x%x\n", -err);
write_unlock_irqrestore(&table->lock, flags);
if (!completion_done(&ent->compl))
complete(&ent->compl);
}
static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
@ -157,7 +161,6 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
struct mlx5_cache_ent *ent = &cache->ent[c];
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
struct mlx5_ib_mr *mr;
int npages = 1 << ent->order;
void *mkc;
u32 *in;
int err = 0;
@ -185,11 +188,11 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
MLX5_SET(mkc, mkc, free, 1);
MLX5_SET(mkc, mkc, umr_en, 1);
MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
MLX5_SET(mkc, mkc, access_mode, ent->access_mode);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2);
MLX5_SET(mkc, mkc, log_page_size, 12);
MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
MLX5_SET(mkc, mkc, log_page_size, ent->page);
spin_lock_irq(&ent->lock);
ent->pending++;
@ -447,6 +450,42 @@ static void cache_work_func(struct work_struct *work)
__cache_work_func(ent);
}
/*
 * Take one MR out of the given MR-cache entry, blocking until one exists.
 *
 * If the entry's list is empty, one more mkey is requested through
 * add_keys() and the function sleeps on the entry's completion before
 * looking again.  After a successful removal, the entry's work item is
 * queued when the number of cached MRs has dropped below the entry limit.
 *
 * @dev:   device owning the cache
 * @entry: index into dev->cache.ent[], 0 .. MAX_MR_CACHE_ENTRIES - 1
 *
 * Returns the MR on success or an ERR_PTR() on failure; never NULL.
 * An out-of-range @entry yields ERR_PTR(-EINVAL), so callers can rely on
 * IS_ERR() alone (as implicit_mr_alloc() does).
 */
struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	struct mlx5_ib_mr *mr;
	int err;

	if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
		return ERR_PTR(-EINVAL);
	}

	ent = &cache->ent[entry];
	while (1) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);

			/* -EAGAIN is not fatal: wait and look again. */
			err = add_keys(dev, entry, 1);
			if (err && err != -EAGAIN)
				return ERR_PTR(err);

			wait_for_completion(&ent->compl);
		} else {
			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
					      list);
			list_del(&mr->list);
			ent->cur--;
			spin_unlock_irq(&ent->lock);
			/* Ask the worker to refill when below the limit. */
			if (ent->cur < ent->limit)
				queue_work(cache->wq, &ent->work);
			return mr;
		}
	}
}
static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
struct mlx5_mr_cache *cache = &dev->cache;
@ -456,12 +495,12 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
int i;
c = order2idx(dev, order);
if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
if (c < 0 || c > MAX_UMR_CACHE_ENTRY) {
mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
return NULL;
}
for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
for (i = c; i < MAX_UMR_CACHE_ENTRY; i++) {
ent = &cache->ent[i];
mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
@ -488,7 +527,7 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
return mr;
}
static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
@ -500,6 +539,10 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
return;
}
if (unreg_umr(dev, mr))
return;
ent = &cache->ent[c];
spin_lock_irq(&ent->lock);
list_add_tail(&mr->list, &ent->head);
@ -602,7 +645,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
int limit;
int err;
int i;
@ -615,26 +657,35 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
INIT_LIST_HEAD(&cache->ent[i].head);
spin_lock_init(&cache->ent[i].lock);
ent = &cache->ent[i];
INIT_LIST_HEAD(&ent->head);
spin_lock_init(&ent->lock);
ent->order = i + 2;
ent->dev = dev;
ent->limit = 0;
if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
mlx5_core_is_pf(dev->mdev) &&
use_umr(dev, ent->order))
limit = dev->mdev->profile->mr_cache[i].limit;
else
limit = 0;
init_completion(&ent->compl);
INIT_WORK(&ent->work, cache_work_func);
INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
ent->limit = limit;
queue_work(cache->wq, &ent->work);
if (i > MAX_UMR_CACHE_ENTRY) {
mlx5_odp_init_mr_cache_entry(ent);
continue;
}
if (!use_umr(dev, ent->order))
continue;
ent->page = PAGE_SHIFT;
ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
MLX5_IB_UMR_OCTOWORD;
ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
mlx5_core_is_pf(dev->mdev))
ent->limit = dev->mdev->profile->mr_cache[i].limit;
else
ent->limit = 0;
}
err = mlx5_mr_cache_debugfs_init(dev);
@ -758,7 +809,7 @@ static int get_octo_len(u64 addr, u64 len, int page_size)
static int use_umr(struct mlx5_ib_dev *dev, int order)
{
if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
return order < MAX_MR_CACHE_ENTRIES + 2;
return order <= MAX_UMR_CACHE_ENTRY + 2;
return order <= MLX5_MAX_UMR_SHIFT;
}
@ -871,7 +922,7 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
MLX5_IB_UPD_XLT_ENABLE);
if (err) {
free_cached_mr(dev, mr);
mlx5_mr_cache_free(dev, mr);
return ERR_PTR(err);
}
@ -886,6 +937,10 @@ static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,
{
struct mlx5_ib_dev *dev = mr->dev;
struct ib_umem *umem = mr->umem;
if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);
return npages;
}
npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx);
@ -919,7 +974,9 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
struct mlx5_umr_wr wr;
struct ib_sge sg;
int err = 0;
int desc_size = sizeof(struct mlx5_mtt);
int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
? sizeof(struct mlx5_klm)
: sizeof(struct mlx5_mtt);
const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
const int page_mask = page_align - 1;
size_t pages_mapped = 0;
@ -1091,6 +1148,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
goto err_2;
}
mr->mmkey.type = MLX5_MKEY_MR;
mr->desc_size = sizeof(struct mlx5_mtt);
mr->umem = umem;
mr->dev = dev;
mr->live = 1;
@ -1136,6 +1194,18 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
start, virt_addr, length, access_flags);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
if (!start && length == U64_MAX) {
if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
return ERR_PTR(-EINVAL);
mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
return &mr->ibmr;
}
#endif
err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
&page_shift, &ncont, &order);
@ -1398,12 +1468,7 @@ static int clean_mr(struct mlx5_ib_mr *mr)
return err;
}
} else {
err = unreg_umr(dev, mr);
if (err) {
mlx5_ib_warn(dev, "failed unregister\n");
return err;
}
free_cached_mr(dev, mr);
mlx5_mr_cache_free(dev, mr);
}
if (!umred)
@ -1426,8 +1491,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
/* Wait for all running page-fault handlers to finish. */
synchronize_srcu(&dev->mr_srcu);
/* Destroy all page mappings */
mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
ib_umem_end(umem));
if (umem->odp_data->page_list)
mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
ib_umem_end(umem));
else
mlx5_ib_free_implicit_mr(mr);
/*
* We kill the umem before the MR for ODP,
* so that there will not be any invalidations in

View File

@ -34,6 +34,7 @@
#include <rdma/ib_umem_odp.h>
#include "mlx5_ib.h"
#include "cmd.h"
#define MAX_PREFETCH_LEN (4*1024*1024U)
@ -41,6 +42,140 @@
* a pagefault. */
#define MMU_NOTIFIER_TIMEOUT 1000
#define MLX5_IMR_MTT_BITS (30 - PAGE_SHIFT)
#define MLX5_IMR_MTT_SHIFT (MLX5_IMR_MTT_BITS + PAGE_SHIFT)
#define MLX5_IMR_MTT_ENTRIES BIT_ULL(MLX5_IMR_MTT_BITS)
#define MLX5_IMR_MTT_SIZE BIT_ULL(MLX5_IMR_MTT_SHIFT)
#define MLX5_IMR_MTT_MASK (~(MLX5_IMR_MTT_SIZE - 1))
#define MLX5_KSM_PAGE_SHIFT MLX5_IMR_MTT_SHIFT
static u64 mlx5_imr_ksm_entries;
/*
 * Return non-zero when @odp has an MR attached (odp->private) and that
 * MR's parent is @parent; return 0 otherwise.
 */
static int check_parent(struct ib_umem_odp *odp,
			struct mlx5_ib_mr *parent)
{
	struct mlx5_ib_mr *leaf = odp->private;

	if (!leaf)
		return 0;

	return leaf->parent == parent;
}
/*
 * Find the next ODP umem after @odp, in rb-tree order, whose MR has the
 * same parent as @odp's MR.
 *
 * The walk is done with the owning context's umem_rwsem held for read.
 * Returns the matching umem, or NULL when the end of the tree is reached.
 */
static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp)
{
	struct mlx5_ib_mr *mr = odp->private, *parent = mr->parent;
	struct ib_ucontext *ctx = odp->umem->context;
	struct ib_umem_odp *found = NULL;
	struct rb_node *rb;

	down_read(&ctx->umem_rwsem);
	for (rb = rb_next(&odp->interval_tree.rb); rb; rb = rb_next(rb)) {
		struct ib_umem_odp *cand =
			rb_entry(rb, struct ib_umem_odp, interval_tree.rb);

		if (check_parent(cand, parent)) {
			found = cand;
			break;
		}
	}
	up_read(&ctx->umem_rwsem);

	return found;
}
/*
 * Look up an ODP umem belonging to @parent near [@start, @start + @length].
 *
 * Starts from the result of rbt_ib_umem_lookup() and walks forward in
 * rb-tree order until an entry whose MR has @parent as parent is found.
 * The walk gives up at the end of the tree, or as soon as it steps onto an
 * entry that begins past @start + @length.  Runs with ctx->umem_rwsem held
 * for read.
 *
 * Returns the matching umem or NULL.
 */
static struct ib_umem_odp *odp_lookup(struct ib_ucontext *ctx,
				      u64 start, u64 length,
				      struct mlx5_ib_mr *parent)
{
	struct ib_umem_odp *odp;
	struct rb_node *rb;

	down_read(&ctx->umem_rwsem);

	odp = rbt_ib_umem_lookup(&ctx->umem_tree, start, length);
	while (odp && !check_parent(odp, parent)) {
		rb = rb_next(&odp->interval_tree.rb);
		if (!rb) {
			odp = NULL;
			break;
		}

		odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
		/* Stepped beyond the requested range: nothing to return. */
		if (ib_umem_start(odp->umem) > start + length)
			odp = NULL;
	}

	up_read(&ctx->umem_rwsem);
	return odp;
}
/*
 * Fill @nentries KLM descriptors, starting at @pklm, for the slots
 * @offset .. @offset + @nentries - 1 of the implicit MR @mr.
 *
 * Every descriptor gets a byte count of MLX5_IMR_MTT_SIZE.  With
 * MLX5_IB_UPD_XLT_ZAP in @flags, each descriptor points at the device's
 * null mkey with va 0.  Otherwise a slot whose address
 * (slot * MLX5_IMR_MTT_SIZE) matches the address of a child umem of @mr
 * gets that child's lkey, and every other slot gets the null mkey; the va
 * field is not written on this path.
 */
void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
			   size_t nentries, struct mlx5_ib_mr *mr, int flags)
{
	struct ib_pd *pd = mr->ibmr.pd;
	struct ib_ucontext *ctx = pd->uobject->context;
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct ib_umem_odp *odp;
	unsigned long va;
	int i;

	if (flags & MLX5_IB_UPD_XLT_ZAP) {
		for (i = 0; i < nentries; i++) {
			pklm[i].bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
			pklm[i].key = cpu_to_be32(dev->null_mkey);
			pklm[i].va = 0;
		}
		return;
	}

	odp = odp_lookup(ctx, offset * MLX5_IMR_MTT_SIZE,
			 nentries * MLX5_IMR_MTT_SIZE, mr);

	for (i = 0; i < nentries; i++) {
		struct mlx5_klm *klm = &pklm[i];

		klm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
		va = (offset + i) * MLX5_IMR_MTT_SIZE;
		if (!odp || odp->umem->address != va) {
			/* No child umem at this slot. */
			klm->key = cpu_to_be32(dev->null_mkey);
		} else {
			struct mlx5_ib_mr *mtt = odp->private;

			klm->key = cpu_to_be32(mtt->ibmr.lkey);
			odp = odp_next(odp);
		}
		mlx5_ib_dbg(dev, "[%d] va %lx key %x\n",
			    i, va, be32_to_cpu(klm->key));
	}
}
/*
 * Work handler that tears down one leaf MR of an implicit MR.
 *
 * @work is the work member embedded in the leaf's struct ib_umem_odp.
 * The leaf is first detached from its parent and SRCU readers of mr_srcu
 * are waited for.  If the umem's dying flag was cleared in the meantime,
 * the leaf is re-attached and kept.  Otherwise the umem is released, the
 * parent's translation entry for this leaf is refreshed (only while the
 * parent is live) and the leaf MR is handed back to the MR cache.
 *
 * On both paths the parent's num_leaf_free count is decremented, and
 * waiters on q_leaf_free are woken when it reaches zero.
 */
static void mr_leaf_free_action(struct work_struct *work)
{
struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work);
/* Slot of this leaf in the parent, derived from the umem start address. */
int idx = ib_umem_start(odp->umem) >> MLX5_IMR_MTT_SHIFT;
struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent;
/* Detach first so parent-based lookups stop matching this leaf. */
mr->parent = NULL;
synchronize_srcu(&mr->dev->mr_srcu);
/* dying was cleared while we waited: the leaf is in use again, keep it. */
if (!READ_ONCE(odp->dying)) {
mr->parent = imr;
if (atomic_dec_and_test(&imr->num_leaf_free))
wake_up(&imr->q_leaf_free);
return;
}
ib_umem_release(odp->umem);
/* Rewrite the parent's entry for this slot now that the leaf is gone. */
if (imr->live)
mlx5_ib_update_xlt(imr, idx, 1, 0,
MLX5_IB_UPD_XLT_INDIRECT |
MLX5_IB_UPD_XLT_ATOMIC);
mlx5_mr_cache_free(mr->dev, mr);
if (atomic_dec_and_test(&imr->num_leaf_free))
wake_up(&imr->q_leaf_free);
}
void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
unsigned long end)
{
@ -111,6 +246,13 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
*/
ib_umem_odp_unmap_dma_pages(umem, start, end);
if (unlikely(!umem->npages && mr->parent &&
!umem->odp_data->dying)) {
WRITE_ONCE(umem->odp_data->dying, 1);
atomic_inc(&mr->parent->num_leaf_free);
schedule_work(&umem->odp_data->work);
}
}
void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
@ -147,6 +289,11 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.atomic))
caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;
if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) &&
MLX5_CAP_GEN(dev->mdev, null_mkey) &&
MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT;
return;
}
@ -184,6 +331,197 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
wq_num);
}
/*
 * Take an MR from the MR cache and set it up as part of an implicit MR.
 *
 * @pd:           protection domain recorded in the MR
 * @umem:         umem recorded in the MR
 * @ksm:          true takes the MR from MLX5_IMR_KSM_CACHE_ENTRY and zaps
 *                mlx5_imr_ksm_entries indirect entries; false takes it from
 *                MLX5_IMR_MTT_CACHE_ENTRY and zaps MLX5_IMR_MTT_ENTRIES
 *                entries
 * @access_flags: access flags recorded in the MR
 *
 * Returns the MR, marked live, or an ERR_PTR().  When the translation
 * update fails, the MR is handed back to the cache.
 */
static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
					    struct ib_umem *umem,
					    bool ksm, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr;
	int err;

	mr = mlx5_mr_cache_alloc(dev, ksm ? MLX5_IMR_KSM_CACHE_ENTRY :
					    MLX5_IMR_MTT_CACHE_ENTRY);
	if (IS_ERR(mr))
		return mr;

	mr->ibmr.pd = pd;
	mr->dev = dev;
	mr->access_flags = access_flags;
	mr->mmkey.iova = 0;
	mr->umem = umem;

	if (ksm)
		err = mlx5_ib_update_xlt(mr, 0,
					 mlx5_imr_ksm_entries,
					 MLX5_KSM_PAGE_SHIFT,
					 MLX5_IB_UPD_XLT_INDIRECT |
					 MLX5_IB_UPD_XLT_ZAP |
					 MLX5_IB_UPD_XLT_ENABLE);
	else
		err = mlx5_ib_update_xlt(mr, 0,
					 MLX5_IMR_MTT_ENTRIES,
					 PAGE_SHIFT,
					 MLX5_IB_UPD_XLT_ZAP |
					 MLX5_IB_UPD_XLT_ENABLE |
					 MLX5_IB_UPD_XLT_ATOMIC);

	if (err) {
		mlx5_ib_err(dev, "Failed to register MKEY %d\n", err);
		mlx5_mr_cache_free(dev, mr);
		return ERR_PTR(err);
	}

	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->live = 1;

	mlx5_ib_dbg(dev, "key %x dev %p mr %p\n",
		    mr->mmkey.key, dev->mdev, mr);

	return mr;
}
/*
 * Make sure the implicit MR @mr has leaf umems/MRs covering
 * [@io_virt, @io_virt + @bcnt) and return the first one.
 *
 * The range is walked in MLX5_IMR_MTT_SIZE steps, starting at @io_virt
 * rounded down with MLX5_IMR_MTT_MASK.  For each step an existing leaf is
 * reused if present; otherwise an ODP umem and a leaf MR are created and
 * linked to @mr.  If any leaf was created, the parent's indirect
 * translation entries are updated from the first new slot onwards.
 * The whole operation runs under mr->umem->odp_data->umem_mutex.
 *
 * Returns the umem of the first leaf, or an ERR_PTR() if allocating a
 * umem or leaf MR, or updating the parent's translation, fails.
 */
static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr,
u64 io_virt, size_t bcnt)
{
struct ib_ucontext *ctx = mr->ibmr.pd->uobject->context;
struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device);
struct ib_umem_odp *odp, *result = NULL;
u64 addr = io_virt & MLX5_IMR_MTT_MASK;
/*
 * nentries counts the parent slots to rewrite, starting at start_idx;
 * it stays 0 until the first leaf is created.
 */
int nentries = 0, start_idx = 0, ret;
struct mlx5_ib_mr *mtt;
struct ib_umem *umem;
mutex_lock(&mr->umem->odp_data->umem_mutex);
odp = odp_lookup(ctx, addr, 1, mr);
mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n",
io_virt, bcnt, addr, odp);
next_mr:
if (likely(odp)) {
/* Existing leaf: count it only if it follows a newly created one. */
if (nentries)
nentries++;
} else {
umem = ib_alloc_odp_umem(ctx, addr, MLX5_IMR_MTT_SIZE);
if (IS_ERR(umem)) {
mutex_unlock(&mr->umem->odp_data->umem_mutex);
return ERR_CAST(umem);
}
mtt = implicit_mr_alloc(mr->ibmr.pd, umem, 0, mr->access_flags);
if (IS_ERR(mtt)) {
mutex_unlock(&mr->umem->odp_data->umem_mutex);
ib_umem_release(umem);
return ERR_CAST(mtt);
}
/* Link the new leaf MR, its umem and the parent together. */
odp = umem->odp_data;
odp->private = mtt;
mtt->umem = umem;
mtt->mmkey.iova = addr;
mtt->parent = mr;
INIT_WORK(&odp->work, mr_leaf_free_action);
/* Remember the slot of the first leaf created in this call. */
if (!nentries)
start_idx = addr >> MLX5_IMR_MTT_SHIFT;
nentries++;
}
/* Clear dying; mr_leaf_free_action() keeps a leaf whose flag is 0. */
odp->dying = 0;
/* Return first odp if region not covered by single one */
if (likely(!result))
result = odp;
addr += MLX5_IMR_MTT_SIZE;
/* More of the range left: continue with the adjacent leaf, if any. */
if (unlikely(addr < io_virt + bcnt)) {
odp = odp_next(odp);
if (odp && odp->umem->address != addr)
odp = NULL;
goto next_mr;
}
/* At least one leaf was created: publish it in the parent's table. */
if (unlikely(nentries)) {
ret = mlx5_ib_update_xlt(mr, start_idx, nentries, 0,
MLX5_IB_UPD_XLT_INDIRECT |
MLX5_IB_UPD_XLT_ATOMIC);
if (ret) {
mlx5_ib_err(dev, "Failed to update PAS\n");
result = ERR_PTR(ret);
}
}
mutex_unlock(&mr->umem->odp_data->umem_mutex);
return result;
}
/*
 * Create the top-level MR of an implicit ODP registration.
 *
 * Obtains an on-demand umem with address and length 0 from the PD's user
 * context, allocates the MR through implicit_mr_alloc() with ksm set, and
 * initialises the leaf-free wait queue and counter.
 *
 * Returns the new MR or an ERR_PTR(); the umem is released if the MR
 * allocation fails.
 */
struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
					     int access_flags)
{
	struct ib_ucontext *ctx = pd->ibpd.uobject->context;
	struct mlx5_ib_mr *imr;
	struct ib_umem *umem;

	umem = ib_umem_get(ctx, 0, 0, IB_ACCESS_ON_DEMAND, 0);
	if (IS_ERR(umem))
		return ERR_CAST(umem);

	imr = implicit_mr_alloc(&pd->ibpd, umem, 1, access_flags);
	if (!IS_ERR(imr)) {
		imr->umem = umem;
		init_waitqueue_head(&imr->q_leaf_free);
		atomic_set(&imr->num_leaf_free, 0);
		return imr;
	}

	/* MR allocation failed: drop the umem obtained above. */
	ib_umem_release(umem);
	return ERR_CAST(imr);
}
/*
 * Per-umem callback used while freeing an implicit MR.
 *
 * @cookie is the implicit (parent) MR.  Umems whose MR has a different
 * parent are ignored.  For a matching leaf, the whole umem range is
 * unmapped and, unless the leaf is already marked dying, it is marked
 * dying, the parent's num_leaf_free is incremented and the leaf's work
 * item is scheduled.
 *
 * @start and @end are unused.  Always returns 0.
 */
static int mr_leaf_free(struct ib_umem *umem, u64 start,
			u64 end, void *cookie)
{
	struct mlx5_ib_mr *mr = umem->odp_data->private, *imr = cookie;

	if (mr->parent != imr)
		return 0;

	ib_umem_odp_unmap_dma_pages(umem,
				    ib_umem_start(umem),
				    ib_umem_end(umem));

	if (!umem->odp_data->dying) {
		WRITE_ONCE(umem->odp_data->dying, 1);
		atomic_inc(&imr->num_leaf_free);
		schedule_work(&umem->odp_data->work);
	}

	return 0;
}
/*
 * Release every leaf of the implicit MR @imr.
 *
 * Runs mr_leaf_free() over all umems of the owning user context
 * (range 0..ULLONG_MAX) with umem_rwsem held for read, then sleeps until
 * imr->num_leaf_free has dropped back to zero.
 */
void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
{
	struct ib_ucontext *uctx = imr->ibmr.pd->uobject->context;

	down_read(&uctx->umem_rwsem);
	rbt_ib_umem_for_each_in_range(&uctx->umem_tree, 0, ULLONG_MAX,
				      mr_leaf_free, imr);
	up_read(&uctx->umem_rwsem);

	/* Wait for all scheduled leaf-free work items to finish. */
	wait_event(imr->q_leaf_free, atomic_read(&imr->num_leaf_free) == 0);
}
/*
* Handle a single data segment in a page-fault WQE or RDMA region.
*
@ -195,47 +533,43 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
* -EFAULT when there's an error mapping the requested pages. The caller will
* abort the page fault handling.
*/
static int pagefault_single_data_segment(struct mlx5_ib_dev *mib_dev,
static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
u32 key, u64 io_virt, size_t bcnt,
u32 *bytes_committed,
u32 *bytes_mapped)
{
int srcu_key;
unsigned int current_seq;
unsigned int current_seq = 0;
u64 start_idx;
int npages = 0, ret = 0;
struct mlx5_ib_mr *mr;
u64 access_mask = ODP_READ_ALLOWED_BIT;
struct ib_umem_odp *odp;
int implicit = 0;
size_t size;
srcu_key = srcu_read_lock(&mib_dev->mr_srcu);
mr = mlx5_ib_odp_find_mr_lkey(mib_dev, key);
srcu_key = srcu_read_lock(&dev->mr_srcu);
mr = mlx5_ib_odp_find_mr_lkey(dev, key);
/*
* If we didn't find the MR, it means the MR was closed while we were
* handling the ODP event. In this case we return -EFAULT so that the
* QP will be closed.
*/
if (!mr || !mr->ibmr.pd) {
pr_err("Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
key);
mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
key);
ret = -EFAULT;
goto srcu_unlock;
}
if (!mr->umem->odp_data) {
pr_debug("skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
key);
mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
key);
if (bytes_mapped)
*bytes_mapped +=
(bcnt - *bytes_committed);
goto srcu_unlock;
}
current_seq = ACCESS_ONCE(mr->umem->odp_data->notifiers_seq);
/*
* Ensure the sequence number is valid for some time before we call
* gup.
*/
smp_rmb();
/*
* Avoid branches - this code will perform correctly
* in all iterations (in iteration 2 and above,
@ -244,63 +578,109 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *mib_dev,
io_virt += *bytes_committed;
bcnt -= *bytes_committed;
if (!mr->umem->odp_data->page_list) {
odp = implicit_mr_get_data(mr, io_virt, bcnt);
if (IS_ERR(odp)) {
ret = PTR_ERR(odp);
goto srcu_unlock;
}
mr = odp->private;
implicit = 1;
} else {
odp = mr->umem->odp_data;
}
next_mr:
current_seq = READ_ONCE(odp->notifiers_seq);
/*
* Ensure the sequence number is valid for some time before we call
* gup.
*/
smp_rmb();
size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
start_idx = (io_virt - (mr->mmkey.iova & PAGE_MASK)) >> PAGE_SHIFT;
if (mr->umem->writable)
access_mask |= ODP_WRITE_ALLOWED_BIT;
npages = ib_umem_odp_map_dma_pages(mr->umem, io_virt, bcnt,
access_mask, current_seq);
if (npages < 0) {
ret = npages;
goto srcu_unlock;
}
if (npages > 0) {
mutex_lock(&mr->umem->odp_data->umem_mutex);
ret = ib_umem_odp_map_dma_pages(mr->umem, io_virt, size,
access_mask, current_seq);
if (ret < 0)
goto srcu_unlock;
if (ret > 0) {
int np = ret;
mutex_lock(&odp->umem_mutex);
if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
/*
* No need to check whether the MTTs really belong to
* this MR, since ib_umem_odp_map_dma_pages already
* checks this.
*/
ret = mlx5_ib_update_xlt(mr, start_idx, npages,
ret = mlx5_ib_update_xlt(mr, start_idx, np,
PAGE_SHIFT,
MLX5_IB_UPD_XLT_ATOMIC);
} else {
ret = -EAGAIN;
}
mutex_unlock(&mr->umem->odp_data->umem_mutex);
mutex_unlock(&odp->umem_mutex);
if (ret < 0) {
if (ret != -EAGAIN)
pr_err("Failed to update mkey page tables\n");
mlx5_ib_err(dev, "Failed to update mkey page tables\n");
goto srcu_unlock;
}
if (bytes_mapped) {
u32 new_mappings = npages * PAGE_SIZE -
u32 new_mappings = np * PAGE_SIZE -
(io_virt - round_down(io_virt, PAGE_SIZE));
*bytes_mapped += min_t(u32, new_mappings, bcnt);
*bytes_mapped += min_t(u32, new_mappings, size);
}
npages += np;
}
bcnt -= size;
if (unlikely(bcnt)) {
struct ib_umem_odp *next;
io_virt += size;
next = odp_next(odp);
if (unlikely(!next || next->umem->address != io_virt)) {
mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
io_virt, next);
ret = -EAGAIN;
goto srcu_unlock_no_wait;
}
odp = next;
mr = odp->private;
goto next_mr;
}
srcu_unlock:
if (ret == -EAGAIN) {
if (!mr->umem->odp_data->dying) {
struct ib_umem_odp *odp_data = mr->umem->odp_data;
if (implicit || !odp->dying) {
unsigned long timeout =
msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
if (!wait_for_completion_timeout(
&odp_data->notifier_completion,
&odp->notifier_completion,
timeout)) {
pr_warn("timeout waiting for mmu notifier completion\n");
mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d\n",
current_seq, odp->notifiers_seq);
}
} else {
/* The MR is being killed, kill the QP as well. */
ret = -EFAULT;
}
}
srcu_read_unlock(&mib_dev->mr_srcu, srcu_key);
srcu_unlock_no_wait:
srcu_read_unlock(&dev->mr_srcu, srcu_key);
*bytes_committed = 0;
return ret ? ret : npages;
}
@ -618,8 +998,8 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
goto resolve_page_fault;
} else if (ret < 0 || total_wqe_bytes > bytes_mapped) {
if (ret != -ENOENT)
mlx5_ib_err(dev, "Error getting user pages for page fault. Error: %d\n",
ret);
mlx5_ib_err(dev, "PAGE FAULT error: %d. QP 0x%x. type: 0x%x\n",
ret, pfault->wqe.wq_num, pfault->type);
goto resolve_page_fault;
}
@ -627,7 +1007,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
resolve_page_fault:
mlx5_ib_page_fault_resume(dev, pfault, resume_with_error);
mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n",
pfault->token, resume_with_error,
pfault->wqe.wq_num, resume_with_error,
pfault->type);
free_page((unsigned long)buffer);
}
@ -700,10 +1080,9 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
ret = pagefault_single_data_segment(dev, rkey, address,
prefetch_len,
&bytes_committed, NULL);
if (ret < 0) {
if (ret < 0 && ret != -EAGAIN) {
mlx5_ib_warn(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n",
ret, pfault->token, address,
prefetch_len);
ret, pfault->token, address, prefetch_len);
}
}
}
@ -728,19 +1107,61 @@ void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
}
}
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev)
/*
 * Set up an MR cache entry that is reserved for implicit ODP.
 *
 * Does nothing when the device does not advertise IB_ODP_SUPPORT_IMPLICIT,
 * or when the entry is neither the implicit-MR MTT entry nor the
 * implicit-MR KSM entry. For those two entries the page shift, translation
 * table size (in MLX5_IB_UMR_OCTOWORD units) and access mode are filled in,
 * and the entry's limit is set to 0.
 *
 * NOTE(review): the "- 2" applied to ent->order is presumably the base
 * order of the first cache entry - confirm against the MR cache setup.
 */
void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
{
	if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
		return;

	switch (ent->order - 2) {
	case MLX5_IMR_MTT_CACHE_ENTRY:
		ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
		ent->page = PAGE_SHIFT;
		ent->xlt = MLX5_IMR_MTT_ENTRIES *
			   sizeof(struct mlx5_mtt) /
			   MLX5_IB_UMR_OCTOWORD;
		break;

	case MLX5_IMR_KSM_CACHE_ENTRY:
		ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM;
		ent->page = MLX5_KSM_PAGE_SHIFT;
		ent->xlt = mlx5_imr_ksm_entries *
			   sizeof(struct mlx5_klm) /
			   MLX5_IB_UMR_OCTOWORD;
		break;

	default:
		/* Not an implicit-ODP entry: leave it untouched. */
		return;
	}

	/* Both implicit-ODP entries use a limit of 0. */
	ent->limit = 0;
}
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
{
int ret;
ret = init_srcu_struct(&ibdev->mr_srcu);
ret = init_srcu_struct(&dev->mr_srcu);
if (ret)
return ret;
if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) {
ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey);
if (ret) {
mlx5_ib_err(dev, "Error getting null_mkey %d\n", ret);
return ret;
}
}
return 0;
}
void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev)
void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *dev)
{
cleanup_srcu_struct(&ibdev->mr_srcu);
cleanup_srcu_struct(&dev->mr_srcu);
}
/*
 * Module-level ODP initialisation.
 *
 * Computes mlx5_imr_ksm_entries as 2^(get_order(TASK_SIZE) -
 * MLX5_IMR_MTT_BITS). Always returns 0.
 */
int mlx5_ib_odp_init(void)
{
	int ksm_bits = get_order(TASK_SIZE) - MLX5_IMR_MTT_BITS;

	mlx5_imr_ksm_entries = BIT_ULL(ksm_bits);
	return 0;
}

View File

@ -905,7 +905,10 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
else
qp->bf.bfreg = &dev->bfreg;
qp->bf.buf_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
/* We need to divide by two since each register is comprised of
* two buffers of identical size, namely odd and even
*/
qp->bf.buf_size = (1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size)) / 2;
uar_index = qp->bf.bfreg->index;
err = calc_sq_size(dev, init_attr, qp);
@ -1141,7 +1144,8 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
return -ENOMEM;
rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
MLX5_SET(rqc, rqc, vsd, 1);
if (!(rq->flags & MLX5_IB_RQ_CVLAN_STRIPPING))
MLX5_SET(rqc, rqc, vsd, 1);
MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
MLX5_SET(rqc, rqc, flush_in_error_en, 1);
@ -1238,6 +1242,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (qp->rq.wqe_cnt) {
rq->base.container_mibqp = qp;
if (qp->flags & MLX5_IB_QP_CVLAN_STRIPPING)
rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING;
err = create_raw_packet_qp_rq(dev, rq, in);
if (err)
goto err_destroy_sq;
@ -1559,6 +1565,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
if (init_attr->create_flags & IB_QP_CREATE_CVLAN_STRIPPING) {
if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
MLX5_CAP_ETH(dev->mdev, vlan_cap)) ||
(init_attr->qp_type != IB_QPT_RAW_PACKET))
return -EOPNOTSUPP;
qp->flags |= MLX5_IB_QP_CVLAN_STRIPPING;
}
if (pd && pd->uobject) {
if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
mlx5_ib_dbg(dev, "copy failed\n");
@ -2198,6 +2212,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
{
enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port);
int err;
enum ib_gid_type gid_type;
if (attr_mask & IB_QP_PKEY_INDEX)
path->pkey_index = cpu_to_be16(alt ? attr->alt_pkey_index :
@ -2216,10 +2231,16 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (ll == IB_LINK_LAYER_ETHERNET) {
if (!(ah->ah_flags & IB_AH_GRH))
return -EINVAL;
err = mlx5_get_roce_gid_type(dev, port, ah->grh.sgid_index,
&gid_type);
if (err)
return err;
memcpy(path->rmac, ah->dmac, sizeof(ah->dmac));
path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
ah->grh.sgid_index);
path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4;
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
path->ecn_dscp = (ah->grh.traffic_class >> 2) & 0x3f;
} else {
path->fl_free_ar = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
path->fl_free_ar |=
@ -2422,7 +2443,7 @@ static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
if (raw_qp_param->set_mask & MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID) {
if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
MLX5_SET64(modify_rq_in, in, modify_bitmask,
MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID);
MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
MLX5_SET(rqc, rqc, counter_set_id, raw_qp_param->rq_q_ctr_id);
} else
pr_info_once("%s: RAW PACKET QP counters are not supported on current FW\n",
@ -2777,7 +2798,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
qp->port) - 1;
mibport = &dev->port[port_num];
context->qp_counter_set_usr_page |=
cpu_to_be32((u32)(mibport->q_cnt_id) << 24);
cpu_to_be32((u32)(mibport->q_cnts.set_id) << 24);
}
if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
@ -2805,7 +2826,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
raw_qp_param.operation = op;
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
raw_qp_param.rq_q_ctr_id = mibport->q_cnt_id;
raw_qp_param.rq_q_ctr_id = mibport->q_cnts.set_id;
raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID;
}
@ -3637,8 +3658,9 @@ static int set_psv_wr(struct ib_sig_domain *domain,
psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
break;
default:
pr_err("Bad signature type given.\n");
return 1;
pr_err("Bad signature type (%d) is given.\n",
domain->sig_type);
return -EINVAL;
}
*seg += sizeof(*psv_seg);
@ -3978,6 +4000,12 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case IB_QPT_SMI:
if (unlikely(!mdev->port_caps[qp->port - 1].has_smi)) {
mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n");
err = -EPERM;
*bad_wr = wr;
goto out;
}
case MLX5_IB_QPT_HW_GSI:
set_datagram_seg(seg, wr);
seg += sizeof(struct mlx5_wqe_datagram_seg);
@ -4579,6 +4607,7 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
struct ib_wq_init_attr *init_attr)
{
struct mlx5_ib_dev *dev;
int has_net_offloads;
__be64 *rq_pas0;
void *in;
void *rqc;
@ -4610,9 +4639,28 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
MLX5_SET(wq, wq, log_wq_pg_sz, rwq->log_page_size);
MLX5_SET(wq, wq, wq_signature, rwq->wq_sig);
MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma);
has_net_offloads = MLX5_CAP_GEN(dev->mdev, eth_net_offloads);
if (init_attr->create_flags & IB_WQ_FLAGS_CVLAN_STRIPPING) {
if (!(has_net_offloads && MLX5_CAP_ETH(dev->mdev, vlan_cap))) {
mlx5_ib_dbg(dev, "VLAN offloads are not supported\n");
err = -EOPNOTSUPP;
goto out;
}
} else {
MLX5_SET(rqc, rqc, vsd, 1);
}
if (init_attr->create_flags & IB_WQ_FLAGS_SCATTER_FCS) {
if (!(has_net_offloads && MLX5_CAP_ETH(dev->mdev, scatter_fcs))) {
mlx5_ib_dbg(dev, "Scatter FCS is not supported\n");
err = -EOPNOTSUPP;
goto out;
}
MLX5_SET(rqc, rqc, scatter_fcs, 1);
}
rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0);
err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp);
out:
kvfree(in);
return err;
}
@ -4896,10 +4944,37 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state);
MLX5_SET(rqc, rqc, state, wq_state);
if (wq_attr_mask & IB_WQ_FLAGS) {
if (wq_attr->flags_mask & IB_WQ_FLAGS_CVLAN_STRIPPING) {
if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
MLX5_CAP_ETH(dev->mdev, vlan_cap))) {
mlx5_ib_dbg(dev, "VLAN offloads are not "
"supported\n");
err = -EOPNOTSUPP;
goto out;
}
MLX5_SET64(modify_rq_in, in, modify_bitmask,
MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD);
MLX5_SET(rqc, rqc, vsd,
(wq_attr->flags & IB_WQ_FLAGS_CVLAN_STRIPPING) ? 0 : 1);
}
}
if (curr_wq_state == IB_WQS_RESET && wq_state == IB_WQS_RDY) {
if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
MLX5_SET64(modify_rq_in, in, modify_bitmask,
MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
MLX5_SET(rqc, rqc, counter_set_id, dev->port->q_cnts.set_id);
} else
pr_info_once("%s: Receive WQ counters are not supported on current FW\n",
dev->ib_dev.name);
}
err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in, inlen);
kvfree(in);
if (!err)
rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state;
out:
kvfree(in);
return err;
}

View File

@ -165,8 +165,6 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
int err;
int i;
struct mlx5_wqe_srq_next_seg *next;
int page_shift;
int npages;
err = mlx5_db_alloc(dev->mdev, &srq->db);
if (err) {
@ -179,7 +177,6 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
err = -ENOMEM;
goto err_db;
}
page_shift = srq->buf.page_shift;
srq->head = 0;
srq->tail = srq->msrq.max - 1;
@ -191,10 +188,8 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
cpu_to_be16((i + 1) & (srq->msrq.max - 1));
}
npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT));
mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n",
buf_size, page_shift, srq->buf.npages, npages);
in->pas = mlx5_vzalloc(sizeof(*in->pas) * npages);
mlx5_ib_dbg(dev, "srq->buf.page_shift = %d\n", srq->buf.page_shift);
in->pas = mlx5_vzalloc(sizeof(*in->pas) * srq->buf.npages);
if (!in->pas) {
err = -ENOMEM;
goto err_buf;
@ -208,7 +203,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
}
srq->wq_sig = !!srq_signature;
in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
in->type == IB_SRQT_XRC)
in->user_index = MLX5_IB_DEFAULT_UIDX;

View File

@ -146,7 +146,7 @@ static int mthca_query_port(struct ib_device *ibdev,
if (!in_mad || !out_mad)
goto out;
memset(props, 0, sizeof *props);
/* props being zeroed by the caller, avoid zeroing it here */
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
@ -212,7 +212,7 @@ static int mthca_modify_port(struct ib_device *ibdev,
if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
return -ERESTARTSYS;
err = mthca_query_port(ibdev, port, &attr);
err = ib_query_port(ibdev, port, &attr);
if (err)
goto out;
@ -1166,13 +1166,14 @@ static int mthca_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
err = mthca_query_port(ibdev, port_num, &attr);
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;

View File

@ -475,7 +475,7 @@ static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr
struct nes_vnic *nesvnic = to_nesvnic(ibdev);
struct net_device *netdev = nesvnic->netdev;
memset(props, 0, sizeof(*props));
/* props being zeroed by the caller, avoid zeroing it here */
props->max_mtu = IB_MTU_4096;
props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
@ -3660,13 +3660,14 @@ static int nes_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
err = nes_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
return 0;
}

View File

@ -93,15 +93,16 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
int err;
dev = get_ocrdma_dev(ibdev);
err = ocrdma_query_port(ibdev, port_num, &attr);
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
if (ocrdma_is_udp_encap_supported(dev))
immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
if (ocrdma_is_udp_encap_supported(dev))
immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;

View File

@ -210,6 +210,7 @@ int ocrdma_query_port(struct ib_device *ibdev,
struct ocrdma_dev *dev;
struct net_device *netdev;
/* props being zeroed by the caller, avoid zeroing it here */
dev = get_ocrdma_dev(ibdev);
if (port > 1) {
pr_err("%s(%d) invalid_port=0x%x\n", __func__,

View File

@ -238,8 +238,8 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
}
rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
memset(attr, 0, sizeof(*attr));
/* *attr being zeroed by the caller, avoid zeroing it here */
if (rdma_port->port_state == QED_RDMA_PORT_UP) {
attr->state = IB_PORT_ACTIVE;
attr->phys_state = 5;
@ -3494,14 +3494,15 @@ int qedr_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
err = qedr_query_port(ibdev, port_num, &attr);
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;

View File

@ -1220,6 +1220,7 @@ static int qib_query_port(struct rvt_dev_info *rdi, u8 port_num,
enum ib_mtu mtu;
u16 lid = ppd->lid;
/* props being zeroed by the caller, avoid zeroing it here */
props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
props->lmc = ppd->lmc;
props->state = dd->f_iblink_state(ppd->lastibcstat);

View File

@ -321,7 +321,9 @@ static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
err = usnic_ib_query_port(ibdev, port_num, &attr);
immutable->core_cap_flags = RDMA_CORE_PORT_USNIC;
err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;

View File

@ -330,7 +330,7 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
mutex_lock(&us_ibdev->usdev_lock);
__ethtool_get_link_ksettings(us_ibdev->netdev, &cmd);
memset(props, 0, sizeof(*props));
/* props being zeroed by the caller, avoid zeroing it here */
props->lid = 0;
props->lmc = 1;

View File

@ -132,13 +132,14 @@ static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
err = pvrdma_query_port(ibdev, port_num, &attr);
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
}

View File

@ -135,7 +135,7 @@ int pvrdma_query_port(struct ib_device *ibdev, u8 port,
return err;
}
memset(props, 0, sizeof(*props));
/* props being zeroed by the caller, avoid zeroing it here */
props->state = pvrdma_port_state_to_ib(resp->attrs.state);
props->max_mtu = pvrdma_mtu_to_ib(resp->attrs.max_mtu);
@ -275,7 +275,7 @@ int pvrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
}
mutex_lock(&vdev->port_mutex);
ret = pvrdma_query_port(ibdev, port, &attr);
ret = ib_query_port(ibdev, port, &attr);
if (ret)
goto out;

View File

@ -165,7 +165,7 @@ static int rvt_query_port(struct ib_device *ibdev, u8 port_num,
return -EINVAL;
rvp = rdi->ports[port_index];
memset(props, 0, sizeof(*props));
/* props being zeroed by the caller, avoid zeroing it here */
props->sm_lid = rvp->sm_lid;
props->sm_sl = rvp->sm_sl;
props->port_cap_flags = rvp->port_cap_flags;
@ -326,13 +326,14 @@ static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num,
if (port_index < 0)
return -EINVAL;
err = rvt_query_port(ibdev, port_num, &attr);
immutable->core_cap_flags = rdi->dparms.core_cap_flags;
err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
immutable->core_cap_flags = rdi->dparms.core_cap_flags;
immutable->max_mad_size = rdi->dparms.max_mad_size;
return 0;

View File

@ -86,6 +86,7 @@ static int rxe_query_port(struct ib_device *dev,
port = &rxe->port;
/* *attr being zeroed by the caller, avoid zeroing it here */
*attr = port->attr;
mutex_lock(&rxe->usdev_lock);
@ -261,13 +262,14 @@ static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
int err;
struct ib_port_attr attr;
err = rxe_query_port(dev, port_num, &attr);
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
err = ib_query_port(dev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;

View File

@ -1232,10 +1232,18 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
fs_for_each_fte(fte, fg) {
nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
if (compare_match_value(&fg->mask, match_value, &fte->val) &&
(flow_act->action & fte->action) &&
flow_act->flow_tag == fte->flow_tag) {
(flow_act->action & fte->action)) {
int old_action = fte->action;
if (fte->flow_tag != flow_act->flow_tag) {
mlx5_core_warn(get_dev(&fte->node),
"FTE flow tag %u already exists with different flow tag %u\n",
fte->flow_tag,
flow_act->flow_tag);
handle = ERR_PTR(-EEXIST);
goto unlock_fte;
}
fte->action |= flow_act->action;
handle = add_rule_fte(fte, fg, dest, dest_num,
old_action != flow_act->action);

View File

@ -295,6 +295,7 @@ struct mlx5_port_caps {
int gid_table_len;
int pkey_table_len;
u8 ext_port_cap;
bool has_smi;
};
struct mlx5_cmd_mailbox {
@ -1061,7 +1062,10 @@ enum {
};
enum {
MAX_MR_CACHE_ENTRIES = 21,
MAX_UMR_CACHE_ENTRY = 20,
MLX5_IMR_MTT_CACHE_ENTRY,
MLX5_IMR_KSM_CACHE_ENTRY,
MAX_MR_CACHE_ENTRIES
};
enum {

View File

@ -5013,7 +5013,7 @@ struct mlx5_ifc_modify_rq_out_bits {
enum {
MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD = 1ULL << 1,
MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID = 1ULL << 3,
MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID = 1ULL << 3,
};
struct mlx5_ifc_modify_rq_in_bits {

View File

@ -79,11 +79,15 @@ struct ib_umem_odp {
struct completion notifier_completion;
int dying;
struct work_struct work;
};
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem);
struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
unsigned long addr,
size_t size);
void ib_umem_odp_release(struct ib_umem *umem);
@ -117,10 +121,12 @@ typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end,
int rbt_ib_umem_for_each_in_range(struct rb_root *root, u64 start, u64 end,
umem_call_back cb, void *cookie);
struct umem_odp_node *rbt_ib_umem_iter_first(struct rb_root *root,
u64 start, u64 last);
struct umem_odp_node *rbt_ib_umem_iter_next(struct umem_odp_node *node,
u64 start, u64 last);
/*
* Find first region intersecting with address range.
* Return NULL if not found
*/
struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root *root,
u64 addr, u64 length);
static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item,
unsigned long mmu_seq)
@ -153,6 +159,13 @@ static inline int ib_umem_odp_get(struct ib_ucontext *context,
return -EINVAL;
}
/*
 * Stub used when CONFIG_INFINIBAND_ON_DEMAND_PAGING is disabled:
 * allocation is not available, so always fail with -EINVAL.
 */
static inline struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
unsigned long addr,
size_t size)
{
return ERR_PTR(-EINVAL);
}
static inline void ib_umem_odp_release(struct ib_umem *umem) {}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */

View File

@ -207,6 +207,7 @@ enum ib_device_cap_flags {
IB_DEVICE_MEM_WINDOW_TYPE_2A = (1 << 23),
IB_DEVICE_MEM_WINDOW_TYPE_2B = (1 << 24),
IB_DEVICE_RC_IP_CSUM = (1 << 25),
/* Deprecated. Please use IB_RAW_PACKET_CAP_IP_CSUM. */
IB_DEVICE_RAW_IP_CSUM = (1 << 26),
/*
* Devices should set IB_DEVICE_CROSS_CHANNEL if they
@ -220,6 +221,7 @@ enum ib_device_cap_flags {
IB_DEVICE_ON_DEMAND_PAGING = (1ULL << 31),
IB_DEVICE_SG_GAPS_REG = (1ULL << 32),
IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33),
/* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */
IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34),
};
@ -241,7 +243,8 @@ enum ib_atomic_cap {
};
enum ib_odp_general_cap_bits {
IB_ODP_SUPPORT = 1 << 0,
IB_ODP_SUPPORT = 1 << 0,
IB_ODP_SUPPORT_IMPLICIT = 1 << 1,
};
enum ib_odp_transport_cap_bits {
@ -330,6 +333,7 @@ struct ib_device_attr {
uint64_t hca_core_clock; /* in KHZ */
struct ib_rss_caps rss_caps;
u32 max_wq_type_rq;
u32 raw_packet_caps; /* Use ib_raw_packet_caps enum */
};
enum ib_mtu {
@ -499,6 +503,8 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
#define RDMA_CORE_CAP_PROT_ROCE 0x00200000
#define RDMA_CORE_CAP_PROT_IWARP 0x00400000
#define RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP 0x00800000
#define RDMA_CORE_CAP_PROT_RAW_PACKET 0x01000000
#define RDMA_CORE_CAP_PROT_USNIC 0x02000000
#define RDMA_CORE_PORT_IBA_IB (RDMA_CORE_CAP_PROT_IB \
| RDMA_CORE_CAP_IB_MAD \
@ -522,6 +528,10 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
#define RDMA_CORE_PORT_INTEL_OPA (RDMA_CORE_PORT_IBA_IB \
| RDMA_CORE_CAP_OPA_MAD)
#define RDMA_CORE_PORT_RAW_PACKET (RDMA_CORE_CAP_PROT_RAW_PACKET)
#define RDMA_CORE_PORT_USNIC (RDMA_CORE_CAP_PROT_USNIC)
struct ib_port_attr {
u64 subnet_prefix;
enum ib_port_state state;
@ -1019,6 +1029,7 @@ enum ib_qp_create_flags {
IB_QP_CREATE_SIGNATURE_EN = 1 << 6,
IB_QP_CREATE_USE_GFP_NOIO = 1 << 7,
IB_QP_CREATE_SCATTER_FCS = 1 << 8,
IB_QP_CREATE_CVLAN_STRIPPING = 1 << 9,
/* reserve bits 26-31 for low level drivers' internal use */
IB_QP_CREATE_RESERVED_START = 1 << 26,
IB_QP_CREATE_RESERVED_END = 1 << 31,
@ -1470,6 +1481,18 @@ struct ib_srq {
} ext;
};
/* Raw packet capability bits; reported in ib_device_attr.raw_packet_caps. */
enum ib_raw_packet_caps {
/* Stripping the cvlan from an incoming packet and reporting it in the
* matching work completion is supported.
*/
IB_RAW_PACKET_CAP_CVLAN_STRIPPING = (1 << 0),
/* Scattering the FCS field of an incoming packet to host memory is
* supported.
*/
IB_RAW_PACKET_CAP_SCATTER_FCS = (1 << 1),
/* Checksum offloads are supported (for both send and receive). */
IB_RAW_PACKET_CAP_IP_CSUM = (1 << 2),
};
enum ib_wq_type {
IB_WQT_RQ
};
@ -1493,6 +1516,11 @@ struct ib_wq {
atomic_t usecnt;
};
/*
 * Work queue flag bits. Used for ib_wq_init_attr.create_flags and for
 * ib_wq_attr.flags / ib_wq_attr.flags_mask.
 */
enum ib_wq_flags {
/* cvlan stripping on the WQ */
IB_WQ_FLAGS_CVLAN_STRIPPING = 1 << 0,
/* FCS scattering on the WQ */
IB_WQ_FLAGS_SCATTER_FCS = 1 << 1,
};
struct ib_wq_init_attr {
void *wq_context;
enum ib_wq_type wq_type;
@ -1500,16 +1528,20 @@ struct ib_wq_init_attr {
u32 max_sge;
struct ib_cq *cq;
void (*event_handler)(struct ib_event *, void *);
u32 create_flags; /* Use enum ib_wq_flags */
};
enum ib_wq_attr_mask {
IB_WQ_STATE = 1 << 0,
IB_WQ_CUR_STATE = 1 << 1,
IB_WQ_STATE = 1 << 0,
IB_WQ_CUR_STATE = 1 << 1,
IB_WQ_FLAGS = 1 << 2,
};
/*
 * Attributes passed when modifying a work queue.
 * NOTE(review): which fields are honoured is presumably selected by the
 * enum ib_wq_attr_mask bits passed alongside - confirm against the caller.
 */
struct ib_wq_attr {
enum ib_wq_state wq_state;
enum ib_wq_state curr_wq_state;
u32 flags; /* Use enum ib_wq_flags */
u32 flags_mask; /* Use enum ib_wq_flags */
};
struct ib_rwq_ind_table {
@ -1618,6 +1650,8 @@ enum ib_flow_spec_type {
IB_FLOW_SPEC_UDP = 0x41,
IB_FLOW_SPEC_VXLAN_TUNNEL = 0x50,
IB_FLOW_SPEC_INNER = 0x100,
/* Actions */
IB_FLOW_SPEC_ACTION_TAG = 0x1000,
};
#define IB_FLOW_SPEC_LAYER_MASK 0xF0
#define IB_FLOW_SPEC_SUPPORT_LAYERS 8
@ -1740,6 +1774,12 @@ struct ib_flow_spec_tunnel {
struct ib_flow_tunnel_filter mask;
};
/*
 * Flow specification entry for the tag action (IB_FLOW_SPEC_ACTION_TAG).
 * It is one member of union ib_flow_spec, and starts with the same
 * type/size pair as the other specs.
 */
struct ib_flow_spec_action_tag {
enum ib_flow_spec_type type;
u16 size;
/* NOTE(review): presumably the tag attached to matching packets - confirm */
u32 tag_id;
};
union ib_flow_spec {
struct {
u32 type;
@ -1751,6 +1791,7 @@ union ib_flow_spec {
struct ib_flow_spec_tcp_udp tcp_udp;
struct ib_flow_spec_ipv6 ipv6;
struct ib_flow_spec_tunnel tunnel;
struct ib_flow_spec_action_tag flow_tag;
};
struct ib_flow_attr {
@ -2333,6 +2374,16 @@ static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num)
rdma_protocol_roce(device, port_num);
}
/*
 * Return true if the immutable core capability flags of port @port_num on
 * @device include RDMA_CORE_CAP_PROT_RAW_PACKET.
 */
static inline bool rdma_protocol_raw_packet(const struct ib_device *device, u8 port_num)
{
	return (device->port_immutable[port_num].core_cap_flags &
		RDMA_CORE_CAP_PROT_RAW_PACKET) != 0;
}
/*
 * Return true if the immutable core capability flags of port @port_num on
 * @device include RDMA_CORE_CAP_PROT_USNIC.
 */
static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_num)
{
	return (device->port_immutable[port_num].core_cap_flags &
		RDMA_CORE_CAP_PROT_USNIC) != 0;
}
/**
* rdma_cap_ib_mad - Check if the port of a device supports Infiniband
* Management Datagrams.

View File

@ -246,7 +246,7 @@ struct ib_uverbs_ex_query_device_resp {
__u64 device_cap_flags_ex;
struct ib_uverbs_rss_caps rss_caps;
__u32 max_wq_type_rq;
__u32 reserved;
__u32 raw_packet_caps;
};
struct ib_uverbs_query_port {
@ -934,6 +934,19 @@ struct ib_uverbs_flow_spec_ipv6 {
struct ib_uverbs_flow_ipv6_filter mask;
};
/*
 * uverbs layout of the tag-action flow spec.
 * The leading union overlays the generic spec header with its individual
 * type/size/reserved fields.
 */
struct ib_uverbs_flow_spec_action_tag {
union {
struct ib_uverbs_flow_spec_hdr hdr;
struct {
__u32 type;
__u16 size;
__u16 reserved;
};
};
__u32 tag_id;
/* NOTE(review): presumably padding to keep the struct 8-byte sized - confirm */
__u32 reserved1;
};
struct ib_uverbs_flow_tunnel_filter {
__be32 tunnel_id;
};
@ -1053,6 +1066,8 @@ struct ib_uverbs_ex_create_wq {
__u32 cq_handle;
__u32 max_wr;
__u32 max_sge;
__u32 create_flags; /* Use enum ib_wq_flags */
__u32 reserved;
};
struct ib_uverbs_ex_create_wq_resp {
@ -1081,6 +1096,8 @@ struct ib_uverbs_ex_modify_wq {
__u32 wq_handle;
__u32 wq_state;
__u32 curr_wq_state;
__u32 flags; /* Use enum ib_wq_flags */
__u32 flags_mask; /* Use enum ib_wq_flags */
};
/* Prevent memory allocation rather than max expected size */