From f591822c3cf314442819486f45ff7dc1f690e0c0 Mon Sep 17 00:00:00 2001
From: Yishai Hadas
Date: Mon, 5 Aug 2019 11:30:10 +0300
Subject: [PATCH 1/7] IB/mlx5: Fix implicit MR release flow

Once an implicit MR is released via ib_umem_notifier_release(), its
leaves are marked as "dying". However, when
dereg_mr()->mlx5_ib_free_implicit_mr()->mr_leaf_free() is called, it
skips running the mr_leaf_free_action (i.e. umem_odp->work) when those
leaves are marked as "dying". As such, ib_umem_release() is never
called for the leaves and their MRs are leaked as well.

When an application exits or is killed without calling dereg_mr(), we
might hit the above flow. This fatal scenario is reported by a
WARN_ON() upon mlx5_ib_dealloc_ucontext(), as ibcontext->per_mm_list is
not empty; the call trace can be seen below.

Originally the "dying" mark in ib_umem_notifier_release() was
introduced to prevent pagefault_mr() from returning a success response
once this happened. However, we now have the completion mechanism, so
it is no longer needed in those flows. Even if a success response is
returned, the firmware will not find the pages and an error will be
returned on the following call, as a released mm causes
ib_umem_odp_map_dma_pages() to permanently fail mmget_not_zero().

Fix the above issue by dropping the "dying" mark from the above flows.
The other flows that use "dying" still need it for their
synchronization purposes.

WARNING: CPU: 1 PID: 7218 at drivers/infiniband/hw/mlx5/main.c:2004
 mlx5_ib_dealloc_ucontext+0x84/0x90 [mlx5_ib]
CPU: 1 PID: 7218 Comm: ibv_rc_pingpong Tainted: G E 5.2.0-rc6+ #13
Call Trace:
 uverbs_destroy_ufile_hw+0xb5/0x120 [ib_uverbs]
 ib_uverbs_close+0x1f/0x80 [ib_uverbs]
 __fput+0xbe/0x250
 task_work_run+0x88/0xa0
 do_exit+0x2cb/0xc30
 ? __fput+0x14b/0x250
 do_group_exit+0x39/0xb0
 get_signal+0x191/0x920
 ? _raw_spin_unlock_bh+0xa/0x20
 ? inet_csk_accept+0x229/0x2f0
 do_signal+0x36/0x5e0
 ? put_unused_fd+0x5b/0x70
 ? __sys_accept4+0x1a6/0x1e0
 ? inet_hash+0x35/0x40
 ? release_sock+0x43/0x90
 ? _raw_spin_unlock_bh+0xa/0x20
 ? inet_listen+0x9f/0x120
 exit_to_usermode_loop+0x5c/0xc6
 do_syscall_64+0x182/0x1b0
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: 81713d3788d2 ("IB/mlx5: Add implicit MR support")
Link: https://lore.kernel.org/r/20190805083010.21777-1-leon@kernel.org
Signed-off-by: Yishai Hadas
Reviewed-by: Artemy Kovalyov
Signed-off-by: Leon Romanovsky
Reviewed-by: Jason Gunthorpe
Signed-off-by: Jason Gunthorpe
---
 drivers/infiniband/core/umem_odp.c |  4 ----
 drivers/infiniband/hw/mlx5/odp.c   | 22 ++++++++--------------
 2 files changed, 8 insertions(+), 18 deletions(-)

diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 2a75c6f8d827..c0e15db34680 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -112,10 +112,6 @@ static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp,
 	 * prevent any further fault handling on this MR.
 	 */
 	ib_umem_notifier_start_account(umem_odp);
-	umem_odp->dying = 1;
-	/* Make sure that the fact the umem is dying is out before we release
-	 * all pending page faults. */
-	smp_wmb();
 	complete_all(&umem_odp->notifier_completion);
 	umem_odp->umem.context->invalidate_range(
 		umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp));
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 81da82050d05..1d257d1b3b0d 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -579,7 +579,6 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
 			u32 flags)
 {
 	int npages = 0, current_seq, page_shift, ret, np;
-	bool implicit = false;
 	struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
 	bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
 	bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH;
@@ -594,7 +593,6 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
 		if (IS_ERR(odp))
 			return PTR_ERR(odp);
 		mr = odp->private;
-		implicit = true;
 	} else {
 		odp = odp_mr;
 	}
@@ -682,19 +680,15 @@ next_mr:
 
 out:
 	if (ret == -EAGAIN) {
-		if (implicit || !odp->dying) {
-			unsigned long timeout =
-				msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
+		unsigned long timeout = msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
 
-			if (!wait_for_completion_timeout(
-					&odp->notifier_completion,
-					timeout)) {
-				mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n",
-					     current_seq, odp->notifiers_seq, odp->notifiers_count);
-			}
-		} else {
-			/* The MR is being killed, kill the QP as well. */
-			ret = -EFAULT;
+		if (!wait_for_completion_timeout(&odp->notifier_completion,
+						 timeout)) {
+			mlx5_ib_warn(
+				dev,
+				"timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n",
+				current_seq, odp->notifiers_seq,
+				odp->notifiers_count);
 		}
 	}

From d97de8887a12c598abc4d2e4e57a54c1f030e112 Mon Sep 17 00:00:00 2001
From: Mark Zhang
Date: Wed, 7 Aug 2019 13:18:19 +0300
Subject: [PATCH 2/7] RDMA/counter: Prevent QP counter binding if counters unsupported

In case rdma_counter_init() fails, counter allocation and QP bind
should not be allowed.

Fixes: 413d3347503b ("RDMA/counter: Add set/clear per-port auto mode support")
Fixes: 1bd8e0a9d0fd ("RDMA/counter: Allow manual mode configuration support")
Signed-off-by: Mark Zhang
Reviewed-by: Parav Pandit
Signed-off-by: Leon Romanovsky
Link: https://lore.kernel.org/r/20190807101819.7581-1-leon@kernel.org
Signed-off-by: Doug Ledford
---
 drivers/infiniband/core/counters.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
index 45d5164e9574..b79890739a2c 100644
--- a/drivers/infiniband/core/counters.c
+++ b/drivers/infiniband/core/counters.c
@@ -38,6 +38,9 @@ int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
 	int ret;
 
 	port_counter = &dev->port_data[port].port_counter;
+	if (!port_counter->hstats)
+		return -EOPNOTSUPP;
+
 	mutex_lock(&port_counter->lock);
 	if (on) {
 		ret = __counter_set_mode(&port_counter->mode,
@@ -509,6 +512,9 @@ int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port,
 	if (!rdma_is_port_valid(dev, port))
 		return -EINVAL;
 
+	if (!dev->port_data[port].port_counter.hstats)
+		return -EOPNOTSUPP;
+
 	qp = rdma_counter_get_qp(dev, qp_num);
 	if (!qp)
 		return -ENOENT;

From e7e6c6320c8c9ed923250cd019e5f9ca0f59b4b8 Mon Sep 17 00:00:00 2001
From: Dan Carpenter
Date: Wed, 7 Aug 2019 15:32:36 +0300
Subject: [PATCH 3/7] IB/mlx5: Check the correct variable in error handling code

The code accidentally checks "event_sub" instead of
"event_sub->eventfd".
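
To see why the wrong check slips through, here is a minimal,
compilable userspace sketch of the same pattern; the IS_ERR()/ERR_PTR()
macros below are simplified stand-ins for the kernel's, and ctx_fdget()
is a made-up substitute for eventfd_ctx_fdget():

#include <stdio.h>

#define MAX_ERRNO	4095UL
#define IS_ERR(p)	((unsigned long)(p) >= -MAX_ERRNO)
#define ERR_PTR(e)	((void *)(long)(e))

struct event_sub { void *eventfd; };

/* Stand-in for eventfd_ctx_fdget(): valid pointer or ERR_PTR(-errno) */
static void *ctx_fdget(int fd)
{
	static int dummy_ctx;

	return fd < 0 ? ERR_PTR(-9) : (void *)&dummy_ctx; /* -9 ~ -EBADF */
}

int main(void)
{
	struct event_sub sub;

	sub.eventfd = ctx_fdget(-1);
	/* The buggy form tests the wrong variable: 'sub' is a valid
	 * stack object, so IS_ERR(&sub) is false and the error from
	 * ctx_fdget() is silently ignored. */
	printf("IS_ERR(&sub)=%d IS_ERR(sub.eventfd)=%d\n",
	       (int)IS_ERR(&sub), (int)IS_ERR(sub.eventfd));
	return 0;
}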
Fixes: 759738537142 ("IB/mlx5: Enable subscription for device events over DEVX")
Signed-off-by: Dan Carpenter
Reviewed-by: Jason Gunthorpe
Acked-by: Leon Romanovsky
Link: https://lore.kernel.org/r/20190807123236.GA11452@mwanda
Signed-off-by: Doug Ledford
---
 drivers/infiniband/hw/mlx5/devx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index ec4370f99381..2d1b3d9609d9 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -2026,7 +2026,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
 			event_sub->eventfd =
 				eventfd_ctx_fdget(redirect_fd);
 
-			if (IS_ERR(event_sub)) {
+			if (IS_ERR(event_sub->eventfd)) {
 				err = PTR_ERR(event_sub->eventfd);
 				event_sub->eventfd = NULL;
 				goto err;

From e9eec6a55c95fb918036bfe29c26a535dca1ad49 Mon Sep 17 00:00:00 2001
From: Yishai Hadas
Date: Thu, 8 Aug 2019 11:15:38 +0300
Subject: [PATCH 4/7] IB/mlx5: Fix use-after-free error while accessing ev_file pointer

The call to uverbs_close_fd() releases the file pointer to 'ev_file',
after which mlx5_ib_dev becomes inaccessible. Cache the pointer before
cleaning up resources to solve the KASAN warning below.

BUG: KASAN: use-after-free in devx_async_event_close+0x391/0x480 [mlx5_ib]
Read of size 8 at addr ffff888301e3cec0 by task devx_direct_tes/4631

CPU: 1 PID: 4631 Comm: devx_direct_tes Tainted: G OE 5.3.0-rc1-for-upstream-dbg-2019-07-26_01-19-56-93 #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu2 04/01/2014
Call Trace:
 dump_stack+0x9a/0xeb
 print_address_description+0x1e2/0x400
 ? devx_async_event_close+0x391/0x480 [mlx5_ib]
 __kasan_report+0x15c/0x1df
 ? devx_async_event_close+0x391/0x480 [mlx5_ib]
 kasan_report+0xe/0x20
 devx_async_event_close+0x391/0x480 [mlx5_ib]
 __fput+0x26a/0x7b0
 task_work_run+0x10d/0x180
 exit_to_usermode_loop+0x137/0x160
 do_syscall_64+0x3c7/0x490
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x7f5df907d664
Code: 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 80 00 00 00 00 8b 05 6a cd 20 00 48 63 ff 85 c0 75 13 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 44 f3 c3 66 90 48 83 ec 18 48 89 7c 24 08 e8
RSP: 002b:00007ffd353cb958 EFLAGS: 00000246 ORIG_RAX: 0000000000000003
RAX: 0000000000000000 RBX: 000056017a88c348 RCX: 00007f5df907d664
RDX: 00007f5df969d400 RSI: 00007f5de8f1ec90 RDI: 0000000000000006
RBP: 00007f5df9681dc0 R08: 00007f5de8736410 R09: 000056017a9d2dd0
R10: 000000000000000b R11: 0000000000000246 R12: 00007f5de899d7d0
R13: 00007f5df96c4248 R14: 00007f5de8f1ecb0 R15: 000056017ae41308

Allocated by task 4631:
 save_stack+0x19/0x80
 kasan_kmalloc.constprop.3+0xa0/0xd0
 alloc_uobj+0x71/0x230 [ib_uverbs]
 alloc_begin_fd_uobject+0x2e/0xc0 [ib_uverbs]
 rdma_alloc_begin_uobject+0x96/0x140 [ib_uverbs]
 ib_uverbs_run_method+0xdf0/0x1940 [ib_uverbs]
 ib_uverbs_cmd_verbs+0x57e/0xdb0 [ib_uverbs]
 ib_uverbs_ioctl+0x177/0x260 [ib_uverbs]
 do_vfs_ioctl+0x18f/0x1010
 ksys_ioctl+0x70/0x80
 __x64_sys_ioctl+0x6f/0xb0
 do_syscall_64+0x95/0x490
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Freed by task 4631:
 save_stack+0x19/0x80
 __kasan_slab_free+0x11d/0x160
 slab_free_freelist_hook+0x67/0x1a0
 kfree+0xb9/0x2a0
 uverbs_close_fd+0x118/0x1c0 [ib_uverbs]
 devx_async_event_close+0x28a/0x480 [mlx5_ib]
 __fput+0x26a/0x7b0
 task_work_run+0x10d/0x180
 exit_to_usermode_loop+0x137/0x160
 do_syscall_64+0x3c7/0x490
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

The buggy address belongs to the object at ffff888301e3cda8
 which belongs to the cache kmalloc-512 of size 512
The buggy address is
located 280 bytes inside of 512-byte region [ffff888301e3cda8, ffff888301e3cfa8) The buggy address belongs to the page: page:ffffea000c078e00 refcount:1 mapcount:0 mapping:ffff888352811300 index:0x0 compound_mapcount: 0 flags: 0x2fffff80010200(slab|head) raw: 002fffff80010200 ffffea000d152608 ffffea000c077808 ffff888352811300 raw: 0000000000000000 0000000000250025 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888301e3cd80: fc fc fc fc fc fb fb fb fb fb fb fb fb fb fb fb ffff888301e3ce00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888301e3ce80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888301e3cf00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888301e3cf80: fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc Disabling lock debugging due to kernel taint Cc: # 5.2 Fixes: 759738537142 ("IB/mlx5: Enable subscription for device events over DEVX") Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20190808081538.28772-1-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/devx.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 2d1b3d9609d9..af5bbb35c058 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2644,12 +2644,13 @@ static int devx_async_event_close(struct inode *inode, struct file *filp) struct devx_async_event_file *ev_file = filp->private_data; struct devx_event_subscription *event_sub, *event_sub_tmp; struct devx_async_event_data *entry, *tmp; + struct mlx5_ib_dev *dev = ev_file->dev; - mutex_lock(&ev_file->dev->devx_event_table.event_xa_lock); + mutex_lock(&dev->devx_event_table.event_xa_lock); /* delete the subscriptions which are related to this FD */ list_for_each_entry_safe(event_sub, event_sub_tmp, &ev_file->subscribed_events_list, file_list) { - devx_cleanup_subscription(ev_file->dev, event_sub); + devx_cleanup_subscription(dev, event_sub); if (event_sub->eventfd) eventfd_ctx_put(event_sub->eventfd); @@ -2658,7 +2659,7 @@ static int devx_async_event_close(struct inode *inode, struct file *filp) kfree_rcu(event_sub, rcu); } - mutex_unlock(&ev_file->dev->devx_event_table.event_xa_lock); + mutex_unlock(&dev->devx_event_table.event_xa_lock); /* free the pending events allocation */ if (!ev_file->omit_data) { @@ -2670,7 +2671,7 @@ static int devx_async_event_close(struct inode *inode, struct file *filp) } uverbs_close_fd(filp); - put_device(&ev_file->dev->ib_dev.dev); + put_device(&dev->ib_dev.dev); return 0; } From 17c19287ecf54fb55f155902dcd39c62a9547c4e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 9 Aug 2019 17:09:04 +0300 Subject: [PATCH 5/7] RDMA/siw: Fix a memory leak in siw_init_cpulist() The error handling code doesn't free siw_cpu_info.tx_valid_cpus[0]. The first iteration through the loop is a no-op so this is sort of an off by one bug. Also Bernard pointed out that we can remove the NULL assignment and simplify the code a bit. 
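
For reference, a small compilable userspace sketch of the unwind idiom
the fix adopts; the array name, element size, and node count below are
invented for the demo:

#include <stdlib.h>

static char **tx_bufs;

static int init_bufs(int nr_nodes)
{
	int i;

	tx_bufs = calloc(nr_nodes, sizeof(*tx_bufs));
	if (!tx_bufs)
		return -1;

	for (i = 0; i < nr_nodes; i++) {
		tx_bufs[i] = malloc(64);
		if (!tx_bufs[i])
			goto out_err;
	}
	return 0;

out_err:
	/* Free exactly the entries [0, i) that were allocated. The
	 * buggy "while (i)" form stopped before index 0 and leaked
	 * the first element. */
	while (--i >= 0)
		free(tx_bufs[i]);
	free(tx_bufs);
	tx_bufs = NULL;
	return -1;
}

int main(void)
{
	return init_bufs(4) ? 1 : 0;
}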
Fixes: bdcf26bf9b3a ("rdma/siw: network and RDMA core interface")
Signed-off-by: Dan Carpenter
Reviewed-by: Bernard Metzler
Link: https://lore.kernel.org/r/20190809140904.GB3552@mwanda
Signed-off-by: Doug Ledford
---
 drivers/infiniband/sw/siw/siw_main.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index d0f140daf659..05a92f997f60 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -160,10 +160,8 @@ static int siw_init_cpulist(void)
 
 out_err:
 	siw_cpu_info.num_nodes = 0;
-	while (i) {
+	while (--i >= 0)
 		kfree(siw_cpu_info.tx_valid_cpus[i]);
-		siw_cpu_info.tx_valid_cpus[i--] = NULL;
-	}
 	kfree(siw_cpu_info.tx_valid_cpus);
 	siw_cpu_info.tx_valid_cpus = NULL;

From 932727c55653c1d7838d0ecd0cdce4393be156e0 Mon Sep 17 00:00:00 2001
From: Dan Carpenter
Date: Fri, 9 Aug 2019 13:13:19 +0300
Subject: [PATCH 6/7] RDMA/core: Fix error code in stat_get_doit_qp()

We need to set the error codes on these paths. Currently the only
possible error code is -EMSGSIZE, so that's what the patch uses.

Fixes: 83c2c1fcbd08 ("RDMA/nldev: Allow get counter mode through RDMA netlink")
Signed-off-by: Dan Carpenter
Reviewed-by: Leon Romanovsky
Link: https://lore.kernel.org/r/20190809101311.GA17867@mwanda
Signed-off-by: Doug Ledford
---
 drivers/infiniband/core/nldev.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 783e465e7c41..87d40d1ecdde 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -1952,12 +1952,16 @@ static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	if (fill_nldev_handle(msg, device) ||
 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
-	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode))
+	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) {
+		ret = -EMSGSIZE;
 		goto err_msg;
+	}
 
 	if ((mode == RDMA_COUNTER_MODE_AUTO) &&
-	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask))
+	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) {
+		ret = -EMSGSIZE;
 		goto err_msg;
+	}
 
 	nlmsg_end(msg, nlh);
 	ib_device_put(device);

From 2c8ccb37b08fe364f02a9914daca474d43151453 Mon Sep 17 00:00:00 2001
From: Bernard Metzler
Date: Fri, 9 Aug 2019 17:18:16 +0200
Subject: [PATCH 7/7] RDMA/siw: Change CQ flags from 64->32 bits

This patch changes the driver/user shared (mmapped) CQ notification
flags field from unsigned 64-bit size to unsigned 32-bit size. This
enables building siw on 32-bit architectures.

This patch changes the siw-abi, but as siw was only just merged in
this merge window cycle, there are no released kernels with the prior
ABI. We make no attempt to be binary compatible with siw user space
libraries from before the merge of siw into the upstream kernel;
compatibility is guaranteed only going forward, between upstream
kernels and the siw libraries provided by upstream rdma-core.
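
A small compilable sketch of the layout argument; the struct names
below are stand-ins for the siw-abi types, and the point is that a
32-bit flags word plus explicit padding keeps the shared control block
at 8 bytes while all atomic accesses move to a width that 32-bit
architectures can handle:

#include <stdint.h>

struct old_cq_ctrl {		/* previous ABI: one 64-bit word */
	uint64_t notify;
};

struct new_cq_ctrl {		/* new ABI: 32-bit flags + padding */
	uint32_t flags;
	uint32_t pad;
};

_Static_assert(sizeof(struct old_cq_ctrl) == sizeof(struct new_cq_ctrl),
	       "shared CQ control block keeps its 8-byte footprint");

int main(void) { return 0; }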
Signed-off-by: Bernard Metzler
Link: https://lore.kernel.org/r/20190809151816.13018-1-bmt@zurich.ibm.com
Signed-off-by: Doug Ledford
---
 drivers/infiniband/sw/siw/Kconfig     |  2 +-
 drivers/infiniband/sw/siw/siw.h       |  2 +-
 drivers/infiniband/sw/siw/siw_qp.c    | 14 ++++++++++----
 drivers/infiniband/sw/siw/siw_verbs.c | 16 +++++++++++-----
 include/uapi/rdma/siw-abi.h           |  3 ++-
 5 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/sw/siw/Kconfig b/drivers/infiniband/sw/siw/Kconfig
index dace276aea14..b622fc62f2cd 100644
--- a/drivers/infiniband/sw/siw/Kconfig
+++ b/drivers/infiniband/sw/siw/Kconfig
@@ -1,6 +1,6 @@
 config RDMA_SIW
 	tristate "Software RDMA over TCP/IP (iWARP) driver"
-	depends on INET && INFINIBAND && LIBCRC32C && 64BIT
+	depends on INET && INFINIBAND && LIBCRC32C
 	select DMA_VIRT_OPS
 	help
 	  This driver implements the iWARP RDMA transport over
diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index 03fd7b2f595f..77b1aabf6ff3 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -214,7 +214,7 @@ struct siw_wqe {
 struct siw_cq {
 	struct ib_cq base_cq;
 	spinlock_t lock;
-	u64 *notify;
+	struct siw_cq_ctrl *notify;
 	struct siw_cqe *queue;
 	u32 cq_put;
 	u32 cq_get;
diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c
index e27bd5b35b96..0990307c5d2c 100644
--- a/drivers/infiniband/sw/siw/siw_qp.c
+++ b/drivers/infiniband/sw/siw/siw_qp.c
@@ -1013,18 +1013,24 @@ out:
  */
 static bool siw_cq_notify_now(struct siw_cq *cq, u32 flags)
 {
-	u64 cq_notify;
+	u32 cq_notify;
 
 	if (!cq->base_cq.comp_handler)
 		return false;
 
-	cq_notify = READ_ONCE(*cq->notify);
+	/* Read application shared notification state */
+	cq_notify = READ_ONCE(cq->notify->flags);
 
 	if ((cq_notify & SIW_NOTIFY_NEXT_COMPLETION) ||
 	    ((cq_notify & SIW_NOTIFY_SOLICITED) &&
 	     (flags & SIW_WQE_SOLICITED))) {
-		/* dis-arm CQ */
-		smp_store_mb(*cq->notify, SIW_NOTIFY_NOT);
+		/*
+		 * CQ notification is one-shot: Since the
+		 * current CQE causes user notification,
+		 * the CQ gets dis-armed and must be re-armed
+		 * by the user for a new notification.
+		 */
+		WRITE_ONCE(cq->notify->flags, SIW_NOTIFY_NOT);
 
 		return true;
 	}
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 32dc79d0e898..e7f3a2379d9d 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -1049,7 +1049,7 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
 
 	spin_lock_init(&cq->lock);
 
-	cq->notify = &((struct siw_cq_ctrl *)&cq->queue[size])->notify;
+	cq->notify = (struct siw_cq_ctrl *)&cq->queue[size];
 
 	if (udata) {
 		struct siw_uresp_create_cq uresp = {};
@@ -1141,11 +1141,17 @@ int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags)
 	siw_dbg_cq(cq, "flags: 0x%02x\n", flags);
 
 	if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
-		/* CQ event for next solicited completion */
-		smp_store_mb(*cq->notify, SIW_NOTIFY_SOLICITED);
+		/*
+		 * Enable CQ event for next solicited completion
+		 * and make it visible to all associated producers.
+		 */
+		smp_store_mb(cq->notify->flags, SIW_NOTIFY_SOLICITED);
 	else
-		/* CQ event for any signalled completion */
-		smp_store_mb(*cq->notify, SIW_NOTIFY_ALL);
+		/*
+		 * Enable CQ event for any signalled completion
+		 * and make it visible to all associated producers.
+ */ + smp_store_mb(cq->notify->flags, SIW_NOTIFY_ALL); if (flags & IB_CQ_REPORT_MISSED_EVENTS) return cq->cq_put - cq->cq_get; diff --git a/include/uapi/rdma/siw-abi.h b/include/uapi/rdma/siw-abi.h index 7de68f1dc707..af735f55b291 100644 --- a/include/uapi/rdma/siw-abi.h +++ b/include/uapi/rdma/siw-abi.h @@ -180,6 +180,7 @@ struct siw_cqe { * to control CQ arming. */ struct siw_cq_ctrl { - __aligned_u64 notify; + __u32 flags; + __u32 pad; }; #endif
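
As a closing illustration of the one-shot CQ arming protocol the above
hunks implement, here is a compilable userspace sketch; C11 atomics
stand in for the kernel's READ_ONCE()/WRITE_ONCE()/smp_store_mb(), and
all names and values are illustrative rather than the driver's own:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

enum {
	NOTIFY_NOT	 = 0,
	NOTIFY_SOLICITED = 1 << 0,
	NOTIFY_NEXT	 = 1 << 1,
	NOTIFY_ALL	 = NOTIFY_SOLICITED | NOTIFY_NEXT,
};

static _Atomic uint32_t cq_flags;	/* the shared 32-bit word */

/* Consumer side (siw_req_notify_cq() analogue): arm the CQ and make
 * the new state visible to producers before going to sleep. */
static void arm_cq(bool solicited_only)
{
	atomic_store(&cq_flags,
		     solicited_only ? NOTIFY_SOLICITED : NOTIFY_ALL);
}

/* Producer side (siw_cq_notify_now() analogue): decide whether this
 * completion triggers a user notification. Arming is one-shot; a hit
 * disarms the CQ until the consumer re-arms it. */
static bool notify_now(bool wqe_solicited)
{
	uint32_t f = atomic_load(&cq_flags);

	if ((f & NOTIFY_NEXT) ||
	    ((f & NOTIFY_SOLICITED) && wqe_solicited)) {
		atomic_store(&cq_flags, NOTIFY_NOT);	/* disarm */
		return true;
	}
	return false;
}

int main(void)
{
	arm_cq(true);
	return notify_now(true) ? 0 : 1;	/* solicited CQE fires */
}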