From 42ea9f1b5c625fad225d4ac96a7e757dd4199d9c Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 6 May 2020 15:59:48 +0300 Subject: [PATCH 01/10] net/mlx5: drain health workqueue in case of driver load error If there is work pending in the health WQ when we tear down the driver in the driver load error flow, the health work will try to read dev->iseg, which was already unmapped in mlx5_pci_close(). Fix it by draining the health workqueue first thing in mlx5_pci_close(). Trace of the error: BUG: unable to handle page fault for address: ffffb5b141c18014 PF: supervisor read access in kernel mode PF: error_code(0x0000) - not-present page PGD 1fe95d067 P4D 1fe95d067 PUD 1fe95e067 PMD 1b7823067 PTE 0 Oops: 0000 [#1] SMP PTI CPU: 3 PID: 6755 Comm: kworker/u128:2 Not tainted 5.2.0-net-next-mlx5-hv_stats-over-last-worked-hyperv #1 Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS 090006 04/28/2016 Workqueue: mlx5_healtha050:00:02.0 mlx5_fw_fatal_reporter_err_work [mlx5_core] RIP: 0010:ioread32be+0x30/0x40 Code: 00 77 27 48 81 ff 00 00 01 00 76 07 0f b7 d7 ed 0f c8 c3 55 48 c7 c6 3b ee d5 9f 48 89 e5 e8 67 fc ff ff b8 ff ff ff ff 5d c3 <8b> 07 0f c8 c3 66 66 2e 0f 1f 84 00 00 00 00 00 48 81 fe ff ff 03 RSP: 0018:ffffb5b14c56fd78 EFLAGS: 00010292 RAX: ffffb5b141c18000 RBX: ffff8e9f78a801c0 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffff8e9f7ecd7628 RDI: ffffb5b141c18014 RBP: ffffb5b14c56fd90 R08: 0000000000000001 R09: 0000000000000000 R10: ffff8e9f372a2c30 R11: ffff8e9f87f4bc40 R12: ffff8e9f372a1fc0 R13: ffff8e9f78a80000 R14: ffffffffc07136a0 R15: ffff8e9f78ae6f20 FS: 0000000000000000(0000) GS:ffff8e9f7ecc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffb5b141c18014 CR3: 00000001c8f82006 CR4: 00000000003606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? 
mlx5_health_try_recover+0x4d/0x270 [mlx5_core] mlx5_fw_fatal_reporter_recover+0x16/0x20 [mlx5_core] devlink_health_reporter_recover+0x1c/0x50 devlink_health_report+0xfb/0x240 mlx5_fw_fatal_reporter_err_work+0x65/0xd0 [mlx5_core] process_one_work+0x1fb/0x4e0 ? process_one_work+0x16b/0x4e0 worker_thread+0x4f/0x3d0 kthread+0x10d/0x140 ? process_one_work+0x4e0/0x4e0 ? kthread_cancel_delayed_work_sync+0x20/0x20 ret_from_fork+0x1f/0x30 Modules linked in: nfsv3 rpcsec_gss_krb5 nfsv4 nfs fscache 8021q garp mrp stp llc ipmi_devintf ipmi_msghandler rpcrdma rdma_ucm ib_iser rdma_cm ib_umad iw_cm ib_ipoib libiscsi scsi_transport_iscsi ib_cm mlx5_ib ib_uverbs ib_core mlx5_core sb_edac crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel aes_x86_64 mlxfw crypto_simd cryptd glue_helper input_leds hyperv_fb intel_rapl_perf joydev serio_raw pci_hyperv pci_hyperv_mini mac_hid hv_balloon nfsd auth_rpcgss nfs_acl lockd grace sunrpc sch_fq_codel ip_tables x_tables autofs4 hv_utils hid_generic hv_storvsc ptp hid_hyperv hid hv_netvsc hyperv_keyboard pps_core scsi_transport_fc psmouse hv_vmbus i2c_piix4 floppy pata_acpi CR2: ffffb5b141c18014 ---[ end trace b12c5503157cad24 ]--- RIP: 0010:ioread32be+0x30/0x40 Code: 00 77 27 48 81 ff 00 00 01 00 76 07 0f b7 d7 ed 0f c8 c3 55 48 c7 c6 3b ee d5 9f 48 89 e5 e8 67 fc ff ff b8 ff ff ff ff 5d c3 <8b> 07 0f c8 c3 66 66 2e 0f 1f 84 00 00 00 00 00 48 81 fe ff ff 03 RSP: 0018:ffffb5b14c56fd78 EFLAGS: 00010292 RAX: ffffb5b141c18000 RBX: ffff8e9f78a801c0 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffff8e9f7ecd7628 RDI: ffffb5b141c18014 RBP: ffffb5b14c56fd90 R08: 0000000000000001 R09: 0000000000000000 R10: ffff8e9f372a2c30 R11: ffff8e9f87f4bc40 R12: ffff8e9f372a1fc0 R13: ffff8e9f78a80000 R14: ffffffffc07136a0 R15: ffff8e9f78ae6f20 FS: 0000000000000000(0000) GS:ffff8e9f7ecc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffb5b141c18014 CR3: 00000001c8f82006 CR4: 00000000003606e0 DR0: 0000000000000000 
DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 BUG: sleeping function called from invalid context at ./include/linux/percpu-rwsem.h:38 in_atomic(): 0, irqs_disabled(): 1, pid: 6755, name: kworker/u128:2 INFO: lockdep is turned off. CPU: 3 PID: 6755 Comm: kworker/u128:2 Tainted: G D 5.2.0-net-next-mlx5-hv_stats-over-last-worked-hyperv #1 Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS 090006 04/28/2016 Workqueue: mlx5_healtha050:00:02.0 mlx5_fw_fatal_reporter_err_work [mlx5_core] Call Trace: dump_stack+0x63/0x88 ___might_sleep+0x10a/0x130 __might_sleep+0x4a/0x80 exit_signals+0x33/0x230 ? blocking_notifier_call_chain+0x16/0x20 do_exit+0xb1/0xc30 ? kthread+0x10d/0x140 ? process_one_work+0x4e0/0x4e0 Fixes: 52c368dc3da7 ("net/mlx5: Move health and page alloc init to mdev_init") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index df46b1fce3a7..18d6c3752abe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -785,6 +785,11 @@ err_disable: static void mlx5_pci_close(struct mlx5_core_dev *dev) { + /* health work might still be active, and it needs pci bar in + * order to know the NIC state. 
Therefore, drain the health WQ + * before removing the pci bars + */ + mlx5_drain_health_wq(dev); iounmap(dev->iseg); pci_clear_master(dev->pdev); release_bar(dev->pdev); From b6e0b6bebe0732d5cac51f0791f269d2413b8980 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Thu, 7 May 2020 09:32:53 +0300 Subject: [PATCH 02/10] net/mlx5: Fix fatal error handling during device load Currently, if a fatal error occurs during mlx5_load_one(), we cannot enter the error state until mlx5_load_one() has finished, which can take several minutes: the outstanding commands cannot be processed due to the fatal error, so each of them has to time out. Fix it by setting dev->state to MLX5_DEVICE_STATE_INTERNAL_ERROR before requesting the lock. Fixes: c1d4d2e92ad6 ("net/mlx5: Avoid calling sleeping function by the health poll thread") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index c0cfbab15fe9..b31f769d2df9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -192,15 +192,23 @@ static bool reset_fw_if_needed(struct mlx5_core_dev *dev) void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) { + bool err_detected = false; + + /* Mark the device as fatal in order to abort FW commands */ + if ((check_fatal_sensors(dev) || force) && + dev->state == MLX5_DEVICE_STATE_UP) { + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + err_detected = true; + } mutex_lock(&dev->intf_state_mutex); - if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) - goto unlock; + if (!err_detected && dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + goto unlock;/* a previous error is still being handled */ if (dev->state == MLX5_DEVICE_STATE_UNINITIALIZED) { dev->state = 
MLX5_DEVICE_STATE_INTERNAL_ERROR; goto unlock; } - if (check_fatal_sensors(dev) || force) { + if (check_fatal_sensors(dev) || force) { /* protected state setting */ dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; mlx5_cmd_flush(dev); } From 47a357de2b6b706af3c9471d5042f9ba8907031e Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Mon, 1 Jun 2020 19:45:26 +0300 Subject: [PATCH 03/10] net/mlx5: DR, Fix freeing in dr_create_rc_qp() Variable "in" in dr_create_rc_qp() is allocated with kvzalloc() and should be freed with kvfree(). Fixes: 297cccebdc5a ("net/mlx5: DR, Expose an internal API to issue RDMA operations") Cc: stable@vger.kernel.org Signed-off-by: Denis Efremov Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index f421013b0b54..2ca79b9bde1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -179,7 +179,7 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP); err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); dr_qp->qpn = MLX5_GET(create_qp_out, out, qpn); - kfree(in); + kvfree(in); if (err) goto err_in; dr_qp->uar = attr->uar; From 36d45fb9d2fdf348d778bfe73f0427db1c6f9bc7 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Mon, 1 Jun 2020 16:03:44 +0300 Subject: [PATCH 04/10] net/mlx5e: Fix repeated XSK usage on one channel After an XSK is closed, the relevant structures in the channel are not zeroed. If an XSK is opened the second time on the same channel without recreating channels, the stray values in the structures will lead to incorrect operation of queues, which causes CQE errors, and the new socket doesn't work at all. 
This patch fixes the issue by explicitly zeroing XSK-related structs in the channel on XSK close. Note that those structs are zeroed on channel creation, and usually a configuration change (XDP program is set) happens on XSK open, which leads to recreating channels, so typical XSK use cases are not affected by this issue. However, if XSKs are opened and closed on the same channel without removing the XDP program, the bug reproduces. Fixes: db05815b36cb ("net/mlx5e: Add XSK zero-copy support") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index c28cbae42331..2c80205dc939 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -152,6 +152,10 @@ void mlx5e_close_xsk(struct mlx5e_channel *c) mlx5e_close_cq(&c->xskicosq.cq); mlx5e_close_xdpsq(&c->xsksq); mlx5e_close_cq(&c->xsksq.cq); + + memset(&c->xskrq, 0, sizeof(c->xskrq)); + memset(&c->xsksq, 0, sizeof(c->xsksq)); + memset(&c->xskicosq, 0, sizeof(c->xskicosq)); } void mlx5e_activate_xsk(struct mlx5e_channel *c) From 5f1572e6178e47c3ace55ced187d93240952c9cd Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Sun, 17 May 2020 12:45:52 +0300 Subject: [PATCH 05/10] net/mlx5e: Fix ethtool hfunc configuration change Changing the RX hash function requires rearranging the RQT internal indexes. The user isn't exposed to such changes, and they do not affect the user-configured indirection table. Rebuild the RQ table on hfunc change. 
Fixes: bdfc028de1b3 ("net/mlx5e: Fix ethtool RX hash func configuration change") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 41 ++++++++++--------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 3ef2525e8de9..ec5658bbe3c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1173,7 +1173,8 @@ int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_rss_params *rss = &priv->rss_params; int inlen = MLX5_ST_SZ_BYTES(modify_tir_in); - bool hash_changed = false; + bool refresh_tirs = false; + bool refresh_rqt = false; void *in; if ((hfunc != ETH_RSS_HASH_NO_CHANGE) && @@ -1189,36 +1190,38 @@ int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != rss->hfunc) { rss->hfunc = hfunc; - hash_changed = true; + refresh_rqt = true; + refresh_tirs = true; } if (indir) { memcpy(rss->indirection_rqt, indir, sizeof(rss->indirection_rqt)); - - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { - u32 rqtn = priv->indir_rqt.rqtn; - struct mlx5e_redirect_rqt_param rrp = { - .is_rss = true, - { - .rss = { - .hfunc = rss->hfunc, - .channels = &priv->channels, - }, - }, - }; - - mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp); - } + refresh_rqt = true; } if (key) { memcpy(rss->toeplitz_hash_key, key, sizeof(rss->toeplitz_hash_key)); - hash_changed = hash_changed || rss->hfunc == ETH_RSS_HASH_TOP; + refresh_tirs = refresh_tirs || rss->hfunc == ETH_RSS_HASH_TOP; } - if (hash_changed) + if (refresh_rqt && test_bit(MLX5E_STATE_OPENED, &priv->state)) { + struct mlx5e_redirect_rqt_param rrp = { + .is_rss = true, + { + .rss = { + .hfunc = rss->hfunc, + .channels = &priv->channels, + }, + 
}, + }; + u32 rqtn = priv->indir_rqt.rqtn; + + mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp); + } + + if (refresh_tirs) mlx5e_modify_tirs_hash(priv, in); mutex_unlock(&priv->state_lock); From 60904cd349abc98cb888fc28d1ca55a8e2cf87b3 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 14 May 2020 05:12:56 -0500 Subject: [PATCH 06/10] net/mlx5: Disable reload while removing the device While unregistration is in progress, user might be reloading the interface. This can race with unregistration in below flow which uses the resources which are getting disabled by reload flow. Hence, disable the devlink reloading first when removing the device. CPU0 CPU1 ---- ---- local_pci_remove() devlink_mutex remove_one() devlink_nl_cmd_reload() mlx5_unregister_device() devlink_reload() ops->reload_down() mlx5_unload_one() Fixes: 4383cfcc65e7 ("net/mlx5: Add devlink reload") Signed-off-by: Parav Pandit Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 2 -- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index e94f0c4d74a7..a99fe4b02b9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -283,7 +283,6 @@ int mlx5_devlink_register(struct devlink *devlink, struct device *dev) goto params_reg_err; mlx5_devlink_set_params_init_values(devlink); devlink_params_publish(devlink); - devlink_reload_enable(devlink); return 0; params_reg_err: @@ -293,7 +292,6 @@ params_reg_err: void mlx5_devlink_unregister(struct devlink *devlink) { - devlink_reload_disable(devlink); devlink_params_unregister(devlink, mlx5_devlink_params, ARRAY_SIZE(mlx5_devlink_params)); devlink_unregister(devlink); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 18d6c3752abe..2729afc13ab4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1367,6 +1367,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err); pci_save_state(pdev); + devlink_reload_enable(devlink); return 0; err_load_one: @@ -1384,6 +1385,7 @@ static void remove_one(struct pci_dev *pdev) struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct devlink *devlink = priv_to_devlink(dev); + devlink_reload_disable(devlink); mlx5_crdump_disable(dev); mlx5_devlink_unregister(devlink); From 98f91c45769302b26e781f949b07a90df3c5cbda Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 15 May 2020 02:44:06 -0500 Subject: [PATCH 07/10] net/mlx5: Fix devlink objects and devlink device unregister sequence The following problems currently exist. 1. The devlink device is registered by mlx5_load_one(), but it is not unregistered by mlx5_unload_one(). This is incorrect. 2. As a result, when the mlx5 PCI device is removed, the devlink device is unregistered before the devlink ports are unregistered, as shown in the ladder diagram below. remove_one() mlx5_devlink_unregister() [..] devlink_unregister() <- ports are still registered! mlx5_unload_one() mlx5_unregister_device() mlx5_remove_device() mlx5e_remove() mlx5e_devlink_port_unregister() devlink_port_unregister() 3. The conditions checked for registering and unregistering the device are not symmetric in these routines either. Hence, fix the sequence by making the load and unload routines symmetric and correctly ordered, i.e. (a) register the devlink device, then register the devlink ports; (b) unregister the devlink ports, then unregister the devlink device. Do this based on the boot and cleanup flags instead of different conditions. 
Fixes: c6acd629eec7 ("net/mlx5e: Add support for devlink-port in non-representors mode") Fixes: f60f315d339e ("net/mlx5e: Register devlink ports for physical link, PCI PF, VFs") Signed-off-by: Parav Pandit Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/main.c | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 2729afc13ab4..e786c5c75dba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1199,23 +1199,22 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot) if (err) goto err_load; + set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); + if (boot) { err = mlx5_devlink_register(priv_to_devlink(dev), dev->device); if (err) goto err_devlink_reg; - } - - if (mlx5_device_registered(dev)) - mlx5_attach_device(dev); - else mlx5_register_device(dev); - - set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); + } else { + mlx5_attach_device(dev); + } mutex_unlock(&dev->intf_state_mutex); return 0; err_devlink_reg: + clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); mlx5_unload(dev); err_load: if (boot) @@ -1231,10 +1230,15 @@ out: void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) { - if (cleanup) - mlx5_unregister_device(dev); - mutex_lock(&dev->intf_state_mutex); + + if (cleanup) { + mlx5_unregister_device(dev); + mlx5_devlink_unregister(priv_to_devlink(dev)); + } else { + mlx5_detach_device(dev); + } + if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { mlx5_core_warn(dev, "%s: interface is down, NOP\n", __func__); @@ -1245,9 +1249,6 @@ void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); - if (mlx5_device_registered(dev)) - mlx5_detach_device(dev); - mlx5_unload(dev); if (cleanup) @@ -1387,8 +1388,6 @@ static void remove_one(struct 
pci_dev *pdev) devlink_reload_disable(devlink); mlx5_crdump_disable(dev); - mlx5_devlink_unregister(devlink); - mlx5_drain_health_wq(dev); mlx5_unload_one(dev, true); mlx5_pci_close(dev); From 0d156f2deda8675c29fa2b8b5ed9b374370e47f2 Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Sun, 7 Jun 2020 15:40:40 +0000 Subject: [PATCH 08/10] net/mlx5e: CT: Fix ipv6 nat header rewrite actions Set the ipv6 word fields according to the hardware definitions. Fixes: ac991b48d43c ("net/mlx5e: CT: Offload established flows") Signed-off-by: Oz Shlomo Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/tc_ct.c | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index afc19dca1f5f..430025550fad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -328,21 +328,21 @@ mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act, case FLOW_ACT_MANGLE_HDR_TYPE_IP6: MLX5_SET(set_action_in, modact, length, 0); - if (offset == offsetof(struct ipv6hdr, saddr)) + if (offset == offsetof(struct ipv6hdr, saddr) + 12) field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0; - else if (offset == offsetof(struct ipv6hdr, saddr) + 4) - field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32; else if (offset == offsetof(struct ipv6hdr, saddr) + 8) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32; + else if (offset == offsetof(struct ipv6hdr, saddr) + 4) field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64; - else if (offset == offsetof(struct ipv6hdr, saddr) + 12) + else if (offset == offsetof(struct ipv6hdr, saddr)) field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96; - else if (offset == offsetof(struct ipv6hdr, daddr)) - field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0; - else if (offset == offsetof(struct ipv6hdr, daddr) + 4) - field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32; - else if (offset 
== offsetof(struct ipv6hdr, daddr) + 8) - field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64; else if (offset == offsetof(struct ipv6hdr, daddr) + 12) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0; + else if (offset == offsetof(struct ipv6hdr, daddr) + 8) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32; + else if (offset == offsetof(struct ipv6hdr, daddr) + 4) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64; + else if (offset == offsetof(struct ipv6hdr, daddr)) field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96; else return -EOPNOTSUPP; From 17e73d47cd095154878dfedd4918d6a9482eba13 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 2 Jun 2020 15:28:37 +0300 Subject: [PATCH 09/10] net/mlx5: Don't fail driver on failure to create debugfs Clang warns: drivers/net/ethernet/mellanox/mlx5/core/main.c:1278:6: warning: variable 'err' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] if (!priv->dbg_root) { ^~~~~~~~~~~~~~~ drivers/net/ethernet/mellanox/mlx5/core/main.c:1303:9: note: uninitialized use occurs here return err; ^~~ drivers/net/ethernet/mellanox/mlx5/core/main.c:1278:2: note: remove the 'if' if its condition is always false if (!priv->dbg_root) { ^~~~~~~~~~~~~~~~~~~~~~ drivers/net/ethernet/mellanox/mlx5/core/main.c:1259:9: note: initialize the variable 'err' to silence this warning int err; ^ = 0 1 warning generated. The check of returned value of debugfs_create_dir() is wrong because by the design debugfs failures should never fail the driver and the check itself was wrong too. The kernel compiled without CONFIG_DEBUG_FS will return ERR_PTR(-ENODEV) and not NULL as expected. 
Fixes: 11f3b84d7068 ("net/mlx5: Split mdev init and pci init") Link: https://github.com/ClangBuiltLinux/linux/issues/1042 Reported-by: Nathan Chancellor Signed-off-by: Leon Romanovsky Reviewed-by: Nathan Chancellor Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index e786c5c75dba..8b658908f044 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1281,11 +1281,6 @@ static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) priv->dbg_root = debugfs_create_dir(dev_name(dev->device), mlx5_debugfs_root); - if (!priv->dbg_root) { - dev_err(dev->device, "mlx5_core: error, Cannot create debugfs dir, aborting\n"); - goto err_dbg_root; - } - err = mlx5_health_init(dev); if (err) goto err_health_init; @@ -1300,7 +1295,6 @@ err_pagealloc_init: mlx5_health_cleanup(dev); err_health_init: debugfs_remove(dev->priv.dbg_root); -err_dbg_root: mutex_destroy(&priv->pgdir_mutex); mutex_destroy(&priv->alloc_mutex); mutex_destroy(&priv->bfregs.wc_head.lock); From 09a9297574cb10b3d9fe722b2baa9a379b2d289c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 3 Jun 2020 20:54:36 +0300 Subject: [PATCH 10/10] net/mlx5: E-Switch, Fix some error pointer dereferences We can't leave "counter" set to an error pointer. Otherwise either it will lead to an error pointer dereference later in the function or it leads to an error pointer dereference when we call mlx5_fc_destroy(). 
Fixes: 07bab9502641d ("net/mlx5: E-Switch, Refactor eswitch ingress acl codes") Signed-off-by: Dan Carpenter Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c index 9bda4fe2eafa..5dc335e621c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c @@ -162,10 +162,12 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, if (MLX5_CAP_ESW_INGRESS_ACL(esw->dev, flow_counter)) { counter = mlx5_fc_create(esw->dev, false); - if (IS_ERR(counter)) + if (IS_ERR(counter)) { esw_warn(esw->dev, "vport[%d] configure ingress drop rule counter failed\n", vport->vport); + counter = NULL; + } vport->ingress.legacy.drop_counter = counter; } @@ -272,7 +274,7 @@ void esw_acl_ingress_lgcy_cleanup(struct mlx5_eswitch *esw, esw_acl_ingress_table_destroy(vport); clean_drop_counter: - if (!IS_ERR_OR_NULL(vport->ingress.legacy.drop_counter)) { + if (vport->ingress.legacy.drop_counter) { mlx5_fc_destroy(esw->dev, vport->ingress.legacy.drop_counter); vport->ingress.legacy.drop_counter = NULL; }