From 4b2c5fa9c9902ce34ecea6711558d9af96351b31 Mon Sep 17 00:00:00 2001 From: Amir Tzin Date: Wed, 21 Jul 2021 16:14:12 +0300 Subject: [PATCH 01/13] net/mlx5: Add layout to support default timeouts register Add needed structures and defines for DTOR (default timeouts register). This will be used to get timeouts values from FW instead of hard coded values in the driver code thus enabling support for slower devices which need longer timeouts. Signed-off-by: Amir Tzin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 4 +++- include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 37 ++++++++++++++++++++++++++++++++++- 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 66eaf0aa7f69..109cc8106d16 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -577,7 +577,9 @@ struct mlx5_init_seg { __be32 rsvd1[120]; __be32 initializing; struct health_buffer health; - __be32 rsvd2[880]; + __be32 rsvd2[878]; + __be32 cmd_exec_to; + __be32 cmd_q_init_to; __be32 internal_timer_h; __be32 internal_timer_l; __be32 rsvd3[2]; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 0ca719c00824..ccbd87fbd3bf 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -156,6 +156,7 @@ enum { MLX5_REG_MIRC = 0x9162, MLX5_REG_SBCAM = 0xB01F, MLX5_REG_RESOURCE_DUMP = 0xC000, + MLX5_REG_DTOR = 0xC00E, }; enum mlx5_qpts_trust_state { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 993204a6c1a1..b8bff5109656 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1306,7 +1306,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 vhca_resource_manager[0x1]; u8 hca_cap_2[0x1]; - u8 reserved_at_21[0x2]; + u8 reserved_at_21[0x1]; + u8 dtor[0x1]; u8 event_on_vhca_state_teardown_request[0x1]; u8 event_on_vhca_state_in_use[0x1]; u8 event_on_vhca_state_active[0x1]; @@ 
-2807,6 +2808,40 @@ struct mlx5_ifc_dropped_packet_logged_bits { u8 reserved_at_0[0xe0]; }; +struct mlx5_ifc_default_timeout_bits { + u8 to_multiplier[0x3]; + u8 reserved_at_3[0x9]; + u8 to_value[0x14]; +}; + +struct mlx5_ifc_dtor_reg_bits { + u8 reserved_at_0[0x20]; + + struct mlx5_ifc_default_timeout_bits pcie_toggle_to; + + u8 reserved_at_40[0x60]; + + struct mlx5_ifc_default_timeout_bits health_poll_to; + + struct mlx5_ifc_default_timeout_bits full_crdump_to; + + struct mlx5_ifc_default_timeout_bits fw_reset_to; + + struct mlx5_ifc_default_timeout_bits flush_on_err_to; + + struct mlx5_ifc_default_timeout_bits pci_sync_update_to; + + struct mlx5_ifc_default_timeout_bits tear_down_to; + + struct mlx5_ifc_default_timeout_bits fsm_reactivate_to; + + struct mlx5_ifc_default_timeout_bits reclaim_pages_to; + + struct mlx5_ifc_default_timeout_bits reclaim_vfs_pages_to; + + u8 reserved_at_1c0[0x40]; +}; + enum { MLX5_CQ_ERROR_SYNDROME_CQ_OVERRUN = 0x1, MLX5_CQ_ERROR_SYNDROME_CQ_ACCESS_VIOLATION_ERROR = 0x2, From 5945e1adeab527ec96c75a786213c146d4d482a4 Mon Sep 17 00:00:00 2001 From: Amir Tzin Date: Thu, 7 Oct 2021 18:00:27 +0300 Subject: [PATCH 02/13] net/mlx5: Read timeout values from init segment Replace hard coded timeouts with values stored in firmware's init segment. Timeouts are read from init segment during driver load. If init segment timeouts are not supported then fallback to hard coded defaults instead. Also move pre initialization timeouts which cannot be read from firmware to the new mechanism. 
Signed-off-by: Amir Tzin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 18 +++- .../ethernet/mellanox/mlx5/core/lib/tout.c | 96 +++++++++++++++++++ .../ethernet/mellanox/mlx5/core/lib/tout.h | 28 ++++++ .../net/ethernet/mellanox/mlx5/core/main.c | 38 ++++---- include/linux/mlx5/driver.h | 5 +- 6 files changed, 161 insertions(+), 26 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 63032cd6efb1..a151575be51f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -17,7 +17,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ fs_counters.o fs_ft_pool.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \ diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \ - fw_reset.o qos.o + fw_reset.o qos.o lib/tout.o # # Netdev basic diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 4dc3a822907a..f71ec4d9d68e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -45,6 +45,7 @@ #include "mlx5_core.h" #include "lib/eq.h" +#include "lib/tout.h" enum { CMD_IF_REV = 5, @@ -225,9 +226,13 @@ static void set_signature(struct mlx5_cmd_work_ent *ent, int csum) static void poll_timeout(struct mlx5_cmd_work_ent *ent) { - unsigned long poll_end = jiffies + msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC + 1000); + struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev, cmd); + u64 cmd_to_ms = mlx5_tout_ms(dev, CMD); + unsigned long poll_end; u8 own; + poll_end = jiffies + 
msecs_to_jiffies(cmd_to_ms + 1000); + do { own = READ_ONCE(ent->lay->status_own); if (!(own & CMD_OWNER_HW)) { @@ -925,15 +930,18 @@ static void cmd_work_handler(struct work_struct *work) { struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); struct mlx5_cmd *cmd = ent->cmd; - struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd); - unsigned long cb_timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC); + bool poll_cmd = ent->polling; struct mlx5_cmd_layout *lay; + struct mlx5_core_dev *dev; + unsigned long cb_timeout; struct semaphore *sem; unsigned long flags; - bool poll_cmd = ent->polling; int alloc_ret; int cmd_mode; + dev = container_of(cmd, struct mlx5_core_dev, cmd); + cb_timeout = msecs_to_jiffies(mlx5_tout_ms(dev, CMD)); + complete(&ent->handling); sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; down(sem); @@ -1073,7 +1081,7 @@ static void wait_func_handle_exec_timeout(struct mlx5_core_dev *dev, static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) { - unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC); + unsigned long timeout = msecs_to_jiffies(mlx5_tout_ms(dev, CMD)); struct mlx5_cmd *cmd = &dev->cmd; int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c new file mode 100644 index 000000000000..ee266e0d122a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#include <linux/mlx5/driver.h> +#include "lib/tout.h" + +struct mlx5_timeouts { + u64 to[MAX_TIMEOUT_TYPES]; +}; + +static const u32 tout_def_sw_val[MAX_TIMEOUT_TYPES] = { + [MLX5_TO_FW_PRE_INIT_TIMEOUT_MS] = 120000, + [MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS] = 20000, + [MLX5_TO_FW_PRE_INIT_WAIT_MS] = 2, + [MLX5_TO_FW_INIT_MS] = 2000, + [MLX5_TO_CMD_MS] = 60000 +}; + +static void tout_set(struct mlx5_core_dev *dev, u64 val, enum mlx5_timeouts_types type) +{ + dev->timeouts->to[type] = val; +} + +static void tout_set_def_val(struct mlx5_core_dev *dev) +{ + int i; + + for (i = MLX5_TO_FW_PRE_INIT_TIMEOUT_MS; i < MAX_TIMEOUT_TYPES; i++) + tout_set(dev, tout_def_sw_val[i], i); +} + +int mlx5_tout_init(struct mlx5_core_dev *dev) +{ + dev->timeouts = kmalloc(sizeof(*dev->timeouts), GFP_KERNEL); + if (!dev->timeouts) + return -ENOMEM; + + tout_set_def_val(dev); + return 0; +} + +void mlx5_tout_cleanup(struct mlx5_core_dev *dev) +{ + kfree(dev->timeouts); +} + +/* Time register consists of two fields to_multiplier(time out multiplier) + * and to_value(time out value). to_value is the quantity of the time units and + * to_multiplier is the type and should be one off these four values. + * 0x0: millisecond + * 0x1: seconds + * 0x2: minutes + * 0x3: hours + * this function converts the time stored in the two register fields into + * millisecond.
+ */ +static u64 tout_convert_reg_field_to_ms(u32 to_mul, u32 to_val) +{ + u64 msec = to_val; + + to_mul &= 0x3; + /* convert hours/minutes/seconds to miliseconds */ + if (to_mul) + msec *= 1000 * int_pow(60, to_mul - 1); + + return msec; +} + +static u64 tout_convert_iseg_to_ms(u32 iseg_to) +{ + return tout_convert_reg_field_to_ms(iseg_to >> 29, iseg_to & 0xfffff); +} + +static bool tout_is_supported(struct mlx5_core_dev *dev) +{ + return !!ioread32be(&dev->iseg->cmd_q_init_to); +} + +void mlx5_tout_query_iseg(struct mlx5_core_dev *dev) +{ + u32 to; + + if (!tout_is_supported(dev)) + return; + + to = ioread32be(&dev->iseg->cmd_q_init_to); + tout_set(dev, tout_convert_iseg_to_ms(to), MLX5_TO_FW_INIT_MS); + + to = ioread32be(&dev->iseg->cmd_exec_to); + tout_set(dev, tout_convert_iseg_to_ms(to), MLX5_TO_CMD_MS); +} + +u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type) +{ + return dev->timeouts->to[type]; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h new file mode 100644 index 000000000000..7e6fc61c5b45 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#ifndef MLX5_TIMEOUTS_H +#define MLX5_TIMEOUTS_H + +enum mlx5_timeouts_types { + /* pre init timeouts (not read from FW) */ + MLX5_TO_FW_PRE_INIT_TIMEOUT_MS, + MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS, + MLX5_TO_FW_PRE_INIT_WAIT_MS, + + /* init segment timeouts */ + MLX5_TO_FW_INIT_MS, + MLX5_TO_CMD_MS, + + MAX_TIMEOUT_TYPES +}; + +struct mlx5_core_dev; +int mlx5_tout_init(struct mlx5_core_dev *dev); +void mlx5_tout_cleanup(struct mlx5_core_dev *dev); +void mlx5_tout_query_iseg(struct mlx5_core_dev *dev); +u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type); + +#define mlx5_tout_ms(dev, type) _mlx5_tout_ms(dev, MLX5_TO_##type##_MS) + +# endif /* MLX5_TIMEOUTS_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 65313448a47c..b4893eac6ed6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -60,6 +60,7 @@ #include "devlink.h" #include "fw_reset.h" #include "lib/mlx5.h" +#include "lib/tout.h" #include "fpga/core.h" #include "fpga/ipsec.h" #include "accel/ipsec.h" @@ -176,11 +177,6 @@ static struct mlx5_profile profile[] = { }, }; -#define FW_INIT_TIMEOUT_MILI 2000 -#define FW_INIT_WAIT_MS 2 -#define FW_PRE_INIT_TIMEOUT_MILI 120000 -#define FW_INIT_WARN_MESSAGE_INTERVAL 20000 - static int fw_initializing(struct mlx5_core_dev *dev) { return ioread32be(&dev->iseg->initializing) >> 31; @@ -193,8 +189,6 @@ static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili, unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili); int err = 0; - BUILD_BUG_ON(FW_PRE_INIT_TIMEOUT_MILI < FW_INIT_WARN_MESSAGE_INTERVAL); - while (fw_initializing(dev)) { if (time_after(jiffies, end)) { err = -EBUSY; @@ -205,7 +199,7 @@ static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili, jiffies_to_msecs(end - warn) / 1000); warn = jiffies + msecs_to_jiffies(warn_time_mili); } - msleep(FW_INIT_WAIT_MS); + 
msleep(mlx5_tout_ms(dev, FW_PRE_INIT_WAIT)); } return err; @@ -975,25 +969,34 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) if (mlx5_core_is_pf(dev)) pcie_print_link_status(dev->pdev); + err = mlx5_tout_init(dev); + if (err) { + mlx5_core_err(dev, "Failed initializing timeouts, aborting\n"); + return err; + } + /* wait for firmware to accept initialization segments configurations */ - err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI, FW_INIT_WARN_MESSAGE_INTERVAL); + err = wait_fw_init(dev, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT), + mlx5_tout_ms(dev, FW_PRE_INIT_WARN_MESSAGE_INTERVAL)); if (err) { - mlx5_core_err(dev, "Firmware over %d MS in pre-initializing state, aborting\n", - FW_PRE_INIT_TIMEOUT_MILI); - return err; + mlx5_core_err(dev, "Firmware over %llu MS in pre-initializing state, aborting\n", + mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT)); + goto err_tout_cleanup; } err = mlx5_cmd_init(dev); if (err) { mlx5_core_err(dev, "Failed initializing command interface, aborting\n"); - return err; + goto err_tout_cleanup; } - err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI, 0); + mlx5_tout_query_iseg(dev); + + err = wait_fw_init(dev, mlx5_tout_ms(dev, FW_INIT), 0); if (err) { - mlx5_core_err(dev, "Firmware over %d MS in initializing state, aborting\n", - FW_INIT_TIMEOUT_MILI); + mlx5_core_err(dev, "Firmware over %llu MS in initializing state, aborting\n", + mlx5_tout_ms(dev, FW_INIT)); goto err_cmd_cleanup; } @@ -1062,6 +1065,8 @@ err_disable_hca: err_cmd_cleanup: mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); mlx5_cmd_cleanup(dev); +err_tout_cleanup: + mlx5_tout_cleanup(dev); return err; } @@ -1080,6 +1085,7 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) mlx5_core_disable_hca(dev, 0); mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); mlx5_cmd_cleanup(dev); + mlx5_tout_cleanup(dev); return 0; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ccbd87fbd3bf..fb06e8870aee 100644 --- 
a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -66,10 +66,6 @@ enum { }; enum { - /* one minute for the sake of bringup. Generally, commands must always - * complete and we may need to increase this timeout value - */ - MLX5_CMD_TIMEOUT_MSEC = 60 * 1000, MLX5_CMD_WQ_MAX_NAME = 32, }; @@ -755,6 +751,7 @@ struct mlx5_core_dev { u32 qcam[MLX5_ST_SZ_DW(qcam_reg)]; u8 embedded_cpu; } caps; + struct mlx5_timeouts *timeouts; u64 sys_image_guid; phys_addr_t iseg_base; struct mlx5_init_seg __iomem *iseg; From 32def4120e4876b5367ad58eb3a641bf6915979b Mon Sep 17 00:00:00 2001 From: Amir Tzin Date: Wed, 13 Oct 2021 09:07:13 +0300 Subject: [PATCH 03/13] net/mlx5: Read timeout values from DTOR Replace hard coded timeouts with values stored by firmware in default timeouts register (DTOR). Timeouts are read during driver load. If DTOR is not supported by firmware then fallback to hard coded defaults instead. Signed-off-by: Amir Tzin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/health.h | 1 - .../mellanox/mlx5/core/en/reporter_rx.c | 7 +- .../mellanox/mlx5/core/en/reporter_tx.c | 7 +- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 9 +-- .../ethernet/mellanox/mlx5/core/fw_reset.c | 16 ++--- .../net/ethernet/mellanox/mlx5/core/health.c | 21 +++--- .../ethernet/mellanox/mlx5/core/lib/tout.c | 68 ++++++++++++++++++- .../ethernet/mellanox/mlx5/core/lib/tout.h | 13 ++++ .../net/ethernet/mellanox/mlx5/core/main.c | 6 ++ .../ethernet/mellanox/mlx5/core/pagealloc.c | 16 ++--- 10 files changed, 124 insertions(+), 40 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h index 018262d0164b..d5b7110a4265 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h @@ -32,7 +32,6 @@ void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq); void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq); 
#define MLX5E_REPORTER_PER_Q_MAX_LEN 256 -#define MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC 2000 struct mlx5e_err_ctx { int (*recover)(void *ctx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 0eb125316fe2..74086eb556ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -6,6 +6,7 @@ #include "txrx.h" #include "devlink.h" #include "ptp.h" +#include "lib/tout.h" static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state) { @@ -32,8 +33,10 @@ out: static int mlx5e_wait_for_icosq_flush(struct mlx5e_icosq *icosq) { - unsigned long exp_time = jiffies + - msecs_to_jiffies(MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC); + struct mlx5_core_dev *dev = icosq->channel->mdev; + unsigned long exp_time; + + exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR)); while (time_before(jiffies, exp_time)) { if (icosq->cc == icosq->pc) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index bb682fd751c9..4f4bc8726ec4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -4,11 +4,14 @@ #include "health.h" #include "en/ptp.h" #include "en/devlink.h" +#include "lib/tout.h" static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) { - unsigned long exp_time = jiffies + - msecs_to_jiffies(MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC); + struct mlx5_core_dev *dev = sq->mdev; + unsigned long exp_time; + + exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR)); while (time_before(jiffies, exp_time)) { if (sq->cc == sq->pc) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 016d26f809a5..f4f8993eac17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -35,6 +35,7 @@ #include #include "mlx5_core.h" #include "../../mlxfw/mlxfw.h" +#include "lib/tout.h" #include "accel/tls.h" enum { @@ -317,10 +318,9 @@ int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev) return 0; } -#define MLX5_FAST_TEARDOWN_WAIT_MS 3000 int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev) { - unsigned long end, delay_ms = MLX5_FAST_TEARDOWN_WAIT_MS; + unsigned long end, delay_ms = mlx5_tout_ms(dev, TEARDOWN); u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {}; u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {}; int state; @@ -618,17 +618,18 @@ static void mlx5_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) fwhandle, 0); } -#define MLX5_FSM_REACTIVATE_TOUT 5000 /* msecs */ static int mlx5_fsm_reactivate(struct mlxfw_dev *mlxfw_dev, u8 *status) { - unsigned long exp_time = jiffies + msecs_to_jiffies(MLX5_FSM_REACTIVATE_TOUT); struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; u32 out[MLX5_ST_SZ_DW(mirc_reg)]; u32 in[MLX5_ST_SZ_DW(mirc_reg)]; + unsigned long exp_time; int err; + exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FSM_REACTIVATE)); + if (!MLX5_CAP_MCAM_REG2(dev, mirc)) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 106b50e42b46..eaca79cc7b9d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -3,6 +3,7 @@ #include "fw_reset.h" #include "diag/fw_tracer.h" +#include "lib/tout.h" enum { MLX5_FW_RESET_FLAGS_RESET_REQUESTED, @@ -228,8 +229,6 @@ static void mlx5_sync_reset_request_event(struct work_struct *work) mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. 
Device reset is expected.\n"); } -#define MLX5_PCI_LINK_UP_TIMEOUT 2000 - static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev) { struct pci_bus *bridge_bus = dev->pdev->bus; @@ -286,7 +285,7 @@ static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev) goto restore; } - timeout = jiffies + msecs_to_jiffies(MLX5_PCI_LINK_UP_TIMEOUT); + timeout = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, PCI_TOGGLE)); do { err = pci_read_config_word(bridge, cap + PCI_EXP_LNKSTA, &reg16); if (err) @@ -299,8 +298,8 @@ static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev) if (reg16 & PCI_EXP_LNKSTA_DLLLA) { mlx5_core_info(dev, "PCI Link up\n"); } else { - mlx5_core_err(dev, "PCI link not ready (0x%04x) after %d ms\n", - reg16, MLX5_PCI_LINK_UP_TIMEOUT); + mlx5_core_err(dev, "PCI link not ready (0x%04x) after %llu ms\n", + reg16, mlx5_tout_ms(dev, PCI_TOGGLE)); err = -ETIMEDOUT; } @@ -395,16 +394,15 @@ static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long acti return NOTIFY_OK; } -#define MLX5_FW_RESET_TIMEOUT_MSEC 5000 int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev) { - unsigned long timeout = msecs_to_jiffies(MLX5_FW_RESET_TIMEOUT_MSEC); + unsigned long timeout = msecs_to_jiffies(mlx5_tout_ms(dev, PCI_SYNC_UPDATE)); struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; int err; if (!wait_for_completion_timeout(&fw_reset->done, timeout)) { - mlx5_core_warn(dev, "FW sync reset timeout after %d seconds\n", - MLX5_FW_RESET_TIMEOUT_MSEC / 1000); + mlx5_core_warn(dev, "FW sync reset timeout after %llu seconds\n", + mlx5_tout_ms(dev, PCI_SYNC_UPDATE) / 1000); err = -ETIMEDOUT; goto out; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 037e18dd4be0..6a4dd7f78958 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -40,10 +40,10 @@ #include "lib/eq.h" #include "lib/mlx5.h" #include "lib/pci_vsc.h" +#include
"lib/tout.h" #include "diag/fw_tracer.h" enum { - MLX5_HEALTH_POLL_INTERVAL = 2 * HZ, MAX_MISSES = 3, }; @@ -219,11 +219,9 @@ unlock: mutex_unlock(&dev->intf_state_mutex); } -#define MLX5_CRDUMP_WAIT_MS 60000 -#define MLX5_FW_RESET_WAIT_MS 1000 void mlx5_error_sw_reset(struct mlx5_core_dev *dev) { - unsigned long end, delay_ms = MLX5_FW_RESET_WAIT_MS; + unsigned long end, delay_ms = mlx5_tout_ms(dev, PCI_TOGGLE); int lock = -EBUSY; mutex_lock(&dev->intf_state_mutex); @@ -237,7 +235,7 @@ void mlx5_error_sw_reset(struct mlx5_core_dev *dev) lock = lock_sem_sw_reset(dev, true); if (lock == -EBUSY) { - delay_ms = MLX5_CRDUMP_WAIT_MS; + delay_ms = mlx5_tout_ms(dev, FULL_CRDUMP); goto recover_from_sw_reset; } /* Execute SW reset */ @@ -307,13 +305,11 @@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) mlx5_disable_device(dev); } -/* How much time to wait until health resetting the driver (in msecs) */ -#define MLX5_RECOVERY_WAIT_MSECS 60000 int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev) { unsigned long end; - end = jiffies + msecs_to_jiffies(MLX5_RECOVERY_WAIT_MSECS); + end = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FW_RESET)); while (sensor_pci_not_working(dev)) { if (time_after(jiffies, end)) return -ETIMEDOUT; @@ -674,13 +670,13 @@ static void mlx5_fw_reporters_destroy(struct mlx5_core_dev *dev) devlink_health_reporter_destroy(health->fw_fatal_reporter); } -static unsigned long get_next_poll_jiffies(void) +static unsigned long get_next_poll_jiffies(struct mlx5_core_dev *dev) { unsigned long next; get_random_bytes(&next, sizeof(next)); next %= HZ; - next += jiffies + MLX5_HEALTH_POLL_INTERVAL; + next += jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL)); return next; } @@ -740,11 +736,12 @@ static void poll_health(struct timer_list *t) queue_work(health->wq, &health->report_work); out: - mod_timer(&health->timer, get_next_poll_jiffies()); + mod_timer(&health->timer, get_next_poll_jiffies(dev)); } void 
mlx5_start_health_poll(struct mlx5_core_dev *dev) { + u64 poll_interval_ms = mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL); struct mlx5_core_health *health = &dev->priv.health; timer_setup(&health->timer, poll_health, 0); @@ -753,7 +750,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) health->health = &dev->iseg->health; health->health_counter = &dev->iseg->health_counter; - health->timer.expires = round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL); + health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms); add_timer(&health->timer); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c index ee266e0d122a..0dd96a6b140d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c @@ -13,7 +13,17 @@ static const u32 tout_def_sw_val[MAX_TIMEOUT_TYPES] = { [MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS] = 20000, [MLX5_TO_FW_PRE_INIT_WAIT_MS] = 2, [MLX5_TO_FW_INIT_MS] = 2000, - [MLX5_TO_CMD_MS] = 60000 + [MLX5_TO_CMD_MS] = 60000, + [MLX5_TO_PCI_TOGGLE_MS] = 2000, + [MLX5_TO_HEALTH_POLL_INTERVAL_MS] = 2000, + [MLX5_TO_FULL_CRDUMP_MS] = 60000, + [MLX5_TO_FW_RESET_MS] = 60000, + [MLX5_TO_FLUSH_ON_ERROR_MS] = 2000, + [MLX5_TO_PCI_SYNC_UPDATE_MS] = 5000, + [MLX5_TO_TEARDOWN_MS] = 3000, + [MLX5_TO_FSM_REACTIVATE_MS] = 5000, + [MLX5_TO_RECLAIM_PAGES_MS] = 5000, + [MLX5_TO_RECLAIM_VFS_PAGES_MS] = 120000 }; static void tout_set(struct mlx5_core_dev *dev, u64 val, enum mlx5_timeouts_types type) @@ -94,3 +104,59 @@ u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type) { return dev->timeouts->to[type]; } + +#define MLX5_TIMEOUT_QUERY(fld, reg_out) \ + ({ \ + struct mlx5_ifc_default_timeout_bits *time_field; \ + u32 to_multi, to_value; \ + u64 to_val_ms; \ + \ + time_field = MLX5_ADDR_OF(dtor_reg, reg_out, fld); \ + to_multi = MLX5_GET(default_timeout, time_field, to_multiplier); \ + to_value = MLX5_GET(default_timeout, 
time_field, to_value); \ + to_val_ms = tout_convert_reg_field_to_ms(to_multi, to_value); \ + to_val_ms; \ + }) + +#define MLX5_TIMEOUT_FILL(fld, reg_out, dev, to_type, to_extra) \ + ({ \ + u64 fw_to = MLX5_TIMEOUT_QUERY(fld, reg_out); \ + tout_set(dev, fw_to + (to_extra), to_type); \ + fw_to; \ + }) + +static int tout_query_dtor(struct mlx5_core_dev *dev) +{ + u64 pcie_toggle_to_val, tear_down_to_val; + u32 out[MLX5_ST_SZ_DW(dtor_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(dtor_reg)] = {}; + int err; + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_DTOR, 0, 0); + if (err) + return err; + + pcie_toggle_to_val = MLX5_TIMEOUT_FILL(pcie_toggle_to, out, dev, MLX5_TO_PCI_TOGGLE_MS, 0); + MLX5_TIMEOUT_FILL(fw_reset_to, out, dev, MLX5_TO_FW_RESET_MS, pcie_toggle_to_val); + + tear_down_to_val = MLX5_TIMEOUT_FILL(tear_down_to, out, dev, MLX5_TO_TEARDOWN_MS, 0); + MLX5_TIMEOUT_FILL(pci_sync_update_to, out, dev, MLX5_TO_PCI_SYNC_UPDATE_MS, + tear_down_to_val); + + MLX5_TIMEOUT_FILL(health_poll_to, out, dev, MLX5_TO_HEALTH_POLL_INTERVAL_MS, 0); + MLX5_TIMEOUT_FILL(full_crdump_to, out, dev, MLX5_TO_FULL_CRDUMP_MS, 0); + MLX5_TIMEOUT_FILL(flush_on_err_to, out, dev, MLX5_TO_FLUSH_ON_ERROR_MS, 0); + MLX5_TIMEOUT_FILL(fsm_reactivate_to, out, dev, MLX5_TO_FSM_REACTIVATE_MS, 0); + MLX5_TIMEOUT_FILL(reclaim_pages_to, out, dev, MLX5_TO_RECLAIM_PAGES_MS, 0); + MLX5_TIMEOUT_FILL(reclaim_vfs_pages_to, out, dev, MLX5_TO_RECLAIM_VFS_PAGES_MS, 0); + + return 0; +} + +int mlx5_tout_query_dtor(struct mlx5_core_dev *dev) +{ + if (tout_is_supported(dev)) + return tout_query_dtor(dev); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h index 7e6fc61c5b45..31faa5c17aa9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h @@ -14,6 +14,18 @@ enum mlx5_timeouts_types { MLX5_TO_FW_INIT_MS, MLX5_TO_CMD_MS, + /* DTOR timeouts */ + 
MLX5_TO_PCI_TOGGLE_MS, + MLX5_TO_HEALTH_POLL_INTERVAL_MS, + MLX5_TO_FULL_CRDUMP_MS, + MLX5_TO_FW_RESET_MS, + MLX5_TO_FLUSH_ON_ERROR_MS, + MLX5_TO_PCI_SYNC_UPDATE_MS, + MLX5_TO_TEARDOWN_MS, + MLX5_TO_FSM_REACTIVATE_MS, + MLX5_TO_RECLAIM_PAGES_MS, + MLX5_TO_RECLAIM_VFS_PAGES_MS, + MAX_TIMEOUT_TYPES }; @@ -21,6 +33,7 @@ struct mlx5_core_dev; int mlx5_tout_init(struct mlx5_core_dev *dev); void mlx5_tout_cleanup(struct mlx5_core_dev *dev); void mlx5_tout_query_iseg(struct mlx5_core_dev *dev); +int mlx5_tout_query_dtor(struct mlx5_core_dev *dev); u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type); #define mlx5_tout_ms(dev, type) _mlx5_tout_ms(dev, MLX5_TO_##type##_MS) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index b4893eac6ed6..75d284272119 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1020,6 +1020,12 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) goto err_disable_hca; } + err = mlx5_tout_query_dtor(dev); + if (err) { + mlx5_core_err(dev, "failed to read dtor\n"); + goto reclaim_boot_pages; + } + err = set_hca_ctrl(dev); if (err) { mlx5_core_err(dev, "set_hca_ctrl failed\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 110c0837f95b..f6b5451328fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -38,6 +38,7 @@ #include #include "mlx5_core.h" #include "lib/eq.h" +#include "lib/tout.h" enum { MLX5_PAGES_CANT_GIVE = 0, @@ -64,11 +65,6 @@ struct fw_page { unsigned int free_count; }; -enum { - MAX_RECLAIM_TIME_MSECS = 5000, - MAX_RECLAIM_VFS_PAGES_TIME_MSECS = 2 * 1000 * 60, -}; - enum { MLX5_MAX_RECLAIM_TIME_MILI = 5000, MLX5_NUM_4K_IN_PAGE = PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE, @@ -641,7 +637,8 @@ static int 
optimal_reclaimed_pages(void) static int mlx5_reclaim_root_pages(struct mlx5_core_dev *dev, struct rb_root *root, u16 func_id) { - unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS); + u64 recl_pages_to_jiffies = msecs_to_jiffies(mlx5_tout_ms(dev, RECLAIM_PAGES)); + unsigned long end = jiffies + recl_pages_to_jiffies; while (!RB_EMPTY_ROOT(root)) { int nclaimed; @@ -656,7 +653,7 @@ static int mlx5_reclaim_root_pages(struct mlx5_core_dev *dev, } if (nclaimed) - end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS); + end = jiffies + recl_pages_to_jiffies; if (time_after(jiffies, end)) { mlx5_core_warn(dev, "FW did not return all pages. giving up...\n"); @@ -727,7 +724,8 @@ void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages) { - unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); + u64 recl_vf_pages_to_jiffies = msecs_to_jiffies(mlx5_tout_ms(dev, RECLAIM_VFS_PAGES)); + unsigned long end = jiffies + recl_vf_pages_to_jiffies; int prev_pages = *pages; /* In case of internal error we will free the pages manually later */ @@ -743,7 +741,7 @@ int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages) return -ETIMEDOUT; } if (*pages < prev_pages) { - end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); + end = jiffies + recl_vf_pages_to_jiffies; prev_pages = *pages; } msleep(50); From 17ac528d886841a0b183f4d4a0205176eccfd158 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 12 Oct 2021 14:49:15 +0300 Subject: [PATCH 04/13] net/mlx5: Bridge, provide flow source hints Currently, SMFS mode doesn't support rx-loopback flows which causes bridge egress rules to be rejected because without hint rules for both rx and tx destinations are created by default. Provide explicit flow source hints for compatibility with SMFS. 
Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Reviewed-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index ed72246d1d83..588622ba38c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -677,6 +677,10 @@ mlx5_esw_bridge_egress_flow_create(u16 vport_num, u16 esw_owner_vhca_id, const u if (!rule_spec) return ERR_PTR(-ENOMEM); + if (MLX5_CAP_ESW_FLOWTABLE(bridge->br_offloads->esw->dev, flow_source) && + vport_num == MLX5_VPORT_UPLINK) + rule_spec->flow_context.flow_source = + MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; dmac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value, From 9fbe1c25ecca87f1c390d1bfd347df92749941c5 Mon Sep 17 00:00:00 2001 From: Moosa Baransi Date: Sun, 26 Sep 2021 17:59:52 +0300 Subject: [PATCH 05/13] net/mlx5i: Enable Rx steering for IPoIB via ethtool Enable steering IPoIB packets via ethtool, the same way it is done today for Ethernet packets. 
Signed-off-by: Moosa Baransi Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/fs.h | 8 ++--- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 6 ++-- .../mellanox/mlx5/core/en_fs_ethtool.c | 6 ++-- .../mellanox/mlx5/core/ipoib/ethtool.c | 30 +++++++++++++++++++ .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 3 ++ 5 files changed, 43 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index 41684a6c44e9..1c23453a041d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -125,15 +125,15 @@ struct mlx5e_ethtool_steering { void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv); void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv); -int mlx5e_ethtool_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd); -int mlx5e_ethtool_get_rxnfc(struct net_device *dev, +int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd); +int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *info, u32 *rule_locs); #else static inline void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv) { } static inline void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv) { } -static inline int mlx5e_ethtool_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +static inline int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd) { return -EOPNOTSUPP; } -static inline int mlx5e_ethtool_get_rxnfc(struct net_device *dev, +static inline int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *info, u32 *rule_locs) { return -EOPNOTSUPP; } #endif /* CONFIG_MLX5_EN_RXNFC */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 7a97e0e21fd7..25926e581d18 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -2137,12 +2137,14 @@ int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, return 0; } - return mlx5e_ethtool_get_rxnfc(dev, info, rule_locs); + return mlx5e_ethtool_get_rxnfc(priv, info, rule_locs); } int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) { - return mlx5e_ethtool_set_rxnfc(dev, cmd); + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_set_rxnfc(priv, cmd); } static int query_port_status_opcode(struct mlx5_core_dev *mdev, u32 *status_opcode) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 03693fa74a70..81ebf281cdb4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -937,9 +937,8 @@ static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv, return 0; } -int mlx5e_ethtool_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd) { - struct mlx5e_priv *priv = netdev_priv(dev); int err = 0; switch (cmd->cmd) { @@ -960,10 +959,9 @@ int mlx5e_ethtool_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) return err; } -int mlx5e_ethtool_get_rxnfc(struct net_device *dev, +int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *info, u32 *rule_locs) { - struct mlx5e_priv *priv = netdev_priv(dev); int err = 0; switch (info->cmd) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index 0c8594c7df21..ee0eb4a4b819 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -33,6 +33,11 @@ #include "en.h" #include "ipoib.h" +static u32 mlx5i_flow_type_mask(u32 flow_type) +{ + return flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | 
FLOW_RSS); +} + static void mlx5i_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) { @@ -217,6 +222,27 @@ static int mlx5i_get_link_ksettings(struct net_device *netdev, return 0; } +#ifdef CONFIG_MLX5_EN_RXNFC +static int mlx5i_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct ethtool_rx_flow_spec *fs = &cmd->fs; + + if (mlx5i_flow_type_mask(fs->flow_type) == ETHER_FLOW) + return -EINVAL; + + return mlx5e_ethtool_set_rxnfc(priv, cmd); +} + +static int mlx5i_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, + u32 *rule_locs) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + return mlx5e_ethtool_get_rxnfc(priv, info, rule_locs); +} +#endif + const struct ethtool_ops mlx5i_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | @@ -233,6 +259,10 @@ const struct ethtool_ops mlx5i_ethtool_ops = { .get_coalesce = mlx5i_get_coalesce, .set_coalesce = mlx5i_set_coalesce, .get_ts_info = mlx5i_get_ts_info, +#ifdef CONFIG_MLX5_EN_RXNFC + .get_rxnfc = mlx5i_get_rxnfc, + .set_rxnfc = mlx5i_set_rxnfc, +#endif .get_link_ksettings = mlx5i_get_link_ksettings, .get_link = ethtool_op_get_link, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index f7ebc1f9283f..3b8d8ada1a01 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -336,6 +336,8 @@ static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) goto err_destroy_arfs_tables; } + mlx5e_ethtool_init_steering(priv); + return 0; err_destroy_arfs_tables: @@ -348,6 +350,7 @@ static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) { mlx5e_destroy_ttc_table(priv); mlx5e_arfs_destroy_tables(priv); + mlx5e_ethtool_cleanup_steering(priv); } static int mlx5i_init_rx(struct mlx5e_priv *priv) From 
fbfa97b4d79f26042f188b84959065213e9d3e99 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 18 Aug 2021 13:21:30 +0300 Subject: [PATCH 06/13] net/mlx5: Disable roce at HCA level Currently, when a user disables roce via the devlink param, this change isn't passed down to the device. If device allows disabling RoCE at device level, make use of it. This instructs the device to skip memory allocations related to RoCE functionality which otherwise is done by the device. Signed-off-by: Shay Drory Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/devlink.c | 3 ++- .../net/ethernet/mellanox/mlx5/core/main.c | 25 ++++++++++++++++++- include/linux/mlx5/driver.h | 9 ++++--- include/linux/mlx5/mlx5_ifc.h | 3 ++- 4 files changed, 33 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index a85341a41cd0..1c98652b244a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -454,7 +454,8 @@ static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id, struct mlx5_core_dev *dev = devlink_priv(devlink); bool new_state = val.vbool; - if (new_state && !MLX5_CAP_GEN(dev, roce)) { + if (new_state && !MLX5_CAP_GEN(dev, roce) && + !MLX5_CAP_GEN(dev, roce_rw_supported)) { NL_SET_ERR_MSG_MOD(extack, "Device doesn't support RoCE"); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 75d284272119..47d92fb459ed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -558,15 +558,38 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) MLX5_SET(cmd_hca_cap, set_hca_cap, num_total_dynamic_vf_msix, MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix)); + if (MLX5_CAP_GEN(dev, roce_rw_supported)) + MLX5_SET(cmd_hca_cap, 
set_hca_cap, roce, mlx5_is_roce_init_enabled(dev)); + return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); } +/* Cached MLX5_CAP_GEN(dev, roce) can be out of sync this early in the + * boot process. + * In case RoCE cap is writable in FW and user/devlink requested to change the + * cap, we are yet to query the final state of the above cap. + * Hence, the need for this function. + * + * Returns + * True: + * 1) RoCE cap is read only in FW and already disabled + * OR: + * 2) RoCE cap is writable in FW and user/devlink requested it off. + * + * In any other case, return False. + */ +static bool is_roce_fw_disabled(struct mlx5_core_dev *dev) +{ + return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_init_enabled(dev)) || + (!MLX5_CAP_GEN(dev, roce_rw_supported) && !MLX5_CAP_GEN(dev, roce)); +} + static int handle_hca_cap_roce(struct mlx5_core_dev *dev, void *set_ctx) { void *set_hca_cap; int err; - if (!MLX5_CAP_GEN(dev, roce)) + if (is_roce_fw_disabled(dev)) return 0; err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index fb06e8870aee..7c8b5f06c2cd 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1251,11 +1251,12 @@ static inline bool mlx5_is_roce_init_enabled(struct mlx5_core_dev *dev) { struct devlink *devlink = priv_to_devlink(dev); union devlink_param_value val; + int err; - devlink_param_driverinit_value_get(devlink, - DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, - &val); - return val.vbool; + err = devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, + &val); + return err ? 
MLX5_CAP_GEN(dev, roce) : val.vbool; } #endif /* MLX5_DRIVER_H */ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index b8bff5109656..c614ad1da44d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1588,7 +1588,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_tis_per_sq[0x5]; u8 ext_stride_num_range[0x1]; - u8 reserved_at_3a1[0x2]; + u8 roce_rw_supported[0x1]; + u8 reserved_at_3a2[0x1]; u8 log_max_stride_sz_rq[0x5]; u8 reserved_at_3a8[0x3]; u8 log_min_stride_sz_rq[0x5]; From 88594d83314ad06314c9743b1ec49d0a95a5d4c7 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 30 Sep 2021 14:23:32 +0300 Subject: [PATCH 07/13] net/mlx5: CT: Fix missing cleanup of ct nat table on init failure If CT fails to initialize its rhashtables, it doesn't destroy the ct nat global table. Destroy the ct nat global table on ct init failure. Fixes: d7cade513752 ("net/mlx5e: check return value of rhashtable_init") Signed-off-by: Paul Blakey Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 225748a9e52a..740cd6f088b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -2141,6 +2141,7 @@ err_ct_tuples_nat_ht: err_ct_tuples_ht: rhashtable_destroy(&ct_priv->zone_ht); err_ct_zone_ht: + mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat); err_ct_nat_tbl: mlx5_chains_destroy_global_table(chains, ct_priv->ct); err_ct_tbl: From 0885ae1a9d34d946e12c1cf9834463ee3541a63a Mon Sep 17 00:00:00 2001 From: Abhiram R N Date: Wed, 22 Sep 2021 12:00:07 +0530 Subject: [PATCH 08/13] net/mlx5e: Add extack msgs related to TC for better debug As EOPNOTSUPP and EINVAL are returned from the driver in multiple places, it becomes difficult to understand the reason only with
error code. With the netlink extack message exact reason will be known and will aid in debugging. Signed-off-by: Abhiram R N Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 106 +++++++++++++----- 1 file changed, 76 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index d92ee2f37c22..420b3ec0eb04 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1891,8 +1891,10 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, bool needs_mapping, sets_mapping; int err; - if (!mlx5e_is_eswitch_flow(flow)) + if (!mlx5e_is_eswitch_flow(flow)) { + NL_SET_ERR_MSG_MOD(extack, "Match on tunnel is not supported"); return -EOPNOTSUPP; + } needs_mapping = !!flow->attr->chain; sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f); @@ -2264,8 +2266,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, addr_type = match.key->addr_type; /* the HW doesn't support frag first/later */ - if (match.mask->flags & FLOW_DIS_FIRST_FRAG) + if (match.mask->flags & FLOW_DIS_FIRST_FRAG) { + NL_SET_ERR_MSG_MOD(extack, "Match on frag first/later is not supported"); return -EOPNOTSUPP; + } if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) { MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); @@ -2432,8 +2436,11 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, switch (ip_proto) { case IPPROTO_ICMP: if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & - MLX5_FLEX_PROTO_ICMP)) + MLX5_FLEX_PROTO_ICMP)) { + NL_SET_ERR_MSG_MOD(extack, + "Match on Flex protocols for ICMP is not supported"); return -EOPNOTSUPP; + } MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type, match.mask->type); MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type, @@ -2445,8 +2452,11 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, break; case IPPROTO_ICMPV6: if (!(MLX5_CAP_GEN(priv->mdev, 
flex_parser_protocols) & - MLX5_FLEX_PROTO_ICMPV6)) + MLX5_FLEX_PROTO_ICMPV6)) { + NL_SET_ERR_MSG_MOD(extack, + "Match on Flex protocols for ICMPV6 is not supported"); return -EOPNOTSUPP; + } MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type, match.mask->type); MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type, @@ -2552,15 +2562,19 @@ static int pedit_header_offsets[] = { #define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype]) static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset, - struct pedit_headers_action *hdrs) + struct pedit_headers_action *hdrs, + struct netlink_ext_ack *extack) { u32 *curr_pmask, *curr_pval; curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset); curr_pval = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset); - if (*curr_pmask & mask) /* disallow acting twice on the same location */ + if (*curr_pmask & mask) { /* disallow acting twice on the same location */ + NL_SET_ERR_MSG_MOD(extack, + "curr_pmask and new mask same. 
Acting twice on same location"); goto out_err; + } *curr_pmask |= mask; *curr_pval |= (val & mask); @@ -2893,7 +2907,7 @@ parse_pedit_to_modify_hdr(struct mlx5e_priv *priv, val = act->mangle.val; offset = act->mangle.offset; - err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]); + err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd], extack); if (err) goto out_err; @@ -2912,8 +2926,10 @@ parse_pedit_to_reformat(const struct flow_action_entry *act, u32 mask, val, offset; u32 *p; - if (act->id != FLOW_ACTION_MANGLE) + if (act->id != FLOW_ACTION_MANGLE) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported action id"); return -EOPNOTSUPP; + } if (act->mangle.htype != FLOW_ACT_MANGLE_HDR_TYPE_ETH) { NL_SET_ERR_MSG_MOD(extack, "Only Ethernet modification is supported"); @@ -3429,12 +3445,16 @@ parse_tc_nic_actions(struct mlx5e_priv *priv, u32 action = 0; int err, i; - if (!flow_action_has_entries(flow_action)) + if (!flow_action_has_entries(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries"); return -EINVAL; + } if (!flow_action_hw_stats_check(flow_action, extack, - FLOW_ACTION_HW_STATS_DELAYED_BIT)) + FLOW_ACTION_HW_STATS_DELAYED_BIT)) { + NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported"); return -EOPNOTSUPP; + } nic_attr = attr->nic_attr; nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; @@ -3524,7 +3544,8 @@ parse_tc_nic_actions(struct mlx5e_priv *priv, flow_flag_set(flow, CT); break; default: - NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported"); + NL_SET_ERR_MSG_MOD(extack, + "The offload action is not supported in NIC action"); return -EOPNOTSUPP; } } @@ -3562,19 +3583,25 @@ static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv, static int parse_tc_vlan_action(struct mlx5e_priv *priv, const struct flow_action_entry *act, struct mlx5_esw_flow_attr *attr, - u32 *action) + u32 *action, + struct netlink_ext_ack *extack) { u8 vlan_idx = attr->total_vlan; - if (vlan_idx >= MLX5_FS_VLAN_DEPTH) + if 
(vlan_idx >= MLX5_FS_VLAN_DEPTH) { + NL_SET_ERR_MSG_MOD(extack, "Total vlans used is greater than supported"); return -EOPNOTSUPP; + } switch (act->id) { case FLOW_ACTION_VLAN_POP: if (vlan_idx) { if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, - MLX5_FS_VLAN_DEPTH)) + MLX5_FS_VLAN_DEPTH)) { + NL_SET_ERR_MSG_MOD(extack, + "vlan pop action is not supported"); return -EOPNOTSUPP; + } *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2; } else { @@ -3590,20 +3617,27 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv, if (vlan_idx) { if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, - MLX5_FS_VLAN_DEPTH)) + MLX5_FS_VLAN_DEPTH)) { + NL_SET_ERR_MSG_MOD(extack, + "vlan push action is not supported for vlan depth > 1"); return -EOPNOTSUPP; + } *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; } else { if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) && (act->vlan.proto != htons(ETH_P_8021Q) || - act->vlan.prio)) + act->vlan.prio)) { + NL_SET_ERR_MSG_MOD(extack, + "vlan push action is not supported"); return -EOPNOTSUPP; + } *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; } break; default: + NL_SET_ERR_MSG_MOD(extack, "Unexpected action id for VLAN"); return -EINVAL; } @@ -3637,7 +3671,8 @@ static struct net_device *get_fdb_out_dev(struct net_device *uplink_dev, static int add_vlan_push_action(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr, struct net_device **out_dev, - u32 *action) + u32 *action, + struct netlink_ext_ack *extack) { struct net_device *vlan_dev = *out_dev; struct flow_action_entry vlan_act = { @@ -3648,7 +3683,7 @@ static int add_vlan_push_action(struct mlx5e_priv *priv, }; int err; - err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action); + err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action, extack); if (err) return err; @@ -3659,14 +3694,15 @@ static int add_vlan_push_action(struct mlx5e_priv *priv, return -ENODEV; if (is_vlan_dev(*out_dev)) - err = add_vlan_push_action(priv, attr, out_dev, action); + err 
= add_vlan_push_action(priv, attr, out_dev, action, extack); return err; } static int add_vlan_pop_action(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr, - u32 *action) + u32 *action, + struct netlink_ext_ack *extack) { struct flow_action_entry vlan_act = { .id = FLOW_ACTION_VLAN_POP, @@ -3676,7 +3712,7 @@ static int add_vlan_pop_action(struct mlx5e_priv *priv, nest_level = attr->parse_attr->filter_dev->lower_level - priv->netdev->lower_level; while (nest_level--) { - err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action); + err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action, extack); if (err) return err; } @@ -3798,12 +3834,16 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, int err, i, if_count = 0; bool mpls_push = false; - if (!flow_action_has_entries(flow_action)) + if (!flow_action_has_entries(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries"); return -EINVAL; + } if (!flow_action_hw_stats_check(flow_action, extack, - FLOW_ACTION_HW_STATS_DELAYED_BIT)) + FLOW_ACTION_HW_STATS_DELAYED_BIT)) { + NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported"); return -EOPNOTSUPP; + } esw_attr = attr->esw_attr; parse_attr = attr->parse_attr; @@ -3952,14 +3992,14 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, if (is_vlan_dev(out_dev)) { err = add_vlan_push_action(priv, attr, &out_dev, - &action); + &action, extack); if (err) return err; } if (is_vlan_dev(parse_attr->filter_dev)) { err = add_vlan_pop_action(priv, attr, - &action); + &action, extack); if (err) return err; } @@ -4008,10 +4048,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, break; case FLOW_ACTION_TUNNEL_ENCAP: info = act->tunnel; - if (info) + if (info) { encap = true; - else + } else { + NL_SET_ERR_MSG_MOD(extack, + "Zero tunnel attributes is not supported"); return -EOPNOTSUPP; + } break; case FLOW_ACTION_VLAN_PUSH: @@ -4025,7 +4068,7 @@ static int parse_tc_fdb_actions(struct 
mlx5e_priv *priv, act, parse_attr, hdrs, &action, extack); } else { - err = parse_tc_vlan_action(priv, act, esw_attr, &action); + err = parse_tc_vlan_action(priv, act, esw_attr, &action, extack); } if (err) return err; @@ -4079,7 +4122,8 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, flow_flag_set(flow, SAMPLE); break; default: - NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported"); + NL_SET_ERR_MSG_MOD(extack, + "The offload action is not supported in FDB action"); return -EOPNOTSUPP; } } @@ -4753,8 +4797,10 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if (!flow_action_basic_hw_stats_check(flow_action, extack)) + if (!flow_action_basic_hw_stats_check(flow_action, extack)) { + NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported"); return -EOPNOTSUPP; + } flow_action_for_each(i, act, flow_action) { switch (act->id) { From 0e6f3ef469bbf69ea6840aa4d15dcfc8ce978760 Mon Sep 17 00:00:00 2001 From: Len Baker Date: Sun, 5 Sep 2021 09:49:36 +0200 Subject: [PATCH 09/13] net/mlx5: DR, Prefer kcalloc over open coded arithmetic As noted in the "Deprecated Interfaces, Language Features, Attributes, and Conventions" documentation [1], size calculations (especially multiplication) should not be performed in memory allocator (or similar) function arguments due to the risk of them overflowing. This could lead to values wrapping around and a smaller allocation being made than the caller was expecting. Using those allocations could lead to linear overflows of heap memory and other misbehaviors. So, refactor the code a bit to use the purpose specific kcalloc() function instead of the argument size * count in the kzalloc() function. 
[1] https://www.kernel.org/doc/html/v5.14/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments Signed-off-by: Len Baker Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/steering/dr_action.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 50630112c8ff..07936841ce99 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -854,6 +854,7 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, struct mlx5dr_action *action; bool reformat_req = false; u32 num_of_ref = 0; + u32 ref_act_cnt; int ret; int i; @@ -862,11 +863,14 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, return NULL; } - hw_dests = kzalloc(sizeof(*hw_dests) * num_of_dests, GFP_KERNEL); + hw_dests = kcalloc(num_of_dests, sizeof(*hw_dests), GFP_KERNEL); if (!hw_dests) return NULL; - ref_actions = kzalloc(sizeof(*ref_actions) * num_of_dests * 2, GFP_KERNEL); + if (unlikely(check_mul_overflow(num_of_dests, 2u, &ref_act_cnt))) + goto free_hw_dests; + + ref_actions = kcalloc(ref_act_cnt, sizeof(*ref_actions), GFP_KERNEL); if (!ref_actions) goto free_hw_dests; From 7b1b6d35f045d677f34f8085ac02827fe4080d7e Mon Sep 17 00:00:00 2001 From: Rongwei Liu Date: Fri, 8 Oct 2021 09:02:39 +0300 Subject: [PATCH 10/13] net/mlx5: Check return status first when querying system_image_guid When querying system_image_guid from firmware, we should check the return value first. The buffer content is valid only if the query succeeds.
Signed-off-by: Rongwei Liu Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/vport.c | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 4c1440a95ad7..8846d30a380a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -421,19 +421,21 @@ int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, { u32 *out; int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + int err; out = kvzalloc(outlen, GFP_KERNEL); if (!out) return -ENOMEM; - mlx5_query_nic_vport_context(mdev, 0, out); + err = mlx5_query_nic_vport_context(mdev, 0, out); + if (err) + goto out; *system_image_guid = MLX5_GET64(query_nic_vport_context_out, out, nic_vport_context.system_image_guid); - +out: kvfree(out); - - return 0; + return err; } EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_system_image_guid); @@ -1133,19 +1135,20 @@ EXPORT_SYMBOL_GPL(mlx5_nic_vport_unaffiliate_multiport); u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev) { int port_type_cap = MLX5_CAP_GEN(mdev, port_type); - u64 tmp = 0; + u64 tmp; + int err; if (mdev->sys_image_guid) return mdev->sys_image_guid; if (port_type_cap == MLX5_CAP_PORT_TYPE_ETH) - mlx5_query_nic_vport_system_image_guid(mdev, &tmp); + err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp); else - mlx5_query_hca_vport_system_image_guid(mdev, &tmp); + err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp); - mdev->sys_image_guid = tmp; + mdev->sys_image_guid = err ? 0 : tmp; - return tmp; + return mdev->sys_image_guid; } EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid); From 2ec16ddde1fa31a83aee04320b248e94348d9152 Mon Sep 17 00:00:00 2001 From: Rongwei Liu Date: Thu, 16 Sep 2021 10:46:17 +0300 Subject: [PATCH 11/13] net/mlx5: Introduce new device index wrapper Introduce mlx5_get_dev_index() as a wrapper for obtaining the device index, replacing the open-coded PCI_FUNC(dev->pdev->devfn). Downstream patches will change how the index is derived.
Signed-off-by: Rongwei Liu Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c | 2 +- include/linux/mlx5/driver.h | 5 +++++ 6 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c index 86e079310ac3..ae52e7f38306 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c @@ -24,7 +24,7 @@ int mlx5e_devlink_port_register(struct mlx5e_priv *priv) if (mlx5_core_is_pf(priv->mdev)) { attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; - attrs.phys.port_number = PCI_FUNC(priv->mdev->pdev->devfn); + attrs.phys.port_number = mlx5_get_dev_index(priv->mdev); if (MLX5_ESWITCH_MANAGER(priv->mdev)) { mlx5e_devlink_get_port_parent_id(priv->mdev, &ppid); memcpy(attrs.switch_id.id, ppid.id, ppid.id_len); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index 20af557ae30c..7f9b96d9537e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -36,7 +36,7 @@ static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16 return NULL; mlx5_esw_get_port_parent_id(dev, &ppid); - pfnum = PCI_FUNC(dev->pdev->devfn); + pfnum = mlx5_get_dev_index(dev); external = mlx5_core_is_ecpf_esw_manager(dev); if (external) controller_num = dev->priv.eswitch->offloads.host_number + 1; @@ -149,7 +149,7 @@ int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_p if (IS_ERR(vport)) return PTR_ERR(vport); - pfnum = 
PCI_FUNC(dev->pdev->devfn); + pfnum = mlx5_get_dev_index(dev); mlx5_esw_get_port_parent_id(dev, &ppid); memcpy(dl_port->attrs.switch_id.id, &ppid.id[0], ppid.id_len); dl_port->attrs.switch_id.id_len = ppid.id_len; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index ca7e31a1a431..3e5a7d74020b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2798,7 +2798,7 @@ u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw) int id; /* Only 4 bits of pf_num */ - pf_num = PCI_FUNC(esw->dev->pdev->devfn); + pf_num = mlx5_get_dev_index(esw->dev); if (pf_num > max_pf_num) return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index ca5690b0a7ab..f35c8ba48aac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -688,7 +688,7 @@ static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev, struct mlx5_core_dev *dev, struct net_device *netdev) { - unsigned int fn = PCI_FUNC(dev->pdev->devfn); + unsigned int fn = mlx5_get_dev_index(dev); if (fn >= MLX5_MAX_PORTS) return; @@ -718,7 +718,7 @@ static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev, static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev, struct mlx5_core_dev *dev) { - unsigned int fn = PCI_FUNC(dev->pdev->devfn); + unsigned int fn = mlx5_get_dev_index(dev); if (fn >= MLX5_MAX_PORTS) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index 13891fdc607e..e1bb3acf45e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -323,7 +323,7 @@ mlx5_sf_new_check_attr(struct mlx5_core_dev *dev, const struct devlink_port_new_ NL_SET_ERR_MSG_MOD(extack, "External controller is unsupported"); 
return -EOPNOTSUPP; } - if (new_attr->pfnum != PCI_FUNC(dev->pdev->devfn)) { + if (new_attr->pfnum != mlx5_get_dev_index(dev)) { NL_SET_ERR_MSG_MOD(extack, "Invalid pfnum supplied"); return -EOPNOTSUPP; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7c8b5f06c2cd..aecc38b90de5 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1243,6 +1243,11 @@ static inline int mlx5_core_native_port_num(struct mlx5_core_dev *dev) return MLX5_CAP_GEN(dev, native_port_num); } +static inline int mlx5_get_dev_index(struct mlx5_core_dev *dev) +{ + return PCI_FUNC(dev->pdev->devfn); +} + enum { MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, }; From 1021d0645d593ea86193c5fc371e33e5b208e14d Mon Sep 17 00:00:00 2001 From: Rongwei Liu Date: Tue, 12 Oct 2021 10:40:52 +0300 Subject: [PATCH 12/13] net/mlx5: Use native_port_num as 1st option of device index Using "native_port_num" can support more NICs. Fallback to PCIe IDs if "native_port_num" query fails. Signed-off-by: Rongwei Liu Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index aecc38b90de5..cf508685abca 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1245,7 +1245,12 @@ static inline int mlx5_core_native_port_num(struct mlx5_core_dev *dev) static inline int mlx5_get_dev_index(struct mlx5_core_dev *dev) { - return PCI_FUNC(dev->pdev->devfn); + int idx = MLX5_CAP_GEN(dev, native_port_num); + + if (idx >= 1 && idx <= MLX5_MAX_PORTS) + return idx - 1; + else + return PCI_FUNC(dev->pdev->devfn); } enum { From 8a543184d79c83d0887c25cf202a43559ba39583 Mon Sep 17 00:00:00 2001 From: Rongwei Liu Date: Tue, 12 Oct 2021 10:53:00 +0300 Subject: [PATCH 13/13] net/mlx5: Use system_image_guid to determine bonding With specific NICs, the PFs may have different PCIe ids like 0001:01:00.0/1 and 
0002:02:00.0/1. For PFs with the same system_image_guid, the driver should consider them under the same physical NIC and they are legal to bond together. If firmware doesn't support system_image_guid, set it to zero and fall back to using PCIe ids. Signed-off-by: Rongwei Liu Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index e8093c4e09d4..a8b84d53dfb0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "mlx5_core.h" /* intf dev list mutex */ @@ -537,6 +538,16 @@ int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev) return add_drivers(dev); } +static bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev) +{ + u64 fsystem_guid, psystem_guid; + + fsystem_guid = mlx5_query_nic_system_image_guid(dev); + psystem_guid = mlx5_query_nic_system_image_guid(peer_dev); + + return (fsystem_guid && psystem_guid && fsystem_guid == psystem_guid); +} + static u32 mlx5_gen_pci_id(const struct mlx5_core_dev *dev) { return (u32)((pci_domain_nr(dev->pdev->bus) << 16) | @@ -556,7 +567,8 @@ static int next_phys_dev(struct device *dev, const void *data) if (mdev == curr) return 0; - if (mlx5_gen_pci_id(mdev) != mlx5_gen_pci_id(curr)) + if (!mlx5_same_hw_devs(mdev, (struct mlx5_core_dev *)curr) && + mlx5_gen_pci_id(mdev) != mlx5_gen_pci_id(curr)) return 0; return 1;