From fe785f56ad5886c08d1cadd9e8b4e1ff6a1866f6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 18 Nov 2021 15:21:02 +0100 Subject: [PATCH 01/85] iwlwifi: pcie: fix constant-conversion warning Both gcc-11 and clang point out a potential issue with integer overflow when the iwl_dev_info_table[] array is empty. This is what clang warns: drivers/net/wireless/intel/iwlwifi/pcie/drv.c:1344:42: error: implicit conversion from 'unsigned long' to 'int' changes value from 18446744073709551615 to -1 [-Werror,-Wconstant-conversion] for (i = ARRAY_SIZE(iwl_dev_info_table) - 1; i >= 0; i--) { ~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~ This is still harmless, as the loop correctly terminates, but adding an extra range check makes that obvious to both readers and to the compiler. Fixes: 3f7320428fa4 ("iwlwifi: pcie: simplify iwl_pci_find_dev_info()") Reported-by: kernel test robot Cc: Nick Desaulniers Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20211118142124.526901-1-arnd@kernel.org --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index c574f041f096..395e328c6a07 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1339,9 +1339,13 @@ iwl_pci_find_dev_info(u16 device, u16 subsystem_device, u16 mac_type, u8 mac_step, u16 rf_type, u8 cdb, u8 rf_id, u8 no_160, u8 cores) { + int num_devices = ARRAY_SIZE(iwl_dev_info_table); int i; - for (i = ARRAY_SIZE(iwl_dev_info_table) - 1; i >= 0; i--) { + if (!num_devices) + return NULL; + + for (i = num_devices - 1; i >= 0; i--) { const struct iwl_dev_info *dev_info = &iwl_dev_info_table[i]; if (dev_info->device != (u16)IWL_CFG_ANY && From 1b54403c9cc444b6e0ade1f441efdf1270877ace Mon Sep 17 00:00:00 2001 From: chongjiapeng Date: Tue, 2 Nov 2021 15:38:47 +0800 Subject: [PATCH 02/85] iwlwifi: Fix missing error code in iwl_pci_probe() The error code is missing in this code scenario, add the error code '-EINVAL' to the return value 'ret'. Eliminate the follow smatch warning: drivers/net/wireless/intel/iwlwifi/pcie/drv.c:1376 iwl_pci_probe() warn: missing error code 'ret'. Reported-by: Abaci Robot Fixes: 1f171f4f1437 ("iwlwifi: Add support for getting rf id with blank otp") Signed-off-by: chongjiapeng Acked-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1635838727-128735-1-git-send-email-jiapeng.chong@linux.alibaba.com --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 395e328c6a07..5ce07f28e7c3 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1446,8 +1446,10 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) */ if (iwl_trans->trans_cfg->rf_id && iwl_trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_9000 && - !CSR_HW_RFID_TYPE(iwl_trans->hw_rf_id) && get_crf_id(iwl_trans)) + !CSR_HW_RFID_TYPE(iwl_trans->hw_rf_id) && get_crf_id(iwl_trans)) { + ret = -EINVAL; goto out_free_trans; + } dev_info = iwl_pci_find_dev_info(pdev->device, pdev->subsystem_device, CSR_HW_REV_TYPE(iwl_trans->hw_rev), From 5283dd677e52af9db6fe6ad11b2f12220d519d0c Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Wed, 10 Nov 2021 15:01:59 +0200 Subject: [PATCH 03/85] iwlwifi: mvm: retry init flow if failed In some very rare cases the init flow may fail. In many cases, this is recoverable, so we can retry. Implement a loop to retry two more times after the first attempt failed. This can happen in two different situations, namely during probe and during mac80211 start. For the first case, a simple loop is enough. For the second case, we need to add a flag to prevent mac80211 from trying to restart it as well, leaving full control with the driver. Cc: Signed-off-by: Mordechay Goodstein Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20211110150132.57514296ecab.I52a0411774b700bdc7dedb124d8b59bf99456eb2@changeid --- drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 22 +++++++++++------ drivers/net/wireless/intel/iwlwifi/iwl-drv.h | 3 +++ .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 24 ++++++++++++++++++- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 3 +++ drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 3 +++ 5 files changed, 47 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index 36196e07b1a0..5cec467b995b 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -1313,23 +1313,31 @@ _iwl_op_mode_start(struct iwl_drv *drv, struct iwlwifi_opmode_table *op) const struct iwl_op_mode_ops *ops = op->ops; struct dentry *dbgfs_dir = NULL; struct iwl_op_mode *op_mode = NULL; + int retry, max_retry = !!iwlwifi_mod_params.fw_restart * IWL_MAX_INIT_RETRY; + + for (retry = 0; retry <= max_retry; retry++) { #ifdef CONFIG_IWLWIFI_DEBUGFS - drv->dbgfs_op_mode = debugfs_create_dir(op->name, - drv->dbgfs_drv); - dbgfs_dir = drv->dbgfs_op_mode; + drv->dbgfs_op_mode = debugfs_create_dir(op->name, + drv->dbgfs_drv); + dbgfs_dir = drv->dbgfs_op_mode; #endif - op_mode = ops->start(drv->trans, drv->trans->cfg, &drv->fw, dbgfs_dir); + op_mode = ops->start(drv->trans, drv->trans->cfg, + &drv->fw, dbgfs_dir); + + if (op_mode) + return op_mode; + + IWL_ERR(drv, "retry init count %d\n", retry); #ifdef CONFIG_IWLWIFI_DEBUGFS - if (!op_mode) { debugfs_remove_recursive(drv->dbgfs_op_mode); drv->dbgfs_op_mode = NULL; - } #endif + } - return op_mode; + return NULL; } static void _iwl_op_mode_stop(struct iwl_drv *drv) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.h b/drivers/net/wireless/intel/iwlwifi/iwl-drv.h index 2e2d60a58692..0fd009e6d685 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.h @@ -89,4 +89,7 @@ void iwl_drv_stop(struct iwl_drv *drv); #define IWL_EXPORT_SYMBOL(sym) #endif +/* max retry for init flow */ +#define IWL_MAX_INIT_RETRY 2 + #endif /* __iwl_drv_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 9fb9c7dad314..897e3b91ddb2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -16,6 +16,7 @@ #include #include +#include "iwl-drv.h" #include "iwl-op-mode.h" #include "iwl-io.h" #include "mvm.h" @@ -1117,9 +1118,30 @@ static int iwl_mvm_mac_start(struct ieee80211_hw *hw) { struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); int ret; + int retry, max_retry = 0; mutex_lock(&mvm->mutex); - ret = __iwl_mvm_mac_start(mvm); + + /* we are starting the mac not in error flow, and restart is enabled */ + if (!test_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, &mvm->status) && + iwlwifi_mod_params.fw_restart) { + max_retry = IWL_MAX_INIT_RETRY; + /* + * This will prevent mac80211 recovery flows to trigger during + * init failures + */ + set_bit(IWL_MVM_STATUS_STARTING, &mvm->status); + } + + for (retry = 0; retry <= max_retry; retry++) { + ret = __iwl_mvm_mac_start(mvm); + if (!ret) + break; + + IWL_ERR(mvm, "mac start retry %d\n", retry); + } + clear_bit(IWL_MVM_STATUS_STARTING, &mvm->status); + mutex_unlock(&mvm->mutex); return ret; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 2b1dcd60e00f..a72d85086fe3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1123,6 +1123,8 @@ struct iwl_mvm { * @IWL_MVM_STATUS_FIRMWARE_RUNNING: firmware is running * @IWL_MVM_STATUS_NEED_FLUSH_P2P: need to flush P2P bcast STA * @IWL_MVM_STATUS_IN_D3: in D3 (or at least about to go into it) + * @IWL_MVM_STATUS_STARTING: starting mac, + * used to disable restart flow while in STARTING state */ enum iwl_mvm_status { IWL_MVM_STATUS_HW_RFKILL, @@ -1134,6 +1136,7 @@ enum iwl_mvm_status { IWL_MVM_STATUS_FIRMWARE_RUNNING, IWL_MVM_STATUS_NEED_FLUSH_P2P, IWL_MVM_STATUS_IN_D3, + IWL_MVM_STATUS_STARTING, }; /* Keep track of completed init configuration */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 232ad531d612..ce7160670aa7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -1600,6 +1600,9 @@ void iwl_mvm_nic_restart(struct iwl_mvm *mvm, bool fw_error) */ if (!mvm->fw_restart && fw_error) { iwl_fw_error_collect(&mvm->fwrt, false); + } else if (test_bit(IWL_MVM_STATUS_STARTING, + &mvm->status)) { + IWL_ERR(mvm, "Starting mac, retry will be triggered anyway\n"); } else if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) { struct iwl_mvm_reprobe *reprobe; From f5cecf1d4c5ff76172928bc32e99ca56a5ca2f56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Bartosik?= Date: Wed, 10 Nov 2021 22:57:44 +0100 Subject: [PATCH 04/85] iwlwifi: fix warnings produced by kernel debug options MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix warnings produced by: - lockdep_assert_wiphy() in function reg_process_self_managed_hint(), - wiphy_dereference() in function iwl_mvm_init_fw_regd(). Both function are expected to be called in critical section. The warnings were discovered when running v5.15 kernel with debug options enabled: 1) Hardware name: Google Delbin/Delbin RIP: 0010:reg_process_self_managed_hint+0x254/0x347 [cfg80211] ... Call Trace: regulatory_set_wiphy_regd_sync+0x3d/0xb0 iwl_mvm_init_mcc+0x49d/0x5a2 iwl_op_mode_mvm_start+0x1b58/0x2507 ? iwl_mvm_reprobe_wk+0x94/0x94 _iwl_op_mode_start+0x146/0x1a3 iwl_opmode_register+0xda/0x13d init_module+0x28/0x1000 2) drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c:263 suspicious rcu_dereference_protected() usage! ... Hardware name: Google Delbin/Delbin, BIOS Google_Delbin Call Trace: dump_stack_lvl+0xb1/0xe6 iwl_mvm_init_fw_regd+0x2e7/0x379 iwl_mvm_init_mcc+0x2c6/0x5a2 iwl_op_mode_mvm_start+0x1b58/0x2507 ? iwl_mvm_reprobe_wk+0x94/0x94 _iwl_op_mode_start+0x146/0x1a3 iwl_opmode_register+0xda/0x13d init_module+0x28/0x100 Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver") Signed-off-by: Łukasz Bartosik Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20211110215744.5487-1-lukasz.bartosik@semihalf.com --- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index ce7160670aa7..cd08e289cd9a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -686,6 +686,7 @@ static int iwl_mvm_start_get_nvm(struct iwl_mvm *mvm) int ret; rtnl_lock(); + wiphy_lock(mvm->hw->wiphy); mutex_lock(&mvm->mutex); ret = iwl_run_init_mvm_ucode(mvm); @@ -701,6 +702,7 @@ static int iwl_mvm_start_get_nvm(struct iwl_mvm *mvm) iwl_mvm_stop_device(mvm); mutex_unlock(&mvm->mutex); + wiphy_unlock(mvm->hw->wiphy); rtnl_unlock(); if (ret < 0) From a571bc28326d9f3e13f5f2d9cda2883e0631b0ce Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 11 Nov 2021 08:23:11 +0100 Subject: [PATCH 05/85] iwlwifi: Fix memory leaks in error handling path Should an error occur (invalid TLV len or memory allocation failure), the memory already allocated in 'reduce_power_data' should be freed before returning, otherwise it is leaking. Fixes: 9dad325f9d57 ("iwlwifi: support loading the reduced power table from UEFI") Signed-off-by: Christophe JAILLET Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1504cd7d842d13ddb8244e18004523128d5c9523.1636615284.git.christophe.jaillet@wanadoo.fr --- drivers/net/wireless/intel/iwlwifi/fw/uefi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c index c875bf35533c..009dd4be597b 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c @@ -86,6 +86,7 @@ static void *iwl_uefi_reduce_power_section(struct iwl_trans *trans, if (len < tlv_len) { IWL_ERR(trans, "invalid TLV len: %zd/%u\n", len, tlv_len); + kfree(reduce_power_data); reduce_power_data = ERR_PTR(-EINVAL); goto out; } @@ -105,6 +106,7 @@ static void *iwl_uefi_reduce_power_section(struct iwl_trans *trans, IWL_DEBUG_FW(trans, "Couldn't allocate (more) reduce_power_data\n"); + kfree(reduce_power_data); reduce_power_data = ERR_PTR(-ENOMEM); goto out; } @@ -134,6 +136,10 @@ static void *iwl_uefi_reduce_power_section(struct iwl_trans *trans, done: if (!size) { IWL_DEBUG_FW(trans, "Empty REDUCE_POWER, skipping.\n"); + /* Better safe than sorry, but 'reduce_power_data' should + * always be NULL if !size. + */ + kfree(reduce_power_data); reduce_power_data = ERR_PTR(-ENOENT); goto out; } From 5737b4515deea0829c138ab5201160345ec67d49 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Fri, 19 Nov 2021 13:45:10 +0800 Subject: [PATCH 06/85] rtw89: update partition size of firmware header on skb->data The partition size is used to tell hardware the size of piece we are going to send a firmware. The old code updates the size in constant buffer of firmware, and leads system crash. To fix this, update the size on skb->data after we copy the firmware data into skb. Buglink: https://bugzilla.opensuse.org/show_bug.cgi?id=1188303 Fixes: e3ec7017f6a2 ("rtw89: add Realtek 802.11ax driver") Reported-by: Takashi Iwai Signed-off-by: Ping-Ke Shih Tested-by: Takashi Iwai Tested-by: Larry Finger Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20211119054512.10620-2-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/fw.c | 2 +- drivers/net/wireless/realtek/rtw89/fw.h | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index 212aaf577d3c..65ef3dc9d061 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -91,7 +91,6 @@ static int rtw89_fw_hdr_parser(struct rtw89_dev *rtwdev, const u8 *fw, u32 len, info->section_num = GET_FW_HDR_SEC_NUM(fw); info->hdr_len = RTW89_FW_HDR_SIZE + info->section_num * RTW89_FW_SECTION_HDR_SIZE; - SET_FW_HDR_PART_SIZE(fw, FWDL_SECTION_PER_PKT_LEN); bin = fw + info->hdr_len; @@ -275,6 +274,7 @@ static int __rtw89_fw_download_hdr(struct rtw89_dev *rtwdev, const u8 *fw, u32 l } skb_put_data(skb, fw, len); + SET_FW_HDR_PART_SIZE(skb->data, FWDL_SECTION_PER_PKT_LEN); rtw89_h2c_pkt_set_hdr_fwdl(rtwdev, skb, FWCMD_TYPE_H2C, H2C_CAT_MAC, H2C_CL_MAC_FWDL, H2C_FUNC_MAC_FWHDR_DL, len); diff --git a/drivers/net/wireless/realtek/rtw89/fw.h b/drivers/net/wireless/realtek/rtw89/fw.h index 7ee0d9323310..36e8d0da6c1e 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.h +++ b/drivers/net/wireless/realtek/rtw89/fw.h @@ -282,8 +282,10 @@ struct rtw89_h2creg_sch_tx_en { le32_get_bits(*((__le32 *)(fwhdr) + 6), GENMASK(15, 8)) #define GET_FW_HDR_CMD_VERSERION(fwhdr) \ le32_get_bits(*((__le32 *)(fwhdr) + 7), GENMASK(31, 24)) -#define SET_FW_HDR_PART_SIZE(fwhdr, val) \ - le32p_replace_bits((__le32 *)(fwhdr) + 7, val, GENMASK(15, 0)) +static inline void SET_FW_HDR_PART_SIZE(void *fwhdr, u32 val) +{ + le32p_replace_bits((__le32 *)fwhdr + 7, val, GENMASK(15, 0)); +} #define SET_CTRL_INFO_MACID(table, val) \ le32p_replace_bits((__le32 *)(table) + 0, val, GENMASK(6, 0)) From 6e53d6d26920d5221d3f4d4f5ffdd629ea69aa5c Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 18 Nov 2021 13:47:48 +0100 Subject: [PATCH 07/85] mt76: mt7915: fix NULL pointer dereference in mt7915_get_phy_mode Fix the following NULL pointer dereference in mt7915_get_phy_mode routine adding an ibss interface to the mt7915 driver. [ 101.137097] wlan0: Trigger new scan to find an IBSS to join [ 102.827039] wlan0: Creating new IBSS network, BSSID 26:a4:50:1a:6e:69 [ 103.064756] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 [ 103.073670] Mem abort info: [ 103.076520] ESR = 0x96000005 [ 103.079614] EC = 0x25: DABT (current EL), IL = 32 bits [ 103.084934] SET = 0, FnV = 0 [ 103.088042] EA = 0, S1PTW = 0 [ 103.091215] Data abort info: [ 103.094104] ISV = 0, ISS = 0x00000005 [ 103.098041] CM = 0, WnR = 0 [ 103.101044] user pgtable: 4k pages, 39-bit VAs, pgdp=00000000460b1000 [ 103.107565] [0000000000000000] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000 [ 103.116590] Internal error: Oops: 96000005 [#1] SMP [ 103.189066] CPU: 1 PID: 333 Comm: kworker/u4:3 Not tainted 5.10.75 #0 [ 103.195498] Hardware name: MediaTek MT7622 RFB1 board (DT) [ 103.201124] Workqueue: phy0 ieee80211_iface_work [mac80211] [ 103.206695] pstate: 20000005 (nzCv daif -PAN -UAO -TCO BTYPE=--) [ 103.212705] pc : mt7915_get_phy_mode+0x68/0x120 [mt7915e] [ 103.218103] lr : mt7915_mcu_add_bss_info+0x11c/0x760 [mt7915e] [ 103.223927] sp : ffffffc011cdb9e0 [ 103.227235] x29: ffffffc011cdb9e0 x28: ffffff8006563098 [ 103.232545] x27: ffffff8005f4da22 x26: ffffff800685ac40 [ 103.237855] x25: 0000000000000001 x24: 000000000000011f [ 103.243165] x23: ffffff8005f4e260 x22: ffffff8006567918 [ 103.248475] x21: ffffff8005f4df80 x20: ffffff800685ac58 [ 103.253785] x19: ffffff8006744400 x18: 0000000000000000 [ 103.259094] x17: 0000000000000000 x16: 0000000000000001 [ 103.264403] x15: 000899c3a2d9d2e4 x14: 000899bdc3c3a1c8 [ 103.269713] x13: 0000000000000000 x12: 0000000000000000 [ 103.275024] x11: ffffffc010e30c20 x10: 0000000000000000 [ 103.280333] x9 : 0000000000000050 x8 : ffffff8006567d88 [ 103.285642] x7 : ffffff8006563b5c x6 : ffffff8006563b44 [ 103.290952] x5 : 0000000000000002 x4 : 0000000000000001 [ 103.296262] x3 : 0000000000000001 x2 : 0000000000000001 [ 103.301572] x1 : 0000000000000000 x0 : 0000000000000011 [ 103.306882] Call trace: [ 103.309328] mt7915_get_phy_mode+0x68/0x120 [mt7915e] [ 103.314378] mt7915_bss_info_changed+0x198/0x200 [mt7915e] [ 103.319941] ieee80211_bss_info_change_notify+0x128/0x290 [mac80211] [ 103.326360] __ieee80211_sta_join_ibss+0x308/0x6c4 [mac80211] [ 103.332171] ieee80211_sta_create_ibss+0x8c/0x10c [mac80211] [ 103.337895] ieee80211_ibss_work+0x3dc/0x614 [mac80211] [ 103.343185] ieee80211_iface_work+0x388/0x3f0 [mac80211] [ 103.348495] process_one_work+0x288/0x690 [ 103.352499] worker_thread+0x70/0x464 [ 103.356157] kthread+0x144/0x150 [ 103.359380] ret_from_fork+0x10/0x18 [ 103.362952] Code: 394008c3 52800220 394000e4 7100007f (39400023) Fixes: 37f4ca907c46 ("mt76: mt7915: register per-phy HE capabilities for each interface") Fixes: e57b7901469f ("mt76: add mac80211 driver for MT7915 PCIe-based chipsets") Signed-off-by: Lorenzo Bianconi Acked-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/ddae419a740f1fb9e48afd432035e9f394f512ee.1637239456.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c index 899957b9d0f1..852d5d97c70b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c @@ -176,7 +176,7 @@ mt7915_get_phy_mode(struct ieee80211_vif *vif, struct ieee80211_sta *sta) if (ht_cap->ht_supported) mode |= PHY_MODE_GN; - if (he_cap->has_he) + if (he_cap && he_cap->has_he) mode |= PHY_MODE_AX_24G; } else if (band == NL80211_BAND_5GHZ) { mode |= PHY_MODE_A; @@ -187,7 +187,7 @@ mt7915_get_phy_mode(struct ieee80211_vif *vif, struct ieee80211_sta *sta) if (vht_cap->vht_supported) mode |= PHY_MODE_AC; - if (he_cap->has_he) + if (he_cap && he_cap->has_he) mode |= PHY_MODE_AX_5G; } From ebb75b1b43d3e2bafc4d33eb4b1ae9c8d2759771 Mon Sep 17 00:00:00 2001 From: Deren Wu Date: Mon, 22 Nov 2021 23:10:27 +0800 Subject: [PATCH 08/85] mt76: fix timestamp check in tx_status Should keep SKBs only if timeout timestamp is still after jiffies. Otherwise, report tx status and drop it direclty. Fixes: bd1e3e7b693c ("mt76: introduce packet_id idr") Signed-off-by: Deren Wu Acked-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/7e3784949c0b29a00465966b89fdb0192bd0298e.1637593492.git.deren.wu@mediatek.com --- drivers/net/wireless/mediatek/mt76/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c index 11719ef034d8..6b8c9dc80542 100644 --- a/drivers/net/wireless/mediatek/mt76/tx.c +++ b/drivers/net/wireless/mediatek/mt76/tx.c @@ -173,7 +173,7 @@ mt76_tx_status_skb_get(struct mt76_dev *dev, struct mt76_wcid *wcid, int pktid, if (!(cb->flags & MT_TX_CB_DMA_DONE)) continue; - if (!time_is_after_jiffies(cb->jiffies + + if (time_is_after_jiffies(cb->jiffies + MT_TX_STATUS_SKB_TIMEOUT)) continue; } From 2a9e9857473bfc5721092ff274bc1e371e5a0d2f Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 22 Nov 2021 18:34:03 +0100 Subject: [PATCH 09/85] mt76: fix possible pktid leak Fix a possible idr pkt-id leak if the packet is dropped on tx side Fixes: bd1e3e7b693c ("mt76: introduce packet_id idr") Signed-off-by: Lorenzo Bianconi Acked-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/a560caffcc24452fb48af53904bbe5c45ea5db93.1637602268.git.lorenzo@kernel.org --- .../wireless/mediatek/mt76/mt7615/pci_mac.c | 3 +-- .../wireless/mediatek/mt76/mt7615/usb_sdio.c | 23 +++++++++++-------- .../wireless/mediatek/mt76/mt76x02_usb_core.c | 8 ++++++- .../net/wireless/mediatek/mt76/mt7915/mac.c | 15 ++++++------ .../wireless/mediatek/mt76/mt7921/sdio_mac.c | 16 ++++++++----- 5 files changed, 38 insertions(+), 27 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c index 5ee52cd70a4b..d1806f198aed 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c @@ -143,8 +143,6 @@ int mt7615_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, if (!wcid) wcid = &dev->mt76.global_wcid; - pid = mt76_tx_status_skb_add(mdev, wcid, tx_info->skb); - if ((info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) && msta) { struct mt7615_phy *phy = &dev->phy; @@ -164,6 +162,7 @@ int mt7615_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, if (id < 0) return id; + pid = mt76_tx_status_skb_add(mdev, wcid, tx_info->skb); mt7615_mac_write_txwi(dev, txwi_ptr, tx_info->skb, wcid, sta, pid, key, false); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c index bd2939ebcbf4..bfe6c1579dc1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c @@ -43,17 +43,11 @@ EXPORT_SYMBOL_GPL(mt7663_usb_sdio_reg_map); static void mt7663_usb_sdio_write_txwi(struct mt7615_dev *dev, struct mt76_wcid *wcid, enum mt76_txq_id qid, struct ieee80211_sta *sta, - struct sk_buff *skb) + int pid, struct sk_buff *skb) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_key_conf *key = info->control.hw_key; __le32 *txwi; - int pid; - - if (!wcid) - wcid = &dev->mt76.global_wcid; - - pid = mt76_tx_status_skb_add(&dev->mt76, wcid, skb); txwi = (__le32 *)(skb->data - MT_USB_TXD_SIZE); memset(txwi, 0, MT_USB_TXD_SIZE); @@ -195,9 +189,12 @@ int mt7663_usb_sdio_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, struct sk_buff *skb = tx_info->skb; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct mt7615_sta *msta; - int pad; + int pad, err, pktid; msta = wcid ? container_of(wcid, struct mt7615_sta, wcid) : NULL; + if (!wcid) + wcid = &dev->mt76.global_wcid; + if ((info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) && msta && !msta->rate_probe) { /* request to configure sampling rate */ @@ -207,7 +204,8 @@ int mt7663_usb_sdio_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, spin_unlock_bh(&dev->mt76.lock); } - mt7663_usb_sdio_write_txwi(dev, wcid, qid, sta, skb); + pktid = mt76_tx_status_skb_add(&dev->mt76, wcid, skb); + mt7663_usb_sdio_write_txwi(dev, wcid, qid, sta, pktid, skb); if (mt76_is_usb(mdev)) { u32 len = skb->len; @@ -217,7 +215,12 @@ int mt7663_usb_sdio_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, pad = round_up(skb->len, 4) - skb->len; } - return mt76_skb_adjust_pad(skb, pad); + err = mt76_skb_adjust_pad(skb, pad); + if (err) + /* Release pktid in case of error. */ + idr_remove(&wcid->pktid, pktid); + + return err; } EXPORT_SYMBOL_GPL(mt7663_usb_sdio_tx_prepare_skb); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c index efd70ddc2fd1..2c6c03809b20 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c @@ -72,6 +72,7 @@ int mt76x02u_tx_prepare_skb(struct mt76_dev *mdev, void *data, bool ampdu = IEEE80211_SKB_CB(tx_info->skb)->flags & IEEE80211_TX_CTL_AMPDU; enum mt76_qsel qsel; u32 flags; + int err; mt76_insert_hdr_pad(tx_info->skb); @@ -106,7 +107,12 @@ int mt76x02u_tx_prepare_skb(struct mt76_dev *mdev, void *data, ewma_pktlen_add(&msta->pktlen, tx_info->skb->len); } - return mt76x02u_skb_dma_info(tx_info->skb, WLAN_PORT, flags); + err = mt76x02u_skb_dma_info(tx_info->skb, WLAN_PORT, flags); + if (err && wcid) + /* Release pktid in case of error. */ + idr_remove(&wcid->pktid, pid); + + return err; } EXPORT_SYMBOL_GPL(mt76x02u_tx_prepare_skb); diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c index 5fcf35f2d9fb..809dc18e5083 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c @@ -1151,8 +1151,14 @@ int mt7915_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, } } - pid = mt76_tx_status_skb_add(mdev, wcid, tx_info->skb); + t = (struct mt76_txwi_cache *)(txwi + mdev->drv->txwi_size); + t->skb = tx_info->skb; + id = mt76_token_consume(mdev, &t); + if (id < 0) + return id; + + pid = mt76_tx_status_skb_add(mdev, wcid, tx_info->skb); mt7915_mac_write_txwi(dev, txwi_ptr, tx_info->skb, wcid, pid, key, false); @@ -1178,13 +1184,6 @@ int mt7915_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, txp->bss_idx = mvif->idx; } - t = (struct mt76_txwi_cache *)(txwi + mdev->drv->txwi_size); - t->skb = tx_info->skb; - - id = mt76_token_consume(mdev, &t); - if (id < 0) - return id; - txp->token = cpu_to_le16(id); if (test_bit(MT_WCID_FLAG_4ADDR, &wcid->flags)) txp->rept_wds_wcid = cpu_to_le16(wcid->idx); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mac.c index 137f86a6dbf8..85b3d88f8ecc 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mac.c @@ -142,14 +142,12 @@ out: static void mt7921s_write_txwi(struct mt7921_dev *dev, struct mt76_wcid *wcid, enum mt76_txq_id qid, struct ieee80211_sta *sta, - struct sk_buff *skb) + int pid, struct sk_buff *skb) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_key_conf *key = info->control.hw_key; __le32 *txwi; - int pid; - pid = mt76_tx_status_skb_add(&dev->mt76, wcid, skb); txwi = (__le32 *)(skb->data - MT_SDIO_TXD_SIZE); memset(txwi, 0, MT_SDIO_TXD_SIZE); mt7921_mac_write_txwi(dev, txwi, skb, wcid, key, pid, false); @@ -164,7 +162,7 @@ int mt7921s_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx_info->skb); struct sk_buff *skb = tx_info->skb; - int pad; + int err, pad, pktid; if (unlikely(tx_info->skb->len <= ETH_HLEN)) return -EINVAL; @@ -181,12 +179,18 @@ int mt7921s_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, } } - mt7921s_write_txwi(dev, wcid, qid, sta, skb); + pktid = mt76_tx_status_skb_add(&dev->mt76, wcid, skb); + mt7921s_write_txwi(dev, wcid, qid, sta, pktid, skb); mt7921_skb_add_sdio_hdr(skb, MT7921_SDIO_DATA); pad = round_up(skb->len, 4) - skb->len; - return mt76_skb_adjust_pad(skb, pad); + err = mt76_skb_adjust_pad(skb, pad); + if (err) + /* Release pktid in case of error. */ + idr_remove(&wcid->pktid, pktid); + + return err; } void mt7921s_tx_complete_skb(struct mt76_dev *mdev, struct mt76_queue_entry *e) From ed53ae75693096f1c10b4561edd31a07b631bd72 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 11 Nov 2021 15:10:03 +0100 Subject: [PATCH 10/85] rt2x00: do not mark device gone on EPROTO errors during start As reported by Exuvo is possible that we have lot's of EPROTO errors during device start i.e. firmware load. But after that device works correctly. Hence marking device gone by few EPROTO errors done by commit e383c70474db ("rt2x00: check number of EPROTO errors") caused regression - Exuvo device stop working after kernel update. To fix disable the check during device start. Link: https://lore.kernel.org/linux-wireless/bff7d309-a816-6a75-51b6-5928ef4f7a8c@exuvo.se/ Reported-and-tested-by: Exuvo Fixes: e383c70474db ("rt2x00: check number of EPROTO errors") Cc: stable@vger.kernel.org Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20211111141003.GA134627@wp.pl --- drivers/net/wireless/ralink/rt2x00/rt2x00usb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c index e4473a551241..74c3d8cb3100 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c @@ -25,6 +25,9 @@ static bool rt2x00usb_check_usb_error(struct rt2x00_dev *rt2x00dev, int status) if (status == -ENODEV || status == -ENOENT) return true; + if (!test_bit(DEVICE_STATE_STARTED, &rt2x00dev->flags)) + return false; + if (status == -EPROTO || status == -ETIMEDOUT) rt2x00dev->num_proto_errs++; else From f8e7dfd6fdabb831846ab1970a875746559d491b Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Fri, 26 Nov 2021 16:51:15 +0100 Subject: [PATCH 11/85] net: stmmac: Avoid DMA_CHAN_CONTROL write if no Split Header support The driver assumes that split headers can be enabled/disabled without stopping/starting the device, so it writes DMA_CHAN_CONTROL from stmmac_set_features(). However, on my system (IP v5.10a without Split Header support), simply writing DMA_CHAN_CONTROL when DMA is running (for example, with the commands below) leads to a TX watchdog timeout. host$ socat TCP-LISTEN:1024,fork,reuseaddr - & device$ ethtool -K eth0 tso off device$ ethtool -K eth0 tso on device$ dd if=/dev/zero bs=1M count=10 | socat - TCP4:host:1024 Note that since my IP is configured without Split Header support, the driver always just reads and writes the same value to the DMA_CHAN_CONTROL register. I don't have access to any platforms with Split Header support so I don't know if these writes to the DMA_CHAN_CONTROL while DMA is running actually work properly on such systems. I could not find anything in the databook that says that DMA_CHAN_CONTROL should not be written when the DMA is running. But on systems without Split Header support, there is in any case no need to call enable_sph() in stmmac_set_features() at all since SPH can never be toggled, so we can avoid the watchdog timeout there by skipping this call. Fixes: 8c6fc097a2f4acf ("net: stmmac: gmac4+: Add Split Header support") Signed-off-by: Vincent Whitchurch Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 748195697e5a..da8306f60730 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5540,8 +5540,6 @@ static int stmmac_set_features(struct net_device *netdev, netdev_features_t features) { struct stmmac_priv *priv = netdev_priv(netdev); - bool sph_en; - u32 chan; /* Keep the COE Type in case of csum is supporting */ if (features & NETIF_F_RXCSUM) @@ -5553,10 +5551,13 @@ static int stmmac_set_features(struct net_device *netdev, */ stmmac_rx_ipc(priv, priv->hw); - sph_en = (priv->hw->rx_csum > 0) && priv->sph; + if (priv->sph_cap) { + bool sph_en = (priv->hw->rx_csum > 0) && priv->sph; + u32 chan; - for (chan = 0; chan < priv->plat->rx_queues_to_use; chan++) - stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan); + for (chan = 0; chan < priv->plat->rx_queues_to_use; chan++) + stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan); + } return 0; } From dacb5d8875cc6cd3a553363b4d6f06760fcbe70c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 26 Nov 2021 19:34:21 +0100 Subject: [PATCH 12/85] tcp: fix page frag corruption on page fault Steffen reported a TCP stream corruption for HTTP requests served by the apache web-server using a cifs mount-point and memory mapping the relevant file. The root cause is quite similar to the one addressed by commit 20eb4f29b602 ("net: fix sk_page_frag() recursion from memory reclaim"). Here the nested access to the task page frag is caused by a page fault on the (mmapped) user-space memory buffer coming from the cifs file. The page fault handler performs an smb transaction on a different socket, inside the same process context. Since sk->sk_allaction for such socket does not prevent the usage for the task_frag, the nested allocation modify "under the hood" the page frag in use by the outer sendmsg call, corrupting the stream. The overall relevant stack trace looks like the following: httpd 78268 [001] 3461630.850950: probe:tcp_sendmsg_locked: ffffffff91461d91 tcp_sendmsg_locked+0x1 ffffffff91462b57 tcp_sendmsg+0x27 ffffffff9139814e sock_sendmsg+0x3e ffffffffc06dfe1d smb_send_kvec+0x28 [...] ffffffffc06cfaf8 cifs_readpages+0x213 ffffffff90e83c4b read_pages+0x6b ffffffff90e83f31 __do_page_cache_readahead+0x1c1 ffffffff90e79e98 filemap_fault+0x788 ffffffff90eb0458 __do_fault+0x38 ffffffff90eb5280 do_fault+0x1a0 ffffffff90eb7c84 __handle_mm_fault+0x4d4 ffffffff90eb8093 handle_mm_fault+0xc3 ffffffff90c74f6d __do_page_fault+0x1ed ffffffff90c75277 do_page_fault+0x37 ffffffff9160111e page_fault+0x1e ffffffff9109e7b5 copyin+0x25 ffffffff9109eb40 _copy_from_iter_full+0xe0 ffffffff91462370 tcp_sendmsg_locked+0x5e0 ffffffff91462370 tcp_sendmsg_locked+0x5e0 ffffffff91462b57 tcp_sendmsg+0x27 ffffffff9139815c sock_sendmsg+0x4c ffffffff913981f7 sock_write_iter+0x97 ffffffff90f2cc56 do_iter_readv_writev+0x156 ffffffff90f2dff0 do_iter_write+0x80 ffffffff90f2e1c3 vfs_writev+0xa3 ffffffff90f2e27c do_writev+0x5c ffffffff90c042bb do_syscall_64+0x5b ffffffff916000ad entry_SYSCALL_64_after_hwframe+0x65 The cifs filesystem rightfully sets sk_allocations to GFP_NOFS, we can avoid the nesting using the sk page frag for allocation lacking the __GFP_FS flag. Do not define an additional mm-helper for that, as this is strictly tied to the sk page frag usage. v1 -> v2: - use a stricted sk_page_frag() check instead of reordering the code (Eric) Reported-by: Steffen Froemer Fixes: 5640f7685831 ("net: use a per task frag allocator") Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index b32906e1ab55..715cdb4b2b79 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2430,19 +2430,22 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk) * @sk: socket * * Use the per task page_frag instead of the per socket one for - * optimization when we know that we're in the normal context and owns + * optimization when we know that we're in process context and own * everything that's associated with %current. * - * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest - * inside other socket operations and end up recursing into sk_page_frag() - * while it's already in use. + * Both direct reclaim and page faults can nest inside other + * socket operations and end up recursing into sk_page_frag() + * while it's already in use: explicitly avoid task page_frag + * usage if the caller is potentially doing any of them. + * This assumes that page fault handlers use the GFP_NOFS flags. * * Return: a per task page_frag if context allows that, * otherwise a per socket one. */ static inline struct page_frag *sk_page_frag(struct sock *sk) { - if (gfpflags_normal_context(sk->sk_allocation)) + if ((sk->sk_allocation & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC | __GFP_FS)) == + (__GFP_DIRECT_RECLAIM | __GFP_FS)) return ¤t->task_frag; return &sk->sk_frag; From 1e89ad864d035001835ccf02acea7b1d3dc41819 Mon Sep 17 00:00:00 2001 From: Luiz Angelo Daros de Luca Date: Fri, 26 Nov 2021 17:13:55 -0300 Subject: [PATCH 13/85] net: dsa: realtek-smi: fix indirect reg access for ports>3 This switch family can have up to 8 UTP ports {0..7}. However, INDIRECT_ACCESS_ADDRESS_PHYNUM_MASK was using 2 bits instead of 3, dropping the most significant bit during indirect register reads and writes. Reading or writing ports 4, 5, 6, and 7 registers was actually manipulating, respectively, ports 0, 1, 2, and 3 registers. This is not sufficient but necessary to support any variant with more than 4 UTP ports, like RTL8367S. rtl8365mb_phy_{read,write} will now returns -EINVAL if phy is greater than 7. Fixes: 4af2950c50c8 ("net: dsa: realtek-smi: add rtl8365mb subdriver for RTL8365MB-VC") Signed-off-by: Luiz Angelo Daros de Luca Signed-off-by: David S. Miller --- drivers/net/dsa/rtl8365mb.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/rtl8365mb.c b/drivers/net/dsa/rtl8365mb.c index baaae97283c5..078ca4cd7160 100644 --- a/drivers/net/dsa/rtl8365mb.c +++ b/drivers/net/dsa/rtl8365mb.c @@ -107,6 +107,7 @@ #define RTL8365MB_LEARN_LIMIT_MAX_8365MB_VC 2112 /* Family-specific data and limits */ +#define RTL8365MB_PHYADDRMAX 7 #define RTL8365MB_NUM_PHYREGS 32 #define RTL8365MB_PHYREGMAX (RTL8365MB_NUM_PHYREGS - 1) #define RTL8365MB_MAX_NUM_PORTS (RTL8365MB_CPU_PORT_NUM_8365MB_VC + 1) @@ -176,7 +177,7 @@ #define RTL8365MB_INDIRECT_ACCESS_STATUS_REG 0x1F01 #define RTL8365MB_INDIRECT_ACCESS_ADDRESS_REG 0x1F02 #define RTL8365MB_INDIRECT_ACCESS_ADDRESS_OCPADR_5_1_MASK GENMASK(4, 0) -#define RTL8365MB_INDIRECT_ACCESS_ADDRESS_PHYNUM_MASK GENMASK(6, 5) +#define RTL8365MB_INDIRECT_ACCESS_ADDRESS_PHYNUM_MASK GENMASK(7, 5) #define RTL8365MB_INDIRECT_ACCESS_ADDRESS_OCPADR_9_6_MASK GENMASK(11, 8) #define RTL8365MB_PHY_BASE 0x2000 #define RTL8365MB_INDIRECT_ACCESS_WRITE_DATA_REG 0x1F03 @@ -679,6 +680,9 @@ static int rtl8365mb_phy_read(struct realtek_smi *smi, int phy, int regnum) u16 val; int ret; + if (phy > RTL8365MB_PHYADDRMAX) + return -EINVAL; + if (regnum > RTL8365MB_PHYREGMAX) return -EINVAL; @@ -704,6 +708,9 @@ static int rtl8365mb_phy_write(struct realtek_smi *smi, int phy, int regnum, u32 ocp_addr; int ret; + if (phy > RTL8365MB_PHYADDRMAX) + return -EINVAL; + if (regnum > RTL8365MB_PHYREGMAX) return -EINVAL; From 49989adc38f8693fb6e9f019904dd00c1d1db5ac Mon Sep 17 00:00:00 2001 From: Ole Ernst Date: Sat, 27 Nov 2021 10:05:45 +0100 Subject: [PATCH 14/85] USB: NO_LPM quirk Lenovo Powered USB-C Travel Hub This is another branded 8153 device that doesn't work well with LPM: r8152 2-2.1:1.0 enp0s13f0u2u1: Stop submitting intr, status -71 Disable LPM to resolve the issue. Signed-off-by: Ole Ernst Signed-off-by: David S. Miller --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 8239fe7129dd..019351c0b52c 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -434,6 +434,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1532, 0x0116), .driver_info = USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, + /* Lenovo Powered USB-C Travel Hub (4X90S92381, RTL8153 GigE) */ + { USB_DEVICE(0x17ef, 0x721e), .driver_info = USB_QUIRK_NO_LPM }, + /* Lenovo ThinkCenter A630Z TI024Gen3 usb-audio */ { USB_DEVICE(0x17ef, 0xa012), .driver_info = USB_QUIRK_DISCONNECT_SUSPEND }, From 817b653160db9852d5a0498a31f047e18ce27e5b Mon Sep 17 00:00:00 2001 From: Sven Schuchmann Date: Sat, 27 Nov 2021 11:47:07 +0100 Subject: [PATCH 15/85] net: usb: lan78xx: lan78xx_phy_init(): use PHY_POLL instead of "0" if no IRQ is available On most systems request for IRQ 0 will fail, phylib will print an error message and fall back to polling. To fix this set the phydev->irq to PHY_POLL if no IRQ is available. Fixes: cc89c323a30e ("lan78xx: Use irq_domain for phy interrupt from USB Int. EP") Reviewed-by: Andrew Lunn Signed-off-by: Sven Schuchmann Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index f20376c1ef3f..8cd265fc1fd9 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2228,7 +2228,7 @@ static int lan78xx_phy_init(struct lan78xx_net *dev) if (dev->domain_data.phyirq > 0) phydev->irq = dev->domain_data.phyirq; else - phydev->irq = 0; + phydev->irq = PHY_POLL; netdev_dbg(dev->net, "phydev->irq = %d\n", phydev->irq); /* set to AUTOMDIX */ From 7d4741eacdefa5f0475431645b56baf00784df1f Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Mon, 29 Nov 2021 15:15:05 +0900 Subject: [PATCH 16/85] net: mpls: Fix notifications when deleting a device There are various problems related to netlink notifications for mpls route changes in response to interfaces being deleted: * delete interface of only nexthop DELROUTE notification is missing RTA_OIF attribute * delete interface of non-last nexthop NEWROUTE notification is missing entirely * delete interface of last nexthop DELROUTE notification is missing nexthop All of these problems stem from the fact that existing routes are modified in-place before sending a notification. Restructure mpls_ifdown() to avoid changing the route in the DELROUTE cases and to create a copy in the NEWROUTE case. Fixes: f8efb73c97e2 ("mpls: multipath route support") Signed-off-by: Benjamin Poirier Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 68 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 16 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index ffeb2df8be7a..6e587feb705c 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1491,22 +1491,52 @@ static void mpls_dev_destroy_rcu(struct rcu_head *head) kfree(mdev); } -static void mpls_ifdown(struct net_device *dev, int event) +static int mpls_ifdown(struct net_device *dev, int event) { struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); - u8 alive, deleted; unsigned index; platform_label = rtnl_dereference(net->mpls.platform_label); for (index = 0; index < net->mpls.platform_labels; index++) { struct mpls_route *rt = rtnl_dereference(platform_label[index]); + bool nh_del = false; + u8 alive = 0; if (!rt) continue; - alive = 0; - deleted = 0; + if (event == NETDEV_UNREGISTER) { + u8 deleted = 0; + + for_nexthops(rt) { + struct net_device *nh_dev = + rtnl_dereference(nh->nh_dev); + + if (!nh_dev || nh_dev == dev) + deleted++; + if (nh_dev == dev) + nh_del = true; + } endfor_nexthops(rt); + + /* if there are no more nexthops, delete the route */ + if (deleted == rt->rt_nhn) { + mpls_route_update(net, index, NULL, NULL); + continue; + } + + if (nh_del) { + size_t size = sizeof(*rt) + rt->rt_nhn * + rt->rt_nh_size; + struct mpls_route *orig = rt; + + rt = kmalloc(size, GFP_KERNEL); + if (!rt) + return -ENOMEM; + memcpy(rt, orig, size); + } + } + change_nexthops(rt) { unsigned int nh_flags = nh->nh_flags; @@ -1530,16 +1560,15 @@ static void mpls_ifdown(struct net_device *dev, int event) next: if (!(nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))) alive++; - if (!rtnl_dereference(nh->nh_dev)) - deleted++; } endfor_nexthops(rt); WRITE_ONCE(rt->rt_nhn_alive, alive); - /* if there are no more nexthops, delete the route */ - if (event == NETDEV_UNREGISTER && deleted == rt->rt_nhn) - mpls_route_update(net, index, NULL, NULL); + if (nh_del) + mpls_route_update(net, index, rt, NULL); } + + return 0; } static void mpls_ifup(struct net_device *dev, unsigned int flags) @@ -1597,8 +1626,12 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, return NOTIFY_OK; switch (event) { + int err; + case NETDEV_DOWN: - mpls_ifdown(dev, event); + err = mpls_ifdown(dev, event); + if (err) + return notifier_from_errno(err); break; case NETDEV_UP: flags = dev_get_flags(dev); @@ -1609,13 +1642,18 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, break; case NETDEV_CHANGE: flags = dev_get_flags(dev); - if (flags & (IFF_RUNNING | IFF_LOWER_UP)) + if (flags & (IFF_RUNNING | IFF_LOWER_UP)) { mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN); - else - mpls_ifdown(dev, event); + } else { + err = mpls_ifdown(dev, event); + if (err) + return notifier_from_errno(err); + } break; case NETDEV_UNREGISTER: - mpls_ifdown(dev, event); + err = mpls_ifdown(dev, event); + if (err) + return notifier_from_errno(err); mdev = mpls_dev_get(dev); if (mdev) { mpls_dev_sysctl_unregister(dev, mdev); @@ -1626,8 +1664,6 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, case NETDEV_CHANGENAME: mdev = mpls_dev_get(dev); if (mdev) { - int err; - mpls_dev_sysctl_unregister(dev, mdev); err = mpls_dev_sysctl_register(dev, mdev); if (err) From 189168181bb67825a14e8083d1503cfdc2891ebf Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Mon, 29 Nov 2021 15:15:06 +0900 Subject: [PATCH 17/85] net: mpls: Remove rcu protection from nh_dev Following the previous commit, nh_dev can no longer be accessed and modified concurrently. Signed-off-by: Benjamin Poirier Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 39 +++++++++++++++------------------------ net/mpls/internal.h | 2 +- 2 files changed, 16 insertions(+), 25 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 6e587feb705c..0c7bde1c14a6 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -409,7 +409,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, goto err; /* Find the output device */ - out_dev = rcu_dereference(nh->nh_dev); + out_dev = nh->nh_dev; if (!mpls_output_possible(out_dev)) goto tx_err; @@ -698,7 +698,7 @@ static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt, (dev->addr_len != nh->nh_via_alen)) goto errout; - RCU_INIT_POINTER(nh->nh_dev, dev); + nh->nh_dev = dev; if (!(dev->flags & IFF_UP)) { nh->nh_flags |= RTNH_F_DEAD; @@ -1510,12 +1510,9 @@ static int mpls_ifdown(struct net_device *dev, int event) u8 deleted = 0; for_nexthops(rt) { - struct net_device *nh_dev = - rtnl_dereference(nh->nh_dev); - - if (!nh_dev || nh_dev == dev) + if (!nh->nh_dev || nh->nh_dev == dev) deleted++; - if (nh_dev == dev) + if (nh->nh_dev == dev) nh_del = true; } endfor_nexthops(rt); @@ -1540,7 +1537,7 @@ static int mpls_ifdown(struct net_device *dev, int event) change_nexthops(rt) { unsigned int nh_flags = nh->nh_flags; - if (rtnl_dereference(nh->nh_dev) != dev) + if (nh->nh_dev != dev) goto next; switch (event) { @@ -1553,7 +1550,7 @@ static int mpls_ifdown(struct net_device *dev, int event) break; } if (event == NETDEV_UNREGISTER) - RCU_INIT_POINTER(nh->nh_dev, NULL); + nh->nh_dev = NULL; if (nh->nh_flags != nh_flags) WRITE_ONCE(nh->nh_flags, nh_flags); @@ -1588,14 +1585,12 @@ static void mpls_ifup(struct net_device *dev, unsigned int flags) alive = 0; change_nexthops(rt) { unsigned int nh_flags = nh->nh_flags; - struct net_device *nh_dev = - rtnl_dereference(nh->nh_dev); if (!(nh_flags & flags)) { alive++; continue; } - if (nh_dev != dev) + if (nh->nh_dev != dev) continue; alive++; nh_flags &= ~flags; @@ -2030,7 +2025,7 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh), nh->nh_via_alen)) goto nla_put_failure; - dev = rtnl_dereference(nh->nh_dev); + dev = nh->nh_dev; if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex)) goto nla_put_failure; if (nh->nh_flags & RTNH_F_LINKDOWN) @@ -2048,7 +2043,7 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, goto nla_put_failure; for_nexthops(rt) { - dev = rtnl_dereference(nh->nh_dev); + dev = nh->nh_dev; if (!dev) continue; @@ -2159,18 +2154,14 @@ static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, static bool mpls_rt_uses_dev(struct mpls_route *rt, const struct net_device *dev) { - struct net_device *nh_dev; - if (rt->rt_nhn == 1) { struct mpls_nh *nh = rt->rt_nh; - nh_dev = rtnl_dereference(nh->nh_dev); - if (dev == nh_dev) + if (nh->nh_dev == dev) return true; } else { for_nexthops(rt) { - nh_dev = rtnl_dereference(nh->nh_dev); - if (nh_dev == dev) + if (nh->nh_dev == dev) return true; } endfor_nexthops(rt); } @@ -2258,7 +2249,7 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt) size_t nhsize = 0; for_nexthops(rt) { - if (!rtnl_dereference(nh->nh_dev)) + if (!nh->nh_dev) continue; nhsize += nla_total_size(sizeof(struct rtnexthop)); /* RTA_VIA */ @@ -2504,7 +2495,7 @@ static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh), nh->nh_via_alen)) goto nla_put_failure; - dev = rtnl_dereference(nh->nh_dev); + dev = nh->nh_dev; if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex)) goto nla_put_failure; @@ -2543,7 +2534,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) rt0 = mpls_rt_alloc(1, lo->addr_len, 0); if (IS_ERR(rt0)) goto nort0; - RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo); + rt0->rt_nh->nh_dev = lo; rt0->rt_protocol = RTPROT_KERNEL; rt0->rt_payload_type = MPT_IPV4; rt0->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT; @@ -2557,7 +2548,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) rt2 = mpls_rt_alloc(1, lo->addr_len, 0); if (IS_ERR(rt2)) goto nort2; - RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo); + rt2->rt_nh->nh_dev = lo; rt2->rt_protocol = RTPROT_KERNEL; rt2->rt_payload_type = MPT_IPV6; rt2->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT; diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 838cdfc10e47..893df00b77b6 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -87,7 +87,7 @@ enum mpls_payload_type { }; struct mpls_nh { /* next hop label forwarding entry */ - struct net_device __rcu *nh_dev; + struct net_device *nh_dev; /* nh_flags is accessed under RCU in the packet path; it is * modified handling netdev events with rtnl lock held From 5961060692f8b17cd2080620a3d27b95d2ae05ca Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Mon, 29 Nov 2021 17:32:12 +0800 Subject: [PATCH 18/85] net/tls: Fix authentication failure in CCM mode When the TLS cipher suite uses CCM mode, including AES CCM and SM4 CCM, the first byte of the B0 block is flags, and the real IV starts from the second byte. The XOR operation of the IV and rec_seq should be skip this byte, that is, add the iv_offset. Fixes: f295b3ae9f59 ("net/tls: Add support of AES128-CCM based ciphers") Signed-off-by: Tianjia Zhang Cc: Vakul Garg Cc: stable@vger.kernel.org # v5.2+ Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index d3e7ff90889e..dfe623a4e72f 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -521,7 +521,7 @@ static int tls_do_encryption(struct sock *sk, memcpy(&rec->iv_data[iv_offset], tls_ctx->tx.iv, prot->iv_size + prot->salt_size); - xor_iv_with_seq(prot, rec->iv_data, tls_ctx->tx.rec_seq); + xor_iv_with_seq(prot, rec->iv_data + iv_offset, tls_ctx->tx.rec_seq); sge->offset += prot->prepend_size; sge->length -= prot->prepend_size; @@ -1499,7 +1499,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, else memcpy(iv + iv_offset, tls_ctx->rx.iv, prot->salt_size); - xor_iv_with_seq(prot, iv, tls_ctx->rx.rec_seq); + xor_iv_with_seq(prot, iv + iv_offset, tls_ctx->rx.rec_seq); /* Prepare AAD */ tls_make_aad(aad, rxm->full_len - prot->overhead_size + From d8519565447078f141c58ba4193d820f2cdf1914 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 29 Nov 2021 10:16:52 +0800 Subject: [PATCH 19/85] mctp: test: fix skb free in test device tx In our test device, we're currently freeing skbs in the transmit path with kfree(), rather than kfree_skb(). This change uses the correct kfree_skb() instead. Fixes: ded21b722995 ("mctp: Add test utils") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Jeremy Kerr Signed-off-by: David S. Miller --- net/mctp/test/utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mctp/test/utils.c b/net/mctp/test/utils.c index cc6b8803aa9d..7b7918702592 100644 --- a/net/mctp/test/utils.c +++ b/net/mctp/test/utils.c @@ -12,7 +12,7 @@ static netdev_tx_t mctp_test_dev_tx(struct sk_buff *skb, struct net_device *ndev) { - kfree(skb); + kfree_skb(skb); return NETDEV_TX_OK; } From 2191b1dfef7d45f44b5008d2148676d9f2c82874 Mon Sep 17 00:00:00 2001 From: Erik Ekman Date: Sun, 28 Nov 2021 13:37:11 +0100 Subject: [PATCH 20/85] net/mlx4_en: Update reported link modes for 1/10G When link modes were initially added in commit 2c762679435dc ("net/mlx4_en: Use PTYS register to query ethtool settings") and later updated for the new ethtool API in commit 3d8f7cc78d0eb ("net: mlx4: use new ETHTOOL_G/SSETTINGS API") the only 1/10G non-baseT link modes configured were 1000baseKX, 10000baseKX4 and 10000baseKR. It looks like these got picked to represent other modes since nothing better was available. Switch to using more specific link modes added in commit 5711a98221443 ("net: ethtool: add support for 1000BaseX and missing 10G link modes"). Tested with MCX311A-XCAT connected via DAC. Before: % sudo ethtool enp3s0 Settings for enp3s0: Supported ports: [ FIBRE ] Supported link modes: 1000baseKX/Full 10000baseKR/Full Supported pause frame use: Symmetric Receive-only Supports auto-negotiation: No Supported FEC modes: Not reported Advertised link modes: 1000baseKX/Full 10000baseKR/Full Advertised pause frame use: Symmetric Advertised auto-negotiation: No Advertised FEC modes: Not reported Speed: 10000Mb/s Duplex: Full Auto-negotiation: off Port: Direct Attach Copper PHYAD: 0 Transceiver: internal Supports Wake-on: d Wake-on: d Current message level: 0x00000014 (20) link ifdown Link detected: yes With this change: % sudo ethtool enp3s0 Settings for enp3s0: Supported ports: [ FIBRE ] Supported link modes: 1000baseX/Full 10000baseCR/Full 10000baseSR/Full Supported pause frame use: Symmetric Receive-only Supports auto-negotiation: No Supported FEC modes: Not reported Advertised link modes: 1000baseX/Full 10000baseCR/Full 10000baseSR/Full Advertised pause frame use: Symmetric Advertised auto-negotiation: No Advertised FEC modes: Not reported Speed: 10000Mb/s Duplex: Full Auto-negotiation: off Port: Direct Attach Copper PHYAD: 0 Transceiver: internal Supports Wake-on: d Wake-on: d Current message level: 0x00000014 (20) link ifdown Link detected: yes Tested-by: Michael Stapelberg Signed-off-by: Erik Ekman Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 066d79e4ecfc..10238bedd694 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -670,7 +670,7 @@ void __init mlx4_en_init_ptys2ethtool_map(void) MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_T, SPEED_1000, ETHTOOL_LINK_MODE_1000baseT_Full_BIT); MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_CX_SGMII, SPEED_1000, - ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); + ETHTOOL_LINK_MODE_1000baseX_Full_BIT); MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_KX, SPEED_1000, ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_T, SPEED_10000, @@ -682,9 +682,9 @@ void __init mlx4_en_init_ptys2ethtool_map(void) MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_KR, SPEED_10000, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_CR, SPEED_10000, - ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + ETHTOOL_LINK_MODE_10000baseCR_Full_BIT); MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_SR, SPEED_10000, - ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + ETHTOOL_LINK_MODE_10000baseSR_Full_BIT); MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_20GBASE_KR2, SPEED_20000, ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT, ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT); From aa1dcb5646fdf34a15763facf4bf5e482a2814ca Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Mon, 29 Nov 2021 05:28:23 -0800 Subject: [PATCH 21/85] atlantic: Increase delay for fw transactions The max waiting period (of 1 ms) while reading the data from FW shared buffer is too small for certain types of data (e.g., stats). There's a chance that FW could be updating buffer at the same time and driver would be unsuccessful in reading data. Firmware manual recommends to have 1 sec timeout to fix this issue. Fixes: 5cfd54d7dc186 ("net: atlantic: minimal A2 fw_ops") Signed-off-by: Dmitry Bogdanov Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- .../ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c index dd259c8f2f4f..b0e4119b9883 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c @@ -84,7 +84,7 @@ static int hw_atl2_shared_buffer_read_block(struct aq_hw_s *self, if (cnt > AQ_A2_FW_READ_TRY_MAX) return -ETIME; if (tid1.transaction_cnt_a != tid1.transaction_cnt_b) - udelay(1); + mdelay(1); } while (tid1.transaction_cnt_a != tid1.transaction_cnt_b); hw_atl2_mif_shared_buf_read(self, offset, (u32 *)data, dwords); @@ -339,8 +339,11 @@ static int aq_a2_fw_update_stats(struct aq_hw_s *self) { struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv; struct statistics_s stats; + int err; - hw_atl2_shared_buffer_read_safe(self, stats, &stats); + err = hw_atl2_shared_buffer_read_safe(self, stats, &stats); + if (err) + return err; #define AQ_SDELTA(_N_, _F_) (self->curr_stats._N_ += \ stats.msm._F_ - priv->last_stats.msm._F_) From aa685acd98eae25d5351e30288d6cfb65b9c80a5 Mon Sep 17 00:00:00 2001 From: Nikita Danilov Date: Mon, 29 Nov 2021 05:28:24 -0800 Subject: [PATCH 22/85] atlatnic: enable Nbase-t speeds with base-t When 2.5G is advertised, N-Base should be advertised against the T-base caps. N5G is out of use in baseline code and driver should treat both 5G and N5G (and also 2.5G and N2.5G) equally from user perspective. Fixes: 5cfd54d7dc186 ("net: atlantic: minimal A2 fw_ops") Signed-off-by: Nikita Danilov Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- .../ethernet/aquantia/atlantic/aq_common.h | 25 +++++++++---------- .../atlantic/hw_atl/hw_atl_utils_fw2x.c | 3 --- .../atlantic/hw_atl2/hw_atl2_utils_fw.c | 4 +-- 3 files changed, 13 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_common.h b/drivers/net/ethernet/aquantia/atlantic/aq_common.h index 23b2d390fcdd..4ad8f36fcade 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_common.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_common.h @@ -53,20 +53,19 @@ #define AQ_NIC_RATE_10G BIT(0) #define AQ_NIC_RATE_5G BIT(1) -#define AQ_NIC_RATE_5GSR BIT(2) -#define AQ_NIC_RATE_2G5 BIT(3) -#define AQ_NIC_RATE_1G BIT(4) -#define AQ_NIC_RATE_100M BIT(5) -#define AQ_NIC_RATE_10M BIT(6) -#define AQ_NIC_RATE_1G_HALF BIT(7) -#define AQ_NIC_RATE_100M_HALF BIT(8) -#define AQ_NIC_RATE_10M_HALF BIT(9) +#define AQ_NIC_RATE_2G5 BIT(2) +#define AQ_NIC_RATE_1G BIT(3) +#define AQ_NIC_RATE_100M BIT(4) +#define AQ_NIC_RATE_10M BIT(5) +#define AQ_NIC_RATE_1G_HALF BIT(6) +#define AQ_NIC_RATE_100M_HALF BIT(7) +#define AQ_NIC_RATE_10M_HALF BIT(8) -#define AQ_NIC_RATE_EEE_10G BIT(10) -#define AQ_NIC_RATE_EEE_5G BIT(11) -#define AQ_NIC_RATE_EEE_2G5 BIT(12) -#define AQ_NIC_RATE_EEE_1G BIT(13) -#define AQ_NIC_RATE_EEE_100M BIT(14) +#define AQ_NIC_RATE_EEE_10G BIT(9) +#define AQ_NIC_RATE_EEE_5G BIT(10) +#define AQ_NIC_RATE_EEE_2G5 BIT(11) +#define AQ_NIC_RATE_EEE_1G BIT(12) +#define AQ_NIC_RATE_EEE_100M BIT(13) #define AQ_NIC_RATE_EEE_MSK (AQ_NIC_RATE_EEE_10G |\ AQ_NIC_RATE_EEE_5G |\ AQ_NIC_RATE_EEE_2G5 |\ diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c index eac631c45c56..4d4cfbc91e19 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c @@ -132,9 +132,6 @@ static enum hw_atl_fw2x_rate link_speed_mask_2fw2x_ratemask(u32 speed) if (speed & AQ_NIC_RATE_5G) rate |= FW2X_RATE_5G; - if (speed & AQ_NIC_RATE_5GSR) - rate |= FW2X_RATE_5G; - if (speed & AQ_NIC_RATE_2G5) rate |= FW2X_RATE_2G5; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c index b0e4119b9883..b7a9b0ed6df3 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c @@ -154,7 +154,7 @@ static void a2_link_speed_mask2fw(u32 speed, { link_options->rate_10G = !!(speed & AQ_NIC_RATE_10G); link_options->rate_5G = !!(speed & AQ_NIC_RATE_5G); - link_options->rate_N5G = !!(speed & AQ_NIC_RATE_5GSR); + link_options->rate_N5G = link_options->rate_5G; link_options->rate_2P5G = !!(speed & AQ_NIC_RATE_2G5); link_options->rate_N2P5G = link_options->rate_2P5G; link_options->rate_1G = !!(speed & AQ_NIC_RATE_1G); @@ -192,8 +192,6 @@ static u32 a2_fw_lkp_to_mask(struct lkp_link_caps_s *lkp_link_caps) rate |= AQ_NIC_RATE_10G; if (lkp_link_caps->rate_5G) rate |= AQ_NIC_RATE_5G; - if (lkp_link_caps->rate_N5G) - rate |= AQ_NIC_RATE_5GSR; if (lkp_link_caps->rate_2P5G) rate |= AQ_NIC_RATE_2G5; if (lkp_link_caps->rate_1G) From 2465c802232bc8d2b5bd83b55b08d05c11808704 Mon Sep 17 00:00:00 2001 From: Sameer Saurabh Date: Mon, 29 Nov 2021 05:28:25 -0800 Subject: [PATCH 23/85] atlantic: Fix to display FW bundle version instead of FW mac version. The correct way to reflect firmware version is to use bundle version. Hence populating the same instead of MAC fw version. Fixes: c1be0bf092bd2 ("net: atlantic: common functions needed for basic A2 init/deinit hw_ops") Signed-off-by: Sameer Saurabh Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- .../ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c index b7a9b0ed6df3..e164ac5b55a8 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c @@ -500,9 +500,9 @@ u32 hw_atl2_utils_get_fw_version(struct aq_hw_s *self) hw_atl2_shared_buffer_read_safe(self, version, &version); /* A2 FW version is stored in reverse order */ - return version.mac.major << 24 | - version.mac.minor << 16 | - version.mac.build; + return version.bundle.major << 24 | + version.bundle.minor << 16 | + version.bundle.build; } int hw_atl2_utils_get_action_resolve_table_caps(struct aq_hw_s *self, From 413d5e09caa5a11da9c7d72401ba0588466a04c0 Mon Sep 17 00:00:00 2001 From: Nikita Danilov Date: Mon, 29 Nov 2021 05:28:26 -0800 Subject: [PATCH 24/85] atlantic: Add missing DIDs and fix 115c. At the late production stages new dev ids were introduced. These are now in production, so its important for the driver to recognize these. And also fix the board caps for AQC115C adapter. Fixes: b3f0c79cba206 ("net: atlantic: A2 hw_ops skeleton") Signed-off-by: Nikita Danilov Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- .../net/ethernet/aquantia/atlantic/aq_common.h | 2 ++ .../ethernet/aquantia/atlantic/aq_pci_func.c | 7 ++++++- .../aquantia/atlantic/hw_atl2/hw_atl2.c | 17 +++++++++++++++++ .../aquantia/atlantic/hw_atl2/hw_atl2.h | 2 ++ 4 files changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_common.h b/drivers/net/ethernet/aquantia/atlantic/aq_common.h index 4ad8f36fcade..ace691d7cd75 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_common.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_common.h @@ -40,10 +40,12 @@ #define AQ_DEVICE_ID_AQC113DEV 0x00C0 #define AQ_DEVICE_ID_AQC113CS 0x94C0 +#define AQ_DEVICE_ID_AQC113CA 0x34C0 #define AQ_DEVICE_ID_AQC114CS 0x93C0 #define AQ_DEVICE_ID_AQC113 0x04C0 #define AQ_DEVICE_ID_AQC113C 0x14C0 #define AQ_DEVICE_ID_AQC115C 0x12C0 +#define AQ_DEVICE_ID_AQC116C 0x11C0 #define HW_ATL_NIC_NAME "Marvell (aQuantia) AQtion 10Gbit Network Adapter" diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index d4b1976ee69b..797a95142d1f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -49,6 +49,8 @@ static const struct pci_device_id aq_pci_tbl[] = { { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC113), }, { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC113C), }, { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC115C), }, + { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC113CA), }, + { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC116C), }, {} }; @@ -85,7 +87,10 @@ static const struct aq_board_revision_s hw_atl_boards[] = { { AQ_DEVICE_ID_AQC113CS, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc113, }, { AQ_DEVICE_ID_AQC114CS, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc113, }, { AQ_DEVICE_ID_AQC113C, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc113, }, - { AQ_DEVICE_ID_AQC115C, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc113, }, + { AQ_DEVICE_ID_AQC115C, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc115c, }, + { AQ_DEVICE_ID_AQC113CA, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc113, }, + { AQ_DEVICE_ID_AQC116C, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc116c, }, + }; MODULE_DEVICE_TABLE(pci, aq_pci_tbl); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c index c98708bb044c..0a28428a0cb7 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c @@ -72,6 +72,23 @@ const struct aq_hw_caps_s hw_atl2_caps_aqc113 = { AQ_NIC_RATE_10M_HALF, }; +const struct aq_hw_caps_s hw_atl2_caps_aqc115c = { + DEFAULT_BOARD_BASIC_CAPABILITIES, + .media_type = AQ_HW_MEDIA_TYPE_TP, + .link_speed_msk = AQ_NIC_RATE_2G5 | + AQ_NIC_RATE_1G | + AQ_NIC_RATE_100M | + AQ_NIC_RATE_10M, +}; + +const struct aq_hw_caps_s hw_atl2_caps_aqc116c = { + DEFAULT_BOARD_BASIC_CAPABILITIES, + .media_type = AQ_HW_MEDIA_TYPE_TP, + .link_speed_msk = AQ_NIC_RATE_1G | + AQ_NIC_RATE_100M | + AQ_NIC_RATE_10M, +}; + static u32 hw_atl2_sem_act_rslvr_get(struct aq_hw_s *self) { return hw_atl_reg_glb_cpu_sem_get(self, HW_ATL2_FW_SM_ACT_RSLVR); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.h index de8723f1c28a..346f0dc9912e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.h @@ -9,6 +9,8 @@ #include "aq_common.h" extern const struct aq_hw_caps_s hw_atl2_caps_aqc113; +extern const struct aq_hw_caps_s hw_atl2_caps_aqc115c; +extern const struct aq_hw_caps_s hw_atl2_caps_aqc116c; extern const struct aq_hw_ops hw_atl2_ops; #endif /* HW_ATL2_H */ From 03fa512189eb9b55ded5f3e81ad638315555b340 Mon Sep 17 00:00:00 2001 From: Sameer Saurabh Date: Mon, 29 Nov 2021 05:28:27 -0800 Subject: [PATCH 25/85] Remove Half duplex mode speed capabilities. Since Half Duplex mode has been deprecated by the firmware, driver should not advertise Half Duplex speed in ethtool support link speed values. Fixes: 071a02046c262 ("net: atlantic: A2: half duplex support") Signed-off-by: Sameer Saurabh Signed-off-by: Igor Russkikh Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c index 0a28428a0cb7..5dfc751572ed 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c @@ -65,11 +65,8 @@ const struct aq_hw_caps_s hw_atl2_caps_aqc113 = { AQ_NIC_RATE_5G | AQ_NIC_RATE_2G5 | AQ_NIC_RATE_1G | - AQ_NIC_RATE_1G_HALF | AQ_NIC_RATE_100M | - AQ_NIC_RATE_100M_HALF | - AQ_NIC_RATE_10M | - AQ_NIC_RATE_10M_HALF, + AQ_NIC_RATE_10M, }; const struct aq_hw_caps_s hw_atl2_caps_aqc115c = { From 2087ced0fc3a6d45203925750a2b1bcd5402e639 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Mon, 29 Nov 2021 05:28:28 -0800 Subject: [PATCH 26/85] atlantic: Fix statistics logic for production hardware B0 is the main and widespread device revision of atlantic2 HW. In the current state, driver will incorrectly fetch the statistics for this revision. Fixes: 5cfd54d7dc186 ("net: atlantic: minimal A2 fw_ops") Signed-off-by: Dmitry Bogdanov Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- .../net/ethernet/aquantia/atlantic/aq_hw.h | 2 + .../net/ethernet/aquantia/atlantic/aq_nic.c | 10 +- .../aquantia/atlantic/hw_atl/hw_atl_utils.c | 15 ++- .../aquantia/atlantic/hw_atl2/hw_atl2_utils.h | 38 ++++++- .../atlantic/hw_atl2/hw_atl2_utils_fw.c | 99 +++++++++++++++---- 5 files changed, 138 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h index 062a300a566a..dbd284660135 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h @@ -80,6 +80,8 @@ struct aq_hw_link_status_s { }; struct aq_stats_s { + u64 brc; + u64 btc; u64 uprc; u64 mprc; u64 bprc; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 1acf544afeb4..02c4e3b4a6a5 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -905,8 +905,14 @@ u64 *aq_nic_get_stats(struct aq_nic_s *self, u64 *data) data[++i] = stats->mbtc; data[++i] = stats->bbrc; data[++i] = stats->bbtc; - data[++i] = stats->ubrc + stats->mbrc + stats->bbrc; - data[++i] = stats->ubtc + stats->mbtc + stats->bbtc; + if (stats->brc) + data[++i] = stats->brc; + else + data[++i] = stats->ubrc + stats->mbrc + stats->bbrc; + if (stats->btc) + data[++i] = stats->btc; + else + data[++i] = stats->ubtc + stats->mbtc + stats->bbtc; data[++i] = stats->dma_pkt_rc; data[++i] = stats->dma_pkt_tc; data[++i] = stats->dma_oct_rc; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c index 3f1704cbe1cb..7e88d7234b14 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c @@ -867,12 +867,20 @@ static int hw_atl_fw1x_deinit(struct aq_hw_s *self) int hw_atl_utils_update_stats(struct aq_hw_s *self) { struct aq_stats_s *cs = &self->curr_stats; + struct aq_stats_s curr_stats = *cs; struct hw_atl_utils_mbox mbox; + bool corrupted_stats = false; hw_atl_utils_mpi_read_stats(self, &mbox); -#define AQ_SDELTA(_N_) (self->curr_stats._N_ += \ - mbox.stats._N_ - self->last_stats._N_) +#define AQ_SDELTA(_N_) \ +do { \ + if (!corrupted_stats && \ + ((s64)(mbox.stats._N_ - self->last_stats._N_)) >= 0) \ + curr_stats._N_ += mbox.stats._N_ - self->last_stats._N_; \ + else \ + corrupted_stats = true; \ +} while (0) if (self->aq_link_status.mbps) { AQ_SDELTA(uprc); @@ -892,6 +900,9 @@ int hw_atl_utils_update_stats(struct aq_hw_s *self) AQ_SDELTA(bbrc); AQ_SDELTA(bbtc); AQ_SDELTA(dpc); + + if (!corrupted_stats) + *cs = curr_stats; } #undef AQ_SDELTA diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h index b66fa346581c..6bad64c77b87 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h @@ -239,7 +239,8 @@ struct version_s { u8 minor; u16 build; } phy; - u32 rsvd; + u32 drv_iface_ver:4; + u32 rsvd:28; }; struct link_status_s { @@ -424,7 +425,7 @@ struct cable_diag_status_s { u16 rsvd2; }; -struct statistics_s { +struct statistics_a0_s { struct { u32 link_up; u32 link_down; @@ -457,6 +458,33 @@ struct statistics_s { u32 reserve_fw_gap; }; +struct __packed statistics_b0_s { + u64 rx_good_octets; + u64 rx_pause_frames; + u64 rx_good_frames; + u64 rx_errors; + u64 rx_unicast_frames; + u64 rx_multicast_frames; + u64 rx_broadcast_frames; + + u64 tx_good_octets; + u64 tx_pause_frames; + u64 tx_good_frames; + u64 tx_errors; + u64 tx_unicast_frames; + u64 tx_multicast_frames; + u64 tx_broadcast_frames; + + u32 main_loop_cycles; +}; + +struct __packed statistics_s { + union __packed { + struct statistics_a0_s a0; + struct statistics_b0_s b0; + }; +}; + struct filter_caps_s { u8 l2_filters_base_index:6; u8 flexible_filter_mask:2; @@ -545,7 +573,7 @@ struct management_status_s { u32 rsvd5; }; -struct fw_interface_out { +struct __packed fw_interface_out { struct transaction_counter_s transaction_id; struct version_s version; struct link_status_s link_status; @@ -569,7 +597,6 @@ struct fw_interface_out { struct core_dump_s core_dump; u32 rsvd11; struct statistics_s stats; - u32 rsvd12; struct filter_caps_s filter_caps; struct device_caps_s device_caps; u32 rsvd13; @@ -592,6 +619,9 @@ struct fw_interface_out { #define AQ_HOST_MODE_LOW_POWER 3U #define AQ_HOST_MODE_SHUTDOWN 4U +#define AQ_A2_FW_INTERFACE_A0 0 +#define AQ_A2_FW_INTERFACE_B0 1 + int hw_atl2_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops); int hw_atl2_utils_soft_reset(struct aq_hw_s *self); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c index e164ac5b55a8..58d426dda3ed 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c @@ -333,18 +333,22 @@ static int aq_a2_fw_get_mac_permanent(struct aq_hw_s *self, u8 *mac) return 0; } -static int aq_a2_fw_update_stats(struct aq_hw_s *self) +static void aq_a2_fill_a0_stats(struct aq_hw_s *self, + struct statistics_s *stats) { struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv; - struct statistics_s stats; - int err; + struct aq_stats_s *cs = &self->curr_stats; + struct aq_stats_s curr_stats = *cs; + bool corrupted_stats = false; - err = hw_atl2_shared_buffer_read_safe(self, stats, &stats); - if (err) - return err; - -#define AQ_SDELTA(_N_, _F_) (self->curr_stats._N_ += \ - stats.msm._F_ - priv->last_stats.msm._F_) +#define AQ_SDELTA(_N, _F) \ +do { \ + if (!corrupted_stats && \ + ((s64)(stats->a0.msm._F - priv->last_stats.a0.msm._F)) >= 0) \ + curr_stats._N += stats->a0.msm._F - priv->last_stats.a0.msm._F;\ + else \ + corrupted_stats = true; \ +} while (0) if (self->aq_link_status.mbps) { AQ_SDELTA(uprc, rx_unicast_frames); @@ -363,17 +367,76 @@ static int aq_a2_fw_update_stats(struct aq_hw_s *self) AQ_SDELTA(mbtc, tx_multicast_octets); AQ_SDELTA(bbrc, rx_broadcast_octets); AQ_SDELTA(bbtc, tx_broadcast_octets); + + if (!corrupted_stats) + *cs = curr_stats; } #undef AQ_SDELTA - self->curr_stats.dma_pkt_rc = - hw_atl_stats_rx_dma_good_pkt_counter_get(self); - self->curr_stats.dma_pkt_tc = - hw_atl_stats_tx_dma_good_pkt_counter_get(self); - self->curr_stats.dma_oct_rc = - hw_atl_stats_rx_dma_good_octet_counter_get(self); - self->curr_stats.dma_oct_tc = - hw_atl_stats_tx_dma_good_octet_counter_get(self); - self->curr_stats.dpc = hw_atl_rpb_rx_dma_drop_pkt_cnt_get(self); + +} + +static void aq_a2_fill_b0_stats(struct aq_hw_s *self, + struct statistics_s *stats) +{ + struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv; + struct aq_stats_s *cs = &self->curr_stats; + struct aq_stats_s curr_stats = *cs; + bool corrupted_stats = false; + +#define AQ_SDELTA(_N, _F) \ +do { \ + if (!corrupted_stats && \ + ((s64)(stats->b0._F - priv->last_stats.b0._F)) >= 0) \ + curr_stats._N += stats->b0._F - priv->last_stats.b0._F; \ + else \ + corrupted_stats = true; \ +} while (0) + + if (self->aq_link_status.mbps) { + AQ_SDELTA(uprc, rx_unicast_frames); + AQ_SDELTA(mprc, rx_multicast_frames); + AQ_SDELTA(bprc, rx_broadcast_frames); + AQ_SDELTA(erpr, rx_errors); + AQ_SDELTA(brc, rx_good_octets); + + AQ_SDELTA(uptc, tx_unicast_frames); + AQ_SDELTA(mptc, tx_multicast_frames); + AQ_SDELTA(bptc, tx_broadcast_frames); + AQ_SDELTA(erpt, tx_errors); + AQ_SDELTA(btc, tx_good_octets); + + if (!corrupted_stats) + *cs = curr_stats; + } +#undef AQ_SDELTA +} + +static int aq_a2_fw_update_stats(struct aq_hw_s *self) +{ + struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv; + struct aq_stats_s *cs = &self->curr_stats; + struct statistics_s stats; + struct version_s version; + int err; + + err = hw_atl2_shared_buffer_read_safe(self, version, &version); + if (err) + return err; + + err = hw_atl2_shared_buffer_read_safe(self, stats, &stats); + if (err) + return err; + + if (version.drv_iface_ver == AQ_A2_FW_INTERFACE_A0) + aq_a2_fill_a0_stats(self, &stats); + else + aq_a2_fill_b0_stats(self, &stats); + + cs->dma_pkt_rc = hw_atl_stats_rx_dma_good_pkt_counter_get(self); + cs->dma_pkt_tc = hw_atl_stats_tx_dma_good_pkt_counter_get(self); + cs->dma_oct_rc = hw_atl_stats_rx_dma_good_octet_counter_get(self); + cs->dma_oct_tc = hw_atl_stats_tx_dma_good_octet_counter_get(self); + cs->dpc = hw_atl_rpb_rx_dma_drop_pkt_cnt_get(self); memcpy(&priv->last_stats, &stats, sizeof(stats)); From 060a0fb721ec5bbe02ae322e434ec87dc25ed6e9 Mon Sep 17 00:00:00 2001 From: Sameer Saurabh Date: Mon, 29 Nov 2021 05:28:29 -0800 Subject: [PATCH 27/85] atlantic: Remove warn trace message. Remove the warn trace message - it's not a correct check here, because the function can still be called on the device in DOWN state Fixes: 508f2e3dce454 ("net: atlantic: split rx and tx per-queue stats") Signed-off-by: Sameer Saurabh Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_vec.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c index d281322d7dd2..f4774cf051c9 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c @@ -362,9 +362,6 @@ unsigned int aq_vec_get_sw_stats(struct aq_vec_s *self, const unsigned int tc, u { unsigned int count; - WARN_ONCE(!aq_vec_is_valid_tc(self, tc), - "Invalid tc %u (#rx=%u, #tx=%u)\n", - tc, self->rx_rings, self->tx_rings); if (!aq_vec_is_valid_tc(self, tc)) return 0; From cdef485217d30382f3bf6448c54b4401648fe3f1 Mon Sep 17 00:00:00 2001 From: msizanoen1 Date: Tue, 23 Nov 2021 13:48:32 +0100 Subject: [PATCH 28/85] ipv6: fix memory leak in fib6_rule_suppress The kernel leaks memory when a `fib` rule is present in IPv6 nftables firewall rules and a suppress_prefix rule is present in the IPv6 routing rules (used by certain tools such as wg-quick). In such scenarios, every incoming packet will leak an allocation in `ip6_dst_cache` slab cache. After some hours of `bpftrace`-ing and source code reading, I tracked down the issue to ca7a03c41753 ("ipv6: do not free rt if FIB_LOOKUP_NOREF is set on suppress rule"). The problem with that change is that the generic `args->flags` always have `FIB_LOOKUP_NOREF` set[1][2] but the IPv6-specific flag `RT6_LOOKUP_F_DST_NOREF` might not be, leading to `fib6_rule_suppress` not decreasing the refcount when needed. How to reproduce: - Add the following nftables rule to a prerouting chain: meta nfproto ipv6 fib saddr . mark . iif oif missing drop This can be done with: sudo nft create table inet test sudo nft create chain inet test test_chain '{ type filter hook prerouting priority filter + 10; policy accept; }' sudo nft add rule inet test test_chain meta nfproto ipv6 fib saddr . mark . iif oif missing drop - Run: sudo ip -6 rule add table main suppress_prefixlength 0 - Watch `sudo slabtop -o | grep ip6_dst_cache` to see memory usage increase with every incoming ipv6 packet. This patch exposes the protocol-specific flags to the protocol specific `suppress` function, and check the protocol-specific `flags` argument for RT6_LOOKUP_F_DST_NOREF instead of the generic FIB_LOOKUP_NOREF when decreasing the refcount, like this. [1]: https://github.com/torvalds/linux/blob/ca7a03c4175366a92cee0ccc4fec0038c3266e26/net/ipv6/fib6_rules.c#L71 [2]: https://github.com/torvalds/linux/blob/ca7a03c4175366a92cee0ccc4fec0038c3266e26/net/ipv6/fib6_rules.c#L99 Link: https://bugzilla.kernel.org/show_bug.cgi?id=215105 Fixes: ca7a03c41753 ("ipv6: do not free rt if FIB_LOOKUP_NOREF is set on suppress rule") Cc: stable@vger.kernel.org Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- include/net/fib_rules.h | 4 +++- net/core/fib_rules.c | 2 +- net/ipv4/fib_rules.c | 1 + net/ipv6/fib6_rules.c | 4 ++-- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 4b10676c69d1..bd07484ab9dd 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -69,7 +69,7 @@ struct fib_rules_ops { int (*action)(struct fib_rule *, struct flowi *, int, struct fib_lookup_arg *); - bool (*suppress)(struct fib_rule *, + bool (*suppress)(struct fib_rule *, int, struct fib_lookup_arg *); int (*match)(struct fib_rule *, struct flowi *, int); @@ -218,7 +218,9 @@ INDIRECT_CALLABLE_DECLARE(int fib4_rule_action(struct fib_rule *rule, struct fib_lookup_arg *arg)); INDIRECT_CALLABLE_DECLARE(bool fib6_rule_suppress(struct fib_rule *rule, + int flags, struct fib_lookup_arg *arg)); INDIRECT_CALLABLE_DECLARE(bool fib4_rule_suppress(struct fib_rule *rule, + int flags, struct fib_lookup_arg *arg)); #endif diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 79df7cd9dbc1..1bb567a3b329 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -323,7 +323,7 @@ jumped: if (!err && ops->suppress && INDIRECT_CALL_MT(ops->suppress, fib6_rule_suppress, fib4_rule_suppress, - rule, arg)) + rule, flags, arg)) continue; if (err != -EAGAIN) { diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index ce54a30c2ef1..364ad3446b2f 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -141,6 +141,7 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_action(struct fib_rule *rule, } INDIRECT_CALLABLE_SCOPE bool fib4_rule_suppress(struct fib_rule *rule, + int flags, struct fib_lookup_arg *arg) { struct fib_result *result = (struct fib_result *) arg->result; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 40f3e4f9f33a..dcedfe29d9d9 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -267,6 +267,7 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_action(struct fib_rule *rule, } INDIRECT_CALLABLE_SCOPE bool fib6_rule_suppress(struct fib_rule *rule, + int flags, struct fib_lookup_arg *arg) { struct fib6_result *res = arg->result; @@ -294,8 +295,7 @@ INDIRECT_CALLABLE_SCOPE bool fib6_rule_suppress(struct fib_rule *rule, return false; suppress_route: - if (!(arg->flags & FIB_LOOKUP_NOREF)) - ip6_rt_put(rt); + ip6_rt_put_flags(rt, flags); return true; } From ca77fba821351190777b236ce749d7c4d353102e Mon Sep 17 00:00:00 2001 From: Eiichi Tsukata Date: Sun, 21 Nov 2021 04:16:07 +0000 Subject: [PATCH 29/85] rxrpc: Fix rxrpc_peer leak in rxrpc_look_up_bundle() Need to call rxrpc_put_peer() for bundle candidate before kfree() as it holds a ref to rxrpc_peer. [DH: v2: Changed to abstract out the bundle freeing code into a function] Fixes: 245500d853e9 ("rxrpc: Rewrite the client connection manager") Signed-off-by: Eiichi Tsukata Signed-off-by: David Howells Reviewed-by: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/20211121041608.133740-1-eiichi.tsukata@nutanix.com/ # v1 --- net/rxrpc/conn_client.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index dbea0bfee48e..8120138dac01 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -135,16 +135,20 @@ struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle) return bundle; } +static void rxrpc_free_bundle(struct rxrpc_bundle *bundle) +{ + rxrpc_put_peer(bundle->params.peer); + kfree(bundle); +} + void rxrpc_put_bundle(struct rxrpc_bundle *bundle) { unsigned int d = bundle->debug_id; unsigned int u = atomic_dec_return(&bundle->usage); _debug("PUT B=%x %u", d, u); - if (u == 0) { - rxrpc_put_peer(bundle->params.peer); - kfree(bundle); - } + if (u == 0) + rxrpc_free_bundle(bundle); } /* @@ -328,7 +332,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c return candidate; found_bundle_free: - kfree(candidate); + rxrpc_free_bundle(candidate); found_bundle: rxrpc_get_bundle(bundle); spin_unlock(&local->client_bundles_lock); From beacff50edbd6c9659a6f15fc7f6126909fade29 Mon Sep 17 00:00:00 2001 From: Eiichi Tsukata Date: Sun, 21 Nov 2021 04:16:08 +0000 Subject: [PATCH 30/85] rxrpc: Fix rxrpc_local leak in rxrpc_lookup_peer() Need to call rxrpc_put_local() for peer candidate before kfree() as it holds a ref to rxrpc_local. [DH: v2: Changed to abstract the peer freeing code out into a function] Fixes: 9ebeddef58c4 ("rxrpc: rxrpc_peer needs to hold a ref on the rxrpc_local record") Signed-off-by: Eiichi Tsukata Signed-off-by: David Howells Reviewed-by: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/all/20211121041608.133740-2-eiichi.tsukata@nutanix.com/ # v1 --- net/rxrpc/peer_object.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 68396d052052..0298fe2ad6d3 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -299,6 +299,12 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx, return peer; } +static void rxrpc_free_peer(struct rxrpc_peer *peer) +{ + rxrpc_put_local(peer->local); + kfree_rcu(peer, rcu); +} + /* * Set up a new incoming peer. There shouldn't be any other matching peers * since we've already done a search in the list from the non-reentrant context @@ -365,7 +371,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, spin_unlock_bh(&rxnet->peer_hash_lock); if (peer) - kfree(candidate); + rxrpc_free_peer(candidate); else peer = candidate; } @@ -420,8 +426,7 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer) list_del_init(&peer->keepalive_link); spin_unlock_bh(&rxnet->peer_hash_lock); - rxrpc_put_local(peer->local); - kfree_rcu(peer, rcu); + rxrpc_free_peer(peer); } /* @@ -457,8 +462,7 @@ void rxrpc_put_peer_locked(struct rxrpc_peer *peer) if (n == 0) { hash_del_rcu(&peer->hash_link); list_del_init(&peer->keepalive_link); - rxrpc_put_local(peer->local); - kfree_rcu(peer, rcu); + rxrpc_free_peer(peer); } } From 191587cd1a5f36852a0fc32cff2d5bc7680551db Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 29 Nov 2021 14:41:48 +0100 Subject: [PATCH 31/85] mt76: fix key pointer overwrite in mt7921s_write_txwi/mt7663_usb_sdio_write_txwi Fix pointer overwrite in mt7921s_tx_prepare_skb and mt7663_usb_sdio_tx_prepare_skb routines since in commit '2a9e9857473b ("mt76: fix possible pktid leak") mt76_tx_status_skb_add() has been moved out of mt7921s_write_txwi()/mt7663_usb_sdio_write_txwi() overwriting hw key pointer in ieee80211_tx_info structure. Fix the issue saving key pointer before running mt76_tx_status_skb_add(). Fixes: 2a9e9857473b ("mt76: fix possible pktid leak") Tested-by: Deren Wu Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/eba40c84b6d114f618e2ae486cc6d0f2e9272cf9.1638193069.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c | 11 +++++------ drivers/net/wireless/mediatek/mt76/mt7921/sdio_mac.c | 11 +++++------ 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c index bfe6c1579dc1..5a6d7829c6e0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c @@ -43,13 +43,11 @@ EXPORT_SYMBOL_GPL(mt7663_usb_sdio_reg_map); static void mt7663_usb_sdio_write_txwi(struct mt7615_dev *dev, struct mt76_wcid *wcid, enum mt76_txq_id qid, struct ieee80211_sta *sta, - int pid, struct sk_buff *skb) + struct ieee80211_key_conf *key, int pid, + struct sk_buff *skb) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_key_conf *key = info->control.hw_key; - __le32 *txwi; + __le32 *txwi = (__le32 *)(skb->data - MT_USB_TXD_SIZE); - txwi = (__le32 *)(skb->data - MT_USB_TXD_SIZE); memset(txwi, 0, MT_USB_TXD_SIZE); mt7615_mac_write_txwi(dev, txwi, skb, wcid, sta, pid, key, false); skb_push(skb, MT_USB_TXD_SIZE); @@ -188,6 +186,7 @@ int mt7663_usb_sdio_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, struct mt7615_dev *dev = container_of(mdev, struct mt7615_dev, mt76); struct sk_buff *skb = tx_info->skb; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_key_conf *key = info->control.hw_key; struct mt7615_sta *msta; int pad, err, pktid; @@ -205,7 +204,7 @@ int mt7663_usb_sdio_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, } pktid = mt76_tx_status_skb_add(&dev->mt76, wcid, skb); - mt7663_usb_sdio_write_txwi(dev, wcid, qid, sta, pktid, skb); + mt7663_usb_sdio_write_txwi(dev, wcid, qid, sta, key, pktid, skb); if (mt76_is_usb(mdev)) { u32 len = skb->len; diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mac.c index 85b3d88f8ecc..bdec508b6b9f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mac.c @@ -142,13 +142,11 @@ out: static void mt7921s_write_txwi(struct mt7921_dev *dev, struct mt76_wcid *wcid, enum mt76_txq_id qid, struct ieee80211_sta *sta, - int pid, struct sk_buff *skb) + struct ieee80211_key_conf *key, int pid, + struct sk_buff *skb) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_key_conf *key = info->control.hw_key; - __le32 *txwi; + __le32 *txwi = (__le32 *)(skb->data - MT_SDIO_TXD_SIZE); - txwi = (__le32 *)(skb->data - MT_SDIO_TXD_SIZE); memset(txwi, 0, MT_SDIO_TXD_SIZE); mt7921_mac_write_txwi(dev, txwi, skb, wcid, key, pid, false); skb_push(skb, MT_SDIO_TXD_SIZE); @@ -161,6 +159,7 @@ int mt7921s_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, { struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx_info->skb); + struct ieee80211_key_conf *key = info->control.hw_key; struct sk_buff *skb = tx_info->skb; int err, pad, pktid; @@ -180,7 +179,7 @@ int mt7921s_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, } pktid = mt76_tx_status_skb_add(&dev->mt76, wcid, skb); - mt7921s_write_txwi(dev, wcid, qid, sta, pktid, skb); + mt7921s_write_txwi(dev, wcid, qid, sta, key, pktid, skb); mt7921_skb_add_sdio_hdr(skb, MT7921_SDIO_DATA); pad = round_up(skb->len, 4) - skb->len; From ae9287811ba75571cd69505d50ab0e612ace8572 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 29 Nov 2021 10:39:20 -0500 Subject: [PATCH 32/85] wireguard: allowedips: add missing __rcu annotation to satisfy sparse A __rcu annotation got lost during refactoring, which caused sparse to become enraged. Fixes: bf7b042dc62a ("wireguard: allowedips: free empty intermediate nodes when removing single node") Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- drivers/net/wireguard/allowedips.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c index b7197e80f226..9a4c8ff32d9d 100644 --- a/drivers/net/wireguard/allowedips.c +++ b/drivers/net/wireguard/allowedips.c @@ -163,7 +163,7 @@ static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key, return exact; } -static inline void connect_node(struct allowedips_node **parent, u8 bit, struct allowedips_node *node) +static inline void connect_node(struct allowedips_node __rcu **parent, u8 bit, struct allowedips_node *node) { node->parent_bit_packed = (unsigned long)parent | bit; rcu_assign_pointer(*parent, node); From 03ff1b1def73f817e196bf96ab36ac259490bd7c Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 29 Nov 2021 10:39:21 -0500 Subject: [PATCH 33/85] wireguard: selftests: increase default dmesg log size The selftests currently parse the kernel log at the end to track potential memory leaks. With these tests now reading off the end of the buffer, due to recent optimizations, some creation messages were lost, making the tests think that there was a free without an alloc. Fix this by increasing the kernel log size. Fixes: 24b70eeeb4f4 ("wireguard: use synchronize_net rather than synchronize_rcu") Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- tools/testing/selftests/wireguard/qemu/kernel.config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index 74db83a0aedd..a9b5a520a1d2 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -66,6 +66,7 @@ CONFIG_PROC_SYSCTL=y CONFIG_SYSFS=y CONFIG_TMPFS=y CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15 +CONFIG_LOG_BUF_SHIFT=18 CONFIG_PRINTK_TIME=y CONFIG_BLK_DEV_INITRD=y CONFIG_LEGACY_VSYSCALL_NONE=y From 782c72af567fc2ef09bd7615d0307f24de72c7e0 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 29 Nov 2021 10:39:22 -0500 Subject: [PATCH 34/85] wireguard: selftests: actually test for routing loops We previously removed the restriction on looping to self, and then added a test to make sure the kernel didn't blow up during a routing loop. The kernel didn't blow up, thankfully, but on certain architectures where skb fragmentation is easier, such as ppc64, the skbs weren't actually being discarded after a few rounds through. But the test wasn't catching this. So actually test explicitly for massive increases in tx to see if we have a routing loop. Note that the actual loop problem will need to be addressed in a different commit. Fixes: b673e24aad36 ("wireguard: socket: remove errant restriction on looping to self") Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- tools/testing/selftests/wireguard/netns.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index ebc4ee0fe179..2e5c1630885e 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -276,7 +276,11 @@ n0 ping -W 1 -c 1 192.168.241.2 n1 wg set wg0 peer "$pub2" endpoint 192.168.241.2:7 ip2 link del wg0 ip2 link del wg1 -! n0 ping -W 1 -c 10 -f 192.168.241.2 || false # Should not crash kernel +read _ _ tx_bytes_before < <(n0 wg show wg1 transfer) +! n0 ping -W 1 -c 10 -f 192.168.241.2 || false +sleep 1 +read _ _ tx_bytes_after < <(n0 wg show wg1 transfer) +(( tx_bytes_after - tx_bytes_before < 70000 )) ip0 link del wg1 ip1 link del wg0 From b251b711a92189d558b07fde5a7ccd5a7915ebdd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 29 Nov 2021 10:39:23 -0500 Subject: [PATCH 35/85] wireguard: main: rename 'mod_init' & 'mod_exit' functions to be module-specific Rename module_init & module_exit functions that are named "mod_init" and "mod_exit" so that they are unique in both the System.map file and in initcall_debug output instead of showing up as almost anonymous "mod_init". This is helpful for debugging and in determining how long certain module_init calls take to execute. Signed-off-by: Randy Dunlap Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- drivers/net/wireguard/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireguard/main.c b/drivers/net/wireguard/main.c index 75dbe77b0b4b..ee4da9ab8013 100644 --- a/drivers/net/wireguard/main.c +++ b/drivers/net/wireguard/main.c @@ -17,7 +17,7 @@ #include #include -static int __init mod_init(void) +static int __init wg_mod_init(void) { int ret; @@ -60,7 +60,7 @@ err_allowedips: return ret; } -static void __exit mod_exit(void) +static void __exit wg_mod_exit(void) { wg_genetlink_uninit(); wg_device_uninit(); @@ -68,8 +68,8 @@ static void __exit mod_exit(void) wg_allowedips_slab_uninit(); } -module_init(mod_init); -module_exit(mod_exit); +module_init(wg_mod_init); +module_exit(wg_mod_exit); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("WireGuard secure network tunnel"); MODULE_AUTHOR("Jason A. Donenfeld "); From 7e938beb8321d34f040557b8915b228af125f73c Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Mon, 29 Nov 2021 10:39:24 -0500 Subject: [PATCH 36/85] wireguard: selftests: rename DEBUG_PI_LIST to DEBUG_PLIST DEBUG_PI_LIST was renamed to DEBUG_PLIST since 8e18faeac3 ("lib/plist: rename DEBUG_PI_LIST to DEBUG_PLIST"). Signed-off-by: Li Zhijian Fixes: 8e18faeac3e4 ("lib/plist: rename DEBUG_PI_LIST to DEBUG_PLIST") Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- tools/testing/selftests/wireguard/qemu/debug.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config index fe07d97df9fa..2b321b8a96cf 100644 --- a/tools/testing/selftests/wireguard/qemu/debug.config +++ b/tools/testing/selftests/wireguard/qemu/debug.config @@ -47,7 +47,7 @@ CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_TRACE_IRQFLAGS=y CONFIG_DEBUG_BUGVERBOSE=y CONFIG_DEBUG_LIST=y -CONFIG_DEBUG_PI_LIST=y +CONFIG_DEBUG_PLIST=y CONFIG_PROVE_RCU=y CONFIG_SPARSE_RCU_POINTER=y CONFIG_RCU_CPU_STALL_TIMEOUT=21 From 20ae1d6aa159eb91a9bf09ff92ccaa94dbea92c2 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 29 Nov 2021 10:39:25 -0500 Subject: [PATCH 37/85] wireguard: device: reset peer src endpoint when netns exits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each peer's endpoint contains a dst_cache entry that takes a reference to another netdev. When the containing namespace exits, we take down the socket and prevent future sockets from being created (by setting creating_net to NULL), which removes that potential reference on the netns. However, it doesn't release references to the netns that a netdev cached in dst_cache might be taking, so the netns still might fail to exit. Since the socket is gimped anyway, we can simply clear all the dst_caches (by way of clearing the endpoint src), which will release all references. However, the current dst_cache_reset function only releases those references lazily. But it turns out that all of our usages of wg_socket_clear_peer_endpoint_src are called from contexts that are not exactly high-speed or bottle-necked. For example, when there's connection difficulty, or when userspace is reconfiguring the interface. And in particular for this patch, when the netns is exiting. So for those cases, it makes more sense to call dst_release immediately. For that, we add a small helper function to dst_cache. This patch also adds a test to netns.sh from Hangbin Liu to ensure this doesn't regress. Tested-by: Hangbin Liu Reported-by: Xiumei Mu Cc: Toke Høiland-Jørgensen Cc: Paolo Abeni Fixes: 900575aa33a3 ("wireguard: device: avoid circular netns references") Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- drivers/net/wireguard/device.c | 3 +++ drivers/net/wireguard/socket.c | 2 +- include/net/dst_cache.h | 11 ++++++++++ net/core/dst_cache.c | 19 +++++++++++++++++ tools/testing/selftests/wireguard/netns.sh | 24 +++++++++++++++++++++- 5 files changed, 57 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index 551ddaaaf540..77e64ea6be67 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -398,6 +398,7 @@ static struct rtnl_link_ops link_ops __read_mostly = { static void wg_netns_pre_exit(struct net *net) { struct wg_device *wg; + struct wg_peer *peer; rtnl_lock(); list_for_each_entry(wg, &device_list, device_list) { @@ -407,6 +408,8 @@ static void wg_netns_pre_exit(struct net *net) mutex_lock(&wg->device_update_lock); rcu_assign_pointer(wg->creating_net, NULL); wg_socket_reinit(wg, NULL, NULL); + list_for_each_entry(peer, &wg->peer_list, peer_list) + wg_socket_clear_peer_endpoint_src(peer); mutex_unlock(&wg->device_update_lock); } } diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c index 8c496b747108..6f07b949cb81 100644 --- a/drivers/net/wireguard/socket.c +++ b/drivers/net/wireguard/socket.c @@ -308,7 +308,7 @@ void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer) { write_lock_bh(&peer->endpoint_lock); memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6)); - dst_cache_reset(&peer->endpoint_cache); + dst_cache_reset_now(&peer->endpoint_cache); write_unlock_bh(&peer->endpoint_lock); } diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h index 67634675e919..df6622a5fe98 100644 --- a/include/net/dst_cache.h +++ b/include/net/dst_cache.h @@ -79,6 +79,17 @@ static inline void dst_cache_reset(struct dst_cache *dst_cache) dst_cache->reset_ts = jiffies; } +/** + * dst_cache_reset_now - invalidate the cache contents immediately + * @dst_cache: the cache + * + * The caller must be sure there are no concurrent users, as this frees + * all dst_cache users immediately, rather than waiting for the next + * per-cpu usage like dst_cache_reset does. Most callers should use the + * higher speed lazily-freed dst_cache_reset function instead. + */ +void dst_cache_reset_now(struct dst_cache *dst_cache); + /** * dst_cache_init - initialize the cache, allocating the required storage * @dst_cache: the cache diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c index be74ab4551c2..0ccfd5fa5cb9 100644 --- a/net/core/dst_cache.c +++ b/net/core/dst_cache.c @@ -162,3 +162,22 @@ void dst_cache_destroy(struct dst_cache *dst_cache) free_percpu(dst_cache->cache); } EXPORT_SYMBOL_GPL(dst_cache_destroy); + +void dst_cache_reset_now(struct dst_cache *dst_cache) +{ + int i; + + if (!dst_cache->cache) + return; + + dst_cache->reset_ts = jiffies; + for_each_possible_cpu(i) { + struct dst_cache_pcpu *idst = per_cpu_ptr(dst_cache->cache, i); + struct dst_entry *dst = idst->dst; + + idst->cookie = 0; + idst->dst = NULL; + dst_release(dst); + } +} +EXPORT_SYMBOL_GPL(dst_cache_reset_now); diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index 2e5c1630885e..8a9461aa0878 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -613,6 +613,28 @@ ip0 link set wg0 up kill $ncat_pid ip0 link del wg0 +# Ensure that dst_cache references don't outlive netns lifetime +ip1 link add dev wg0 type wireguard +ip2 link add dev wg0 type wireguard +configure_peers +ip1 link add veth1 type veth peer name veth2 +ip1 link set veth2 netns $netns2 +ip1 addr add fd00:aa::1/64 dev veth1 +ip2 addr add fd00:aa::2/64 dev veth2 +ip1 link set veth1 up +ip2 link set veth2 up +waitiface $netns1 veth1 +waitiface $netns2 veth2 +ip1 -6 route add default dev veth1 via fd00:aa::2 +ip2 -6 route add default dev veth2 via fd00:aa::1 +n1 wg set wg0 peer "$pub2" endpoint [fd00:aa::2]:2 +n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::1]:1 +n1 ping6 -c 1 fd00::2 +pp ip netns delete $netns1 +pp ip netns delete $netns2 +pp ip netns add $netns1 +pp ip netns add $netns2 + # Ensure there aren't circular reference loops ip1 link add wg1 type wireguard ip2 link add wg2 type wireguard @@ -631,7 +653,7 @@ while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do done < /dev/kmsg alldeleted=1 for object in "${!objects[@]}"; do - if [[ ${objects["$object"]} != *createddestroyed ]]; then + if [[ ${objects["$object"]} != *createddestroyed && ${objects["$object"]} != *createdcreateddestroyeddestroyed ]]; then echo "Error: $object: merely ${objects["$object"]}" >&3 alldeleted=0 fi From 886fcee939adb5e2af92741b90643a59f2b54f97 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 29 Nov 2021 10:39:26 -0500 Subject: [PATCH 38/85] wireguard: receive: use ring buffer for incoming handshakes Apparently the spinlock on incoming_handshake's skb_queue is highly contended, and a torrent of handshake or cookie packets can bring the data plane to its knees, simply by virtue of enqueueing the handshake packets to be processed asynchronously. So, we try switching this to a ring buffer to hopefully have less lock contention. This alleviates the problem somewhat, though it still isn't perfect, so future patches will have to improve this further. However, it at least doesn't completely diminish the data plane. Reported-by: Streun Fabio Reported-by: Joel Wanner Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- drivers/net/wireguard/device.c | 36 ++++++++++++++++---------------- drivers/net/wireguard/device.h | 9 +++----- drivers/net/wireguard/queueing.c | 6 +++--- drivers/net/wireguard/queueing.h | 2 +- drivers/net/wireguard/receive.c | 27 +++++++++++------------- 5 files changed, 37 insertions(+), 43 deletions(-) diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index 77e64ea6be67..a46067c38bf5 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -98,6 +98,7 @@ static int wg_stop(struct net_device *dev) { struct wg_device *wg = netdev_priv(dev); struct wg_peer *peer; + struct sk_buff *skb; mutex_lock(&wg->device_update_lock); list_for_each_entry(peer, &wg->peer_list, peer_list) { @@ -108,7 +109,9 @@ static int wg_stop(struct net_device *dev) wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); } mutex_unlock(&wg->device_update_lock); - skb_queue_purge(&wg->incoming_handshakes); + while ((skb = ptr_ring_consume(&wg->handshake_queue.ring)) != NULL) + kfree_skb(skb); + atomic_set(&wg->handshake_queue_len, 0); wg_socket_reinit(wg, NULL, NULL); return 0; } @@ -235,14 +238,13 @@ static void wg_destruct(struct net_device *dev) destroy_workqueue(wg->handshake_receive_wq); destroy_workqueue(wg->handshake_send_wq); destroy_workqueue(wg->packet_crypt_wq); - wg_packet_queue_free(&wg->decrypt_queue); - wg_packet_queue_free(&wg->encrypt_queue); + wg_packet_queue_free(&wg->handshake_queue, true); + wg_packet_queue_free(&wg->decrypt_queue, false); + wg_packet_queue_free(&wg->encrypt_queue, false); rcu_barrier(); /* Wait for all the peers to be actually freed. */ wg_ratelimiter_uninit(); memzero_explicit(&wg->static_identity, sizeof(wg->static_identity)); - skb_queue_purge(&wg->incoming_handshakes); free_percpu(dev->tstats); - free_percpu(wg->incoming_handshakes_worker); kvfree(wg->index_hashtable); kvfree(wg->peer_hashtable); mutex_unlock(&wg->device_update_lock); @@ -298,7 +300,6 @@ static int wg_newlink(struct net *src_net, struct net_device *dev, init_rwsem(&wg->static_identity.lock); mutex_init(&wg->socket_update_lock); mutex_init(&wg->device_update_lock); - skb_queue_head_init(&wg->incoming_handshakes); wg_allowedips_init(&wg->peer_allowedips); wg_cookie_checker_init(&wg->cookie_checker, wg); INIT_LIST_HEAD(&wg->peer_list); @@ -316,16 +317,10 @@ static int wg_newlink(struct net *src_net, struct net_device *dev, if (!dev->tstats) goto err_free_index_hashtable; - wg->incoming_handshakes_worker = - wg_packet_percpu_multicore_worker_alloc( - wg_packet_handshake_receive_worker, wg); - if (!wg->incoming_handshakes_worker) - goto err_free_tstats; - wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s", WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name); if (!wg->handshake_receive_wq) - goto err_free_incoming_handshakes; + goto err_free_tstats; wg->handshake_send_wq = alloc_workqueue("wg-kex-%s", WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name); @@ -347,10 +342,15 @@ static int wg_newlink(struct net *src_net, struct net_device *dev, if (ret < 0) goto err_free_encrypt_queue; - ret = wg_ratelimiter_init(); + ret = wg_packet_queue_init(&wg->handshake_queue, wg_packet_handshake_receive_worker, + MAX_QUEUED_INCOMING_HANDSHAKES); if (ret < 0) goto err_free_decrypt_queue; + ret = wg_ratelimiter_init(); + if (ret < 0) + goto err_free_handshake_queue; + ret = register_netdevice(dev); if (ret < 0) goto err_uninit_ratelimiter; @@ -367,18 +367,18 @@ static int wg_newlink(struct net *src_net, struct net_device *dev, err_uninit_ratelimiter: wg_ratelimiter_uninit(); +err_free_handshake_queue: + wg_packet_queue_free(&wg->handshake_queue, false); err_free_decrypt_queue: - wg_packet_queue_free(&wg->decrypt_queue); + wg_packet_queue_free(&wg->decrypt_queue, false); err_free_encrypt_queue: - wg_packet_queue_free(&wg->encrypt_queue); + wg_packet_queue_free(&wg->encrypt_queue, false); err_destroy_packet_crypt: destroy_workqueue(wg->packet_crypt_wq); err_destroy_handshake_send: destroy_workqueue(wg->handshake_send_wq); err_destroy_handshake_receive: destroy_workqueue(wg->handshake_receive_wq); -err_free_incoming_handshakes: - free_percpu(wg->incoming_handshakes_worker); err_free_tstats: free_percpu(dev->tstats); err_free_index_hashtable: diff --git a/drivers/net/wireguard/device.h b/drivers/net/wireguard/device.h index 854bc3d97150..43c7cebbf50b 100644 --- a/drivers/net/wireguard/device.h +++ b/drivers/net/wireguard/device.h @@ -39,21 +39,18 @@ struct prev_queue { struct wg_device { struct net_device *dev; - struct crypt_queue encrypt_queue, decrypt_queue; + struct crypt_queue encrypt_queue, decrypt_queue, handshake_queue; struct sock __rcu *sock4, *sock6; struct net __rcu *creating_net; struct noise_static_identity static_identity; - struct workqueue_struct *handshake_receive_wq, *handshake_send_wq; - struct workqueue_struct *packet_crypt_wq; - struct sk_buff_head incoming_handshakes; - int incoming_handshake_cpu; - struct multicore_worker __percpu *incoming_handshakes_worker; + struct workqueue_struct *packet_crypt_wq,*handshake_receive_wq, *handshake_send_wq; struct cookie_checker cookie_checker; struct pubkey_hashtable *peer_hashtable; struct index_hashtable *index_hashtable; struct allowedips peer_allowedips; struct mutex device_update_lock, socket_update_lock; struct list_head device_list, peer_list; + atomic_t handshake_queue_len; unsigned int num_peers, device_update_gen; u32 fwmark; u16 incoming_port; diff --git a/drivers/net/wireguard/queueing.c b/drivers/net/wireguard/queueing.c index 48e7b982a307..1de413b19e34 100644 --- a/drivers/net/wireguard/queueing.c +++ b/drivers/net/wireguard/queueing.c @@ -38,11 +38,11 @@ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, return 0; } -void wg_packet_queue_free(struct crypt_queue *queue) +void wg_packet_queue_free(struct crypt_queue *queue, bool purge) { free_percpu(queue->worker); - WARN_ON(!__ptr_ring_empty(&queue->ring)); - ptr_ring_cleanup(&queue->ring, NULL); + WARN_ON(!purge && !__ptr_ring_empty(&queue->ring)); + ptr_ring_cleanup(&queue->ring, purge ? (void(*)(void*))kfree_skb : NULL); } #define NEXT(skb) ((skb)->prev) diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h index 4ef2944a68bc..e2388107f7fd 100644 --- a/drivers/net/wireguard/queueing.h +++ b/drivers/net/wireguard/queueing.h @@ -23,7 +23,7 @@ struct sk_buff; /* queueing.c APIs: */ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, unsigned int len); -void wg_packet_queue_free(struct crypt_queue *queue); +void wg_packet_queue_free(struct crypt_queue *queue, bool purge); struct multicore_worker __percpu * wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr); diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index 7dc84bcca261..f4e537e3e8ec 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -116,8 +116,8 @@ static void wg_receive_handshake_packet(struct wg_device *wg, return; } - under_load = skb_queue_len(&wg->incoming_handshakes) >= - MAX_QUEUED_INCOMING_HANDSHAKES / 8; + under_load = atomic_read(&wg->handshake_queue_len) >= + MAX_QUEUED_INCOMING_HANDSHAKES / 8; if (under_load) { last_under_load = ktime_get_coarse_boottime_ns(); } else if (last_under_load) { @@ -212,13 +212,14 @@ static void wg_receive_handshake_packet(struct wg_device *wg, void wg_packet_handshake_receive_worker(struct work_struct *work) { - struct wg_device *wg = container_of(work, struct multicore_worker, - work)->ptr; + struct crypt_queue *queue = container_of(work, struct multicore_worker, work)->ptr; + struct wg_device *wg = container_of(queue, struct wg_device, handshake_queue); struct sk_buff *skb; - while ((skb = skb_dequeue(&wg->incoming_handshakes)) != NULL) { + while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) { wg_receive_handshake_packet(wg, skb); dev_kfree_skb(skb); + atomic_dec(&wg->handshake_queue_len); cond_resched(); } } @@ -554,21 +555,17 @@ void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb) case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): { int cpu; - - if (skb_queue_len(&wg->incoming_handshakes) > - MAX_QUEUED_INCOMING_HANDSHAKES || - unlikely(!rng_is_initialized())) { + if (unlikely(!rng_is_initialized() || + ptr_ring_produce_bh(&wg->handshake_queue.ring, skb))) { net_dbg_skb_ratelimited("%s: Dropping handshake packet from %pISpfsc\n", wg->dev->name, skb); goto err; } - skb_queue_tail(&wg->incoming_handshakes, skb); - /* Queues up a call to packet_process_queued_handshake_ - * packets(skb): - */ - cpu = wg_cpumask_next_online(&wg->incoming_handshake_cpu); + atomic_inc(&wg->handshake_queue_len); + cpu = wg_cpumask_next_online(&wg->handshake_queue.last_cpu); + /* Queues up a call to packet_process_queued_handshake_packets(skb): */ queue_work_on(cpu, wg->handshake_receive_wq, - &per_cpu_ptr(wg->incoming_handshakes_worker, cpu)->work); + &per_cpu_ptr(wg->handshake_queue.worker, cpu)->work); break; } case cpu_to_le32(MESSAGE_DATA): From fb32f4f606c17b869805d7cede8b03d78339b50a Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 29 Nov 2021 10:39:27 -0500 Subject: [PATCH 39/85] wireguard: receive: drop handshakes if queue lock is contended If we're being delivered packets from multiple CPUs so quickly that the ring lock is contended for CPU tries, then it's safe to assume that the queue is near capacity anyway, so just drop the packet rather than spinning. This helps deal with multicore DoS that can interfere with data path performance. It _still_ does not completely fix the issue, but it again chips away at it. Reported-by: Streun Fabio Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- drivers/net/wireguard/receive.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index f4e537e3e8ec..7b8df406c773 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -554,9 +554,19 @@ void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb) case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION): case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): { - int cpu; - if (unlikely(!rng_is_initialized() || - ptr_ring_produce_bh(&wg->handshake_queue.ring, skb))) { + int cpu, ret = -EBUSY; + + if (unlikely(!rng_is_initialized())) + goto drop; + if (atomic_read(&wg->handshake_queue_len) > MAX_QUEUED_INCOMING_HANDSHAKES / 2) { + if (spin_trylock_bh(&wg->handshake_queue.ring.producer_lock)) { + ret = __ptr_ring_produce(&wg->handshake_queue.ring, skb); + spin_unlock_bh(&wg->handshake_queue.ring.producer_lock); + } + } else + ret = ptr_ring_produce_bh(&wg->handshake_queue.ring, skb); + if (ret) { + drop: net_dbg_skb_ratelimited("%s: Dropping handshake packet from %pISpfsc\n", wg->dev->name, skb); goto err; From 4e3fd721710553832460c179c2ee5ce67ef7f1e0 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 29 Nov 2021 10:39:28 -0500 Subject: [PATCH 40/85] wireguard: ratelimiter: use kvcalloc() instead of kvzalloc() Use 2-factor argument form kvcalloc() instead of kvzalloc(). Link: https://github.com/KSPP/linux/issues/162 Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Signed-off-by: Gustavo A. R. Silva [Jason: Gustavo's link above is for KSPP, but this isn't actually a security fix, as table_size is bounded to 8192 anyway, and gcc realizes this, so the codegen comes out to be about the same.] Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- drivers/net/wireguard/ratelimiter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireguard/ratelimiter.c b/drivers/net/wireguard/ratelimiter.c index 3fedd1d21f5e..dd55e5c26f46 100644 --- a/drivers/net/wireguard/ratelimiter.c +++ b/drivers/net/wireguard/ratelimiter.c @@ -176,12 +176,12 @@ int wg_ratelimiter_init(void) (1U << 14) / sizeof(struct hlist_head))); max_entries = table_size * 8; - table_v4 = kvzalloc(table_size * sizeof(*table_v4), GFP_KERNEL); + table_v4 = kvcalloc(table_size, sizeof(*table_v4), GFP_KERNEL); if (unlikely(!table_v4)) goto err_kmemcache; #if IS_ENABLED(CONFIG_IPV6) - table_v6 = kvzalloc(table_size * sizeof(*table_v6), GFP_KERNEL); + table_v6 = kvcalloc(table_size, sizeof(*table_v6), GFP_KERNEL); if (unlikely(!table_v6)) { kvfree(table_v4); goto err_kmemcache; From f7e5b9bfa6c8820407b64eabc1f29c9a87e8993d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 29 Nov 2021 10:39:29 -0500 Subject: [PATCH 41/85] siphash: use _unaligned version by default On ARM v6 and later, we define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS because the ordinary load/store instructions (ldr, ldrh, ldrb) can tolerate any misalignment of the memory address. However, load/store double and load/store multiple instructions (ldrd, ldm) may still only be used on memory addresses that are 32-bit aligned, and so we have to use the CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS macro with care, or we may end up with a severe performance hit due to alignment traps that require fixups by the kernel. Testing shows that this currently happens with clang-13 but not gcc-11. In theory, any compiler version can produce this bug or other problems, as we are dealing with undefined behavior in C99 even on architectures that support this in hardware, see also https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100363. Fortunately, the get_unaligned() accessors do the right thing: when building for ARMv6 or later, the compiler will emit unaligned accesses using the ordinary load/store instructions (but avoid the ones that require 32-bit alignment). When building for older ARM, those accessors will emit the appropriate sequence of ldrb/mov/orr instructions. And on architectures that can truly tolerate any kind of misalignment, the get_unaligned() accessors resolve to the leXX_to_cpup accessors that operate on aligned addresses. Since the compiler will in fact emit ldrd or ldm instructions when building this code for ARM v6 or later, the solution is to use the unaligned accessors unconditionally on architectures where this is known to be fast. The _aligned version of the hash function is however still needed to get the best performance on architectures that cannot do any unaligned access in hardware. This new version avoids the undefined behavior and should produce the fastest hash on all architectures we support. Link: https://lore.kernel.org/linux-arm-kernel/20181008211554.5355-4-ard.biesheuvel@linaro.org/ Link: https://lore.kernel.org/linux-crypto/CAK8P3a2KfmmGDbVHULWevB0hv71P2oi2ZCHEAqT=8dQfa0=cqQ@mail.gmail.com/ Reported-by: Ard Biesheuvel Fixes: 2c956a60778c ("siphash: add cryptographically secure PRF") Signed-off-by: Arnd Bergmann Reviewed-by: Jason A. Donenfeld Acked-by: Ard Biesheuvel Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- include/linux/siphash.h | 14 ++++---------- lib/siphash.c | 12 ++++++------ 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/include/linux/siphash.h b/include/linux/siphash.h index bf21591a9e5e..0cda61855d90 100644 --- a/include/linux/siphash.h +++ b/include/linux/siphash.h @@ -27,9 +27,7 @@ static inline bool siphash_key_is_zero(const siphash_key_t *key) } u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key); -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key); -#endif u64 siphash_1u64(const u64 a, const siphash_key_t *key); u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key); @@ -82,10 +80,9 @@ static inline u64 ___siphash_aligned(const __le64 *data, size_t len, static inline u64 siphash(const void *data, size_t len, const siphash_key_t *key) { -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + !IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) return __siphash_unaligned(data, len, key); -#endif return ___siphash_aligned(data, len, key); } @@ -96,10 +93,8 @@ typedef struct { u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key); -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t *key); -#endif u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key); u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key); @@ -135,10 +130,9 @@ static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len, static inline u32 hsiphash(const void *data, size_t len, const hsiphash_key_t *key) { -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + !IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) return __hsiphash_unaligned(data, len, key); -#endif return ___hsiphash_aligned(data, len, key); } diff --git a/lib/siphash.c b/lib/siphash.c index a90112ee72a1..72b9068ab57b 100644 --- a/lib/siphash.c +++ b/lib/siphash.c @@ -49,6 +49,7 @@ SIPROUND; \ return (v0 ^ v1) ^ (v2 ^ v3); +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u64)); @@ -80,8 +81,8 @@ u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key) POSTAMBLE } EXPORT_SYMBOL(__siphash_aligned); +#endif -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u64)); @@ -113,7 +114,6 @@ u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key) POSTAMBLE } EXPORT_SYMBOL(__siphash_unaligned); -#endif /** * siphash_1u64 - compute 64-bit siphash PRF value of a u64 @@ -250,6 +250,7 @@ EXPORT_SYMBOL(siphash_3u32); HSIPROUND; \ return (v0 ^ v1) ^ (v2 ^ v3); +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u64)); @@ -280,8 +281,8 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) HPOSTAMBLE } EXPORT_SYMBOL(__hsiphash_aligned); +#endif -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t *key) { @@ -313,7 +314,6 @@ u32 __hsiphash_unaligned(const void *data, size_t len, HPOSTAMBLE } EXPORT_SYMBOL(__hsiphash_unaligned); -#endif /** * hsiphash_1u32 - compute 64-bit hsiphash PRF value of a u32 @@ -418,6 +418,7 @@ EXPORT_SYMBOL(hsiphash_4u32); HSIPROUND; \ return v1 ^ v3; +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u32)); @@ -438,8 +439,8 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) HPOSTAMBLE } EXPORT_SYMBOL(__hsiphash_aligned); +#endif -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t *key) { @@ -461,7 +462,6 @@ u32 __hsiphash_unaligned(const void *data, size_t len, HPOSTAMBLE } EXPORT_SYMBOL(__hsiphash_unaligned); -#endif /** * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32 From 1a59c9c55585e1ec5b352d31b3f8402f196eae94 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 29 Nov 2021 15:16:52 +0000 Subject: [PATCH 42/85] net: mscc: ocelot: fix missing unlock on error in ocelot_hwstamp_set() Add the missing mutex_unlock before return from function ocelot_hwstamp_set() in the ocelot_setup_ptp_traps() error handling case. Fixes: 96ca08c05838 ("net: mscc: ocelot: set up traps for PTP packets") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20211129151652.1165433-1-weiyongjun1@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 409cde1e59c6..1e4ad953cffb 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1563,8 +1563,10 @@ int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr) } err = ocelot_setup_ptp_traps(ocelot, port, l2, l4); - if (err) + if (err) { + mutex_unlock(&ocelot->ptp_lock); return err; + } if (l2 && l4) cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; From b83f5ac7d922e69a109261f5f940eebbd4e514c4 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 29 Nov 2021 22:53:27 +0100 Subject: [PATCH 43/85] net: marvell: mvpp2: Fix the computation of shared CPUs 'bitmap_fill()' fills a bitmap one 'long' at a time. It is likely that an exact number of bits is expected. Use 'bitmap_set()' instead in order not to set unexpected bits. Fixes: e531f76757eb ("net: mvpp2: handle cases where more CPUs are available than s/w threads") Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index ce486e16489c..6480696c979b 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -7458,7 +7458,7 @@ static int mvpp2_probe(struct platform_device *pdev) shared = num_present_cpus() - priv->nthreads; if (shared > 0) - bitmap_fill(&priv->lock_map, + bitmap_set(&priv->lock_map, 0, min_t(int, shared, MVPP2_MAX_THREADS)); for (i = 0; i < MVPP2_MAX_THREADS; i++) { From d1ec975f9fa6d2211c1f403010361034a87e317f Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Mon, 29 Nov 2021 15:17:46 -0800 Subject: [PATCH 44/85] ice: xsk: clear status_error0 for each allocated desc Fix a bug in which the receiving of packets can stop in the zero-copy driver. Ice HW ignores 3 lower bits from QRX_TAIL register, which means that tail is bumped only on intervals of 8. Currently with XSK RX batching in place, ice_alloc_rx_bufs_zc() clears the status_error0 only of the last descriptor that has been allocated/taken from the XSK buffer pool. status_error0 includes DD bit that is looked upon by the ice_clean_rx_irq_zc() to tell if a descriptor can be processed. The bug can be triggered when driver updates the ntu but not the QRX_TAIL, so HW wouldn't have a chance to write to the ready descriptors. Later on driver moves the ntc to the mentioned set of descriptors and interprets them as a ready to be processed, since corresponding DD bits were not cleared nor any writeback has happened that would clear it. This can then lead to ntc == ntu case which means that ring is empty and no further packet processing. Fix the XSK traffic hang that can be observed when l2fwd scenario from xdpsock is used by making sure that status_error0 is cleared for each descriptor that is fed to HW and therefore we are sure that driver will not processed non-valid DD bits. This will also prevent the driver from processing the descriptors that were allocated in favor of the previously processed ones, but writeback didn't happen yet. Fixes: db804cfc21e9 ("ice: Use the xsk batched rx allocation interface") Signed-off-by: Maciej Fijalkowski Reviewed-by: Alexander Lobakin Signed-off-by: Tony Nguyen Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ice/ice_xsk.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index ff55cb415b11..bb9a80847298 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -383,6 +383,7 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) while (i--) { dma = xsk_buff_xdp_get_dma(*xdp); rx_desc->read.pkt_addr = cpu_to_le64(dma); + rx_desc->wb.status_error0 = 0; rx_desc++; xdp++; From f4a8adbfe4841491b60c14fe610571e1422359f9 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Tue, 30 Nov 2021 12:05:54 +0800 Subject: [PATCH 45/85] dpaa2-eth: destroy workqueue at the end of remove function The commit c55211892f46 ("dpaa2-eth: support PTP Sync packet one-step timestamping") forgets to destroy workqueue at the end of remove function. Fix this by adding destroy_workqueue before fsl_mc_portal_free and free_netdev. Fixes: c55211892f46 ("dpaa2-eth: support PTP Sync packet one-step timestamping") Signed-off-by: Dongliang Mu Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 6451c8383639..8e643567abce 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -4550,6 +4550,8 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) fsl_mc_portal_free(priv->mc_io); + destroy_workqueue(priv->dpaa2_ptp_wq); + dev_dbg(net_dev->dev.parent, "Removed interface %s\n", net_dev->name); free_netdev(net_dev); From 34d8778a943761121f391b7921f79a7adbe1feaf Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Tue, 30 Nov 2021 08:33:58 +0100 Subject: [PATCH 46/85] MAINTAINERS: s390/net: add Alexandra and Wenjia as maintainer Add Alexandra and Wenjia as maintainers for drivers/s390/net and iucv. Also, remove myself as maintainer for these areas. Signed-off-by: Karsten Graul Acked-by: Alexandra Winter Acked-by: Wenjia Zhang Signed-off-by: David S. Miller --- MAINTAINERS | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 360e9aa0205d..43d8fac7fb7c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16623,7 +16623,8 @@ F: drivers/iommu/s390-iommu.c S390 IUCV NETWORK LAYER M: Julian Wiedmann -M: Karsten Graul +M: Alexandra Winter +M: Wenjia Zhang L: linux-s390@vger.kernel.org L: netdev@vger.kernel.org S: Supported @@ -16634,7 +16635,8 @@ F: net/iucv/ S390 NETWORK DRIVERS M: Julian Wiedmann -M: Karsten Graul +M: Alexandra Winter +M: Wenjia Zhang L: linux-s390@vger.kernel.org L: netdev@vger.kernel.org S: Supported From f123cffdd8fe8ea6c7fded4b88516a42798797d0 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Mon, 29 Nov 2021 09:53:27 -0800 Subject: [PATCH 47/85] net: netlink: af_netlink: Prevent empty skb by adding a check on len. Adding a check on len parameter to avoid empty skb. This prevents a division error in netem_enqueue function which is caused when skb->len=0 and skb->data_len=0 in the randomized corruption step as shown below. skb->data[prandom_u32() % skb_headlen(skb)] ^= 1<<(prandom_u32() % 8); Crash Report: [ 343.170349] netdevsim netdevsim0 netdevsim3: set [1, 0] type 2 family 0 port 6081 - 0 [ 343.216110] netem: version 1.3 [ 343.235841] divide error: 0000 [#1] PREEMPT SMP KASAN NOPTI [ 343.236680] CPU: 3 PID: 4288 Comm: reproducer Not tainted 5.16.0-rc1+ [ 343.237569] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 [ 343.238707] RIP: 0010:netem_enqueue+0x1590/0x33c0 [sch_netem] [ 343.239499] Code: 89 85 58 ff ff ff e8 5f 5d e9 d3 48 8b b5 48 ff ff ff 8b 8d 50 ff ff ff 8b 85 58 ff ff ff 48 8b bd 70 ff ff ff 31 d2 2b 4f 74 f1 48 b8 00 00 00 00 00 fc ff df 49 01 d5 4c 89 e9 48 c1 e9 03 [ 343.241883] RSP: 0018:ffff88800bcd7368 EFLAGS: 00010246 [ 343.242589] RAX: 00000000ba7c0a9c RBX: 0000000000000001 RCX: 0000000000000000 [ 343.243542] RDX: 0000000000000000 RSI: ffff88800f8edb10 RDI: ffff88800f8eda40 [ 343.244474] RBP: ffff88800bcd7458 R08: 0000000000000000 R09: ffffffff94fb8445 [ 343.245403] R10: ffffffff94fb8336 R11: ffffffff94fb8445 R12: 0000000000000000 [ 343.246355] R13: ffff88800a5a7000 R14: ffff88800a5b5800 R15: 0000000000000020 [ 343.247291] FS: 00007fdde2bd7700(0000) GS:ffff888109780000(0000) knlGS:0000000000000000 [ 343.248350] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 343.249120] CR2: 00000000200000c0 CR3: 000000000ef4c000 CR4: 00000000000006e0 [ 343.250076] Call Trace: [ 343.250423] [ 343.250713] ? memcpy+0x4d/0x60 [ 343.251162] ? netem_init+0xa0/0xa0 [sch_netem] [ 343.251795] ? __sanitizer_cov_trace_pc+0x21/0x60 [ 343.252443] netem_enqueue+0xe28/0x33c0 [sch_netem] [ 343.253102] ? stack_trace_save+0x87/0xb0 [ 343.253655] ? filter_irq_stacks+0xb0/0xb0 [ 343.254220] ? netem_init+0xa0/0xa0 [sch_netem] [ 343.254837] ? __kasan_check_write+0x14/0x20 [ 343.255418] ? _raw_spin_lock+0x88/0xd6 [ 343.255953] dev_qdisc_enqueue+0x50/0x180 [ 343.256508] __dev_queue_xmit+0x1a7e/0x3090 [ 343.257083] ? netdev_core_pick_tx+0x300/0x300 [ 343.257690] ? check_kcov_mode+0x10/0x40 [ 343.258219] ? _raw_spin_unlock_irqrestore+0x29/0x40 [ 343.258899] ? __kasan_init_slab_obj+0x24/0x30 [ 343.259529] ? setup_object.isra.71+0x23/0x90 [ 343.260121] ? new_slab+0x26e/0x4b0 [ 343.260609] ? kasan_poison+0x3a/0x50 [ 343.261118] ? kasan_unpoison+0x28/0x50 [ 343.261637] ? __kasan_slab_alloc+0x71/0x90 [ 343.262214] ? memcpy+0x4d/0x60 [ 343.262674] ? write_comp_data+0x2f/0x90 [ 343.263209] ? __kasan_check_write+0x14/0x20 [ 343.263802] ? __skb_clone+0x5d6/0x840 [ 343.264329] ? __sanitizer_cov_trace_pc+0x21/0x60 [ 343.264958] dev_queue_xmit+0x1c/0x20 [ 343.265470] netlink_deliver_tap+0x652/0x9c0 [ 343.266067] netlink_unicast+0x5a0/0x7f0 [ 343.266608] ? netlink_attachskb+0x860/0x860 [ 343.267183] ? __sanitizer_cov_trace_pc+0x21/0x60 [ 343.267820] ? write_comp_data+0x2f/0x90 [ 343.268367] netlink_sendmsg+0x922/0xe80 [ 343.268899] ? netlink_unicast+0x7f0/0x7f0 [ 343.269472] ? __sanitizer_cov_trace_pc+0x21/0x60 [ 343.270099] ? write_comp_data+0x2f/0x90 [ 343.270644] ? netlink_unicast+0x7f0/0x7f0 [ 343.271210] sock_sendmsg+0x155/0x190 [ 343.271721] ____sys_sendmsg+0x75f/0x8f0 [ 343.272262] ? kernel_sendmsg+0x60/0x60 [ 343.272788] ? write_comp_data+0x2f/0x90 [ 343.273332] ? write_comp_data+0x2f/0x90 [ 343.273869] ___sys_sendmsg+0x10f/0x190 [ 343.274405] ? sendmsg_copy_msghdr+0x80/0x80 [ 343.274984] ? slab_post_alloc_hook+0x70/0x230 [ 343.275597] ? futex_wait_setup+0x240/0x240 [ 343.276175] ? security_file_alloc+0x3e/0x170 [ 343.276779] ? write_comp_data+0x2f/0x90 [ 343.277313] ? __sanitizer_cov_trace_pc+0x21/0x60 [ 343.277969] ? write_comp_data+0x2f/0x90 [ 343.278515] ? __fget_files+0x1ad/0x260 [ 343.279048] ? __sanitizer_cov_trace_pc+0x21/0x60 [ 343.279685] ? write_comp_data+0x2f/0x90 [ 343.280234] ? __sanitizer_cov_trace_pc+0x21/0x60 [ 343.280874] ? sockfd_lookup_light+0xd1/0x190 [ 343.281481] __sys_sendmsg+0x118/0x200 [ 343.281998] ? __sys_sendmsg_sock+0x40/0x40 [ 343.282578] ? alloc_fd+0x229/0x5e0 [ 343.283070] ? write_comp_data+0x2f/0x90 [ 343.283610] ? write_comp_data+0x2f/0x90 [ 343.284135] ? __sanitizer_cov_trace_pc+0x21/0x60 [ 343.284776] ? ktime_get_coarse_real_ts64+0xb8/0xf0 [ 343.285450] __x64_sys_sendmsg+0x7d/0xc0 [ 343.285981] ? syscall_enter_from_user_mode+0x4d/0x70 [ 343.286664] do_syscall_64+0x3a/0x80 [ 343.287158] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 343.287850] RIP: 0033:0x7fdde24cf289 [ 343.288344] Code: 01 00 48 81 c4 80 00 00 00 e9 f1 fe ff ff 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d b7 db 2c 00 f7 d8 64 89 01 48 [ 343.290729] RSP: 002b:00007fdde2bd6d98 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 343.291730] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fdde24cf289 [ 343.292673] RDX: 0000000000000000 RSI: 00000000200000c0 RDI: 0000000000000004 [ 343.293618] RBP: 00007fdde2bd6e20 R08: 0000000100000001 R09: 0000000000000000 [ 343.294557] R10: 0000000100000001 R11: 0000000000000246 R12: 0000000000000000 [ 343.295493] R13: 0000000000021000 R14: 0000000000000000 R15: 00007fdde2bd7700 [ 343.296432] [ 343.296735] Modules linked in: sch_netem ip6_vti ip_vti ip_gre ipip sit ip_tunnel geneve macsec macvtap tap ipvlan macvlan 8021q garp mrp hsr wireguard libchacha20poly1305 chacha_x86_64 poly1305_x86_64 ip6_udp_tunnel udp_tunnel libblake2s blake2s_x86_64 libblake2s_generic curve25519_x86_64 libcurve25519_generic libchacha xfrm_interface xfrm6_tunnel tunnel4 veth netdevsim psample batman_adv nlmon dummy team bonding tls vcan ip6_gre ip6_tunnel tunnel6 gre tun ip6t_rpfilter ipt_REJECT nf_reject_ipv4 ip6t_REJECT nf_reject_ipv6 xt_conntrack ip_set ebtable_nat ebtable_broute ip6table_nat ip6table_mangle ip6table_security ip6table_raw iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_mangle iptable_security iptable_raw ebtable_filter ebtables rfkill ip6table_filter ip6_tables iptable_filter ppdev bochs drm_vram_helper drm_ttm_helper ttm drm_kms_helper cec parport_pc drm joydev floppy parport sg syscopyarea sysfillrect sysimgblt i2c_piix4 qemu_fw_cfg fb_sys_fops pcspkr [ 343.297459] ip_tables xfs virtio_net net_failover failover sd_mod sr_mod cdrom t10_pi ata_generic pata_acpi ata_piix libata virtio_pci virtio_pci_legacy_dev serio_raw virtio_pci_modern_dev dm_mirror dm_region_hash dm_log dm_mod [ 343.311074] Dumping ftrace buffer: [ 343.311532] (ftrace buffer empty) [ 343.312040] ---[ end trace a2e3db5a6ae05099 ]--- [ 343.312691] RIP: 0010:netem_enqueue+0x1590/0x33c0 [sch_netem] [ 343.313481] Code: 89 85 58 ff ff ff e8 5f 5d e9 d3 48 8b b5 48 ff ff ff 8b 8d 50 ff ff ff 8b 85 58 ff ff ff 48 8b bd 70 ff ff ff 31 d2 2b 4f 74 f1 48 b8 00 00 00 00 00 fc ff df 49 01 d5 4c 89 e9 48 c1 e9 03 [ 343.315893] RSP: 0018:ffff88800bcd7368 EFLAGS: 00010246 [ 343.316622] RAX: 00000000ba7c0a9c RBX: 0000000000000001 RCX: 0000000000000000 [ 343.317585] RDX: 0000000000000000 RSI: ffff88800f8edb10 RDI: ffff88800f8eda40 [ 343.318549] RBP: ffff88800bcd7458 R08: 0000000000000000 R09: ffffffff94fb8445 [ 343.319503] R10: ffffffff94fb8336 R11: ffffffff94fb8445 R12: 0000000000000000 [ 343.320455] R13: ffff88800a5a7000 R14: ffff88800a5b5800 R15: 0000000000000020 [ 343.321414] FS: 00007fdde2bd7700(0000) GS:ffff888109780000(0000) knlGS:0000000000000000 [ 343.322489] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 343.323283] CR2: 00000000200000c0 CR3: 000000000ef4c000 CR4: 00000000000006e0 [ 343.324264] Kernel panic - not syncing: Fatal exception in interrupt [ 343.333717] Dumping ftrace buffer: [ 343.334175] (ftrace buffer empty) [ 343.334653] Kernel Offset: 0x13600000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 343.336027] Rebooting in 86400 seconds.. Reported-by: syzkaller Signed-off-by: Harshit Mogalapalli Link: https://lore.kernel.org/r/20211129175328.55339-1-harshit.m.mogalapalli@oracle.com Signed-off-by: Jakub Kicinski --- net/netlink/af_netlink.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4c575324a985..9eba2e648385 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1852,6 +1852,11 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; + if (len == 0) { + pr_warn_once("Zero length message leads to an empty skb\n"); + return -ENODATA; + } + err = scm_send(sock, msg, &scm, true); if (err < 0) return err; From b0f38e15979fa8851e88e8aa371367f264e7b6e9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 29 Nov 2021 22:39:47 -0800 Subject: [PATCH 48/85] natsemi: xtensa: fix section mismatch warnings Fix section mismatch warnings in xtsonic. The first one appears to be bogus and after fixing the second one, the first one is gone. WARNING: modpost: vmlinux.o(.text+0x529adc): Section mismatch in reference from the function sonic_get_stats() to the function .init.text:set_reset_devices() The function sonic_get_stats() references the function __init set_reset_devices(). This is often because sonic_get_stats lacks a __init annotation or the annotation of set_reset_devices is wrong. WARNING: modpost: vmlinux.o(.text+0x529b3b): Section mismatch in reference from the function xtsonic_probe() to the function .init.text:sonic_probe1() The function xtsonic_probe() references the function __init sonic_probe1(). This is often because xtsonic_probe lacks a __init annotation or the annotation of sonic_probe1 is wrong. Fixes: 74f2a5f0ef64 ("xtensa: Add support for the Sonic Ethernet device for the XT2000 board.") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Christophe JAILLET Cc: Finn Thain Cc: Chris Zankel Cc: linux-xtensa@linux-xtensa.org Cc: Thomas Bogendoerfer Acked-by: Max Filippov Link: https://lore.kernel.org/r/20211130063947.7529-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/natsemi/xtsonic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c index ca4686094701..0a02d8bd0a3e 100644 --- a/drivers/net/ethernet/natsemi/xtsonic.c +++ b/drivers/net/ethernet/natsemi/xtsonic.c @@ -120,7 +120,7 @@ static const struct net_device_ops xtsonic_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, }; -static int __init sonic_probe1(struct net_device *dev) +static int sonic_probe1(struct net_device *dev) { unsigned int silicon_revision; struct sonic_local *lp = netdev_priv(dev); From c65d638ab39034cbaa36773b980d28106cfc81fa Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Wed, 17 Nov 2021 13:33:57 +0200 Subject: [PATCH 49/85] net/mlx5e: IPsec: Fix Software parser inner l3 type setting in case of encapsulation Current code wrongly uses the skb->protocol field which reflects the outer l3 protocol to set the inner l3 type in Software Parser (SWP) fields settings in the ethernet segment (eseg) in flows where inner l3 exists like in Vxlan over ESP flow, the above method wrongly use the outer protocol type instead of the inner one. thus breaking cases where inner and outer headers have different protocols. Fix by setting the inner l3 type in SWP according to the inner l3 ip header version. Fixes: 2ac9cfe78223 ("net/mlx5e: IPSec, Add Innova IPSec offload TX data path") Signed-off-by: Raed Salem Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index fb5397324aa4..2db9573a3fe6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -191,7 +191,7 @@ static void mlx5e_ipsec_set_swp(struct sk_buff *skb, eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; eseg->swp_inner_l4_offset = (skb->csum_start + skb->head - skb->data) / 2; - if (skb->protocol == htons(ETH_P_IPV6)) + if (inner_ip_hdr(skb)->version == 6) eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; break; default: From 51ebf5db67f5c6aed79c05f1aa5137bdf5ca6614 Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Thu, 8 Jul 2021 12:48:24 +0300 Subject: [PATCH 50/85] net/mlx5e: Fix missing IPsec statistics on uplink representor The cited patch added the IPsec support to uplink representor, however as uplink representors have his private statistics where IPsec stats is not part of it, that effectively makes IPsec stats hidden when uplink representor stats queried. Resolve by adding IPsec stats to uplink representor private statistics. Fixes: 5589b8f1a2c7 ("net/mlx5e: Add IPsec support to uplink representor") Signed-off-by: Raed Salem Reviewed-by: Alaa Hleihel Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index e58a9ec42553..48895d79796a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1080,6 +1080,10 @@ static mlx5e_stats_grp_t mlx5e_ul_rep_stats_grps[] = { &MLX5E_STATS_GRP(pme), &MLX5E_STATS_GRP(channels), &MLX5E_STATS_GRP(per_port_buff_congest), +#ifdef CONFIG_MLX5_EN_IPSEC + &MLX5E_STATS_GRP(ipsec_sw), + &MLX5E_STATS_GRP(ipsec_hw), +#endif }; static unsigned int mlx5e_ul_rep_stats_grps_num(struct mlx5e_priv *priv) From 4cce2ccf08fbc27ae34ce0e72db15166e7b5f6a7 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 13 Sep 2021 13:54:30 +0300 Subject: [PATCH 51/85] net/mlx5e: Sync TIR params updates against concurrent create/modify Transport Interface Receive (TIR) objects perform the packet processing and reassembly and is also responsible for demultiplexing the packets into the different RQs. There are certain TIR context attributes that propagate to the pointed RQs and applied to them (like packet_merge offloads (LRO/SHAMPO) and tunneled_offload_en). When TIRs do not agree on attributes values, a "last one wins" policy is applied. Hence, if not synced properly, a race between TIR params update and a concurrent TIR create/modify operation might yield to a mismatch between the shadow parameters in SW and the actual applied state of the RQs in HW. tunneled_offload_en is a fixed attribute per profile, while packet merge offload state might be toggled and get out-of-sync. When this happens, packet_merge offload might be working although not requested, or the opposite. All updates to packet_merge state and all create/modify operations of regular redirection/steering TIRs are done under the same priv->state_lock, so they do not run in parallel, and no race is possible. However, there are other kind of TIRs (acceleration offloads TIRs, like TLS TIRs) which are created on demand for each new connection without holding the coarse priv->state_lock, hence might race. Fix this by synchronizing all packet_merge state reads and writes against all TIR create/modify operations. Include the modify operations of the regular redirection steering TIRs under the new lock, for better code layering and division of responsibilities. Fixes: 1182f3659357 ("net/mlx5e: kTLS, Add kTLS RX HW offload support") Signed-off-by: Tariq Toukan Reviewed-by: Moshe Shemesh Reviewed-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/rx_res.c | 41 ++++++++++++++++++- .../ethernet/mellanox/mlx5/core/en/rx_res.h | 6 +-- .../mellanox/mlx5/core/en_accel/ktls_rx.c | 24 +---------- 3 files changed, 44 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c index 142953847996..0015a81eb9a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -13,6 +13,9 @@ struct mlx5e_rx_res { unsigned int max_nch; u32 drop_rqn; + struct mlx5e_packet_merge_param pkt_merge_param; + struct rw_semaphore pkt_merge_param_sem; + struct mlx5e_rss *rss[MLX5E_MAX_NUM_RSS]; bool rss_active; u32 rss_rqns[MLX5E_INDIR_RQT_SIZE]; @@ -392,6 +395,7 @@ static int mlx5e_rx_res_ptp_init(struct mlx5e_rx_res *res) if (err) goto out; + /* Separated from the channels RQs, does not share pkt_merge state with them */ mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, mlx5e_rqt_get_rqtn(&res->ptp.rqt), inner_ft_support); @@ -447,6 +451,9 @@ int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev, res->max_nch = max_nch; res->drop_rqn = drop_rqn; + res->pkt_merge_param = *init_pkt_merge_param; + init_rwsem(&res->pkt_merge_param_sem); + err = mlx5e_rx_res_rss_init_def(res, init_pkt_merge_param, init_nch); if (err) goto err_out; @@ -513,7 +520,7 @@ u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res) return mlx5e_tir_get_tirn(&res->ptp.tir); } -u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix) +static u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix) { return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt); } @@ -656,6 +663,9 @@ int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res, if (!builder) return -ENOMEM; + down_write(&res->pkt_merge_param_sem); + res->pkt_merge_param = *pkt_merge_param; + mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param); final_err = 0; @@ -681,6 +691,7 @@ int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res, } } + up_write(&res->pkt_merge_param_sem); mlx5e_tir_builder_free(builder); return final_err; } @@ -689,3 +700,31 @@ struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res * { return mlx5e_rss_get_hash(res->rss[0]); } + +int mlx5e_rx_res_tls_tir_create(struct mlx5e_rx_res *res, unsigned int rxq, + struct mlx5e_tir *tir) +{ + bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + struct mlx5e_tir_builder *builder; + u32 rqtn; + int err; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + rqtn = mlx5e_rx_res_get_rqtn_direct(res, rxq); + + mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, rqtn, + inner_ft_support); + mlx5e_tir_builder_build_direct(builder); + mlx5e_tir_builder_build_tls(builder); + down_read(&res->pkt_merge_param_sem); + mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param); + err = mlx5e_tir_init(tir, builder, res->mdev, false); + up_read(&res->pkt_merge_param_sem); + + mlx5e_tir_builder_free(builder); + + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h index d09f7d174a51..b39b20a720e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h @@ -37,9 +37,6 @@ u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res); -/* RQTN getters for modules that create their own TIRs */ -u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix); - /* Activate/deactivate API */ void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs); void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res); @@ -69,4 +66,7 @@ struct mlx5e_rss *mlx5e_rx_res_rss_get(struct mlx5e_rx_res *res, u32 rss_idx); /* Workaround for hairpin */ struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res); +/* Accel TIRs */ +int mlx5e_rx_res_tls_tir_create(struct mlx5e_rx_res *res, unsigned int rxq, + struct mlx5e_tir *tir); #endif /* __MLX5_EN_RX_RES_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index a2a9f68579dd..15711814d2d2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -100,25 +100,6 @@ mlx5e_ktls_rx_resync_create_resp_list(void) return resp_list; } -static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 rqtn) -{ - struct mlx5e_tir_builder *builder; - int err; - - builder = mlx5e_tir_builder_alloc(false); - if (!builder) - return -ENOMEM; - - mlx5e_tir_builder_build_rqt(builder, mdev->mlx5e_res.hw_objs.td.tdn, rqtn, false); - mlx5e_tir_builder_build_direct(builder); - mlx5e_tir_builder_build_tls(builder); - err = mlx5e_tir_init(tir, builder, mdev, false); - - mlx5e_tir_builder_free(builder); - - return err; -} - static void accel_rule_handle_work(struct work_struct *work) { struct mlx5e_ktls_offload_context_rx *priv_rx; @@ -609,7 +590,6 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, struct mlx5_core_dev *mdev; struct mlx5e_priv *priv; int rxq, err; - u32 rqtn; tls_ctx = tls_get_ctx(sk); priv = netdev_priv(netdev); @@ -635,9 +615,7 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, priv_rx->sw_stats = &priv->tls->sw_stats; mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx); - rqtn = mlx5e_rx_res_get_rqtn_direct(priv->rx_res, rxq); - - err = mlx5e_ktls_create_tir(mdev, &priv_rx->tir, rqtn); + err = mlx5e_rx_res_tls_tir_create(priv->rx_res, rxq, &priv_rx->tir); if (err) goto err_create_tir; From e45c0b34493c24eeeebf89f63a5293aac7728ed7 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Fri, 5 Nov 2021 15:03:20 +0200 Subject: [PATCH 52/85] net/mlx5: Move MODIFY_RQT command to ignore list in internal error state When the device is in internal error state, command interface isn't accessible and the driver decides which commands to fail and which to ignore. Move the MODIFY_RQT command to the ignore list in order to avoid the following redundant warning messages in internal error state: mlx5_core 0000:82:00.1: mlx5e_rss_disable:419:(pid 23754): Failed to redirect RQT 0x0 to drop RQ 0xc00848: err = -5 mlx5_core 0000:82:00.1: mlx5e_rx_res_channels_deactivate:598:(pid 23754): Failed to redirect direct RQT 0x1 to drop RQ 0xc00848 (channel 0): err = -5 mlx5_core 0000:82:00.1: mlx5e_rx_res_channels_deactivate:607:(pid 23754): Failed to redirect XSK RQT 0x19 to drop RQ 0xc00848 (channel 0): err = -5 Fixes: 43ec0f41fa73 ("net/mlx5e: Hide all implementation details of mlx5e_rx_res") Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 8eaa24d865c5..a46284ca5172 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -341,6 +341,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DEALLOC_SF: case MLX5_CMD_OP_DESTROY_UCTX: case MLX5_CMD_OP_DESTROY_UMEM: + case MLX5_CMD_OP_MODIFY_RQT: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -446,7 +447,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_MODIFY_TIS: case MLX5_CMD_OP_QUERY_TIS: case MLX5_CMD_OP_CREATE_RQT: - case MLX5_CMD_OP_MODIFY_RQT: case MLX5_CMD_OP_QUERY_RQT: case MLX5_CMD_OP_CREATE_FLOW_TABLE: From ffdf45315226926e5ae5faf0ff76caca68f6d39c Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 18 Nov 2021 12:29:15 +0200 Subject: [PATCH 53/85] net/mlx5: Lag, Fix recreation of VF LAG Driver needs to nullify the port select attributes of the LAG when port selection is destroyed, otherwise it breaks recreation of the LAG. It fixes the below kernel oops: [ 587.906377] BUG: kernel NULL pointer dereference, address: 0000000000000008 [ 587.908843] #PF: supervisor read access in kernel mode [ 587.910730] #PF: error_code(0x0000) - not-present page [ 587.912580] PGD 0 P4D 0 [ 587.913632] Oops: 0000 [#1] SMP PTI [ 587.914644] CPU: 5 PID: 165 Comm: kworker/u20:5 Tainted: G OE 5.9.0_mlnx #1 [ 587.916152] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 587.918332] Workqueue: mlx5_lag mlx5_do_bond_work [mlx5_core] [ 587.919479] RIP: 0010:mlx5_del_flow_rules+0x10/0x270 [mlx5_core] [ 587.920568] mlx5_core 0000:08:00.1 enp8s0f1: Link up [ 587.920680] Code: c0 09 80 a0 e8 cf 42 a4 e0 48 c7 c3 f4 ff ff ff e8 8a 88 dd e0 e9 ab fe ff ff 0f 1f 44 00 00 41 56 41 55 49 89 fd 41 54 55 53 <48> 8b 47 08 48 8b 68 28 48 85 ed 74 2e 48 8d 7d 38 e8 6a 64 34 e1 [ 587.925116] bond0: (slave enp8s0f1): Enslaving as an active interface with an up link [ 587.930415] RSP: 0018:ffffc9000048fd88 EFLAGS: 00010282 [ 587.930417] RAX: ffff88846c14fac0 RBX: ffff88846cddcb80 RCX: 0000000080400007 [ 587.930417] RDX: 0000000080400008 RSI: ffff88846cddcb80 RDI: 0000000000000000 [ 587.930419] RBP: ffff88845fd80140 R08: 0000000000000001 R09: ffffffffa074ba00 [ 587.938132] R10: ffff88846c14fec0 R11: 0000000000000001 R12: ffff88846c122f10 [ 587.939473] R13: 0000000000000000 R14: 0000000000000001 R15: ffff88846d7a0000 [ 587.940800] FS: 0000000000000000(0000) GS:ffff88846fa80000(0000) knlGS:0000000000000000 [ 587.942416] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 587.943536] CR2: 0000000000000008 CR3: 000000000240a002 CR4: 0000000000770ee0 [ 587.944904] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 587.946308] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 587.947639] PKRU: 55555554 [ 587.948236] Call Trace: [ 587.948834] mlx5_lag_destroy_definer.isra.3+0x16/0x90 [mlx5_core] [ 587.950033] mlx5_lag_destroy_definers+0x5b/0x80 [mlx5_core] [ 587.951128] mlx5_deactivate_lag+0x6e/0x80 [mlx5_core] [ 587.952146] mlx5_do_bond+0x150/0x450 [mlx5_core] [ 587.953086] mlx5_do_bond_work+0x3e/0x50 [mlx5_core] [ 587.954086] process_one_work+0x1eb/0x3e0 [ 587.954899] worker_thread+0x2d/0x3c0 [ 587.955656] ? process_one_work+0x3e0/0x3e0 [ 587.956493] kthread+0x115/0x130 [ 587.957174] ? kthread_park+0x90/0x90 [ 587.957929] ret_from_fork+0x1f/0x30 [ 587.973055] ---[ end trace 71ccd6eca89f5513 ]--- Fixes: b7267869e923 ("net/mlx5: Lag, add support to create/destroy/modify port selection") Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index ad63dd45c8fb..a6592f9c3c05 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -608,4 +608,5 @@ void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev) if (port_sel->tunnel) mlx5_destroy_ttc_table(port_sel->inner.ttc); mlx5_lag_destroy_definers(ldev); + memset(port_sel, 0, sizeof(*port_sel)); } From 1e59b32e45e47c8ea5455182286ba010bfa87813 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Tue, 21 Sep 2021 15:47:33 +0300 Subject: [PATCH 54/85] net/mlx5: E-switch, Respect BW share of the new group To enable transmit schduler on vport FW require non-zero configuration for vport's TSAR. If vport added to the group which has configured BW share value and TX rate values of the vport are zero, then scheduler wouldn't be enabled on this vport. Fix that by calling BW normalization if BW share of the new group is configured. Fixes: 0fe132eac38c ("net/mlx5: E-switch, Allow to add vports to rate groups") Signed-off-by: Dmytro Linkin Reviewed-by: Roi Dayan Reviewed-by: Parav Pandit Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index c6cc67cb4f6a..4501e3d737f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -423,7 +423,7 @@ static int esw_qos_vport_update_group(struct mlx5_eswitch *esw, return err; /* Recalculate bw share weights of old and new groups */ - if (vport->qos.bw_share) { + if (vport->qos.bw_share || new_group->bw_share) { esw_qos_normalize_vports_min_rate(esw, curr_group, extack); esw_qos_normalize_vports_min_rate(esw, new_group, extack); } From 43a0696f11567278b9412f947e43dd7906c831a8 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 21 Oct 2021 12:46:17 +0000 Subject: [PATCH 55/85] net/mlx5: E-Switch, fix single FDB creation on BlueField Always use MLX5_FLOW_TABLE_OTHER_VPORT flag when creating egress ACL table for single FDB. Not doing so on BlueField will make firmware fail the command. On BlueField the E-Switch manager is the ECPF (vport 0xFFFE) which is filled in the flow table creation command but as the other_vport field wasn't set the firmware complains about a bad parameter. This is different from a regular HCA where the E-Switch manager vport is the PF (vport 0x0). Passing MLX5_FLOW_TABLE_OTHER_VPORT will make the firmware happy both on BlueField and on regular HCAs without special condition for each. This fixes the bellow firmware syndrome: mlx5_cmd_check:819:(pid 571): CREATE_FLOW_TABLE(0x930) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x754a4) Fixes: db202995f503 ("net/mlx5: E-Switch, add logic to enable shared FDB") Signed-off-by: Mark Bloch Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index a46455694f7a..275af1d2b4d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2512,6 +2512,7 @@ static int esw_set_master_egress_rule(struct mlx5_core_dev *master, struct mlx5_eswitch *esw = master->priv.eswitch; struct mlx5_flow_table_attr ft_attr = { .max_fte = 1, .prio = 0, .level = 0, + .flags = MLX5_FLOW_TABLE_OTHER_VPORT, }; struct mlx5_flow_namespace *egress_ns; struct mlx5_flow_table *acl; From 5c4e8ae7aa4875041102406801ee434e6c581aef Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 17 Nov 2021 11:47:21 +0200 Subject: [PATCH 56/85] net/mlx5: E-Switch, Check group pointer before reading bw_share value If log_esw_max_sched_depth is not supported group pointer of the vport is NULL. Hence, check the pointer before reading bw_share value. Fixes: 0fe132eac38c ("net/mlx5: E-switch, Allow to add vports to rate groups") Signed-off-by: Dmytro Linkin Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 4501e3d737f8..d377ddc70fc7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -130,7 +130,7 @@ static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw, /* If vports min rate divider is 0 but their group has bw_share configured, then * need to set bw_share for vports to minimal value. */ - if (!group_level && !max_guarantee && group->bw_share) + if (!group_level && !max_guarantee && group && group->bw_share) return 1; return 0; } From e219440da0c3a63b3cec23d08473436ae7d95fa6 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Tue, 23 Nov 2021 14:37:11 +0200 Subject: [PATCH 57/85] net/mlx5: E-Switch, Use indirect table only if all destinations support it When adding rule with multiple destinations, indirect table is used for all of the destinations if at least one of the destinations support it, this can cause creation of invalid indirect tables for the destinations that doesn't support it. Fixed it by using indirect table only if all destinations support it. Fixes: a508728a4c8b ("net/mlx5e: VF tunnel RX traffic offloading") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 275af1d2b4d3..32bc08a39925 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -329,14 +329,25 @@ static bool esw_is_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr) { struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + bool result = false; int i; - for (i = esw_attr->split_count; i < esw_attr->out_count; i++) + /* Indirect table is supported only for flows with in_port uplink + * and the destination is vport on the same eswitch as the uplink, + * return false in case at least one of destinations doesn't meet + * this criteria. + */ + for (i = esw_attr->split_count; i < esw_attr->out_count; i++) { if (esw_attr->dests[i].rep && mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport, - esw_attr->dests[i].mdev)) - return true; - return false; + esw_attr->dests[i].mdev)) { + result = true; + } else { + result = false; + break; + } + } + return result; } static int From 76091b0fb60970f610b7ba2d886cd7fb95c5eb2e Mon Sep 17 00:00:00 2001 From: Amir Tzin Date: Wed, 20 Oct 2021 12:45:05 +0300 Subject: [PATCH 58/85] net/mlx5: Fix use after free in mlx5_health_wait_pci_up The device health recovery flow calls mlx5_health_wait_pci_up() which queries the device for FW_RESET timeout after freeing the device timeouts structure on mlx5_function_teardown(). Fix this bug by moving timeouts structure init/cleanup to the device's init/uninit phases. Since it is necessary to reset default software timeouts on function reload, extract setting of defaults values from mlx5_tout_init() and call mlx5_tout_set_def_val() directly from mlx5_function_setup(). Fixes: 5945e1adeab5 ("net/mlx5: Read timeout values from init segment") Reported by: Niklas Schnelle Signed-off-by: Amir Tzin Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/lib/tout.c | 5 ++--- .../ethernet/mellanox/mlx5/core/lib/tout.h | 1 + .../net/ethernet/mellanox/mlx5/core/main.c | 22 ++++++++++--------- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c index 0dd96a6b140d..c1df0d3595d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c @@ -31,11 +31,11 @@ static void tout_set(struct mlx5_core_dev *dev, u64 val, enum mlx5_timeouts_type dev->timeouts->to[type] = val; } -static void tout_set_def_val(struct mlx5_core_dev *dev) +void mlx5_tout_set_def_val(struct mlx5_core_dev *dev) { int i; - for (i = MLX5_TO_FW_PRE_INIT_TIMEOUT_MS; i < MAX_TIMEOUT_TYPES; i++) + for (i = 0; i < MAX_TIMEOUT_TYPES; i++) tout_set(dev, tout_def_sw_val[i], i); } @@ -45,7 +45,6 @@ int mlx5_tout_init(struct mlx5_core_dev *dev) if (!dev->timeouts) return -ENOMEM; - tout_set_def_val(dev); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h index 31faa5c17aa9..1c42ead782fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h @@ -34,6 +34,7 @@ int mlx5_tout_init(struct mlx5_core_dev *dev); void mlx5_tout_cleanup(struct mlx5_core_dev *dev); void mlx5_tout_query_iseg(struct mlx5_core_dev *dev); int mlx5_tout_query_dtor(struct mlx5_core_dev *dev); +void mlx5_tout_set_def_val(struct mlx5_core_dev *dev); u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type); #define mlx5_tout_ms(dev, type) _mlx5_tout_ms(dev, MLX5_TO_##type##_MS) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index a92a92a52346..e127c0530b3a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -992,11 +992,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) if (mlx5_core_is_pf(dev)) pcie_print_link_status(dev->pdev); - err = mlx5_tout_init(dev); - if (err) { - mlx5_core_err(dev, "Failed initializing timeouts, aborting\n"); - return err; - } + mlx5_tout_set_def_val(dev); /* wait for firmware to accept initialization segments configurations */ @@ -1005,13 +1001,13 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) if (err) { mlx5_core_err(dev, "Firmware over %llu MS in pre-initializing state, aborting\n", mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT)); - goto err_tout_cleanup; + return err; } err = mlx5_cmd_init(dev); if (err) { mlx5_core_err(dev, "Failed initializing command interface, aborting\n"); - goto err_tout_cleanup; + return err; } mlx5_tout_query_iseg(dev); @@ -1094,8 +1090,6 @@ err_disable_hca: err_cmd_cleanup: mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); mlx5_cmd_cleanup(dev); -err_tout_cleanup: - mlx5_tout_cleanup(dev); return err; } @@ -1114,7 +1108,6 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) mlx5_core_disable_hca(dev, 0); mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); mlx5_cmd_cleanup(dev); - mlx5_tout_cleanup(dev); return 0; } @@ -1476,6 +1469,12 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) mlx5_debugfs_root); INIT_LIST_HEAD(&priv->traps); + err = mlx5_tout_init(dev); + if (err) { + mlx5_core_err(dev, "Failed initializing timeouts, aborting\n"); + goto err_timeout_init; + } + err = mlx5_health_init(dev); if (err) goto err_health_init; @@ -1501,6 +1500,8 @@ err_adev_init: err_pagealloc_init: mlx5_health_cleanup(dev); err_health_init: + mlx5_tout_cleanup(dev); +err_timeout_init: debugfs_remove(dev->priv.dbg_root); mutex_destroy(&priv->pgdir_mutex); mutex_destroy(&priv->alloc_mutex); @@ -1518,6 +1519,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev) mlx5_adev_cleanup(dev); mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); + mlx5_tout_cleanup(dev); debugfs_remove_recursive(dev->priv.dbg_root); mutex_destroy(&priv->pgdir_mutex); mutex_destroy(&priv->alloc_mutex); From 924cc4633f048b4fb4af3d1f9a51d10867625339 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sat, 6 Nov 2021 20:19:09 +0200 Subject: [PATCH 59/85] net/mlx5: Fix too early queueing of log timestamp work The log timestamp work should not be queued before the command interface is initialized, move it to a later stage in the init flow. Fixes: 5a1023deeed0 ("net/mlx5: Add periodic update of host time to firmware") Signed-off-by: Gal Pressman Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 64f1abc4dc36..380f50d5462d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -835,6 +835,9 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms); add_timer(&health->timer); + + if (mlx5_core_is_pf(dev)) + queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0); } void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) @@ -902,8 +905,6 @@ int mlx5_health_init(struct mlx5_core_dev *dev) INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work); INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work); INIT_DELAYED_WORK(&health->update_fw_log_ts_work, mlx5_health_log_ts_update); - if (mlx5_core_is_pf(dev)) - queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0); return 0; From 502e82b91361955c66c8453b5b7a905b0b5bd5a1 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Sun, 7 Nov 2021 17:21:45 +0200 Subject: [PATCH 60/85] net/mlx5: Fix access to a non-supported register Validate MRTC register is supported before triggering a delayed work which accesses it. Fixes: 5a1023deeed0 ("net/mlx5: Add periodic update of host time to firmware") Signed-off-by: Aya Levin Reviewed-by: Gal Pressman Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 8 +++----- include/linux/mlx5/mlx5_ifc.h | 5 ++++- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 380f50d5462d..3ca998874c50 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -836,7 +836,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms); add_timer(&health->timer); - if (mlx5_core_is_pf(dev)) + if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc)) queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index e127c0530b3a..7df9c7f8d9c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1071,18 +1071,16 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) mlx5_set_driver_version(dev); - mlx5_start_health_poll(dev); - err = mlx5_query_hca_caps(dev); if (err) { mlx5_core_err(dev, "query hca failed\n"); - goto stop_health; + goto reclaim_boot_pages; } + mlx5_start_health_poll(dev); + return 0; -stop_health: - mlx5_stop_health_poll(dev, boot); reclaim_boot_pages: mlx5_reclaim_startup_pages(dev); err_disable_hca: diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3636df90899a..fbaab440a484 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9698,7 +9698,10 @@ struct mlx5_ifc_mcam_access_reg_bits { u8 regs_84_to_68[0x11]; u8 tracer_registers[0x4]; - u8 regs_63_to_32[0x20]; + u8 regs_63_to_46[0x12]; + u8 mrtc[0x1]; + u8 regs_44_to_32[0xd]; + u8 regs_31_to_0[0x20]; }; From 8c8cf0382257b28378eeff535150c087a653ca19 Mon Sep 17 00:00:00 2001 From: Ben Ben-Ishay Date: Sun, 31 Oct 2021 18:31:02 +0200 Subject: [PATCH 61/85] net/mlx5e: SHAMPO, Fix constant expression result mlx5e_build_shampo_hd_umr uses counters i and index incorrectly as unsigned, thus the err state err_unmap could stuck in endless loop. Change i to int to solve the first issue. Reduce index check to solve the second issue, the caller function validates that index could not rotate. Fixes: 64509b052525 ("net/mlx5e: Add data path for SHAMPO feature") Signed-off-by: Ben Ben-Ishay Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 96967b0a2441..793511d5ee4c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -543,13 +543,13 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, u16 klm_entries, u16 index) { struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; - u16 entries, pi, i, header_offset, err, wqe_bbs, new_entries; + u16 entries, pi, header_offset, err, wqe_bbs, new_entries; u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey; struct page *page = shampo->last_page; u64 addr = shampo->last_addr; struct mlx5e_dma_info *dma_info; struct mlx5e_umr_wqe *umr_wqe; - int headroom; + int headroom, i; headroom = rq->buff.headroom; new_entries = klm_entries - (shampo->pi & (MLX5_UMR_KLM_ALIGNMENT - 1)); @@ -601,9 +601,7 @@ update_klm: err_unmap: while (--i >= 0) { - if (--index < 0) - index = shampo->hd_per_wq - 1; - dma_info = &shampo->info[index]; + dma_info = &shampo->info[--index]; if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) { dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE); mlx5e_page_release(rq, dma_info, true); From 21635d9203e1cf2b73b67e9a86059a62f62a3563 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Tue, 30 Nov 2021 18:01:46 +0100 Subject: [PATCH 62/85] net: dsa: mv88e6xxx: Fix application of erratum 4.8 for 88E6393X MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to SERDES scripts for 88E6393X, erratum 4.8 has to be applied every time before SerDes is powered on. Split the code for erratum 4.8 into separate function and call it in mv88e6393x_serdes_power(). Fixes: de776d0d316f ("net: dsa: mv88e6xxx: add support for mv88e6393x family") Signed-off-by: Marek Behún Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/serdes.c | 53 +++++++++++++++++++----------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c index 6ea003678798..0658ee3b014c 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.c +++ b/drivers/net/dsa/mv88e6xxx/serdes.c @@ -1271,9 +1271,9 @@ void mv88e6390_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p) } } -static int mv88e6393x_serdes_port_errata(struct mv88e6xxx_chip *chip, int lane) +static int mv88e6393x_serdes_erratum_4_6(struct mv88e6xxx_chip *chip, int lane) { - u16 reg, pcs; + u16 reg; int err; /* mv88e6393x family errata 4.6: @@ -1300,11 +1300,32 @@ static int mv88e6393x_serdes_port_errata(struct mv88e6xxx_chip *chip, int lane) if (err) return err; - err = mv88e6390_serdes_power_sgmii(chip, lane, false); - if (err) - return err; + return mv88e6390_serdes_power_sgmii(chip, lane, false); } + return 0; +} + +int mv88e6393x_serdes_setup_errata(struct mv88e6xxx_chip *chip) +{ + int err; + + err = mv88e6393x_serdes_erratum_4_6(chip, MV88E6393X_PORT0_LANE); + if (err) + return err; + + err = mv88e6393x_serdes_erratum_4_6(chip, MV88E6393X_PORT9_LANE); + if (err) + return err; + + return mv88e6393x_serdes_erratum_4_6(chip, MV88E6393X_PORT10_LANE); +} + +static int mv88e6393x_serdes_erratum_4_8(struct mv88e6xxx_chip *chip, int lane) +{ + u16 reg, pcs; + int err; + /* mv88e6393x family errata 4.8: * When a SERDES port is operating in 1000BASE-X or SGMII mode link may * not come up after hardware reset or software reset of SERDES core. @@ -1334,29 +1355,21 @@ static int mv88e6393x_serdes_port_errata(struct mv88e6xxx_chip *chip, int lane) MV88E6393X_ERRATA_4_8_REG, reg); } -int mv88e6393x_serdes_setup_errata(struct mv88e6xxx_chip *chip) -{ - int err; - - err = mv88e6393x_serdes_port_errata(chip, MV88E6393X_PORT0_LANE); - if (err) - return err; - - err = mv88e6393x_serdes_port_errata(chip, MV88E6393X_PORT9_LANE); - if (err) - return err; - - return mv88e6393x_serdes_port_errata(chip, MV88E6393X_PORT10_LANE); -} - int mv88e6393x_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane, bool on) { u8 cmode = chip->ports[port].cmode; + int err; if (port != 0 && port != 9 && port != 10) return -EOPNOTSUPP; + if (on) { + err = mv88e6393x_serdes_erratum_4_8(chip, lane); + if (err) + return err; + } + switch (cmode) { case MV88E6XXX_PORT_STS_CMODE_SGMII: case MV88E6XXX_PORT_STS_CMODE_1000BASEX: From 8c3318b4874e2dee867f5ae8f6d38f78e044bf71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Tue, 30 Nov 2021 18:01:47 +0100 Subject: [PATCH 63/85] net: dsa: mv88e6xxx: Drop unnecessary check in mv88e6393x_serdes_erratum_4_6() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The check for lane is unnecessary, since the function is called only with allowed lane argument. Signed-off-by: Marek Behún Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/serdes.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c index 0658ee3b014c..3a6244596a67 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.c +++ b/drivers/net/dsa/mv88e6xxx/serdes.c @@ -1284,26 +1284,20 @@ static int mv88e6393x_serdes_erratum_4_6(struct mv88e6xxx_chip *chip, int lane) * It seems that after this workaround the SERDES is automatically * powered up (the bit is cleared), so power it down. */ - if (lane == MV88E6393X_PORT0_LANE || lane == MV88E6393X_PORT9_LANE || - lane == MV88E6393X_PORT10_LANE) { - err = mv88e6390_serdes_read(chip, lane, - MDIO_MMD_PHYXS, - MV88E6393X_SERDES_POC, ®); - if (err) - return err; + err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS, + MV88E6393X_SERDES_POC, ®); + if (err) + return err; - reg &= ~MV88E6393X_SERDES_POC_PDOWN; - reg |= MV88E6393X_SERDES_POC_RESET; + reg &= ~MV88E6393X_SERDES_POC_PDOWN; + reg |= MV88E6393X_SERDES_POC_RESET; - err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS, - MV88E6393X_SERDES_POC, reg); - if (err) - return err; + err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS, + MV88E6393X_SERDES_POC, reg); + if (err) + return err; - return mv88e6390_serdes_power_sgmii(chip, lane, false); - } - - return 0; + return mv88e6390_serdes_power_sgmii(chip, lane, false); } int mv88e6393x_serdes_setup_errata(struct mv88e6xxx_chip *chip) From 7527d66260ac0c603c6baca5146748061fcddbd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Tue, 30 Nov 2021 18:01:48 +0100 Subject: [PATCH 64/85] net: dsa: mv88e6xxx: Save power by disabling SerDes trasmitter and receiver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Save power on 88E6393X by disabling SerDes receiver and transmitter after SerDes is SerDes is disabled. Signed-off-by: Marek Behún Cc: stable@vger.kernel.org # de776d0d316f ("net: dsa: mv88e6xxx: add support for mv88e6393x family") Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/serdes.c | 46 +++++++++++++++++++++++++++--- drivers/net/dsa/mv88e6xxx/serdes.h | 3 ++ 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c index 3a6244596a67..ceb63d7f1f97 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.c +++ b/drivers/net/dsa/mv88e6xxx/serdes.c @@ -1271,6 +1271,28 @@ void mv88e6390_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p) } } +static int mv88e6393x_serdes_power_lane(struct mv88e6xxx_chip *chip, int lane, + bool on) +{ + u16 reg; + int err; + + err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS, + MV88E6393X_SERDES_CTRL1, ®); + if (err) + return err; + + if (on) + reg &= ~(MV88E6393X_SERDES_CTRL1_TX_PDOWN | + MV88E6393X_SERDES_CTRL1_RX_PDOWN); + else + reg |= MV88E6393X_SERDES_CTRL1_TX_PDOWN | + MV88E6393X_SERDES_CTRL1_RX_PDOWN; + + return mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS, + MV88E6393X_SERDES_CTRL1, reg); +} + static int mv88e6393x_serdes_erratum_4_6(struct mv88e6xxx_chip *chip, int lane) { u16 reg; @@ -1297,7 +1319,11 @@ static int mv88e6393x_serdes_erratum_4_6(struct mv88e6xxx_chip *chip, int lane) if (err) return err; - return mv88e6390_serdes_power_sgmii(chip, lane, false); + err = mv88e6390_serdes_power_sgmii(chip, lane, false); + if (err) + return err; + + return mv88e6393x_serdes_power_lane(chip, lane, false); } int mv88e6393x_serdes_setup_errata(struct mv88e6xxx_chip *chip) @@ -1362,17 +1388,29 @@ int mv88e6393x_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane, err = mv88e6393x_serdes_erratum_4_8(chip, lane); if (err) return err; + + err = mv88e6393x_serdes_power_lane(chip, lane, true); + if (err) + return err; } switch (cmode) { case MV88E6XXX_PORT_STS_CMODE_SGMII: case MV88E6XXX_PORT_STS_CMODE_1000BASEX: case MV88E6XXX_PORT_STS_CMODE_2500BASEX: - return mv88e6390_serdes_power_sgmii(chip, lane, on); + err = mv88e6390_serdes_power_sgmii(chip, lane, on); + break; case MV88E6393X_PORT_STS_CMODE_5GBASER: case MV88E6393X_PORT_STS_CMODE_10GBASER: - return mv88e6390_serdes_power_10g(chip, lane, on); + err = mv88e6390_serdes_power_10g(chip, lane, on); + break; } - return 0; + if (err) + return err; + + if (!on) + err = mv88e6393x_serdes_power_lane(chip, lane, false); + + return err; } diff --git a/drivers/net/dsa/mv88e6xxx/serdes.h b/drivers/net/dsa/mv88e6xxx/serdes.h index cbb3ba30caea..e9292c8beee4 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.h +++ b/drivers/net/dsa/mv88e6xxx/serdes.h @@ -93,6 +93,9 @@ #define MV88E6393X_SERDES_POC_PCS_MASK 0x0007 #define MV88E6393X_SERDES_POC_RESET BIT(15) #define MV88E6393X_SERDES_POC_PDOWN BIT(5) +#define MV88E6393X_SERDES_CTRL1 0xf003 +#define MV88E6393X_SERDES_CTRL1_TX_PDOWN BIT(9) +#define MV88E6393X_SERDES_CTRL1_RX_PDOWN BIT(8) #define MV88E6393X_ERRATA_4_8_REG 0xF074 #define MV88E6393X_ERRATA_4_8_BIT BIT(14) From 93fd8207bed80ce19aaf59932cbe1c03d418a37d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Tue, 30 Nov 2021 18:01:49 +0100 Subject: [PATCH 65/85] net: dsa: mv88e6xxx: Add fix for erratum 5.2 of 88E6393X family MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add fix for erratum 5.2 of the 88E6393X (Amethyst) family: for 10gbase-r mode, some undocumented registers need to be written some special values. Fixes: de776d0d316f ("net: dsa: mv88e6xxx: add support for mv88e6393x family") Signed-off-by: Marek Behún Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/serdes.c | 48 ++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c index ceb63d7f1f97..9e4f18a4adc2 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.c +++ b/drivers/net/dsa/mv88e6xxx/serdes.c @@ -1375,6 +1375,50 @@ static int mv88e6393x_serdes_erratum_4_8(struct mv88e6xxx_chip *chip, int lane) MV88E6393X_ERRATA_4_8_REG, reg); } +static int mv88e6393x_serdes_erratum_5_2(struct mv88e6xxx_chip *chip, int lane, + u8 cmode) +{ + static const struct { + u16 dev, reg, val, mask; + } fixes[] = { + { MDIO_MMD_VEND1, 0x8093, 0xcb5a, 0xffff }, + { MDIO_MMD_VEND1, 0x8171, 0x7088, 0xffff }, + { MDIO_MMD_VEND1, 0x80c9, 0x311a, 0xffff }, + { MDIO_MMD_VEND1, 0x80a2, 0x8000, 0xff7f }, + { MDIO_MMD_VEND1, 0x80a9, 0x0000, 0xfff0 }, + { MDIO_MMD_VEND1, 0x80a3, 0x0000, 0xf8ff }, + { MDIO_MMD_PHYXS, MV88E6393X_SERDES_POC, + MV88E6393X_SERDES_POC_RESET, MV88E6393X_SERDES_POC_RESET }, + }; + int err, i; + u16 reg; + + /* mv88e6393x family errata 5.2: + * For optimal signal integrity the following sequence should be applied + * to SERDES operating in 10G mode. These registers only apply to 10G + * operation and have no effect on other speeds. + */ + if (cmode != MV88E6393X_PORT_STS_CMODE_10GBASER) + return 0; + + for (i = 0; i < ARRAY_SIZE(fixes); ++i) { + err = mv88e6390_serdes_read(chip, lane, fixes[i].dev, + fixes[i].reg, ®); + if (err) + return err; + + reg &= ~fixes[i].mask; + reg |= fixes[i].val; + + err = mv88e6390_serdes_write(chip, lane, fixes[i].dev, + fixes[i].reg, reg); + if (err) + return err; + } + + return 0; +} + int mv88e6393x_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane, bool on) { @@ -1389,6 +1433,10 @@ int mv88e6393x_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane, if (err) return err; + err = mv88e6393x_serdes_erratum_5_2(chip, lane, cmode); + if (err) + return err; + err = mv88e6393x_serdes_power_lane(chip, lane, true); if (err) return err; From 163000dbc772c1eae9bdfe7c8fe30155db1efd74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Tue, 30 Nov 2021 18:01:50 +0100 Subject: [PATCH 66/85] net: dsa: mv88e6xxx: Fix inband AN for 2500base-x on 88E6393X family MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Inband AN is broken on Amethyst in 2500base-x mode when set by standard mechanism (via cmode). (There probably is some weird setting done by default in the switch for this mode that make it cycle in some state or something, because when the peer is the mvneta controller, it receives link change interrupts every ~0.3ms, but the link is always down.) Get around this by configuring the PCS mode to 1000base-x (where inband AN works), and then changing the SerDes frequency while SerDes transmitter and receiver are disabled, before enabling SerDes PHY. After disabling SerDes PHY, change the PCS mode back to 2500base-x, to avoid confusing the device (if we leave it at 1000base-x PCS mode but with different frequency, and then change cmode to sgmii, the device won't change the frequency because it thinks it already has the correct one). The register which changes the frequency is undocumented. I discovered it by going through all registers in the ranges 4.f000-4.f100 and 1e.8000-1e.8200 for all SerDes cmodes (sgmii, 1000base-x, 2500base-x, 5gbase-r, 10gbase-r, usxgmii) and filtering out registers that didn't make sense (the value was the same for modes which have different frequency). The result of this was: reg sgmii 1000base-x 2500base-x 5gbase-r 10gbase-r usxgmii 04.f002 005b 0058 0059 005c 005d 005f 04.f076 3000 0000 1000 4000 5000 7000 04.f07c 0950 0950 1850 0550 0150 0150 1e.8000 0059 0059 0058 0055 0051 0051 1e.8140 0e20 0e20 0e28 0e21 0e42 0e42 Register 04.f002 is the documented Port Operational Confiuration register, it's last 3 bits select PCS type, so changing this register also changes the frequency to the appropriate value. Registers 04.f076 and 04.f07c are not writable. Undocumented register 1e.8000 was the one: changing bits 3:0 from 9 to 8 changed SerDes frequency to 3.125 GHz, while leaving the value of PCS mode in register 04.f002.2:0 at 1000base-x. Inband autonegotiation started working correctly. (I didn't try anything with register 1e.8140 since 1e.8000 solved the problem.) Since I don't have documentation for this register 1e.8000.3:0, I am using the constants without names, but my hypothesis is that this register selects PHY frequency. If in the future I have access to an oscilloscope able to handle these frequencies, I will try to test this hypothesis. Fixes: de776d0d316f ("net: dsa: mv88e6xxx: add support for mv88e6393x family") Signed-off-by: Marek Behún Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/serdes.c | 61 +++++++++++++++++++++++++++++- drivers/net/dsa/mv88e6xxx/serdes.h | 1 + 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c index 9e4f18a4adc2..6f60376b932c 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.c +++ b/drivers/net/dsa/mv88e6xxx/serdes.c @@ -1419,6 +1419,54 @@ static int mv88e6393x_serdes_erratum_5_2(struct mv88e6xxx_chip *chip, int lane, return 0; } +static int mv88e6393x_serdes_fix_2500basex_an(struct mv88e6xxx_chip *chip, + int lane, u8 cmode, bool on) +{ + u16 reg; + int err; + + if (cmode != MV88E6XXX_PORT_STS_CMODE_2500BASEX) + return 0; + + /* Inband AN is broken on Amethyst in 2500base-x mode when set by + * standard mechanism (via cmode). + * We can get around this by configuring the PCS mode to 1000base-x + * and then writing value 0x58 to register 1e.8000. (This must be done + * while SerDes receiver and transmitter are disabled, which is, when + * this function is called.) + * It seem that when we do this configuration to 2500base-x mode (by + * changing PCS mode to 1000base-x and frequency to 3.125 GHz from + * 1.25 GHz) and then configure to sgmii or 1000base-x, the device + * thinks that it already has SerDes at 1.25 GHz and does not change + * the 1e.8000 register, leaving SerDes at 3.125 GHz. + * To avoid this, change PCS mode back to 2500base-x when disabling + * SerDes from 2500base-x mode. + */ + err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS, + MV88E6393X_SERDES_POC, ®); + if (err) + return err; + + reg &= ~(MV88E6393X_SERDES_POC_PCS_MASK | MV88E6393X_SERDES_POC_AN); + if (on) + reg |= MV88E6393X_SERDES_POC_PCS_1000BASEX | + MV88E6393X_SERDES_POC_AN; + else + reg |= MV88E6393X_SERDES_POC_PCS_2500BASEX; + reg |= MV88E6393X_SERDES_POC_RESET; + + err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS, + MV88E6393X_SERDES_POC, reg); + if (err) + return err; + + err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_VEND1, 0x8000, 0x58); + if (err) + return err; + + return 0; +} + int mv88e6393x_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane, bool on) { @@ -1437,6 +1485,11 @@ int mv88e6393x_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane, if (err) return err; + err = mv88e6393x_serdes_fix_2500basex_an(chip, lane, cmode, + true); + if (err) + return err; + err = mv88e6393x_serdes_power_lane(chip, lane, true); if (err) return err; @@ -1457,8 +1510,14 @@ int mv88e6393x_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane, if (err) return err; - if (!on) + if (!on) { err = mv88e6393x_serdes_power_lane(chip, lane, false); + if (err) + return err; + + err = mv88e6393x_serdes_fix_2500basex_an(chip, lane, cmode, + false); + } return err; } diff --git a/drivers/net/dsa/mv88e6xxx/serdes.h b/drivers/net/dsa/mv88e6xxx/serdes.h index e9292c8beee4..8dd8ed225b45 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.h +++ b/drivers/net/dsa/mv88e6xxx/serdes.h @@ -93,6 +93,7 @@ #define MV88E6393X_SERDES_POC_PCS_MASK 0x0007 #define MV88E6393X_SERDES_POC_RESET BIT(15) #define MV88E6393X_SERDES_POC_PDOWN BIT(5) +#define MV88E6393X_SERDES_POC_AN BIT(3) #define MV88E6393X_SERDES_CTRL1 0xf003 #define MV88E6393X_SERDES_CTRL1_TX_PDOWN BIT(9) #define MV88E6393X_SERDES_CTRL1_RX_PDOWN BIT(8) From ede359d8843a2779d232ed30bc36089d4b5962e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Tue, 30 Nov 2021 18:01:51 +0100 Subject: [PATCH 67/85] net: dsa: mv88e6xxx: Link in pcs_get_state() if AN is bypassed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Function mv88e6xxx_serdes_pcs_get_state() currently does not report link up if AN is enabled, Link bit is set, but Speed and Duplex Resolved bit is not set, which testing shows is the case for when auto-negotiation was bypassed (we have AN enabled but link partner does not). An example of such link partner is Marvell 88X3310 PHY, when put into the mode where host interface changes between 10gbase-r, 5gbase-r, 2500base-x and sgmii according to copper speed. The 88X3310 does not enable AN in 2500base-x, and so SerDes on mv88e6xxx currently does not link with it. Fix this. Fixes: a5a6858b793f ("net: dsa: mv88e6xxx: extend phylink to Serdes PHYs") Signed-off-by: Marek Behún Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/serdes.c | 48 ++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c index 6f60376b932c..55273013bfb5 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.c +++ b/drivers/net/dsa/mv88e6xxx/serdes.c @@ -50,11 +50,22 @@ static int mv88e6390_serdes_write(struct mv88e6xxx_chip *chip, } static int mv88e6xxx_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, - u16 status, u16 lpa, + u16 ctrl, u16 status, u16 lpa, struct phylink_link_state *state) { + state->link = !!(status & MV88E6390_SGMII_PHY_STATUS_LINK); + if (status & MV88E6390_SGMII_PHY_STATUS_SPD_DPL_VALID) { - state->link = !!(status & MV88E6390_SGMII_PHY_STATUS_LINK); + /* The Spped and Duplex Resolved register is 1 if AN is enabled + * and complete, or if AN is disabled. So with disabled AN we + * still get here on link up. But we want to set an_complete + * only if AN was enabled, thus we look at BMCR_ANENABLE. + * (According to 802.3-2008 section 22.2.4.2.10, we should be + * able to get this same value from BMSR_ANEGCAPABLE, but tests + * show that these Marvell PHYs don't conform to this part of + * the specificaion - BMSR_ANEGCAPABLE is simply always 1.) + */ + state->an_complete = !!(ctrl & BMCR_ANENABLE); state->duplex = status & MV88E6390_SGMII_PHY_STATUS_DUPLEX_FULL ? DUPLEX_FULL : DUPLEX_HALF; @@ -81,6 +92,18 @@ static int mv88e6xxx_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, dev_err(chip->dev, "invalid PHY speed\n"); return -EINVAL; } + } else if (state->link && + state->interface != PHY_INTERFACE_MODE_SGMII) { + /* If Speed and Duplex Resolved register is 0 and link is up, it + * means that AN was enabled, but link partner had it disabled + * and the PHY invoked the Auto-Negotiation Bypass feature and + * linked anyway. + */ + state->duplex = DUPLEX_FULL; + if (state->interface == PHY_INTERFACE_MODE_2500BASEX) + state->speed = SPEED_2500; + else + state->speed = SPEED_1000; } else { state->link = false; } @@ -168,9 +191,15 @@ int mv88e6352_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port, int mv88e6352_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port, int lane, struct phylink_link_state *state) { - u16 lpa, status; + u16 lpa, status, ctrl; int err; + err = mv88e6352_serdes_read(chip, MII_BMCR, &ctrl); + if (err) { + dev_err(chip->dev, "can't read Serdes PHY control: %d\n", err); + return err; + } + err = mv88e6352_serdes_read(chip, 0x11, &status); if (err) { dev_err(chip->dev, "can't read Serdes PHY status: %d\n", err); @@ -183,7 +212,7 @@ int mv88e6352_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port, return err; } - return mv88e6xxx_serdes_pcs_get_state(chip, status, lpa, state); + return mv88e6xxx_serdes_pcs_get_state(chip, ctrl, status, lpa, state); } int mv88e6352_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port, @@ -883,9 +912,16 @@ int mv88e6390_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port, static int mv88e6390_serdes_pcs_get_state_sgmii(struct mv88e6xxx_chip *chip, int port, int lane, struct phylink_link_state *state) { - u16 lpa, status; + u16 lpa, status, ctrl; int err; + err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS, + MV88E6390_SGMII_BMCR, &ctrl); + if (err) { + dev_err(chip->dev, "can't read Serdes PHY control: %d\n", err); + return err; + } + err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS, MV88E6390_SGMII_PHY_STATUS, &status); if (err) { @@ -900,7 +936,7 @@ static int mv88e6390_serdes_pcs_get_state_sgmii(struct mv88e6xxx_chip *chip, return err; } - return mv88e6xxx_serdes_pcs_get_state(chip, status, lpa, state); + return mv88e6xxx_serdes_pcs_get_state(chip, ctrl, status, lpa, state); } static int mv88e6390_serdes_pcs_get_state_10g(struct mv88e6xxx_chip *chip, From e2dabc4f7e7b60299c20a36d6a7b24ed9bf8e572 Mon Sep 17 00:00:00 2001 From: Zhou Qingyang Date: Tue, 30 Nov 2021 19:08:48 +0800 Subject: [PATCH 68/85] net: qlogic: qlcnic: Fix a NULL pointer dereference in qlcnic_83xx_add_rings() In qlcnic_83xx_add_rings(), the indirect function of ahw->hw_ops->alloc_mbx_args will be called to allocate memory for cmd.req.arg, and there is a dereference of it in qlcnic_83xx_add_rings(), which could lead to a NULL pointer dereference on failure of the indirect function like qlcnic_83xx_alloc_mbx_args(). Fix this bug by adding a check of alloc_mbx_args(), this patch imitates the logic of mbx_cmd()'s failure handling. This bug was found by a static analyzer. The analysis employs differential checking to identify inconsistent security operations (e.g., checks or kfrees) between two code paths and confirms that the inconsistent operations are not recovered in the current function or the callers, so they constitute bugs. Note that, as a bug found by static analysis, it can be a false positive or hard to trigger. Multiple researchers have cross-reviewed the bug. Builds with CONFIG_QLCNIC=m show no new warnings, and our static analyzer no longer warns about this code. Fixes: 7f9664525f9c ("qlcnic: 83xx memory map and HW access routine") Signed-off-by: Zhou Qingyang Link: https://lore.kernel.org/r/20211130110848.109026-1-zhou1615@umn.edu Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index d51bac7ba5af..bd0607680329 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -1077,8 +1077,14 @@ static int qlcnic_83xx_add_rings(struct qlcnic_adapter *adapter) sds_mbx_size = sizeof(struct qlcnic_sds_mbx); context_id = recv_ctx->context_id; num_sds = adapter->drv_sds_rings - QLCNIC_MAX_SDS_RINGS; - ahw->hw_ops->alloc_mbx_args(&cmd, adapter, - QLCNIC_CMD_ADD_RCV_RINGS); + err = ahw->hw_ops->alloc_mbx_args(&cmd, adapter, + QLCNIC_CMD_ADD_RCV_RINGS); + if (err) { + dev_err(&adapter->pdev->dev, + "Failed to alloc mbx args %d\n", err); + return err; + } + cmd.req.arg[1] = 0 | (num_sds << 8) | (context_id << 16); /* set up status rings, mbx 2-81 */ From ee201011c1e1563c114a55c86eb164b236f18e84 Mon Sep 17 00:00:00 2001 From: Stephen Suryaputra Date: Tue, 30 Nov 2021 11:26:37 -0500 Subject: [PATCH 69/85] vrf: Reset IPCB/IP6CB when processing outbound pkts in vrf dev xmit IPCB/IP6CB need to be initialized when processing outbound v4 or v6 pkts in the codepath of vrf device xmit function so that leftover garbage doesn't cause futher code that uses the CB to incorrectly process the pkt. One occasion of the issue might occur when MPLS route uses the vrf device as the outgoing device such as when the route is added using "ip -f mpls route add