From 3c10ffddc61f8a1a59e29a110ba70b47e679206a Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Thu, 2 Sep 2021 22:04:00 +0300 Subject: [PATCH 01/66] net: xfrm: fix shift-out-of-bounds in xfrm_get_default Syzbot hit shift-out-of-bounds in xfrm_get_default. The problem was in missing validation check for user data. up->dirmask comes from user-space, so we need to check if this value is less than XFRM_USERPOLICY_DIRMASK_MAX to avoid shift-out-of-bounds bugs. Fixes: 2d151d39073a ("xfrm: Add possibility to set the default to block if we have no policy") Reported-and-tested-by: syzbot+b2be9dd8ca6f6c73ee2d@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 03b66d154b2b..4719a6d54aa6 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2005,6 +2005,11 @@ static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh, return -EMSGSIZE; } + if (up->dirmask >= XFRM_USERPOLICY_DIRMASK_MAX) { + kfree_skb(r_skb); + return -EINVAL; + } + r_up = nlmsg_data(r_nlh); r_up->action = ((net->xfrm.policy_default & (1 << up->dirmask)) >> up->dirmask); From 844f7eaaed9267ae17d33778efe65548cc940205 Mon Sep 17 00:00:00 2001 From: Eugene Syromiatnikov Date: Sun, 12 Sep 2021 14:22:34 +0200 Subject: [PATCH 02/66] include/uapi/linux/xfrm.h: Fix XFRM_MSG_MAPPING ABI breakage Commit 2d151d39073a ("xfrm: Add possibility to set the default to block if we have no policy") broke ABI by changing the value of the XFRM_MSG_MAPPING enum item, thus also evading the build-time check in security/selinux/nlmsgtab.c:selinux_nlmsg_lookup for presence of proper security permission checks in nlmsg_xfrm_perms. Fix it by placing XFRM_MSG_SETDEFAULT/XFRM_MSG_GETDEFAULT to the end of the enum, right before __XFRM_MSG_MAX, and updating the nlmsg_xfrm_perms accordingly. 
Fixes: 2d151d39073a ("xfrm: Add possibility to set the default to block if we have no policy") References: https://lore.kernel.org/netdev/20210901151402.GA2557@altlinux.org/ Signed-off-by: Eugene Syromiatnikov Acked-by: Antony Antony Acked-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 6 +++--- security/selinux/nlmsgtab.c | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index b96c1ea7166d..26f456b1f33e 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -213,13 +213,13 @@ enum { XFRM_MSG_GETSPDINFO, #define XFRM_MSG_GETSPDINFO XFRM_MSG_GETSPDINFO + XFRM_MSG_MAPPING, +#define XFRM_MSG_MAPPING XFRM_MSG_MAPPING + XFRM_MSG_SETDEFAULT, #define XFRM_MSG_SETDEFAULT XFRM_MSG_SETDEFAULT XFRM_MSG_GETDEFAULT, #define XFRM_MSG_GETDEFAULT XFRM_MSG_GETDEFAULT - - XFRM_MSG_MAPPING, -#define XFRM_MSG_MAPPING XFRM_MSG_MAPPING __XFRM_MSG_MAX }; #define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1) diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index d59276f48d4f..94ea2a8b2bb7 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c @@ -126,6 +126,8 @@ static const struct nlmsg_perm nlmsg_xfrm_perms[] = { XFRM_MSG_NEWSPDINFO, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_GETSPDINFO, NETLINK_XFRM_SOCKET__NLMSG_READ }, { XFRM_MSG_MAPPING, NETLINK_XFRM_SOCKET__NLMSG_READ }, + { XFRM_MSG_SETDEFAULT, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, + { XFRM_MSG_GETDEFAULT, NETLINK_XFRM_SOCKET__NLMSG_READ }, }; static const struct nlmsg_perm nlmsg_audit_perms[] = @@ -189,7 +191,7 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm) * structures at the top of this file with the new mappings * before updating the BUILD_BUG_ON() macro! 
*/ - BUILD_BUG_ON(XFRM_MSG_MAX != XFRM_MSG_MAPPING); + BUILD_BUG_ON(XFRM_MSG_MAX != XFRM_MSG_GETDEFAULT); err = nlmsg_perm(nlmsg_type, perm, nlmsg_xfrm_perms, sizeof(nlmsg_xfrm_perms)); break; From f8d858e607b2a36808ac6d4218f5f5203d7a7d63 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 14 Sep 2021 16:46:33 +0200 Subject: [PATCH 03/66] xfrm: make user policy API complete >From a userland POV, this API was based on some magic values: - dirmask and action were bitfields but meaning of bits (XFRM_POL_DEFAULT_*) are not exported; - action is confusing, if a bit is set, does it mean drop or accept? Let's try to simplify this uapi by using explicit field and macros. Fixes: 2d151d39073a ("xfrm: Add possibility to set the default to block if we have no policy") Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 9 ++++++--- net/xfrm/xfrm_user.c | 36 +++++++++++++++++++----------------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 26f456b1f33e..eda0426ec4c2 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -514,9 +514,12 @@ struct xfrm_user_offload { #define XFRM_OFFLOAD_INBOUND 2 struct xfrm_userpolicy_default { -#define XFRM_USERPOLICY_DIRMASK_MAX (sizeof(__u8) * 8) - __u8 dirmask; - __u8 action; +#define XFRM_USERPOLICY_UNSPEC 0 +#define XFRM_USERPOLICY_BLOCK 1 +#define XFRM_USERPOLICY_ACCEPT 2 + __u8 in; + __u8 fwd; + __u8 out; }; #ifndef __KERNEL__ diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 4719a6d54aa6..90c88390f1fe 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1966,16 +1966,21 @@ static int xfrm_set_default(struct sk_buff *skb, struct nlmsghdr *nlh, { struct net *net = sock_net(skb->sk); struct xfrm_userpolicy_default *up = nlmsg_data(nlh); - u8 dirmask; - u8 old_default = net->xfrm.policy_default; - if (up->dirmask >= XFRM_USERPOLICY_DIRMASK_MAX) - return -EINVAL; + if 
(up->in == XFRM_USERPOLICY_BLOCK) + net->xfrm.policy_default |= XFRM_POL_DEFAULT_IN; + else if (up->in == XFRM_USERPOLICY_ACCEPT) + net->xfrm.policy_default &= ~XFRM_POL_DEFAULT_IN; - dirmask = (1 << up->dirmask) & XFRM_POL_DEFAULT_MASK; + if (up->fwd == XFRM_USERPOLICY_BLOCK) + net->xfrm.policy_default |= XFRM_POL_DEFAULT_FWD; + else if (up->fwd == XFRM_USERPOLICY_ACCEPT) + net->xfrm.policy_default &= ~XFRM_POL_DEFAULT_FWD; - net->xfrm.policy_default = (old_default & (0xff ^ dirmask)) - | (up->action << up->dirmask); + if (up->out == XFRM_USERPOLICY_BLOCK) + net->xfrm.policy_default |= XFRM_POL_DEFAULT_OUT; + else if (up->out == XFRM_USERPOLICY_ACCEPT) + net->xfrm.policy_default &= ~XFRM_POL_DEFAULT_OUT; rt_genid_bump_all(net); @@ -1988,13 +1993,11 @@ static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh, struct sk_buff *r_skb; struct nlmsghdr *r_nlh; struct net *net = sock_net(skb->sk); - struct xfrm_userpolicy_default *r_up, *up; + struct xfrm_userpolicy_default *r_up; int len = NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_default)); u32 portid = NETLINK_CB(skb).portid; u32 seq = nlh->nlmsg_seq; - up = nlmsg_data(nlh); - r_skb = nlmsg_new(len, GFP_ATOMIC); if (!r_skb) return -ENOMEM; @@ -2005,15 +2008,14 @@ static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh, return -EMSGSIZE; } - if (up->dirmask >= XFRM_USERPOLICY_DIRMASK_MAX) { - kfree_skb(r_skb); - return -EINVAL; - } - r_up = nlmsg_data(r_nlh); - r_up->action = ((net->xfrm.policy_default & (1 << up->dirmask)) >> up->dirmask); - r_up->dirmask = up->dirmask; + r_up->in = net->xfrm.policy_default & XFRM_POL_DEFAULT_IN ? + XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT; + r_up->fwd = net->xfrm.policy_default & XFRM_POL_DEFAULT_FWD ? + XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT; + r_up->out = net->xfrm.policy_default & XFRM_POL_DEFAULT_OUT ? 
+ XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT; nlmsg_end(r_skb, r_nlh); return nlmsg_unicast(net->xfrm.nlsk, r_skb, portid); From 88d0adb5f13b1c52fbb7d755f6f79db18c2f0c2c Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 14 Sep 2021 16:46:34 +0200 Subject: [PATCH 04/66] xfrm: notify default policy on update This configuration knob is very sensitive; userspace should be notified when it changes. Fixes: 2d151d39073a ("xfrm: Add possibility to set the default to block if we have no policy") Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 90c88390f1fe..0eba0c27c665 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1961,6 +1961,36 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, return skb; } +static int xfrm_notify_userpolicy(struct net *net) +{ + struct xfrm_userpolicy_default *up; + int len = NLMSG_ALIGN(sizeof(*up)); + struct nlmsghdr *nlh; + struct sk_buff *skb; + + skb = nlmsg_new(len, GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_GETDEFAULT, sizeof(*up), 0); + if (nlh == NULL) { + kfree_skb(skb); + return -EMSGSIZE; + } + + up = nlmsg_data(nlh); + up->in = net->xfrm.policy_default & XFRM_POL_DEFAULT_IN ? + XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT; + up->fwd = net->xfrm.policy_default & XFRM_POL_DEFAULT_FWD ? + XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT; + up->out = net->xfrm.policy_default & XFRM_POL_DEFAULT_OUT ? 
+ XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT; + + nlmsg_end(skb, nlh); + + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY); +} + static int xfrm_set_default(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { @@ -1984,6 +2014,7 @@ static int xfrm_set_default(struct sk_buff *skb, struct nlmsghdr *nlh, rt_genid_bump_all(net); + xfrm_notify_userpolicy(net); return 0; } From 24d5f16e407b75bc59d5419b957a9cab423b2681 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Tue, 7 Sep 2021 14:32:14 +0300 Subject: [PATCH 05/66] iwlwifi: mvm: Fix possible NULL dereference In __iwl_mvm_remove_time_event() check whether 'te_data->vif' is NULL before dereferencing it. Fixes: 7b3954a1d69a ("iwlwifi: mvm: Explicitly stop session protection before unbinding") Reported-by: Dan Carpenter Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20210907143156.e80e52167d93.Ie2247f43f8acb2cee6dff5b07a3947c79a772835@changeid --- drivers/net/wireless/intel/iwlwifi/mvm/time-event.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index 25af88a3edce..e91f8e889df7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -662,12 +662,13 @@ static bool __iwl_mvm_remove_time_event(struct iwl_mvm *mvm, u32 *uid) { u32 id; - struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(te_data->vif); + struct iwl_mvm_vif *mvmvif; enum nl80211_iftype iftype; if (!te_data->vif) return false; + mvmvif = iwl_mvm_vif_from_mac80211(te_data->vif); iftype = te_data->vif->type; /* From 27a221f433b7ac6604845b09696e60e803972d3c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 31 Aug 2021 11:18:02 +0300 Subject: [PATCH 06/66] iwlwifi: mvm: d3: Fix off by ones in iwl_mvm_wowlan_get_rsc_v5_data() These should be >= ARRAY_SIZE() instead of > 
ARRAY_SIZE() to prevent an out of bounds write on the next line. Fixes: 79e561f0f05a ("iwlwifi: mvm: d3: implement RSC command version 5") Signed-off-by: Dan Carpenter Acked-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210831081802.GA9846@kili --- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 0e97d5e6c644..6e3a63a5a75c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -360,11 +360,11 @@ static void iwl_mvm_wowlan_get_rsc_v5_data(struct ieee80211_hw *hw, if (sta) { rsc = data->rsc->ucast_rsc; } else { - if (WARN_ON(data->gtks > ARRAY_SIZE(data->gtk_ids))) + if (WARN_ON(data->gtks >= ARRAY_SIZE(data->gtk_ids))) return; data->gtk_ids[data->gtks] = key->keyidx; rsc = data->rsc->mcast_rsc[data->gtks % 2]; - if (WARN_ON(key->keyidx > + if (WARN_ON(key->keyidx >= ARRAY_SIZE(data->rsc->mcast_key_id_map))) return; data->rsc->mcast_key_id_map[key->keyidx] = data->gtks % 2; From b6a46b4f6e4b35868e33930acaa7c50bf6cab821 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 31 Aug 2021 11:18:49 +0300 Subject: [PATCH 07/66] iwlwifi: mvm: d3: missing unlock in iwl_mvm_wowlan_program_keys() This code was refactored and the mutex_unlock(&mvm->mutex); was accidentally deleted. 
Fixes: af3aab9ce298 ("iwlwifi: mvm: d3: make key reprogramming iteration optional") Signed-off-by: Dan Carpenter Acked-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210831081849.GB9846@kili --- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 6e3a63a5a75c..9f706fffb592 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -160,6 +160,7 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw, mvm->ptk_icvlen = key->icv_len; mvm->gtk_ivlen = key->iv_len; mvm->gtk_icvlen = key->icv_len; + mutex_unlock(&mvm->mutex); /* don't upload key again */ return; From 91dab18f0df171984688d0da258c4c9d95836416 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 17 Sep 2021 12:28:34 +0200 Subject: [PATCH 08/66] MAINTAINERS: Move Daniel Drake to credits Daniel Drake's @gentoo.org email bounces (he is listed as a retired Gentoo developer) and there has been no activity from him regarding the zd1211rw driver. His second address, @laptop.org, also bounces. 
Cc: Daniel Drake Signed-off-by: Krzysztof Kozlowski Acked-by: Greg Kroah-Hartman Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210917102834.25649-1-krzysztof.kozlowski@canonical.com --- CREDITS | 1 + MAINTAINERS | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/CREDITS b/CREDITS index 7ef7b136e71d..d8f63e8329e8 100644 --- a/CREDITS +++ b/CREDITS @@ -971,6 +971,7 @@ D: PowerPC N: Daniel Drake E: dsd@gentoo.org D: USBAT02 CompactFlash support in usb-storage +D: ZD1211RW wireless driver S: UK N: Oleg Drokin diff --git a/MAINTAINERS b/MAINTAINERS index eeb4c70b3d5b..7dc658df9e00 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17800,7 +17800,6 @@ F: drivers/staging/nvec/ STAGING - OLPC SECONDARY DISPLAY CONTROLLER (DCON) M: Jens Frederich -M: Daniel Drake M: Jon Nettleton S: Maintained W: http://wiki.laptop.org/go/DCON @@ -20704,7 +20703,6 @@ S: Maintained F: mm/zbud.c ZD1211RW WIRELESS DRIVER -M: Daniel Drake M: Ulrich Kunitz L: linux-wireless@vger.kernel.org L: zd1211-devs@lists.sourceforge.net (subscribers-only) From 93ec1320b0170d7a207eda2d119c669b673401ed Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 22 Sep 2021 10:50:06 +0200 Subject: [PATCH 09/66] xfrm: fix rcu lock in xfrm_notify_userpolicy() As stated in the comment above xfrm_nlmsg_multicast(), rcu read lock must be held before calling this function. 
Reported-by: syzbot+3d9866419b4aa8f985d6@syzkaller.appspotmail.com Fixes: 703b94b93c19 ("xfrm: notify default policy on update") Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 0eba0c27c665..3a3cb09eec12 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1967,6 +1967,7 @@ static int xfrm_notify_userpolicy(struct net *net) int len = NLMSG_ALIGN(sizeof(*up)); struct nlmsghdr *nlh; struct sk_buff *skb; + int err; skb = nlmsg_new(len, GFP_ATOMIC); if (skb == NULL) @@ -1988,7 +1989,11 @@ static int xfrm_notify_userpolicy(struct net *net) nlmsg_end(skb, nlh); - return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY); + rcu_read_lock(); + err = xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY); + rcu_read_unlock(); + + return err; } static int xfrm_set_default(struct sk_buff *skb, struct nlmsghdr *nlh, From fb8c3a3c52400512fc8b3b61150057b888c30b0d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 20 Sep 2021 14:23:44 +0200 Subject: [PATCH 10/66] ath5k: fix building with LEDS=m Randconfig builds still show a failure for the ath5k driver, similar to the one that was fixed for ath9k earlier: WARNING: unmet direct dependencies detected for MAC80211_LEDS Depends on [n]: NET [=y] && WIRELESS [=y] && MAC80211 [=y] && (LEDS_CLASS [=m]=y || LEDS_CLASS [=m]=MAC80211 [=y]) Selected by [m]: - ATH5K [=m] && NETDEVICES [=y] && WLAN [=y] && WLAN_VENDOR_ATH [=y] && (PCI [=y] || ATH25) && MAC80211 [=y] net/mac80211/led.c: In function 'ieee80211_alloc_led_names': net/mac80211/led.c:34:22: error: 'struct led_trigger' has no member named 'name' 34 | local->rx_led.name = kasprintf(GFP_KERNEL, "%srx", | ^ Copying the same logic from my ath9k patch makes this one work as well, stubbing out the calls to the LED subsystem. 
Fixes: b64acb28da83 ("ath9k: fix build error with LEDS_CLASS=m") Fixes: 72cdab808714 ("ath9k: Do not select MAC80211_LEDS by default") Fixes: 3a078876caee ("ath5k: convert LED code to use mac80211 triggers") Link: https://lore.kernel.org/all/20210722105501.1000781-1-arnd@kernel.org/ Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210920122359.353810-1-arnd@kernel.org --- drivers/net/wireless/ath/ath5k/Kconfig | 4 +--- drivers/net/wireless/ath/ath5k/led.c | 10 ++++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/ath/ath5k/Kconfig b/drivers/net/wireless/ath/ath5k/Kconfig index f35cd8de228e..6914b37bb0fb 100644 --- a/drivers/net/wireless/ath/ath5k/Kconfig +++ b/drivers/net/wireless/ath/ath5k/Kconfig @@ -3,9 +3,7 @@ config ATH5K tristate "Atheros 5xxx wireless cards support" depends on (PCI || ATH25) && MAC80211 select ATH_COMMON - select MAC80211_LEDS - select LEDS_CLASS - select NEW_LEDS + select MAC80211_LEDS if LEDS_CLASS=y || LEDS_CLASS=MAC80211 select ATH5K_AHB if ATH25 select ATH5K_PCI if !ATH25 help diff --git a/drivers/net/wireless/ath/ath5k/led.c b/drivers/net/wireless/ath/ath5k/led.c index 6a2a16856763..33e9928af363 100644 --- a/drivers/net/wireless/ath/ath5k/led.c +++ b/drivers/net/wireless/ath/ath5k/led.c @@ -89,7 +89,8 @@ static const struct pci_device_id ath5k_led_devices[] = { void ath5k_led_enable(struct ath5k_hw *ah) { - if (test_bit(ATH_STAT_LEDSOFT, ah->status)) { + if (IS_ENABLED(CONFIG_MAC80211_LEDS) && + test_bit(ATH_STAT_LEDSOFT, ah->status)) { ath5k_hw_set_gpio_output(ah, ah->led_pin); ath5k_led_off(ah); } @@ -104,7 +105,8 @@ static void ath5k_led_on(struct ath5k_hw *ah) void ath5k_led_off(struct ath5k_hw *ah) { - if (!test_bit(ATH_STAT_LEDSOFT, ah->status)) + if (!IS_ENABLED(CONFIG_MAC80211_LEDS) || + !test_bit(ATH_STAT_LEDSOFT, ah->status)) return; ath5k_hw_set_gpio(ah, ah->led_pin, !ah->led_on); } @@ -146,7 +148,7 @@ ath5k_register_led(struct ath5k_hw *ah, 
struct ath5k_led *led, static void ath5k_unregister_led(struct ath5k_led *led) { - if (!led->ah) + if (!IS_ENABLED(CONFIG_MAC80211_LEDS) || !led->ah) return; led_classdev_unregister(&led->led_dev); ath5k_led_off(led->ah); @@ -169,7 +171,7 @@ int ath5k_init_leds(struct ath5k_hw *ah) char name[ATH5K_LED_MAX_NAME_LEN + 1]; const struct pci_device_id *match; - if (!ah->pdev) + if (!IS_ENABLED(CONFIG_MAC80211_LEDS) || !ah->pdev) return 0; #ifdef CONFIG_ATH5K_AHB From fe5c735d0d47b495be6753d6aea4f8f78c909a0a Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Fri, 24 Sep 2021 15:21:54 +0300 Subject: [PATCH 11/66] iwlwifi: pcie: add configuration of a Wi-Fi adapter on Dell XPS 15 There is a Killer AX1650 2x2 Wi-Fi 6 and Bluetooth 5.1 wireless adapter found on Dell XPS 15 (9510) laptop, its configuration was present on Linux v5.7, however accidentally it has been removed from the list of supported devices, let's add it back. The problem is manifested on driver initialization: Intel(R) Wireless WiFi driver for Linux iwlwifi 0000:00:14.3: enabling device (0000 -> 0002) iwlwifi: No config found for PCI dev 43f0/1651, rev=0x354, rfid=0x10a100 iwlwifi: probe of 0000:00:14.3 failed with error -22 Bug: https://bugzilla.kernel.org/show_bug.cgi?id=213939 Fixes: 3f910a25839b ("iwlwifi: pcie: convert all AX101 devices to the device tables") Cc: Julien Wajsberg Signed-off-by: Vladimir Zapolskiy Acked-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210924122154.2376577-1-vladimir.zapolskiy@linaro.org --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 61b2797a34a8..e3996ff99bad 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -547,6 +547,8 @@ static const struct iwl_dev_info iwl_dev_info_table[] = { IWL_DEV_INFO(0x43F0, 0x0074, 
iwl_ax201_cfg_qu_hr, NULL), IWL_DEV_INFO(0x43F0, 0x0078, iwl_ax201_cfg_qu_hr, NULL), IWL_DEV_INFO(0x43F0, 0x007C, iwl_ax201_cfg_qu_hr, NULL), + IWL_DEV_INFO(0x43F0, 0x1651, killer1650s_2ax_cfg_qu_b0_hr_b0, iwl_ax201_killer_1650s_name), + IWL_DEV_INFO(0x43F0, 0x1652, killer1650i_2ax_cfg_qu_b0_hr_b0, iwl_ax201_killer_1650i_name), IWL_DEV_INFO(0x43F0, 0x2074, iwl_ax201_cfg_qu_hr, NULL), IWL_DEV_INFO(0x43F0, 0x4070, iwl_ax201_cfg_qu_hr, NULL), IWL_DEV_INFO(0xA0F0, 0x0070, iwl_ax201_cfg_qu_hr, NULL), From 151a7c12c4fc8340b51e849e4d1fcb7d794777a5 Mon Sep 17 00:00:00 2001 From: Soeren Moch Date: Sun, 26 Sep 2021 22:19:05 +0200 Subject: [PATCH 12/66] Revert "brcmfmac: use ISO3166 country code and 0 rev as fallback" This reverts commit b0b524f079a23e440dd22b04e369368dde847533. Commit b0b524f079a2 ("brcmfmac: use ISO3166 country code and 0 rev as fallback") changes country setup to directly use ISO3166 country codes if no more specific code is configured. This was done under the assumption that brcmfmac firmwares can handle such simple direct mapping from country codes to firmware ccode values. Unfortunately this is not true for all chipset/firmware combinations. E.g. BCM4359/9 devices stop working as access point with this change, so revert the offending commit to avoid the regression. 
Signed-off-by: Soeren Moch Cc: stable@vger.kernel.org # 5.14.x Acked-by: Shawn Guo Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210926201905.211605-1-smoch@web.de --- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index f7b96cd69242..9db12ffd2ff8 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -7463,23 +7463,18 @@ static s32 brcmf_translate_country_code(struct brcmf_pub *drvr, char alpha2[2], s32 found_index; int i; + country_codes = drvr->settings->country_codes; + if (!country_codes) { + brcmf_dbg(TRACE, "No country codes configured for device\n"); + return -EINVAL; + } + if ((alpha2[0] == ccreq->country_abbrev[0]) && (alpha2[1] == ccreq->country_abbrev[1])) { brcmf_dbg(TRACE, "Country code already set\n"); return -EAGAIN; } - country_codes = drvr->settings->country_codes; - if (!country_codes) { - brcmf_dbg(TRACE, "No country codes configured for device, using ISO3166 code and 0 rev\n"); - memset(ccreq, 0, sizeof(*ccreq)); - ccreq->country_abbrev[0] = alpha2[0]; - ccreq->country_abbrev[1] = alpha2[1]; - ccreq->ccode[0] = alpha2[0]; - ccreq->ccode[1] = alpha2[1]; - return 0; - } - found_index = -1; for (i = 0; i < country_codes->table_size; i++) { cc = &country_codes->table[i]; From 339031bafe6b281cf2dcb8364217288b9fdab555 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 23 Sep 2021 16:44:34 +0200 Subject: [PATCH 13/66] netfilter: conntrack: fix boot failure with nf_conntrack.enable_hooks=1 This is a revert of 7b1957b049 ("netfilter: nf_defrag_ipv4: use net_generic infra") and a partial revert of 8b0adbe3e3 ("netfilter: nf_defrag_ipv6: use net_generic infra"). If conntrack is builtin and kernel is booted with: nf_conntrack.enable_hooks=1 .... 
kernel will fail to boot due to a NULL deref in nf_defrag_ipv4_enable(): Its called before the ipv4 defrag initcall is made, so net_generic() returns NULL. To resolve this, move the user refcount back to struct net so calls to those functions are possible even before their initcalls have run. Fixes: 7b1957b04956 ("netfilter: nf_defrag_ipv4: use net_generic infra") Fixes: 8b0adbe3e38d ("netfilter: nf_defrag_ipv6: use net_generic infra"). Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv6/nf_defrag_ipv6.h | 1 - include/net/netns/netfilter.h | 6 +++++ net/ipv4/netfilter/nf_defrag_ipv4.c | 30 +++++++-------------- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 25 +++++++---------- 5 files changed, 25 insertions(+), 39 deletions(-) diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h index 0fd8a4159662..ceadf8ba25a4 100644 --- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h +++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h @@ -17,7 +17,6 @@ struct inet_frags_ctl; struct nft_ct_frag6_pernet { struct ctl_table_header *nf_frag_frags_hdr; struct fqdir *fqdir; - unsigned int users; }; #endif /* _NF_DEFRAG_IPV6_H */ diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 986a2a9cfdfa..b593f95e9991 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -27,5 +27,11 @@ struct netns_nf { #if IS_ENABLED(CONFIG_DECNET) struct nf_hook_entries __rcu *hooks_decnet[NF_DN_NUMHOOKS]; #endif +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) + unsigned int defrag_ipv4_users; +#endif +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + unsigned int defrag_ipv6_users; +#endif }; #endif diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 613432a36f0a..e61ea428ea18 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -20,13 +20,8 @@ #endif #include 
-static unsigned int defrag4_pernet_id __read_mostly; static DEFINE_MUTEX(defrag4_mutex); -struct defrag4_pernet { - unsigned int users; -}; - static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb, u_int32_t user) { @@ -111,19 +106,15 @@ static const struct nf_hook_ops ipv4_defrag_ops[] = { static void __net_exit defrag4_net_exit(struct net *net) { - struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id); - - if (nf_defrag->users) { + if (net->nf.defrag_ipv4_users) { nf_unregister_net_hooks(net, ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); - nf_defrag->users = 0; + net->nf.defrag_ipv4_users = 0; } } static struct pernet_operations defrag4_net_ops = { .exit = defrag4_net_exit, - .id = &defrag4_pernet_id, - .size = sizeof(struct defrag4_pernet), }; static int __init nf_defrag_init(void) @@ -138,24 +129,23 @@ static void __exit nf_defrag_fini(void) int nf_defrag_ipv4_enable(struct net *net) { - struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id); int err = 0; mutex_lock(&defrag4_mutex); - if (nf_defrag->users == UINT_MAX) { + if (net->nf.defrag_ipv4_users == UINT_MAX) { err = -EOVERFLOW; goto out_unlock; } - if (nf_defrag->users) { - nf_defrag->users++; + if (net->nf.defrag_ipv4_users) { + net->nf.defrag_ipv4_users++; goto out_unlock; } err = nf_register_net_hooks(net, ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); if (err == 0) - nf_defrag->users = 1; + net->nf.defrag_ipv4_users = 1; out_unlock: mutex_unlock(&defrag4_mutex); @@ -165,12 +155,10 @@ EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable); void nf_defrag_ipv4_disable(struct net *net) { - struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id); - mutex_lock(&defrag4_mutex); - if (nf_defrag->users) { - nf_defrag->users--; - if (nf_defrag->users == 0) + if (net->nf.defrag_ipv4_users) { + net->nf.defrag_ipv4_users--; + if (net->nf.defrag_ipv4_users == 0) nf_unregister_net_hooks(net, ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); } diff --git 
a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index a0108415275f..5c47be29b9ee 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -33,7 +33,7 @@ static const char nf_frags_cache_name[] = "nf-frags"; -unsigned int nf_frag_pernet_id __read_mostly; +static unsigned int nf_frag_pernet_id __read_mostly; static struct inet_frags nf_frags; static struct nft_ct_frag6_pernet *nf_frag_pernet(struct net *net) diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index e8a59d8bf2ad..cb4eb1d2c620 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -25,8 +25,6 @@ #include #include -extern unsigned int nf_frag_pernet_id; - static DEFINE_MUTEX(defrag6_mutex); static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, @@ -91,12 +89,10 @@ static const struct nf_hook_ops ipv6_defrag_ops[] = { static void __net_exit defrag6_net_exit(struct net *net) { - struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id); - - if (nf_frag->users) { + if (net->nf.defrag_ipv6_users) { nf_unregister_net_hooks(net, ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); - nf_frag->users = 0; + net->nf.defrag_ipv6_users = 0; } } @@ -134,24 +130,23 @@ static void __exit nf_defrag_fini(void) int nf_defrag_ipv6_enable(struct net *net) { - struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id); int err = 0; mutex_lock(&defrag6_mutex); - if (nf_frag->users == UINT_MAX) { + if (net->nf.defrag_ipv6_users == UINT_MAX) { err = -EOVERFLOW; goto out_unlock; } - if (nf_frag->users) { - nf_frag->users++; + if (net->nf.defrag_ipv6_users) { + net->nf.defrag_ipv6_users++; goto out_unlock; } err = nf_register_net_hooks(net, ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); if (err == 0) - nf_frag->users = 1; + net->nf.defrag_ipv6_users = 1; out_unlock: mutex_unlock(&defrag6_mutex); @@ -161,12 +156,10 
@@ EXPORT_SYMBOL_GPL(nf_defrag_ipv6_enable); void nf_defrag_ipv6_disable(struct net *net) { - struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id); - mutex_lock(&defrag6_mutex); - if (nf_frag->users) { - nf_frag->users--; - if (nf_frag->users == 0) + if (net->nf.defrag_ipv6_users) { + net->nf.defrag_ipv6_users--; + if (net->nf.defrag_ipv6_users == 0) nf_unregister_net_hooks(net, ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); } From e189ae161dd784aa5d454b0832f818cacc0e131b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 21 Sep 2021 00:37:03 +0200 Subject: [PATCH 14/66] netfilter: nf_tables: add position handle in event notification Add position handle to allow to identify the rule location from netlink events. Otherwise, userspace cannot incrementally update a userspace cache through monitoring events. Skip handle dump if the rule has been either inserted (at the beginning of the ruleset) or appended (at the end of the ruleset), the NLM_F_APPEND netlink flag is sufficient in these two cases. Handle NLM_F_REPLACE as NLM_F_APPEND since the rule replacement expansion appends it after the specified rule handle. 
Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b9546defdc28..085783b14075 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2866,8 +2866,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, u32 flags, int family, const struct nft_table *table, const struct nft_chain *chain, - const struct nft_rule *rule, - const struct nft_rule *prule) + const struct nft_rule *rule, u64 handle) { struct nlmsghdr *nlh; const struct nft_expr *expr, *next; @@ -2887,9 +2886,8 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, NFTA_RULE_PAD)) goto nla_put_failure; - if (event != NFT_MSG_DELRULE && prule) { - if (nla_put_be64(skb, NFTA_RULE_POSITION, - cpu_to_be64(prule->handle), + if (event != NFT_MSG_DELRULE && handle) { + if (nla_put_be64(skb, NFTA_RULE_POSITION, cpu_to_be64(handle), NFTA_RULE_PAD)) goto nla_put_failure; } @@ -2925,7 +2923,10 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, const struct nft_rule *rule, int event) { struct nftables_pernet *nft_net = nft_pernet(ctx->net); + const struct nft_rule *prule; struct sk_buff *skb; + u64 handle = 0; + u16 flags = 0; int err; if (!ctx->report && @@ -2936,9 +2937,18 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, if (skb == NULL) goto err; + if (event == NFT_MSG_NEWRULE && + !list_is_first(&rule->list, &ctx->chain->rules) && + !list_is_last(&rule->list, &ctx->chain->rules)) { + prule = list_prev_entry(rule, list); + handle = prule->handle; + } + if (ctx->flags & (NLM_F_APPEND | NLM_F_REPLACE)) + flags |= NLM_F_APPEND; + err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq, - event, 0, ctx->family, ctx->table, - ctx->chain, rule, NULL); + event, flags, ctx->family, 
ctx->table, + ctx->chain, rule, handle); if (err < 0) { kfree_skb(skb); goto err; @@ -2964,6 +2974,7 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, struct net *net = sock_net(skb->sk); const struct nft_rule *rule, *prule; unsigned int s_idx = cb->args[0]; + u64 handle; prule = NULL; list_for_each_entry_rcu(rule, &chain->rules, list) { @@ -2975,12 +2986,17 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); } + if (prule) + handle = prule->handle; + else + handle = 0; + if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFT_MSG_NEWRULE, NLM_F_MULTI | NLM_F_APPEND, table->family, - table, chain, rule, prule) < 0) + table, chain, rule, handle) < 0) return 1; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); @@ -3143,7 +3159,7 @@ static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info, err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0, - family, table, chain, rule, NULL); + family, table, chain, rule, 0); if (err < 0) goto err_fill_rule_info; From 2c964c558641a3bddaee5719c9e6d8805f777812 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 25 Sep 2021 00:27:38 +0200 Subject: [PATCH 15/66] netfilter: nf_tables: reverse order in rule replacement expansion Deactivate old rule first, then append the new rule, so rule replacement notification via netlink first reports the deletion of the old rule with handle X in first place, then it adds the new rule (reusing the handle X of the replaced old rule). Note that the abort path releases the transaction that has been created by nft_delrule() on error. 
Fixes: ca08987885a1 ("netfilter: nf_tables: deactivate expressions in rule replecement routine") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 085783b14075..c8acd26c7201 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3419,17 +3419,15 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, } if (info->nlh->nlmsg_flags & NLM_F_REPLACE) { + err = nft_delrule(&ctx, old_rule); + if (err < 0) + goto err_destroy_flow_rule; + trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule); if (trans == NULL) { err = -ENOMEM; goto err_destroy_flow_rule; } - err = nft_delrule(&ctx, old_rule); - if (err < 0) { - nft_trans_destroy(trans); - goto err_destroy_flow_rule; - } - list_add_tail_rcu(&rule->list, &old_rule->list); } else { trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule); From 603a1621caa097be23c7784e36cb8edf23cd31db Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Sep 2021 14:16:35 +0200 Subject: [PATCH 16/66] mwifiex: avoid null-pointer-subtraction warning clang complains about some NULL pointer arithmetic in this driver: drivers/net/wireless/marvell/mwifiex/sta_tx.c:65:59: error: performing pointer subtraction with a null pointer has undefined behavior [-Werror,-Wnull-pointer-subtraction] pad = ((void *)skb->data - (sizeof(*local_tx_pd) + hroom)- ^ drivers/net/wireless/marvell/mwifiex/uap_txrx.c:478:53: error: performing pointer subtraction with a null pointer has undefined behavior [-Werror,-Wnull-pointer-subtraction] pad = ((void *)skb->data - (sizeof(*txpd) + hroom) - NULL) & Rework that expression to do the same thing using a uintptr_t. 
Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210927121656.940304-1-arnd@kernel.org --- drivers/net/wireless/marvell/mwifiex/sta_tx.c | 4 ++-- drivers/net/wireless/marvell/mwifiex/uap_txrx.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/sta_tx.c b/drivers/net/wireless/marvell/mwifiex/sta_tx.c index 241305377e20..a9b5eb992220 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_tx.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_tx.c @@ -62,8 +62,8 @@ void *mwifiex_process_sta_txpd(struct mwifiex_private *priv, pkt_type = mwifiex_is_skb_mgmt_frame(skb) ? PKT_TYPE_MGMT : 0; - pad = ((void *)skb->data - (sizeof(*local_tx_pd) + hroom)- - NULL) & (MWIFIEX_DMA_ALIGN_SZ - 1); + pad = ((uintptr_t)skb->data - (sizeof(*local_tx_pd) + hroom)) & + (MWIFIEX_DMA_ALIGN_SZ - 1); skb_push(skb, sizeof(*local_tx_pd) + pad); local_tx_pd = (struct txpd *) skb->data; diff --git a/drivers/net/wireless/marvell/mwifiex/uap_txrx.c b/drivers/net/wireless/marvell/mwifiex/uap_txrx.c index 9bbdb8dfce62..245ff644f81e 100644 --- a/drivers/net/wireless/marvell/mwifiex/uap_txrx.c +++ b/drivers/net/wireless/marvell/mwifiex/uap_txrx.c @@ -475,8 +475,8 @@ void *mwifiex_process_uap_txpd(struct mwifiex_private *priv, pkt_type = mwifiex_is_skb_mgmt_frame(skb) ? 
PKT_TYPE_MGMT : 0; - pad = ((void *)skb->data - (sizeof(*txpd) + hroom) - NULL) & - (MWIFIEX_DMA_ALIGN_SZ - 1); + pad = ((uintptr_t)skb->data - (sizeof(*txpd) + hroom)) & + (MWIFIEX_DMA_ALIGN_SZ - 1); skb_push(skb, sizeof(*txpd) + pad); From 571fa247ab411f3233eeaaf837c6e646a513b9f8 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 28 Sep 2021 11:16:08 +0530 Subject: [PATCH 17/66] samples: bpf: Fix vmlinux.h generation for XDP samples MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Generate vmlinux.h only from the in-tree vmlinux, and remove enum declarations that would cause a build failure in case of version mismatches. There are now two options when building the samples: 1. Compile the kernel to use in-tree vmlinux for vmlinux.h 2. Override VMLINUX_BTF for samples using something like this: make VMLINUX_BTF=/sys/kernel/btf/vmlinux -C samples/bpf This change was tested with relative builds, e.g. cases like: * make O=build -C samples/bpf * make KBUILD_OUTPUT=build -C samples/bpf * make -C samples/bpf * cd samples/bpf && make When a suitable VMLINUX_BTF is not found, the following message is printed: /home/kkd/src/linux/samples/bpf/Makefile:333: *** Cannot find a vmlinux for VMLINUX_BTF at any of " ./vmlinux", build the kernel or set VMLINUX_BTF variable. Stop. 
Fixes: 384b6b3bbf0d (samples: bpf: Add vmlinux.h generation support) Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Andrii Nakryiko Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20210928054608.1799021-1-memxor@gmail.com --- samples/bpf/Makefile | 17 ++++++++--------- samples/bpf/xdp_redirect_map_multi.bpf.c | 5 ----- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 4dc20be5fb96..5fd48a8d4f10 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -322,17 +322,11 @@ $(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h -include $(BPF_SAMPLES_PATH)/Makefile.target -VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ - $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ - ../../../../vmlinux \ - /sys/kernel/btf/vmlinux \ - /boot/vmlinux-$(shell uname -r) +VMLINUX_BTF_PATHS ?= $(abspath $(if $(O),$(O)/vmlinux)) \ + $(abspath $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)) \ + $(abspath ./vmlinux) VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) -ifeq ($(VMLINUX_BTF),) -$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)") -endif - $(obj)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) ifeq ($(VMLINUX_H),) $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ @@ -340,6 +334,11 @@ else $(Q)cp "$(VMLINUX_H)" $@ endif +ifeq ($(VMLINUX_BTF),) + $(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)",\ + build the kernel or set VMLINUX_BTF variable) +endif + clean-files += vmlinux.h # Get Clang's default includes on this system, as opposed to those seen by diff --git a/samples/bpf/xdp_redirect_map_multi.bpf.c b/samples/bpf/xdp_redirect_map_multi.bpf.c index 8f59d430cb64..bb0a5a3bfcf0 100644 --- a/samples/bpf/xdp_redirect_map_multi.bpf.c +++ b/samples/bpf/xdp_redirect_map_multi.bpf.c @@ -5,11 +5,6 @@ #include "xdp_sample.bpf.h" #include "xdp_sample_shared.h" -enum { - BPF_F_BROADCAST = (1ULL << 3), - 
BPF_F_EXCLUDE_INGRESS = (1ULL << 4), -}; - struct { __uint(type, BPF_MAP_TYPE_DEVMAP_HASH); __uint(key_size, sizeof(int)); From d75fe9cb1dd062684c9fb8a4581738170365dc06 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Thu, 23 Sep 2021 01:05:40 +0100 Subject: [PATCH 18/66] samples/bpf: Relicense bpf_insn.h as GPL-2.0-only OR BSD-2-Clause MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit libbpf and bpftool have been dual-licensed to facilitate inclusion in software that is not compatible with GPL2-only (ie: Apache2), but the samples are still GPL2-only. Given these files are samples, they get naturally copied around. For example, it is the case for samples/bpf/bpf_insn.h which was copied into the systemd tree: https://github.com/systemd/systemd/blob/main/src/shared/linux/bpf_insn.h Some more context on systemd's needs specifically: Most of systemd is (L)GPL2-or-later, which means there is no perceived incompatibility with Apache2 software and can thus be linked with OpenSSL 3.0. But given this GPL2-only header is included this is currently not possible. 
Dual-licensing this header solves this problem for us as we are scoping a move to OpenSSL 3.0, see: https://lists.freedesktop.org/archives/systemd-devel/2021-September/046882.html Dual-license this header as GPL-2.0-only OR BSD-2-Clause to follow the same licensing used by libbpf and bpftool: 1bc38b8ff6cc ("libbpf: relicense libbpf as LGPL-2.1 OR BSD-2-Clause") 907b22365115 ("tools: bpftool: dual license all files") Signed-off-by: Luca Boccassi Signed-off-by: Daniel Borkmann Acked-by: Simon Horman Acked-by: Daniel Mack Acked-by: Josef Bacik Acked-by: Joe Stringer Acked-by: Chenbo Feng Acked-by: Björn Töpel Acked-by: Magnus Karlsson Acked-by: Brendan Jackman Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210923000540.47344-1-luca.boccassi@gmail.com --- samples/bpf/bpf_insn.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/bpf/bpf_insn.h b/samples/bpf/bpf_insn.h index aee04534483a..29c3bb6ad1cd 100644 --- a/samples/bpf/bpf_insn.h +++ b/samples/bpf/bpf_insn.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ /* eBPF instruction mini library */ #ifndef __BPF_INSN_H #define __BPF_INSN_H From 79e3445b38e0cab94264a3894c0c3d57c930b97e Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Tue, 28 Sep 2021 11:13:10 +0200 Subject: [PATCH 19/66] bpf, arm: Fix register clobbering in div/mod implementation On ARM CPUs that lack div/mod instructions, ALU32 BPF_DIV and BPF_MOD are implemented using a call to a helper function. Before, the emitted code for those function calls failed to preserve caller-saved ARM registers. Since some of those registers happen to be mapped to BPF registers, it resulted in eBPF register values being overwritten. This patch emits code to push and pop the remaining caller-saved ARM registers r2-r3 into the stack during the div/mod function call. 
ARM registers r0-r1 are used as arguments and return value, and those were already saved and restored correctly. Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler") Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index a951276f0547..a903b26cde40 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -36,6 +36,10 @@ * +-----+ * |RSVD | JIT scratchpad * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE) + * | ... | caller-saved registers + * +-----+ + * | ... | arguments passed on stack + * ARM_SP during call => +-----| * | | * | ... | Function call stack * | | @@ -63,6 +67,12 @@ * * When popping registers off the stack at the end of a BPF function, we * reference them via the current ARM_FP register. + * + * Some eBPF operations are implemented via a call to a helper function. + * Such calls are "invisible" in the eBPF code, so it is up to the calling + * program to preserve any caller-saved ARM registers during the call. The + * JIT emits code to push and pop those registers onto the stack, immediately + * above the callee stack frame. 
*/ #define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \ 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R9 | \ @@ -70,6 +80,8 @@ #define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR) #define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC) +#define CALLER_MASK (1 << ARM_R0 | 1 << ARM_R1 | 1 << ARM_R2 | 1 << ARM_R3) + enum { /* Stack layout - these are offsets from (top of stack - 4) */ BPF_R2_HI, @@ -464,6 +476,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) { + const int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1); const s8 *tmp = bpf2a32[TMP_REG_1]; #if __LINUX_ARM_ARCH__ == 7 @@ -495,11 +508,17 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) emit(ARM_MOV_R(ARM_R0, rm), ctx); } + /* Push caller-saved registers on stack */ + emit(ARM_PUSH(CALLER_MASK & ~exclude_mask), ctx); + /* Call appropriate function */ emit_mov_i(ARM_IP, op == BPF_DIV ? (u32)jit_udiv32 : (u32)jit_mod32, ctx); emit_blx_r(ARM_IP, ctx); + /* Restore caller-saved registers from stack */ + emit(ARM_POP(CALLER_MASK & ~exclude_mask), ctx); + /* Save return value */ if (rd != ARM_R0) emit(ARM_MOV_R(rd, ARM_R0), ctx); From 30e29a9a2bc6a4888335a6ede968b75cd329657a Mon Sep 17 00:00:00 2001 From: Tatsuhiko Yasumatsu Date: Thu, 30 Sep 2021 22:55:45 +0900 Subject: [PATCH 20/66] bpf: Fix integer overflow in prealloc_elems_and_freelist() In prealloc_elems_and_freelist(), the multiplication to calculate the size passed to bpf_map_area_alloc() could lead to an integer overflow. As a result, out-of-bounds write could occur in pcpu_freelist_populate() as reported by KASAN: [...] 
[ 16.968613] BUG: KASAN: slab-out-of-bounds in pcpu_freelist_populate+0xd9/0x100 [ 16.969408] Write of size 8 at addr ffff888104fc6ea0 by task crash/78 [ 16.970038] [ 16.970195] CPU: 0 PID: 78 Comm: crash Not tainted 5.15.0-rc2+ #1 [ 16.970878] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 [ 16.972026] Call Trace: [ 16.972306] dump_stack_lvl+0x34/0x44 [ 16.972687] print_address_description.constprop.0+0x21/0x140 [ 16.973297] ? pcpu_freelist_populate+0xd9/0x100 [ 16.973777] ? pcpu_freelist_populate+0xd9/0x100 [ 16.974257] kasan_report.cold+0x7f/0x11b [ 16.974681] ? pcpu_freelist_populate+0xd9/0x100 [ 16.975190] pcpu_freelist_populate+0xd9/0x100 [ 16.975669] stack_map_alloc+0x209/0x2a0 [ 16.976106] __sys_bpf+0xd83/0x2ce0 [...] The possibility of this overflow was originally discussed in [0], but was overlooked. Fix the integer overflow by changing elem_size to u64 from u32. [0] https://lore.kernel.org/bpf/728b238e-a481-eb50-98e9-b0f430ab01e7@gmail.com/ Fixes: 557c0c6e7df8 ("bpf: convert stackmap to pre-allocation") Signed-off-by: Tatsuhiko Yasumatsu Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210930135545.173698-1-th.yasumatsu@gmail.com --- kernel/bpf/stackmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 09a3fd97d329..6e75bbee39f0 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -63,7 +63,8 @@ static inline int stack_map_data_size(struct bpf_map *map) static int prealloc_elems_and_freelist(struct bpf_stack_map *smap) { - u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size; + u64 elem_size = sizeof(struct stack_map_bucket) + + (u64)smap->map.value_size; int err; smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries, From f9a10440f0b1f33faa792af26f4e9823a9b8b6a4 Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Thu, 26 Aug 2021 17:07:17 +0300 Subject: [PATCH 21/66] net/mlx5e: IPSEC 
RX, enable checksum complete Currently in Rx data path IPsec crypto offloaded packets uses csum_none flag, so checksum is handled by the stack, this naturally have some performance/cpu utilization impact on such flows. As Nvidia NIC starting from ConnectX6DX provides checksum complete value out of the box also for such flows there is no sense in taking csum_none path, furthermore the stack (xfrm) have the method to handle checksum complete corrections for such flows i.e. IPsec trailer removal and consequently checksum value adjustment. Because of the above and in addition the ConnectX6DX is the first HW which supports IPsec crypto offload then it is safe to report csum complete for IPsec offloaded traffic. Fixes: b2ac7541e377 ("net/mlx5e: IPsec: Add Connect-X IPsec Rx data path offload") Signed-off-by: Raed Salem Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 3c65fd0bcf31..29a6586ef28d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1001,14 +1001,9 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, goto csum_unnecessary; if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) { - u8 ipproto = get_ip_proto(skb, network_depth, proto); - - if (unlikely(ipproto == IPPROTO_SCTP)) + if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP)) goto csum_unnecessary; - if (unlikely(mlx5_ipsec_is_rx_flow(cqe))) - goto csum_none; - stats->csum_complete++; skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); From 9d758d4a3a039b9d7086d4759ed255b748713eee Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 2 Sep 2021 10:33:32 +0300 Subject: [PATCH 22/66] net/mlx5e: Keep the value for maximum number of channels in-sync The 
value for maximum number of channels is first calculated based on the netdev's profile and current function resources (specifically, number of MSIX vectors, which depends among other things on the number of online cores in the system). This value is then used to calculate the netdev's number of rxqs/txqs. Once created (by alloc_etherdev_mqs), the number of netdev's rxqs/txqs is constant and we must not exceed it. To achieve this, keep the maximum number of channels in sync upon any netdevice re-attach. Use mlx5e_get_max_num_channels() for calculating the number of netdev's rxqs/txqs. After netdev is created, use mlx5e_calc_max_nch() (which considers core device resources, profile, and netdev) to init or update priv->max_nch. Before this patch, the value of priv->max_nch might get out of sync, mistakenly allowing accesses to out-of-bounds objects, which would crash the system. Track the number of channels stats structures used in a separate field, as they are persistent to suspend/resume operations. All the collected stats of every channel index that ever existed should be preserved. They are reset only when struct mlx5e_priv is, in mlx5e_priv_cleanup(), which is part of the profile changing flow. There is no point anymore in blocking a profile change due to max_nch mismatch in mlx5e_netdev_change_profile(). Remove the limitation.
Fixes: a1f240f18017 ("net/mlx5e: Adjust to max number of channles when re-attaching") Signed-off-by: Tariq Toukan Reviewed-by: Aya Levin Reviewed-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 11 ++-- .../mellanox/mlx5/core/en/hv_vhca_stats.c | 6 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 59 ++++++++++++++----- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 3 +- .../ethernet/mellanox/mlx5/core/en_stats.c | 8 +-- .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 4 +- 6 files changed, 57 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 7b8c8187543a..2dca9219ca71 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -845,6 +845,7 @@ struct mlx5e_priv { struct mlx5e_channel_stats channel_stats[MLX5E_MAX_NUM_CHANNELS]; struct mlx5e_channel_stats trap_stats; struct mlx5e_ptp_stats ptp_stats; + u16 stats_nch; u16 max_nch; u8 max_opened_tc; bool tx_ptp_opened; @@ -1100,12 +1101,6 @@ int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, struct ethtool_pauseparam *pauseparam); /* mlx5e generic netdev management API */ -static inline unsigned int -mlx5e_calc_max_nch(struct mlx5e_priv *priv, const struct mlx5e_profile *profile) -{ - return priv->netdev->num_rx_queues / max_t(u8, profile->rq_groups, 1); -} - static inline bool mlx5e_tx_mpwqe_supported(struct mlx5_core_dev *mdev) { @@ -1114,11 +1109,13 @@ mlx5e_tx_mpwqe_supported(struct mlx5_core_dev *mdev) } int mlx5e_priv_init(struct mlx5e_priv *priv, + const struct mlx5e_profile *profile, struct net_device *netdev, struct mlx5_core_dev *mdev); void mlx5e_priv_cleanup(struct mlx5e_priv *priv); struct net_device * -mlx5e_create_netdev(struct mlx5_core_dev *mdev, unsigned int txqs, unsigned int rxqs); +mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, + unsigned int txqs, unsigned 
int rxqs); int mlx5e_attach_netdev(struct mlx5e_priv *priv); void mlx5e_detach_netdev(struct mlx5e_priv *priv); void mlx5e_destroy_netdev(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c index ac44bbe95c5c..d290d7276b8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c @@ -35,7 +35,7 @@ static void mlx5e_hv_vhca_fill_stats(struct mlx5e_priv *priv, void *data, { int ch, i = 0; - for (ch = 0; ch < priv->max_nch; ch++) { + for (ch = 0; ch < priv->stats_nch; ch++) { void *buf = data + i; if (WARN_ON_ONCE(buf + @@ -51,7 +51,7 @@ static void mlx5e_hv_vhca_fill_stats(struct mlx5e_priv *priv, void *data, static int mlx5e_hv_vhca_stats_buf_size(struct mlx5e_priv *priv) { return (sizeof(struct mlx5e_hv_vhca_per_ring_stats) * - priv->max_nch); + priv->stats_nch); } static void mlx5e_hv_vhca_stats_work(struct work_struct *work) @@ -100,7 +100,7 @@ static void mlx5e_hv_vhca_stats_control(struct mlx5_hv_vhca_agent *agent, sagent = &priv->stats_agent; block->version = MLX5_HV_VHCA_STATS_VERSION; - block->rings = priv->max_nch; + block->rings = priv->stats_nch; if (!block->command) { cancel_delayed_work_sync(&priv->stats_agent.work); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3fd515e7bf30..774ce88d80cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3065,7 +3065,7 @@ void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s) { int i; - for (i = 0; i < priv->max_nch; i++) { + for (i = 0; i < priv->stats_nch; i++) { struct mlx5e_channel_stats *channel_stats = &priv->channel_stats[i]; struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq; struct mlx5e_rq_stats *rq_stats = &channel_stats->rq; @@ -4186,8 +4186,6 @@ 
void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 struct mlx5_core_dev *mdev = priv->mdev; u8 rx_cq_period_mode; - priv->max_nch = mlx5e_calc_max_nch(priv, priv->profile); - params->sw_mtu = mtu; params->hard_mtu = MLX5E_ETH_HARD_MTU; params->num_channels = min_t(unsigned int, MLX5E_MAX_NUM_CHANNELS / 2, @@ -4682,8 +4680,35 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .rx_ptp_support = true, }; +static unsigned int +mlx5e_calc_max_nch(struct mlx5_core_dev *mdev, struct net_device *netdev, + const struct mlx5e_profile *profile) + +{ + unsigned int max_nch, tmp; + + /* core resources */ + max_nch = mlx5e_get_max_num_channels(mdev); + + /* netdev rx queues */ + tmp = netdev->num_rx_queues / max_t(u8, profile->rq_groups, 1); + max_nch = min_t(unsigned int, max_nch, tmp); + + /* netdev tx queues */ + tmp = netdev->num_tx_queues; + if (mlx5_qos_is_supported(mdev)) + tmp -= mlx5e_qos_max_leaf_nodes(mdev); + if (MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn)) + tmp -= profile->max_tc; + tmp = tmp / profile->max_tc; + max_nch = min_t(unsigned int, max_nch, tmp); + + return max_nch; +} + /* mlx5e generic netdev management API (move to en_common.c) */ int mlx5e_priv_init(struct mlx5e_priv *priv, + const struct mlx5e_profile *profile, struct net_device *netdev, struct mlx5_core_dev *mdev) { @@ -4691,6 +4716,8 @@ int mlx5e_priv_init(struct mlx5e_priv *priv, priv->mdev = mdev; priv->netdev = netdev; priv->msglevel = MLX5E_MSG_LEVEL; + priv->max_nch = mlx5e_calc_max_nch(mdev, netdev, profile); + priv->stats_nch = priv->max_nch; priv->max_opened_tc = 1; if (!alloc_cpumask_var(&priv->scratchpad.cpumask, GFP_KERNEL)) @@ -4734,7 +4761,8 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv) } struct net_device * -mlx5e_create_netdev(struct mlx5_core_dev *mdev, unsigned int txqs, unsigned int rxqs) +mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, + unsigned int txqs, unsigned int rxqs) { struct net_device *netdev; 
int err; @@ -4745,7 +4773,7 @@ mlx5e_create_netdev(struct mlx5_core_dev *mdev, unsigned int txqs, unsigned int return NULL; } - err = mlx5e_priv_init(netdev_priv(netdev), netdev, mdev); + err = mlx5e_priv_init(netdev_priv(netdev), profile, netdev, mdev); if (err) { mlx5_core_err(mdev, "mlx5e_priv_init failed, err=%d\n", err); goto err_free_netdev; @@ -4787,7 +4815,7 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv) clear_bit(MLX5E_STATE_DESTROYING, &priv->state); /* max number of channels may have changed */ - max_nch = mlx5e_get_max_num_channels(priv->mdev); + max_nch = mlx5e_calc_max_nch(priv->mdev, priv->netdev, profile); if (priv->channels.params.num_channels > max_nch) { mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch); /* Reducing the number of channels - RXFH has to be reset, and @@ -4796,6 +4824,13 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv) priv->netdev->priv_flags &= ~IFF_RXFH_CONFIGURED; priv->channels.params.num_channels = max_nch; } + if (max_nch != priv->max_nch) { + mlx5_core_warn(priv->mdev, + "MLX5E: Updating max number of channels from %u to %u\n", + priv->max_nch, max_nch); + priv->max_nch = max_nch; + } + /* 1. Set the real number of queues in the kernel the first time. * 2. Set our default XPS cpumask. * 3. Build the RQT. 
@@ -4860,7 +4895,7 @@ mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mde struct mlx5e_priv *priv = netdev_priv(netdev); int err; - err = mlx5e_priv_init(priv, netdev, mdev); + err = mlx5e_priv_init(priv, new_profile, netdev, mdev); if (err) { mlx5_core_err(mdev, "mlx5e_priv_init failed, err=%d\n", err); return err; @@ -4886,20 +4921,12 @@ priv_cleanup: int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, const struct mlx5e_profile *new_profile, void *new_ppriv) { - unsigned int new_max_nch = mlx5e_calc_max_nch(priv, new_profile); const struct mlx5e_profile *orig_profile = priv->profile; struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; void *orig_ppriv = priv->ppriv; int err, rollback_err; - /* sanity */ - if (new_max_nch != priv->max_nch) { - netdev_warn(netdev, "%s: Replacing profile with different max channels\n", - __func__); - return -EINVAL; - } - /* cleanup old profile */ mlx5e_detach_netdev(priv); priv->profile->cleanup(priv); @@ -4995,7 +5022,7 @@ static int mlx5e_probe(struct auxiliary_device *adev, nch = mlx5e_get_max_num_channels(mdev); txqs = nch * profile->max_tc + ptp_txqs + qos_sqs; rxqs = nch * profile->rq_groups; - netdev = mlx5e_create_netdev(mdev, txqs, rxqs); + netdev = mlx5e_create_netdev(mdev, profile, txqs, rxqs); if (!netdev) { mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index ae71a17fdb27..3dd1101cc693 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -596,7 +596,6 @@ static void mlx5e_build_rep_params(struct net_device *netdev) MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; - priv->max_nch = mlx5e_calc_max_nch(priv, priv->profile); params = &priv->channels.params; params->num_channels = MLX5E_REP_PARAMS_DEF_NUM_CHANNELS; @@ -1169,7 +1168,7 @@ 
mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) nch = mlx5e_get_max_num_channels(dev); txqs = nch * profile->max_tc; rxqs = nch * profile->rq_groups; - netdev = mlx5e_create_netdev(dev, txqs, rxqs); + netdev = mlx5e_create_netdev(dev, profile, txqs, rxqs); if (!netdev) { mlx5_core_warn(dev, "Failed to create representor netdev for vport %d\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index e4f5b6395148..46bf78169f63 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -450,7 +450,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) memset(s, 0, sizeof(*s)); - for (i = 0; i < priv->max_nch; i++) { + for (i = 0; i < priv->stats_nch; i++) { struct mlx5e_channel_stats *channel_stats = &priv->channel_stats[i]; int j; @@ -2119,7 +2119,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ptp) { return; } static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(channels) { - int max_nch = priv->max_nch; + int max_nch = priv->stats_nch; return (NUM_RQ_STATS * max_nch) + (NUM_CH_STATS * max_nch) + @@ -2133,7 +2133,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(channels) static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(channels) { bool is_xsk = priv->xsk.ever_used; - int max_nch = priv->max_nch; + int max_nch = priv->stats_nch; int i, j, tc; for (i = 0; i < max_nch; i++) @@ -2175,7 +2175,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(channels) static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(channels) { bool is_xsk = priv->xsk.ever_used; - int max_nch = priv->max_nch; + int max_nch = priv->stats_nch; int i, j, tc; for (i = 0; i < max_nch; i++) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 67571e5040d6..269ebb53eda6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -113,7 +113,7 @@ static void mlx5i_grp_sw_update_stats(struct mlx5e_priv *priv) struct mlx5e_sw_stats s = { 0 }; int i, j; - for (i = 0; i < priv->max_nch; i++) { + for (i = 0; i < priv->stats_nch; i++) { struct mlx5e_channel_stats *channel_stats; struct mlx5e_rq_stats *rq_stats; @@ -711,7 +711,7 @@ static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u32 port_num, goto destroy_ht; } - err = mlx5e_priv_init(epriv, netdev, mdev); + err = mlx5e_priv_init(epriv, prof, netdev, mdev); if (err) goto destroy_mdev_resources; From 7dbc849b2ab3b8ea8f767361c46f914bb2b7779d Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 29 Sep 2021 15:51:26 +0300 Subject: [PATCH 23/66] net/mlx5e: Improve MQPRIO resiliency * Add netdev->tc_to_txq rollback in case of failure in mlx5e_update_netdev_queues(). * Fix broken transition between the two modes: MQPRIO DCB mode with tc==8, and MQPRIO channel mode. * Disable MQPRIO channel mode if re-attaching with a different number of channels. * Improve code sharing. 
Fixes: ec60c4581bd9 ("net/mlx5e: Support MQPRIO channel mode") Signed-off-by: Tariq Toukan Reviewed-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../net/ethernet/mellanox/mlx5/core/en_main.c | 111 +++++++++++++----- 2 files changed, 80 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 2dca9219ca71..03a7a4ce5cd5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -252,6 +252,7 @@ struct mlx5e_params { struct { u16 mode; u8 num_tc; + struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; } mqprio; bool rx_cqe_compress_def; bool tunneled_offload_en; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 774ce88d80cd..0390395f421f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2264,7 +2264,7 @@ void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv) } static int mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc, - struct tc_mqprio_qopt_offload *mqprio) + struct netdev_tc_txq *tc_to_txq) { int tc, err; @@ -2282,11 +2282,8 @@ static int mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc, for (tc = 0; tc < ntc; tc++) { u16 count, offset; - /* For DCB mode, map netdev TCs to offset 0 - * We have our own UP to TXQ mapping for QoS - */ - count = mqprio ? mqprio->qopt.count[tc] : nch; - offset = mqprio ? 
mqprio->qopt.offset[tc] : 0; + count = tc_to_txq[tc].count; + offset = tc_to_txq[tc].offset; netdev_set_tc_queue(netdev, tc, count, offset); } @@ -2315,19 +2312,24 @@ int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv) static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv) { + struct netdev_tc_txq old_tc_to_txq[TC_MAX_QUEUE], *tc_to_txq; struct net_device *netdev = priv->netdev; int old_num_txqs, old_ntc; int num_rxqs, nch, ntc; int err; + int i; old_num_txqs = netdev->real_num_tx_queues; old_ntc = netdev->num_tc ? : 1; + for (i = 0; i < ARRAY_SIZE(old_tc_to_txq); i++) + old_tc_to_txq[i] = netdev->tc_to_txq[i]; nch = priv->channels.params.num_channels; - ntc = mlx5e_get_dcb_num_tc(&priv->channels.params); + ntc = priv->channels.params.mqprio.num_tc; num_rxqs = nch * priv->profile->rq_groups; + tc_to_txq = priv->channels.params.mqprio.tc_to_txq; - err = mlx5e_netdev_set_tcs(netdev, nch, ntc, NULL); + err = mlx5e_netdev_set_tcs(netdev, nch, ntc, tc_to_txq); if (err) goto err_out; err = mlx5e_update_tx_netdev_queues(priv); @@ -2350,11 +2352,14 @@ err_txqs: WARN_ON_ONCE(netif_set_real_num_tx_queues(netdev, old_num_txqs)); err_tcs: - mlx5e_netdev_set_tcs(netdev, old_num_txqs / old_ntc, old_ntc, NULL); + WARN_ON_ONCE(mlx5e_netdev_set_tcs(netdev, old_num_txqs / old_ntc, old_ntc, + old_tc_to_txq)); err_out: return err; } +static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_update_netdev_queues); + static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv, struct mlx5e_params *params) { @@ -2861,6 +2866,58 @@ static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) return 0; } +static void mlx5e_mqprio_build_default_tc_to_txq(struct netdev_tc_txq *tc_to_txq, + int ntc, int nch) +{ + int tc; + + memset(tc_to_txq, 0, sizeof(*tc_to_txq) * TC_MAX_QUEUE); + + /* Map netdev TCs to offset 0. 
+ * We have our own UP to TXQ mapping for DCB mode of QoS + */ + for (tc = 0; tc < ntc; tc++) { + tc_to_txq[tc] = (struct netdev_tc_txq) { + .count = nch, + .offset = 0, + }; + } +} + +static void mlx5e_mqprio_build_tc_to_txq(struct netdev_tc_txq *tc_to_txq, + struct tc_mqprio_qopt *qopt) +{ + int tc; + + for (tc = 0; tc < TC_MAX_QUEUE; tc++) { + tc_to_txq[tc] = (struct netdev_tc_txq) { + .count = qopt->count[tc], + .offset = qopt->offset[tc], + }; + } +} + +static void mlx5e_params_mqprio_dcb_set(struct mlx5e_params *params, u8 num_tc) +{ + params->mqprio.mode = TC_MQPRIO_MODE_DCB; + params->mqprio.num_tc = num_tc; + mlx5e_mqprio_build_default_tc_to_txq(params->mqprio.tc_to_txq, num_tc, + params->num_channels); +} + +static void mlx5e_params_mqprio_channel_set(struct mlx5e_params *params, + struct tc_mqprio_qopt *qopt) +{ + params->mqprio.mode = TC_MQPRIO_MODE_CHANNEL; + params->mqprio.num_tc = qopt->num_tc; + mlx5e_mqprio_build_tc_to_txq(params->mqprio.tc_to_txq, qopt); +} + +static void mlx5e_params_mqprio_reset(struct mlx5e_params *params) +{ + mlx5e_params_mqprio_dcb_set(params, 1); +} + static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv, struct tc_mqprio_qopt *mqprio) { @@ -2874,8 +2931,7 @@ static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv, return -EINVAL; new_params = priv->channels.params; - new_params.mqprio.mode = TC_MQPRIO_MODE_DCB; - new_params.mqprio.num_tc = tc ? tc : 1; + mlx5e_params_mqprio_dcb_set(&new_params, tc ? 
tc : 1); err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_num_channels_changed_ctx, NULL, true); @@ -2926,25 +2982,12 @@ static int mlx5e_mqprio_channel_validate(struct mlx5e_priv *priv, return 0; } -static int mlx5e_mqprio_channel_set_tcs_ctx(struct mlx5e_priv *priv, void *ctx) -{ - struct tc_mqprio_qopt_offload *mqprio = (struct tc_mqprio_qopt_offload *)ctx; - struct net_device *netdev = priv->netdev; - u8 num_tc; - - if (priv->channels.params.mqprio.mode != TC_MQPRIO_MODE_CHANNEL) - return -EINVAL; - - num_tc = priv->channels.params.mqprio.num_tc; - mlx5e_netdev_set_tcs(netdev, 0, num_tc, mqprio); - - return 0; -} - static int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv, struct tc_mqprio_qopt_offload *mqprio) { + mlx5e_fp_preactivate preactivate; struct mlx5e_params new_params; + bool nch_changed; int err; err = mlx5e_mqprio_channel_validate(priv, mqprio); @@ -2952,12 +2995,12 @@ static int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv, return err; new_params = priv->channels.params; - new_params.mqprio.mode = TC_MQPRIO_MODE_CHANNEL; - new_params.mqprio.num_tc = mqprio->qopt.num_tc; - err = mlx5e_safe_switch_params(priv, &new_params, - mlx5e_mqprio_channel_set_tcs_ctx, mqprio, true); + mlx5e_params_mqprio_channel_set(&new_params, &mqprio->qopt); - return err; + nch_changed = mlx5e_get_dcb_num_tc(&priv->channels.params) > 1; + preactivate = nch_changed ? 
mlx5e_num_channels_changed_ctx : + mlx5e_update_netdev_queues_ctx; + return mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, true); } static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, @@ -4190,7 +4233,7 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 params->hard_mtu = MLX5E_ETH_HARD_MTU; params->num_channels = min_t(unsigned int, MLX5E_MAX_NUM_CHANNELS / 2, priv->max_nch); - params->mqprio.num_tc = 1; + mlx5e_params_mqprio_reset(params); /* Set an initial non-zero value, so that mlx5e_select_queue won't * divide by zero if called before first activating channels. @@ -4823,6 +4866,10 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv) */ priv->netdev->priv_flags &= ~IFF_RXFH_CONFIGURED; priv->channels.params.num_channels = max_nch; + if (priv->channels.params.mqprio.mode == TC_MQPRIO_MODE_CHANNEL) { + mlx5_core_warn(priv->mdev, "MLX5E: Disabling MQPRIO channel mode\n"); + mlx5e_params_mqprio_reset(&priv->channels.params); + } } if (max_nch != priv->max_nch) { mlx5_core_warn(priv->mdev, From a586775f83bd729ad60b56352dbe067f4bb0beee Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 23 Sep 2021 17:57:47 +0300 Subject: [PATCH 24/66] net/mlx5: E-Switch, Fix double allocation of acl flow counter Flow counter is allocated in eswitch legacy acl setting functions without checking if already allocated by previous setting. Add a check to avoid such double allocation. 
Fixes: 07bab9502641 ("net/mlx5: E-Switch, Refactor eswitch ingress acl codes") Fixes: ea651a86d468 ("net/mlx5: E-Switch, Refactor eswitch egress acl codes") Signed-off-by: Moshe Shemesh Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/esw/acl/egress_lgcy.c | 12 ++++++++---- .../mellanox/mlx5/core/esw/acl/ingress_lgcy.c | 4 +++- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c index 0399a396d166..60a73990017c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c @@ -79,12 +79,16 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, int dest_num = 0; int err = 0; - if (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, flow_counter)) { + if (vport->egress.legacy.drop_counter) { + drop_counter = vport->egress.legacy.drop_counter; + } else if (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, flow_counter)) { drop_counter = mlx5_fc_create(esw->dev, false); - if (IS_ERR(drop_counter)) + if (IS_ERR(drop_counter)) { esw_warn(esw->dev, "vport[%d] configure egress drop rule counter err(%ld)\n", vport->vport, PTR_ERR(drop_counter)); + drop_counter = NULL; + } vport->egress.legacy.drop_counter = drop_counter; } @@ -123,7 +127,7 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; /* Attach egress drop flow counter */ - if (!IS_ERR_OR_NULL(drop_counter)) { + if (drop_counter) { flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; drop_ctr_dst.counter_id = mlx5_fc_id(drop_counter); @@ -162,7 +166,7 @@ void esw_acl_egress_lgcy_cleanup(struct mlx5_eswitch *esw, esw_acl_egress_table_destroy(vport); clean_drop_counter: - if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_counter)) { + if (vport->egress.legacy.drop_counter) { 
mlx5_fc_destroy(esw->dev, vport->egress.legacy.drop_counter); vport->egress.legacy.drop_counter = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c index f75b86abaf1c..b1a5199260f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c @@ -160,7 +160,9 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, esw_acl_ingress_lgcy_rules_destroy(vport); - if (MLX5_CAP_ESW_INGRESS_ACL(esw->dev, flow_counter)) { + if (vport->ingress.legacy.drop_counter) { + counter = vport->ingress.legacy.drop_counter; + } else if (MLX5_CAP_ESW_INGRESS_ACL(esw->dev, flow_counter)) { counter = mlx5_fc_create(esw->dev, false); if (IS_ERR(counter)) { esw_warn(esw->dev, From 64728294703e77827cc31a1b164ca867400067f5 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 23 Sep 2021 16:56:09 +0300 Subject: [PATCH 25/66] net/mlx5: Force round second at 1PPS out start time Allow configuration of 1PPS start time only with time-stamp representing a round second. Prior to this patch driver allowed setting of a non-round-second which is not supported by the device. Avoid unexpected behavior by restricting start-time configuration to a round-second. 
Fixes: 4272f9b88db9 ("net/mlx5e: Change 1PPS out scheme") Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/lib/clock.c | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index ffac8a0e7a23..d2ed7b0a18ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -448,22 +448,20 @@ static u64 find_target_cycles(struct mlx5_core_dev *mdev, s64 target_ns) return cycles_now + cycles_delta; } -static u64 perout_conf_internal_timer(struct mlx5_core_dev *mdev, - s64 sec, u32 nsec) +static u64 perout_conf_internal_timer(struct mlx5_core_dev *mdev, s64 sec) { - struct timespec64 ts; + struct timespec64 ts = {}; s64 target_ns; ts.tv_sec = sec; - ts.tv_nsec = nsec; target_ns = timespec64_to_ns(&ts); return find_target_cycles(mdev, target_ns); } -static u64 perout_conf_real_time(s64 sec, u32 nsec) +static u64 perout_conf_real_time(s64 sec) { - return (u64)nsec | (u64)sec << 32; + return (u64)sec << 32; } static int mlx5_perout_configure(struct ptp_clock_info *ptp, @@ -501,8 +499,10 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, if (on) { bool rt_mode = mlx5_real_time_mode(mdev); - u32 nsec; - s64 sec; + s64 sec = rq->perout.start.sec; + + if (rq->perout.start.nsec) + return -EINVAL; pin_mode = MLX5_PIN_MODE_OUT; pattern = MLX5_OUT_PATTERN_PERIODIC; @@ -513,14 +513,11 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, if ((ns >> 1) != 500000000LL) return -EINVAL; - nsec = rq->perout.start.nsec; - sec = rq->perout.start.sec; - if (rt_mode && sec > U32_MAX) return -EINVAL; - time_stamp = rt_mode ? perout_conf_real_time(sec, nsec) : - perout_conf_internal_timer(mdev, sec, nsec); + time_stamp = rt_mode ? 
perout_conf_real_time(sec) : + perout_conf_internal_timer(mdev, sec); field_select |= MLX5_MTPPS_FS_PIN_MODE | MLX5_MTPPS_FS_PATTERN | @@ -717,7 +714,7 @@ static u64 perout_conf_next_event_timer(struct mlx5_core_dev *mdev, ts_next_sec(&ts); target_ns = timespec64_to_ns(&ts); - return rt_mode ? perout_conf_real_time(ts.tv_sec, ts.tv_nsec) : + return rt_mode ? perout_conf_real_time(ts.tv_sec) : find_target_cycles(mdev, target_ns); } From 99b9a678b2e474756770900595cb09c94498bfca Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 23 Sep 2021 15:30:01 +0300 Subject: [PATCH 26/66] net/mlx5: Avoid generating event after PPS out in Real time mode When in Real-time mode, HW clock is synced with the PTP daemon. Hence driver should not re-calibrate the next pulse (via MTPPSE repetitive events mechanism). This patch arms repetitive events only in free-running mode. Fixes: 432119de33d9 ("net/mlx5: Add cyc2time HW translation mode support") Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/lib/clock.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index d2ed7b0a18ea..91e806c1aa21 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -472,6 +472,7 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, container_of(ptp, struct mlx5_clock, ptp_info); struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock); + bool rt_mode = mlx5_real_time_mode(mdev); u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; struct timespec64 ts; u32 field_select = 0; @@ -535,6 +536,9 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, if (err) return err; + if (rt_mode) + return 0; + return mlx5_set_mtppse(mdev, pin, 0, MLX5_EVENT_MODE_REPETETIVE & on); } @@ -702,20 +706,14 @@ static void 
ts_next_sec(struct timespec64 *ts) static u64 perout_conf_next_event_timer(struct mlx5_core_dev *mdev, struct mlx5_clock *clock) { - bool rt_mode = mlx5_real_time_mode(mdev); struct timespec64 ts; s64 target_ns; - if (rt_mode) - ts = mlx5_ptp_gettimex_real_time(mdev, NULL); - else - mlx5_ptp_gettimex(&clock->ptp_info, &ts, NULL); - + mlx5_ptp_gettimex(&clock->ptp_info, &ts, NULL); ts_next_sec(&ts); target_ns = timespec64_to_ns(&ts); - return rt_mode ? perout_conf_real_time(ts.tv_sec) : - find_target_cycles(mdev, target_ns); + return find_target_cycles(mdev, target_ns); } static int mlx5_pps_event(struct notifier_block *nb, From ac8b7d50ae4c3f5325c599f3d6e939ecef6a585a Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Thu, 19 Aug 2021 16:01:28 +0300 Subject: [PATCH 27/66] net/mlx5: Fix length of irq_index in chars The maximum irq_index can be 2047, This means irq_name should have 4 characters reserve for the irq_index. Hence, increase it to 4. Fixes: 3af26495a247 ("net/mlx5: Enlarge interrupt field in CREATE_EQ") Signed-off-by: Shay Drory Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index c79a10b3454d..df54f62a38ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -13,8 +13,8 @@ #endif #define MLX5_MAX_IRQ_NAME (32) -/* max irq_index is 255. 
three chars */ -#define MLX5_MAX_IRQ_IDX_CHARS (3) +/* max irq_index is 2047, so four chars */ +#define MLX5_MAX_IRQ_IDX_CHARS (4) #define MLX5_SFS_PER_CTRL_IRQ 64 #define MLX5_IRQ_CTRL_SF_MAX 8 From f88c4876347400a577598e06f1b230a7b19ee0e9 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 14 Sep 2021 10:13:02 +0300 Subject: [PATCH 28/66] net/mlx5: Fix setting number of EQs of SFs When setting number of completion EQs of the SF, consider number of online CPUs. Without this consideration, when number of online cpus are less than 8, unnecessary 8 completion EQs are allocated. Fixes: c36326d38d93 ("net/mlx5: Round-Robin EQs over IRQs") Signed-off-by: Shay Drory Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index df54f62a38ac..763c83a02380 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -633,8 +633,9 @@ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev) int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table) { if (table->sf_comp_pool) - return table->sf_comp_pool->xa_num_irqs.max - - table->sf_comp_pool->xa_num_irqs.min + 1; + return min_t(int, num_online_cpus(), + table->sf_comp_pool->xa_num_irqs.max - + table->sf_comp_pool->xa_num_irqs.min + 1); else return mlx5_irq_table_get_num_comp(table); } From dd1979cf3c710398a9eeba4853b908fe16426814 Mon Sep 17 00:00:00 2001 From: Lama Kayal Date: Sun, 29 Aug 2021 11:26:03 +0300 Subject: [PATCH 29/66] net/mlx5e: Fix the presented RQ index in PTP stats PTP-RQ counters title format contains PTP-RQ identifier, which is mistakenly not passed to sprintf(). This leads to unexpected garbage values instead. This patch fixes it.
Before applying the patch: ethtool -S eth3 | grep ptp_rq ptp_rq15_packets: 0 ptp_rq8_bytes: 0 ptp_rq6_csum_complete: 0 ptp_rq14_csum_complete_tail: 0 ptp_rq3_csum_complete_tail_slow : 0 ptp_rq9_csum_unnecessary: 0 ptp_rq1_csum_unnecessary_inner: 0 ptp_rq7_csum_none: 0 ptp_rq10_xdp_drop: 0 ptp_rq9_xdp_redirect: 0 ptp_rq13_lro_packets: 0 ptp_rq12_lro_bytes: 0 ptp_rq10_ecn_mark: 0 ptp_rq9_removed_vlan_packets: 0 ptp_rq5_wqe_err: 0 ptp_rq8_mpwqe_filler_cqes: 0 ptp_rq2_mpwqe_filler_strides: 0 ptp_rq5_oversize_pkts_sw_drop: 0 ptp_rq6_buff_alloc_err: 0 ptp_rq15_cqe_compress_blks: 0 ptp_rq2_cqe_compress_pkts: 0 ptp_rq2_cache_reuse: 0 ptp_rq12_cache_full: 0 ptp_rq11_cache_empty: 256 ptp_rq12_cache_busy: 0 ptp_rq11_cache_waive: 0 ptp_rq12_congst_umr: 0 ptp_rq11_arfs_err: 0 ptp_rq9_recover: 0 After applying the patch: ethtool -S eth3 | grep ptp_rq ptp_rq0_packets: 0 ptp_rq0_bytes: 0 ptp_rq0_csum_complete: 0 ptp_rq0_csum_complete_tail: 0 ptp_rq0_csum_complete_tail_slow : 0 ptp_rq0_csum_unnecessary: 0 ptp_rq0_csum_unnecessary_inner: 0 ptp_rq0_csum_none: 0 ptp_rq0_xdp_drop: 0 ptp_rq0_xdp_redirect: 0 ptp_rq0_lro_packets: 0 ptp_rq0_lro_bytes: 0 ptp_rq0_ecn_mark: 0 ptp_rq0_removed_vlan_packets: 0 ptp_rq0_wqe_err: 0 ptp_rq0_mpwqe_filler_cqes: 0 ptp_rq0_mpwqe_filler_strides: 0 ptp_rq0_oversize_pkts_sw_drop: 0 ptp_rq0_buff_alloc_err: 0 ptp_rq0_cqe_compress_blks: 0 ptp_rq0_cqe_compress_pkts: 0 ptp_rq0_cache_reuse: 0 ptp_rq0_cache_full: 0 ptp_rq0_cache_empty: 256 ptp_rq0_cache_busy: 0 ptp_rq0_cache_waive: 0 ptp_rq0_congst_umr: 0 ptp_rq0_arfs_err: 0 ptp_rq0_recover: 0 Fixes: a28359e922c6 ("net/mlx5e: Add PTP-RX statistics") Signed-off-by: Lama Kayal Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 3 +-- drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index ee688dec67a9..3a86f66d1295 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -13,8 +13,6 @@ struct mlx5e_ptp_fs { bool valid; }; -#define MLX5E_PTP_CHANNEL_IX 0 - struct mlx5e_ptp_params { struct mlx5e_params params; struct mlx5e_sq_param txq_sq_param; @@ -509,6 +507,7 @@ static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params, rq->mdev = mdev; rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); rq->stats = &c->priv->ptp_stats.rq; + rq->ix = MLX5E_PTP_CHANNEL_IX; rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); err = mlx5e_rq_set_handlers(rq, params, false); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h index c96668bd701c..a71a32e00ebb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h @@ -8,6 +8,8 @@ #include "en_stats.h" #include +#define MLX5E_PTP_CHANNEL_IX 0 + struct mlx5e_ptpsq { struct mlx5e_txqsq txqsq; struct mlx5e_cq ts_cq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 46bf78169f63..e1dd17019030 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -34,6 +34,7 @@ #include "en.h" #include "en_accel/tls.h" #include "en_accel/en_accel.h" +#include "en/ptp.h" static unsigned int stats_grps_num(struct mlx5e_priv *priv) { @@ -2076,7 +2077,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ptp) if (priv->rx_ptp_opened) { for (i = 0; i < NUM_PTP_RQ_STATS; i++) sprintf(data + (idx++) * ETH_GSTRING_LEN, - ptp_rq_stats_desc[i].format); + ptp_rq_stats_desc[i].format, MLX5E_PTP_CHANNEL_IX); } return idx; } From 3bf1742f3c69501dec300b55917b9352428cb4dd Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 13 Sep 2021 16:49:47 +0300 Subject: 
[PATCH 30/66] net/mlx5e: Mutually exclude setting of TX-port-TS and MQPRIO in channel mode TX-port-TS hijacks the PTP traffic to a specific HW TX-queue. This conflicts with MQPRIO in channel mode, which specifies explicitly which TC accepts the packet. This patch mutually excludes the above configuration. Fixes: ec60c4581bd9 ("net/mlx5e: Support MQPRIO channel mode") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 11 +++++++++++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 ++++++++ 2 files changed, 19 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 306fb5d6a36d..9d451b8ee467 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -2036,6 +2036,17 @@ static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable) } new_params = priv->channels.params; + /* Don't allow enabling TX-port-TS if MQPRIO mode channel offload is + * active, since it defines explicitly which TC accepts the packet. + * This conflicts with TX-port-TS hijacking the PTP traffic to a specific + * HW TX-queue. 
+ */ + if (enable && new_params.mqprio.mode == TC_MQPRIO_MODE_CHANNEL) { + netdev_err(priv->netdev, + "%s: MQPRIO mode channel offload is active, cannot set the TX-port-TS\n", + __func__); + return -EINVAL; + } MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_TX_PORT_TS, enable); /* No need to verify SQ stop room as * ptpsq.txqsq.stop_room <= generic_sq->stop_room, and both diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 0390395f421f..0c5197f9cea3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2945,9 +2945,17 @@ static int mlx5e_mqprio_channel_validate(struct mlx5e_priv *priv, struct tc_mqprio_qopt_offload *mqprio) { struct net_device *netdev = priv->netdev; + struct mlx5e_ptp *ptp_channel; int agg_count = 0; int i; + ptp_channel = priv->channels.ptp; + if (ptp_channel && test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state)) { + netdev_err(netdev, + "Cannot activate MQPRIO mode channel since it conflicts with TX port TS\n"); + return -EINVAL; + } + if (mqprio->qopt.offset[0] != 0 || mqprio->qopt.num_tc < 1 || mqprio->qopt.num_tc > MLX5E_MAX_NUM_MQPRIO_CH_TC) return -EINVAL; From 4729445b47efebf089da4ccbcd1b116ffa2ad4af Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 30 Sep 2021 11:46:34 +0530 Subject: [PATCH 31/66] libbpf: Fix segfault in light skeleton for objects without BTF When fed an empty BPF object, bpftool gen skeleton -L crashes at btf__set_fd() since it assumes presence of obj->btf, however for the sequence below clang adds no .BTF section (hence no BTF). Reproducer: $ touch a.bpf.c $ clang -O2 -g -target bpf -c a.bpf.c $ bpftool gen skeleton -L a.bpf.o /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ /* THIS FILE IS AUTOGENERATED! */ struct a_bpf { struct bpf_loader_ctx ctx; Segmentation fault (core dumped) The same occurs for files compiled without BTF info, i.e. without clang's -g flag. 
Fixes: 67234743736a (libbpf: Generate loader program out of BPF ELF file.) Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210930061634.1840768-1-memxor@gmail.com --- tools/lib/bpf/libbpf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 88d8825fc6f6..e4f83c304ec9 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6894,7 +6894,8 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) if (obj->gen_loader) { /* reset FDs */ - btf__set_fd(obj->btf, -1); + if (obj->btf) + btf__set_fd(obj->btf, -1); for (i = 0; i < obj->nr_maps; i++) obj->maps[i].fd = -1; if (!err) From 10eff1f5788b6ffac212c254e2f3666219576889 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Thu, 30 Sep 2021 20:49:42 +0300 Subject: [PATCH 32/66] Revert "net: mdiobus: Fix memory leak in __mdiobus_register" This reverts commit ab609f25d19858513919369ff3d9a63c02cd9e2e. This patch is correct in the sense that we _should_ call put_device() in case of device_register() failure, but the problem in this code is more vast. We need to set bus->state to MDIOBUS_UNREGISTERED before calling device_register() to correctly release the device in mdiobus_free(). This patch prevents us from doing it, since in case of device_register() failure put_device() will be called 2 times and it will cause UAF or something else. Also, Reported-by: tag in reverted commit was wrong, since syzbot reported different leak in same function.
Link: https://lore.kernel.org/netdev/20210928092657.GI2048@kadam/ Acked-by: Yanfei Xu Signed-off-by: Pavel Skripkin Link: https://lore.kernel.org/r/f12fb1faa4eccf0f355788225335eb4309ff2599.1633024062.git.paskripkin@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mdio_bus.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 6f4b4e5df639..53f034fc2ef7 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -537,7 +537,6 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) err = device_register(&bus->dev); if (err) { pr_err("mii_bus %s failed to register\n", bus->id); - put_device(&bus->dev); return -EINVAL; } From ca6e11c337daf7925ff8a2aac8e84490a8691905 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Thu, 30 Sep 2021 20:50:28 +0300 Subject: [PATCH 33/66] phy: mdio: fix memory leak Syzbot reported memory leak in MDIO bus interface, the problem was in wrong state logic. MDIOBUS_ALLOCATED indicates 2 states: 1. Bus is only allocated 2. Bus allocated and __mdiobus_register() fails, but device_register() was called In case of device_register() has been called we should call put_device() to correctly free the memory allocated for this device, but mdiobus_free() calls just kfree(dev) in case of MDIOBUS_ALLOCATED state To avoid this behaviour we need to set bus->state to MDIOBUS_UNREGISTERED _before_ calling device_register(), because put_device() should be called even in case of device_register() failure. 
Link: https://lore.kernel.org/netdev/YVMRWNDZDUOvQjHL@shell.armlinux.org.uk/ Fixes: 46abc02175b3 ("phylib: give mdio buses a device tree presence") Reported-and-tested-by: syzbot+398e7dc692ddbbb4cfec@syzkaller.appspotmail.com Reviewed-by: Dan Carpenter Signed-off-by: Pavel Skripkin Link: https://lore.kernel.org/r/eceae1429fbf8fa5c73dd2a0d39d525aa905074d.1633024062.git.paskripkin@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mdio_bus.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 53f034fc2ef7..fca8e335d750 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -534,6 +534,13 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) bus->dev.groups = NULL; dev_set_name(&bus->dev, "%s", bus->id); + /* We need to set state to MDIOBUS_UNREGISTERED to correctly release + * the device in mdiobus_free() + * + * State will be updated later in this function in case of success + */ + bus->state = MDIOBUS_UNREGISTERED; + err = device_register(&bus->dev); if (err) { pr_err("mii_bus %s failed to register\n", bus->id); From 5fb14d20f8241461d351bef73e49871e4b2330ab Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 1 Oct 2021 09:46:22 -0700 Subject: [PATCH 34/66] net: add kerneldoc comment for sk_peer_lock Fixes following warning: include/net/sock.h:533: warning: Function parameter or member 'sk_peer_lock' not described in 'sock' Fixes: 35306eb23814 ("af_unix: fix races in sk_peer_pid and sk_peer_cred accesses") Signed-off-by: Eric Dumazet Reported-by: Stephen Rothwell Link: https://lore.kernel.org/r/20211001164622.58520-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/sock.h b/include/net/sock.h index ae929e21a376..ea6fbc88c8f9 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -307,6 +307,7 @@ struct bpf_local_storage; * @sk_priority: %SO_PRIORITY setting * 
@sk_type: socket type (%SOCK_STREAM, etc) * @sk_protocol: which protocol this socket belongs in this network family + * @sk_peer_lock: lock protecting @sk_peer_pid and @sk_peer_cred * @sk_peer_pid: &struct pid for this socket's peer * @sk_peer_cred: %SO_PEERCRED setting * @sk_rcvlowat: %SO_RCVLOWAT setting From b0e875bac0fab3e7a7431c2eee36a8ccc0c712ac Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 1 Oct 2021 11:59:10 -0700 Subject: [PATCH 35/66] libbpf: Fix memory leak in strset Free struct strset itself, not just its internal parts. Fixes: 90d76d3ececc ("libbpf: Extract internal set-of-strings datastructure APIs") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20211001185910.86492-1-andrii@kernel.org --- tools/lib/bpf/strset.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/bpf/strset.c b/tools/lib/bpf/strset.c index 1fb8b49de1d6..ea655318153f 100644 --- a/tools/lib/bpf/strset.c +++ b/tools/lib/bpf/strset.c @@ -88,6 +88,7 @@ void strset__free(struct strset *set) hashmap__free(set->strs_hash); free(set->strs_data); + free(set); } size_t strset__data_size(const struct strset *set) From 560ee196fe9e5037e5015e2cdb14b3aecb1cd7dc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 30 Sep 2021 14:22:39 -0700 Subject: [PATCH 36/66] net_sched: fix NULL deref in fifo_set_limit() syzbot reported another NULL deref in fifo_set_limit() [1] I could repro the issue with : unshare -n tc qd add dev lo root handle 1:0 tbf limit 200000 burst 70000 rate 100Mbit tc qd replace dev lo parent 1:0 pfifo_fast tc qd change dev lo root handle 1:0 tbf limit 300000 burst 70000 rate 100Mbit pfifo_fast does not have a change() operation. Make fifo_set_limit() more robust about this. 
[1] BUG: kernel NULL pointer dereference, address: 0000000000000000 PGD 1cf99067 P4D 1cf99067 PUD 7ca49067 PMD 0 Oops: 0010 [#1] PREEMPT SMP KASAN CPU: 1 PID: 14443 Comm: syz-executor959 Not tainted 5.15.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:0x0 Code: Unable to access opcode bytes at RIP 0xffffffffffffffd6. RSP: 0018:ffffc9000e2f7310 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: ffffffff8d6ecc00 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff888024c27910 RDI: ffff888071e34000 RBP: ffff888071e34000 R08: 0000000000000001 R09: ffffffff8fcfb947 R10: 0000000000000001 R11: 0000000000000000 R12: ffff888024c27910 R13: ffff888071e34018 R14: 0000000000000000 R15: ffff88801ef74800 FS: 00007f321d897700(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd6 CR3: 00000000722c3000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: fifo_set_limit net/sched/sch_fifo.c:242 [inline] fifo_set_limit+0x198/0x210 net/sched/sch_fifo.c:227 tbf_change+0x6ec/0x16d0 net/sched/sch_tbf.c:418 qdisc_change net/sched/sch_api.c:1332 [inline] tc_modify_qdisc+0xd9a/0x1a60 net/sched/sch_api.c:1634 rtnetlink_rcv_msg+0x413/0xb80 net/core/rtnetlink.c:5572 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2504 netlink_unicast_kernel net/netlink/af_netlink.c:1314 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1340 netlink_sendmsg+0x86d/0xdb0 net/netlink/af_netlink.c:1929 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:724 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2409 ___sys_sendmsg+0xf3/0x170 net/socket.c:2463 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2492 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 
entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: fb0305ce1b03 ("net-sched: consolidate default fifo qdisc setup") Signed-off-by: Eric Dumazet Reported-by: syzbot Link: https://lore.kernel.org/r/20210930212239.3430364-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/sch_fifo.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index a579a4131d22..e1040421b797 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -233,6 +233,9 @@ int fifo_set_limit(struct Qdisc *q, unsigned int limit) if (strncmp(q->ops->id + 1, "fifo", 4) != 0) return 0; + if (!q->ops->change) + return 0; + nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); if (nla) { nla->nla_type = RTM_NEWQDISC; From 019d9329e7481cfaccbd8ed17b1e04ca76970f13 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 30 Sep 2021 15:53:30 +0300 Subject: [PATCH 37/66] net: mscc: ocelot: fix VCAP filters remaining active after being deleted When ocelot_flower.c calls ocelot_vcap_filter_add(), the filter has a given filter->id.cookie. This filter is added to the block->rules list. However, when ocelot_flower.c calls ocelot_vcap_block_find_filter_by_id() which passes the cookie as argument, the filter is never found by filter->id.cookie when searching through the block->rules list. This is unsurprising, since the filter->id.cookie is an unsigned long, but the cookie argument provided to ocelot_vcap_block_find_filter_by_id() is a signed int, and the comparison fails. 
Fixes: 50c6cc5b9283 ("net: mscc: ocelot: store a namespaced VCAP filter ID") Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20210930125330.2078625-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_vcap.c | 4 ++-- include/soc/mscc/ocelot_vcap.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c index 7945393a0655..99d7376a70a7 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.c +++ b/drivers/net/ethernet/mscc/ocelot_vcap.c @@ -998,8 +998,8 @@ ocelot_vcap_block_find_filter_by_index(struct ocelot_vcap_block *block, } struct ocelot_vcap_filter * -ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block, int cookie, - bool tc_offload) +ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block, + unsigned long cookie, bool tc_offload) { struct ocelot_vcap_filter *filter; diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index 25fd525aaf92..4869ebbd438d 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -694,7 +694,7 @@ int ocelot_vcap_filter_add(struct ocelot *ocelot, int ocelot_vcap_filter_del(struct ocelot *ocelot, struct ocelot_vcap_filter *rule); struct ocelot_vcap_filter * -ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block, int id, - bool tc_offload); +ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block, + unsigned long cookie, bool tc_offload); #endif /* _OCELOT_VCAP_H_ */ From aec3f415f7244b7747a7952596971adb0df2f568 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Wed, 29 Sep 2021 22:50:49 +0900 Subject: [PATCH 38/66] net: stmmac: dwmac-rk: Fix ethernet on rk3399 based devices Commit 2d26f6e39afb ("net: stmmac: dwmac-rk: fix unbalanced pm_runtime_enable warnings") while getting rid of a runtime PM warning ended up breaking ethernet on rk3399 based devices. 
By dropping an extra reference to the device, the commit ends up enabling suspend / resume of the ethernet device - which appears to be broken. While the issue with runtime pm is being investigated, partially revert commit 2d26f6e39afb to restore the network on rk3399. Fixes: 2d26f6e39afb ("net: stmmac: dwmac-rk: fix unbalanced pm_runtime_enable warnings") Suggested-by: Heiko Stuebner Signed-off-by: Punit Agrawal Cc: Michael Riesch Tested-by: Heiko Stuebner Link: https://lore.kernel.org/r/20210929135049.3426058-1-punitagrawal@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index ed817011a94a..6924a6aacbd5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "stmmac_platform.h" @@ -1528,6 +1529,8 @@ static int rk_gmac_powerup(struct rk_priv_data *bsp_priv) return ret; } + pm_runtime_get_sync(dev); + if (bsp_priv->integrated_phy) rk_gmac_integrated_phy_powerup(bsp_priv); @@ -1539,6 +1542,8 @@ static void rk_gmac_powerdown(struct rk_priv_data *gmac) if (gmac->integrated_phy) rk_gmac_integrated_phy_powerdown(gmac); + pm_runtime_put_sync(&gmac->pdev->dev); + phy_power_on(gmac, false); gmac_clk_enable(gmac, false); } From 5cfe5109a1d7fd9686d5c695827216788bf0b1ec Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 30 Sep 2021 08:12:43 +0300 Subject: [PATCH 39/66] MAINTAINERS: Remove Bin Luo as his email bounces The emails sent to luobin9@huawei.com bounce with error: "Recipient address rejected: Failed recipient validation check." So let's remove his entry and change the status of hinic driver till someone in Huawei will step-in to maintain it again. 
Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/045a32ccf394de66b7899c8b732f44dc5f4a1154.1632978665.git.leonro@nvidia.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1bd60368fb3e..6fbedd4784a3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8608,9 +8608,8 @@ F: Documentation/devicetree/bindings/iio/humidity/st,hts221.yaml F: drivers/iio/humidity/hts221* HUAWEI ETHERNET DRIVER -M: Bin Luo L: netdev@vger.kernel.org -S: Supported +S: Orphan F: Documentation/networking/device_drivers/ethernet/huawei/hinic.rst F: drivers/net/ethernet/huawei/hinic/ From 6fb721cf781808ee2ca5e737fb0592cc68de3381 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 26 Sep 2021 09:59:35 +0200 Subject: [PATCH 40/66] netfilter: nf_tables: honor NLM_F_CREATE and NLM_F_EXCL in event notification Include the NLM_F_CREATE and NLM_F_EXCL flags in netlink event notifications, otherwise userspace cannot distinguish between create and add commands. 
Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- net/netfilter/nf_tables_api.c | 47 +++++++++++++++++++++++-------- net/netfilter/nft_quota.c | 2 +- 3 files changed, 37 insertions(+), 14 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 148f5d8ee5ab..a16171c5fd9e 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1202,7 +1202,7 @@ struct nft_object *nft_obj_lookup(const struct net *net, void nft_obj_notify(struct net *net, const struct nft_table *table, struct nft_object *obj, u32 portid, u32 seq, - int event, int family, int report, gfp_t gfp); + int event, u16 flags, int family, int report, gfp_t gfp); /** * struct nft_object_type - stateful object type diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c8acd26c7201..c0851fec11d4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -780,6 +780,7 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) { struct nftables_pernet *nft_net; struct sk_buff *skb; + u16 flags = 0; int err; if (!ctx->report && @@ -790,8 +791,11 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) if (skb == NULL) goto err; + if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) + flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); + err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq, - event, 0, ctx->family, ctx->table); + event, flags, ctx->family, ctx->table); if (err < 0) { kfree_skb(skb); goto err; @@ -1563,6 +1567,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) { struct nftables_pernet *nft_net; struct sk_buff *skb; + u16 flags = 0; int err; if (!ctx->report && @@ -1573,8 +1578,11 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) if (skb == NULL) goto err; + if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) + 
flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); + err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq, - event, 0, ctx->family, ctx->table, + event, flags, ctx->family, ctx->table, ctx->chain); if (err < 0) { kfree_skb(skb); @@ -2945,6 +2953,8 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, } if (ctx->flags & (NLM_F_APPEND | NLM_F_REPLACE)) flags |= NLM_F_APPEND; + if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) + flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq, event, flags, ctx->family, ctx->table, @@ -3957,8 +3967,9 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx, gfp_t gfp_flags) { struct nftables_pernet *nft_net = nft_pernet(ctx->net); - struct sk_buff *skb; u32 portid = ctx->portid; + struct sk_buff *skb; + u16 flags = 0; int err; if (!ctx->report && @@ -3969,7 +3980,10 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx, if (skb == NULL) goto err; - err = nf_tables_fill_set(skb, ctx, set, event, 0); + if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) + flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); + + err = nf_tables_fill_set(skb, ctx, set, event, flags); if (err < 0) { kfree_skb(skb); goto err; @@ -5245,12 +5259,13 @@ static int nf_tables_getsetelem(struct sk_buff *skb, static void nf_tables_setelem_notify(const struct nft_ctx *ctx, const struct nft_set *set, const struct nft_set_elem *elem, - int event, u16 flags) + int event) { struct nftables_pernet *nft_net; struct net *net = ctx->net; u32 portid = ctx->portid; struct sk_buff *skb; + u16 flags = 0; int err; if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) @@ -5260,6 +5275,9 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx, if (skb == NULL) goto err; + if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) + flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); + err = nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags, set, elem); if (err < 
0) { @@ -6935,7 +6953,7 @@ static int nf_tables_delobj(struct sk_buff *skb, const struct nfnl_info *info, void nft_obj_notify(struct net *net, const struct nft_table *table, struct nft_object *obj, u32 portid, u32 seq, int event, - int family, int report, gfp_t gfp) + u16 flags, int family, int report, gfp_t gfp) { struct nftables_pernet *nft_net = nft_pernet(net); struct sk_buff *skb; @@ -6960,8 +6978,9 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, if (skb == NULL) goto err; - err = nf_tables_fill_obj_info(skb, net, portid, seq, event, 0, family, - table, obj, false); + err = nf_tables_fill_obj_info(skb, net, portid, seq, event, + flags & (NLM_F_CREATE | NLM_F_EXCL), + family, table, obj, false); if (err < 0) { kfree_skb(skb); goto err; @@ -6978,7 +6997,7 @@ static void nf_tables_obj_notify(const struct nft_ctx *ctx, struct nft_object *obj, int event) { nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event, - ctx->family, ctx->report, GFP_KERNEL); + ctx->flags, ctx->family, ctx->report, GFP_KERNEL); } /* @@ -7759,6 +7778,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, { struct nftables_pernet *nft_net = nft_pernet(ctx->net); struct sk_buff *skb; + u16 flags = 0; int err; if (!ctx->report && @@ -7769,8 +7789,11 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, if (skb == NULL) goto err; + if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) + flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); + err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid, - ctx->seq, event, 0, + ctx->seq, event, flags, ctx->family, flowtable, hook_list); if (err < 0) { kfree_skb(skb); @@ -8648,7 +8671,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_setelem_activate(net, te->set, &te->elem); nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, - NFT_MSG_NEWSETELEM, 0); + NFT_MSG_NEWSETELEM); nft_trans_destroy(trans); break; case NFT_MSG_DELSETELEM: @@ -8656,7 +8679,7 @@ static int 
nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, - NFT_MSG_DELSETELEM, 0); + NFT_MSG_DELSETELEM); nft_setelem_remove(net, te->set, &te->elem); if (!nft_setelem_is_catchall(te->set, &te->elem)) { atomic_dec(&te->set->nelems); diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 0363f533a42b..c4d1389f7185 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -60,7 +60,7 @@ static void nft_quota_obj_eval(struct nft_object *obj, if (overquota && !test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags)) nft_obj_notify(nft_net(pkt), obj->key.table, obj, 0, 0, - NFT_MSG_NEWOBJ, nft_pf(pkt), 0, GFP_ATOMIC); + NFT_MSG_NEWOBJ, 0, nft_pf(pkt), 0, GFP_ATOMIC); } static int nft_quota_do_init(const struct nlattr * const tb[], From eed183abc0d3b8adb64fd1363b7cea7986cd58d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 2 Oct 2021 11:04:09 +0200 Subject: [PATCH 41/66] powerpc/fsl/dts: Fix phy-connection-type for fm1mac3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Property phy-connection-type contains invalid value "sgmii-2500" per scheme defined in file ethernet-controller.yaml. Correct phy-connection-type value should be "2500base-x". Signed-off-by: Pali Rohár Fixes: 84e0f1c13806 ("powerpc/mpc85xx: Add MDIO bus muxing support to the board device tree(s)") Acked-by: Scott Wood Signed-off-by: David S. 
Miller --- arch/powerpc/boot/dts/fsl/t1023rdb.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/boot/dts/fsl/t1023rdb.dts b/arch/powerpc/boot/dts/fsl/t1023rdb.dts index 5ba6fbfca274..f82f85c65964 100644 --- a/arch/powerpc/boot/dts/fsl/t1023rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1023rdb.dts @@ -154,7 +154,7 @@ fm1mac3: ethernet@e4000 { phy-handle = <&sgmii_aqr_phy3>; - phy-connection-type = "sgmii-2500"; + phy-connection-type = "2500base-x"; sleep = <&rcpm 0x20000000>; }; From 7cd8b1542a7ba0720c5a0a85ed414a122015228b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 1 Oct 2021 19:20:33 +0300 Subject: [PATCH 42/66] ptp_pch: Load module automatically if ID matches The driver can't be loaded automatically because it misses module alias to be provided. Add corresponding MODULE_DEVICE_TABLE() call to the driver. Fixes: 863d08ece9bf ("supports eg20t ptp clock") Signed-off-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/ptp/ptp_pch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ptp/ptp_pch.c b/drivers/ptp/ptp_pch.c index a17e8cc642c5..8070f3fd98f0 100644 --- a/drivers/ptp/ptp_pch.c +++ b/drivers/ptp/ptp_pch.c @@ -644,6 +644,7 @@ static const struct pci_device_id pch_ieee1588_pcidev_id[] = { }, {0} }; +MODULE_DEVICE_TABLE(pci, pch_ieee1588_pcidev_id); static SIMPLE_DEV_PM_OPS(pch_pm_ops, pch_suspend, pch_resume); From b44d52a50bc6f191f0ae03f65de8401f3ef039b3 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 3 Oct 2021 17:50:53 +0200 Subject: [PATCH 43/66] dsa: tag_dsa: Fix mask for trunked packets A packet received on a trunk will have bit 2 set in Forward DSA tagged frame. Bit 1 can be either 0 or 1 and is otherwise undefined and bit 0 indicates the frame CFI. Masking with 7 thus results in frames as being identified as being from a trunk when in fact they are not. Fix the mask to just look at bit 2. 
Fixes: 5b60dadb71db ("net: dsa: tag_dsa: Support reception of packets from LAG devices") Signed-off-by: Andrew Lunn Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/dsa/tag_dsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 77d0ce89ab77..e5127b7d1c6a 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -210,7 +210,7 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev, cmd = dsa_header[0] >> 6; switch (cmd) { case DSA_CMD_FORWARD: - trunk = !!(dsa_header[1] & 7); + trunk = !!(dsa_header[1] & 4); break; case DSA_CMD_TO_CPU: From 3f6cffb8604b537e3d7ea040d7f4368689638eaf Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 4 Oct 2021 16:01:40 -0700 Subject: [PATCH 44/66] etherdevice: use __dev_addr_set() Andrew points out that eth_hw_addr_set() replaces memcpy() calls so we can't use ether_addr_copy() which assumes both arguments are 2-bytes aligned. Reported-by: Andrew Lunn Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 928c411bd509..c58d50451485 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -308,7 +308,7 @@ static inline void ether_addr_copy(u8 *dst, const u8 *src) */ static inline void eth_hw_addr_set(struct net_device *dev, const u8 *addr) { - ether_addr_copy(dev->dev_addr, addr); + __dev_addr_set(dev, addr, ETH_ALEN); } /** From baf33d7a75642b4b38a87fdf1cd96b506df4849f Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Mon, 4 Oct 2021 14:28:58 +0800 Subject: [PATCH 45/66] r8152: avoid to resubmit rx immediately For the situation that the disconnect event comes very late when the device is unplugged, the driver would resubmit the RX bulk transfer after getting the callback with -EPROTO immediately and continually. Finally, soft lockup occurs. 
This patch avoids resubmitting RX immediately. It uses a workqueue to schedule the RX NAPI, and the NAPI then resubmits the RX. This gives the disconnect event an opportunity to stop the submission before a soft lockup occurs. Reported-by: Jason-ch Chen Tested-by: Jason-ch Chen Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 60ba9b734055..f329e39100a7 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -767,6 +767,7 @@ enum rtl8152_flags { PHY_RESET, SCHEDULE_TASKLET, GREEN_ETHERNET, + RX_EPROTO, }; #define DEVICE_ID_THINKPAD_THUNDERBOLT3_DOCK_GEN2 0x3082 @@ -1770,6 +1771,14 @@ static void read_bulk_callback(struct urb *urb) rtl_set_unplug(tp); netif_device_detach(tp->netdev); return; + case -EPROTO: + urb->actual_length = 0; + spin_lock_irqsave(&tp->rx_lock, flags); + list_add_tail(&agg->list, &tp->rx_done); + spin_unlock_irqrestore(&tp->rx_lock, flags); + set_bit(RX_EPROTO, &tp->flags); + schedule_delayed_work(&tp->schedule, 1); + return; case -ENOENT: return; /* the urb is in unlink state */ case -ETIME: @@ -2425,6 +2434,7 @@ static int rx_bottom(struct r8152 *tp, int budget) if (list_empty(&tp->rx_done)) goto out1; + clear_bit(RX_EPROTO, &tp->flags); INIT_LIST_HEAD(&rx_queue); spin_lock_irqsave(&tp->rx_lock, flags); list_splice_init(&tp->rx_done, &rx_queue); @@ -2441,7 +2451,7 @@ static int rx_bottom(struct r8152 *tp, int budget) agg = list_entry(cursor, struct rx_agg, list); urb = agg->urb; - if (urb->actual_length < ETH_ZLEN) + if (urb->status != 0 || urb->actual_length < ETH_ZLEN) goto submit; agg_free = rtl_get_free_rx(tp, GFP_ATOMIC); @@ -6643,6 +6653,10 @@ static void rtl_work_func_t(struct work_struct *work) netif_carrier_ok(tp->netdev)) tasklet_schedule(&tp->tx_tl); + if (test_and_clear_bit(RX_EPROTO, &tp->flags) && + !list_empty(&tp->rx_done)) + napi_schedule(&tp->napi); + 
mutex_unlock(&tp->control); out1: From dbe0b88064494b7bb6a9b2aa7e085b14a3112d44 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 4 Oct 2021 18:05:07 -0700 Subject: [PATCH 46/66] net: bridge: use nla_total_size_64bit() in br_get_linkxstats_size() bridge_fill_linkxstats() is using nla_reserve_64bit(). We must use nla_total_size_64bit() instead of nla_total_size() for corresponding data structure. Fixes: 1080ab95e3c7 ("net: bridge: add support for IGMP/MLD stats and export them via netlink") Signed-off-by: Eric Dumazet Cc: Nikolay Aleksandrov Cc: Vivien Didelot Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 6c58fc14d2cb..29b8f6373fb9 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1666,7 +1666,7 @@ static size_t br_get_linkxstats_size(const struct net_device *dev, int attr) } return numvls * nla_total_size(sizeof(struct bridge_vlan_xstats)) + - nla_total_size(sizeof(struct br_mcast_stats)) + + nla_total_size_64bit(sizeof(struct br_mcast_stats)) + nla_total_size(0); } From 0854a0513321cf70bea5fa483ebcaa983cc7c62e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 4 Oct 2021 18:05:08 -0700 Subject: [PATCH 47/66] net: bridge: fix under estimation in br_get_linkxstats_size() Commit de1799667b00 ("net: bridge: add STP xstats") added an additional nla_reserve_64bit() in br_fill_linkxstats(), but forgot to update br_get_linkxstats_size() accordingly. This can trigger the following in rtnl_stats_get() WARN_ON(err == -EMSGSIZE); Fixes: de1799667b00 ("net: bridge: add STP xstats") Signed-off-by: Eric Dumazet Cc: Vivien Didelot Cc: Nikolay Aleksandrov Acked-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- net/bridge/br_netlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 29b8f6373fb9..5c6c4305ed23 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1667,6 +1667,7 @@ static size_t br_get_linkxstats_size(const struct net_device *dev, int attr) return numvls * nla_total_size(sizeof(struct bridge_vlan_xstats)) + nla_total_size_64bit(sizeof(struct br_mcast_stats)) + + (p ? nla_total_size_64bit(sizeof(p->stp_xstats)) : 0) + nla_total_size(0); } From a56d447f196fa9973c568f54c0d76d5391c3b0c0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 4 Oct 2021 12:55:22 -0700 Subject: [PATCH 48/66] net/sched: sch_taprio: properly cancel timer from taprio_destroy() There is a comment in qdisc_create() about us not calling ops->reset() in some cases. err_out4: /* * Any broken qdiscs that would require a ops->reset() here? * The qdisc was never in action so it shouldn't be necessary. */ As taprio sets a timer before actually receiving a packet, we need to cancel it from ops->destroy, just in case ops->reset has not been called. 
syzbot reported: ODEBUG: free active (active state 0) object type: hrtimer hint: advance_sched+0x0/0x9a0 arch/x86/include/asm/atomic64_64.h:22 WARNING: CPU: 0 PID: 8441 at lib/debugobjects.c:505 debug_print_object+0x16e/0x250 lib/debugobjects.c:505 Modules linked in: CPU: 0 PID: 8441 Comm: syz-executor813 Not tainted 5.14.0-rc6-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:debug_print_object+0x16e/0x250 lib/debugobjects.c:505 Code: ff df 48 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 af 00 00 00 48 8b 14 dd e0 d3 e3 89 4c 89 ee 48 c7 c7 e0 c7 e3 89 e8 5b 86 11 05 <0f> 0b 83 05 85 03 92 09 01 48 83 c4 18 5b 5d 41 5c 41 5d 41 5e c3 RSP: 0018:ffffc9000130f330 EFLAGS: 00010282 RAX: 0000000000000000 RBX: 0000000000000003 RCX: 0000000000000000 RDX: ffff88802baeb880 RSI: ffffffff815d87b5 RDI: fffff52000261e58 RBP: 0000000000000001 R08: 0000000000000000 R09: 0000000000000000 R10: ffffffff815d25ee R11: 0000000000000000 R12: ffffffff898dd020 R13: ffffffff89e3ce20 R14: ffffffff81653630 R15: dffffc0000000000 FS: 0000000000f0d300(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffb64b3e000 CR3: 0000000036557000 CR4: 00000000001506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __debug_check_no_obj_freed lib/debugobjects.c:987 [inline] debug_check_no_obj_freed+0x301/0x420 lib/debugobjects.c:1018 slab_free_hook mm/slub.c:1603 [inline] slab_free_freelist_hook+0x171/0x240 mm/slub.c:1653 slab_free mm/slub.c:3213 [inline] kfree+0xe4/0x540 mm/slub.c:4267 qdisc_create+0xbcf/0x1320 net/sched/sch_api.c:1299 tc_modify_qdisc+0x4c8/0x1a60 net/sched/sch_api.c:1663 rtnetlink_rcv_msg+0x413/0xb80 net/core/rtnetlink.c:5571 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2504 netlink_unicast_kernel net/netlink/af_netlink.c:1314 [inline] netlink_unicast+0x533/0x7d0 
net/netlink/af_netlink.c:1340 netlink_sendmsg+0x86d/0xdb0 net/netlink/af_netlink.c:1929 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:724 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2403 ___sys_sendmsg+0xf3/0x170 net/socket.c:2457 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2486 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 Fixes: 44d4775ca518 ("net/sched: sch_taprio: reset child qdiscs before freeing them") Signed-off-by: Eric Dumazet Cc: Davide Caratti Reported-by: syzbot Acked-by: Vinicius Costa Gomes Acked-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/sch_taprio.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 1ab2fc933a21..b9fd18d98646 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1641,6 +1641,10 @@ static void taprio_destroy(struct Qdisc *sch) list_del(&q->taprio_list); spin_unlock(&taprio_list_lock); + /* Note that taprio_reset() might not be called if an error + * happens in qdisc_create(), after taprio_init() has been called. + */ + hrtimer_cancel(&q->advance_timer); taprio_disable_offload(dev, q, NULL); From 25a9da6641f1f66006e93ddbefee13a437efa8c0 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Mon, 4 Oct 2021 17:50:02 -0400 Subject: [PATCH 49/66] net: sfp: Fix typo in state machine debug string The string should be "tx_disable" to match the state enum. Fixes: 4005a7cb4f55 ("net: phy: sftp: print debug message with text, not numbers") Signed-off-by: Sean Anderson Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/phy/sfp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 34e90216bd2c..ab77a9f439ef 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -134,7 +134,7 @@ static const char * const sm_state_strings[] = { [SFP_S_LINK_UP] = "link_up", [SFP_S_TX_FAULT] = "tx_fault", [SFP_S_REINIT] = "reinit", - [SFP_S_TX_DISABLE] = "rx_disable", + [SFP_S_TX_DISABLE] = "tx_disable", }; static const char *sm_state_to_str(unsigned short sm_state) From e3cf002d5a4452f8adc5543df341cf96fd702fcf Mon Sep 17 00:00:00 2001 From: Wong Vee Khee Date: Tue, 5 Oct 2021 11:45:21 +0800 Subject: [PATCH 50/66] net: pcs: xpcs: fix incorrect CL37 AN sequence According to Synopsys DesignWare Cores Ethernet PCS databook, it is required to disable Clause 37 auto-negotiation by programming bit-12 (AN_ENABLE) to 0 if it is already enabled, before programming various fields of VR_MII_AN_CTRL registers. After all these programming are done, it is then required to enable Clause 37 auto-negotiation by programming bit-12 (AN_ENABLE) to 1. Fixes: b97b5331b8ab ("net: pcs: add C37 SGMII AN support for intel mGbE controller") Cc: Vladimir Oltean Signed-off-by: Wong Vee Khee Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- drivers/net/pcs/pcs-xpcs.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index fb0a83dc09ac..a3e806cfa684 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -697,14 +697,17 @@ EXPORT_SYMBOL_GPL(xpcs_config_eee); static int xpcs_config_aneg_c37_sgmii(struct dw_xpcs *xpcs, unsigned int mode) { - int ret; + int ret, mdio_ctrl; /* For AN for C37 SGMII mode, the settings are :- - * 1) VR_MII_AN_CTRL Bit(2:1)[PCS_MODE] = 10b (SGMII AN) - * 2) VR_MII_AN_CTRL Bit(3) [TX_CONFIG] = 0b (MAC side SGMII) + * 1) VR_MII_MMD_CTRL Bit(12) [AN_ENABLE] = 0b (Disable SGMII AN in case + it is already enabled) + * 2) VR_MII_AN_CTRL Bit(2:1)[PCS_MODE] = 10b (SGMII AN) + * 3) VR_MII_AN_CTRL Bit(3) [TX_CONFIG] = 0b (MAC side SGMII) * DW xPCS used with DW EQoS MAC is always MAC side SGMII. - * 3) VR_MII_DIG_CTRL1 Bit(9) [MAC_AUTO_SW] = 1b (Automatic + * 4) VR_MII_DIG_CTRL1 Bit(9) [MAC_AUTO_SW] = 1b (Automatic * speed/duplex mode change by HW after SGMII AN complete) + * 5) VR_MII_MMD_CTRL Bit(12) [AN_ENABLE] = 1b (Enable SGMII AN) * * Note: Since it is MAC side SGMII, there is no need to set * SR_MII_AN_ADV. MAC side SGMII receives AN Tx Config from @@ -712,6 +715,17 @@ static int xpcs_config_aneg_c37_sgmii(struct dw_xpcs *xpcs, unsigned int mode) * between PHY and Link Partner. There is also no need to * trigger AN restart for MAC-side SGMII. 
*/ + mdio_ctrl = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_CTRL); + if (mdio_ctrl < 0) + return mdio_ctrl; + + if (mdio_ctrl & AN_CL37_EN) { + ret = xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_CTRL, + mdio_ctrl & ~AN_CL37_EN); + if (ret < 0) + return ret; + } + ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_CTRL); if (ret < 0) return ret; @@ -736,7 +750,15 @@ static int xpcs_config_aneg_c37_sgmii(struct dw_xpcs *xpcs, unsigned int mode) else ret &= ~DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW; - return xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_DIG_CTRL1, ret); + ret = xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_DIG_CTRL1, ret); + if (ret < 0) + return ret; + + if (phylink_autoneg_inband(mode)) + ret = xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_CTRL, + mdio_ctrl | AN_CL37_EN); + + return ret; } static int xpcs_config_2500basex(struct dw_xpcs *xpcs) From 7707a4d01a648e4c655101a469c956cb11273655 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 4 Oct 2021 14:24:15 -0700 Subject: [PATCH 51/66] netlink: annotate data races around nlk->bound While existing code is correct, KCSAN is reporting a data-race in netlink_insert / netlink_sendmsg [1] It is correct to read nlk->bound without a lock, as netlink_autobind() will acquire all needed locks. 
[1] BUG: KCSAN: data-race in netlink_insert / netlink_sendmsg write to 0xffff8881031c8b30 of 1 bytes by task 18752 on cpu 0: netlink_insert+0x5cc/0x7f0 net/netlink/af_netlink.c:597 netlink_autobind+0xa9/0x150 net/netlink/af_netlink.c:842 netlink_sendmsg+0x479/0x7c0 net/netlink/af_netlink.c:1892 sock_sendmsg_nosec net/socket.c:703 [inline] sock_sendmsg net/socket.c:723 [inline] ____sys_sendmsg+0x360/0x4d0 net/socket.c:2392 ___sys_sendmsg net/socket.c:2446 [inline] __sys_sendmsg+0x1ed/0x270 net/socket.c:2475 __do_sys_sendmsg net/socket.c:2484 [inline] __se_sys_sendmsg net/socket.c:2482 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2482 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff8881031c8b30 of 1 bytes by task 18751 on cpu 1: netlink_sendmsg+0x270/0x7c0 net/netlink/af_netlink.c:1891 sock_sendmsg_nosec net/socket.c:703 [inline] sock_sendmsg net/socket.c:723 [inline] __sys_sendto+0x2a8/0x370 net/socket.c:2019 __do_sys_sendto net/socket.c:2031 [inline] __se_sys_sendto net/socket.c:2027 [inline] __x64_sys_sendto+0x74/0x90 net/socket.c:2027 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x00 -> 0x01 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 18751 Comm: syz-executor.0 Not tainted 5.14.0-rc1-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: da314c9923fe ("netlink: Replace rhash_portid with bound") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. 
Miller --- net/netlink/af_netlink.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 24b7cf447bc5..ada47e59647a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -594,7 +594,10 @@ static int netlink_insert(struct sock *sk, u32 portid) /* We need to ensure that the socket is hashed and visible. */ smp_wmb(); - nlk_sk(sk)->bound = portid; + /* Paired with lockless reads from netlink_bind(), + * netlink_connect() and netlink_sendmsg(). + */ + WRITE_ONCE(nlk_sk(sk)->bound, portid); err: release_sock(sk); @@ -1012,7 +1015,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (nlk->ngroups < BITS_PER_LONG) groups &= (1UL << nlk->ngroups) - 1; - bound = nlk->bound; + /* Paired with WRITE_ONCE() in netlink_insert() */ + bound = READ_ONCE(nlk->bound); if (bound) { /* Ensure nlk->portid is up-to-date. */ smp_rmb(); @@ -1098,8 +1102,9 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, /* No need for barriers here as we return to user-space without * using any of the bound attributes. + * Paired with WRITE_ONCE() in netlink_insert(). */ - if (!nlk->bound) + if (!READ_ONCE(nlk->bound)) err = netlink_autobind(sock); if (err == 0) { @@ -1888,7 +1893,8 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) dst_group = nlk->dst_group; } - if (!nlk->bound) { + /* Paired with WRITE_ONCE() in netlink_insert() */ + if (!READ_ONCE(nlk->bound)) { err = netlink_autobind(sock); if (err) goto out; From 590df78bc7d1d0425196a8e11ce6676d7023fb26 Mon Sep 17 00:00:00 2001 From: Wong Vee Khee Date: Tue, 5 Oct 2021 19:50:59 +0800 Subject: [PATCH 52/66] net: pcs: xpcs: fix incorrect steps on disable EEE When Energy-Efficient Ethernet (EEE) is disabled from the MAC side, we need to clear the DW_VR_MII_EEE_TRN_LPI bit of the DW_VR_MII_EEE_MCTRL1 register. 
Fixes: 7617af3d1a5e ("net: pcs: Introducing support for DWC xpcs Energy Efficient Ethernet") Cc: Michael Sit Wei Hong Signed-off-by: Wong Vee Khee Signed-off-by: David S. Miller --- drivers/net/pcs/pcs-xpcs.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index a3e806cfa684..7de631f5356f 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -666,6 +666,10 @@ int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable) { int ret; + ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_EEE_MCTRL0); + if (ret < 0) + return ret; + if (enable) { /* Enable EEE */ ret = DW_VR_MII_EEE_LTX_EN | DW_VR_MII_EEE_LRX_EN | @@ -673,9 +677,6 @@ int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable) DW_VR_MII_EEE_TX_EN_CTRL | DW_VR_MII_EEE_RX_EN_CTRL | mult_fact_100ns << DW_VR_MII_EEE_MULT_FACT_100NS_SHIFT; } else { - ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_EEE_MCTRL0); - if (ret < 0) - return ret; ret &= ~(DW_VR_MII_EEE_LTX_EN | DW_VR_MII_EEE_LRX_EN | DW_VR_MII_EEE_TX_QUIET_EN | DW_VR_MII_EEE_RX_QUIET_EN | DW_VR_MII_EEE_TX_EN_CTRL | DW_VR_MII_EEE_RX_EN_CTRL | @@ -690,7 +691,11 @@ int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable) if (ret < 0) return ret; - ret |= DW_VR_MII_EEE_TRN_LPI; + if (enable) + ret |= DW_VR_MII_EEE_TRN_LPI; + else + ret &= ~DW_VR_MII_EEE_TRN_LPI; + return xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_EEE_MCTRL1, ret); } EXPORT_SYMBOL_GPL(xpcs_config_eee); From d4aeaed80b0ebb020fadf2073b23462928dbdc17 Mon Sep 17 00:00:00 2001 From: Wong Vee Khee Date: Tue, 5 Oct 2021 19:51:00 +0800 Subject: [PATCH 53/66] net: stmmac: trigger PCS EEE to turn off on link down The current implementation enables the PCS EEE feature in the event of link up, but the PCS EEE feature is not disabled on link down. This patch makes sure the PCS EEE feature is disabled on link down. 
Fixes: 656ed8b015f1 ("net: stmmac: fix EEE init issue when paired with EEE capable PHYs") Signed-off-by: Wong Vee Khee Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 981ccf47dcea..eb3b7bf771d7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -477,6 +477,10 @@ bool stmmac_eee_init(struct stmmac_priv *priv) stmmac_lpi_entry_timer_config(priv, 0); del_timer_sync(&priv->eee_ctrl_timer); stmmac_set_eee_timer(priv, priv->hw, 0, eee_tw_timer); + if (priv->hw->xpcs) + xpcs_config_eee(priv->hw->xpcs, + priv->plat->mult_fact_100ns, + false); } mutex_unlock(&priv->lock); return false; @@ -1038,7 +1042,7 @@ static void stmmac_mac_link_down(struct phylink_config *config, stmmac_mac_set(priv, priv->ioaddr, false); priv->eee_active = false; priv->tx_lpi_enabled = false; - stmmac_eee_init(priv); + priv->eee_enabled = stmmac_eee_init(priv); stmmac_set_eee_pls(priv, priv->hw, false); if (priv->dma_cap.fpesel) From d0c6416bd7091647f6041599f396bfa19ae30368 Mon Sep 17 00:00:00 2001 From: Jiang Wang Date: Mon, 4 Oct 2021 23:25:28 +0000 Subject: [PATCH 54/66] unix: Fix an issue in unix_shutdown causing the other end read/write failures Commit 94531cfcbe79 ("af_unix: Add unix_stream_proto for sockmap") sets unix domain socket peer state to TCP_CLOSE in unix_shutdown. This could happen when the local end is shutdown but the other end is not. Then, the other end will get read or write failures which is not expected. Fix the issue by setting the local state to shutdown. 
Fixes: 94531cfcbe79 ("af_unix: Add unix_stream_proto for sockmap") Reported-by: Casey Schaufler Suggested-by: Cong Wang Signed-off-by: Jiang Wang Signed-off-by: Daniel Borkmann Tested-by: Casey Schaufler Reviewed-by: Casey Schaufler Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211004232530.2377085-1-jiang.wang@bytedance.com --- net/unix/af_unix.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f505b89bda6a..915afcae6a12 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2860,6 +2860,9 @@ static int unix_shutdown(struct socket *sock, int mode) unix_state_lock(sk); sk->sk_shutdown |= mode; + if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && + mode == SHUTDOWN_MASK) + sk->sk_state = TCP_CLOSE; other = unix_peer(sk); if (other) sock_hold(other); @@ -2882,12 +2885,10 @@ static int unix_shutdown(struct socket *sock, int mode) other->sk_shutdown |= peer_mode; unix_state_unlock(other); other->sk_state_change(other); - if (peer_mode == SHUTDOWN_MASK) { + if (peer_mode == SHUTDOWN_MASK) sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); - other->sk_state = TCP_CLOSE; - } else if (peer_mode & RCV_SHUTDOWN) { + else if (peer_mode & RCV_SHUTDOWN) sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); - } } if (other) sock_put(other); From d03477ee10f4bc35d3573cf1823814378ef2dca2 Mon Sep 17 00:00:00 2001 From: Catherine Sullivan Date: Tue, 5 Oct 2021 19:42:19 -0700 Subject: [PATCH 55/66] gve: Correct available tx qpl check The qpl_map_size is rounded up to a multiple of sizeof(long), but the number of qpls doesn't have to be. Fixes: f5cedc84a30d2 ("gve: Add transmit and receive support") Signed-off-by: Catherine Sullivan Signed-off-by: Jeroen de Borst Signed-off-by: David S. 
Miller --- drivers/net/ethernet/google/gve/gve.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 1d3188e8e3b3..92dc18a4bcc4 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -780,7 +780,7 @@ struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_priv *priv) gve_num_tx_qpls(priv)); /* we are out of rx qpls */ - if (id == priv->qpl_cfg.qpl_map_size) + if (id == gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv)) return NULL; set_bit(id, priv->qpl_cfg.qpl_id_map); From 922aa9bcac92b3ab6a423526a8e785b35a60b441 Mon Sep 17 00:00:00 2001 From: Tao Liu Date: Tue, 5 Oct 2021 19:42:20 -0700 Subject: [PATCH 56/66] gve: Avoid freeing NULL pointer Prevent possible crashes when cleaning up after unsuccessful initializations. Fixes: 893ce44df5658 ("gve: Add basic driver framework for Compute Engine Virtual NIC") Signed-off-by: Tao Liu Signed-off-by: Catherine Sully Signed-off-by: David S. 
Miller --- drivers/net/ethernet/google/gve/gve_main.c | 27 ++++++++++++++-------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 099a2bc5ae67..29c5f994f92e 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -82,6 +82,9 @@ static int gve_alloc_counter_array(struct gve_priv *priv) static void gve_free_counter_array(struct gve_priv *priv) { + if (!priv->counter_array) + return; + dma_free_coherent(&priv->pdev->dev, priv->num_event_counters * sizeof(*priv->counter_array), @@ -142,6 +145,9 @@ static int gve_alloc_stats_report(struct gve_priv *priv) static void gve_free_stats_report(struct gve_priv *priv) { + if (!priv->stats_report) + return; + del_timer_sync(&priv->stats_report_timer); dma_free_coherent(&priv->pdev->dev, priv->stats_report_len, priv->stats_report, priv->stats_report_bus); @@ -370,18 +376,19 @@ static void gve_free_notify_blocks(struct gve_priv *priv) { int i; - if (priv->msix_vectors) { - /* Free the irqs */ - for (i = 0; i < priv->num_ntfy_blks; i++) { - struct gve_notify_block *block = &priv->ntfy_blocks[i]; - int msix_idx = i; + if (!priv->msix_vectors) + return; - irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, - NULL); - free_irq(priv->msix_vectors[msix_idx].vector, block); - } - free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); + /* Free the irqs */ + for (i = 0; i < priv->num_ntfy_blks; i++) { + struct gve_notify_block *block = &priv->ntfy_blocks[i]; + int msix_idx = i; + + irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, + NULL); + free_irq(priv->msix_vectors[msix_idx].vector, block); } + free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks), priv->ntfy_blocks, priv->ntfy_block_bus); From d4b111fda69a01e0a7439d05993f5dad567c93aa Mon Sep 17 00:00:00 2001 
From: Catherine Sullivan Date: Tue, 5 Oct 2021 19:42:21 -0700 Subject: [PATCH 57/66] gve: Properly handle errors in gve_assign_qpl Ignored errors would result in crash. Fixes: ede3fcf5ec67f ("gve: Add support for raw addressing to the rx path") Signed-off-by: Catherine Sullivan Signed-off-by: Jeroen de Borst Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_rx.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index bb8261368250..94941d4e4744 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -104,8 +104,14 @@ static int gve_prefill_rx_pages(struct gve_rx_ring *rx) if (!rx->data.page_info) return -ENOMEM; - if (!rx->data.raw_addressing) + if (!rx->data.raw_addressing) { rx->data.qpl = gve_assign_rx_qpl(priv); + if (!rx->data.qpl) { + kvfree(rx->data.page_info); + rx->data.page_info = NULL; + return -ENOMEM; + } + } for (i = 0; i < slots; i++) { if (!rx->data.raw_addressing) { struct page *page = rx->data.qpl->pages[i]; From d34367991933d28bd7331f67a759be9a8c474014 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Oct 2021 14:04:17 -0700 Subject: [PATCH 58/66] rtnetlink: fix if_nlmsg_stats_size() under estimation rtnl_fill_statsinfo() is filling skb with one mandatory if_stats_msg structure. nlmsg_put(skb, pid, seq, type, sizeof(struct if_stats_msg), flags); But if_nlmsg_stats_size() never considered the needed storage. This bug did not show up because alloc_skb(X) allocates skb with extra tailroom, because of added alignments. This could very well be changed in the future to have deterministic behavior. Fixes: 10c9ead9f3c6 ("rtnetlink: add new RTM_GETSTATS message to dump link stats") Signed-off-by: Eric Dumazet Cc: Roopa Prabhu Acked-by: Roopa Prabhu Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 972c8cb303a5..8ccce85562a1 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -5262,7 +5262,7 @@ nla_put_failure: static size_t if_nlmsg_stats_size(const struct net_device *dev, u32 filter_mask) { - size_t size = 0; + size_t size = NLMSG_ALIGN(sizeof(struct if_stats_msg)); if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, 0)) size += nla_total_size_64bit(sizeof(struct rtnl_link_stats64)); From 2f57d4975fa027eabd35fdf23a49f8222ef3abf2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Oct 2021 17:30:30 -0700 Subject: [PATCH 59/66] gve: fix gve_get_stats() gve_get_stats() can report wrong numbers if/when u64_stats_fetch_retry() returns true. What is needed here is to sample values in temporary variables, and only use them after each loop is ended. Fixes: f5cedc84a30d ("gve: Add transmit and receive support") Signed-off-by: Eric Dumazet Cc: Catherine Sullivan Cc: Sagi Shahar Cc: Jon Olson Cc: Willem de Bruijn Cc: Luigi Rizzo Cc: Jeroen de Borst Cc: Tao Liu Signed-off-by: David S. 
Miller --- drivers/net/ethernet/google/gve/gve_main.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 29c5f994f92e..1b49e9feacac 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -41,6 +41,7 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) { struct gve_priv *priv = netdev_priv(dev); unsigned int start; + u64 packets, bytes; int ring; if (priv->rx) { @@ -48,10 +49,12 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) do { start = u64_stats_fetch_begin(&priv->rx[ring].statss); - s->rx_packets += priv->rx[ring].rpackets; - s->rx_bytes += priv->rx[ring].rbytes; + packets = priv->rx[ring].rpackets; + bytes = priv->rx[ring].rbytes; } while (u64_stats_fetch_retry(&priv->rx[ring].statss, start)); + s->rx_packets += packets; + s->rx_bytes += bytes; } } if (priv->tx) { @@ -59,10 +62,12 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) do { start = u64_stats_fetch_begin(&priv->tx[ring].statss); - s->tx_packets += priv->tx[ring].pkt_done; - s->tx_bytes += priv->tx[ring].bytes_done; + packets = priv->tx[ring].pkt_done; + bytes = priv->tx[ring].bytes_done; } while (u64_stats_fetch_retry(&priv->tx[ring].statss, start)); + s->tx_packets += packets; + s->tx_bytes += bytes; } } } From 17c37d748f2b122a95b6d0524d410302ff89a2b1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Oct 2021 18:01:38 -0700 Subject: [PATCH 60/66] gve: report 64bit tx_bytes counter from gve_handle_report_stats() Each tx queue maintains a 64bit counter for bytes, there is no reason to truncate this to 32bit (or this has not been documented) Fixes: 24aeb56f2d38 ("gve: Add Gvnic stats AQ command and ethtool show/set-priv-flags.") Signed-off-by: Eric Dumazet Cc: Yangchun Fu Cc: Kuo Zhao Cc: David Awogbemila Signed-off-by: David S. 
Miller --- drivers/net/ethernet/google/gve/gve_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 1b49e9feacac..bf8a4a7c43f7 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1197,9 +1197,10 @@ static void gve_handle_reset(struct gve_priv *priv) void gve_handle_report_stats(struct gve_priv *priv) { - int idx, stats_idx = 0, tx_bytes; - unsigned int start = 0; struct stats *stats = priv->stats_report->stats; + int idx, stats_idx = 0; + unsigned int start = 0; + u64 tx_bytes; if (!gve_get_report_stats(priv)) return; From 3707428ddabadde4086eb7c592e988f584344857 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Tue, 5 Oct 2021 16:11:05 -0700 Subject: [PATCH 61/66] ionic: move filter sync_needed bit set Move the setting of the filter-sync-needed bit to the error case in the filter add routine to be sure we're checking the live filter status rather than a copy of the pre-sync status. Fixes: 969f84394604 ("ionic: sync the filters in the work task") Signed-off-by: Shannon Nelson Signed-off-by: David S. 
Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 4 +++- drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c | 3 --- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 381966e8f557..ccf3ffcd3939 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -1292,8 +1292,10 @@ int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr) if (err && err != -EEXIST) { /* set the state back to NEW so we can try again later */ f = ionic_rx_filter_by_addr(lif, addr); - if (f && f->state == IONIC_FILTER_STATE_SYNCED) + if (f && f->state == IONIC_FILTER_STATE_SYNCED) { f->state = IONIC_FILTER_STATE_NEW; + set_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state); + } spin_unlock_bh(&lif->rx_filters.lock); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c index 25ecfcfa1281..69728f9013cb 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c @@ -349,9 +349,6 @@ loop_out: list_for_each_entry_safe(sync_item, spos, &sync_add_list, list) { (void)ionic_lif_addr_add(lif, sync_item->f.cmd.mac.addr); - if (sync_item->f.state != IONIC_FILTER_STATE_SYNCED) - set_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state); - list_del(&sync_item->list); devm_kfree(dev, sync_item); } From a50a0595230d38be15183699f7bbc963bf3d127a Mon Sep 17 00:00:00 2001 From: Marcel Ziswiler Date: Wed, 6 Oct 2021 08:31:04 +0200 Subject: [PATCH 62/66] dt-bindings: net: dsa: marvell: fix compatible in example While the MV88E6390 switch chip exists, one is supposed to use a compatible of "marvell,mv88e6190" for it. Fix this in the given example. Signed-off-by: Marcel Ziswiler Fixes: a3c53be55c95 ("net: dsa: mv88e6xxx: Support multiple MDIO busses") Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- Documentation/devicetree/bindings/net/dsa/marvell.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/dsa/marvell.txt b/Documentation/devicetree/bindings/net/dsa/marvell.txt index 30c11fea491b..2363b412410c 100644 --- a/Documentation/devicetree/bindings/net/dsa/marvell.txt +++ b/Documentation/devicetree/bindings/net/dsa/marvell.txt @@ -83,7 +83,7 @@ Example: #interrupt-cells = <2>; switch0: switch@0 { - compatible = "marvell,mv88e6390"; + compatible = "marvell,mv88e6190"; reg = <0>; reset-gpios = <&gpio5 1 GPIO_ACTIVE_LOW>; From 857b6c6f665cca9828396d9743faf37fd09e9ac3 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 14 Sep 2021 10:54:42 +0200 Subject: [PATCH 63/66] i40e: fix endless loop under rtnl The loop in i40e_get_capabilities can never end. The problem is that although i40e_aq_discover_capabilities returns with an error if there's a firmware problem, the returned error is not checked. There is a check for pf->hw.aq.asq_last_status but that value is set to I40E_AQ_RC_OK on most firmware problems. When i40e_aq_discover_capabilities encounters a firmware problem, it will encounter the same problem on its next invocation. As the result, the loop becomes endless. We hit this with I40E_ERR_ADMIN_QUEUE_TIMEOUT but looking at the code, it can happen with a range of other firmware errors. I don't know what the correct behavior should be: whether the firmware should be retried a few times, or whether pf->hw.aq.asq_last_status should be always set to the encountered firmware error (but then it would be pointless and can be just replaced by the i40e_aq_discover_capabilities return value). However, the current behavior with an endless loop under the rtnl mutex(!) is unacceptable and Intel has not submitted a fix, although we explained the bug to them 7 months ago. This may not be the best possible fix but it's better than hanging the whole system on a firmware bug. 
Fixes: 56a62fc86895 ("i40e: init code and hardware support") Tested-by: Stefan Assmann Signed-off-by: Jiri Benc Reviewed-by: Jesse Brandeburg Tested-by: Dave Switzer Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 2f20980dd9a5..b5b984754ec9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -10113,7 +10113,7 @@ static int i40e_get_capabilities(struct i40e_pf *pf, if (pf->hw.aq.asq_last_status == I40E_AQ_RC_ENOMEM) { /* retry with a larger buffer */ buf_len = data_size; - } else if (pf->hw.aq.asq_last_status != I40E_AQ_RC_OK) { + } else if (pf->hw.aq.asq_last_status != I40E_AQ_RC_OK || err) { dev_info(&pf->pdev->dev, "capability discovery failed, err %s aq_err %s\n", i40e_stat_str(&pf->hw, err), From 2e5a20573a926302b233b0c2e1077f5debc7ab2e Mon Sep 17 00:00:00 2001 From: Sylwester Dziedziuch Date: Fri, 24 Sep 2021 11:40:41 +0200 Subject: [PATCH 64/66] i40e: Fix freeing of uninitialized misc IRQ vector When VSI set up failed in i40e_probe() as part of PF switch set up driver was trying to free misc IRQ vectors in i40e_clear_interrupt_scheme and produced a kernel Oops: Trying to free already-free IRQ 266 WARNING: CPU: 0 PID: 5 at kernel/irq/manage.c:1731 __free_irq+0x9a/0x300 Workqueue: events work_for_cpu_fn RIP: 0010:__free_irq+0x9a/0x300 Call Trace: ? synchronize_irq+0x3a/0xa0 free_irq+0x2e/0x60 i40e_clear_interrupt_scheme+0x53/0x190 [i40e] i40e_probe.part.108+0x134b/0x1a40 [i40e] ? kmem_cache_alloc+0x158/0x1c0 ? acpi_ut_update_ref_count.part.1+0x8e/0x345 ? acpi_ut_update_object_reference+0x15e/0x1e2 ? strstr+0x21/0x70 ? irq_get_irq_data+0xa/0x20 ? mp_check_pin_attr+0x13/0xc0 ? irq_get_irq_data+0xa/0x20 ? mp_map_pin_to_irq+0xd3/0x2f0 ? acpi_register_gsi_ioapic+0x93/0x170 ? pci_conf1_read+0xa4/0x100 ? 
pci_bus_read_config_word+0x49/0x70 ? do_pci_enable_device+0xcc/0x100 local_pci_probe+0x41/0x90 work_for_cpu_fn+0x16/0x20 process_one_work+0x1a7/0x360 worker_thread+0x1cf/0x390 ? create_worker+0x1a0/0x1a0 kthread+0x112/0x130 ? kthread_flush_work_fn+0x10/0x10 ret_from_fork+0x1f/0x40 The problem is that at that point misc IRQ vectors were not allocated yet and we get a call trace that driver is trying to free already free IRQ vectors. Add a check in i40e_clear_interrupt_scheme for __I40E_MISC_IRQ_REQUESTED PF state before calling i40e_free_misc_vector. This state is set only if misc IRQ vectors were properly initialized. Fixes: c17401a1dd21 ("i40e: use separate state bit for miscellaneous IRQ setup") Reported-by: PJ Waskiewicz Signed-off-by: Sylwester Dziedziuch Signed-off-by: Mateusz Palczewski Tested-by: Dave Switzer Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b5b984754ec9..e04b540cedc8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4871,7 +4871,8 @@ static void i40e_clear_interrupt_scheme(struct i40e_pf *pf) { int i; - i40e_free_misc_vector(pf); + if (test_bit(__I40E_MISC_IRQ_REQUESTED, pf->state)) + i40e_free_misc_vector(pf); i40e_put_lump(pf->irq_pile, pf->iwarp_base_vector, I40E_IWARP_IRQ_PILE_ID); From 54ee39439acd9f8b161703c6ad4f4e1835585277 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Tue, 24 Aug 2021 12:06:39 +0200 Subject: [PATCH 65/66] iavf: fix double unlock of crit_lock The crit_lock mutex could be unlocked twice as reported here https://lists.osuosl.org/pipermail/intel-wired-lan/Week-of-Mon-20210823/025525.html Remove the superfluous unlock. Technically the problem was already present before 5ac49f3c2702 as that commit only replaced the locking primitive, but no functional change. 
Reported-by: Dan Carpenter Fixes: 5ac49f3c2702 ("iavf: use mutexes for locking of critical sections") Fixes: bac8486116b0 ("iavf: Refactor the watchdog state machine") Signed-off-by: Stefan Assmann Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 23762a7ef740..cada4e0e40b4 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1965,7 +1965,6 @@ static void iavf_watchdog_task(struct work_struct *work) } adapter->aq_required = 0; adapter->current_op = VIRTCHNL_OP_UNKNOWN; - mutex_unlock(&adapter->crit_lock); queue_delayed_work(iavf_wq, &adapter->watchdog_task, msecs_to_jiffies(10)); From 8d6c414cd2fb74aa6812e9bfec6178f8246c4f3a Mon Sep 17 00:00:00 2001 From: Mike Manning Date: Tue, 5 Oct 2021 14:03:42 +0100 Subject: [PATCH 66/66] net: prefer socket bound to interface when not in VRF The commit 6da5b0f027a8 ("net: ensure unbound datagram socket to be chosen when not in a VRF") modified compute_score() so that a device match is always made, not just in the case of an l3mdev skb, then increments the score also for unbound sockets. This ensures that sockets bound to an l3mdev are never selected when not in a VRF. But as unbound and bound sockets are now scored equally, this results in the last opened socket being selected if there are matches in the default VRF for an unbound socket and a socket bound to a dev that is not an l3mdev. However, handling prior to this commit was to always select the bound socket in this case. Reinstate this handling by incrementing the score only for bound sockets. The required isolation due to choosing between an unbound socket and a socket bound to an l3mdev remains in place due to the device match always being made. The same approach is taken for compute_score() for stream sockets. 
Fixes: 6da5b0f027a8 ("net: ensure unbound datagram socket to be chosen when not in a VRF") Fixes: e78190581aff ("net: ensure unbound stream socket to be chosen when not in a VRF") Signed-off-by: Mike Manning Reviewed-by: David Ahern Link: https://lore.kernel.org/r/cf0a8523-b362-1edf-ee78-eef63cbbb428@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/inet_hashtables.c | 4 +++- net/ipv4/udp.c | 3 ++- net/ipv6/inet6_hashtables.c | 2 +- net/ipv6/udp.c | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 80aeaf9e6e16..bfb522e51346 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -242,8 +242,10 @@ static inline int compute_score(struct sock *sk, struct net *net, if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) return -1; + score = sk->sk_bound_dev_if ? 2 : 1; - score = sk->sk_family == PF_INET ? 2 : 1; + if (sk->sk_family == PF_INET) + score++; if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2a7825a5b842..8536b2a7210b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -390,7 +390,8 @@ static int compute_score(struct sock *sk, struct net *net, dif, sdif); if (!dev_match) return -1; - score += 4; + if (sk->sk_bound_dev_if) + score += 4; if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 55c290d55605..67c9114835c8 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -106,7 +106,7 @@ static inline int compute_score(struct sock *sk, struct net *net, if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) return -1; - score = 1; + score = sk->sk_bound_dev_if ? 
2 : 1; if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e505bb007e9f..8d785232b479 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -133,7 +133,8 @@ static int compute_score(struct sock *sk, struct net *net, dev_match = udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif); if (!dev_match) return -1; - score++; + if (sk->sk_bound_dev_if) + score++; if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++;