From bb8b81e396f7afbe7c50d789e2107512274d2a35 Mon Sep 17 00:00:00 2001 From: Loris Reiff Date: Fri, 22 Jan 2021 17:42:31 +0100 Subject: [PATCH 01/36] bpf, cgroup: Fix optlen WARN_ON_ONCE toctou A toctou issue in `__cgroup_bpf_run_filter_getsockopt` can trigger a WARN_ON_ONCE in a check of `copy_from_user`. `*optlen` is checked to be non-negative in the individual getsockopt functions beforehand. Changing `*optlen` in a race to a negative value will result in a `copy_from_user(ctx.optval, optval, ctx.optlen)` with `ctx.optlen` being a negative integer. Fixes: 0d01da6afc54 ("bpf: implement getsockopt and setsockopt hooks") Signed-off-by: Loris Reiff Signed-off-by: Daniel Borkmann Reviewed-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20210122164232.61770-1-loris.reiff@liblor.ch --- kernel/bpf/cgroup.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 96555a8a2c54..6ec8f02f463b 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1442,6 +1442,11 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, goto out; } + if (ctx.optlen < 0) { + ret = -EFAULT; + goto out; + } + if (copy_from_user(ctx.optval, optval, min(ctx.optlen, max_optlen)) != 0) { ret = -EFAULT; From f4a2da755a7e1f5d845c52aee71336cee289935a Mon Sep 17 00:00:00 2001 From: Loris Reiff Date: Fri, 22 Jan 2021 17:42:32 +0100 Subject: [PATCH 02/36] bpf, cgroup: Fix problematic bounds check Since ctx.optlen is signed, a larger value than max_value could be passed, as it is later on used as unsigned, which causes a WARN_ON_ONCE in the copy_to_user. Fixes: 0d01da6afc54 ("bpf: implement getsockopt and setsockopt hooks") Signed-off-by: Loris Reiff Signed-off-by: Daniel Borkmann Reviewed-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20210122164232.61770-2-loris.reiff@liblor.ch --- kernel/bpf/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 6ec8f02f463b..6aa9e10c6335 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1464,7 +1464,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, goto out; } - if (ctx.optlen > max_optlen) { + if (ctx.optlen > max_optlen || ctx.optlen < 0) { ret = -EFAULT; goto out; } From b9557caaf872271671bdc1ef003d72f421eb72f6 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 20 Jan 2021 18:08:56 -0800 Subject: [PATCH 03/36] bpf, inode_storage: Put file handler if no storage was found Put file f if inode_storage_ptr() returns NULL. Fixes: 8ea636848aca ("bpf: Implement bpf_local_storage for inodes") Signed-off-by: Pan Bian Signed-off-by: Daniel Borkmann Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20210121020856.25507-1-bianpan2016@163.com --- kernel/bpf/bpf_inode_storage.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index 2f0597320b6d..6639640523c0 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -125,8 +125,12 @@ static int bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key, fd = *(int *)key; f = fget_raw(fd); - if (!f || !inode_storage_ptr(f->f_inode)) + if (!f) return -EBADF; + if (!inode_storage_ptr(f->f_inode)) { + fput(f); + return -EBADF; + } sdata = bpf_local_storage_update(f->f_inode, (struct bpf_local_storage_map *)map, From 78031381ae9c88f4f914d66154f4745122149c58 Mon Sep 17 00:00:00 2001 From: Mikko Ylinen Date: Mon, 25 Jan 2021 08:39:36 +0200 Subject: [PATCH 04/36] bpf: Drop disabled LSM hooks from the sleepable set Some networking and keys LSM hooks are conditionally enabled and when building the new sleepable BPF LSM hooks with those LSM hooks disabled, the following build error occurs: BTFIDS vmlinux FAILED unresolved symbol bpf_lsm_socket_socketpair To fix the error, conditionally add the relevant networking/keys LSM hooks to the sleepable set. Fixes: 423f16108c9d8 ("bpf: Augment the set of sleepable LSM hooks") Signed-off-by: Mikko Ylinen Signed-off-by: Daniel Borkmann Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20210125063936.89365-1-mikko.ylinen@linux.intel.com --- kernel/bpf/bpf_lsm.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 70e5e0b6d69d..1622a44d1617 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -149,7 +149,11 @@ BTF_ID(func, bpf_lsm_file_ioctl) BTF_ID(func, bpf_lsm_file_lock) BTF_ID(func, bpf_lsm_file_open) BTF_ID(func, bpf_lsm_file_receive) + +#ifdef CONFIG_SECURITY_NETWORK BTF_ID(func, bpf_lsm_inet_conn_established) +#endif /* CONFIG_SECURITY_NETWORK */ + BTF_ID(func, bpf_lsm_inode_create) BTF_ID(func, bpf_lsm_inode_free_security) BTF_ID(func, bpf_lsm_inode_getattr) @@ -166,7 +170,11 @@ BTF_ID(func, bpf_lsm_inode_symlink) BTF_ID(func, bpf_lsm_inode_unlink) BTF_ID(func, bpf_lsm_kernel_module_request) BTF_ID(func, bpf_lsm_kernfs_init_security) + +#ifdef CONFIG_KEYS BTF_ID(func, bpf_lsm_key_free) +#endif /* CONFIG_KEYS */ + BTF_ID(func, bpf_lsm_mmap_file) BTF_ID(func, bpf_lsm_netlink_send) BTF_ID(func, bpf_lsm_path_notify) @@ -181,6 +189,8 @@ BTF_ID(func, bpf_lsm_sb_show_options) BTF_ID(func, bpf_lsm_sb_statfs) BTF_ID(func, bpf_lsm_sb_umount) BTF_ID(func, bpf_lsm_settime) + +#ifdef CONFIG_SECURITY_NETWORK BTF_ID(func, bpf_lsm_socket_accept) BTF_ID(func, bpf_lsm_socket_bind) BTF_ID(func, bpf_lsm_socket_connect) @@ -195,6 +205,8 @@ BTF_ID(func, bpf_lsm_socket_recvmsg) BTF_ID(func, bpf_lsm_socket_sendmsg) BTF_ID(func, bpf_lsm_socket_shutdown) BTF_ID(func, bpf_lsm_socket_socketpair) +#endif /* CONFIG_SECURITY_NETWORK */ + BTF_ID(func, bpf_lsm_syslog) BTF_ID(func, bpf_lsm_task_alloc) BTF_ID(func, bpf_lsm_task_getsecid) From 150a27328b681425c8cab239894a48f2aeb870e9 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 26 Jan 2021 16:13:20 +0000 Subject: [PATCH 05/36] bpf, preload: Fix build when $(O) points to a relative path Building the kernel with CONFIG_BPF_PRELOAD, and by providing a relative path for the output directory, may fail with the following error: $ make O=build bindeb-pkg ... /.../linux/tools/scripts/Makefile.include:5: *** O=build does not exist. Stop. make[7]: *** [/.../linux/kernel/bpf/preload/Makefile:9: kernel/bpf/preload/libbpf.a] Error 2 make[6]: *** [/.../linux/scripts/Makefile.build:500: kernel/bpf/preload] Error 2 make[5]: *** [/.../linux/scripts/Makefile.build:500: kernel/bpf] Error 2 make[4]: *** [/.../linux/Makefile:1799: kernel] Error 2 make[4]: *** Waiting for unfinished jobs.... In the case above, for the "bindeb-pkg" target, the error is produced by the "dummy" check in Makefile.include, called from libbpf's Makefile. This check changes directory to $(PWD) before checking for the existence of $(O). But at this step we have $(PWD) pointing to "/.../linux/build", and $(O) pointing to "build". So the Makefile.include tries in fact to assert the existence of a directory named "/.../linux/build/build", which does not exist. Note that the error does not occur for all make targets and architectures combinations. This was observed on x86 for "bindeb-pkg", or for a regular build for UML [0]. Here are some details. The root Makefile recursively calls itself once, after changing directory to $(O). The content for the variable $(PWD) is preserved across recursive calls to make, so it is unchanged at this step. For "bindeb-pkg", $(PWD) is eventually updated because the target writes a new Makefile (as debian/rules) and calls it indirectly through dpkg-buildpackage. This script does not preserve $(PWD), which is reset to the current working directory when the target in debian/rules is called. Although not investigated, it seems likely that something similar causes UML to change its value for $(PWD). Non-trivial fixes could be to remove the use of $(PWD) from the "dummy" check, or to make sure that $(PWD) and $(O) are preserved or updated to always play well and form a valid $(PWD)/$(O) path across the different targets and architectures. Instead, we take a simpler approach and just update $(O) when calling libbpf's Makefile, so it points to an absolute path which should always resolve for the "dummy" check run (through includes) by that Makefile. David Gow previously posted a slightly different version of this patch as a RFC [0], two months ago or so. [0] https://lore.kernel.org/bpf/20201119085022.3606135-1-davidgow@google.com/t/#u Fixes: d71fa5c9763c ("bpf: Add kernel module with user mode driver that populates bpffs.") Reported-by: David Gow Signed-off-by: Quentin Monnet Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Cc: Brendan Higgins Cc: Masahiro Yamada Link: https://lore.kernel.org/bpf/20210126161320.24561-1-quentin@isovalent.com --- kernel/bpf/preload/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/preload/Makefile b/kernel/bpf/preload/Makefile index 23ee310b6eb4..1951332dd15f 100644 --- a/kernel/bpf/preload/Makefile +++ b/kernel/bpf/preload/Makefile @@ -4,8 +4,11 @@ LIBBPF_SRCS = $(srctree)/tools/lib/bpf/ LIBBPF_A = $(obj)/libbpf.a LIBBPF_OUT = $(abspath $(obj)) +# Although not in use by libbpf's Makefile, set $(O) so that the "dummy" test +# in tools/scripts/Makefile.include always succeeds when building the kernel +# with $(O) pointing to a relative path, as in "make O=build bindeb-pkg". $(LIBBPF_A): - $(Q)$(MAKE) -C $(LIBBPF_SRCS) OUTPUT=$(LIBBPF_OUT)/ $(LIBBPF_OUT)/libbpf.a + $(Q)$(MAKE) -C $(LIBBPF_SRCS) O=$(LIBBPF_OUT)/ OUTPUT=$(LIBBPF_OUT)/ $(LIBBPF_OUT)/libbpf.a userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi \ -I $(srctree)/tools/lib/ -Wno-unused-result From 01365633bd1c836240f9bbf86bbeee749795480a Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Thu, 28 Jan 2021 20:48:02 +0100 Subject: [PATCH 06/36] net: arcnet: Fix RESET flag handling The main arcnet interrupt handler calls arcnet_close() then arcnet_open(), if the RESET status flag is encountered. This is invalid: 1) In general, interrupt handlers should never call ->ndo_stop() and ->ndo_open() functions. They are usually full of blocking calls and other methods that are expected to be called only from drivers init and exit code paths. 2) arcnet_close() contains a del_timer_sync(). If the irq handler interrupts the to-be-deleted timer, del_timer_sync() will just loop forever. 3) arcnet_close() also calls tasklet_kill(), which has a warning if called from irq context. 4) For device reset, the sequence "arcnet_close(); arcnet_open();" is not complete. Some children arcnet drivers have special init/exit code sequences, which then embed a call to arcnet_open() and arcnet_close() accordingly. Check drivers/net/arcnet/com20020.c. Run the device RESET sequence from a scheduled workqueue instead. Signed-off-by: Ahmed S. Darwish Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20210128194802.727770-1-a.darwish@linutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/arcnet/arc-rimi.c | 4 +- drivers/net/arcnet/arcdevice.h | 6 +++ drivers/net/arcnet/arcnet.c | 66 +++++++++++++++++++++++++++++-- drivers/net/arcnet/com20020-isa.c | 4 +- drivers/net/arcnet/com20020-pci.c | 2 +- drivers/net/arcnet/com20020_cs.c | 2 +- drivers/net/arcnet/com90io.c | 4 +- drivers/net/arcnet/com90xx.c | 4 +- 8 files changed, 78 insertions(+), 14 deletions(-) diff --git a/drivers/net/arcnet/arc-rimi.c b/drivers/net/arcnet/arc-rimi.c index 98df38fe553c..12d085405bd0 100644 --- a/drivers/net/arcnet/arc-rimi.c +++ b/drivers/net/arcnet/arc-rimi.c @@ -332,7 +332,7 @@ static int __init arc_rimi_init(void) dev->irq = 9; if (arcrimi_probe(dev)) { - free_netdev(dev); + free_arcdev(dev); return -EIO; } @@ -349,7 +349,7 @@ static void __exit arc_rimi_exit(void) iounmap(lp->mem_start); release_mem_region(dev->mem_start, dev->mem_end - dev->mem_start + 1); free_irq(dev->irq, dev); - free_netdev(dev); + free_arcdev(dev); } #ifndef MODULE diff --git a/drivers/net/arcnet/arcdevice.h b/drivers/net/arcnet/arcdevice.h index 22a49c6d7ae6..5d4a4c7efbbf 100644 --- a/drivers/net/arcnet/arcdevice.h +++ b/drivers/net/arcnet/arcdevice.h @@ -298,6 +298,10 @@ struct arcnet_local { int excnak_pending; /* We just got an excesive nak interrupt */ + /* RESET flag handling */ + int reset_in_progress; + struct work_struct reset_work; + struct { uint16_t sequence; /* sequence number (incs with each packet) */ __be16 aborted_seq; @@ -350,7 +354,9 @@ void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc) void arcnet_unregister_proto(struct ArcProto *proto); irqreturn_t arcnet_interrupt(int irq, void *dev_id); + struct net_device *alloc_arcdev(const char *name); +void free_arcdev(struct net_device *dev); int arcnet_open(struct net_device *dev); int arcnet_close(struct net_device *dev); diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index e04efc0a5c97..d76dd7d14299 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -387,10 +387,44 @@ static void arcnet_timer(struct timer_list *t) struct arcnet_local *lp = from_timer(lp, t, timer); struct net_device *dev = lp->dev; - if (!netif_carrier_ok(dev)) { + spin_lock_irq(&lp->lock); + + if (!lp->reset_in_progress && !netif_carrier_ok(dev)) { netif_carrier_on(dev); netdev_info(dev, "link up\n"); } + + spin_unlock_irq(&lp->lock); +} + +static void reset_device_work(struct work_struct *work) +{ + struct arcnet_local *lp; + struct net_device *dev; + + lp = container_of(work, struct arcnet_local, reset_work); + dev = lp->dev; + + /* Do not bring the network interface back up if an ifdown + * was already done. + */ + if (!netif_running(dev) || !lp->reset_in_progress) + return; + + rtnl_lock(); + + /* Do another check, in case of an ifdown that was triggered in + * the small race window between the exit condition above and + * acquiring RTNL. + */ + if (!netif_running(dev) || !lp->reset_in_progress) + goto out; + + dev_close(dev); + dev_open(dev, NULL); + +out: + rtnl_unlock(); } static void arcnet_reply_tasklet(unsigned long data) @@ -452,12 +486,25 @@ struct net_device *alloc_arcdev(const char *name) lp->dev = dev; spin_lock_init(&lp->lock); timer_setup(&lp->timer, arcnet_timer, 0); + INIT_WORK(&lp->reset_work, reset_device_work); } return dev; } EXPORT_SYMBOL(alloc_arcdev); +void free_arcdev(struct net_device *dev) +{ + struct arcnet_local *lp = netdev_priv(dev); + + /* Do not cancel this at ->ndo_close(), as the workqueue itself + * indirectly calls the ifdown path through dev_close(). + */ + cancel_work_sync(&lp->reset_work); + free_netdev(dev); +} +EXPORT_SYMBOL(free_arcdev); + /* Open/initialize the board. This is called sometime after booting when * the 'ifconfig' program is run. * @@ -587,6 +634,10 @@ int arcnet_close(struct net_device *dev) /* shut down the card */ lp->hw.close(dev); + + /* reset counters */ + lp->reset_in_progress = 0; + module_put(lp->hw.owner); return 0; } @@ -820,6 +871,9 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) spin_lock_irqsave(&lp->lock, flags); + if (lp->reset_in_progress) + goto out; + /* RESET flag was enabled - if device is not running, we must * clear it right away (but nothing else). */ @@ -852,11 +906,14 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) if (status & RESETflag) { arc_printk(D_NORMAL, dev, "spurious reset (status=%Xh)\n", status); - arcnet_close(dev); - arcnet_open(dev); + + lp->reset_in_progress = 1; + netif_stop_queue(dev); + netif_carrier_off(dev); + schedule_work(&lp->reset_work); /* get out of the interrupt handler! */ - break; + goto out; } /* RX is inhibited - we must have received something. * Prepare to receive into the next buffer. @@ -1052,6 +1109,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) udelay(1); lp->hw.intmask(dev, lp->intmask); +out: spin_unlock_irqrestore(&lp->lock, flags); return retval; } diff --git a/drivers/net/arcnet/com20020-isa.c b/drivers/net/arcnet/com20020-isa.c index f983c4ce6b07..be618e4b9ed5 100644 --- a/drivers/net/arcnet/com20020-isa.c +++ b/drivers/net/arcnet/com20020-isa.c @@ -169,7 +169,7 @@ static int __init com20020_init(void) dev->irq = 9; if (com20020isa_probe(dev)) { - free_netdev(dev); + free_arcdev(dev); return -EIO; } @@ -182,7 +182,7 @@ static void __exit com20020_exit(void) unregister_netdev(my_dev); free_irq(my_dev->irq, my_dev); release_region(my_dev->base_addr, ARCNET_TOTAL_SIZE); - free_netdev(my_dev); + free_arcdev(my_dev); } #ifndef MODULE diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index eb7f76753c9c..8bdc44b7e09a 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -291,7 +291,7 @@ static void com20020pci_remove(struct pci_dev *pdev) unregister_netdev(dev); free_irq(dev->irq, dev); - free_netdev(dev); + free_arcdev(dev); } } diff --git a/drivers/net/arcnet/com20020_cs.c b/drivers/net/arcnet/com20020_cs.c index cf607ffcf358..9cc5eb6a8e90 100644 --- a/drivers/net/arcnet/com20020_cs.c +++ b/drivers/net/arcnet/com20020_cs.c @@ -177,7 +177,7 @@ static void com20020_detach(struct pcmcia_device *link) dev = info->dev; if (dev) { dev_dbg(&link->dev, "kfree...\n"); - free_netdev(dev); + free_arcdev(dev); } dev_dbg(&link->dev, "kfree2...\n"); kfree(info); diff --git a/drivers/net/arcnet/com90io.c b/drivers/net/arcnet/com90io.c index cf214b730671..3856b447d38e 100644 --- a/drivers/net/arcnet/com90io.c +++ b/drivers/net/arcnet/com90io.c @@ -396,7 +396,7 @@ static int __init com90io_init(void) err = com90io_probe(dev); if (err) { - free_netdev(dev); + free_arcdev(dev); return err; } @@ -419,7 +419,7 @@ static void __exit com90io_exit(void) free_irq(dev->irq, dev); release_region(dev->base_addr, ARCNET_TOTAL_SIZE); - free_netdev(dev); + free_arcdev(dev); } module_init(com90io_init) diff --git a/drivers/net/arcnet/com90xx.c b/drivers/net/arcnet/com90xx.c index 3dc3d533cb19..d8dfb9ea0de8 100644 --- a/drivers/net/arcnet/com90xx.c +++ b/drivers/net/arcnet/com90xx.c @@ -554,7 +554,7 @@ err_free_irq: err_release_mem: release_mem_region(dev->mem_start, dev->mem_end - dev->mem_start + 1); err_free_dev: - free_netdev(dev); + free_arcdev(dev); return -EIO; } @@ -672,7 +672,7 @@ static void __exit com90xx_exit(void) release_region(dev->base_addr, ARCNET_TOTAL_SIZE); release_mem_region(dev->mem_start, dev->mem_end - dev->mem_start + 1); - free_netdev(dev); + free_arcdev(dev); } } From 8d520b4de3edca4f4fb242b5ddc659b6a9b9e65e Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 28 Jan 2021 23:01:54 +0100 Subject: [PATCH 07/36] r8169: work around RTL8125 UDP hw bug It was reported that on RTL8125 network breaks under heavy UDP load, e.g. torrent traffic ([0], from comment 27). Realtek confirmed a hw bug and provided me with a test version of the r8125 driver including a workaround. Tests confirmed that the workaround fixes the issue. I modified the original version of the workaround to meet mainline code style. [0] https://bugzilla.kernel.org/show_bug.cgi?id=209839 v2: - rebased to net v3: - make rtl_skb_is_udp() more robust and use skb_header_pointer() to access the ip(v6) header v4: - remove dependency on ptp_classify.h - replace magic number with offsetof(struct udphdr, len) Fixes: f1bce4ad2f1c ("r8169: add support for RTL8125") Tested-by: xplo Signed-off-by: Heiner Kallweit Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/6e453d49-1801-e6de-d5f7-d7e6c7526c8f@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 71 +++++++++++++++++++++-- 1 file changed, 65 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index a569abe7f5ef..f2269c9f5f05 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4046,17 +4046,72 @@ err_out: return -EIO; } -static bool rtl_test_hw_pad_bug(struct rtl8169_private *tp) +static bool rtl_skb_is_udp(struct sk_buff *skb) { + int no = skb_network_offset(skb); + struct ipv6hdr *i6h, _i6h; + struct iphdr *ih, _ih; + + switch (vlan_get_protocol(skb)) { + case htons(ETH_P_IP): + ih = skb_header_pointer(skb, no, sizeof(_ih), &_ih); + return ih && ih->protocol == IPPROTO_UDP; + case htons(ETH_P_IPV6): + i6h = skb_header_pointer(skb, no, sizeof(_i6h), &_i6h); + return i6h && i6h->nexthdr == IPPROTO_UDP; + default: + return false; + } +} + +#define RTL_MIN_PATCH_LEN 47 + +/* see rtl8125_get_patch_pad_len() in r8125 vendor driver */ +static unsigned int rtl8125_quirk_udp_padto(struct rtl8169_private *tp, + struct sk_buff *skb) +{ + unsigned int padto = 0, len = skb->len; + + if (rtl_is_8125(tp) && len < 128 + RTL_MIN_PATCH_LEN && + rtl_skb_is_udp(skb) && skb_transport_header_was_set(skb)) { + unsigned int trans_data_len = skb_tail_pointer(skb) - + skb_transport_header(skb); + + if (trans_data_len >= offsetof(struct udphdr, len) && + trans_data_len < RTL_MIN_PATCH_LEN) { + u16 dest = ntohs(udp_hdr(skb)->dest); + + /* dest is a standard PTP port */ + if (dest == 319 || dest == 320) + padto = len + RTL_MIN_PATCH_LEN - trans_data_len; + } + + if (trans_data_len < sizeof(struct udphdr)) + padto = max_t(unsigned int, padto, + len + sizeof(struct udphdr) - trans_data_len); + } + + return padto; +} + +static unsigned int rtl_quirk_packet_padto(struct rtl8169_private *tp, + struct sk_buff *skb) +{ + unsigned int padto; + + padto = rtl8125_quirk_udp_padto(tp, skb); + switch (tp->mac_version) { case RTL_GIGA_MAC_VER_34: case RTL_GIGA_MAC_VER_60: case RTL_GIGA_MAC_VER_61: case RTL_GIGA_MAC_VER_63: - return true; + padto = max_t(unsigned int, padto, ETH_ZLEN); default: - return false; + break; } + + return padto; } static void rtl8169_tso_csum_v1(struct sk_buff *skb, u32 *opts) @@ -4128,9 +4183,10 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp, opts[1] |= transport_offset << TCPHO_SHIFT; } else { - if (unlikely(skb->len < ETH_ZLEN && rtl_test_hw_pad_bug(tp))) - /* eth_skb_pad would free the skb on error */ - return !__skb_put_padto(skb, ETH_ZLEN, false); + unsigned int padto = rtl_quirk_packet_padto(tp, skb); + + /* skb_padto would free the skb on error */ + return !__skb_put_padto(skb, padto, false); } return true; @@ -4307,6 +4363,9 @@ static netdev_features_t rtl8169_features_check(struct sk_buff *skb, if (skb->len < ETH_ZLEN) features &= ~NETIF_F_CSUM_MASK; + if (rtl_quirk_packet_padto(tp, skb)) + features &= ~NETIF_F_CSUM_MASK; + if (transport_offset > TCPHO_MAX && rtl_chip_supports_csum_v2(tp)) features &= ~NETIF_F_CSUM_MASK; From 5399d52233c47905bbf97dcbaa2d7a9cc31670ba Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 29 Jan 2021 23:53:50 +0000 Subject: [PATCH 08/36] rxrpc: Fix deadlock around release of dst cached on udp tunnel AF_RXRPC sockets use UDP ports in encap mode. This causes socket and dst from an incoming packet to get stolen and attached to the UDP socket from whence it is leaked when that socket is closed. When a network namespace is removed, the wait for dst records to be cleaned up happens before the cleanup of the rxrpc and UDP socket, meaning that the wait never finishes. Fix this by moving the rxrpc (and, by dependence, the afs) private per-network namespace registrations to the device group rather than subsys group. This allows cached rxrpc local endpoints to be cleared and their UDP sockets closed before we try waiting for the dst records. The symptom is that lines looking like the following: unregister_netdevice: waiting for lo to become free get emitted at regular intervals after running something like the referenced syzbot test. Thanks to Vadim for tracking this down and work out the fix. Reported-by: syzbot+df400f2f24a1677cd7e0@syzkaller.appspotmail.com Reported-by: Vadim Fedorenko Fixes: 5271953cad31 ("rxrpc: Use the UDP encap_rcv hook") Signed-off-by: David Howells Acked-by: Vadim Fedorenko Link: https://lore.kernel.org/r/161196443016.3868642.5577440140646403533.stgit@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski --- fs/afs/main.c | 6 +++--- net/rxrpc/af_rxrpc.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/afs/main.c b/fs/afs/main.c index accdd8970e7c..b2975256dadb 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -193,7 +193,7 @@ static int __init afs_init(void) goto error_cache; #endif - ret = register_pernet_subsys(&afs_net_ops); + ret = register_pernet_device(&afs_net_ops); if (ret < 0) goto error_net; @@ -213,7 +213,7 @@ static int __init afs_init(void) error_proc: afs_fs_exit(); error_fs: - unregister_pernet_subsys(&afs_net_ops); + unregister_pernet_device(&afs_net_ops); error_net: #ifdef CONFIG_AFS_FSCACHE fscache_unregister_netfs(&afs_cache_netfs); @@ -244,7 +244,7 @@ static void __exit afs_exit(void) proc_remove(afs_proc_symlink); afs_fs_exit(); - unregister_pernet_subsys(&afs_net_ops); + unregister_pernet_device(&afs_net_ops); #ifdef CONFIG_AFS_FSCACHE fscache_unregister_netfs(&afs_cache_netfs); #endif diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 0a2f4817ec6c..41671af6b33f 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -990,7 +990,7 @@ static int __init af_rxrpc_init(void) goto error_security; } - ret = register_pernet_subsys(&rxrpc_net_ops); + ret = register_pernet_device(&rxrpc_net_ops); if (ret) goto error_pernet; @@ -1035,7 +1035,7 @@ error_key_type: error_sock: proto_unregister(&rxrpc_proto); error_proto: - unregister_pernet_subsys(&rxrpc_net_ops); + unregister_pernet_device(&rxrpc_net_ops); error_pernet: rxrpc_exit_security(); error_security: @@ -1057,7 +1057,7 @@ static void __exit af_rxrpc_exit(void) unregister_key_type(&key_type_rxrpc); sock_unregister(PF_RXRPC); proto_unregister(&rxrpc_proto); - unregister_pernet_subsys(&rxrpc_net_ops); + unregister_pernet_device(&rxrpc_net_ops); ASSERTCMP(atomic_read(&rxrpc_n_tx_skbs), ==, 0); ASSERTCMP(atomic_read(&rxrpc_n_rx_skbs), ==, 0); From eb4e8fac00d1e01ada5e57c05d24739156086677 Mon Sep 17 00:00:00 2001 From: Chinmay Agarwal Date: Wed, 27 Jan 2021 22:24:54 +0530 Subject: [PATCH 09/36] neighbour: Prevent a dead entry from updating gc_list Following race condition was detected: - neigh_flush_dev() is under execution and calls neigh_mark_dead(n) marking the neighbour entry 'n' as dead. - Executing: __netif_receive_skb() -> __netif_receive_skb_core() -> arp_rcv() -> arp_process().arp_process() calls __neigh_lookup() which takes a reference on neighbour entry 'n'. - Moves further along neigh_flush_dev() and calls neigh_cleanup_and_release(n), but since reference count increased in t2, 'n' couldn't be destroyed. - Moves further along, arp_process() and calls neigh_update()-> __neigh_update() -> neigh_update_gc_list(), which adds the neighbour entry back in gc_list(neigh_mark_dead(), removed it earlier in t0 from gc_list) - arp_process() finally calls neigh_release(n), destroying the neighbour entry. This leads to 'n' still being part of gc_list, but the actual neighbour structure has been freed. The situation can be prevented from happening if we disallow a dead entry to have any possibility of updating gc_list. This is what the patch intends to achieve. Fixes: 9c29a2f55ec0 ("neighbor: Fix locking order for gc_list changes") Signed-off-by: Chinmay Agarwal Reviewed-by: Cong Wang Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20210127165453.GA20514@chinagar-linux.qualcomm.com Signed-off-by: Jakub Kicinski --- net/core/neighbour.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 277ed854aef1..6d2d557442dc 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1245,13 +1245,14 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, old = neigh->nud_state; err = -EPERM; + if (neigh->dead) { + NL_SET_ERR_MSG(extack, "Neighbor entry is now dead"); + new = old; + goto out; + } if (!(flags & NEIGH_UPDATE_F_ADMIN) && (old & (NUD_NOARP | NUD_PERMANENT))) goto out; - if (neigh->dead) { - NL_SET_ERR_MSG(extack, "Neighbor entry is now dead"); - goto out; - } ext_learn_change = neigh_update_ext_learned(neigh, flags, ¬ify); From 18fe0fae61252b5ae6e26553e2676b5fac555951 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 1 Feb 2021 09:33:24 +0100 Subject: [PATCH 10/36] mac80211: fix station rate table updates on assoc If the driver uses .sta_add, station entries are only uploaded after the sta is in assoc state. Fix early station rate table updates by deferring them until the sta has been uploaded. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20210201083324.3134-1-nbd@nbd.name [use rcu_access_pointer() instead since we won't dereference here] Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.c | 5 ++++- net/mac80211/rate.c | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index c9a8a2433e8a..48322e45e7dd 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -125,8 +125,11 @@ int drv_sta_state(struct ieee80211_local *local, } else if (old_state == IEEE80211_STA_AUTH && new_state == IEEE80211_STA_ASSOC) { ret = drv_sta_add(local, sdata, &sta->sta); - if (ret == 0) + if (ret == 0) { sta->uploaded = true; + if (rcu_access_pointer(sta->sta.rates)) + drv_sta_rate_tbl_update(local, sdata, &sta->sta); + } } else if (old_state == IEEE80211_STA_ASSOC && new_state == IEEE80211_STA_AUTH) { drv_sta_remove(local, sdata, &sta->sta); diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 45927202c71c..63652c39c8e0 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -960,7 +960,8 @@ int rate_control_set_rates(struct ieee80211_hw *hw, if (old) kfree_rcu(old, rcu_head); - drv_sta_rate_tbl_update(hw_to_local(hw), sta->sdata, pubsta); + if (sta->uploaded) + drv_sta_rate_tbl_update(hw_to_local(hw), sta->sdata, pubsta); ieee80211_sta_set_expected_throughput(pubsta, sta_get_expected_throughput(sta)); From 2e99dedc73f004f650b197c9b269c15c7e01ad15 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 2 Dec 2020 15:50:17 +0800 Subject: [PATCH 11/36] igc: Report speed and duplex as unknown when device is runtime suspended Similar to commit 165ae7a8feb5 ("igb: Report speed and duplex as unknown when device is runtime suspended"), if we try to read speed and duplex sysfs while the device is runtime suspended, igc will complain and stops working: [ 123.449883] igc 0000:03:00.0 enp3s0: PCIe link lost, device now detached [ 123.450052] BUG: kernel NULL pointer dereference, address: 0000000000000008 [ 123.450056] #PF: supervisor read access in kernel mode [ 123.450058] #PF: error_code(0x0000) - not-present page [ 123.450059] PGD 0 P4D 0 [ 123.450064] Oops: 0000 [#1] SMP NOPTI [ 123.450068] CPU: 0 PID: 2525 Comm: udevadm Tainted: G U W OE 5.10.0-1002-oem #2+rkl2-Ubuntu [ 123.450078] RIP: 0010:igc_rd32+0x1c/0x90 [igc] [ 123.450080] Code: c0 5d c3 b8 fd ff ff ff c3 0f 1f 44 00 00 0f 1f 44 00 00 55 89 f0 48 89 e5 41 56 41 55 41 54 49 89 c4 53 48 8b 57 08 48 01 d0 <44> 8b 28 41 83 fd ff 74 0c 5b 44 89 e8 41 5c 41 5d 4 [ 123.450083] RSP: 0018:ffffb0d100d6fcc0 EFLAGS: 00010202 [ 123.450085] RAX: 0000000000000008 RBX: ffffb0d100d6fd30 RCX: 0000000000000000 [ 123.450087] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff945a12716c10 [ 123.450089] RBP: ffffb0d100d6fce0 R08: ffff945a12716550 R09: ffff945a09874000 [ 123.450090] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000008 [ 123.450092] R13: ffff945a12716000 R14: ffff945a037da280 R15: ffff945a037da290 [ 123.450094] FS: 00007f3b34c868c0(0000) GS:ffff945b89200000(0000) knlGS:0000000000000000 [ 123.450096] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 123.450098] CR2: 0000000000000008 CR3: 00000001144de006 CR4: 0000000000770ef0 [ 123.450100] PKRU: 55555554 [ 123.450101] Call Trace: [ 123.450111] igc_ethtool_get_link_ksettings+0xd6/0x1b0 [igc] [ 123.450118] __ethtool_get_link_ksettings+0x71/0xb0 [ 123.450123] duplex_show+0x74/0xc0 [ 123.450129] dev_attr_show+0x1d/0x40 [ 123.450134] sysfs_kf_seq_show+0xa1/0x100 [ 123.450137] kernfs_seq_show+0x27/0x30 [ 123.450142] seq_read+0xb7/0x400 [ 123.450148] ? common_file_perm+0x72/0x170 [ 123.450151] kernfs_fop_read+0x35/0x1b0 [ 123.450155] vfs_read+0xb5/0x1b0 [ 123.450157] ksys_read+0x67/0xe0 [ 123.450160] __x64_sys_read+0x1a/0x20 [ 123.450164] do_syscall_64+0x38/0x90 [ 123.450168] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 123.450170] RIP: 0033:0x7f3b351fe142 [ 123.450173] Code: c0 e9 c2 fe ff ff 50 48 8d 3d 3a ca 0a 00 e8 f5 19 02 00 0f 1f 44 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 0f 05 <48> 3d 00 f0 ff ff 77 56 c3 0f 1f 44 00 00 48 83 ec 28 48 89 54 24 [ 123.450174] RSP: 002b:00007fffef2ec138 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 123.450177] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f3b351fe142 [ 123.450179] RDX: 0000000000001001 RSI: 00005644c047f070 RDI: 0000000000000003 [ 123.450180] RBP: 00007fffef2ec340 R08: 00005644c047f070 R09: 00007f3b352d9320 [ 123.450182] R10: 00005644c047c010 R11: 0000000000000246 R12: 00005644c047cbf0 [ 123.450184] R13: 00005644c047e6d0 R14: 0000000000000003 R15: 00007fffef2ec140 [ 123.450189] Modules linked in: rfcomm ccm cmac algif_hash algif_skcipher af_alg bnep toshiba_acpi industrialio toshiba_haps hp_accel lis3lv02d btusb btrtl btbcm btintel bluetooth ecdh_generic ecc joydev input_leds nls_iso8859_1 snd_sof_pci snd_sof_intel_byt snd_sof_intel_ipc snd_sof_intel_hda_common snd_soc_hdac_hda snd_hda_codec_hdmi snd_sof_xtensa_dsp snd_sof_intel_hda snd_sof snd_hda_ext_core snd_soc_acpi_intel_match snd_soc_acpi snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio snd_hda_intel snd_intel_dspcfg soundwire_intel soundwire_generic_allocation soundwire_cadence snd_hda_codec snd_hda_core ath10k_pci snd_hwdep intel_rapl_msr intel_rapl_common ath10k_core soundwire_bus snd_soc_core x86_pkg_temp_thermal ath intel_powerclamp snd_compress ac97_bus snd_pcm_dmaengine mac80211 snd_pcm coretemp snd_seq_midi snd_seq_midi_event snd_rawmidi kvm_intel cfg80211 snd_seq snd_seq_device snd_timer mei_hdcp kvm libarc4 snd crct10dif_pclmul ghash_clmulni_intel aesni_intel mei_me dell_wmi [ 123.450266] dell_smbios soundcore sparse_keymap dcdbas crypto_simd cryptd mei dell_uart_backlight glue_helper ee1004 wmi_bmof intel_wmi_thunderbolt dell_wmi_descriptor mac_hid efi_pstore acpi_pad acpi_tad intel_cstate sch_fq_codel parport_pc ppdev lp parport ip_tables x_tables autofs4 btrfs blake2b_generic raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear dm_mirror dm_region_hash dm_log hid_generic usbhid hid i915 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops cec crc32_pclmul rc_core drm intel_lpss_pci i2c_i801 ahci igc intel_lpss i2c_smbus idma64 xhci_pci libahci virt_dma xhci_pci_renesas wmi video pinctrl_tigerlake [ 123.450335] CR2: 0000000000000008 [ 123.450338] ---[ end trace 9f731e38b53c35cc ]--- The more generic approach will be wrap get_link_ksettings() with begin() and complete() callbacks, and calls runtime resume and runtime suspend routine respectively. However, igc is like igb, runtime resume routine uses rtnl_lock() which upper ethtool layer also uses. So to prevent a deadlock on rtnl, take a different approach, use pm_runtime_suspended() to avoid reading register while device is runtime suspended. Fixes: 8c5ad0dae93c ("igc: Add ethtool support") Signed-off-by: Kai-Heng Feng Acked-by: Sasha Neftin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 831f2f09de5f..ec8cd69d4992 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1714,7 +1714,8 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev, Asym_Pause); } - status = rd32(IGC_STATUS); + status = pm_runtime_suspended(&adapter->pdev->dev) ? + 0 : rd32(IGC_STATUS); if (status & IGC_STATUS_LU) { if (status & IGC_STATUS_SPEED_1000) { From ebc8d125062e7dccb7922b2190b097c20d88ad96 Mon Sep 17 00:00:00 2001 From: Kevin Lo Date: Sun, 20 Dec 2020 22:18:19 +0800 Subject: [PATCH 12/36] igc: set the default return value to -IGC_ERR_NVM in igc_write_nvm_srwr This patch sets the default return value to -IGC_ERR_NVM in igc_write_nvm_srwr. Without this change it wouldn't lead to a shadow RAM write EEWR timeout. Fixes: ab4056126813 ("igc: Add NVM support") Signed-off-by: Kevin Lo Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_i225.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c index 8b67d9b49a83..7ec04e48860c 100644 --- a/drivers/net/ethernet/intel/igc/igc_i225.c +++ b/drivers/net/ethernet/intel/igc/igc_i225.c @@ -219,9 +219,9 @@ static s32 igc_write_nvm_srwr(struct igc_hw *hw, u16 offset, u16 words, u16 *data) { struct igc_nvm_info *nvm = &hw->nvm; + s32 ret_val = -IGC_ERR_NVM; u32 attempts = 100000; u32 i, k, eewr = 0; - s32 ret_val = 0; /* A check for invalid values: offset too large, too many words, * too many words for the offset, and not enough words. @@ -229,7 +229,6 @@ static s32 igc_write_nvm_srwr(struct igc_hw *hw, u16 offset, u16 words, if (offset >= nvm->word_size || (words > (nvm->word_size - offset)) || words == 0) { hw_dbg("nvm parameter(s) out of bounds\n"); - ret_val = -IGC_ERR_NVM; goto out; } From b881145642ce0bbe2be521e0882e72a5cebe93b8 Mon Sep 17 00:00:00 2001 From: Kevin Lo Date: Thu, 7 Jan 2021 14:10:38 +0800 Subject: [PATCH 13/36] igc: check return value of ret_val in igc_config_fc_after_link_up Check return value from ret_val to make error check actually work. Fixes: 4eb8080143a9 ("igc: Add setup link functionality") Signed-off-by: Kevin Lo Acked-by: Sasha Neftin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c b/drivers/net/ethernet/intel/igc/igc_mac.c index 09cd0ec7ee87..67b8ffd21d8a 100644 --- a/drivers/net/ethernet/intel/igc/igc_mac.c +++ b/drivers/net/ethernet/intel/igc/igc_mac.c @@ -638,7 +638,7 @@ s32 igc_config_fc_after_link_up(struct igc_hw *hw) } out: - return 0; + return ret_val; } /** From 50af06d43eab6b09afc37aa7c8bbf69b14a3b2f7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 1 Feb 2021 16:29:56 +0100 Subject: [PATCH 14/36] staging: rtl8723bs: Move wiphy setup to after reading the regulatory settings from the chip Commit 81f153faacd0 ("staging: rtl8723bs: fix wireless regulatory API misuse") moved the wiphy_apply_custom_regulatory() call to earlier in the driver's init-sequence, so that it gets called before wiphy_register(). But at this point in time the eFuses which code the regulatory-settings for the chip have not been read by the driver yet, causing _rtw_reg_apply_flags() to set the IEEE80211_CHAN_DISABLED flag on *all* channels. On the device where I initially tested the fix, a Jumper EZpad 7 tablet, this does not cause any problems because shortly after init the rtw_reg_notifier() gets called fixing things up. I guess this happens into response to receiving a (broadcast) packet with regulatory info from the access-point ? But on another device with a RTL8723BS wifi chip, an Acer Switch 10E (SW3-016), the rtw_reg_notifier() never gets called. I assume that some fuse has been set on this device to ignore regulatory info received from access-points. This means that on the Acer the driver is stuck in a state with all channels disabled, leading to non working Wifi. We cannot move the wiphy_apply_custom_regulatory() call back, because that call must be made before the wiphy_register() call. Instead move the entire rtw_wdev_alloc() call to after the Efuses have been read, fixing all channels being disabled in the initial channel-map. Fixes: 81f153faacd0 ("staging: rtl8723bs: fix wireless regulatory API misuse") Cc: Johannes Berg Signed-off-by: Hans de Goede Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20210201152956.370186-2-hdegoede@redhat.com Signed-off-by: Johannes Berg --- drivers/staging/rtl8723bs/os_dep/sdio_intf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/rtl8723bs/os_dep/sdio_intf.c b/drivers/staging/rtl8723bs/os_dep/sdio_intf.c index b2208e5f190a..301ffff12e82 100644 --- a/drivers/staging/rtl8723bs/os_dep/sdio_intf.c +++ b/drivers/staging/rtl8723bs/os_dep/sdio_intf.c @@ -339,8 +339,6 @@ static struct adapter *rtw_sdio_if1_init(struct dvobj_priv *dvobj, const struct padapter = rtw_netdev_priv(pnetdev); - rtw_wdev_alloc(padapter, dvobj_to_dev(dvobj)); - /* 3 3. init driver special setting, interface, OS and hardware relative */ /* 4 3.1 set hardware operation functions */ @@ -378,6 +376,8 @@ static struct adapter *rtw_sdio_if1_init(struct dvobj_priv *dvobj, const struct goto free_hal_data; } + rtw_wdev_alloc(padapter, dvobj_to_dev(dvobj)); + /* 3 8. get WLan MAC address */ /* set mac addr */ rtw_macaddr_cfg(&psdio->func->dev, padapter->eeprompriv.mac_addr); From f559a356043a55bab25a4c00505ea65c50a956fb Mon Sep 17 00:00:00 2001 From: Aleksandr Loktionov Date: Sat, 23 Jan 2021 00:22:23 +0000 Subject: [PATCH 15/36] i40e: Revert "i40e: don't report link up for a VF who hasn't enabled queues" This reverts commit 2ad1274fa35ace5c6360762ba48d33b63da2396c VF queues were not brought up when PF was brought up after being downed if the VF driver disabled VFs queues during PF down. This could happen in some older or external VF driver implementations. The problem was that PF driver used vf->queues_enabled as a condition to decide what link-state it would send out which caused the issue. Remove the check for vf->queues_enabled in the VF link notify. Now VF will always be notified of the current link status. Also remove the queues_enabled member from i40e_vf structure as it is not used anymore. Otherwise VNF implementation was broken and caused a link flap. The original commit was a workaround to avoid breaking existing VFs though it's really a fault of the VF code not the PF. The commit should be safe to revert as all of the VFs we know of have been fixed. Also, since we now know there is a related bug in the workaround, removing it is preferred. Fixes: 2ad1274fa35a ("i40e: don't report link up for a VF who hasn't enabled") Signed-off-by: Aleksandr Loktionov Signed-off-by: Arkadiusz Kubalewski Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 13 +------------ drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h | 1 - 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 7efc61aacb0a..1b6ec9be155a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -55,12 +55,7 @@ static void i40e_vc_notify_vf_link_state(struct i40e_vf *vf) pfe.event = VIRTCHNL_EVENT_LINK_CHANGE; pfe.severity = PF_EVENT_SEVERITY_INFO; - - /* Always report link is down if the VF queues aren't enabled */ - if (!vf->queues_enabled) { - pfe.event_data.link_event.link_status = false; - pfe.event_data.link_event.link_speed = 0; - } else if (vf->link_forced) { + if (vf->link_forced) { pfe.event_data.link_event.link_status = vf->link_up; pfe.event_data.link_event.link_speed = (vf->link_up ? i40e_virtchnl_link_speed(ls->link_speed) : 0); @@ -70,7 +65,6 @@ static void i40e_vc_notify_vf_link_state(struct i40e_vf *vf) pfe.event_data.link_event.link_speed = i40e_virtchnl_link_speed(ls->link_speed); } - i40e_aq_send_msg_to_vf(hw, abs_vf_id, VIRTCHNL_OP_EVENT, 0, (u8 *)&pfe, sizeof(pfe), NULL); } @@ -2443,8 +2437,6 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg) } } - vf->queues_enabled = true; - error_param: /* send the response to the VF */ return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES, @@ -2466,9 +2458,6 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg) struct i40e_pf *pf = vf->pf; i40e_status aq_ret = 0; - /* Immediately mark queues as disabled */ - vf->queues_enabled = false; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; goto error_param; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 5491215d81de..091e32c1bb46 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -98,7 +98,6 @@ struct i40e_vf { unsigned int tx_rate; /* Tx bandwidth limit in Mbps */ bool link_forced; bool link_up; /* only valid if VF link is forced */ - bool queues_enabled; /* true if the VF queues are enabled */ bool spoofchk; u16 num_vlan; From f72f2fb8fb6be095b98af5d740ac50cffd0b0cae Mon Sep 17 00:00:00 2001 From: DENG Qingfang Date: Sat, 30 Jan 2021 21:43:34 +0800 Subject: [PATCH 16/36] net: dsa: mv88e6xxx: override existent unicast portvec in port_fdb_add Having multiple destination ports for a unicast address does not make sense. Make port_db_load_purge override existent unicast portvec instead of adding a new port bit. Fixes: 884729399260 ("net: dsa: mv88e6xxx: handle multiple ports in ATU") Signed-off-by: DENG Qingfang Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20210130134334.10243-1-dqfext@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index eafe6bedc692..54aa942eedaa 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1676,7 +1676,11 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, if (!entry.portvec) entry.state = 0; } else { - entry.portvec |= BIT(port); + if (state == MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC) + entry.portvec = BIT(port); + else + entry.portvec |= BIT(port); + entry.state = state; } From 5e9eff5dfa460cd1a74b7c1fde4fced7c04383af Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Thu, 28 Jan 2021 22:34:01 -0600 Subject: [PATCH 17/36] ibmvnic: device remove has higher precedence over reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Returning -EBUSY in ibmvnic_remove() does not actually hold the removal procedure since driver core doesn't care for the return value (see __device_release_driver() in drivers/base/dd.c calling dev->bus->remove()) though vio_bus_remove (in arch/powerpc/platforms/pseries/vio.c) records the return value and passes it on. [1] During the device removal precedure, checking for resetting bit is dropped so that we can continue executing all the cleanup calls in the rest of the remove function. Otherwise, it can cause latent memory leaks and kernel crashes. [1] https://lore.kernel.org/linuxppc-dev/20210117101242.dpwayq6wdgfdzirl@pengutronix.de/T/#m48f5befd96bc9842ece2a3ad14f4c27747206a53 Reported-by: Uwe Kleine-König Fixes: 7d7195a026ba ("ibmvnic: Do not process device remove during device reset") Signed-off-by: Lijun Pan Link: https://lore.kernel.org/r/20210129043402.95744-1-ljp@linux.ibm.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 8820c98ea891..f79034c786c8 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -5444,11 +5444,6 @@ static int ibmvnic_remove(struct vio_dev *dev) unsigned long flags; spin_lock_irqsave(&adapter->state_lock, flags); - if (test_bit(0, &adapter->resetting)) { - spin_unlock_irqrestore(&adapter->state_lock, flags); - return -EBUSY; - } - adapter->state = VNIC_REMOVING; spin_unlock_irqrestore(&adapter->state_lock, flags); From 938e0fcd3253efdef8924714158911286d08cfe1 Mon Sep 17 00:00:00 2001 From: Alexander Ovechkin Date: Mon, 1 Feb 2021 23:00:49 +0300 Subject: [PATCH 18/36] net: sched: replaced invalid qdisc tree flush helper in qdisc_replace Commit e5f0e8f8e456 ("net: sched: introduce and use qdisc tree flush/purge helpers") introduced qdisc tree flush/purge helpers, but erroneously used flush helper instead of purge helper in qdisc_replace function. This issue was found in our CI, that tests various qdisc setups by configuring qdisc and sending data through it. Call of invalid helper sporadically leads to corruption of vt_tree/cf_tree of hfsc_class that causes kernel oops: Oops: 0000 [#1] SMP PTI CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.11.0-8f6859df #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014 RIP: 0010:rb_insert_color+0x18/0x190 Code: c3 31 c0 c3 0f 1f 40 00 66 2e 0f 1f 84 00 00 00 00 00 48 8b 07 48 85 c0 0f 84 05 01 00 00 48 8b 10 f6 c2 01 0f 85 34 01 00 00 <48> 8b 4a 08 49 89 d0 48 39 c1 74 7d 48 85 c9 74 32 f6 01 01 75 2d RSP: 0018:ffffc900000b8bb0 EFLAGS: 00010246 RAX: ffff8881ef4c38b0 RBX: ffff8881d956e400 RCX: ffff8881ef4c38b0 RDX: 0000000000000000 RSI: ffff8881d956f0a8 RDI: ffff8881d956e4b0 RBP: 0000000000000000 R08: 000000d5c4e249da R09: 1600000000000000 R10: ffffc900000b8be0 R11: ffffc900000b8b28 R12: 0000000000000001 R13: 000000000000005a R14: ffff8881f0905000 R15: ffff8881f0387d00 FS: 0000000000000000(0000) GS:ffff8881f8b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 00000001f4796004 CR4: 0000000000060ee0 Call Trace: init_vf.isra.19+0xec/0x250 [sch_hfsc] hfsc_enqueue+0x245/0x300 [sch_hfsc] ? fib_rules_lookup+0x12a/0x1d0 ? __dev_queue_xmit+0x4b6/0x930 ? hfsc_delete_class+0x250/0x250 [sch_hfsc] __dev_queue_xmit+0x4b6/0x930 ? ip6_finish_output2+0x24d/0x590 ip6_finish_output2+0x24d/0x590 ? ip6_output+0x6c/0x130 ip6_output+0x6c/0x130 ? __ip6_finish_output+0x110/0x110 mld_sendpack+0x224/0x230 mld_ifc_timer_expire+0x186/0x2c0 ? igmp6_group_dropped+0x200/0x200 call_timer_fn+0x2d/0x150 run_timer_softirq+0x20c/0x480 ? tick_sched_do_timer+0x60/0x60 ? tick_sched_timer+0x37/0x70 __do_softirq+0xf7/0x2cb irq_exit+0xa0/0xb0 smp_apic_timer_interrupt+0x74/0x150 apic_timer_interrupt+0xf/0x20 Fixes: e5f0e8f8e456 ("net: sched: introduce and use qdisc tree flush/purge helpers") Signed-off-by: Alexander Ovechkin Reported-by: Alexander Kuznetsov Acked-by: Dmitry Monakhov Acked-by: Dmitry Yakunin Acked-by: Cong Wang Link: https://lore.kernel.org/r/20210201200049.299153-1-ovov@yandex-team.ru Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 639e465a108f..5b490b5591df 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1143,7 +1143,7 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, old = *pold; *pold = new; if (old != NULL) - qdisc_tree_flush_backlog(old); + qdisc_purge_queue(old); sch_tree_unlock(sch); return old; From c518adafa39f37858697ac9309c6cf1805581446 Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Mon, 1 Feb 2021 11:47:19 +0300 Subject: [PATCH 19/36] vsock: fix the race conditions in multi-transport support There are multiple similar bugs implicitly introduced by the commit c0cfa2d8a788fcf4 ("vsock: add multi-transports support") and commit 6a2c0962105ae8ce ("vsock: prevent transport modules unloading"). The bug pattern: [1] vsock_sock.transport pointer is copied to a local variable, [2] lock_sock() is called, [3] the local variable is used. VSOCK multi-transport support introduced the race condition: vsock_sock.transport value may change between [1] and [2]. Let's copy vsock_sock.transport pointer to local variables after the lock_sock() call. Fixes: c0cfa2d8a788fcf4 ("vsock: add multi-transports support") Signed-off-by: Alexander Popov Reviewed-by: Stefano Garzarella Reviewed-by: Jorgen Hansen Link: https://lore.kernel.org/r/20210201084719.2257066-1-alex.popov@linux.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/af_vsock.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index b12d3a322242..6894f21dc147 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1014,9 +1014,12 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; } else if (sock->type == SOCK_STREAM) { - const struct vsock_transport *transport = vsk->transport; + const struct vsock_transport *transport; + lock_sock(sk); + transport = vsk->transport; + /* Listening sockets that have connections in their accept * queue can be read. */ @@ -1099,10 +1102,11 @@ static int vsock_dgram_sendmsg(struct socket *sock, struct msghdr *msg, err = 0; sk = sock->sk; vsk = vsock_sk(sk); - transport = vsk->transport; lock_sock(sk); + transport = vsk->transport; + err = vsock_auto_bind(vsk); if (err) goto out; @@ -1561,10 +1565,11 @@ static int vsock_stream_setsockopt(struct socket *sock, err = 0; sk = sock->sk; vsk = vsock_sk(sk); - transport = vsk->transport; lock_sock(sk); + transport = vsk->transport; + switch (optname) { case SO_VM_SOCKETS_BUFFER_SIZE: COPY_IN(val); @@ -1697,7 +1702,6 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, sk = sock->sk; vsk = vsock_sk(sk); - transport = vsk->transport; total_written = 0; err = 0; @@ -1706,6 +1710,8 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, lock_sock(sk); + transport = vsk->transport; + /* Callers should not provide a destination with stream sockets. */ if (msg->msg_namelen) { err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; @@ -1840,11 +1846,12 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, sk = sock->sk; vsk = vsock_sk(sk); - transport = vsk->transport; err = 0; lock_sock(sk); + transport = vsk->transport; + if (!transport || sk->sk_state != TCP_ESTABLISHED) { /* Recvmsg is supposed to return 0 if a peer performs an * orderly shutdown. Differentiate between that case and when a From 28e104d00281ade30250b24e098bf50887671ea4 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Sat, 30 Jan 2021 01:27:47 +0300 Subject: [PATCH 20/36] net: ip_tunnel: fix mtu calculation dev->hard_header_len for tunnel interface is set only when header_ops are set too and already contains full overhead of any tunnel encapsulation. That's why there is not need to use this overhead twice in mtu calc. Fixes: fdafed459998 ("ip_gre: set dev->hard_header_len and dev->needed_headroom properly") Reported-by: Slava Bacherikov Signed-off-by: Vadim Fedorenko Link: https://lore.kernel.org/r/1611959267-20536-1-git-send-email-vfedorenko@novek.ru Signed-off-by: Jakub Kicinski --- net/ipv4/ip_tunnel.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 64594aa755f0..76a420c76f16 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -317,7 +317,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev) } dev->needed_headroom = t_hlen + hlen; - mtu -= (dev->hard_header_len + t_hlen); + mtu -= t_hlen; if (mtu < IPV4_MIN_MTU) mtu = IPV4_MIN_MTU; @@ -347,7 +347,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net, nt = netdev_priv(dev); t_hlen = nt->hlen + sizeof(struct iphdr); dev->min_mtu = ETH_MIN_MTU; - dev->max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen; + dev->max_mtu = IP_MAX_MTU - t_hlen; ip_tunnel_add(itn, nt); return nt; @@ -488,11 +488,10 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, int mtu; tunnel_hlen = md ? tunnel_hlen : tunnel->hlen; - pkt_size = skb->len - tunnel_hlen - dev->hard_header_len; + pkt_size = skb->len - tunnel_hlen; if (df) - mtu = dst_mtu(&rt->dst) - dev->hard_header_len - - sizeof(struct iphdr) - tunnel_hlen; + mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen); else mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; @@ -972,7 +971,7 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) { struct ip_tunnel *tunnel = netdev_priv(dev); int t_hlen = tunnel->hlen + sizeof(struct iphdr); - int max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen; + int max_mtu = IP_MAX_MTU - t_hlen; if (new_mtu < ETH_MIN_MTU) return -EINVAL; @@ -1149,10 +1148,9 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], mtu = ip_tunnel_bind_dev(dev); if (tb[IFLA_MTU]) { - unsigned int max = IP_MAX_MTU - dev->hard_header_len - nt->hlen; + unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr)); - mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, - (unsigned int)(max - sizeof(struct iphdr))); + mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max); } err = dev_set_mtu(dev, mtu); From c3df39ac9b0e3747bf8233ea9ce4ed5ceb3199d3 Mon Sep 17 00:00:00 2001 From: Dongseok Yi Date: Sat, 30 Jan 2021 08:13:27 +0900 Subject: [PATCH 21/36] udp: ipv4: manipulate network header of NATed UDP GRO fraglist UDP/IP header of UDP GROed frag_skbs are not updated even after NAT forwarding. Only the header of head_skb from ip_finish_output_gso -> skb_gso_segment is updated but following frag_skbs are not updated. A call path skb_mac_gso_segment -> inet_gso_segment -> udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list does not try to update UDP/IP header of the segment list but copy only the MAC header. Update port, addr and check of each skb of the segment list in __udp_gso_segment_list. It covers both SNAT and DNAT. Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.) Signed-off-by: Dongseok Yi Acked-by: Steffen Klassert Link: https://lore.kernel.org/r/1611962007-80092-1-git-send-email-dseok.yi@samsung.com Signed-off-by: Jakub Kicinski --- include/net/udp.h | 2 +- net/ipv4/udp_offload.c | 69 +++++++++++++++++++++++++++++++++++++++--- net/ipv6/udp_offload.c | 2 +- 3 files changed, 66 insertions(+), 7 deletions(-) diff --git a/include/net/udp.h b/include/net/udp.h index 877832bed471..01351ba25b87 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -178,7 +178,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, - netdev_features_t features); + netdev_features_t features, bool is_ipv6); static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) { diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index ff39e94781bf..cfc872689b99 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -187,8 +187,67 @@ out_unlock: } EXPORT_SYMBOL(skb_udp_tunnel_segment); +static void __udpv4_gso_segment_csum(struct sk_buff *seg, + __be32 *oldip, __be32 *newip, + __be16 *oldport, __be16 *newport) +{ + struct udphdr *uh; + struct iphdr *iph; + + if (*oldip == *newip && *oldport == *newport) + return; + + uh = udp_hdr(seg); + iph = ip_hdr(seg); + + if (uh->check) { + inet_proto_csum_replace4(&uh->check, seg, *oldip, *newip, + true); + inet_proto_csum_replace2(&uh->check, seg, *oldport, *newport, + false); + if (!uh->check) + uh->check = CSUM_MANGLED_0; + } + *oldport = *newport; + + csum_replace4(&iph->check, *oldip, *newip); + *oldip = *newip; +} + +static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs) +{ + struct sk_buff *seg; + struct udphdr *uh, *uh2; + struct iphdr *iph, *iph2; + + seg = segs; + uh = udp_hdr(seg); + iph = ip_hdr(seg); + + if ((udp_hdr(seg)->dest == udp_hdr(seg->next)->dest) && + (udp_hdr(seg)->source == udp_hdr(seg->next)->source) && + (ip_hdr(seg)->daddr == ip_hdr(seg->next)->daddr) && + (ip_hdr(seg)->saddr == ip_hdr(seg->next)->saddr)) + return segs; + + while ((seg = seg->next)) { + uh2 = udp_hdr(seg); + iph2 = ip_hdr(seg); + + __udpv4_gso_segment_csum(seg, + &iph2->saddr, &iph->saddr, + &uh2->source, &uh->source); + __udpv4_gso_segment_csum(seg, + &iph2->daddr, &iph->daddr, + &uh2->dest, &uh->dest); + } + + return segs; +} + static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb, - netdev_features_t features) + netdev_features_t features, + bool is_ipv6) { unsigned int mss = skb_shinfo(skb)->gso_size; @@ -198,11 +257,11 @@ static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb, udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss); - return skb; + return is_ipv6 ? skb : __udpv4_gso_segment_list_csum(skb); } struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, - netdev_features_t features) + netdev_features_t features, bool is_ipv6) { struct sock *sk = gso_skb->sk; unsigned int sum_truesize = 0; @@ -214,7 +273,7 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, __be16 newlen; if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST) - return __udp_gso_segment_list(gso_skb, features); + return __udp_gso_segment_list(gso_skb, features, is_ipv6); mss = skb_shinfo(gso_skb)->gso_size; if (gso_skb->len <= sizeof(*uh) + mss) @@ -328,7 +387,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, goto out; if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) - return __udp_gso_segment(skb, features); + return __udp_gso_segment(skb, features, false); mss = skb_shinfo(skb)->gso_size; if (unlikely(skb->len <= mss)) diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index c7bd7b1a04c1..faa823c24292 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -42,7 +42,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, goto out; if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) - return __udp_gso_segment(skb, features); + return __udp_gso_segment(skb, features, true); mss = skb_shinfo(skb)->gso_size; if (unlikely(skb->len <= mss)) From 31628201545548e1ef167f2c55eb6fd7d3562f12 Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Sat, 30 Jan 2021 20:05:18 +0100 Subject: [PATCH 22/36] docs: networking: swap words in icmp_errors_use_inbound_ifaddr doc Signed-off-by: Vincent Bernat Link: https://lore.kernel.org/r/20210130190518.854806-1-vincent@bernat.ch Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index c7b775da9554..fa544e9037b9 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1196,7 +1196,7 @@ icmp_errors_use_inbound_ifaddr - BOOLEAN If non-zero, the message will be sent with the primary address of the interface that received the packet that caused the icmp error. - This is the behaviour network many administrators will expect from + This is the behaviour many network administrators will expect from a router. And it can make debugging complicated network layouts much easier. From ed5e83a3c02948dad9dc4e68fb4e535baa5da630 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Mon, 1 Feb 2021 18:11:10 +0200 Subject: [PATCH 23/36] net/mlx5: Fix function calculation for page trees The function calculation always results in a value of 0. This works generally, but when the release all pages feature is enabled it will result in crashes. Fixes: 0aa128475d33 ("net/mlx5: Maintain separate page trees for ECPF and PF functions") Signed-off-by: Daniel Jurgens Reported-by: Colin Ian King Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index eaa8958e24d7..c0656d4782e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -76,7 +76,7 @@ enum { static u32 get_function(u16 func_id, bool ec_function) { - return func_id & (ec_function << 16); + return (u32)func_id | (ec_function << 16); } static struct rb_root *page_root_per_function(struct mlx5_core_dev *dev, u32 function) From a5bfe6b4675e0eefbd9418055b5cc6e89af27eb4 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 20 Jan 2021 17:41:18 +0200 Subject: [PATCH 24/36] net/mlx5: Fix leak upon failure of rule creation When creation of a new rule that requires allocation of an FTE fails, need to call to tree_put_node on the FTE in order to release its' resource. Fixes: cefc23554fc2 ("net/mlx5: Fix FTE cleanup") Signed-off-by: Maor Gottlieb Reviewed-by: Alaa Hleihel Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 0fcee702b808..ee4d86c1f436 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1760,6 +1760,7 @@ search_again_locked: if (!fte_tmp) continue; rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp); + /* No error check needed here, because insert_fte() is not called */ up_write_ref_node(&fte_tmp->node, false); tree_put_node(&fte_tmp->node, false); kmem_cache_free(steering->ftes_cache, fte); @@ -1812,6 +1813,8 @@ skip_search: up_write_ref_node(&g->node, false); rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); + if (IS_ERR(rule)) + tree_put_node(&fte->node, false); return rule; } rule = ERR_PTR(-ENOENT); @@ -1910,6 +1913,8 @@ search_again_locked: up_write_ref_node(&g->node, false); rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); + if (IS_ERR(rule)) + tree_put_node(&fte->node, false); tree_put_node(&g->node, false); return rule; From 5a2ba25a55c4dc0f143567c99aede768b6628ebd Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Thu, 28 Jan 2021 14:37:59 +0200 Subject: [PATCH 25/36] net/mlx5e: Update max_opened_tc also when channels are closed max_opened_tc is used for stats, so that potentially non-zero stats won't disappear when num_tc decreases. However, mlx5e_setup_tc_mqprio fails to update it in the flow where channels are closed. This commit fixes it. The new value of priv->channels.params.num_tc is always checked on exit. In case of errors it will just be the old value, and in case of success it will be the updated value. Fixes: 05909babce53 ("net/mlx5e: Avoid reset netdev stats on configuration changes") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a9d824a9cb05..3fc7d18ac868 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3627,12 +3627,10 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_num_channels_changed_ctx, NULL); - if (err) - goto out; - priv->max_opened_tc = max_t(u8, priv->max_opened_tc, - new_channels.params.num_tc); out: + priv->max_opened_tc = max_t(u8, priv->max_opened_tc, + priv->channels.params.num_tc); mutex_unlock(&priv->state_lock); return err; } From a34ffec8af8ff1c730697a99e09ec7b74a3423b6 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Sun, 31 Jan 2021 18:47:15 +0200 Subject: [PATCH 26/36] net/mlx5e: Release skb in case of failure in tc update skb In case of failure in tc update skb the packet is dropped without freeing the skb. Fixed by freeing the skb in case failure in tc update skb. Fixes: d6d27782864f ("net/mlx5: E-Switch, Restore chain id on miss") Fixes: c75690972228 ("net/mlx5e: Add tc chains offload support for nic flows") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 7f5851c61218..ca4b55839a8a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1262,8 +1262,10 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); if (mlx5e_cqe_regb_chain(cqe)) - if (!mlx5e_tc_update_skb(cqe, skb)) + if (!mlx5e_tc_update_skb(cqe, skb)) { + dev_kfree_skb_any(skb); goto free_wqe; + } napi_gro_receive(rq->cq.napi, skb); @@ -1316,8 +1318,10 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) if (rep->vlan && skb_vlan_tag_present(skb)) skb_vlan_pop(skb); - if (!mlx5e_rep_tc_update_skb(cqe, skb, &tc_priv)) + if (!mlx5e_rep_tc_update_skb(cqe, skb, &tc_priv)) { + dev_kfree_skb_any(skb); goto free_wqe; + } napi_gro_receive(rq->cq.napi, skb); @@ -1371,8 +1375,10 @@ static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); - if (!mlx5e_rep_tc_update_skb(cqe, skb, &tc_priv)) + if (!mlx5e_rep_tc_update_skb(cqe, skb, &tc_priv)) { + dev_kfree_skb_any(skb); goto mpwrq_cqe_out; + } napi_gro_receive(rq->cq.napi, skb); @@ -1528,8 +1534,10 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); if (mlx5e_cqe_regb_chain(cqe)) - if (!mlx5e_tc_update_skb(cqe, skb)) + if (!mlx5e_tc_update_skb(cqe, skb)) { + dev_kfree_skb_any(skb); goto mpwrq_cqe_out; + } napi_gro_receive(rq->cq.napi, skb); From 88c7a9fd9bdd3e453f04018920964c6f848a591a Mon Sep 17 00:00:00 2001 From: Xie He Date: Sun, 31 Jan 2021 21:57:06 -0800 Subject: [PATCH 27/36] net: lapb: Copy the skb before sending a packet When sending a packet, we will prepend it with an LAPB header. This modifies the shared parts of a cloned skb, so we should copy the skb rather than just clone it, before we prepend the header. In "Documentation/networking/driver.rst" (the 2nd point), it states that drivers shouldn't modify the shared parts of a cloned skb when transmitting. The "dev_queue_xmit_nit" function in "net/core/dev.c", which is called when an skb is being sent, clones the skb and sents the clone to AF_PACKET sockets. Because the LAPB drivers first remove a 1-byte pseudo-header before handing over the skb to us, if we don't copy the skb before prepending the LAPB header, the first byte of the packets received on AF_PACKET sockets can be corrupted. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xie He Acked-by: Martin Schiller Link: https://lore.kernel.org/r/20210201055706.415842-1-xie.he.0141@gmail.com Signed-off-by: Jakub Kicinski --- net/lapb/lapb_out.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/lapb/lapb_out.c b/net/lapb/lapb_out.c index 7a4d0715d1c3..a966d29c772d 100644 --- a/net/lapb/lapb_out.c +++ b/net/lapb/lapb_out.c @@ -82,7 +82,8 @@ void lapb_kick(struct lapb_cb *lapb) skb = skb_dequeue(&lapb->write_queue); do { - if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) { + skbn = skb_copy(skb, GFP_ATOMIC); + if (!skbn) { skb_queue_head(&lapb->write_queue, skb); break; } From 43f4a20a1266d393840ce010f547486d14cc0071 Mon Sep 17 00:00:00 2001 From: Stefan Chulski Date: Mon, 1 Feb 2021 11:35:39 +0200 Subject: [PATCH 28/36] net: mvpp2: TCAM entry enable should be written after SRAM data Last TCAM data contains TCAM enable bit. It should be written after SRAM data before entry enabled. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Stefan Chulski Link: https://lore.kernel.org/r/1612172139-28343-1-git-send-email-stefanc@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c index a30eb90ba3d2..dd590086fe6a 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c @@ -29,16 +29,16 @@ static int mvpp2_prs_hw_write(struct mvpp2 *priv, struct mvpp2_prs_entry *pe) /* Clear entry invalidation bit */ pe->tcam[MVPP2_PRS_TCAM_INV_WORD] &= ~MVPP2_PRS_TCAM_INV_MASK; - /* Write tcam index - indirect access */ - mvpp2_write(priv, MVPP2_PRS_TCAM_IDX_REG, pe->index); - for (i = 0; i < MVPP2_PRS_TCAM_WORDS; i++) - mvpp2_write(priv, MVPP2_PRS_TCAM_DATA_REG(i), pe->tcam[i]); - /* Write sram index - indirect access */ mvpp2_write(priv, MVPP2_PRS_SRAM_IDX_REG, pe->index); for (i = 0; i < MVPP2_PRS_SRAM_WORDS; i++) mvpp2_write(priv, MVPP2_PRS_SRAM_DATA_REG(i), pe->sram[i]); + /* Write tcam index - indirect access */ + mvpp2_write(priv, MVPP2_PRS_TCAM_IDX_REG, pe->index); + for (i = 0; i < MVPP2_PRS_TCAM_WORDS; i++) + mvpp2_write(priv, MVPP2_PRS_TCAM_DATA_REG(i), pe->tcam[i]); + return 0; } From a11148e6fcce2ae53f47f0a442d098d860b4f7db Mon Sep 17 00:00:00 2001 From: Sabyrzhan Tasbolatov Date: Tue, 2 Feb 2021 02:32:33 +0600 Subject: [PATCH 29/36] net/rds: restrict iovecs length for RDS_CMSG_RDMA_ARGS syzbot found WARNING in rds_rdma_extra_size [1] when RDS_CMSG_RDMA_ARGS control message is passed with user-controlled 0x40001 bytes of args->nr_local, causing order >= MAX_ORDER condition. The exact value 0x40001 can be checked with UIO_MAXIOV which is 0x400. So for kcalloc() 0x400 iovecs with sizeof(struct rds_iovec) = 0x10 is the closest limit, with 0x10 leftover. Same condition is currently done in rds_cmsg_rdma_args(). [1] WARNING: mm/page_alloc.c:5011 [..] Call Trace: alloc_pages_current+0x18c/0x2a0 mm/mempolicy.c:2267 alloc_pages include/linux/gfp.h:547 [inline] kmalloc_order+0x2e/0xb0 mm/slab_common.c:837 kmalloc_order_trace+0x14/0x120 mm/slab_common.c:853 kmalloc_array include/linux/slab.h:592 [inline] kcalloc include/linux/slab.h:621 [inline] rds_rdma_extra_size+0xb2/0x3b0 net/rds/rdma.c:568 rds_rm_size net/rds/send.c:928 [inline] Reported-by: syzbot+1bd2b07f93745fa38425@syzkaller.appspotmail.com Signed-off-by: Sabyrzhan Tasbolatov Acked-by: Santosh Shilimkar Link: https://lore.kernel.org/r/20210201203233.1324704-1-snovitoll@gmail.com Signed-off-by: Jakub Kicinski --- net/rds/rdma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 1d0afb1dd77b..6f1a50d50d06 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -565,6 +565,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args, if (args->nr_local == 0) return -EINVAL; + if (args->nr_local > UIO_MAXIOV) + return -EMSGSIZE; + iov->iov = kcalloc(args->nr_local, sizeof(struct rds_iovec), GFP_KERNEL); From cc9f07a838c4988ed244d0907cb71d54b85482a5 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 1 Feb 2021 21:50:56 +0100 Subject: [PATCH 30/36] r8169: fix WoL on shutdown if CONFIG_DEBUG_SHIRQ is set So far phy_disconnect() is called before free_irq(). If CONFIG_DEBUG_SHIRQ is set and interrupt is shared, then free_irq() creates an "artificial" interrupt by calling the interrupt handler. The "link change" flag is set in the interrupt status register, causing phylib to eventually call phy_suspend(). Because the net_device is detached from the PHY already, the PHY driver can't recognize that WoL is configured and powers down the PHY. Fixes: f1e911d5d0df ("r8169: add basic phylib support") Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/fe732c2c-a473-9088-3974-df83cfbd6efd@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index f2269c9f5f05..0d78408b4e26 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4704,10 +4704,10 @@ static int rtl8169_close(struct net_device *dev) cancel_work_sync(&tp->wk.work); - phy_disconnect(tp->phydev); - free_irq(pci_irq_vector(pdev, 0), tp); + phy_disconnect(tp->phydev); + dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray, tp->RxPhyAddr); dma_free_coherent(&pdev->dev, R8169_TX_RING_BYTES, tp->TxDescArray, From 4ace7a6e287b7e3b33276cd9fe870c326f880480 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 2 Feb 2021 08:55:25 +0300 Subject: [PATCH 31/36] net: ipa: pass correct dma_handle to dma_free_coherent() The "ring->addr = addr;" assignment is done a few lines later so we can't use "ring->addr" yet. The correct dma_handle is "addr". Fixes: 650d1603825d ("soc: qcom: ipa: the generic software interface") Signed-off-by: Dan Carpenter Reviewed-by: Alex Elder Link: https://lore.kernel.org/r/YBjpTU2oejkNIULT@mwanda Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 14d9a791924b..b8f39e48a009 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -1373,7 +1373,7 @@ static int gsi_ring_alloc(struct gsi *gsi, struct gsi_ring *ring, u32 count) /* Hardware requires a 2^n ring size, with alignment equal to size */ ring->virt = dma_alloc_coherent(dev, size, &addr, GFP_KERNEL); if (ring->virt && addr % size) { - dma_free_coherent(dev, size, ring->virt, ring->addr); + dma_free_coherent(dev, size, ring->virt, addr); dev_err(dev, "unable to alloc 0x%zx-aligned ring buffer\n", size); return -EINVAL; /* Not a good error value, but distinct */ From e6cdd6d80baedadb96d7060a509f51769e53021d Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 1 Feb 2021 17:26:06 -0600 Subject: [PATCH 32/36] net: ipa: add a missing __iomem attribute The virt local variable in gsi_channel_state() does not have an __iomem attribute but should. Fix this. Signed-off-by: Alex Elder Reviewed-by: Amy Parker Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index b8f39e48a009..34e5f2155d62 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -440,7 +440,7 @@ static void gsi_evt_ring_de_alloc_command(struct gsi *gsi, u32 evt_ring_id) static enum gsi_channel_state gsi_channel_state(struct gsi_channel *channel) { u32 channel_id = gsi_channel_id(channel); - void *virt = channel->gsi->virt; + void __iomem *virt = channel->gsi->virt; u32 val; val = ioread32(virt + GSI_CH_C_CNTXT_0_OFFSET(channel_id)); From 088f8a2396d813e7ee49272a1a59b55139c81e64 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 1 Feb 2021 17:26:07 -0600 Subject: [PATCH 33/36] net: ipa: be explicit about endianness Sparse warns that the assignment of the metadata mask for a QMAP endpoint in ipa_endpoint_init_hdr_metadata_mask() is a bad assignment. We know we want the mask value to be big endian, even though the value we write is in host byte order. Use a __force tag to indicate we really mean it. Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_endpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 9f4be9812a1f..448d89da1e45 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -588,7 +588,7 @@ static void ipa_endpoint_init_hdr_metadata_mask(struct ipa_endpoint *endpoint) /* Note that HDR_ENDIANNESS indicates big endian header fields */ if (endpoint->data->qmap) - val = cpu_to_be32(IPA_ENDPOINT_QMAP_METADATA_MASK); + val = (__force u32)cpu_to_be32(IPA_ENDPOINT_QMAP_METADATA_MASK); iowrite32(val, endpoint->ipa->reg_virt + offset); } From c13899f187285eaa5bfc30f8692888ba2e7765cb Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 1 Feb 2021 17:26:08 -0600 Subject: [PATCH 34/36] net: ipa: use the right accessor in ipa_endpoint_status_skip() When extracting the destination endpoint ID from the status in ipa_endpoint_status_skip(), u32_get_bits() is used. This happens to work, but it's wrong: the structure field is only 8 bits wide instead of 32. Fix this by using u8_get_bits() to get the destination endpoint ID. Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_endpoint.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 448d89da1e45..612afece303f 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -1164,8 +1164,8 @@ static bool ipa_endpoint_status_skip(struct ipa_endpoint *endpoint, return true; if (!status->pkt_len) return true; - endpoint_id = u32_get_bits(status->endp_dst_idx, - IPA_STATUS_DST_IDX_FMASK); + endpoint_id = u8_get_bits(status->endp_dst_idx, + IPA_STATUS_DST_IDX_FMASK); if (endpoint_id != endpoint->endpoint_id) return true; From 113b6ea09ccd46157d8d37fa9fabf1ca2315e503 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 1 Feb 2021 17:26:09 -0600 Subject: [PATCH 35/36] net: ipa: fix two format specifier errors Fix two format specifiers that used %lu for a size_t in "ipa_mem.c". Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_mem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c index 0cc3a3374caa..f25029b9ec85 100644 --- a/drivers/net/ipa/ipa_mem.c +++ b/drivers/net/ipa/ipa_mem.c @@ -336,7 +336,7 @@ static void ipa_imem_exit(struct ipa *ipa) size = iommu_unmap(domain, ipa->imem_iova, ipa->imem_size); if (size != ipa->imem_size) - dev_warn(dev, "unmapped %zu IMEM bytes, expected %lu\n", + dev_warn(dev, "unmapped %zu IMEM bytes, expected %zu\n", size, ipa->imem_size); } else { dev_err(dev, "couldn't get IPA IOMMU domain for IMEM\n"); @@ -440,7 +440,7 @@ static void ipa_smem_exit(struct ipa *ipa) size = iommu_unmap(domain, ipa->smem_iova, ipa->smem_size); if (size != ipa->smem_size) - dev_warn(dev, "unmapped %zu SMEM bytes, expected %lu\n", + dev_warn(dev, "unmapped %zu SMEM bytes, expected %zu\n", size, ipa->smem_size); } else { From 6c9f18f294c4a1a6d8b1097e39c325481664ee1c Mon Sep 17 00:00:00 2001 From: Andreas Oetken Date: Tue, 2 Feb 2021 10:03:04 +0100 Subject: [PATCH 36/36] net: hsr: align sup_multicast_addr in struct hsr_priv to u16 boundary sup_multicast_addr is passed to ether_addr_equal for address comparison which casts the address inputs to u16 leading to an unaligned access. Aligning the sup_multicast_addr to u16 boundary fixes the issue. Signed-off-by: Andreas Oetken Link: https://lore.kernel.org/r/20210202090304.2740471-1-ennoerlangen@gmail.com Signed-off-by: Jakub Kicinski --- net/hsr/hsr_main.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 7dc92ce5a134..a9c30a608e35 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -217,7 +217,10 @@ struct hsr_priv { u8 net_id; /* for PRP, it occupies most significant 3 bits * of lan_id */ - unsigned char sup_multicast_addr[ETH_ALEN]; + unsigned char sup_multicast_addr[ETH_ALEN] __aligned(sizeof(u16)); + /* Align to u16 boundary to avoid unaligned access + * in ether_addr_equal + */ #ifdef CONFIG_DEBUG_FS struct dentry *node_tbl_root; #endif