From 863580418bc82062083be854355f2213d3d804f5 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 1 Sep 2021 14:43:50 +0300 Subject: [PATCH 001/543] regulator: qcom-rpmh-regulator: fix pm8009-1 ldo7 resource name Fix a typo in the pm8009 LDO7 declaration, it uses resource name ldo%s6 instead of ldo%s7. Fixes: 951384cabc5d ("regulator: qcom-rpmh-regulator: add pm8009-1 chip revision") Signed-off-by: Dmitry Baryshkov Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20210901114350.1106073-1-dmitry.baryshkov@linaro.org Signed-off-by: Mark Brown --- drivers/regulator/qcom-rpmh-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c index 6cca910a76de..7f458d510483 100644 --- a/drivers/regulator/qcom-rpmh-regulator.c +++ b/drivers/regulator/qcom-rpmh-regulator.c @@ -991,7 +991,7 @@ static const struct rpmh_vreg_init_data pm8009_1_vreg_data[] = { RPMH_VREG("ldo4", "ldo%s4", &pmic5_nldo, "vdd-l4"), RPMH_VREG("ldo5", "ldo%s5", &pmic5_pldo, "vdd-l5-l6"), RPMH_VREG("ldo6", "ldo%s6", &pmic5_pldo, "vdd-l5-l6"), - RPMH_VREG("ldo7", "ldo%s6", &pmic5_pldo_lv, "vdd-l7"), + RPMH_VREG("ldo7", "ldo%s7", &pmic5_pldo_lv, "vdd-l7"), {} }; From 49ca6153208f6efc409c1deb82dd5bcbb519d7e1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 24 Aug 2021 09:39:31 +0200 Subject: [PATCH 002/543] bpf: Relicense disassembler as GPL-2.0-only OR BSD-2-Clause Some time ago we dual-licensed both libbpf and bpftool through commits 1bc38b8ff6cc ("libbpf: relicense libbpf as LGPL-2.1 OR BSD-2-Clause") and 907b22365115 ("tools: bpftool: dual license all files"). The latter missed the disasm.{c,h} which we pull in via kernel/bpf/ such that we have a single source for verifier as well as bpftool asm dumping, see also f4ac7e0b5cc8 ("bpf: move instruction printing into a separate file"). It is currently GPL-2.0-only and missed the conversion in 907b22365115, therefore relicense the two as GPL-2.0-only OR BSD-2-Clause as well. Spotted-by: Quentin Monnet Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Thomas Graf Acked-by: Brendan Jackman Acked-by: Jakub Kicinski Acked-by: Jiri Olsa Acked-by: Simon Horman Acked-by: Martin KaFai Lau Acked-by: Xu Kuohai Acked-by: Edward Cree --- kernel/bpf/disasm.c | 2 +- kernel/bpf/disasm.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index ca3cd9aaa6ce..7b4afb7d96db 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-only +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com * Copyright (c) 2016 Facebook */ diff --git a/kernel/bpf/disasm.h b/kernel/bpf/disasm.h index e546b18d27da..a4b040793f44 100644 --- a/kernel/bpf/disasm.h +++ b/kernel/bpf/disasm.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com * Copyright (c) 2016 Facebook */ From aeef8b5089b76852bd84889f2809e69a7cfb414e Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Wed, 11 Aug 2021 17:07:37 -0400 Subject: [PATCH 003/543] x86/pat: Pass valid address to sanitize_phys() The end address passed to memtype_reserve() is handed directly to sanitize_phys(). However, end is exclusive and sanitize_phys() expects an inclusive address. If end falls at the end of the physical address space, sanitize_phys() will return 0. This can result in drivers failing to load, and the following warning: WARNING: CPU: 26 PID: 749 at arch/x86/mm/pat.c:354 reserve_memtype+0x262/0x450 reserve_memtype failed: [mem 0x3ffffff00000-0xffffffffffffffff], req uncached-minus Call Trace: [] reserve_memtype+0x262/0x450 [] ioremap_nocache+0x1a/0x20 [] mpt3sas_base_map_resources+0x151/0xa60 [mpt3sas] [] mpt3sas_base_attach+0xf5/0xa50 [mpt3sas] ---[ end trace 6d6eea4438db89ef ]--- ioremap reserve_memtype failed -22 mpt3sas_cm0: unable to map adapter memory! or resource not found mpt3sas_cm0: failure at drivers/scsi/mpt3sas/mpt3sas_scsih.c:10597/_scsih_probe()! Fix this by passing the inclusive end address to sanitize_phys(). Fixes: 510ee090abc3 ("x86/mm/pat: Prepare {reserve, free}_memtype() for "decoy" addresses") Signed-off-by: Jeff Moyer Signed-off-by: Thomas Gleixner Reviewed-by: David Hildenbrand Reviewed-by: Dan Williams Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/x49o8a3pu5i.fsf@segfault.boston.devel.redhat.com --- arch/x86/mm/pat/memtype.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 3112ca7786ed..4ba2a3ee4bce 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -583,7 +583,12 @@ int memtype_reserve(u64 start, u64 end, enum page_cache_mode req_type, int err = 0; start = sanitize_phys(start); - end = sanitize_phys(end); + + /* + * The end address passed into this function is exclusive, but + * sanitize_phys() expects an inclusive address. + */ + end = sanitize_phys(end - 1) + 1; if (start >= end) { WARN(1, "%s failed: [mem %#010Lx-%#010Lx], req %s\n", __func__, start, end - 1, cattr_name(req_type)); From d7109fe3a0991a0f7b4ac099b78c908e3b619787 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 26 Aug 2021 18:03:17 +0300 Subject: [PATCH 004/543] x86/platform: Increase maximum GPIO number for X86_64 By default the 512 GPIOs is the maximum on any x86 platform. With, for example, Intel Tiger Lake-H the SoC based controller occupies up to 480 pins. This leaves only 32 available for GPIO expanders or other drivers, like PMIC. Hence, bump the maximum GPIO number to 1024 for X86_64 and leave 512 for X86_32. Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Reviewed-by: Linus Walleij Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20210826150317.29435-1-andriy.shevchenko@linux.intel.com --- arch/x86/Kconfig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 421fa9e38c60..10163887c5eb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -340,6 +340,11 @@ config NEED_PER_CPU_PAGE_FIRST_CHUNK config ARCH_HIBERNATION_POSSIBLE def_bool y +config ARCH_NR_GPIO + int + default 1024 if X86_64 + default 512 + config ARCH_SUSPEND_POSSIBLE def_bool y From 0ddc5e55e6f1da1286fb2646f4248bf7da31a601 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 3 Sep 2021 09:29:07 +0100 Subject: [PATCH 005/543] Documentation: Fix irq-domain.rst build warning Correctly escape the * not to be used as emphasis. Also take this opportunity to clarify the fate of the rest of the legacy APIs. Reported-by: Stephen Rothwell Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210903085343.923036-1-maz@kernel.org --- Documentation/core-api/irq/irq-domain.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Documentation/core-api/irq/irq-domain.rst b/Documentation/core-api/irq/irq-domain.rst index 6979b4af2c1f..9c0e8758037a 100644 --- a/Documentation/core-api/irq/irq-domain.rst +++ b/Documentation/core-api/irq/irq-domain.rst @@ -175,9 +175,10 @@ for IRQ numbers that are passed to struct device registrations. In that case the Linux IRQ numbers cannot be dynamically assigned and the legacy mapping should be used. -As the name implies, the *_legacy() functions are deprecated and only +As the name implies, the \*_legacy() functions are deprecated and only exist to ease the support of ancient platforms. No new users should be -added. +added. Same goes for the \*_simple() functions when their use results +in the legacy behaviour. The legacy map assumes a contiguous range of IRQ numbers has already been allocated for the controller and that the IRQ number can be From f1940d4e9cbe6208e7e77e433c587af108152a17 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 31 Aug 2021 16:39:16 +0200 Subject: [PATCH 006/543] Drivers: hv: vmbus: Fix kernel crash upon unbinding a device from uio_hv_generic driver The following crash happens when a never-used device is unbound from uio_hv_generic driver: kernel BUG at mm/slub.c:321! invalid opcode: 0000 [#1] SMP PTI CPU: 0 PID: 4001 Comm: bash Kdump: loaded Tainted: G X --------- --- 5.14.0-0.rc2.23.el9.x86_64 #1 Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS 090008 12/07/2018 RIP: 0010:__slab_free+0x1d5/0x3d0 ... Call Trace: ? pick_next_task_fair+0x18e/0x3b0 ? __cond_resched+0x16/0x40 ? vunmap_pmd_range.isra.0+0x154/0x1c0 ? __vunmap+0x22d/0x290 ? hv_ringbuffer_cleanup+0x36/0x40 [hv_vmbus] kfree+0x331/0x380 ? hv_uio_remove+0x43/0x60 [uio_hv_generic] hv_ringbuffer_cleanup+0x36/0x40 [hv_vmbus] vmbus_free_ring+0x21/0x60 [hv_vmbus] hv_uio_remove+0x4f/0x60 [uio_hv_generic] vmbus_remove+0x23/0x30 [hv_vmbus] __device_release_driver+0x17a/0x230 device_driver_detach+0x3c/0xa0 unbind_store+0x113/0x130 ... The problem appears to be that we free 'ring_info->pkt_buffer' twice: first, when the device is unbound from in-kernel driver (netvsc in this case) and second from hv_uio_remove(). Normally, ring buffer is supposed to be re-initialized from hv_uio_open() but this happens when UIO device is being opened and this is not guaranteed to happen. Generally, it is OK to call hv_ringbuffer_cleanup() twice for the same channel (which is being handed over between in-kernel drivers and UIO) even if we didn't call hv_ringbuffer_init() in between. We, however, need to avoid kfree() call for an already freed pointer. Fixes: adae1e931acd ("Drivers: hv: vmbus: Copy packets sent by Hyper-V out of the ring buffer") Signed-off-by: Vitaly Kuznetsov Reviewed-by: Andrea Parri Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20210831143916.144983-1-vkuznets@redhat.com Signed-off-by: Wei Liu --- drivers/hv/ring_buffer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 2aee356840a2..314015d9e912 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -245,6 +245,7 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info) mutex_unlock(&ring_info->ring_buffer_mutex); kfree(ring_info->pkt_buffer); + ring_info->pkt_buffer = NULL; ring_info->pkt_buffer_size = 0; } From 5457773ef99f25fcc4b238ac76b68e28273250f4 Mon Sep 17 00:00:00 2001 From: Tobias Schramm Date: Fri, 27 Aug 2021 07:03:57 +0200 Subject: [PATCH 007/543] spi: rockchip: handle zero length transfers without timing out Previously zero length transfers submitted to the Rokchip SPI driver would time out in the SPI layer. This happens because the SPI peripheral does not trigger a transfer completion interrupt for zero length transfers. Fix that by completing zero length transfers immediately at start of transfer. Signed-off-by: Tobias Schramm Link: https://lore.kernel.org/r/20210827050357.165409-1-t.schramm@manjaro.org Signed-off-by: Mark Brown --- drivers/spi/spi-rockchip.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index 540861ca2ba3..553b6b9d0222 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -600,6 +600,12 @@ static int rockchip_spi_transfer_one( int ret; bool use_dma; + /* Zero length transfers won't trigger an interrupt on completion */ + if (!xfer->len) { + spi_finalize_current_transfer(ctlr); + return 1; + } + WARN_ON(readl_relaxed(rs->regs + ROCKCHIP_SPI_SSIENR) && (readl_relaxed(rs->regs + ROCKCHIP_SPI_SR) & SR_BUSY)); From a61cb6017df0a9be072a35259e6e9ae7aa0ef6b3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 6 Sep 2021 14:40:00 +0200 Subject: [PATCH 008/543] dma-mapping: fix the kerneldoc for dma_map_sg_attrs Add the missing description for the nents parameter, and fix a trivial misalignment. Fixes: fffe3cc8c219 ("dma-mapping: allow map_sg() ops to return negative error codes") Reported-by: Stephen Rothwell Signed-off-by: Christoph Hellwig --- kernel/dma/mapping.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 7ee5284bff58..06fec5547e7c 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -206,7 +206,8 @@ static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, /** * dma_map_sg_attrs - Map the given buffer for DMA * @dev: The device for which to perform the DMA operation - * @sg: The sg_table object describing the buffer + * @sg: The sg_table object describing the buffer + * @nents: Number of entries to map * @dir: DMA direction * @attrs: Optional DMA attributes for the map operation * From 3a029e1f3d6e2ee809e85abecce619a48016bd4b Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 6 Sep 2021 17:36:38 +0100 Subject: [PATCH 009/543] selftests/bpf: Fix build of task_pt_regs test for arm64 struct pt_regs is not exported to userspace on all archs. arm64 and s390 export "user_pt_regs" instead, which causes build failure at the moment: progs/test_task_pt_regs.c:8:16: error: variable has incomplete type 'struct pt_regs' struct pt_regs current_regs = {}; Instead of using pt_regs from ptrace.h, use the larger kernel struct from vmlinux.h directly. Since the test runner task_pt_regs.c does not have access to the kernel struct definition, copy it into a char array. Fixes: 576d47bb1a92 ("bpf: selftests: Add bpf_task_pt_regs() selftest") Suggested-by: Andrii Nakryiko Signed-off-by: Jean-Philippe Brucker Signed-off-by: Daniel Borkmann Tested-by: Ilya Leoshkevich Acked-by: Ilya Leoshkevich Link: https://lore.kernel.org/bpf/20210906163635.302307-1-jean-philippe@linaro.org --- .../selftests/bpf/prog_tests/task_pt_regs.c | 1 - .../selftests/bpf/progs/test_task_pt_regs.c | 19 +++++++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c index 53f0e0fa1a53..37c20b5ffa70 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c +++ b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #define _GNU_SOURCE #include -#include #include "test_task_pt_regs.skel.h" void test_task_pt_regs(void) diff --git a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c index 6c059f1cfa1b..e6cb09259408 100644 --- a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c +++ b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c @@ -1,12 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 -#include -#include +#include "vmlinux.h" #include #include -struct pt_regs current_regs = {}; -struct pt_regs ctx_regs = {}; +#define PT_REGS_SIZE sizeof(struct pt_regs) + +/* + * The kernel struct pt_regs isn't exported in its entirety to userspace. + * Pass it as an array to task_pt_regs.c + */ +char current_regs[PT_REGS_SIZE] = {}; +char ctx_regs[PT_REGS_SIZE] = {}; int uprobe_res = 0; SEC("uprobe/trigger_func") @@ -17,8 +22,10 @@ int handle_uprobe(struct pt_regs *ctx) current = bpf_get_current_task_btf(); regs = (struct pt_regs *) bpf_task_pt_regs(current); - __builtin_memcpy(¤t_regs, regs, sizeof(*regs)); - __builtin_memcpy(&ctx_regs, ctx, sizeof(*ctx)); + if (bpf_probe_read_kernel(current_regs, PT_REGS_SIZE, regs)) + return 0; + if (bpf_probe_read_kernel(ctx_regs, PT_REGS_SIZE, ctx)) + return 0; /* Prove that uprobe was run */ uprobe_res = 1; From 8343268ec3cf4e097aa8b2071f0cd6779e2c4953 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Wed, 18 Aug 2021 14:19:13 +0300 Subject: [PATCH 010/543] net/mlx5: Bridge, fix uninitialized variable usage In some conditions variable 'err' is not assigned with value in mlx5_esw_bridge_port_obj_attr_set() and mlx5_esw_bridge_port_changeupper() functions after recent changes to support LAG. Initialize the variable with zero value in both cases. Reported-by: Colin King Reported-by: Tim Gardner Reported-by: Naresh Kamboju CC: linux-kernel@vger.kernel.org Fixes: ff9b7521468b ("net/mlx5: Bridge, support LAG") Signed-off-by: Vlad Buslov --- drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index 0c38c2e319be..b5ddaa82755f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -137,7 +137,7 @@ static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr u16 vport_num, esw_owner_vhca_id; struct netlink_ext_ack *extack; int ifindex = upper->ifindex; - int err; + int err = 0; if (!netif_is_bridge_master(upper)) return 0; @@ -244,7 +244,7 @@ mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev, struct netlink_ext_ack *extack = switchdev_notifier_info_to_extack(&port_attr_info->info); const struct switchdev_attr *attr = port_attr_info->attr; u16 vport_num, esw_owner_vhca_id; - int err; + int err = 0; if (!mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num, &esw_owner_vhca_id)) From 897ae4b40e80be7dcbf2b3079d85fa6339a6b751 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 17 Aug 2021 15:59:17 +0300 Subject: [PATCH 011/543] net/mlx5: Fix rdma aux device on devlink reload RDMA auxdev parameter registration was skipped for eswitch manager PCI PF. Due to this when devlink parameter is read, it reads as false in below code flow. $ devlink dev reload pci/0000:06:00.0 devlink_reload() mlx5_load_one() mlx5_attach_device() is_ib_enabled() devlink_param_driverinit_value_get() Due to this, is_ib_enabled() returns false for the RDMA auxiliary device. This results into a skipping RDMA auxiliary device creation on reload. There is no need to check for eswitch manager capability to support RDMA auxiliary device. Hence, fix it by skipping eswitch manager capability. Fixes: 87158cedf00e ("net/mlx5: Support enable_rdma devlink dev param") Signed-off-by: Parav Pandit Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index e84287ffc7ce..dcf9f27ba2ef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -658,11 +658,10 @@ static const struct devlink_param enable_rdma_param = static int mlx5_devlink_rdma_param_register(struct devlink *devlink) { - struct mlx5_core_dev *dev = devlink_priv(devlink); union devlink_param_value value; int err; - if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND) || MLX5_ESWITCH_MANAGER(dev)) + if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND)) return 0; err = devlink_param_register(devlink, &enable_rdma_param); @@ -679,9 +678,7 @@ static int mlx5_devlink_rdma_param_register(struct devlink *devlink) static void mlx5_devlink_rdma_param_unregister(struct devlink *devlink) { - struct mlx5_core_dev *dev = devlink_priv(devlink); - - if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND) || MLX5_ESWITCH_MANAGER(dev)) + if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND)) return; devlink_param_unpublish(devlink, &enable_rdma_param); From da8252d5805d4a80120a0c2151277e5fb9e8aa9e Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Wed, 11 Aug 2021 16:40:16 +0300 Subject: [PATCH 012/543] net/mlx5: Lag, don't update lag if lag isn't supported In NICs that don't support LAG, the LAG control structure won't be allocated. If it wasn't allocated it means LAG doesn't exists and can be skipped. Fixes: cac1eb2cf2e3 ("net/mlx5: Lag, properly lock eswitch if needed") Signed-off-by: Mark Bloch Reviewed-by: Maor Gottlieb Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 49ca57c6d31d..ca5690b0a7ab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -927,9 +927,12 @@ void mlx5_lag_disable_change(struct mlx5_core_dev *dev) struct mlx5_core_dev *dev1; struct mlx5_lag *ldev; + ldev = mlx5_lag_dev(dev); + if (!ldev) + return; + mlx5_dev_list_lock(); - ldev = mlx5_lag_dev(dev); dev0 = ldev->pf[MLX5_LAG_P1].dev; dev1 = ldev->pf[MLX5_LAG_P2].dev; @@ -946,8 +949,11 @@ void mlx5_lag_enable_change(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; - mlx5_dev_list_lock(); ldev = mlx5_lag_dev(dev); + if (!ldev) + return; + + mlx5_dev_list_lock(); ldev->mode_changes_in_progress--; mlx5_dev_list_unlock(); mlx5_queue_bond_work(ldev, 0); From dfe6fd72b5f1878b16aa2c8603e031bbcd66b96d Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 18 Aug 2021 13:09:26 -0700 Subject: [PATCH 013/543] net/mlx5: FWTrace, cancel work on alloc pd error flow Handle error flow on mlx5_core_alloc_pd() failure, read_fw_strings_work must be canceled. Fixes: c71ad41ccb0c ("net/mlx5: FW tracer, events handling") Reported-by: Pavel Machek (CIP) Suggested-by: Pavel Machek (CIP) Signed-off-by: Saeed Mahameed Reviewed-by: Aya Levin --- drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 3f8a98093f8c..f9cf9fb31547 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -1007,7 +1007,7 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer) err = mlx5_core_alloc_pd(dev, &tracer->buff.pdn); if (err) { mlx5_core_warn(dev, "FWTracer: Failed to allocate PD %d\n", err); - return err; + goto err_cancel_work; } err = mlx5_fw_tracer_create_mkey(tracer); @@ -1031,6 +1031,7 @@ err_notifier_unregister: mlx5_core_destroy_mkey(dev, &tracer->buff.mkey); err_dealloc_pd: mlx5_core_dealloc_pd(dev, tracer->buff.pdn); +err_cancel_work: cancel_work_sync(&tracer->read_fw_strings_work); return err; } From ee27e330a953595903979ffdb84926843595a9fe Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 1 Sep 2021 11:48:13 +0300 Subject: [PATCH 014/543] net/mlx5: Fix potential sleeping in atomic context Fixes the below flow of sleeping in atomic context by releasing the RCU lock before calling to free_match_list. build_match_list() <- disables preempt -> free_match_list() -> tree_put_node() -> down_write_ref_node() <- take write lock Fixes: 693c6883bbc4 ("net/mlx5: Add hash table for flow groups in flow table") Reported-by: Dan Carpenter Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 9fe8e3c204d6..fe501ba88bea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1682,14 +1682,13 @@ static int build_match_list(struct match_list *match_head, curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC); if (!curr_match) { + rcu_read_unlock(); free_match_list(match_head, ft_locked); - err = -ENOMEM; - goto out; + return -ENOMEM; } curr_match->g = g; list_add_tail(&curr_match->list, &match_head->list); } -out: rcu_read_unlock(); return err; } From c91c1da72b47fc4c5e353cdd9099ba94ae07d2fa Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 15 Jul 2021 10:53:28 +0300 Subject: [PATCH 015/543] net/mlx5e: Fix mutual exclusion between CQE compression and HW TS Some profiles of the driver don't support a dedicated PTP-RQ, hence can't support HW TS and CQE compression simultaneously. When HW TS is enabled the COE compression is disabled, and should be restored when the HW TS is turned off. Add rx_filter as an input to modifying CQE compression to enforce this restriction. Fixes: 256f79d13c1d ("net/mlx5e: Fix HW TS with CQE compression according to profile") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 11 ++++++----- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 669a75f3537a..7b8c8187543a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -922,7 +922,7 @@ void mlx5e_set_rx_mode_work(struct work_struct *work); int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr); int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr); -int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val); +int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val, bool rx_filter); int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, u16 vid); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 2cfd12953909..306fb5d6a36d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1884,7 +1884,7 @@ static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable) return set_pflag_cqe_based_moder(netdev, enable, true); } -int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val) +int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val, bool rx_filter) { bool curr_val = MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS); struct mlx5e_params new_params; @@ -1896,8 +1896,7 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val if (curr_val == new_val) return 0; - if (new_val && !priv->profile->rx_ptp_support && - priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) { + if (new_val && !priv->profile->rx_ptp_support && rx_filter) { netdev_err(priv->netdev, "Profile doesn't support enabling of CQE compression while hardware time-stamping is enabled.\n"); return -EINVAL; @@ -1905,7 +1904,7 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val new_params = priv->channels.params; MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val); - if (priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) + if (rx_filter) new_params.ptp_rx = new_val; if (new_params.ptp_rx == priv->channels.params.ptp_rx) @@ -1928,12 +1927,14 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; + bool rx_filter; int err; if (!MLX5_CAP_GEN(mdev, cqe_compression)) return -EOPNOTSUPP; - err = mlx5e_modify_rx_cqe_compression_locked(priv, enable); + rx_filter = priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE; + err = mlx5e_modify_rx_cqe_compression_locked(priv, enable, rx_filter); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 47efd858964d..3fd515e7bf30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3554,14 +3554,14 @@ static int mlx5e_hwstamp_config_no_ptp_rx(struct mlx5e_priv *priv, bool rx_filte if (!rx_filter) /* Reset CQE compression to Admin default */ - return mlx5e_modify_rx_cqe_compression_locked(priv, rx_cqe_compress_def); + return mlx5e_modify_rx_cqe_compression_locked(priv, rx_cqe_compress_def, false); if (!MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS)) return 0; /* Disable CQE compression */ netdev_warn(priv->netdev, "Disabling RX cqe compression\n"); - err = mlx5e_modify_rx_cqe_compression_locked(priv, false); + err = mlx5e_modify_rx_cqe_compression_locked(priv, false, true); if (err) netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err); From 8db6a54f3cae6a803b2cbf5390662bca641f7da8 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Sun, 29 Aug 2021 13:41:08 +0300 Subject: [PATCH 016/543] net/mlx5e: Fix condition when retrieving PTP-rqn When activating the PTP-RQ, redirect the RQT from drop-RQ to PTP-RQ. Use mlx5e_channels_get_ptp_rqn to retrieve the rqn. This helper returns a boolean (not status), hence caller should consider return value 0 as a fail. Change the caller interpretation of the return value. Fixes: 43ec0f41fa73 ("net/mlx5e: Hide all implementation details of mlx5e_rx_res") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c index bf0313e2682b..13056cb9757d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -572,7 +572,7 @@ void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_chann if (res->features & MLX5E_RX_RES_FEATURE_PTP) { u32 rqn; - if (mlx5e_channels_get_ptp_rqn(chs, &rqn)) + if (!mlx5e_channels_get_ptp_rqn(chs, &rqn)) rqn = res->drop_rqn; err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, rqn); From 0f31ab217dc52a3044044d416be0248b1778c4da Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 7 Sep 2021 22:02:40 -0500 Subject: [PATCH 017/543] dt-bindings: net: sun8i-emac: Add compatible for D1 The D1 SoC contains EMAC hardware which is compatible with the A64 EMAC. Add the new compatible string, with the A64 as a fallback. Signed-off-by: Samuel Holland Acked-by: Maxime Ripard Signed-off-by: David S. Miller --- .../devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml index 7f2578d48e3f..9eb4bb529ad5 100644 --- a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml +++ b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml @@ -19,7 +19,9 @@ properties: - const: allwinner,sun8i-v3s-emac - const: allwinner,sun50i-a64-emac - items: - - const: allwinner,sun50i-h6-emac + - enum: + - allwinner,sun20i-d1-emac + - allwinner,sun50i-h6-emac - const: allwinner,sun50i-a64-emac reg: From d9ea761fdd197351890418acd462c51f241014a7 Mon Sep 17 00:00:00 2001 From: "Lin, Zhenpeng" Date: Wed, 8 Sep 2021 03:40:59 +0000 Subject: [PATCH 018/543] dccp: don't duplicate ccid when cloning dccp sock Commit 2677d2067731 ("dccp: don't free ccid2_hc_tx_sock ...") fixed a UAF but reintroduced CVE-2017-6074. When the sock is cloned, two dccps_hc_tx_ccid will reference to the same ccid. So one can free the ccid object twice from two socks after cloning. This issue was found by "Hadar Manor" as well and assigned with CVE-2020-16119, which was fixed in Ubuntu's kernel. So here I port the patch from Ubuntu to fix it. The patch prevents cloned socks from referencing the same ccid. Fixes: 2677d2067731410 ("dccp: don't free ccid2_hc_tx_sock ...") Signed-off-by: Zhenpeng Lin Signed-off-by: David S. Miller --- net/dccp/minisocks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index c5c74a34d139..91e7a2202697 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -94,6 +94,8 @@ struct sock *dccp_create_openreq_child(const struct sock *sk, newdp->dccps_role = DCCP_ROLE_SERVER; newdp->dccps_hc_rx_ackvec = NULL; newdp->dccps_service_list = NULL; + newdp->dccps_hc_rx_ccid = NULL; + newdp->dccps_hc_tx_ccid = NULL; newdp->dccps_service = dreq->dreq_service; newdp->dccps_timestamp_echo = dreq->dreq_timestamp_echo; newdp->dccps_timestamp_time = dreq->dreq_timestamp_time; From 581edcd0c8a076eba2ec9e20db50921ee80f5cbc Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 8 Sep 2021 12:13:10 +0800 Subject: [PATCH 019/543] mctp: perform route destruction under RCU read lock The kernel test robot reports: [ 843.509974][ T345] ============================= [ 843.524220][ T345] WARNING: suspicious RCU usage [ 843.538791][ T345] 5.14.0-rc2-00606-g889b7da23abf #1 Not tainted [ 843.553617][ T345] ----------------------------- [ 843.567412][ T345] net/mctp/route.c:310 RCU-list traversed in non-reader section!! - we're missing the rcu read lock acquire around the destruction path. This change adds the acquire/release - the path is already atomic, and we're using the _rcu list iterators. Reported-by: kernel test robot Signed-off-by: Jeremy Kerr Signed-off-by: David S. Miller --- net/mctp/route.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mctp/route.c b/net/mctp/route.c index 5265525011ad..5ca186d53cb0 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -1083,8 +1083,10 @@ static void __net_exit mctp_routes_net_exit(struct net *net) { struct mctp_route *rt; + rcu_read_lock(); list_for_each_entry_rcu(rt, &net->mctp.routes, list) mctp_route_release(rt); + rcu_read_unlock(); } static struct pernet_operations mctp_net_ops = { From d437f5aa23aa2b7bd07cd44b839d7546cc17166f Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Tue, 7 Sep 2021 22:07:03 -0700 Subject: [PATCH 020/543] ibmvnic: check failover_pending in login response If a failover occurs before a login response is received, the login response buffer maybe undefined. Check that there was no failover before accessing the login response buffer. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index a775c69e4fd7..6aa6ff89a765 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -4700,6 +4700,14 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, return 0; } + if (adapter->failover_pending) { + adapter->init_done_rc = -EAGAIN; + netdev_dbg(netdev, "Failover pending, ignoring login response\n"); + complete(&adapter->init_done); + /* login response buffer will be released on reset */ + return 0; + } + netdev->mtu = adapter->req_mtu - ETH_HLEN; netdev_dbg(adapter->netdev, "Login Response Buffer:\n"); From d7e203ffd3ba5965e88952e7364a42ab32064408 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 7 Sep 2021 15:46:10 +0200 Subject: [PATCH 021/543] ne2000: fix unused function warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Geert noticed a warning on MIPS TX49xx, Atari and presuambly other platforms when the driver is built-in but NETDEV_LEGACY_INIT is disabled: drivers/net/ethernet/8390/ne.c:909:20: warning: ‘ne_add_devices’ defined but not used [-Wunused-function] Merge the two module init functions into a single one with an IS_ENABLED() check to replace the incorrect #ifdef. Fixes: 4228c3942821 ("make legacy ISA probe optional") Reported-by: Geert Uytterhoeven Signed-off-by: Arnd Bergmann Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- drivers/net/ethernet/8390/ne.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c index 53660bc8d6ff..9afc712f5948 100644 --- a/drivers/net/ethernet/8390/ne.c +++ b/drivers/net/ethernet/8390/ne.c @@ -922,13 +922,16 @@ static void __init ne_add_devices(void) } } -#ifdef MODULE static int __init ne_init(void) { int retval; - ne_add_devices(); + + if (IS_MODULE(CONFIG_NE2000)) + ne_add_devices(); + retval = platform_driver_probe(&ne_driver, ne_drv_probe); - if (retval) { + + if (IS_MODULE(CONFIG_NE2000) && retval) { if (io[0] == 0) pr_notice("ne.c: You must supply \"io=0xNNN\"" " value(s) for ISA cards.\n"); @@ -941,18 +944,8 @@ static int __init ne_init(void) return retval; } module_init(ne_init); -#else /* MODULE */ -static int __init ne_init(void) -{ - int retval = platform_driver_probe(&ne_driver, ne_drv_probe); - /* Unregister unused platform_devices. */ - ne_loop_rm_unreg(0); - return retval; -} -module_init(ne_init); - -#ifdef CONFIG_NETDEV_LEGACY_INIT +#if !defined(MODULE) && defined(CONFIG_NETDEV_LEGACY_INIT) struct net_device * __init ne_probe(int unit) { int this_dev; @@ -994,7 +987,6 @@ struct net_device * __init ne_probe(int unit) return ERR_PTR(-ENODEV); } #endif -#endif /* MODULE */ static void __exit ne_exit(void) { From ea269a6f720782ed94171fb962b14ce07c372138 Mon Sep 17 00:00:00 2001 From: Nathan Rossi Date: Thu, 2 Sep 2021 05:14:49 +0000 Subject: [PATCH 022/543] net: phylink: Update SFP selected interface on advertising changes Currently changes to the advertising state via ethtool do not cause any reselection of the configured interface mode after the SFP is already inserted and initially configured. While it is not typical to change the advertised link modes for an interface using an SFP in certain use cases it is desirable. In the case of a SFP port that is capable of handling both SFP and SFP+ modules it will automatically select between 1G and 10G modes depending on the supported mode of the SFP. However if the SFP module is capable of working in multiple modes (e.g. a SFP+ DAC that can operate at 1G or 10G), one end of the cable may be attached to a SFP 1000base-x port thus the SFP+ end must be manually configured to the 1000base-x mode in order for the link to be established. This change causes the ethtool setting of advertised mode changes to reselect the interface mode so that the link can be established. Additionally when a module is inserted the advertising mode is reset to match the supported modes of the module. Signed-off-by: Nathan Rossi Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index a1464b764d4d..0a0abe8e4be0 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1607,6 +1607,32 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, if (config.an_enabled && phylink_is_empty_linkmode(config.advertising)) return -EINVAL; + /* If this link is with an SFP, ensure that changes to advertised modes + * also cause the associated interface to be selected such that the + * link can be configured correctly. + */ + if (pl->sfp_port && pl->sfp_bus) { + config.interface = sfp_select_interface(pl->sfp_bus, + config.advertising); + if (config.interface == PHY_INTERFACE_MODE_NA) { + phylink_err(pl, + "selection of interface failed, advertisement %*pb\n", + __ETHTOOL_LINK_MODE_MASK_NBITS, + config.advertising); + return -EINVAL; + } + + /* Revalidate with the selected interface */ + linkmode_copy(support, pl->supported); + if (phylink_validate(pl, support, &config)) { + phylink_err(pl, "validation of %s/%s with support %*pb failed\n", + phylink_an_mode_str(pl->cur_link_an_mode), + phy_modes(config.interface), + __ETHTOOL_LINK_MODE_MASK_NBITS, support); + return -EINVAL; + } + } + mutex_lock(&pl->state_mutex); pl->link_config.speed = config.speed; pl->link_config.duplex = config.duplex; @@ -2186,7 +2212,9 @@ static int phylink_sfp_config(struct phylink *pl, u8 mode, if (phy_interface_mode_is_8023z(iface) && pl->phydev) return -EINVAL; - changed = !linkmode_equal(pl->supported, support); + changed = !linkmode_equal(pl->supported, support) || + !linkmode_equal(pl->link_config.advertising, + config.advertising); if (changed) { linkmode_copy(pl->supported, support); linkmode_copy(pl->link_config.advertising, config.advertising); From b5c102238cea985d8126b173d06b9e1de88037ee Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 7 Sep 2021 12:05:54 -0500 Subject: [PATCH 023/543] net: ipa: initialize all filter table slots There is an off-by-one problem in ipa_table_init_add(), when initializing filter tables. In that function, the number of filter table entries is determined based on the number of set bits in the filter map. However that count does *not* include the extra "slot" in the filter table that holds the filter map itself. Meanwhile, ipa_table_addr() *does* include the filter map in the memory it returns, but because the count it's provided doesn't include it, it includes one too few table entries. Fix this by including the extra slot for the filter map in the count computed in ipa_table_init_add(). Note: ipa_filter_reset_table() does not have this problem; it resets filter table entries one by one, but does not overwrite the filter bitmap. Fixes: 2b9feef2b6c2 ("soc: qcom: ipa: filter and routing tables") Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- drivers/net/ipa/ipa_table.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c index 2324e1b93e37..1da334f54944 100644 --- a/drivers/net/ipa/ipa_table.c +++ b/drivers/net/ipa/ipa_table.c @@ -430,7 +430,8 @@ static void ipa_table_init_add(struct gsi_trans *trans, bool filter, * table region determines the number of entries it has. */ if (filter) { - count = hweight32(ipa->filter_map); + /* Include one extra "slot" to hold the filter map itself */ + count = 1 + hweight32(ipa->filter_map); hash_count = hash_mem->size ? count : 0; } else { count = mem->size / sizeof(__le64); From 276aae377206d60b9b7b7df4586cd9f2a813f5d0 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Wed, 8 Sep 2021 15:43:35 +0800 Subject: [PATCH 024/543] net: stmmac: fix system hang caused by eee_ctrl_timer during suspend/resume commit 5f58591323bf ("net: stmmac: delete the eee_ctrl_timer after napi disabled"), this patch tries to fix system hang caused by eee_ctrl_timer, unfortunately, it only can resolve it for system reboot stress test. System hang also can be reproduced easily during system suspend/resume stess test when mount NFS on i.MX8MP EVK board. In stmmac driver, eee feature is combined to phylink framework. When do system suspend, phylink_stop() would queue delayed work, it invokes stmmac_mac_link_down(), where to deactivate eee_ctrl_timer synchronizly. In above commit, try to fix issue by deactivating eee_ctrl_timer obviously, but it is not enough. Looking into eee_ctrl_timer expire callback stmmac_eee_ctrl_timer(), it could enable hareware eee mode again. What is unexpected is that LPI interrupt (MAC_Interrupt_Enable.LPIEN bit) is always asserted. This interrupt has chance to be issued when LPI state entry/exit from the MAC, and at that time, clock could have been already disabled. The result is that system hang when driver try to touch register from interrupt handler. The reason why above commit can fix system hang issue in stmmac_release() is that, deactivate eee_ctrl_timer not just after napi disabled, further after irq freed. In conclusion, hardware would generate LPI interrupt when clock has been disabled during suspend or resume, since hardware is in eee mode and LPI interrupt enabled. Interrupts from MAC, MTL and DMA level are enabled and never been disabled when system suspend, so postpone clocks management from suspend stage to noirq suspend stage should be more safe. Fixes: 5f58591323bf ("net: stmmac: delete the eee_ctrl_timer after napi disabled") Signed-off-by: Joakim Zhang Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 14 ------ .../ethernet/stmicro/stmmac/stmmac_platform.c | 44 +++++++++++++++++++ 2 files changed, 44 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index ece02b35a6ce..246f84fedbc8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7118,7 +7118,6 @@ int stmmac_suspend(struct device *dev) struct net_device *ndev = dev_get_drvdata(dev); struct stmmac_priv *priv = netdev_priv(ndev); u32 chan; - int ret; if (!ndev || !netif_running(ndev)) return 0; @@ -7150,13 +7149,6 @@ int stmmac_suspend(struct device *dev) } else { stmmac_mac_set(priv, priv->ioaddr, false); pinctrl_pm_select_sleep_state(priv->device); - /* Disable clock in case of PWM is off */ - clk_disable_unprepare(priv->plat->clk_ptp_ref); - ret = pm_runtime_force_suspend(dev); - if (ret) { - mutex_unlock(&priv->lock); - return ret; - } } mutex_unlock(&priv->lock); @@ -7242,12 +7234,6 @@ int stmmac_resume(struct device *dev) priv->irq_wake = 0; } else { pinctrl_pm_select_default_state(priv->device); - /* enable the clk previously disabled */ - ret = pm_runtime_force_resume(dev); - if (ret) - return ret; - if (priv->plat->clk_ptp_ref) - clk_prepare_enable(priv->plat->clk_ptp_ref); /* reset the phy so that it's ready */ if (priv->mii) stmmac_mdio_reset(priv->mii); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 5ca710844cc1..4885f9ad1b1e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -9,6 +9,7 @@ *******************************************************************************/ #include +#include #include #include #include @@ -771,9 +772,52 @@ static int __maybe_unused stmmac_runtime_resume(struct device *dev) return stmmac_bus_clks_config(priv, true); } +static int stmmac_pltfr_noirq_suspend(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct stmmac_priv *priv = netdev_priv(ndev); + int ret; + + if (!netif_running(ndev)) + return 0; + + if (!device_may_wakeup(priv->device) || !priv->plat->pmt) { + /* Disable clock in case of PWM is off */ + clk_disable_unprepare(priv->plat->clk_ptp_ref); + + ret = pm_runtime_force_suspend(dev); + if (ret) + return ret; + } + + return 0; +} + +static int stmmac_pltfr_noirq_resume(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct stmmac_priv *priv = netdev_priv(ndev); + int ret; + + if (!netif_running(ndev)) + return 0; + + if (!device_may_wakeup(priv->device) || !priv->plat->pmt) { + /* enable the clk previously disabled */ + ret = pm_runtime_force_resume(dev); + if (ret) + return ret; + + clk_prepare_enable(priv->plat->clk_ptp_ref); + } + + return 0; +} + const struct dev_pm_ops stmmac_pltfr_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(stmmac_pltfr_suspend, stmmac_pltfr_resume) SET_RUNTIME_PM_OPS(stmmac_runtime_suspend, stmmac_runtime_resume, NULL) + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(stmmac_pltfr_noirq_suspend, stmmac_pltfr_noirq_resume) }; EXPORT_SYMBOL_GPL(stmmac_pltfr_pm_ops); From 34b1999da935a33be6239226bfa6cd4f704c5c88 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 19 Aug 2021 16:27:17 +0300 Subject: [PATCH 025/543] x86/mm: Fix kern_addr_valid() to cope with existing but not present entries Jiri Olsa reported a fault when running: # cat /proc/kallsyms | grep ksys_read ffffffff8136d580 T ksys_read # objdump -d --start-address=0xffffffff8136d580 --stop-address=0xffffffff8136d590 /proc/kcore /proc/kcore: file format elf64-x86-64 Segmentation fault general protection fault, probably for non-canonical address 0xf887ffcbff000: 0000 [#1] SMP PTI CPU: 12 PID: 1079 Comm: objdump Not tainted 5.14.0-rc5qemu+ #508 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-4.fc34 04/01/2014 RIP: 0010:kern_addr_valid Call Trace: read_kcore ? rcu_read_lock_sched_held ? rcu_read_lock_sched_held ? rcu_read_lock_sched_held ? trace_hardirqs_on ? rcu_read_lock_sched_held ? lock_acquire ? lock_acquire ? rcu_read_lock_sched_held ? lock_acquire ? rcu_read_lock_sched_held ? rcu_read_lock_sched_held ? rcu_read_lock_sched_held ? lock_release ? _raw_spin_unlock ? __handle_mm_fault ? rcu_read_lock_sched_held ? lock_acquire ? rcu_read_lock_sched_held ? lock_release proc_reg_read ? vfs_read vfs_read ksys_read do_syscall_64 entry_SYSCALL_64_after_hwframe The fault happens because kern_addr_valid() dereferences existent but not present PMD in the high kernel mappings. Such PMDs are created when free_kernel_image_pages() frees regions larger than 2Mb. In this case, a part of the freed memory is mapped with PMDs and the set_memory_np_noalias() -> ... -> __change_page_attr() sequence will mark the PMD as not present rather than wipe it completely. Have kern_addr_valid() check whether higher level page table entries are present before trying to dereference them to fix this issue and to avoid similar issues in the future. Stable backporting note: ------------------------ Note that the stable marking is for all active stable branches because there could be cases where pagetable entries exist but are not valid - see 9a14aefc1d28 ("x86: cpa, fix lookup_address"), for example. So make sure to be on the safe side here and use pXY_present() accessors rather than pXY_none() which could #GP when accessing pages in the direct map. Also see: c40a56a7818c ("x86/mm/init: Remove freed kernel image areas from alias mapping") for more info. Reported-by: Jiri Olsa Signed-off-by: Mike Rapoport Signed-off-by: Borislav Petkov Reviewed-by: David Hildenbrand Acked-by: Dave Hansen Tested-by: Jiri Olsa Cc: # 4.4+ Link: https://lkml.kernel.org/r/20210819132717.19358-1-rppt@kernel.org --- arch/x86/mm/init_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ddeaba947eb3..879886c6cc53 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1433,18 +1433,18 @@ int kern_addr_valid(unsigned long addr) return 0; p4d = p4d_offset(pgd, addr); - if (p4d_none(*p4d)) + if (!p4d_present(*p4d)) return 0; pud = pud_offset(p4d, addr); - if (pud_none(*pud)) + if (!pud_present(*pud)) return 0; if (pud_large(*pud)) return pfn_valid(pud_pfn(*pud)); pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) + if (!pmd_present(*pmd)) return 0; if (pmd_large(*pmd)) From 3c4cea8fa7f71f00c5279547043a84bc2a4d8b8c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 8 Sep 2021 13:42:09 +0200 Subject: [PATCH 026/543] vhost_net: fix OoB on sendmsg() failure. If the sendmsg() call in vhost_tx_batch() fails, both the 'batched_xdp' and 'done_idx' indexes are left unchanged. If such failure happens when batched_xdp == VHOST_NET_BATCH, the next call to vhost_net_build_xdp() will access and write memory outside the xdp buffers area. Since sendmsg() can only error with EBADFD, this change addresses the issue explicitly freeing the XDP buffers batch on error. Fixes: 0a0be13b8fe2 ("vhost_net: batch submitting XDP buffers to underlayer sockets") Suggested-by: Jason Wang Signed-off-by: Paolo Abeni Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/vhost/net.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 3a249ee7e144..28ef323882fb 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -467,7 +467,7 @@ static void vhost_tx_batch(struct vhost_net *net, .num = nvq->batched_xdp, .ptr = nvq->xdp, }; - int err; + int i, err; if (nvq->batched_xdp == 0) goto signal_used; @@ -476,6 +476,15 @@ static void vhost_tx_batch(struct vhost_net *net, err = sock->ops->sendmsg(sock, msghdr, 0); if (unlikely(err < 0)) { vq_err(&nvq->vq, "Fail to batch sending packets\n"); + + /* free pages owned by XDP; since this is an unlikely error path, + * keep it simple and avoid more complex bulk update for the + * used pages + */ + for (i = 0; i < nvq->batched_xdp; ++i) + put_page(virt_to_head_page(nvq->xdp[i].data)); + nvq->batched_xdp = 0; + nvq->done_idx = 0; return; } From 273c29e944bda9a20a30c26cfc34c9a3f363280b Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 8 Sep 2021 09:58:20 -0700 Subject: [PATCH 027/543] ibmvnic: check failover_pending in login response If a failover occurs before a login response is received, the login response buffer maybe undefined. Check that there was no failover before accessing the login response buffer. Fixes: 032c5e82847a ("Driver for IBM System i/p VNIC protocol") Signed-off-by: Sukadev Bhattiprolu Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 6aa6ff89a765..a4579b340120 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -4708,6 +4708,14 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, return 0; } + if (adapter->failover_pending) { + adapter->init_done_rc = -EAGAIN; + netdev_dbg(netdev, "Failover pending, ignoring login response\n"); + complete(&adapter->init_done); + /* login response buffer will be released on reset */ + return 0; + } + netdev->mtu = adapter->req_mtu - ETH_HLEN; netdev_dbg(adapter->netdev, "Login Response Buffer:\n"); From d82d5303c4c539db86588ffb5dc5b26c3f1513e8 Mon Sep 17 00:00:00 2001 From: Tong Zhang Date: Wed, 8 Sep 2021 12:02:32 -0700 Subject: [PATCH 028/543] net: macb: fix use after free on rmmod plat_dev->dev->platform_data is released by platform_device_unregister(), use of pclk and hclk is a use-after-free. Since device unregister won't need a clk device we adjust the function call sequence to fix this issue. [ 31.261225] BUG: KASAN: use-after-free in macb_remove+0x77/0xc6 [macb_pci] [ 31.275563] Freed by task 306: [ 30.276782] platform_device_release+0x25/0x80 Suggested-by: Nicolas Ferre Signed-off-by: Tong Zhang Acked-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c index 8b7b59908a1a..f66d22de5168 100644 --- a/drivers/net/ethernet/cadence/macb_pci.c +++ b/drivers/net/ethernet/cadence/macb_pci.c @@ -111,9 +111,9 @@ static void macb_remove(struct pci_dev *pdev) struct platform_device *plat_dev = pci_get_drvdata(pdev); struct macb_platform_data *plat_data = dev_get_platdata(&plat_dev->dev); - platform_device_unregister(plat_dev); clk_unregister(plat_data->pclk); clk_unregister(plat_data->hclk); + platform_device_unregister(plat_dev); } static const struct pci_device_id dev_id_table[] = { From 04f08eb44b5011493d77b602fdec29ff0f5c6cd5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Sep 2021 17:00:29 -0700 Subject: [PATCH 029/543] net/af_unix: fix a data-race in unix_dgram_poll syzbot reported another data-race in af_unix [1] Lets change __skb_insert() to use WRITE_ONCE() when changing skb head qlen. Also, change unix_dgram_poll() to use lockless version of unix_recvq_full() It is verry possible we can switch all/most unix_recvq_full() to the lockless version, this will be done in a future kernel version. [1] HEAD commit: 8596e589b787732c8346f0482919e83cc9362db1 BUG: KCSAN: data-race in skb_queue_tail / unix_dgram_poll write to 0xffff88814eeb24e0 of 4 bytes by task 25815 on cpu 0: __skb_insert include/linux/skbuff.h:1938 [inline] __skb_queue_before include/linux/skbuff.h:2043 [inline] __skb_queue_tail include/linux/skbuff.h:2076 [inline] skb_queue_tail+0x80/0xa0 net/core/skbuff.c:3264 unix_dgram_sendmsg+0xff2/0x1600 net/unix/af_unix.c:1850 sock_sendmsg_nosec net/socket.c:703 [inline] sock_sendmsg net/socket.c:723 [inline] ____sys_sendmsg+0x360/0x4d0 net/socket.c:2392 ___sys_sendmsg net/socket.c:2446 [inline] __sys_sendmmsg+0x315/0x4b0 net/socket.c:2532 __do_sys_sendmmsg net/socket.c:2561 [inline] __se_sys_sendmmsg net/socket.c:2558 [inline] __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2558 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff88814eeb24e0 of 4 bytes by task 25834 on cpu 1: skb_queue_len include/linux/skbuff.h:1869 [inline] unix_recvq_full net/unix/af_unix.c:194 [inline] unix_dgram_poll+0x2bc/0x3e0 net/unix/af_unix.c:2777 sock_poll+0x23e/0x260 net/socket.c:1288 vfs_poll include/linux/poll.h:90 [inline] ep_item_poll fs/eventpoll.c:846 [inline] ep_send_events fs/eventpoll.c:1683 [inline] ep_poll fs/eventpoll.c:1798 [inline] do_epoll_wait+0x6ad/0xf00 fs/eventpoll.c:2226 __do_sys_epoll_wait fs/eventpoll.c:2238 [inline] __se_sys_epoll_wait fs/eventpoll.c:2233 [inline] __x64_sys_epoll_wait+0xf6/0x120 fs/eventpoll.c:2233 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x0000001b -> 0x00000001 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 25834 Comm: syz-executor.1 Tainted: G W 5.14.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 86b18aaa2b5b ("skbuff: fix a data race in skb_queue_len()") Cc: Qian Cai Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- net/unix/af_unix.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6bdb0db3e825..841e2f0f5240 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1940,7 +1940,7 @@ static inline void __skb_insert(struct sk_buff *newsk, WRITE_ONCE(newsk->prev, prev); WRITE_ONCE(next->prev, newsk); WRITE_ONCE(prev->next, newsk); - list->qlen++; + WRITE_ONCE(list->qlen, list->qlen + 1); } static inline void __skb_queue_splice(const struct sk_buff_head *list, diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index eb47b9de2380..92345c9bb60c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -3073,7 +3073,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, other = unix_peer(sk); if (other && unix_peer(other) != sk && - unix_recvq_full(other) && + unix_recvq_full_lockless(other) && unix_dgram_peer_wake_me(sk, other)) writable = 0; From 9b6ff7eb666415e1558f1ba8a742f5db6a9954de Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Thu, 9 Sep 2021 12:32:00 +0800 Subject: [PATCH 030/543] net/l2tp: Fix reference count leak in l2tp_udp_recv_core The reference count leak issue may take place in an error handling path. If both conditions of tunnel->version == L2TP_HDR_VER_3 and the return value of l2tp_v3_ensure_opt_in_linear is nonzero, the function would directly jump to label invalid, without decrementing the reference count of the l2tp_session object session increased earlier by l2tp_tunnel_get_session(). This may result in refcount leaks. Fix this issue by decrease the reference count before jumping to the label invalid. Fixes: 4522a70db7aa ("l2tp: fix reading optional fields of L2TPv3") Signed-off-by: Xiyu Yang Signed-off-by: Xin Xiong Signed-off-by: Xin Tan Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 53486b162f01..93271a2632b8 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -869,8 +869,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) } if (tunnel->version == L2TP_HDR_VER_3 && - l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) + l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) { + l2tp_session_dec_refcount(session); goto invalid; + } l2tp_recv_common(session, skb, ptr, optr, hdrflags, length); l2tp_session_dec_refcount(session); From 2a48d96fd58a666ae231c3dd6fe4a458798ac645 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Thu, 9 Sep 2021 17:23:22 +0800 Subject: [PATCH 031/543] net: stmmac: platform: fix build warning when with !CONFIG_PM_SLEEP Use __maybe_unused for noirq_suspend()/noirq_resume() hooks to avoid build warning with !CONFIG_PM_SLEEP: >> drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c:796:12: error: 'stmmac_pltfr_noirq_resume' defined but not used [-Werror=unused-function] 796 | static int stmmac_pltfr_noirq_resume(struct device *dev) | ^~~~~~~~~~~~~~~~~~~~~~~~~ >> drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c:775:12: error: 'stmmac_pltfr_noirq_suspend' defined but not used [-Werror=unused-function] 775 | static int stmmac_pltfr_noirq_suspend(struct device *dev) | ^~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors Fixes: 276aae377206 ("net: stmmac: fix system hang caused by eee_ctrl_timer during suspend/resume") Reported-by: kernel test robot Signed-off-by: Joakim Zhang Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 4885f9ad1b1e..62cec9bfcd33 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -772,7 +772,7 @@ static int __maybe_unused stmmac_runtime_resume(struct device *dev) return stmmac_bus_clks_config(priv, true); } -static int stmmac_pltfr_noirq_suspend(struct device *dev) +static int __maybe_unused stmmac_pltfr_noirq_suspend(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct stmmac_priv *priv = netdev_priv(ndev); @@ -793,7 +793,7 @@ static int stmmac_pltfr_noirq_suspend(struct device *dev) return 0; } -static int stmmac_pltfr_noirq_resume(struct device *dev) +static int __maybe_unused stmmac_pltfr_noirq_resume(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct stmmac_priv *priv = netdev_priv(ndev); From 415446185b939dcdcd6a483e705c805ab961e54c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Thu, 9 Sep 2021 11:28:45 +0200 Subject: [PATCH 032/543] sfc: fallback for lack of xdp tx queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If there are not enough resources to allocate one TX queue per core for XDP TX it was completely disabled. This patch implements a fallback solution for sharing the available queues using __netif_tx_lock for synchronization. In the normal case that there is one TX queue per CPU, no locking is done, as it was before. With this fallback solution, XDP TX will work in much more cases that were failing, specially in machines with many CPUs. It's hard for XDP users to know what features are supported across different NICs and configurations, so they will benefit on having wider support. Signed-off-by: Íñigo Huguet Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx_channels.c | 54 ++++++++++++++++++++----- drivers/net/ethernet/sfc/net_driver.h | 8 ++++ drivers/net/ethernet/sfc/tx.c | 21 ++++++---- 3 files changed, 64 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index e5b0d795c301..e7849f1260a1 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -166,32 +166,46 @@ static int efx_allocate_msix_channels(struct efx_nic *efx, * We need a channel per event queue, plus a VI per tx queue. * This may be more pessimistic than it needs to be. */ - if (n_channels + n_xdp_ev > max_channels) { + if (n_channels >= max_channels) { + efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_DISABLED; netif_err(efx, drv, efx->net_dev, "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n", n_xdp_ev, n_channels, max_channels); netif_err(efx, drv, efx->net_dev, - "XDP_TX and XDP_REDIRECT will not work on this interface"); - efx->n_xdp_channels = 0; - efx->xdp_tx_per_channel = 0; - efx->xdp_tx_queue_count = 0; + "XDP_TX and XDP_REDIRECT will not work on this interface\n"); } else if (n_channels + n_xdp_tx > efx->max_vis) { + efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_DISABLED; netif_err(efx, drv, efx->net_dev, "Insufficient resources for %d XDP TX queues (%d other channels, max VIs %d)\n", n_xdp_tx, n_channels, efx->max_vis); netif_err(efx, drv, efx->net_dev, - "XDP_TX and XDP_REDIRECT will not work on this interface"); - efx->n_xdp_channels = 0; - efx->xdp_tx_per_channel = 0; - efx->xdp_tx_queue_count = 0; + "XDP_TX and XDP_REDIRECT will not work on this interface\n"); + } else if (n_channels + n_xdp_ev > max_channels) { + efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_SHARED; + netif_warn(efx, drv, efx->net_dev, + "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n", + n_xdp_ev, n_channels, max_channels); + + n_xdp_ev = max_channels - n_channels; + netif_warn(efx, drv, efx->net_dev, + "XDP_TX and XDP_REDIRECT will work with reduced performance (%d cpus/tx_queue)\n", + DIV_ROUND_UP(n_xdp_tx, tx_per_ev * n_xdp_ev)); } else { + efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_DEDICATED; + } + + if (efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DISABLED) { efx->n_xdp_channels = n_xdp_ev; efx->xdp_tx_per_channel = tx_per_ev; efx->xdp_tx_queue_count = n_xdp_tx; n_channels += n_xdp_ev; netif_dbg(efx, drv, efx->net_dev, "Allocating %d TX and %d event queues for XDP\n", - n_xdp_tx, n_xdp_ev); + n_xdp_ev * tx_per_ev, n_xdp_ev); + } else { + efx->n_xdp_channels = 0; + efx->xdp_tx_per_channel = 0; + efx->xdp_tx_queue_count = 0; } if (vec_count < n_channels) { @@ -921,7 +935,25 @@ int efx_set_channels(struct efx_nic *efx) } } } - WARN_ON(xdp_queue_number != efx->xdp_tx_queue_count); + WARN_ON(efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_DEDICATED && + xdp_queue_number != efx->xdp_tx_queue_count); + WARN_ON(efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED && + xdp_queue_number > efx->xdp_tx_queue_count); + + /* If we have less XDP TX queues than CPUs, assign the already existing + * queues to the exceeding CPUs (this means that we will have to use + * locking when transmitting with XDP) + */ + next_queue = 0; + while (xdp_queue_number < efx->xdp_tx_queue_count) { + tx_queue = efx->xdp_tx_queues[next_queue++]; + channel = tx_queue->channel; + netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is XDP %u, HW %u\n", + channel->channel, tx_queue->label, + xdp_queue_number, tx_queue->queue); + + efx->xdp_tx_queues[xdp_queue_number++] = tx_queue; + } rc = netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels); if (rc) diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 9b4b25704271..e731c766f709 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -782,6 +782,12 @@ struct efx_async_filter_insertion { #define EFX_RPS_MAX_IN_FLIGHT 8 #endif /* CONFIG_RFS_ACCEL */ +enum efx_xdp_tx_queues_mode { + EFX_XDP_TX_QUEUES_DEDICATED, /* one queue per core, locking not needed */ + EFX_XDP_TX_QUEUES_SHARED, /* each queue used by more than 1 core */ + EFX_XDP_TX_QUEUES_DISABLED /* xdp tx not available */ +}; + /** * struct efx_nic - an Efx NIC * @name: Device name (net device name or bus id before net device registered) @@ -820,6 +826,7 @@ struct efx_async_filter_insertion { * should be allocated for this NIC * @xdp_tx_queue_count: Number of entries in %xdp_tx_queues. * @xdp_tx_queues: Array of pointers to tx queues used for XDP transmit. + * @xdp_txq_queues_mode: XDP TX queues sharing strategy. * @rxq_entries: Size of receive queues requested by user. * @txq_entries: Size of transmit queues requested by user. * @txq_stop_thresh: TX queue fill level at or above which we stop it. @@ -979,6 +986,7 @@ struct efx_nic { unsigned int xdp_tx_queue_count; struct efx_tx_queue **xdp_tx_queues; + enum efx_xdp_tx_queues_mode xdp_txq_queues_mode; unsigned rxq_entries; unsigned txq_entries; diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 0c6650d2e239..c7afd6cda902 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -430,21 +430,23 @@ int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs, int cpu; int i; - cpu = raw_smp_processor_id(); + if (unlikely(efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_DISABLED)) + return -EINVAL; + if (unlikely(n && !xdpfs)) + return -EINVAL; + if (unlikely(!n)) + return 0; - if (!efx->xdp_tx_queue_count || - unlikely(cpu >= efx->xdp_tx_queue_count)) + cpu = raw_smp_processor_id(); + if (unlikely(cpu >= efx->xdp_tx_queue_count)) return -EINVAL; tx_queue = efx->xdp_tx_queues[cpu]; if (unlikely(!tx_queue)) return -EINVAL; - if (unlikely(n && !xdpfs)) - return -EINVAL; - - if (!n) - return 0; + if (efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED) + HARD_TX_LOCK(efx->net_dev, tx_queue->core_txq, cpu); /* Check for available space. We should never need multiple * descriptors per frame. @@ -484,6 +486,9 @@ int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs, if (flush && i > 0) efx_nic_push_buffers(tx_queue); + if (efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED) + HARD_TX_UNLOCK(efx->net_dev, tx_queue->core_txq); + return i == 0 ? -EIO : i; } From 6215b608a8c4d4a478721e14a6faa0dc56e4a693 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Thu, 9 Sep 2021 11:28:46 +0200 Subject: [PATCH 033/543] sfc: last resort fallback for lack of xdp tx queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous patch addressed the situation of having some free resources for xdp tx but not enough for one tx queue per CPU. This patch address the worst case of not having resources at all for xdp tx. Instead of using queues dedicated to xdp, normal queues used by network stack are shared for both cases, using __netif_tx_lock for synchronization. Also queue stop/restart must be considered in the xdp path to avoid freezing the queue. This is not the ideal situation we might want to be, and a performance penalty is expected both for normal and xdp traffic, but at least XDP will work in all possible situations (with a warning in the logs), improving a bit the pain of not knowing in what situations we can use it and in what situations we cannot. Signed-off-by: Íñigo Huguet Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx_channels.c | 82 ++++++++++++++----------- drivers/net/ethernet/sfc/net_driver.h | 2 +- drivers/net/ethernet/sfc/tx.c | 14 ++++- 3 files changed, 58 insertions(+), 40 deletions(-) diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index e7849f1260a1..3dbea028b325 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -167,19 +167,19 @@ static int efx_allocate_msix_channels(struct efx_nic *efx, * This may be more pessimistic than it needs to be. */ if (n_channels >= max_channels) { - efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_DISABLED; - netif_err(efx, drv, efx->net_dev, - "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n", - n_xdp_ev, n_channels, max_channels); - netif_err(efx, drv, efx->net_dev, - "XDP_TX and XDP_REDIRECT will not work on this interface\n"); + efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED; + netif_warn(efx, drv, efx->net_dev, + "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n", + n_xdp_ev, n_channels, max_channels); + netif_warn(efx, drv, efx->net_dev, + "XDP_TX and XDP_REDIRECT might decrease device's performance\n"); } else if (n_channels + n_xdp_tx > efx->max_vis) { - efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_DISABLED; - netif_err(efx, drv, efx->net_dev, - "Insufficient resources for %d XDP TX queues (%d other channels, max VIs %d)\n", - n_xdp_tx, n_channels, efx->max_vis); - netif_err(efx, drv, efx->net_dev, - "XDP_TX and XDP_REDIRECT will not work on this interface\n"); + efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED; + netif_warn(efx, drv, efx->net_dev, + "Insufficient resources for %d XDP TX queues (%d other channels, max VIs %d)\n", + n_xdp_tx, n_channels, efx->max_vis); + netif_warn(efx, drv, efx->net_dev, + "XDP_TX and XDP_REDIRECT might decrease device's performance\n"); } else if (n_channels + n_xdp_ev > max_channels) { efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_SHARED; netif_warn(efx, drv, efx->net_dev, @@ -194,7 +194,7 @@ static int efx_allocate_msix_channels(struct efx_nic *efx, efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_DEDICATED; } - if (efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DISABLED) { + if (efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_BORROWED) { efx->n_xdp_channels = n_xdp_ev; efx->xdp_tx_per_channel = tx_per_ev; efx->xdp_tx_queue_count = n_xdp_tx; @@ -205,7 +205,7 @@ static int efx_allocate_msix_channels(struct efx_nic *efx, } else { efx->n_xdp_channels = 0; efx->xdp_tx_per_channel = 0; - efx->xdp_tx_queue_count = 0; + efx->xdp_tx_queue_count = n_xdp_tx; } if (vec_count < n_channels) { @@ -872,6 +872,20 @@ rollback: goto out; } +static inline int +efx_set_xdp_tx_queue(struct efx_nic *efx, int xdp_queue_number, + struct efx_tx_queue *tx_queue) +{ + if (xdp_queue_number >= efx->xdp_tx_queue_count) + return -EINVAL; + + netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is XDP %u, HW %u\n", + tx_queue->channel->channel, tx_queue->label, + xdp_queue_number, tx_queue->queue); + efx->xdp_tx_queues[xdp_queue_number] = tx_queue; + return 0; +} + int efx_set_channels(struct efx_nic *efx) { struct efx_tx_queue *tx_queue; @@ -910,20 +924,9 @@ int efx_set_channels(struct efx_nic *efx) if (efx_channel_is_xdp_tx(channel)) { efx_for_each_channel_tx_queue(tx_queue, channel) { tx_queue->queue = next_queue++; - - /* We may have a few left-over XDP TX - * queues owing to xdp_tx_queue_count - * not dividing evenly by EFX_MAX_TXQ_PER_CHANNEL. - * We still allocate and probe those - * TXQs, but never use them. - */ - if (xdp_queue_number < efx->xdp_tx_queue_count) { - netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is XDP %u, HW %u\n", - channel->channel, tx_queue->label, - xdp_queue_number, tx_queue->queue); - efx->xdp_tx_queues[xdp_queue_number] = tx_queue; + rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, tx_queue); + if (rc == 0) xdp_queue_number++; - } } } else { efx_for_each_channel_tx_queue(tx_queue, channel) { @@ -932,6 +935,17 @@ int efx_set_channels(struct efx_nic *efx) channel->channel, tx_queue->label, tx_queue->queue); } + + /* If XDP is borrowing queues from net stack, it must use the queue + * with no csum offload, which is the first one of the channel + * (note: channel->tx_queue_by_type is not initialized yet) + */ + if (efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_BORROWED) { + tx_queue = &channel->tx_queue[0]; + rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, tx_queue); + if (rc == 0) + xdp_queue_number++; + } } } } @@ -940,19 +954,15 @@ int efx_set_channels(struct efx_nic *efx) WARN_ON(efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED && xdp_queue_number > efx->xdp_tx_queue_count); - /* If we have less XDP TX queues than CPUs, assign the already existing - * queues to the exceeding CPUs (this means that we will have to use - * locking when transmitting with XDP) + /* If we have more CPUs than assigned XDP TX queues, assign the already + * existing queues to the exceeding CPUs */ next_queue = 0; while (xdp_queue_number < efx->xdp_tx_queue_count) { tx_queue = efx->xdp_tx_queues[next_queue++]; - channel = tx_queue->channel; - netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is XDP %u, HW %u\n", - channel->channel, tx_queue->label, - xdp_queue_number, tx_queue->queue); - - efx->xdp_tx_queues[xdp_queue_number++] = tx_queue; + rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, tx_queue); + if (rc == 0) + xdp_queue_number++; } rc = netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels); diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index e731c766f709..f6981810039d 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -785,7 +785,7 @@ struct efx_async_filter_insertion { enum efx_xdp_tx_queues_mode { EFX_XDP_TX_QUEUES_DEDICATED, /* one queue per core, locking not needed */ EFX_XDP_TX_QUEUES_SHARED, /* each queue used by more than 1 core */ - EFX_XDP_TX_QUEUES_DISABLED /* xdp tx not available */ + EFX_XDP_TX_QUEUES_BORROWED /* queues borrowed from net stack */ }; /** diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index c7afd6cda902..d16e031e95f4 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -428,10 +428,8 @@ int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs, unsigned int len; int space; int cpu; - int i; + int i = 0; - if (unlikely(efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_DISABLED)) - return -EINVAL; if (unlikely(n && !xdpfs)) return -EINVAL; if (unlikely(!n)) @@ -448,6 +446,15 @@ int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs, if (efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED) HARD_TX_LOCK(efx->net_dev, tx_queue->core_txq, cpu); + /* If we're borrowing net stack queues we have to handle stop-restart + * or we might block the queue and it will be considered as frozen + */ + if (efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_BORROWED) { + if (netif_tx_queue_stopped(tx_queue->core_txq)) + goto unlock; + efx_tx_maybe_stop_queue(tx_queue); + } + /* Check for available space. We should never need multiple * descriptors per frame. */ @@ -486,6 +493,7 @@ int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs, if (flush && i > 0) efx_nic_push_buffers(tx_queue); +unlock: if (efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED) HARD_TX_UNLOCK(efx->net_dev, tx_queue->core_txq); From e011912651bdf72840d88e8a8de3716bbcc4be99 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 8 Sep 2021 21:49:53 -0700 Subject: [PATCH 034/543] net: ni65: Avoid typecast of pointer to u32 Building alpha:allmodconfig results in the following error. drivers/net/ethernet/amd/ni65.c: In function 'ni65_stop_start': drivers/net/ethernet/amd/ni65.c:751:37: error: cast from pointer to integer of different size buffer[i] = (u32) isa_bus_to_virt(tmdp->u.buffer); 'buffer[]' is declared as unsigned long, so replace the typecast to u32 with a typecast to unsigned long to fix the problem. Cc: Arnd Bergmann Signed-off-by: Guenter Roeck Acked-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/ni65.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/ni65.c b/drivers/net/ethernet/amd/ni65.c index b5df7ad5a83f..032e8922b482 100644 --- a/drivers/net/ethernet/amd/ni65.c +++ b/drivers/net/ethernet/amd/ni65.c @@ -748,7 +748,7 @@ static void ni65_stop_start(struct net_device *dev,struct priv *p) #ifdef XMT_VIA_SKB skb_save[i] = p->tmd_skb[i]; #endif - buffer[i] = (u32) isa_bus_to_virt(tmdp->u.buffer); + buffer[i] = (unsigned long)isa_bus_to_virt(tmdp->u.buffer); blen[i] = tmdp->blen; tmdp->u.s.status = 0x0; } From 2bab94090b01bc593d8bc25f68df41f198721173 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 6 Sep 2021 21:53:58 -0700 Subject: [PATCH 035/543] spi: tegra20-slink: Declare runtime suspend and resume functions conditionally The following build error is seen with CONFIG_PM=n. drivers/spi/spi-tegra20-slink.c:1188:12: error: 'tegra_slink_runtime_suspend' defined but not used drivers/spi/spi-tegra20-slink.c:1200:12: error: 'tegra_slink_runtime_resume' defined but not used Declare the functions only if PM is enabled. While at it, remove the unnecessary forward declarations. Signed-off-by: Guenter Roeck Reviewed-by: Geert Uytterhoeven Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20210907045358.2138282-1-linux@roeck-us.net Signed-off-by: Mark Brown --- drivers/spi/spi-tegra20-slink.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c index ebd27f883033..8ce840c7ecc8 100644 --- a/drivers/spi/spi-tegra20-slink.c +++ b/drivers/spi/spi-tegra20-slink.c @@ -204,9 +204,6 @@ struct tegra_slink_data { struct dma_async_tx_descriptor *tx_dma_desc; }; -static int tegra_slink_runtime_suspend(struct device *dev); -static int tegra_slink_runtime_resume(struct device *dev); - static inline u32 tegra_slink_readl(struct tegra_slink_data *tspi, unsigned long reg) { @@ -1185,6 +1182,7 @@ static int tegra_slink_resume(struct device *dev) } #endif +#ifdef CONFIG_PM static int tegra_slink_runtime_suspend(struct device *dev) { struct spi_master *master = dev_get_drvdata(dev); @@ -1210,6 +1208,7 @@ static int tegra_slink_runtime_resume(struct device *dev) } return 0; } +#endif /* CONFIG_PM */ static const struct dev_pm_ops slink_pm_ops = { SET_RUNTIME_PM_OPS(tegra_slink_runtime_suspend, From bfe84435090a6c85271b02a42b1d83fef9ff7cc7 Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Thu, 9 Sep 2021 08:12:23 -0700 Subject: [PATCH 036/543] ice: Correctly deal with PFs that do not support RDMA There are two cases where the current PF does not support RDMA functionality. The first is if the NVM loaded on the device is set to not support RDMA (common_caps.rdma is false). The second is if the kernel bonding driver has included the current PF in an active link aggregate. When the driver has determined that this PF does not support RDMA, then auxiliary devices should not be created on the auxiliary bus. Without a device on the auxiliary bus, even if the irdma driver is present, there will be no RDMA activity attempted on this PF. Currently, in the reset flow, an attempt to create auxiliary devices is performed without regard to the ability of the PF. There needs to be a check in ice_aux_plug_dev (as the central point that creates auxiliary devices) to see if the PF is in a state to support the functionality. When disabling and re-enabling RDMA due to the inclusion/removal of the PF in a link aggregate, we also need to set/clear the bit which controls auxiliary device creation so that a reset recovery in a link aggregate situation doesn't try to create auxiliary devices when it shouldn't. Fixes: f9f5301e7e2d ("ice: Register auxiliary device to provide RDMA") Reported-by: Yongxin Liu Signed-off-by: Dave Ertman Signed-off-by: Tony Nguyen Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ice/ice.h | 2 ++ drivers/net/ethernet/intel/ice/ice_idc.c | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index eadcb9958346..3c4f08d20414 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -695,6 +695,7 @@ static inline void ice_set_rdma_cap(struct ice_pf *pf) { if (pf->hw.func_caps.common_cap.rdma && pf->num_rdma_msix) { set_bit(ICE_FLAG_RDMA_ENA, pf->flags); + set_bit(ICE_FLAG_AUX_ENA, pf->flags); ice_plug_aux_dev(pf); } } @@ -707,5 +708,6 @@ static inline void ice_clear_rdma_cap(struct ice_pf *pf) { ice_unplug_aux_dev(pf); clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); + clear_bit(ICE_FLAG_AUX_ENA, pf->flags); } #endif /* _ICE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c index 1f2afdf6cd48..adcc9a251595 100644 --- a/drivers/net/ethernet/intel/ice/ice_idc.c +++ b/drivers/net/ethernet/intel/ice/ice_idc.c @@ -271,6 +271,12 @@ int ice_plug_aux_dev(struct ice_pf *pf) struct auxiliary_device *adev; int ret; + /* if this PF doesn't support a technology that requires auxiliary + * devices, then gracefully exit + */ + if (!ice_is_aux_ena(pf)) + return 0; + iadev = kzalloc(sizeof(*iadev), GFP_KERNEL); if (!iadev) return -ENOMEM; From e3f0cc1a945fcefec0c7c9d9dfd028a51daa1846 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 9 Sep 2021 10:33:28 -0700 Subject: [PATCH 037/543] r6040: Restore MDIO clock frequency after MAC reset A number of users have reported that they were not able to get the PHY to successfully link up, especially after commit c36757eb9dee ("net: phy: consider AN_RESTART status when reading link status") where we stopped reading just BMSR, but we also read BMCR to determine the link status. Andrius at NetBSD did a wonderful job at debugging the problem and found out that the MDIO bus clock frequency would be incorrectly set back to its default value which would prevent the MDIO bus controller from reading PHY registers properly. Back when we only read BMSR, if we read all 1s, we could falsely indicate a link status, though in general there is a cable plugged in, so this went unnoticed. After a second read of BMCR was added, a wrong read will lead to the inability to determine a link UP condition which is when it started to be visibly broken, even if it was long before that. The fix consists in restoring the value of the MD_CSR register that was set prior to the MAC reset. Link: http://gnats.netbsd.org/cgi-bin/query-pr-single.pl?number=53494 Fixes: 90f750a81a29 ("r6040: consolidate MAC reset to its own function") Reported-by: Andrius V Reported-by: Darek Strugacz Tested-by: Darek Strugacz Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/rdc/r6040.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c index 4b2eca5e08e2..01ef5efd7bc2 100644 --- a/drivers/net/ethernet/rdc/r6040.c +++ b/drivers/net/ethernet/rdc/r6040.c @@ -119,6 +119,8 @@ #define PHY_ST 0x8A /* PHY status register */ #define MAC_SM 0xAC /* MAC status machine */ #define MAC_SM_RST 0x0002 /* MAC status machine reset */ +#define MD_CSC 0xb6 /* MDC speed control register */ +#define MD_CSC_DEFAULT 0x0030 #define MAC_ID 0xBE /* Identifier register */ #define TX_DCNT 0x80 /* TX descriptor count */ @@ -355,8 +357,9 @@ static void r6040_reset_mac(struct r6040_private *lp) { void __iomem *ioaddr = lp->base; int limit = MAC_DEF_TIMEOUT; - u16 cmd; + u16 cmd, md_csc; + md_csc = ioread16(ioaddr + MD_CSC); iowrite16(MAC_RST, ioaddr + MCR1); while (limit--) { cmd = ioread16(ioaddr + MCR1); @@ -368,6 +371,10 @@ static void r6040_reset_mac(struct r6040_private *lp) iowrite16(MAC_SM_RST, ioaddr + MAC_SM); iowrite16(0, ioaddr + MAC_SM); mdelay(5); + + /* Restore MDIO clock frequency */ + if (md_csc != MD_CSC_DEFAULT) + iowrite16(md_csc, ioaddr + MD_CSC); } static void r6040_init_mac_regs(struct net_device *dev) From dc41c4a98a76640e7085815f937eadd1f336ba85 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Thu, 9 Sep 2021 20:49:47 +0300 Subject: [PATCH 038/543] net/packet: clarify source of pr_*() messages Add pr_fmt macro to spell out the source of messages in prefix. Before this patch: packet size is too long (1543 > 1518) With this patch: af_packet: packet size is too long (1543 > 1518) Signed-off-by: Baruch Siach Signed-off-by: David S. Miller --- net/packet/af_packet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 543365f58e97..2a2bc64f75cf 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -46,6 +46,8 @@ * Copyright (C) 2011, */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include From 20e100f52730cd0db609e559799c1712b5f27582 Mon Sep 17 00:00:00 2001 From: Shai Malin Date: Fri, 10 Sep 2021 11:33:56 +0300 Subject: [PATCH 039/543] qed: Handle management FW error Handle MFW (management FW) error response in order to avoid a crash during recovery flows. Changes from v1: - Add "Fixes tag". Fixes: tag 5e7ba042fd05 ("qed: Fix reading stale configuration information") Signed-off-by: Ariel Elior Signed-off-by: Shai Malin Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 6e5a6cc97d0e..24cd41567775 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -3367,6 +3367,7 @@ qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn, struct qed_nvm_image_att *p_image_att) { enum nvm_image_type type; + int rc; u32 i; /* Translate image_id into MFW definitions */ @@ -3395,7 +3396,10 @@ qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn, return -EINVAL; } - qed_mcp_nvm_info_populate(p_hwfn); + rc = qed_mcp_nvm_info_populate(p_hwfn); + if (rc) + return rc; + for (i = 0; i < p_hwfn->nvm_info.num_images; i++) if (type == p_hwfn->nvm_info.image_att[i].image_type) break; From 666eb96d85dcbc93aacc186a037db2e05b92b9f5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 10 Sep 2021 12:15:11 +0100 Subject: [PATCH 040/543] qlcnic: Remove redundant initialization of variable ret The variable ret is being initialized with a value that is never read, it is being updated later on. The assignment is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c index 0a2f34fc8b24..27dffa299ca6 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c @@ -1354,10 +1354,10 @@ static int qlcnic_83xx_copy_fw_file(struct qlcnic_adapter *adapter) struct qlc_83xx_fw_info *fw_info = adapter->ahw->fw_info; const struct firmware *fw = fw_info->fw; u32 dest, *p_cache, *temp; - int i, ret = -EIO; __le32 *temp_le; u8 data[16]; size_t size; + int i, ret; u64 addr; temp = vzalloc(fw->size); From 2f1aaf3ea666b737ad717b3d88667225aca23149 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 9 Sep 2021 08:49:59 -0700 Subject: [PATCH 041/543] bpf, mm: Fix lockdep warning triggered by stack_map_get_build_id_offset() Currently the bpf selftest "get_stack_raw_tp" triggered the warning: [ 1411.304463] WARNING: CPU: 3 PID: 140 at include/linux/mmap_lock.h:164 find_vma+0x47/0xa0 [ 1411.304469] Modules linked in: bpf_testmod(O) [last unloaded: bpf_testmod] [ 1411.304476] CPU: 3 PID: 140 Comm: systemd-journal Tainted: G W O 5.14.0+ #53 [ 1411.304479] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [ 1411.304481] RIP: 0010:find_vma+0x47/0xa0 [ 1411.304484] Code: de 48 89 ef e8 ba f5 fe ff 48 85 c0 74 2e 48 83 c4 08 5b 5d c3 48 8d bf 28 01 00 00 be ff ff ff ff e8 2d 9f d8 00 85 c0 75 d4 <0f> 0b 48 89 de 48 8 [ 1411.304487] RSP: 0018:ffffabd440403db8 EFLAGS: 00010246 [ 1411.304490] RAX: 0000000000000000 RBX: 00007f00ad80a0e0 RCX: 0000000000000000 [ 1411.304492] RDX: 0000000000000001 RSI: ffffffff9776b144 RDI: ffffffff977e1b0e [ 1411.304494] RBP: ffff9cf5c2f50000 R08: ffff9cf5c3eb25d8 R09: 00000000fffffffe [ 1411.304496] R10: 0000000000000001 R11: 00000000ef974e19 R12: ffff9cf5c39ae0e0 [ 1411.304498] R13: 0000000000000000 R14: 0000000000000000 R15: ffff9cf5c39ae0e0 [ 1411.304501] FS: 00007f00ae754780(0000) GS:ffff9cf5fba00000(0000) knlGS:0000000000000000 [ 1411.304504] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1411.304506] CR2: 000000003e34343c CR3: 0000000103a98005 CR4: 0000000000370ee0 [ 1411.304508] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1411.304510] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1411.304512] Call Trace: [ 1411.304517] stack_map_get_build_id_offset+0x17c/0x260 [ 1411.304528] __bpf_get_stack+0x18f/0x230 [ 1411.304541] bpf_get_stack_raw_tp+0x5a/0x70 [ 1411.305752] RAX: 0000000000000000 RBX: 5541f689495641d7 RCX: 0000000000000000 [ 1411.305756] RDX: 0000000000000001 RSI: ffffffff9776b144 RDI: ffffffff977e1b0e [ 1411.305758] RBP: ffff9cf5c02b2f40 R08: ffff9cf5ca7606c0 R09: ffffcbd43ee02c04 [ 1411.306978] bpf_prog_32007c34f7726d29_bpf_prog1+0xaf/0xd9c [ 1411.307861] R10: 0000000000000001 R11: 0000000000000044 R12: ffff9cf5c2ef60e0 [ 1411.307865] R13: 0000000000000005 R14: 0000000000000000 R15: ffff9cf5c2ef6108 [ 1411.309074] bpf_trace_run2+0x8f/0x1a0 [ 1411.309891] FS: 00007ff485141700(0000) GS:ffff9cf5fae00000(0000) knlGS:0000000000000000 [ 1411.309896] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1411.311221] syscall_trace_enter.isra.20+0x161/0x1f0 [ 1411.311600] CR2: 00007ff48514d90e CR3: 0000000107114001 CR4: 0000000000370ef0 [ 1411.312291] do_syscall_64+0x15/0x80 [ 1411.312941] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1411.313803] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 1411.314223] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1411.315082] RIP: 0033:0x7f00ad80a0e0 [ 1411.315626] Call Trace: [ 1411.315632] stack_map_get_build_id_offset+0x17c/0x260 To reproduce, first build `test_progs` binary: make -C tools/testing/selftests/bpf -j60 and then run the binary at tools/testing/selftests/bpf directory: ./test_progs -t get_stack_raw_tp The warning is due to commit 5b78ed24e8ec ("mm/pagemap: add mmap_assert_locked() annotations to find_vma*()") which added mmap_assert_locked() in find_vma() function. The mmap_assert_locked() function asserts that mm->mmap_lock needs to be held. But this is not the case for bpf_get_stack() or bpf_get_stackid() helper (kernel/bpf/stackmap.c), which uses mmap_read_trylock_non_owner() instead. Since mm->mmap_lock is not held in bpf_get_stack[id]() use case, the above warning is emitted during test run. This patch fixed the issue by (1). using mmap_read_trylock() instead of mmap_read_trylock_non_owner() to satisfy lockdep checking in find_vma(), and (2). droping lockdep for mmap_lock right before the irq_work_queue(). The function mmap_read_trylock_non_owner() is also removed since after this patch nobody calls it any more. Fixes: 5b78ed24e8ec ("mm/pagemap: add mmap_assert_locked() annotations to find_vma*()") Suggested-by: Jason Gunthorpe Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Reviewed-by: Liam R. Howlett Cc: Luigi Rizzo Cc: Jason Gunthorpe Cc: linux-mm@kvack.org Link: https://lore.kernel.org/bpf/20210909155000.1610299-1-yhs@fb.com --- include/linux/mmap_lock.h | 9 --------- kernel/bpf/stackmap.c | 10 ++++++++-- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 0540f0156f58..3af8f7fb067d 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -144,15 +144,6 @@ static inline void mmap_read_unlock(struct mm_struct *mm) __mmap_lock_trace_released(mm, false); } -static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm) -{ - if (mmap_read_trylock(mm)) { - rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_); - return true; - } - return false; -} - static inline void mmap_read_unlock_non_owner(struct mm_struct *mm) { up_read_non_owner(&mm->mmap_lock); diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index e8eefdf8cf3e..09a3fd97d329 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -179,7 +179,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, * with build_id. */ if (!user || !current || !current->mm || irq_work_busy || - !mmap_read_trylock_non_owner(current->mm)) { + !mmap_read_trylock(current->mm)) { /* cannot access current->mm, fall back to ips */ for (i = 0; i < trace_nr; i++) { id_offs[i].status = BPF_STACK_BUILD_ID_IP; @@ -204,9 +204,15 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, } if (!work) { - mmap_read_unlock_non_owner(current->mm); + mmap_read_unlock(current->mm); } else { work->mm = current->mm; + + /* The lock will be released once we're out of interrupt + * context. Tell lockdep that we've released it now so + * it doesn't complain that we forgot to release it. + */ + rwsem_release(¤t->mm->mmap_lock.dep_map, _RET_IP_); irq_work_queue(&work->irq_work); } } From 345e1ae0c6ba54f6a4d32154e80cadc2ee2ef1af Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 27 Aug 2021 23:22:02 +0100 Subject: [PATCH 042/543] afs: Fix missing put on afs_read objects and missing get on the key therein The afs_read objects created by afs_req_issue_op() get leaked because afs_alloc_read() returns a ref and then afs_fetch_data() gets its own ref which is released when the operation completes, but the initial ref is never released. Fix this by discarding the initial ref at the end of afs_req_issue_op(). This leak also covered another bug whereby a ref isn't got on the key attached to the read record by afs_req_issue_op(). This isn't a problem as long as the afs_read req never goes away... Fix this by calling key_get() in afs_req_issue_op(). This was found by the generic/074 test. It leaks a bunch of kmalloc-192 objects each time it is run, which can be observed by watching /proc/slabinfo. Fixes: f7605fa869cf ("afs: Fix leak of afs_read objects") Reported-by: Marc Dionne Signed-off-by: David Howells Reviewed-and-tested-by: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/163010394740.3035676.8516846193899793357.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/163111665914.283156.3038561975681836591.stgit@warthog.procyon.org.uk/ --- fs/afs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/afs/file.c b/fs/afs/file.c index db035ae2a134..6688fff14b0b 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -295,7 +295,7 @@ static void afs_req_issue_op(struct netfs_read_subrequest *subreq) fsreq->subreq = subreq; fsreq->pos = subreq->start + subreq->transferred; fsreq->len = subreq->len - subreq->transferred; - fsreq->key = subreq->rreq->netfs_priv; + fsreq->key = key_get(subreq->rreq->netfs_priv); fsreq->vnode = vnode; fsreq->iter = &fsreq->def_iter; @@ -304,6 +304,7 @@ static void afs_req_issue_op(struct netfs_read_subrequest *subreq) fsreq->pos, fsreq->len); afs_fetch_data(fsreq->vnode, fsreq); + afs_put_read(fsreq); } static int afs_symlink_readpage(struct page *page) From 581b2027af0018944ba301d68e7af45c6d1128b5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 1 Sep 2021 09:15:21 +0100 Subject: [PATCH 043/543] afs: Fix page leak There's a loop in afs_extend_writeback() that adds extra pages to a write we want to make to improve the efficiency of the writeback by making it larger. This loop stops, however, if we hit a page we can't write back from immediately, but it doesn't get rid of the page ref we speculatively acquired. This was caused by the removal of the cleanup loop when the code switched from using find_get_pages_contig() to xarray scanning as the latter only gets a single page at a time, not a batch. Fix this by putting the page on a ref on an early break from the loop. Unfortunately, we can't just add that page to the pagevec we're employing as we'll go through that and add those pages to the RPC call. This was found by the generic/074 test. It leaks ~4GiB of RAM each time it is run - which can be observed with "top". Fixes: e87b03f5830e ("afs: Prepare for use of THPs") Reported-by: Marc Dionne Signed-off-by: David Howells Reviewed-and-tested-by: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/163111666635.283156.177701903478910460.stgit@warthog.procyon.org.uk/ --- fs/afs/write.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index c0534697268e..66b235266893 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -471,13 +471,18 @@ static void afs_extend_writeback(struct address_space *mapping, } /* Has the page moved or been split? */ - if (unlikely(page != xas_reload(&xas))) + if (unlikely(page != xas_reload(&xas))) { + put_page(page); break; + } - if (!trylock_page(page)) + if (!trylock_page(page)) { + put_page(page); break; + } if (!PageDirty(page) || PageWriteback(page)) { unlock_page(page); + put_page(page); break; } @@ -487,6 +492,7 @@ static void afs_extend_writeback(struct address_space *mapping, t = afs_page_dirty_to(page, priv); if (f != 0 && !new_content) { unlock_page(page); + put_page(page); break; } From 08dad2f4d541fcfe5e7bfda72cc6314bbfd2802f Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Fri, 10 Sep 2021 21:55:34 +0200 Subject: [PATCH 044/543] net: stmmac: allow CSR clock of 300MHz The Synopsys Ethernet IP uses the CSR clock as a base clock for MDC. The divisor used is set in the MAC_MDIO_Address register field CR (Clock Rate) The divisor is there to change the CSR clock into a clock that falls below the IEEE 802.3 specified max frequency of 2.5MHz. If the CSR clock is 300MHz, the code falls back to using the reset value in the MAC_MDIO_Address register, as described in the comment above this code. However, 300MHz is actually an allowed value and the proper divider can be estimated quite easily (it's just 1Hz difference!) A CSR frequency of 300MHz with the maximum clock rate value of 0x5 (STMMAC_CSR_250_300M, a divisor of 124) gives somewhere around ~2.42MHz which is below the IEEE 802.3 specified maximum. For the ARTPEC-8 SoC, the CSR clock is this problematic 300MHz, and unfortunately, the reset-value of the MAC_MDIO_Address CR field is 0x0. This leads to a clock rate of zero and a divisor of 42, and gives an MDC frequency of ~7.14MHz. Allow CSR clock of 300MHz by making the comparison inclusive. Signed-off-by: Jesper Nilsson Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 246f84fedbc8..553c4403258a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -309,7 +309,7 @@ static void stmmac_clk_csr_set(struct stmmac_priv *priv) priv->clk_csr = STMMAC_CSR_100_150M; else if ((clk_rate >= CSR_F_150M) && (clk_rate < CSR_F_250M)) priv->clk_csr = STMMAC_CSR_150_250M; - else if ((clk_rate >= CSR_F_250M) && (clk_rate < CSR_F_300M)) + else if ((clk_rate >= CSR_F_250M) && (clk_rate <= CSR_F_300M)) priv->clk_csr = STMMAC_CSR_250_300M; } From 7ad9bb9d0f357dcab5eb9a0f28d1c8983c48434c Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Fri, 10 Sep 2021 18:57:13 +0000 Subject: [PATCH 045/543] asm-generic/hyperv: provide cpumask_to_vpset_noself This is a new variant which removes `self' cpu from the vpset. It will be used in Hyper-V enlightened IPI code. Signed-off-by: Wei Liu Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20210910185714.299411-2-wei.liu@kernel.org --- include/asm-generic/mshyperv.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index 9a000ba2bb75..9a134806f1d5 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -184,10 +184,12 @@ static inline int hv_cpu_number_to_vp_number(int cpu_number) return hv_vp_index[cpu_number]; } -static inline int cpumask_to_vpset(struct hv_vpset *vpset, - const struct cpumask *cpus) +static inline int __cpumask_to_vpset(struct hv_vpset *vpset, + const struct cpumask *cpus, + bool exclude_self) { int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1; + int this_cpu = smp_processor_id(); /* valid_bank_mask can represent up to 64 banks */ if (hv_max_vp_index / 64 >= 64) @@ -205,6 +207,8 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset, * Some banks may end up being empty but this is acceptable. */ for_each_cpu(cpu, cpus) { + if (exclude_self && cpu == this_cpu) + continue; vcpu = hv_cpu_number_to_vp_number(cpu); if (vcpu == VP_INVAL) return -1; @@ -219,6 +223,19 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset, return nr_bank; } +static inline int cpumask_to_vpset(struct hv_vpset *vpset, + const struct cpumask *cpus) +{ + return __cpumask_to_vpset(vpset, cpus, false); +} + +static inline int cpumask_to_vpset_noself(struct hv_vpset *vpset, + const struct cpumask *cpus) +{ + WARN_ON_ONCE(preemptible()); + return __cpumask_to_vpset(vpset, cpus, true); +} + void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die); bool hv_is_hyperv_initialized(void); bool hv_is_hibernation_supported(void); From dfb5c1e12c28e35e4d4e5bc8022b0e9d585b89a7 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Fri, 10 Sep 2021 18:57:14 +0000 Subject: [PATCH 046/543] x86/hyperv: remove on-stack cpumask from hv_send_ipi_mask_allbutself It is not a good practice to allocate a cpumask on stack, given it may consume up to 1 kilobytes of stack space if the kernel is configured to have 8192 cpus. The internal helper functions __send_ipi_mask{,_ex} need to loop over the provided mask anyway, so it is not too difficult to skip `self' there. We can thus do away with the on-stack cpumask in hv_send_ipi_mask_allbutself. Adjust call sites of __send_ipi_mask as needed. Reported-by: Linus Torvalds Suggested-by: Michael Kelley Suggested-by: Linus Torvalds Fixes: 68bb7bfb7985d ("X86/Hyper-V: Enable IPI enlightenments") Signed-off-by: Wei Liu Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20210910185714.299411-3-wei.liu@kernel.org --- arch/x86/hyperv/hv_apic.c | 43 +++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 90e682a92820..32a1ad356c18 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -99,7 +99,8 @@ static void hv_apic_eoi_write(u32 reg, u32 val) /* * IPI implementation on Hyper-V. */ -static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector) +static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector, + bool exclude_self) { struct hv_send_ipi_ex **arg; struct hv_send_ipi_ex *ipi_arg; @@ -123,7 +124,10 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector) if (!cpumask_equal(mask, cpu_present_mask)) { ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K; - nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask); + if (exclude_self) + nr_bank = cpumask_to_vpset_noself(&(ipi_arg->vp_set), mask); + else + nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask); } if (nr_bank < 0) goto ipi_mask_ex_done; @@ -138,15 +142,25 @@ ipi_mask_ex_done: return hv_result_success(status); } -static bool __send_ipi_mask(const struct cpumask *mask, int vector) +static bool __send_ipi_mask(const struct cpumask *mask, int vector, + bool exclude_self) { - int cur_cpu, vcpu; + int cur_cpu, vcpu, this_cpu = smp_processor_id(); struct hv_send_ipi ipi_arg; u64 status; + unsigned int weight; trace_hyperv_send_ipi_mask(mask, vector); - if (cpumask_empty(mask)) + weight = cpumask_weight(mask); + + /* + * Do nothing if + * 1. the mask is empty + * 2. the mask only contains self when exclude_self is true + */ + if (weight == 0 || + (exclude_self && weight == 1 && cpumask_test_cpu(this_cpu, mask))) return true; if (!hv_hypercall_pg) @@ -172,6 +186,8 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector) ipi_arg.cpu_mask = 0; for_each_cpu(cur_cpu, mask) { + if (exclude_self && cur_cpu == this_cpu) + continue; vcpu = hv_cpu_number_to_vp_number(cur_cpu); if (vcpu == VP_INVAL) return false; @@ -191,7 +207,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector) return hv_result_success(status); do_ex_hypercall: - return __send_ipi_mask_ex(mask, vector); + return __send_ipi_mask_ex(mask, vector, exclude_self); } static bool __send_ipi_one(int cpu, int vector) @@ -208,7 +224,7 @@ static bool __send_ipi_one(int cpu, int vector) return false; if (vp >= 64) - return __send_ipi_mask_ex(cpumask_of(cpu), vector); + return __send_ipi_mask_ex(cpumask_of(cpu), vector, false); status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, vector, BIT_ULL(vp)); return hv_result_success(status); @@ -222,20 +238,13 @@ static void hv_send_ipi(int cpu, int vector) static void hv_send_ipi_mask(const struct cpumask *mask, int vector) { - if (!__send_ipi_mask(mask, vector)) + if (!__send_ipi_mask(mask, vector, false)) orig_apic.send_IPI_mask(mask, vector); } static void hv_send_ipi_mask_allbutself(const struct cpumask *mask, int vector) { - unsigned int this_cpu = smp_processor_id(); - struct cpumask new_mask; - const struct cpumask *local_mask; - - cpumask_copy(&new_mask, mask); - cpumask_clear_cpu(this_cpu, &new_mask); - local_mask = &new_mask; - if (!__send_ipi_mask(local_mask, vector)) + if (!__send_ipi_mask(mask, vector, true)) orig_apic.send_IPI_mask_allbutself(mask, vector); } @@ -246,7 +255,7 @@ static void hv_send_ipi_allbutself(int vector) static void hv_send_ipi_all(int vector) { - if (!__send_ipi_mask(cpu_online_mask, vector)) + if (!__send_ipi_mask(cpu_online_mask, vector, false)) orig_apic.send_IPI_all(vector); } From ce062a0adbfe933b1932235fdfd874c4c91d1bb0 Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Sat, 11 Sep 2021 17:50:09 +0200 Subject: [PATCH 047/543] net: dsa: qca8k: fix kernel panic with legacy mdio mapping When the mdio legacy mapping is used the mii_bus priv registered by DSA refer to the dsa switch struct instead of the qca8k_priv struct and causes a kernel panic. Create dedicated function when the internal dedicated mdio driver is used to properly handle the 2 different implementation. Fixes: 759bafb8a322 ("net: dsa: qca8k: add support for internal phy and internal mdio") Signed-off-by: Ansuel Smith Signed-off-by: David S. Miller --- drivers/net/dsa/qca8k.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 1f63f50f73f1..bda5a9bf4f52 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -643,10 +643,8 @@ qca8k_mdio_busy_wait(struct mii_bus *bus, u32 reg, u32 mask) } static int -qca8k_mdio_write(struct mii_bus *salve_bus, int phy, int regnum, u16 data) +qca8k_mdio_write(struct mii_bus *bus, int phy, int regnum, u16 data) { - struct qca8k_priv *priv = salve_bus->priv; - struct mii_bus *bus = priv->bus; u16 r1, r2, page; u32 val; int ret; @@ -682,10 +680,8 @@ exit: } static int -qca8k_mdio_read(struct mii_bus *salve_bus, int phy, int regnum) +qca8k_mdio_read(struct mii_bus *bus, int phy, int regnum) { - struct qca8k_priv *priv = salve_bus->priv; - struct mii_bus *bus = priv->bus; u16 r1, r2, page; u32 val; int ret; @@ -726,6 +722,24 @@ exit: return ret; } +static int +qca8k_internal_mdio_write(struct mii_bus *slave_bus, int phy, int regnum, u16 data) +{ + struct qca8k_priv *priv = slave_bus->priv; + struct mii_bus *bus = priv->bus; + + return qca8k_mdio_write(bus, phy, regnum, data); +} + +static int +qca8k_internal_mdio_read(struct mii_bus *slave_bus, int phy, int regnum) +{ + struct qca8k_priv *priv = slave_bus->priv; + struct mii_bus *bus = priv->bus; + + return qca8k_mdio_read(bus, phy, regnum); +} + static int qca8k_phy_write(struct dsa_switch *ds, int port, int regnum, u16 data) { @@ -775,8 +789,8 @@ qca8k_mdio_register(struct qca8k_priv *priv, struct device_node *mdio) bus->priv = (void *)priv; bus->name = "qca8k slave mii"; - bus->read = qca8k_mdio_read; - bus->write = qca8k_mdio_write; + bus->read = qca8k_internal_mdio_read; + bus->write = qca8k_internal_mdio_write; snprintf(bus->id, MII_BUS_ID_SIZE, "qca8k-%d", ds->index); From 1b704b27beb11ce147d64b21c914e57afbfb5656 Mon Sep 17 00:00:00 2001 From: Andrea Claudi Date: Sat, 11 Sep 2021 16:14:18 +0200 Subject: [PATCH 048/543] selftest: net: fix typo in altname test If altname deletion of the short alternative name fails, the error message printed is: "Failed to add short alternative name". This is obviously a typo, as we are testing altname deletion. Fix this using a proper error message. Fixes: f95e6c9c4617 ("selftest: net: add alternative names test") Signed-off-by: Andrea Claudi Signed-off-by: David S. Miller --- tools/testing/selftests/net/altnames.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/altnames.sh b/tools/testing/selftests/net/altnames.sh index 4254ddc3f70b..1ef9e4159bba 100755 --- a/tools/testing/selftests/net/altnames.sh +++ b/tools/testing/selftests/net/altnames.sh @@ -45,7 +45,7 @@ altnames_test() check_err $? "Got unexpected long alternative name from link show JSON" ip link property del $DUMMY_DEV altname $SHORT_NAME - check_err $? "Failed to add short alternative name" + check_err $? "Failed to delete short alternative name" ip -j -p link show $SHORT_NAME &>/dev/null check_fail $? "Unexpected success while trying to do link show with deleted short alternative name" From f11ee2ad25b22c2ee587045dd6999434375532f7 Mon Sep 17 00:00:00 2001 From: Len Baker Date: Sat, 11 Sep 2021 12:28:18 +0200 Subject: [PATCH 049/543] net: mana: Prefer struct_size over open coded arithmetic As noted in the "Deprecated Interfaces, Language Features, Attributes, and Conventions" documentation [1], size calculations (especially multiplication) should not be performed in memory allocator (or similar) function arguments due to the risk of them overflowing. This could lead to values wrapping around and a smaller allocation being made than the caller was expecting. Using those allocations could lead to linear overflows of heap memory and other misbehaviors. So, use the struct_size() helper to do the arithmetic instead of the argument "size + count * size" in the kzalloc() function. [1] https://www.kernel.org/doc/html/v5.14/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments Signed-off-by: Len Baker Reviewed-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/ethernet/microsoft/mana/hw_channel.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index c1310ea1c216..d5c485a6d284 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -398,9 +398,7 @@ static int mana_hwc_alloc_dma_buf(struct hw_channel_context *hwc, u16 q_depth, int err; u16 i; - dma_buf = kzalloc(sizeof(*dma_buf) + - q_depth * sizeof(struct hwc_work_request), - GFP_KERNEL); + dma_buf = kzalloc(struct_size(dma_buf, reqs, q_depth), GFP_KERNEL); if (!dma_buf) return -ENOMEM; From 16c8d2df7ec0eed31b7d3b61cb13206a7fb930cc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 12 Sep 2021 06:45:07 -0600 Subject: [PATCH 050/543] io_uring: ensure symmetry in handling iter types in loop_rw_iter() When setting up the next segment, we check what type the iter is and handle it accordingly. However, when incrementing and processed amount we do not, and both iter advance and addr/len are adjusted, regardless of type. Split the increment side just like we do on the setup side. Fixes: 4017eb91a9e7 ("io_uring: make loop_rw_iter() use original user supplied pointers") Cc: stable@vger.kernel.org Reported-by: Valentina Palmiotti Reviewed-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 16fb7436043c..66a7414c3756 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3263,12 +3263,15 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter) ret = nr; break; } + if (!iov_iter_is_bvec(iter)) { + iov_iter_advance(iter, nr); + } else { + req->rw.len -= nr; + req->rw.addr += nr; + } ret += nr; if (nr != iovec.iov_len) break; - req->rw.len -= nr; - req->rw.addr += nr; - iov_iter_advance(iter, nr); } return ret; From 7a842fb589e3cdbe205bc16dc37c30cf13383159 Mon Sep 17 00:00:00 2001 From: Hao Xu Date: Sun, 12 Sep 2021 03:40:50 +0800 Subject: [PATCH 051/543] io-wq: code clean of io_wqe_create_worker() Remove do_create to save a local variable. Signed-off-by: Hao Xu Signed-off-by: Jens Axboe --- fs/io-wq.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 6c55362c1f99..a1685b40a4bf 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -246,8 +246,6 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe, */ static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct) { - bool do_create = false; - /* * Most likely an attempt to queue unbounded work on an io_wq that * wasn't setup with any unbounded workers. @@ -256,18 +254,15 @@ static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct) pr_warn_once("io-wq is not configured for unbound workers"); raw_spin_lock(&wqe->lock); - if (acct->nr_workers < acct->max_workers) { - acct->nr_workers++; - do_create = true; + if (acct->nr_workers == acct->max_workers) { + raw_spin_unlock(&wqe->lock); + return true; } + acct->nr_workers++; raw_spin_unlock(&wqe->lock); - if (do_create) { - atomic_inc(&acct->nr_running); - atomic_inc(&wqe->wq->worker_refs); - return create_io_worker(wqe->wq, wqe, acct->index); - } - - return true; + atomic_inc(&acct->nr_running); + atomic_inc(&wqe->wq->worker_refs); + return create_io_worker(wqe->wq, wqe, acct->index); } static void io_wqe_inc_running(struct io_worker *worker) From 767a65e9f31789d80e41edd03a802314905e8fbf Mon Sep 17 00:00:00 2001 From: Hao Xu Date: Sun, 12 Sep 2021 03:40:52 +0800 Subject: [PATCH 052/543] io-wq: fix potential race of acct->nr_workers Given max_worker is 1, and we currently have 1 running and it is exiting. There may be race like: io_wqe_enqueue worker1 no work there and timeout unlock(wqe->lock) ->insert work -->io_worker_exit lock(wqe->lock) ->if(!nr_workers) //it's still 1 unlock(wqe->lock) goto run_cancel lock(wqe->lock) nr_workers-- ->dec_running ->worker creation fails unlock(wqe->lock) We enqueued one work but there is no workers, causes hung. Signed-off-by: Hao Xu Signed-off-by: Jens Axboe --- fs/io-wq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index a1685b40a4bf..3d4460df845c 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -176,7 +176,6 @@ static void io_worker_ref_put(struct io_wq *wq) static void io_worker_exit(struct io_worker *worker) { struct io_wqe *wqe = worker->wqe; - struct io_wqe_acct *acct = io_wqe_get_acct(worker); if (refcount_dec_and_test(&worker->ref)) complete(&worker->ref_done); @@ -186,7 +185,6 @@ static void io_worker_exit(struct io_worker *worker) if (worker->flags & IO_WORKER_F_FREE) hlist_nulls_del_rcu(&worker->nulls_node); list_del_rcu(&worker->all_list); - acct->nr_workers--; preempt_disable(); io_wqe_dec_running(worker); worker->flags = 0; @@ -569,6 +567,7 @@ loop: } /* timed out, exit unless we're the last worker */ if (last_timeout && acct->nr_workers > 1) { + acct->nr_workers--; raw_spin_unlock(&wqe->lock); __set_current_state(TASK_RUNNING); break; From 67f3b2f822b7e71cfc9b42dbd9f3144fa2933e0b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 6 Sep 2021 14:50:03 +0800 Subject: [PATCH 053/543] blk-mq: avoid to iterate over stale request blk-mq can't run allocating driver tag and updating ->rqs[tag] atomically, meantime blk-mq doesn't clear ->rqs[tag] after the driver tag is released. So there is chance to iterating over one stale request just after the tag is allocated and before updating ->rqs[tag]. scsi_host_busy_iter() calls scsi_host_check_in_flight() to count scsi in-flight requests after scsi host is blocked, so no new scsi command can be marked as SCMD_STATE_INFLIGHT. However, driver tag allocation still can be run by blk-mq core. One request is marked as SCMD_STATE_INFLIGHT, but this request may have been kept in another slot of ->rqs[], meantime the slot can be allocated out but ->rqs[] isn't updated yet. Then this in-flight request is counted twice as SCMD_STATE_INFLIGHT. This way causes trouble in handling scsi error. Fixes the issue by not iterating over stale request. Cc: linux-scsi@vger.kernel.org Cc: "Martin K. Petersen" Reported-by: luojiaxing Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20210906065003.439019-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 86f87346232a..ff5caeb82542 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -208,7 +208,7 @@ static struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags, spin_lock_irqsave(&tags->lock, flags); rq = tags->rqs[bitnr]; - if (!rq || !refcount_inc_not_zero(&rq->ref)) + if (!rq || rq->tag != bitnr || !refcount_inc_not_zero(&rq->ref)) rq = NULL; spin_unlock_irqrestore(&tags->lock, flags); return rq; From 3978d816523991dd86cf9aae88c295230a5ea3b2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 1 Sep 2021 19:22:50 +0100 Subject: [PATCH 054/543] afs: Add missing vnode validation checks afs_d_revalidate() should only be validating the directory entry it is given and the directory to which that belongs; it shouldn't be validating the inode/vnode to which that dentry points. Besides, validation need to be done even if we don't call afs_d_revalidate() - which might be the case if we're starting from a file descriptor. In order for afs_d_revalidate() to be fixed, validation points must be added in some other places. Certain directory operations, such as afs_unlink(), already check this, but not all and not all file operations either. Note that the validation of a vnode not only checks to see if the attributes we have are correct, but also gets a promise from the server to notify us if that file gets changed by a third party. Add the following checks: - Check the vnode we're going to make a hard link to. - Check the vnode we're going to move/rename. - Check the vnode we're going to read from. - Check the vnode we're going to write to. - Check the vnode we're going to sync. - Check the vnode we're going to make a mapped page writable for. Some of these aren't strictly necessary as we're going to perform a server operation that might get the attributes anyway from which we can determine if something changed - though it might not get us a callback promise. Signed-off-by: David Howells Tested-by: Markus Suvanto cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/163111667354.283156.12720698333342917516.stgit@warthog.procyon.org.uk/ --- fs/afs/dir.c | 11 +++++++++++ fs/afs/file.c | 16 +++++++++++++++- fs/afs/write.c | 17 +++++++++++++++-- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index ac829e63c570..a8e3ae55f1f9 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -1792,6 +1792,10 @@ static int afs_link(struct dentry *from, struct inode *dir, goto error; } + ret = afs_validate(vnode, op->key); + if (ret < 0) + goto error_op; + afs_op_set_vnode(op, 0, dvnode); afs_op_set_vnode(op, 1, vnode); op->file[0].dv_delta = 1; @@ -1805,6 +1809,8 @@ static int afs_link(struct dentry *from, struct inode *dir, op->create.reason = afs_edit_dir_for_link; return afs_do_sync_operation(op); +error_op: + afs_put_operation(op); error: d_drop(dentry); _leave(" = %d", ret); @@ -1989,6 +1995,11 @@ static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, if (IS_ERR(op)) return PTR_ERR(op); + ret = afs_validate(vnode, op->key); + op->error = ret; + if (ret < 0) + goto error; + afs_op_set_vnode(op, 0, orig_dvnode); afs_op_set_vnode(op, 1, new_dvnode); /* May be same as orig_dvnode */ op->file[0].dv_delta = 1; diff --git a/fs/afs/file.c b/fs/afs/file.c index 6688fff14b0b..4c8d786b53e0 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -24,12 +24,13 @@ static void afs_invalidatepage(struct page *page, unsigned int offset, static int afs_releasepage(struct page *page, gfp_t gfp_flags); static void afs_readahead(struct readahead_control *ractl); +static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter); const struct file_operations afs_file_operations = { .open = afs_open, .release = afs_release, .llseek = generic_file_llseek, - .read_iter = generic_file_read_iter, + .read_iter = afs_file_read_iter, .write_iter = afs_file_write, .mmap = afs_file_mmap, .splice_read = generic_file_splice_read, @@ -503,3 +504,16 @@ static int afs_file_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_ops = &afs_vm_ops; return ret; } + +static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp)); + struct afs_file *af = iocb->ki_filp->private_data; + int ret; + + ret = afs_validate(vnode, af->key); + if (ret < 0) + return ret; + + return generic_file_read_iter(iocb, iter); +} diff --git a/fs/afs/write.c b/fs/afs/write.c index 66b235266893..32a764c24284 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -807,6 +807,7 @@ int afs_writepages(struct address_space *mapping, ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from) { struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp)); + struct afs_file *af = iocb->ki_filp->private_data; ssize_t result; size_t count = iov_iter_count(from); @@ -822,6 +823,10 @@ ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from) if (!count) return 0; + result = afs_validate(vnode, af->key); + if (result < 0) + return result; + result = generic_file_write_iter(iocb, from); _leave(" = %zd", result); @@ -835,13 +840,18 @@ ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from) */ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - struct inode *inode = file_inode(file); - struct afs_vnode *vnode = AFS_FS_I(inode); + struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); + struct afs_file *af = file->private_data; + int ret; _enter("{%llx:%llu},{n=%pD},%d", vnode->fid.vid, vnode->fid.vnode, file, datasync); + ret = afs_validate(vnode, af->key); + if (ret < 0) + return ret; + return file_write_and_wait_range(file, start, end); } @@ -855,11 +865,14 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf) struct file *file = vmf->vma->vm_file; struct inode *inode = file_inode(file); struct afs_vnode *vnode = AFS_FS_I(inode); + struct afs_file *af = file->private_data; unsigned long priv; vm_fault_t ret = VM_FAULT_RETRY; _enter("{{%llx:%llu}},{%lx}", vnode->fid.vid, vnode->fid.vnode, page->index); + afs_validate(vnode, af->key); + sb_start_pagefault(inode->i_sb); /* Wait for the page to be written to the cache before we allow it to From 63d49d843ef5fffeea069e0ffdfbd2bf40ba01c6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 1 Sep 2021 18:11:33 +0100 Subject: [PATCH 055/543] afs: Fix incorrect triggering of sillyrename on 3rd-party invalidation The AFS filesystem is currently triggering the silly-rename cleanup from afs_d_revalidate() when it sees that a dentry has been changed by a third party[1]. It should not be doing this as the cleanup includes deleting the silly-rename target file on iput. Fix this by removing the places in the d_revalidate handling that validate anything other than the directory and the dirent. It probably should not be looking to validate the target inode of the dentry also. This includes removing the point in afs_d_revalidate() where the inode that a dentry used to point to was marked as being deleted (AFS_VNODE_DELETED). We don't know it got deleted. It could have been renamed or it could have hard links remaining. This was reproduced by cloning a git repo onto an afs volume on one machine, switching to another machine and doing "git status", then switching back to the first and doing "git status". The second status would show weird output due to ".git/index" getting deleted by the above mentioned mechanism. A simpler way to do it is to do: machine 1: touch a machine 2: touch b; mv -f b a machine 1: stat a on an afs volume. The bug shows up as the stat failing with ENOENT and the file server log showing that machine 1 deleted "a". Fixes: 79ddbfa500b3 ("afs: Implement sillyrename for unlink and rename") Reported-by: Markus Suvanto Signed-off-by: David Howells Tested-by: Markus Suvanto cc: linux-afs@lists.infradead.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=214217#c4 [1] Link: https://lore.kernel.org/r/163111668100.283156.3851669884664475428.stgit@warthog.procyon.org.uk/ --- fs/afs/dir.c | 46 +++++++--------------------------------------- 1 file changed, 7 insertions(+), 39 deletions(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index a8e3ae55f1f9..4579bbda4634 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -1077,9 +1077,9 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, */ static int afs_d_revalidate_rcu(struct dentry *dentry) { - struct afs_vnode *dvnode, *vnode; + struct afs_vnode *dvnode; struct dentry *parent; - struct inode *dir, *inode; + struct inode *dir; long dir_version, de_version; _enter("%p", dentry); @@ -1109,18 +1109,6 @@ static int afs_d_revalidate_rcu(struct dentry *dentry) return -ECHILD; } - /* Check to see if the vnode referred to by the dentry still - * has a callback. - */ - if (d_really_is_positive(dentry)) { - inode = d_inode_rcu(dentry); - if (inode) { - vnode = AFS_FS_I(inode); - if (!afs_check_validity(vnode)) - return -ECHILD; - } - } - return 1; /* Still valid */ } @@ -1156,17 +1144,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) if (IS_ERR(key)) key = NULL; - if (d_really_is_positive(dentry)) { - inode = d_inode(dentry); - if (inode) { - vnode = AFS_FS_I(inode); - afs_validate(vnode, key); - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) - goto out_bad; - } - } - - /* lock down the parent dentry so we can peer at it */ + /* Hold the parent dentry so we can peer at it */ parent = dget_parent(dentry); dir = AFS_FS_I(d_inode(parent)); @@ -1175,7 +1153,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) if (test_bit(AFS_VNODE_DELETED, &dir->flags)) { _debug("%pd: parent dir deleted", dentry); - goto out_bad_parent; + goto not_found; } /* We only need to invalidate a dentry if the server's copy changed @@ -1201,12 +1179,12 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) case 0: /* the filename maps to something */ if (d_really_is_negative(dentry)) - goto out_bad_parent; + goto not_found; inode = d_inode(dentry); if (is_bad_inode(inode)) { printk("kAFS: afs_d_revalidate: %pd2 has bad inode\n", dentry); - goto out_bad_parent; + goto not_found; } vnode = AFS_FS_I(inode); @@ -1228,9 +1206,6 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) dentry, fid.unique, vnode->fid.unique, vnode->vfs_inode.i_generation); - write_seqlock(&vnode->cb_lock); - set_bit(AFS_VNODE_DELETED, &vnode->flags); - write_sequnlock(&vnode->cb_lock); goto not_found; } goto out_valid; @@ -1245,7 +1220,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) default: _debug("failed to iterate dir %pd: %d", parent, ret); - goto out_bad_parent; + goto not_found; } out_valid: @@ -1256,16 +1231,9 @@ out_valid_noupdate: _leave(" = 1 [valid]"); return 1; - /* the dirent, if it exists, now points to a different vnode */ not_found: - spin_lock(&dentry->d_lock); - dentry->d_flags |= DCACHE_NFSFS_RENAMED; - spin_unlock(&dentry->d_lock); - -out_bad_parent: _debug("dropping dentry %pd2", dentry); dput(parent); -out_bad: key_put(key); _leave(" = 0 [bad]"); From 6e0e99d58a6530cf65f10e4bb16630c5be6c254d Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Sep 2021 16:43:10 +0100 Subject: [PATCH 056/543] afs: Fix mmap coherency vs 3rd-party changes Fix the coherency management of mmap'd data such that 3rd-party changes become visible as soon as possible after the callback notification is delivered by the fileserver. This is done by the following means: (1) When we break a callback on a vnode specified by the CB.CallBack call from the server, we queue a work item (vnode->cb_work) to go and clobber all the PTEs mapping to that inode. This causes the CPU to trip through the ->map_pages() and ->page_mkwrite() handlers if userspace attempts to access the page(s) again. (Ideally, this would be done in the service handler for CB.CallBack, but the server is waiting for our reply before considering, and we have a list of vnodes, all of which need breaking - and the process of getting the mmap_lock and stripping the PTEs on all CPUs could be quite slow.) (2) Call afs_validate() from the ->map_pages() handler to check to see if the file has changed and to get a new callback promise from the server. Also handle the fileserver telling us that it's dropping all callbacks, possibly after it's been restarted by sending us a CB.InitCallBackState* call by the following means: (3) Maintain a per-cell list of afs files that are currently mmap'd (cell->fs_open_mmaps). (4) Add a work item to each server that is invoked if there are any open mmaps when CB.InitCallBackState happens. This work item goes through the aforementioned list and invokes the vnode->cb_work work item for each one that is currently using this server. This causes the PTEs to be cleared, causing ->map_pages() or ->page_mkwrite() to be called again, thereby calling afs_validate() again. I've chosen to simply strip the PTEs at the point of notification reception rather than invalidate all the pages as well because (a) it's faster, (b) we may get a notification for other reasons than the data being altered (in which case we don't want to clobber the pagecache) and (c) we need to ask the server to find out - and I don't want to wait for the reply before holding up userspace. This was tested using the attached test program: #include #include #include #include #include #include int main(int argc, char *argv[]) { size_t size = getpagesize(); unsigned char *p; bool mod = (argc == 3); int fd; if (argc != 2 && argc != 3) { fprintf(stderr, "Format: %s [mod]\n", argv[0]); exit(2); } fd = open(argv[1], mod ? O_RDWR : O_RDONLY); if (fd < 0) { perror(argv[1]); exit(1); } p = mmap(NULL, size, mod ? PROT_READ|PROT_WRITE : PROT_READ, MAP_SHARED, fd, 0); if (p == MAP_FAILED) { perror("mmap"); exit(1); } for (;;) { if (mod) { p[0]++; msync(p, size, MS_ASYNC); fsync(fd); } printf("%02x", p[0]); fflush(stdout); sleep(1); } } It runs in two modes: in one mode, it mmaps a file, then sits in a loop reading the first byte, printing it and sleeping for a second; in the second mode it mmaps a file, then sits in a loop incrementing the first byte and flushing, then printing and sleeping. Two instances of this program can be run on different machines, one doing the reading and one doing the writing. The reader should see the changes made by the writer, but without this patch, they aren't because validity checking is being done lazily - only on entry to the filesystem. Testing the InitCallBackState change is more complicated. The server has to be taken offline, the saved callback state file removed and then the server restarted whilst the reading-mode program continues to run. The client machine then has to poke the server to trigger the InitCallBackState call. Signed-off-by: David Howells Tested-by: Markus Suvanto cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/163111668833.283156.382633263709075739.stgit@warthog.procyon.org.uk/ --- fs/afs/callback.c | 42 +++++++++++++++++++++++++++-- fs/afs/cell.c | 2 ++ fs/afs/file.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++- fs/afs/internal.h | 8 ++++++ fs/afs/server.c | 2 ++ fs/afs/super.c | 1 + mm/memory.c | 1 + 7 files changed, 120 insertions(+), 3 deletions(-) diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 7d9b23d981bf..1dd7543dbf9f 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -20,6 +20,37 @@ #include #include "internal.h" +/* + * Handle invalidation of an mmap'd file. We invalidate all the PTEs referring + * to the pages in this file's pagecache, forcing the kernel to go through + * ->fault() or ->page_mkwrite() - at which point we can handle invalidation + * more fully. + */ +void afs_invalidate_mmap_work(struct work_struct *work) +{ + struct afs_vnode *vnode = container_of(work, struct afs_vnode, cb_work); + + unmap_mapping_pages(vnode->vfs_inode.i_mapping, 0, 0, false); +} + +void afs_server_init_callback_work(struct work_struct *work) +{ + struct afs_server *server = container_of(work, struct afs_server, initcb_work); + struct afs_vnode *vnode; + struct afs_cell *cell = server->cell; + + down_read(&cell->fs_open_mmaps_lock); + + list_for_each_entry(vnode, &cell->fs_open_mmaps, cb_mmap_link) { + if (vnode->cb_server == server) { + clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + queue_work(system_unbound_wq, &vnode->cb_work); + } + } + + up_read(&cell->fs_open_mmaps_lock); +} + /* * Allow the fileserver to request callback state (re-)initialisation. * Unfortunately, UUIDs are not guaranteed unique. @@ -29,8 +60,10 @@ void afs_init_callback_state(struct afs_server *server) rcu_read_lock(); do { server->cb_s_break++; - server = rcu_dereference(server->uuid_next); - } while (0); + if (!list_empty(&server->cell->fs_open_mmaps)) + queue_work(system_unbound_wq, &server->initcb_work); + + } while ((server = rcu_dereference(server->uuid_next))); rcu_read_unlock(); } @@ -49,6 +82,11 @@ void __afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reas if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB) afs_lock_may_be_available(vnode); + if (reason != afs_cb_break_for_deleted && + vnode->status.type == AFS_FTYPE_FILE && + atomic_read(&vnode->cb_nr_mmap)) + queue_work(system_unbound_wq, &vnode->cb_work); + trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, true); } else { trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, false); diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 887b673f6223..d88407fb9bc0 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -166,6 +166,8 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, seqlock_init(&cell->volume_lock); cell->fs_servers = RB_ROOT; seqlock_init(&cell->fs_lock); + INIT_LIST_HEAD(&cell->fs_open_mmaps); + init_rwsem(&cell->fs_open_mmaps_lock); rwlock_init(&cell->vl_servers_lock); cell->flags = (1 << AFS_CELL_FL_CHECK_ALIAS); diff --git a/fs/afs/file.c b/fs/afs/file.c index 4c8d786b53e0..e6c447ae91f3 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -25,6 +25,9 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags); static void afs_readahead(struct readahead_control *ractl); static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter); +static void afs_vm_open(struct vm_area_struct *area); +static void afs_vm_close(struct vm_area_struct *area); +static vm_fault_t afs_vm_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); const struct file_operations afs_file_operations = { .open = afs_open, @@ -60,8 +63,10 @@ const struct address_space_operations afs_fs_aops = { }; static const struct vm_operations_struct afs_vm_ops = { + .open = afs_vm_open, + .close = afs_vm_close, .fault = filemap_fault, - .map_pages = filemap_map_pages, + .map_pages = afs_vm_map_pages, .page_mkwrite = afs_page_mkwrite, }; @@ -492,19 +497,79 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags) return 1; } +static void afs_add_open_mmap(struct afs_vnode *vnode) +{ + if (atomic_inc_return(&vnode->cb_nr_mmap) == 1) { + down_write(&vnode->volume->cell->fs_open_mmaps_lock); + + list_add_tail(&vnode->cb_mmap_link, + &vnode->volume->cell->fs_open_mmaps); + + up_write(&vnode->volume->cell->fs_open_mmaps_lock); + } +} + +static void afs_drop_open_mmap(struct afs_vnode *vnode) +{ + if (!atomic_dec_and_test(&vnode->cb_nr_mmap)) + return; + + down_write(&vnode->volume->cell->fs_open_mmaps_lock); + + if (atomic_read(&vnode->cb_nr_mmap) == 0) + list_del_init(&vnode->cb_mmap_link); + + up_write(&vnode->volume->cell->fs_open_mmaps_lock); + flush_work(&vnode->cb_work); +} + /* * Handle setting up a memory mapping on an AFS file. */ static int afs_file_mmap(struct file *file, struct vm_area_struct *vma) { + struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); int ret; + afs_add_open_mmap(vnode); + ret = generic_file_mmap(file, vma); if (ret == 0) vma->vm_ops = &afs_vm_ops; + else + afs_drop_open_mmap(vnode); return ret; } +static void afs_vm_open(struct vm_area_struct *vma) +{ + afs_add_open_mmap(AFS_FS_I(file_inode(vma->vm_file))); +} + +static void afs_vm_close(struct vm_area_struct *vma) +{ + afs_drop_open_mmap(AFS_FS_I(file_inode(vma->vm_file))); +} + +static vm_fault_t afs_vm_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff) +{ + struct afs_vnode *vnode = AFS_FS_I(file_inode(vmf->vma->vm_file)); + struct afs_file *af = vmf->vma->vm_file->private_data; + + switch (afs_validate(vnode, af->key)) { + case 0: + return filemap_map_pages(vmf, start_pgoff, end_pgoff); + case -ENOMEM: + return VM_FAULT_OOM; + case -EINTR: + case -ERESTARTSYS: + return VM_FAULT_RETRY; + case -ESTALE: + default: + return VM_FAULT_SIGBUS; + } +} + static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp)); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 5ed416f4ff33..0deeb76c67d0 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -390,6 +390,8 @@ struct afs_cell { /* Active fileserver interaction state. */ struct rb_root fs_servers; /* afs_server (by server UUID) */ seqlock_t fs_lock; /* For fs_servers */ + struct rw_semaphore fs_open_mmaps_lock; + struct list_head fs_open_mmaps; /* List of vnodes that are mmapped */ /* VL server list. */ rwlock_t vl_servers_lock; /* Lock on vl_servers */ @@ -503,6 +505,7 @@ struct afs_server { struct hlist_node addr4_link; /* Link in net->fs_addresses4 */ struct hlist_node addr6_link; /* Link in net->fs_addresses6 */ struct hlist_node proc_link; /* Link in net->fs_proc */ + struct work_struct initcb_work; /* Work for CB.InitCallBackState* */ struct afs_server *gc_next; /* Next server in manager's list */ time64_t unuse_time; /* Time at which last unused */ unsigned long flags; @@ -657,7 +660,10 @@ struct afs_vnode { afs_lock_type_t lock_type : 8; /* outstanding callback notification on this file */ + struct work_struct cb_work; /* Work for mmap'd files */ + struct list_head cb_mmap_link; /* Link in cell->fs_open_mmaps */ void *cb_server; /* Server with callback/filelock */ + atomic_t cb_nr_mmap; /* Number of mmaps */ unsigned int cb_s_break; /* Mass break counter on ->server */ unsigned int cb_v_break; /* Mass break counter on ->volume */ unsigned int cb_break; /* Break counter on vnode */ @@ -965,6 +971,8 @@ extern struct fscache_cookie_def afs_vnode_cache_index_def; /* * callback.c */ +extern void afs_invalidate_mmap_work(struct work_struct *); +extern void afs_server_init_callback_work(struct work_struct *work); extern void afs_init_callback_state(struct afs_server *); extern void __afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason); extern void afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason); diff --git a/fs/afs/server.c b/fs/afs/server.c index 684a2b02b9ff..6e5b9a19b234 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -235,6 +235,7 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell, server->addr_version = alist->version; server->uuid = *uuid; rwlock_init(&server->fs_lock); + INIT_WORK(&server->initcb_work, afs_server_init_callback_work); init_waitqueue_head(&server->probe_wq); INIT_LIST_HEAD(&server->probe_link); spin_lock_init(&server->probe_lock); @@ -467,6 +468,7 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server) if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags)) afs_give_up_callbacks(net, server); + flush_work(&server->initcb_work); afs_put_server(net, server, afs_server_trace_destroy); } diff --git a/fs/afs/super.c b/fs/afs/super.c index e38bb1e7a4d2..d110def8aa8e 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -698,6 +698,7 @@ static struct inode *afs_alloc_inode(struct super_block *sb) vnode->lock_state = AFS_VNODE_LOCK_NONE; init_rwsem(&vnode->rmdir_lock); + INIT_WORK(&vnode->cb_work, afs_invalidate_mmap_work); _leave(" = %p", &vnode->vfs_inode); return &vnode->vfs_inode; diff --git a/mm/memory.c b/mm/memory.c index 25fc46e87214..adf9b9ef8277 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3403,6 +3403,7 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, unmap_mapping_range_tree(&mapping->i_mmap, &details); i_mmap_unlock_write(mapping); } +EXPORT_SYMBOL_GPL(unmap_mapping_pages); /** * unmap_mapping_range - unmap the portion of all mmaps in the specified From 4fe6a946823a9bc8619fd16b7ea7d15914a30f22 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Sep 2021 21:51:01 +0100 Subject: [PATCH 057/543] afs: Try to avoid taking RCU read lock when checking vnode validity Try to avoid taking the RCU read lock when checking the validity of a vnode's callback state. The only thing it's needed for is to pin the parent volume's server list whilst we search it to find the record of the server we're currently using to see if it has been reinitialised (ie. it sent us a CB.InitCallBackState* RPC). Do this by the following means: (1) Keep an additional per-cell counter (fs_s_break) that's incremented each time any of the fileservers in the cell reinitialises. Since the new counter can be accessed without RCU from the vnode, we can check that first - and only if it differs, get the RCU read lock and check the volume's server list. (2) Replace afs_get_s_break_rcu() with afs_check_server_good() which now indicates whether the callback promise is still expected to be present on the server. This does the checks as described in (1). (3) Restructure afs_check_validity() to take account of the change in (2). We can also get rid of the valid variable and just use the need_clear variable with the addition of the afs_cb_break_no_promise reason. (4) afs_check_validity() probably shouldn't be altering vnode->cb_v_break and vnode->cb_s_break when it doesn't have cb_lock exclusively locked. Move the change to vnode->cb_v_break to __afs_break_callback(). Delegate the change to vnode->cb_s_break to afs_select_fileserver() and set vnode->cb_fs_s_break there also. (5) afs_validate() no longer needs to get the RCU read lock around its call to afs_check_validity() - and can skip the call entirely if we don't have a promise. Signed-off-by: David Howells Tested-by: Markus Suvanto cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/163111669583.283156.1397603105683094563.stgit@warthog.procyon.org.uk/ --- fs/afs/callback.c | 2 + fs/afs/inode.c | 88 +++++++++++++++++++------------------- fs/afs/internal.h | 2 + fs/afs/rotate.c | 1 + include/trace/events/afs.h | 8 +++- 5 files changed, 54 insertions(+), 47 deletions(-) diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 1dd7543dbf9f..1b4d5809808d 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -60,6 +60,7 @@ void afs_init_callback_state(struct afs_server *server) rcu_read_lock(); do { server->cb_s_break++; + atomic_inc(&server->cell->fs_s_break); if (!list_empty(&server->cell->fs_open_mmaps)) queue_work(system_unbound_wq, &server->initcb_work); @@ -77,6 +78,7 @@ void __afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reas clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { vnode->cb_break++; + vnode->cb_v_break = vnode->volume->cb_v_break; afs_clear_permits(vnode); if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB) diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 80b6c8d967d5..126daf9969db 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -587,22 +587,32 @@ static void afs_zap_data(struct afs_vnode *vnode) } /* - * Get the server reinit counter for a vnode's current server. + * Check to see if we have a server currently serving this volume and that it + * hasn't been reinitialised or dropped from the list. */ -static bool afs_get_s_break_rcu(struct afs_vnode *vnode, unsigned int *_s_break) +static bool afs_check_server_good(struct afs_vnode *vnode) { - struct afs_server_list *slist = rcu_dereference(vnode->volume->servers); + struct afs_server_list *slist; struct afs_server *server; + bool good; int i; + if (vnode->cb_fs_s_break == atomic_read(&vnode->volume->cell->fs_s_break)) + return true; + + rcu_read_lock(); + + slist = rcu_dereference(vnode->volume->servers); for (i = 0; i < slist->nr_servers; i++) { server = slist->servers[i].server; if (server == vnode->cb_server) { - *_s_break = READ_ONCE(server->cb_s_break); - return true; + good = (vnode->cb_s_break == server->cb_s_break); + rcu_read_unlock(); + return good; } } + rcu_read_unlock(); return false; } @@ -611,57 +621,46 @@ static bool afs_get_s_break_rcu(struct afs_vnode *vnode, unsigned int *_s_break) */ bool afs_check_validity(struct afs_vnode *vnode) { - struct afs_volume *volume = vnode->volume; enum afs_cb_break_reason need_clear = afs_cb_break_no_break; time64_t now = ktime_get_real_seconds(); - bool valid; - unsigned int cb_break, cb_s_break, cb_v_break; + unsigned int cb_break; int seq = 0; do { read_seqbegin_or_lock(&vnode->cb_lock, &seq); - cb_v_break = READ_ONCE(volume->cb_v_break); cb_break = vnode->cb_break; - if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags) && - afs_get_s_break_rcu(vnode, &cb_s_break)) { - if (vnode->cb_s_break != cb_s_break || - vnode->cb_v_break != cb_v_break) { - vnode->cb_s_break = cb_s_break; - vnode->cb_v_break = cb_v_break; - need_clear = afs_cb_break_for_vsbreak; - valid = false; - } else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { + if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { + if (vnode->cb_v_break != vnode->volume->cb_v_break) + need_clear = afs_cb_break_for_v_break; + else if (!afs_check_server_good(vnode)) + need_clear = afs_cb_break_for_s_reinit; + else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) need_clear = afs_cb_break_for_zap; - valid = false; - } else if (vnode->cb_expires_at - 10 <= now) { + else if (vnode->cb_expires_at - 10 <= now) need_clear = afs_cb_break_for_lapsed; - valid = false; - } else { - valid = true; - } } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { - valid = true; + ; } else { - vnode->cb_v_break = cb_v_break; - valid = false; + need_clear = afs_cb_break_no_promise; } } while (need_seqretry(&vnode->cb_lock, seq)); done_seqretry(&vnode->cb_lock, seq); - if (need_clear != afs_cb_break_no_break) { - write_seqlock(&vnode->cb_lock); - if (cb_break == vnode->cb_break) - __afs_break_callback(vnode, need_clear); - else - trace_afs_cb_miss(&vnode->fid, need_clear); - write_sequnlock(&vnode->cb_lock); - valid = false; - } + if (need_clear == afs_cb_break_no_break) + return true; - return valid; + write_seqlock(&vnode->cb_lock); + if (need_clear == afs_cb_break_no_promise) + vnode->cb_v_break = vnode->volume->cb_v_break; + else if (cb_break == vnode->cb_break) + __afs_break_callback(vnode, need_clear); + else + trace_afs_cb_miss(&vnode->fid, need_clear); + write_sequnlock(&vnode->cb_lock); + return false; } /* @@ -675,21 +674,20 @@ bool afs_check_validity(struct afs_vnode *vnode) */ int afs_validate(struct afs_vnode *vnode, struct key *key) { - bool valid; int ret; _enter("{v={%llx:%llu} fl=%lx},%x", vnode->fid.vid, vnode->fid.vnode, vnode->flags, key_serial(key)); - rcu_read_lock(); - valid = afs_check_validity(vnode); - rcu_read_unlock(); + if (unlikely(test_bit(AFS_VNODE_DELETED, &vnode->flags))) { + if (vnode->vfs_inode.i_nlink) + clear_nlink(&vnode->vfs_inode); + goto valid; + } - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) - clear_nlink(&vnode->vfs_inode); - - if (valid) + if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags) && + afs_check_validity(vnode)) goto valid; down_write(&vnode->validate_lock); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 0deeb76c67d0..c97618855b46 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -392,6 +392,7 @@ struct afs_cell { seqlock_t fs_lock; /* For fs_servers */ struct rw_semaphore fs_open_mmaps_lock; struct list_head fs_open_mmaps; /* List of vnodes that are mmapped */ + atomic_t fs_s_break; /* Counter of CB.InitCallBackState messages */ /* VL server list. */ rwlock_t vl_servers_lock; /* Lock on vl_servers */ @@ -664,6 +665,7 @@ struct afs_vnode { struct list_head cb_mmap_link; /* Link in cell->fs_open_mmaps */ void *cb_server; /* Server with callback/filelock */ atomic_t cb_nr_mmap; /* Number of mmaps */ + unsigned int cb_fs_s_break; /* Mass server break counter (cell->fs_s_break) */ unsigned int cb_s_break; /* Mass break counter on ->server */ unsigned int cb_v_break; /* Mass break counter on ->volume */ unsigned int cb_break; /* Break counter on vnode */ diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index d83f13c44b92..79e1a5f6701b 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -374,6 +374,7 @@ selected_server: if (vnode->cb_server != server) { vnode->cb_server = server; vnode->cb_s_break = server->cb_s_break; + vnode->cb_fs_s_break = atomic_read(&server->cell->fs_s_break); vnode->cb_v_break = vnode->volume->cb_v_break; clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); } diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 9f73ed2cf061..bca73e8c8cde 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -306,11 +306,13 @@ enum afs_flock_operation { enum afs_cb_break_reason { afs_cb_break_no_break, + afs_cb_break_no_promise, afs_cb_break_for_callback, afs_cb_break_for_deleted, afs_cb_break_for_lapsed, + afs_cb_break_for_s_reinit, afs_cb_break_for_unlink, - afs_cb_break_for_vsbreak, + afs_cb_break_for_v_break, afs_cb_break_for_volume_callback, afs_cb_break_for_zap, }; @@ -602,11 +604,13 @@ enum afs_cb_break_reason { #define afs_cb_break_reasons \ EM(afs_cb_break_no_break, "no-break") \ + EM(afs_cb_break_no_promise, "no-promise") \ EM(afs_cb_break_for_callback, "break-cb") \ EM(afs_cb_break_for_deleted, "break-del") \ EM(afs_cb_break_for_lapsed, "break-lapsed") \ + EM(afs_cb_break_for_s_reinit, "s-reinit") \ EM(afs_cb_break_for_unlink, "break-unlink") \ - EM(afs_cb_break_for_vsbreak, "break-vs") \ + EM(afs_cb_break_for_v_break, "break-v") \ EM(afs_cb_break_for_volume_callback, "break-v-cb") \ E_(afs_cb_break_for_zap, "break-zap") From b537a3c21775075395af475dcc6ef212fcf29db8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 10 Sep 2021 00:01:52 +0100 Subject: [PATCH 058/543] afs: Fix corruption in reads at fpos 2G-4G from an OpenAFS server AFS-3 has two data fetch RPC variants, FS.FetchData and FS.FetchData64, and Linux's afs client switches between them when talking to a non-YFS server if the read size, the file position or the sum of the two have the upper 32 bits set of the 64-bit value. This is a problem, however, since the file position and length fields of FS.FetchData are *signed* 32-bit values. Fix this by capturing the capability bits obtained from the fileserver when it's sent an FS.GetCapabilities RPC, rather than just discarding them, and then picking out the VICED_CAPABILITY_64BITFILES flag. This can then be used to decide whether to use FS.FetchData or FS.FetchData64 - and also FS.StoreData or FS.StoreData64 - rather than using upper_32_bits() to switch on the parameter values. This capabilities flag could also be used to limit the maximum size of the file, but all servers must be checked for that. Note that the issue does not exist with FS.StoreData - that uses *unsigned* 32-bit values. It's also not a problem with Auristor servers as its YFS.FetchData64 op uses unsigned 64-bit values. This can be tested by cloning a git repo through an OpenAFS client to an OpenAFS server and then doing "git status" on it from a Linux afs client[1]. Provided the clone has a pack file that's in the 2G-4G range, the git status will show errors like: error: packfile .git/objects/pack/pack-5e813c51d12b6847bbc0fcd97c2bca66da50079c.pack does not match index error: packfile .git/objects/pack/pack-5e813c51d12b6847bbc0fcd97c2bca66da50079c.pack does not match index This can be observed in the server's FileLog with something like the following appearing: Sun Aug 29 19:31:39 2021 SRXAFS_FetchData, Fid = 2303380852.491776.3263114, Host 192.168.11.201:7001, Id 1001 Sun Aug 29 19:31:39 2021 CheckRights: len=0, for host=192.168.11.201:7001 Sun Aug 29 19:31:39 2021 FetchData_RXStyle: Pos 18446744071815340032, Len 3154 Sun Aug 29 19:31:39 2021 FetchData_RXStyle: file size 2400758866 ... Sun Aug 29 19:31:40 2021 SRXAFS_FetchData returns 5 Note the file position of 18446744071815340032. This is the requested file position sign-extended. Fixes: b9b1f8d5930a ("AFS: write support fixes") Reported-by: Markus Suvanto Signed-off-by: David Howells Reviewed-by: Marc Dionne Tested-by: Markus Suvanto cc: linux-afs@lists.infradead.org cc: openafs-devel@openafs.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=214217#c9 [1] Link: https://lore.kernel.org/r/951332.1631308745@warthog.procyon.org.uk/ --- fs/afs/fs_probe.c | 8 +++++++- fs/afs/fsclient.c | 31 ++++++++++++++++++++----------- fs/afs/internal.h | 1 + fs/afs/protocol_afs.h | 15 +++++++++++++++ fs/afs/protocol_yfs.h | 6 ++++++ 5 files changed, 49 insertions(+), 12 deletions(-) create mode 100644 fs/afs/protocol_afs.h diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index e7e98ad63a91..c0031a3ab42f 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -9,6 +9,7 @@ #include #include "afs_fs.h" #include "internal.h" +#include "protocol_afs.h" #include "protocol_yfs.h" static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ; @@ -102,7 +103,7 @@ void afs_fileserver_probe_result(struct afs_call *call) struct afs_addr_list *alist = call->alist; struct afs_server *server = call->server; unsigned int index = call->addr_ix; - unsigned int rtt_us = 0; + unsigned int rtt_us = 0, cap0; int ret = call->error; _enter("%pU,%u", &server->uuid, index); @@ -159,6 +160,11 @@ responded: clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags); alist->addrs[index].srx_service = call->service_id; } + cap0 = ntohl(call->tmp); + if (cap0 & AFS3_VICED_CAPABILITY_64BITFILES) + set_bit(AFS_SERVER_FL_HAS_FS64, &server->flags); + else + clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags); } if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) && diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index dd3f45d906d2..4943413d9c5f 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -456,9 +456,7 @@ void afs_fs_fetch_data(struct afs_operation *op) struct afs_read *req = op->fetch.req; __be32 *bp; - if (upper_32_bits(req->pos) || - upper_32_bits(req->len) || - upper_32_bits(req->pos + req->len)) + if (test_bit(AFS_SERVER_FL_HAS_FS64, &op->server->flags)) return afs_fs_fetch_data64(op); _enter(""); @@ -1113,9 +1111,7 @@ void afs_fs_store_data(struct afs_operation *op) (unsigned long long)op->store.pos, (unsigned long long)op->store.i_size); - if (upper_32_bits(op->store.pos) || - upper_32_bits(op->store.size) || - upper_32_bits(op->store.i_size)) + if (test_bit(AFS_SERVER_FL_HAS_FS64, &op->server->flags)) return afs_fs_store_data64(op); call = afs_alloc_flat_call(op->net, &afs_RXFSStoreData, @@ -1229,7 +1225,7 @@ static void afs_fs_setattr_size(struct afs_operation *op) key_serial(op->key), vp->fid.vid, vp->fid.vnode); ASSERT(attr->ia_valid & ATTR_SIZE); - if (upper_32_bits(attr->ia_size)) + if (test_bit(AFS_SERVER_FL_HAS_FS64, &op->server->flags)) return afs_fs_setattr_size64(op); call = afs_alloc_flat_call(op->net, &afs_RXFSStoreData_as_Status, @@ -1657,20 +1653,33 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call) return ret; count = ntohl(call->tmp); - call->count = count; call->count2 = count; - afs_extract_discard(call, count * sizeof(__be32)); + if (count == 0) { + call->unmarshall = 4; + call->tmp = 0; + break; + } + + /* Extract the first word of the capabilities to call->tmp */ + afs_extract_to_tmp(call); call->unmarshall++; fallthrough; - /* Extract capabilities words */ case 2: ret = afs_extract_data(call, false); if (ret < 0) return ret; - /* TODO: Examine capabilities */ + afs_extract_discard(call, (count - 1) * sizeof(__be32)); + call->unmarshall++; + fallthrough; + + /* Extract remaining capabilities words */ + case 3: + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; call->unmarshall++; break; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index c97618855b46..b0fe27ae60db 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -520,6 +520,7 @@ struct afs_server { #define AFS_SERVER_FL_IS_YFS 16 /* Server is YFS not AFS */ #define AFS_SERVER_FL_NO_IBULK 17 /* Fileserver doesn't support FS.InlineBulkStatus */ #define AFS_SERVER_FL_NO_RM2 18 /* Fileserver doesn't support YFS.RemoveFile2 */ +#define AFS_SERVER_FL_HAS_FS64 19 /* Fileserver supports FS.{Fetch,Store}Data64 */ atomic_t ref; /* Object refcount */ atomic_t active; /* Active user count */ u32 addr_version; /* Address list version */ diff --git a/fs/afs/protocol_afs.h b/fs/afs/protocol_afs.h new file mode 100644 index 000000000000..0c39358c8b70 --- /dev/null +++ b/fs/afs/protocol_afs.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* AFS protocol bits + * + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + + +#define AFSCAPABILITIESMAX 196 /* Maximum number of words in a capability set */ + +/* AFS3 Fileserver capabilities word 0 */ +#define AFS3_VICED_CAPABILITY_ERRORTRANS 0x0001 /* Uses UAE errors */ +#define AFS3_VICED_CAPABILITY_64BITFILES 0x0002 /* FetchData64 & StoreData64 supported */ +#define AFS3_VICED_CAPABILITY_WRITELOCKACL 0x0004 /* Can lock a file even without lock perm */ +#define AFS3_VICED_CAPABILITY_SANEACLS 0x0008 /* ACLs reviewed for sanity - don't use */ diff --git a/fs/afs/protocol_yfs.h b/fs/afs/protocol_yfs.h index b5bd03b1d3c7..e4cd89c44c46 100644 --- a/fs/afs/protocol_yfs.h +++ b/fs/afs/protocol_yfs.h @@ -168,3 +168,9 @@ enum yfs_lock_type { yfs_LockMandatoryWrite = 0x101, yfs_LockMandatoryExtend = 0x102, }; + +/* RXYFS Viced Capability Flags */ +#define YFS_VICED_CAPABILITY_ERRORTRANS 0x0001 /* Deprecated v0.195 */ +#define YFS_VICED_CAPABILITY_64BITFILES 0x0002 /* Deprecated v0.195 */ +#define YFS_VICED_CAPABILITY_WRITELOCKACL 0x0004 /* Can lock a file even without lock perm */ +#define YFS_VICED_CAPABILITY_SANEACLS 0x0008 /* Deprecated v0.195 */ From 9d37e1cab2a9d2cee2737973fa455e6f89eee46a Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 8 Sep 2021 21:55:19 +0100 Subject: [PATCH 059/543] afs: Fix updating of i_blocks on file/dir extension When an afs file or directory is modified locally such that the total file size is extended, i_blocks needs to be recalculated too. Fix this by making afs_write_end() and afs_edit_dir_add() call afs_set_i_size() rather than setting inode->i_size directly as that also recalculates inode->i_blocks. This can be tested by creating and writing into directories and files and then examining them with du. Without this change, directories show a 4 blocks (they start out at 2048 bytes) and files show 0 blocks; with this change, they should show a number of blocks proportional to the file size rounded up to 1024. Fixes: 31143d5d515e ("AFS: implement basic file write support") Fixes: 63a4681ff39c ("afs: Locally edit directory data for mkdir/create/unlink/...") Reported-by: Markus Suvanto Signed-off-by: David Howells Reviewed-by: Marc Dionne Tested-by: Markus Suvanto cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/163113612442.352844.11162345591911691150.stgit@warthog.procyon.org.uk/ --- fs/afs/dir_edit.c | 4 ++-- fs/afs/inode.c | 10 ---------- fs/afs/internal.h | 10 ++++++++++ fs/afs/write.c | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c index f4600c1353ad..540b9fc96824 100644 --- a/fs/afs/dir_edit.c +++ b/fs/afs/dir_edit.c @@ -263,7 +263,7 @@ void afs_edit_dir_add(struct afs_vnode *vnode, if (b == nr_blocks) { _debug("init %u", b); afs_edit_init_block(meta, block, b); - i_size_write(&vnode->vfs_inode, (b + 1) * AFS_DIR_BLOCK_SIZE); + afs_set_i_size(vnode, (b + 1) * AFS_DIR_BLOCK_SIZE); } /* Only lower dir pages have a counter in the header. */ @@ -296,7 +296,7 @@ void afs_edit_dir_add(struct afs_vnode *vnode, new_directory: afs_edit_init_block(meta, meta, 0); i_size = AFS_DIR_BLOCK_SIZE; - i_size_write(&vnode->vfs_inode, i_size); + afs_set_i_size(vnode, i_size); slot = AFS_DIR_RESV_BLOCKS0; page = page0; block = meta; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 126daf9969db..8fcffea2daf5 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -53,16 +53,6 @@ static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *paren dump_stack(); } -/* - * Set the file size and block count. Estimate the number of 512 bytes blocks - * used, rounded up to nearest 1K for consistency with other AFS clients. - */ -static void afs_set_i_size(struct afs_vnode *vnode, u64 size) -{ - i_size_write(&vnode->vfs_inode, size); - vnode->vfs_inode.i_blocks = ((size + 1023) >> 10) << 1; -} - /* * Initialise an inode from the vnode status. */ diff --git a/fs/afs/internal.h b/fs/afs/internal.h index b0fe27ae60db..0ad97a8fc0d4 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -1596,6 +1596,16 @@ static inline void afs_update_dentry_version(struct afs_operation *op, (void *)(unsigned long)dir_vp->scb.status.data_version; } +/* + * Set the file size and block count. Estimate the number of 512 bytes blocks + * used, rounded up to nearest 1K for consistency with other AFS clients. + */ +static inline void afs_set_i_size(struct afs_vnode *vnode, u64 size) +{ + i_size_write(&vnode->vfs_inode, size); + vnode->vfs_inode.i_blocks = ((size + 1023) >> 10) << 1; +} + /* * Check for a conflicting operation on a directory that we just unlinked from. * If someone managed to sneak a link or an unlink in on the file we just diff --git a/fs/afs/write.c b/fs/afs/write.c index 32a764c24284..2dfe3b3a53d6 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -137,7 +137,7 @@ int afs_write_end(struct file *file, struct address_space *mapping, write_seqlock(&vnode->cb_lock); i_size = i_size_read(&vnode->vfs_inode); if (maybe_i_size > i_size) - i_size_write(&vnode->vfs_inode, maybe_i_size); + afs_set_i_size(vnode, maybe_i_size); write_sequnlock(&vnode->cb_lock); } From 7bb057134d609b9c038a00b6876cf0d37d0118ce Mon Sep 17 00:00:00 2001 From: Carlo Lobrano Date: Fri, 3 Sep 2021 14:39:13 +0200 Subject: [PATCH 060/543] USB: serial: option: add Telit LN920 compositions This patch adds the following Telit LN920 compositions: 0x1060: tty, adb, rmnet, tty, tty, tty, tty 0x1061: tty, adb, mbim, tty, tty, tty, tty 0x1062: rndis, tty, adb, tty, tty, tty, tty 0x1063: tty, adb, ecm, tty, tty, tty, tty Signed-off-by: Carlo Lobrano Link: https://lore.kernel.org/r/20210903123913.1086513-1-c.lobrano@gmail.com Reviewed-by: Daniele Palmas Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 29c765cc8495..a79f51e35115 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1205,6 +1205,14 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1056, 0xff), /* Telit FD980 */ .driver_info = NCTRL(2) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1060, 0xff), /* Telit LN920 (rmnet) */ + .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1061, 0xff), /* Telit LN920 (MBIM) */ + .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1062, 0xff), /* Telit LN920 (RNDIS) */ + .driver_info = NCTRL(2) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1063, 0xff), /* Telit LN920 (ECM) */ + .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), From 349bff48ae0f5f8aa2075d0bdc2091a30bd634f6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 27 Aug 2021 17:53:10 +0300 Subject: [PATCH 061/543] platform/x86/intel: punit_ipc: Drop wrong use of ACPI_PTR() ACPI_PTR() is more harmful than helpful. For example, in this case if CONFIG_ACPI=n, the ID table left unused which is not what we want. Instead of adding ifdeffery here and there, drop ACPI_PTR() and unused acpi.h. Fixes: fdca4f16f57d ("platform:x86: add Intel P-Unit mailbox IPC driver") Reported-by: kernel test robot Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210827145310.76239-1-andriy.shevchenko@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/punit_ipc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel/punit_ipc.c b/drivers/platform/x86/intel/punit_ipc.c index f58b8543f6ac..66bb39fd0ef9 100644 --- a/drivers/platform/x86/intel/punit_ipc.c +++ b/drivers/platform/x86/intel/punit_ipc.c @@ -8,7 +8,6 @@ * which provide mailbox interface for power management usage. */ -#include #include #include #include @@ -319,7 +318,7 @@ static struct platform_driver intel_punit_ipc_driver = { .remove = intel_punit_ipc_remove, .driver = { .name = "intel_punit_ipc", - .acpi_match_table = ACPI_PTR(punit_ipc_acpi_ids), + .acpi_match_table = punit_ipc_acpi_ids, }, }; From 4c4a3d7cffb42da21ea8891fc7e6808ae05dbcb5 Mon Sep 17 00:00:00 2001 From: Matan Ziv-Av Date: Sun, 29 Aug 2021 15:07:08 +0300 Subject: [PATCH 062/543] lg-laptop: Correctly handle dmi_get_system_info() returning NULL The laptop model is identified by parsing the product name. If no product name is available, do not try to parse it. Default model is 2017. Signed-off-by: Matan Ziv-Av Link: https://lore.kernel.org/r/93ff3bb-503b-f73-bf18-87bae1699ed@svgalib.org Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/lg-laptop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/lg-laptop.c b/drivers/platform/x86/lg-laptop.c index 3e520d5bca07..88b551caeaaf 100644 --- a/drivers/platform/x86/lg-laptop.c +++ b/drivers/platform/x86/lg-laptop.c @@ -655,7 +655,7 @@ static int acpi_add(struct acpi_device *device) goto out_platform_registered; } product = dmi_get_system_info(DMI_PRODUCT_NAME); - if (strlen(product) > 4) + if (product && strlen(product) > 4) switch (product[4]) { case '5': case '6': From 3bf1669b0e033c885ebcb1ddc2334088dd125f2d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 5 Sep 2021 15:02:09 +0200 Subject: [PATCH 063/543] platform/x86: touchscreen_dmi: Add info for the Chuwi HiBook (CWI514) tablet Add touchscreen info for the Chuwi HiBook (CWI514) tablet. This includes info for getting the firmware directly from the UEFI, so that the user does not need to manually install the firmware in /lib/firmware/silead. This change will make the touchscreen on these devices work OOTB, without requiring any manual setup. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210905130210.32810-1-hdegoede@redhat.com --- drivers/platform/x86/touchscreen_dmi.c | 37 ++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 0e1451b1d9c6..1f9cb756b103 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -141,6 +141,33 @@ static const struct ts_dmi_data chuwi_hi10_pro_data = { .properties = chuwi_hi10_pro_props, }; +static const struct property_entry chuwi_hibook_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-x", 30), + PROPERTY_ENTRY_U32("touchscreen-min-y", 4), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1892), + PROPERTY_ENTRY_U32("touchscreen-size-y", 1276), + PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"), + PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"), + PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-chuwi-hibook.fw"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + PROPERTY_ENTRY_BOOL("silead,home-button"), + { } +}; + +static const struct ts_dmi_data chuwi_hibook_data = { + .embedded_fw = { + .name = "silead/gsl1680-chuwi-hibook.fw", + .prefix = { 0xf0, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00 }, + .length = 40392, + .sha256 = { 0xf7, 0xc0, 0xe8, 0x5a, 0x6c, 0xf2, 0xeb, 0x8d, + 0x12, 0xc4, 0x45, 0xbf, 0x55, 0x13, 0x4c, 0x1a, + 0x13, 0x04, 0x31, 0x08, 0x65, 0x73, 0xf7, 0xa8, + 0x1b, 0x7d, 0x59, 0xc9, 0xe6, 0x97, 0xf7, 0x38 }, + }, + .acpi_name = "MSSL0017:00", + .properties = chuwi_hibook_props, +}; + static const struct property_entry chuwi_vi8_props[] = { PROPERTY_ENTRY_U32("touchscreen-min-x", 4), PROPERTY_ENTRY_U32("touchscreen-min-y", 6), @@ -979,6 +1006,16 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"), }, }, + { + /* Chuwi HiBook (CWI514) */ + .driver_data = (void *)&chuwi_hibook_data, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Hampoo"), + DMI_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"), + /* Above matches are too generic, add bios-date match */ + DMI_MATCH(DMI_BIOS_DATE, "05/07/2016"), + }, + }, { /* Chuwi Vi8 (CWI506) */ .driver_data = (void *)&chuwi_vi8_data, From 196159d278ae3b49e7bbb7c76822e6008fd89b97 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 5 Sep 2021 15:02:10 +0200 Subject: [PATCH 064/543] platform/x86: touchscreen_dmi: Update info for the Chuwi Hi10 Plus (CWI527) tablet Add info for getting the firmware directly from the UEFI for the Chuwi Hi10 Plus (CWI527), so that the user does not need to manually install the firmware in /lib/firmware/silead. This change will make the touchscreen on these devices work OOTB, without requiring any manual setup. Also tweak the min and width/height values a bit for more accurate position reporting. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210905130210.32810-2-hdegoede@redhat.com --- drivers/platform/x86/touchscreen_dmi.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 1f9cb756b103..033f797861d8 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -100,10 +100,10 @@ static const struct ts_dmi_data chuwi_hi10_air_data = { }; static const struct property_entry chuwi_hi10_plus_props[] = { - PROPERTY_ENTRY_U32("touchscreen-min-x", 0), - PROPERTY_ENTRY_U32("touchscreen-min-y", 5), - PROPERTY_ENTRY_U32("touchscreen-size-x", 1914), - PROPERTY_ENTRY_U32("touchscreen-size-y", 1283), + PROPERTY_ENTRY_U32("touchscreen-min-x", 12), + PROPERTY_ENTRY_U32("touchscreen-min-y", 10), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1908), + PROPERTY_ENTRY_U32("touchscreen-size-y", 1270), PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-chuwi-hi10plus.fw"), PROPERTY_ENTRY_U32("silead,max-fingers", 10), PROPERTY_ENTRY_BOOL("silead,home-button"), @@ -111,6 +111,15 @@ static const struct property_entry chuwi_hi10_plus_props[] = { }; static const struct ts_dmi_data chuwi_hi10_plus_data = { + .embedded_fw = { + .name = "silead/gsl1680-chuwi-hi10plus.fw", + .prefix = { 0xf0, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00 }, + .length = 34056, + .sha256 = { 0xfd, 0x0a, 0x08, 0x08, 0x3c, 0xa6, 0x34, 0x4e, + 0x2c, 0x49, 0x9c, 0xcd, 0x7d, 0x44, 0x9d, 0x38, + 0x10, 0x68, 0xb5, 0xbd, 0xb7, 0x2a, 0x63, 0xb5, + 0x67, 0x0b, 0x96, 0xbd, 0x89, 0x67, 0x85, 0x09 }, + }, .acpi_name = "MSSL0017:00", .properties = chuwi_hi10_plus_props, }; From b1a89856fbf63fffde6a4771d8f1ac21df549e50 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 6 Sep 2021 23:07:29 -0700 Subject: [PATCH 065/543] m68k: Double cast io functions to unsigned long m68k builds fail widely with errors such as arch/m68k/include/asm/raw_io.h:20:19: error: cast to pointer from integer of different size arch/m68k/include/asm/raw_io.h:30:32: error: cast to pointer from integer of different size [-Werror=int-to-p On m68k, io functions are defined as macros. The problem is seen if the macro parameter variable size differs from the size of a pointer. Cast the parameter of all io macros to unsigned long before casting it to a pointer to fix the problem. Signed-off-by: Guenter Roeck Link: https://lore.kernel.org/r/20210907060729.2391992-1-linux@roeck-us.net Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/raw_io.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/m68k/include/asm/raw_io.h b/arch/m68k/include/asm/raw_io.h index 911826ea83ce..80eb2396d01e 100644 --- a/arch/m68k/include/asm/raw_io.h +++ b/arch/m68k/include/asm/raw_io.h @@ -17,21 +17,21 @@ * two accesses to memory, which may be undesirable for some devices. */ #define in_8(addr) \ - ({ u8 __v = (*(__force volatile u8 *) (addr)); __v; }) + ({ u8 __v = (*(__force volatile u8 *) (unsigned long)(addr)); __v; }) #define in_be16(addr) \ - ({ u16 __v = (*(__force volatile u16 *) (addr)); __v; }) + ({ u16 __v = (*(__force volatile u16 *) (unsigned long)(addr)); __v; }) #define in_be32(addr) \ - ({ u32 __v = (*(__force volatile u32 *) (addr)); __v; }) + ({ u32 __v = (*(__force volatile u32 *) (unsigned long)(addr)); __v; }) #define in_le16(addr) \ - ({ u16 __v = le16_to_cpu(*(__force volatile __le16 *) (addr)); __v; }) + ({ u16 __v = le16_to_cpu(*(__force volatile __le16 *) (unsigned long)(addr)); __v; }) #define in_le32(addr) \ - ({ u32 __v = le32_to_cpu(*(__force volatile __le32 *) (addr)); __v; }) + ({ u32 __v = le32_to_cpu(*(__force volatile __le32 *) (unsigned long)(addr)); __v; }) -#define out_8(addr,b) (void)((*(__force volatile u8 *) (addr)) = (b)) -#define out_be16(addr,w) (void)((*(__force volatile u16 *) (addr)) = (w)) -#define out_be32(addr,l) (void)((*(__force volatile u32 *) (addr)) = (l)) -#define out_le16(addr,w) (void)((*(__force volatile __le16 *) (addr)) = cpu_to_le16(w)) -#define out_le32(addr,l) (void)((*(__force volatile __le32 *) (addr)) = cpu_to_le32(l)) +#define out_8(addr,b) (void)((*(__force volatile u8 *) (unsigned long)(addr)) = (b)) +#define out_be16(addr,w) (void)((*(__force volatile u16 *) (unsigned long)(addr)) = (w)) +#define out_be32(addr,l) (void)((*(__force volatile u32 *) (unsigned long)(addr)) = (l)) +#define out_le16(addr,w) (void)((*(__force volatile __le16 *) (unsigned long)(addr)) = cpu_to_le16(w)) +#define out_le32(addr,l) (void)((*(__force volatile __le32 *) (unsigned long)(addr)) = cpu_to_le32(l)) #define raw_inb in_8 #define raw_inw in_be16 From a7b68ed15d1fd72c1e451d5eb6edebee2a624b90 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 7 Sep 2021 14:45:11 +0200 Subject: [PATCH 066/543] m68k: mvme: Remove overdue #warnings in RTC handling The warnings were introduced when converting the MVME147 and MVME16x RTC handling from gettod to hwclk. Replace the #warning by a comment, and return an error to inform the upper layer that writing to the RTC is not yet supported. Signed-off-by: Geert Uytterhoeven Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20210907124511.2723414-1-geert@linux-m68k.org --- arch/m68k/mvme147/config.c | 4 +++- arch/m68k/mvme16x/config.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c index e1e90c49a496..dfd6202fd403 100644 --- a/arch/m68k/mvme147/config.c +++ b/arch/m68k/mvme147/config.c @@ -171,7 +171,6 @@ static int bcd2int (unsigned char b) int mvme147_hwclk(int op, struct rtc_time *t) { -#warning check me! if (!op) { m147_rtc->ctrl = RTC_READ; t->tm_year = bcd2int (m147_rtc->bcd_year); @@ -183,6 +182,9 @@ int mvme147_hwclk(int op, struct rtc_time *t) m147_rtc->ctrl = 0; if (t->tm_year < 70) t->tm_year += 100; + } else { + /* FIXME Setting the time is not yet supported */ + return -EOPNOTSUPP; } return 0; } diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c index b59593c7cfb9..b4422c2dfbbf 100644 --- a/arch/m68k/mvme16x/config.c +++ b/arch/m68k/mvme16x/config.c @@ -436,7 +436,6 @@ int bcd2int (unsigned char b) int mvme16x_hwclk(int op, struct rtc_time *t) { -#warning check me! if (!op) { rtc->ctrl = RTC_READ; t->tm_year = bcd2int (rtc->bcd_year); @@ -448,6 +447,9 @@ int mvme16x_hwclk(int op, struct rtc_time *t) rtc->ctrl = 0; if (t->tm_year < 70) t->tm_year += 100; + } else { + /* FIXME Setting the time is not yet supported */ + return -EOPNOTSUPP; } return 0; } From eca4cf12acda306f851f6d2a05b1c9ef62cf0e81 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 12 Sep 2021 12:34:47 -0400 Subject: [PATCH 067/543] bnxt_en: Fix error recovery regression The recent patch has introduced a regression by not reading the reset count in the ERROR_RECOVERY async event handler. We may have just gone through a reset and the reset count has just incremented. If we don't update the reset count in the ERROR_RECOVERY event handler, the health check timer will see that the reset count has changed and will initiate an unintended reset. Restore the unconditional update of the reset count in bnxt_async_event_process() if error recovery watchdog is enabled. Also, update the reset count at the end of the reset sequence to make it even more robust. Fixes: 1b2b91831983 ("bnxt_en: Fix possible unintended driver initiated error recovery") Reviewed-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 9b86516e59a1..8b0a2ae1367c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2213,12 +2213,11 @@ static int bnxt_async_event_process(struct bnxt *bp, DIV_ROUND_UP(fw_health->polling_dsecs * HZ, bp->current_interval * 10); fw_health->tmr_counter = fw_health->tmr_multiplier; - if (!fw_health->enabled) { + if (!fw_health->enabled) fw_health->last_fw_heartbeat = bnxt_fw_health_readl(bp, BNXT_FW_HEARTBEAT_REG); - fw_health->last_fw_reset_cnt = - bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG); - } + fw_health->last_fw_reset_cnt = + bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG); netif_info(bp, drv, bp->dev, "Error recovery info: error recovery[1], master[%d], reset count[%u], health status: 0x%x\n", fw_health->master, fw_health->last_fw_reset_cnt, @@ -12207,6 +12206,11 @@ static void bnxt_fw_reset_task(struct work_struct *work) return; } + if ((bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY) && + bp->fw_health->enabled) { + bp->fw_health->last_fw_reset_cnt = + bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG); + } bp->fw_reset_state = 0; /* Make sure fw_reset_state is 0 before clearing the flag */ smp_mb__before_atomic(); From 1affc01fdc6035189a5ab2a24948c9419ee0ecf2 Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Sun, 12 Sep 2021 12:34:48 -0400 Subject: [PATCH 068/543] bnxt_en: make bnxt_free_skbs() safe to call after bnxt_free_mem() The call to bnxt_free_mem(..., false) in the bnxt_half_open_nic() error path will deallocate ring descriptor memory via bnxt_free_?x_rings(), but because irq_re_init is false, the ring info itself is not freed. To simplify error paths, deallocation functions have generally been written to be safe when called on unallocated memory. It should always be safe to call dev_close(), which calls bnxt_free_skbs() a second time, even in this semi- allocated ring state. Calling bnxt_free_skbs() a second time with the rings already freed will cause NULL pointer dereference. Fix it by checking the rings are valid before proceeding in bnxt_free_tx_skbs() and bnxt_free_one_rx_ring_skbs(). Fixes: 975bc99a4a39 ("bnxt_en: Refactor bnxt_free_rx_skbs().") Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 8b0a2ae1367c..9f9806f1c0fc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2729,6 +2729,9 @@ static void bnxt_free_tx_skbs(struct bnxt *bp) struct bnxt_tx_ring_info *txr = &bp->tx_ring[i]; int j; + if (!txr->tx_buf_ring) + continue; + for (j = 0; j < max_idx;) { struct bnxt_sw_tx_bd *tx_buf = &txr->tx_buf_ring[j]; struct sk_buff *skb; @@ -2813,6 +2816,9 @@ static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr) } skip_rx_tpa_free: + if (!rxr->rx_buf_ring) + goto skip_rx_buf_free; + for (i = 0; i < max_idx; i++) { struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[i]; dma_addr_t mapping = rx_buf->mapping; @@ -2835,6 +2841,11 @@ skip_rx_tpa_free: kfree(data); } } + +skip_rx_buf_free: + if (!rxr->rx_agg_ring) + goto skip_rx_agg_free; + for (i = 0; i < max_agg_idx; i++) { struct bnxt_sw_rx_agg_bd *rx_agg_buf = &rxr->rx_agg_ring[i]; struct page *page = rx_agg_buf->page; @@ -2851,6 +2862,8 @@ skip_rx_tpa_free: __free_page(page); } + +skip_rx_agg_free: if (rxr->rx_page) { __free_page(rxr->rx_page); rxr->rx_page = NULL; From 985941e1dd5e996311c29688ca0d3aa1ff8eb0b6 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 12 Sep 2021 12:34:49 -0400 Subject: [PATCH 069/543] bnxt_en: Clean up completion ring page arrays completely We recently changed the completion ring page arrays to be dynamically allocated to better support the expanded range of ring depths. The cleanup path for this was not quite complete. It might cause the shutdown path to crash if we need to abort before the completion ring arrays have been allocated and initialized. Fix it by initializing the ring_mem->pg_arr to NULL after freeing the completion ring page array. Add a check in bnxt_free_ring() to skip referencing the rmem->pg_arr if it is NULL. Fixes: 03c7448790b8 ("bnxt_en: Don't use static arrays for completion ring pages") Reviewed-by: Andy Gospodarek Reviewed-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 9f9806f1c0fc..f32431a7e5a6 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2912,6 +2912,9 @@ static void bnxt_free_ring(struct bnxt *bp, struct bnxt_ring_mem_info *rmem) struct pci_dev *pdev = bp->pdev; int i; + if (!rmem->pg_arr) + goto skip_pages; + for (i = 0; i < rmem->nr_pages; i++) { if (!rmem->pg_arr[i]) continue; @@ -2921,6 +2924,7 @@ static void bnxt_free_ring(struct bnxt *bp, struct bnxt_ring_mem_info *rmem) rmem->pg_arr[i] = NULL; } +skip_pages: if (rmem->pg_tbl) { size_t pg_tbl_size = rmem->nr_pages * 8; @@ -3240,10 +3244,14 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp) static void bnxt_free_cp_arrays(struct bnxt_cp_ring_info *cpr) { + struct bnxt_ring_struct *ring = &cpr->cp_ring_struct; + kfree(cpr->cp_desc_ring); cpr->cp_desc_ring = NULL; + ring->ring_mem.pg_arr = NULL; kfree(cpr->cp_desc_mapping); cpr->cp_desc_mapping = NULL; + ring->ring_mem.dma_arr = NULL; } static int bnxt_alloc_cp_arrays(struct bnxt_cp_ring_info *cpr, int n) From d7807a9adf4856171f8441f13078c33941df48ab Mon Sep 17 00:00:00 2001 From: Yajun Deng Date: Mon, 13 Sep 2021 12:04:42 +0800 Subject: [PATCH 070/543] Revert "ipv4: fix memory leaks in ip_cmsg_send() callers" This reverts commit 919483096bfe75dda338e98d56da91a263746a0a. There is only when ip_options_get() return zero need to free. It already called kfree() when return error. Fixes: 919483096bfe ("ipv4: fix memory leaks in ip_cmsg_send() callers") Signed-off-by: Yajun Deng Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 2 +- net/ipv4/ping.c | 5 ++--- net/ipv4/raw.c | 5 ++--- net/ipv4/udp.c | 5 ++--- 4 files changed, 7 insertions(+), 10 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index b297bb28556e..7cef9987ab4a 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -279,7 +279,7 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, case IP_RETOPTS: err = cmsg->cmsg_len - sizeof(struct cmsghdr); - /* Our caller is responsible for freeing ipc->opt */ + /* Our caller is responsible for freeing ipc->opt when err = 0 */ err = ip_options_get(net, &ipc->opt, KERNEL_SOCKPTR(CMSG_DATA(cmsg)), err < 40 ? err : 40); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 1e44a43acfe2..c588f9f2f46c 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -727,10 +727,9 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_controllen) { err = ip_cmsg_send(sk, msg, &ipc, false); - if (unlikely(err)) { - kfree(ipc.opt); + if (unlikely(err)) return err; - } + if (ipc.opt) free = 1; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index bb446e60cf58..1c98063a3ae8 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -562,10 +562,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_controllen) { err = ip_cmsg_send(sk, msg, &ipc, false); - if (unlikely(err)) { - kfree(ipc.opt); + if (unlikely(err)) goto out; - } + if (ipc.opt) free = 1; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8851c9463b4b..d5f5981d7a43 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1122,10 +1122,9 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (err > 0) err = ip_cmsg_send(sk, msg, &ipc, sk->sk_family == AF_INET6); - if (unlikely(err < 0)) { - kfree(ipc.opt); + if (unlikely(err < 0)) return err; - } + if (ipc.opt) free = 1; connected = 0; From e50e711351bdc656a8e6ca1022b4293cae8dcd59 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 13 Sep 2021 10:53:49 +0300 Subject: [PATCH 071/543] udp_tunnel: Fix udp_tunnel_nic work-queue type Turn udp_tunnel_nic work-queue to an ordered work-queue. This queue holds the UDP-tunnel configuration commands of the different netdevs. When the netdevs are functions of the same NIC the order of execution may be crucial. Problem example: NIC with 2 PFs, both PFs declare offload quota of up to 3 UDP-ports. $ifconfig eth2 1.1.1.1/16 up $ip link add eth2_19503 type vxlan id 5049 remote 1.1.1.2 dev eth2 dstport 19053 $ip link set dev eth2_19503 up $ip link add eth2_19504 type vxlan id 5049 remote 1.1.1.3 dev eth2 dstport 19054 $ip link set dev eth2_19504 up $ip link add eth2_19505 type vxlan id 5049 remote 1.1.1.4 dev eth2 dstport 19055 $ip link set dev eth2_19505 up $ip link add eth2_19506 type vxlan id 5049 remote 1.1.1.5 dev eth2 dstport 19056 $ip link set dev eth2_19506 up NIC RX port offload infrastructure offloads the first 3 UDP-ports (on all devices which sets NETIF_F_RX_UDP_TUNNEL_PORT feature) and not UDP-port 19056. So both PFs gets this offload configuration. $ip link set dev eth2_19504 down This triggers udp-tunnel-core to remove the UDP-port 19504 from offload-ports-list and offload UDP-port 19056 instead. In this scenario it is important that the UDP-port of 19504 will be removed from both PFs before trying to add UDP-port 19056. The NIC can stop offloading a UDP-port only when all references are removed. Otherwise the NIC may report exceeding of the offload quota. Fixes: cc4e3835eff4 ("udp_tunnel: add central NIC RX port offload infrastructure") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- net/ipv4/udp_tunnel_nic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c index 0d122edc368d..b91003538d87 100644 --- a/net/ipv4/udp_tunnel_nic.c +++ b/net/ipv4/udp_tunnel_nic.c @@ -935,7 +935,7 @@ static int __init udp_tunnel_nic_init_module(void) { int err; - udp_tunnel_nic_workqueue = alloc_workqueue("udp_tunnel_nic", 0, 0); + udp_tunnel_nic_workqueue = alloc_ordered_workqueue("udp_tunnel_nic", 0); if (!udp_tunnel_nic_workqueue) return -ENOMEM; From f4bb62e64c88c93060c051195d3bbba804e56945 Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Mon, 13 Sep 2021 16:28:52 +0700 Subject: [PATCH 072/543] tipc: increase timeout in tipc_sk_enqueue() In tipc_sk_enqueue() we use hardcoded 2 jiffies to extract socket buffer from generic queue to particular socket. The 2 jiffies is too short in case there are other high priority tasks get CPU cycles for multiple jiffies update. As result, no buffer could be enqueued to particular socket. To solve this, we switch to use constant timeout 20msecs. Then, the function will be expired between 2 jiffies (CONFIG_100HZ) and 20 jiffies (CONFIG_1000HZ). Fixes: c637c1035534 ("tipc: resolve race problem at unicast message reception") Acked-by: Jon Maloy Signed-off-by: Hoang Le Signed-off-by: David S. Miller --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index a0a27d87f631..ad570c2450be 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2423,7 +2423,7 @@ static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, u32 dport, struct sk_buff_head *xmitq) { - unsigned long time_limit = jiffies + 2; + unsigned long time_limit = jiffies + usecs_to_jiffies(20000); struct sk_buff *skb; unsigned int lim; atomic_t *dcnt; From e87b5052271e39d62337ade531992b7e5d8c2cfa Mon Sep 17 00:00:00 2001 From: zhang kai Date: Thu, 9 Sep 2021 16:39:18 +0800 Subject: [PATCH 073/543] ipv6: delay fib6_sernum increase in fib6_add only increase fib6_sernum in net namespace after add fib6_info successfully. Signed-off-by: zhang kai Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1bec5b22f80d..0371d2c14145 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1378,7 +1378,6 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, int err = -ENOMEM; int allow_create = 1; int replace_required = 0; - int sernum = fib6_new_sernum(info->nl_net); if (info->nlh) { if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) @@ -1478,7 +1477,7 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, if (!err) { if (rt->nh) list_add(&rt->nh_list, &rt->nh->f6i_list); - __fib6_update_sernum_upto_root(rt, sernum); + __fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net)); fib6_start_gc(info->nl_net, rt); } From 111b64e35ea03d58c882832744f571a88bb2e2e2 Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Sun, 12 Sep 2021 13:58:07 +0200 Subject: [PATCH 074/543] net: dsa: lantiq_gswip: Add 200ms assert delay The delay is especially needed by the xRX300 and xRX330 SoCs. Without this patch, some phys are sometimes not properly detected. The patch was tested on BT Home Hub 5A and D-Link DWR-966. Fixes: a09d042b0862 ("net: dsa: lantiq: allow to use all GPHYs on xRX300 and xRX330") Signed-off-by: Aleksander Jan Bajkowski Signed-off-by: Martin Blumenstingl Acked-by: Hauke Mehrtens Signed-off-by: David S. Miller --- drivers/net/dsa/lantiq_gswip.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 64d6dfa83122..267324889dd6 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -1885,6 +1885,12 @@ static int gswip_gphy_fw_load(struct gswip_priv *priv, struct gswip_gphy_fw *gph reset_control_assert(gphy_fw->reset); + /* The vendor BSP uses a 200ms delay after asserting the reset line. + * Without this some users are observing that the PHY is not coming up + * on the MDIO bus. + */ + msleep(200); + ret = request_firmware(&fw, gphy_fw->fw_name, dev); if (ret) { dev_err(dev, "failed to load firmware: %s, error: %i\n", From b871895b148256f1721bc565d803860242755a0b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 3 Sep 2021 22:57:06 +1000 Subject: [PATCH 075/543] powerpc/64s: system call scv tabort fix for corrupt irq soft-mask state If a system call is made with a transaction active, the kernel immediately aborts it and returns. scv system calls disable irqs even earlier in their interrupt handler, and tabort_syscall does not fix this up. This can result in irq soft-mask state being messed up on the next kernel entry, and crashing at BUG_ON(arch_irq_disabled_regs(regs)) in the kernel exit handlers, or possibly worse. This can't easily be fixed in asm because at this point an async irq may have hit, which is soft-masked and marked pending. The pending interrupt has to be replayed before returning to userspace. The fix is to move the tabort_syscall code to C in the main syscall handler, and just skip the system call but otherwise return as usual, which will take care of the pending irqs. This also does a bunch of other things including possible signal delivery to the process, but the doomed transaction should still be aborted when it is eventually returned to. The sc system call path is changed to use the new C function as well to reduce code and path differences. This slows down how quickly system calls are aborted when called while a transaction is active, which could potentially impact TM performance. But making any system call is already bad for performance, and TM is on the way out, so go with simpler over faster. Fixes: 7fa95f9adaee7 ("powerpc/64s: system call support for scv/rfscv instructions") Reported-by: Eirik Fuller Signed-off-by: Nicholas Piggin [mpe: Use #ifdef rather than IS_ENABLED() to fix build error on 32-bit] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210903125707.1601269-1-npiggin@gmail.com --- arch/powerpc/kernel/interrupt.c | 30 ++++++++++++++++++++++ arch/powerpc/kernel/interrupt_64.S | 41 ------------------------------ 2 files changed, 30 insertions(+), 41 deletions(-) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index a73f3f70a657..2ccc7ea5db00 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #if defined(CONFIG_PPC_ADV_DEBUG_REGS) && defined(CONFIG_PPC32) @@ -136,6 +137,35 @@ notrace long system_call_exception(long r3, long r4, long r5, */ irq_soft_mask_regs_set_state(regs, IRQS_ENABLED); + /* + * If the system call was made with a transaction active, doom it and + * return without performing the system call. Unless it was an + * unsupported scv vector, in which case it's treated like an illegal + * instruction. + */ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) && + !trap_is_unsupported_scv(regs)) { + /* Enable TM in the kernel, and disable EE (for scv) */ + hard_irq_disable(); + mtmsr(mfmsr() | MSR_TM); + + /* tabort, this dooms the transaction, nothing else */ + asm volatile(".long 0x7c00071d | ((%0) << 16)" + :: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)); + + /* + * Userspace will never see the return value. Execution will + * resume after the tbegin. of the aborted transaction with the + * checkpointed register state. A context switch could occur + * or signal delivered to the process before resuming the + * doomed transaction context, but that should all be handled + * as expected. + */ + return -ENOSYS; + } +#endif // CONFIG_PPC_TRANSACTIONAL_MEM + local_irq_enable(); if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) { diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index d4212d2ff0b5..ec950b08a8dc 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -12,7 +12,6 @@ #include #include #include -#include .section ".toc","aw" SYS_CALL_TABLE: @@ -55,12 +54,6 @@ COMPAT_SYS_CALL_TABLE: .globl system_call_vectored_\name system_call_vectored_\name: _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -BEGIN_FTR_SECTION - extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ - bne tabort_syscall -END_FTR_SECTION_IFSET(CPU_FTR_TM) -#endif SCV_INTERRUPT_TO_KERNEL mr r10,r1 ld r1,PACAKSAVE(r13) @@ -247,12 +240,6 @@ _ASM_NOKPROBE_SYMBOL(system_call_common_real) .globl system_call_common system_call_common: _ASM_NOKPROBE_SYMBOL(system_call_common) -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -BEGIN_FTR_SECTION - extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ - bne tabort_syscall -END_FTR_SECTION_IFSET(CPU_FTR_TM) -#endif mr r10,r1 ld r1,PACAKSAVE(r13) std r10,0(r1) @@ -425,34 +412,6 @@ SOFT_MASK_TABLE(.Lsyscall_rst_start, 1b) RESTART_TABLE(.Lsyscall_rst_start, .Lsyscall_rst_end, syscall_restart) #endif -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -tabort_syscall: -_ASM_NOKPROBE_SYMBOL(tabort_syscall) - /* Firstly we need to enable TM in the kernel */ - mfmsr r10 - li r9, 1 - rldimi r10, r9, MSR_TM_LG, 63-MSR_TM_LG - mtmsrd r10, 0 - - /* tabort, this dooms the transaction, nothing else */ - li r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT) - TABORT(R9) - - /* - * Return directly to userspace. We have corrupted user register state, - * but userspace will never see that register state. Execution will - * resume after the tbegin of the aborted transaction with the - * checkpointed register state. - */ - li r9, MSR_RI - andc r10, r10, r9 - mtmsrd r10, 1 - mtspr SPRN_SRR0, r11 - mtspr SPRN_SRR1, r12 - RFI_TO_USER - b . /* prevent speculative execution */ -#endif - /* * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not * touched, no exit work created, then this can be used. From 5379ef2a60431232b9bb01c6d3580b875123d723 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 3 Sep 2021 22:57:07 +1000 Subject: [PATCH 076/543] selftests/powerpc: Add scv versions of the basic TM syscall tests The basic TM vs syscall test code hard codes an sc instruction for the system call, which fails to cover scv even when the userspace libc has support for it. Duplicate the tests with hard coded scv variants so both are tested when possible. Signed-off-by: Nicholas Piggin [mpe: Fix build on old toolchains by using .long for scv] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210903125707.1601269-2-npiggin@gmail.com --- .../selftests/powerpc/tm/tm-syscall-asm.S | 37 ++++++++++++++++++- .../testing/selftests/powerpc/tm/tm-syscall.c | 36 ++++++++++++++---- 2 files changed, 65 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S index bd1ca25febe4..aed632d29fff 100644 --- a/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S +++ b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#include +#include #include .text @@ -26,3 +26,38 @@ FUNC_START(getppid_tm_suspended) 1: li r3, -1 blr + + +.macro scv level + .long (0x44000001 | (\level) << 5) +.endm + +FUNC_START(getppid_scv_tm_active) + PUSH_BASIC_STACK(0) + tbegin. + beq 1f + li r0, __NR_getppid + scv 0 + tend. + POP_BASIC_STACK(0) + blr +1: + li r3, -1 + POP_BASIC_STACK(0) + blr + +FUNC_START(getppid_scv_tm_suspended) + PUSH_BASIC_STACK(0) + tbegin. + beq 1f + li r0, __NR_getppid + tsuspend. + scv 0 + tresume. + tend. + POP_BASIC_STACK(0) + blr +1: + li r3, -1 + POP_BASIC_STACK(0) + blr diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall.c b/tools/testing/selftests/powerpc/tm/tm-syscall.c index 467a6b3134b2..b763354c2eb4 100644 --- a/tools/testing/selftests/powerpc/tm/tm-syscall.c +++ b/tools/testing/selftests/powerpc/tm/tm-syscall.c @@ -19,23 +19,36 @@ #include "utils.h" #include "tm.h" +#ifndef PPC_FEATURE2_SCV +#define PPC_FEATURE2_SCV 0x00100000 /* scv syscall */ +#endif + extern int getppid_tm_active(void); extern int getppid_tm_suspended(void); +extern int getppid_scv_tm_active(void); +extern int getppid_scv_tm_suspended(void); unsigned retries = 0; #define TEST_DURATION 10 /* seconds */ -pid_t getppid_tm(bool suspend) +pid_t getppid_tm(bool scv, bool suspend) { int i; pid_t pid; for (i = 0; i < TM_RETRIES; i++) { - if (suspend) - pid = getppid_tm_suspended(); - else - pid = getppid_tm_active(); + if (suspend) { + if (scv) + pid = getppid_scv_tm_suspended(); + else + pid = getppid_tm_suspended(); + } else { + if (scv) + pid = getppid_scv_tm_active(); + else + pid = getppid_tm_active(); + } if (pid >= 0) return pid; @@ -82,15 +95,24 @@ int tm_syscall(void) * Test a syscall within a suspended transaction and verify * that it succeeds. */ - FAIL_IF(getppid_tm(true) == -1); /* Should succeed. */ + FAIL_IF(getppid_tm(false, true) == -1); /* Should succeed. */ /* * Test a syscall within an active transaction and verify that * it fails with the correct failure code. */ - FAIL_IF(getppid_tm(false) != -1); /* Should fail... */ + FAIL_IF(getppid_tm(false, false) != -1); /* Should fail... */ FAIL_IF(!failure_is_persistent()); /* ...persistently... */ FAIL_IF(!failure_is_syscall()); /* ...with code syscall. */ + + /* Now do it all again with scv if it is available. */ + if (have_hwcap2(PPC_FEATURE2_SCV)) { + FAIL_IF(getppid_tm(true, true) == -1); /* Should succeed. */ + FAIL_IF(getppid_tm(true, false) != -1); /* Should fail... */ + FAIL_IF(!failure_is_persistent()); /* ...persistently... */ + FAIL_IF(!failure_is_syscall()); /* ...with code syscall. */ + } + gettimeofday(&now, 0); } From ae7aaecc3f2f78b76ab3a8d6178610f55aadfa56 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 8 Sep 2021 20:17:17 +1000 Subject: [PATCH 077/543] powerpc/64s: system call rfscv workaround for TM bugs The rfscv instruction does not work correctly with the fake-suspend mode in POWER9, which can end up with the hypervisor restoring an incorrect checkpoint. Work around this by setting the _TIF_RESTOREALL flag if a system call returns to a transaction active state, causing rfid to be used instead of rfscv to return, which will do the right thing. The contents of the registers are irrelevant because they will be overwritten in this case anyway. Fixes: 7fa95f9adaee7 ("powerpc/64s: system call support for scv/rfscv instructions") Reported-by: Eirik Fuller Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210908101718.118522-1-npiggin@gmail.com --- arch/powerpc/kernel/interrupt.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 2ccc7ea5db00..de10a2697258 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -137,6 +137,19 @@ notrace long system_call_exception(long r3, long r4, long r5, */ irq_soft_mask_regs_set_state(regs, IRQS_ENABLED); + /* + * If system call is called with TM active, set _TIF_RESTOREALL to + * prevent RFSCV being used to return to userspace, because POWER9 + * TM implementation has problems with this instruction returning to + * transactional state. Final register values are not relevant because + * the transaction will be aborted upon return anyway. Or in the case + * of unsupported_scv SIGILL fault, the return state does not much + * matter because it's an edge case. + */ + if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && + unlikely(MSR_TM_TRANSACTIONAL(regs->msr))) + current_thread_info()->flags |= _TIF_RESTOREALL; + /* * If the system call was made with a transaction active, doom it and * return without performing the system call. Unless it was an From 267cdfa21385d78c794768233678756e32b39ead Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 8 Sep 2021 20:17:18 +1000 Subject: [PATCH 078/543] KVM: PPC: Book3S HV: Tolerate treclaim. in fake-suspend mode changing registers POWER9 DD2.2 and 2.3 hardware implements a "fake-suspend" mode where certain TM instructions executed in HV=0 mode cause softpatch interrupts so the hypervisor can emulate them and prevent problematic processor conditions. In this fake-suspend mode, the treclaim. instruction does not modify registers. Unfortunately the rfscv instruction executed by the guest do not generate softpatch interrupts, which can cause the hypervisor to lose track of the fake-suspend mode, and it can execute this treclaim. while not in fake-suspend mode. This modifies GPRs and crashes the hypervisor. It's not trivial to disable scv in the guest with HFSCR now, because they assume a POWER9 has scv available. So this fix saves and restores checkpointed registers across the treclaim. Fixes: 7854f7545bff ("KVM: PPC: Book3S: Rework TM save/restore code and make it C-callable") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210908101718.118522-2-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 36 +++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 75079397c2a5..90484425a1e6 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2536,7 +2536,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST) /* The following code handles the fake_suspend = 1 case */ mflr r0 std r0, PPC_LR_STKOFF(r1) - stdu r1, -PPC_MIN_STKFRM(r1) + stdu r1, -TM_FRAME_SIZE(r1) /* Turn on TM. */ mfmsr r8 @@ -2551,10 +2551,42 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG) nop + /* + * It's possible that treclaim. may modify registers, if we have lost + * track of fake-suspend state in the guest due to it using rfscv. + * Save and restore registers in case this occurs. + */ + mfspr r3, SPRN_DSCR + mfspr r4, SPRN_XER + mfspr r5, SPRN_AMR + /* SPRN_TAR would need to be saved here if the kernel ever used it */ + mfcr r12 + SAVE_NVGPRS(r1) + SAVE_GPR(2, r1) + SAVE_GPR(3, r1) + SAVE_GPR(4, r1) + SAVE_GPR(5, r1) + stw r12, 8(r1) + std r1, HSTATE_HOST_R1(r13) + /* We have to treclaim here because that's the only way to do S->N */ li r3, TM_CAUSE_KVM_RESCHED TRECLAIM(R3) + GET_PACA(r13) + ld r1, HSTATE_HOST_R1(r13) + REST_GPR(2, r1) + REST_GPR(3, r1) + REST_GPR(4, r1) + REST_GPR(5, r1) + lwz r12, 8(r1) + REST_NVGPRS(r1) + mtspr SPRN_DSCR, r3 + mtspr SPRN_XER, r4 + mtspr SPRN_AMR, r5 + mtcr r12 + HMT_MEDIUM + /* * We were in fake suspend, so we are not going to save the * register state as the guest checkpointed state (since @@ -2582,7 +2614,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG) std r5, VCPU_TFHAR(r9) std r6, VCPU_TFIAR(r9) - addi r1, r1, PPC_MIN_STKFRM + addi r1, r1, TM_FRAME_SIZE ld r0, PPC_LR_STKOFF(r1) mtlr r0 blr From 3a1e92d0896e928ac2a5b58962d05a39afef2e23 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Thu, 9 Sep 2021 12:13:30 +0530 Subject: [PATCH 079/543] powerpc/mce: Fix access error in mce handler We queue an irq work for deferred processing of mce event in realmode mce handler, where translation is disabled. Queuing of the work may result in accessing memory outside RMO region, such access needs the translation to be enabled for an LPAR running with hash mmu else the kernel crashes. After enabling translation in mce_handle_error() we used to leave it enabled to avoid crashing here, but now with the commit 74c3354bc1d89 ("powerpc/pseries/mce: restore msr before returning from handler") we are restoring the MSR to disable translation. Hence to fix this enable the translation before queuing the work. Without this change following trace is seen on injecting SLB multihit in an LPAR running with hash mmu. Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries CPU: 5 PID: 1883 Comm: insmod Tainted: G OE 5.14.0-mce+ #137 NIP: c000000000735d60 LR: c000000000318640 CTR: 0000000000000000 REGS: c00000001ebff9a0 TRAP: 0300 Tainted: G OE (5.14.0-mce+) MSR: 8000000000001003 CR: 28008228 XER: 00000001 CFAR: c00000000031863c DAR: c00000027fa8fe08 DSISR: 40000000 IRQMASK: 0 ... NIP llist_add_batch+0x0/0x40 LR __irq_work_queue_local+0x70/0xc0 Call Trace: 0xc00000001ebffc0c (unreliable) irq_work_queue+0x40/0x70 machine_check_queue_event+0xbc/0xd0 machine_check_early_common+0x16c/0x1f4 Fixes: 74c3354bc1d89 ("powerpc/pseries/mce: restore msr before returning from handler") Signed-off-by: Ganesh Goudar [mpe: Fix comment formatting, trim oops in change log for readability] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210909064330.312432-1-ganeshgr@linux.ibm.com --- arch/powerpc/kernel/mce.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 47a683cd00d2..fd829f7f25a4 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -249,6 +249,7 @@ void machine_check_queue_event(void) { int index; struct machine_check_event evt; + unsigned long msr; if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return; @@ -262,8 +263,20 @@ void machine_check_queue_event(void) memcpy(&local_paca->mce_info->mce_event_queue[index], &evt, sizeof(evt)); - /* Queue irq work to process this event later. */ - irq_work_queue(&mce_event_process_work); + /* + * Queue irq work to process this event later. Before + * queuing the work enable translation for non radix LPAR, + * as irq_work_queue may try to access memory outside RMO + * region. + */ + if (!radix_enabled() && firmware_has_feature(FW_FEATURE_LPAR)) { + msr = mfmsr(); + mtmsr(msr | MSR_IR | MSR_DR); + irq_work_queue(&mce_event_process_work); + mtmsr(msr); + } else { + irq_work_queue(&mce_event_process_work); + } } void mce_common_process_ue(struct pt_regs *regs, From f7ec554b73c5239a96afb9a9c3eb18cb11f539b7 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Mon, 13 Sep 2021 21:08:20 +0800 Subject: [PATCH 080/543] net: hns3: add option to turn off page pool feature When page pool is added to the hns3 driver, it is always enabled unconditionally, which means spilt page handling in the hns3 driver is dead code. As there is a requirement to test the performance between spilt page handling in driver and page pool, so add a module param to support disabling the page pool. When the page pool is proved to perform better in most case, the spilt page handling in driver can be removed. Fixes: 93188e9642c3 ("net: hns3: support skb's frag page recycling based on page pool") Signed-off-by: Yunsheng Lin Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 22af3d6ce178..293243bbe407 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -61,6 +61,9 @@ static unsigned int tx_sgl = 1; module_param(tx_sgl, uint, 0600); MODULE_PARM_DESC(tx_sgl, "Minimum number of frags when using dma_map_sg() to optimize the IOMMU mapping"); +static bool page_pool_enabled = true; +module_param(page_pool_enabled, bool, 0400); + #define HNS3_SGL_SIZE(nfrag) (sizeof(struct scatterlist) * (nfrag) + \ sizeof(struct sg_table)) #define HNS3_MAX_SGL_SIZE ALIGN(HNS3_SGL_SIZE(HNS3_MAX_TSO_BD_NUM), \ @@ -4753,7 +4756,8 @@ static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring) goto out_with_desc_cb; if (!HNAE3_IS_TX_RING(ring)) { - hns3_alloc_page_pool(ring); + if (page_pool_enabled) + hns3_alloc_page_pool(ring); ret = hns3_alloc_ring_buffers(ring); if (ret) From d18e81183b1cb9c309266cbbce9acd3e0c528d04 Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Mon, 13 Sep 2021 21:08:21 +0800 Subject: [PATCH 081/543] net: hns3: pad the short tunnel frame before sending to hardware The hardware cannot handle short tunnel frames below 65 bytes, and will cause vlan tag missing problem. So pads packet size to 65 bytes for tunnel frames to fix this bug. Fixes: 3db084d28dc0("net: hns3: Fix for vxlan tx checksum bug") Signed-off-by: Yufeng Mo Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 293243bbe407..adc54a726661 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -76,6 +76,7 @@ module_param(page_pool_enabled, bool, 0400); #define HNS3_OUTER_VLAN_TAG 2 #define HNS3_MIN_TX_LEN 33U +#define HNS3_MIN_TUN_PKT_LEN 65U /* hns3_pci_tbl - PCI Device ID Table * @@ -1427,8 +1428,11 @@ static int hns3_set_l2l3l4(struct sk_buff *skb, u8 ol4_proto, l4.tcp->doff); break; case IPPROTO_UDP: - if (hns3_tunnel_csum_bug(skb)) - return skb_checksum_help(skb); + if (hns3_tunnel_csum_bug(skb)) { + int ret = skb_put_padto(skb, HNS3_MIN_TUN_PKT_LEN); + + return ret ? ret : skb_checksum_help(skb); + } hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1); hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4T_S, From 1dc839ec09d3ab2a4156dc98328b8bc3586f2b70 Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Mon, 13 Sep 2021 21:08:22 +0800 Subject: [PATCH 082/543] net: hns3: change affinity_mask to numa node range Currently, affinity_mask is set to a single cpu. As a result, irqbalance becomes invalid in SUBSET or EXACT mode. To solve this problem, change affinity_mask to numa node range. In this way, irqbalance can be performed on the cpu of the numa node. Fixes: 0812545487ec ("net: hns3: add interrupt affinity support for misc interrupt") Signed-off-by: Yufeng Mo Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index e55ba2e511b1..aed97c934bfb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1528,9 +1528,10 @@ static void hclge_init_kdump_kernel_config(struct hclge_dev *hdev) static int hclge_configure(struct hclge_dev *hdev) { struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + const struct cpumask *cpumask = cpu_online_mask; struct hclge_cfg cfg; unsigned int i; - int ret; + int node, ret; ret = hclge_get_cfg(hdev, &cfg); if (ret) @@ -1595,11 +1596,12 @@ static int hclge_configure(struct hclge_dev *hdev) hclge_init_kdump_kernel_config(hdev); - /* Set the init affinity based on pci func number */ - i = cpumask_weight(cpumask_of_node(dev_to_node(&hdev->pdev->dev))); - i = i ? PCI_FUNC(hdev->pdev->devfn) % i : 0; - cpumask_set_cpu(cpumask_local_spread(i, dev_to_node(&hdev->pdev->dev)), - &hdev->affinity_mask); + /* Set the affinity based on numa node */ + node = dev_to_node(&hdev->pdev->dev); + if (node != NUMA_NO_NODE) + cpumask = cpumask_of_node(node); + + cpumask_copy(&hdev->affinity_mask, cpumask); return ret; } From b81d8948746520f989e86d66292ff72b5056114a Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Mon, 13 Sep 2021 21:08:23 +0800 Subject: [PATCH 083/543] net: hns3: disable mac in flr process The firmware will not disable mac in flr process. Therefore, the driver needs to proactively disable mac during flr, which is the same as the function reset. Fixes: 35d93a30040c ("net: hns3: adjust the process of PF reset") Signed-off-by: Yufeng Mo Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index aed97c934bfb..f1e46ba799f9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -8127,11 +8127,12 @@ static void hclge_ae_stop(struct hnae3_handle *handle) hclge_clear_arfs_rules(hdev); spin_unlock_bh(&hdev->fd_rule_lock); - /* If it is not PF reset, the firmware will disable the MAC, + /* If it is not PF reset or FLR, the firmware will disable the MAC, * so it only need to stop phy here. */ if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) && - hdev->reset_type != HNAE3_FUNC_RESET) { + hdev->reset_type != HNAE3_FUNC_RESET && + hdev->reset_type != HNAE3_FLR_RESET) { hclge_mac_stop_phy(hdev); hclge_update_link_status(hdev); return; From 472430a7b066f19afa1b55867d621b2d6d323e0d Mon Sep 17 00:00:00 2001 From: Jiaran Zhang Date: Mon, 13 Sep 2021 21:08:24 +0800 Subject: [PATCH 084/543] net: hns3: fix the exception when query imp info When the command for querying imp info is issued to the firmware, if the firmware does not support the command, the returned value of bd num is 0. Add protection mechanism before alloc memory to prevent apply for 0-length memory. Fixes: 0b198b0d80ea ("net: hns3: refactor dump m7 info of debugfs") Signed-off-by: Jiaran Zhang Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index 68ed1715ac52..87d96f82c318 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -1724,6 +1724,10 @@ hclge_dbg_get_imp_stats_info(struct hclge_dev *hdev, char *buf, int len) } bd_num = le32_to_cpu(req->bd_num); + if (!bd_num) { + dev_err(&hdev->pdev->dev, "imp statistics bd number is 0!\n"); + return -EINVAL; + } desc_src = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL); if (!desc_src) From 427900d27d86b820c559037a984bd403f910860f Mon Sep 17 00:00:00 2001 From: Jiaran Zhang Date: Mon, 13 Sep 2021 21:08:25 +0800 Subject: [PATCH 085/543] net: hns3: fix the timing issue of VF clearing interrupt sources Currently, the VF does not clear the interrupt source immediately after receiving the interrupt. As a result, if the second interrupt task is triggered when processing the first interrupt task, clearing the interrupt source before exiting will clear the interrupt sources of the two tasks at the same time. As a result, no interrupt is triggered for the second task. The VF detects the missed message only when the next interrupt is generated. Clearing it immediately after executing check_evt_cause ensures that: 1. Even if two interrupt tasks are triggered at the same time, they can be processed. 2. If the second task is triggered during the processing of the first task and the interrupt source is not cleared, the interrupt is reported after vector0 is enabled. Fixes: b90fcc5bd904 ("net: hns3: add reset handling for VF when doing Core/Global/IMP reset") Signed-off-by: Jiaran Zhang Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 82e727020120..a69e892277b3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2465,6 +2465,8 @@ static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data) hclgevf_enable_vector(&hdev->misc_vector, false); event_cause = hclgevf_check_evt_cause(hdev, &clearval); + if (event_cause != HCLGEVF_VECTOR0_EVENT_OTHER) + hclgevf_clear_event_cause(hdev, clearval); switch (event_cause) { case HCLGEVF_VECTOR0_EVENT_RST: @@ -2477,10 +2479,8 @@ static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data) break; } - if (event_cause != HCLGEVF_VECTOR0_EVENT_OTHER) { - hclgevf_clear_event_cause(hdev, clearval); + if (event_cause != HCLGEVF_VECTOR0_EVENT_OTHER) hclgevf_enable_vector(&hdev->misc_vector, true); - } return IRQ_HANDLED; } From 0bd46e22c5ec3dbfb81b60de475151e3f6b411c2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 9 Sep 2021 12:14:40 +0300 Subject: [PATCH 086/543] nvmet: fix a width vs precision bug in nvmet_subsys_attr_serial_show() This was intended to limit the number of characters printed from "subsys->serial" to NVMET_SN_MAX_SIZE. But accidentally the width specifier was used instead of the precision specifier so it only affects the alignment and not the number of characters printed. Fixes: f04064814c2a ("nvmet: fixup buffer overrun in nvmet_subsys_attr_serial()") Signed-off-by: Dan Carpenter Signed-off-by: Christoph Hellwig --- drivers/nvme/target/configfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index d784f3c200b4..be5d82421e3a 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1067,7 +1067,7 @@ static ssize_t nvmet_subsys_attr_serial_show(struct config_item *item, { struct nvmet_subsys *subsys = to_subsys(item); - return snprintf(page, PAGE_SIZE, "%*s\n", + return snprintf(page, PAGE_SIZE, "%.*s\n", NVMET_SN_MAX_SIZE, subsys->serial); } From 9edceaf43050f5ba1dd7d0011bcf68a736a17743 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 2 Sep 2021 11:20:02 +0200 Subject: [PATCH 087/543] nvme: avoid race in shutdown namespace removal When we remove the siblings entry, we update ns->head->list, hence we can't separate the removal and test for being empty. They have to be in the same critical section to avoid a race. To avoid breaking the refcounting imbalance again, add a list empty check to nvme_find_ns_head. Fixes: 5396fdac56d8 ("nvme: fix refcounting imbalance when all paths are down") Signed-off-by: Daniel Wagner Reviewed-by: Hannes Reinecke Tested-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 7efb31b87f37..97f8211cf92c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3524,7 +3524,9 @@ static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys, lockdep_assert_held(&subsys->lock); list_for_each_entry(h, &subsys->nsheads, entry) { - if (h->ns_id == nsid && nvme_tryget_ns_head(h)) + if (h->ns_id != nsid) + continue; + if (!list_empty(&h->list) && nvme_tryget_ns_head(h)) return h; } @@ -3843,6 +3845,10 @@ static void nvme_ns_remove(struct nvme_ns *ns) mutex_lock(&ns->ctrl->subsys->lock); list_del_rcu(&ns->siblings); + if (list_empty(&ns->head->list)) { + list_del_init(&ns->head->entry); + last_path = true; + } mutex_unlock(&ns->ctrl->subsys->lock); /* guarantee not available in head->list */ @@ -3863,13 +3869,6 @@ static void nvme_ns_remove(struct nvme_ns *ns) list_del_init(&ns->list); up_write(&ns->ctrl->namespaces_rwsem); - /* Synchronize with nvme_init_ns_head() */ - mutex_lock(&ns->head->subsys->lock); - if (list_empty(&ns->head->list)) { - list_del_init(&ns->head->entry); - last_path = true; - } - mutex_unlock(&ns->head->subsys->lock); if (last_path) nvme_mpath_shutdown_disk(ns->head); nvme_put_ns(ns); From 510e1a724ab1bf38150be2c1acabb303f98d0047 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Fri, 10 Sep 2021 19:53:37 -0400 Subject: [PATCH 088/543] dma-debug: prevent an error message from causing runtime problems For some drivers, that use the DMA API. This error message can be reached several millions of times per second, causing spam to the kernel's printk buffer and bringing the CPU usage up to 100% (so, it should be rate limited). However, since there is at least one driver that is in the mainline and suffers from the error condition, it is more useful to err_printk() here instead of just rate limiting the error message (in hopes that it will make it easier for other drivers that suffer from this issue to be spotted). Link: https://lkml.kernel.org/r/fd67fbac-64bf-f0ea-01e1-5938ccfab9d0@arm.com Reported-by: Jeremy Linton Signed-off-by: Hamza Mahfooz Signed-off-by: Christoph Hellwig --- kernel/dma/debug.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 6c90c69e5311..95445bd6eb72 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -567,7 +567,8 @@ static void add_dma_entry(struct dma_debug_entry *entry) pr_err("cacheline tracking ENOMEM, dma-debug disabled\n"); global_disable = true; } else if (rc == -EEXIST) { - pr_err("cacheline tracking EEXIST, overlapping mappings aren't supported\n"); + err_printk(entry->dev, entry, + "cacheline tracking EEXIST, overlapping mappings aren't supported\n"); } } From dd47c104533dedb90434a3f142e94a671ac623a6 Mon Sep 17 00:00:00 2001 From: Eugene Syromiatnikov Date: Mon, 13 Sep 2021 17:44:15 +0200 Subject: [PATCH 089/543] io-wq: provide IO_WQ_* constants for IORING_REGISTER_IOWQ_MAX_WORKERS arg items The items passed in the array pointed by the arg parameter of IORING_REGISTER_IOWQ_MAX_WORKERS io_uring_register operation carry certain semantics: they refer to different io-wq worker categories; provide IO_WQ_* constants in the UAPI, so these categories can be referenced in the user space code. Suggested-by: Jens Axboe Complements: 2e480058ddc21ec5 ("io-wq: provide a way to limit max number of workers") Signed-off-by: Eugene Syromiatnikov Link: https://lore.kernel.org/r/20210913154415.GA12890@asgard.redhat.com Signed-off-by: Jens Axboe --- fs/io-wq.c | 5 +++++ include/uapi/linux/io_uring.h | 8 +++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 3d4460df845c..c2e0e8e80949 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "io-wq.h" @@ -1281,6 +1282,10 @@ int io_wq_max_workers(struct io_wq *wq, int *new_count) { int i, node, prev = 0; + BUILD_BUG_ON((int) IO_WQ_ACCT_BOUND != (int) IO_WQ_BOUND); + BUILD_BUG_ON((int) IO_WQ_ACCT_UNBOUND != (int) IO_WQ_UNBOUND); + BUILD_BUG_ON((int) IO_WQ_ACCT_NR != 2); + for (i = 0; i < 2; i++) { if (new_count[i] > task_rlimit(current, RLIMIT_NPROC)) new_count[i] = task_rlimit(current, RLIMIT_NPROC); diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 59ef35154e3d..b270a07b285e 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -317,13 +317,19 @@ enum { IORING_REGISTER_IOWQ_AFF = 17, IORING_UNREGISTER_IOWQ_AFF = 18, - /* set/get max number of workers */ + /* set/get max number of io-wq workers */ IORING_REGISTER_IOWQ_MAX_WORKERS = 19, /* this goes last */ IORING_REGISTER_LAST }; +/* io-wq worker categories */ +enum { + IO_WQ_BOUND, + IO_WQ_UNBOUND, +}; + /* deprecated, see struct io_uring_rsrc_update */ struct io_uring_files_update { __u32 offset; From a69ae291e1cc2d08ae77c2029579c59c9bde5061 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 13 Sep 2021 17:35:47 +0100 Subject: [PATCH 090/543] x86/uaccess: Fix 32-bit __get_user_asm_u64() when CC_HAS_ASM_GOTO_OUTPUT=y Commit 865c50e1d279 ("x86/uaccess: utilize CONFIG_CC_HAS_ASM_GOTO_OUTPUT") added an optimised version of __get_user_asm() for x86 using 'asm goto'. Like the non-optimised code, the 32-bit implementation of 64-bit get_user() expands to a pair of 32-bit accesses. Unlike the non-optimised code, the _original_ pointer is incremented to copy the high word instead of loading through a new pointer explicitly constructed to point at a 32-bit type. Consequently, if the pointer points at a 64-bit type then we end up loading the wrong data for the upper 32-bits. This was observed as a mount() failure in Android targeting i686 after b0cfcdd9b967 ("d_path: make 'prepend()' fill up the buffer exactly on overflow") because the call to copy_from_kernel_nofault() from prepend_copy() ends up in __get_kernel_nofault() and casts the source pointer to a 'u64 __user *'. An attempt to mount at "/debug_ramdisk" therefore ends up failing trying to mount "/debumdismdisk". Use the existing '__gu_ptr' source pointer to unsigned int for 32-bit __get_user_asm_u64() instead of the original pointer. Cc: Bill Wendling Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Peter Zijlstra Reported-by: Greg Kroah-Hartman Fixes: 865c50e1d279 ("x86/uaccess: utilize CONFIG_CC_HAS_ASM_GOTO_OUTPUT") Signed-off-by: Will Deacon Reviewed-by: Nick Desaulniers Tested-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- arch/x86/include/asm/uaccess.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index c9fa7be3df82..5c95d242f38d 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -301,8 +301,8 @@ do { \ unsigned int __gu_low, __gu_high; \ const unsigned int __user *__gu_ptr; \ __gu_ptr = (const void __user *)(ptr); \ - __get_user_asm(__gu_low, ptr, "l", "=r", label); \ - __get_user_asm(__gu_high, ptr+1, "l", "=r", label); \ + __get_user_asm(__gu_low, __gu_ptr, "l", "=r", label); \ + __get_user_asm(__gu_high, __gu_ptr+1, "l", "=r", label); \ (x) = ((unsigned long long)__gu_high << 32) | __gu_low; \ } while (0) #else From 76ae847497bc5207c479de5e2ac487270008b19b Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 10 Sep 2021 16:40:38 -0700 Subject: [PATCH 091/543] Documentation: raise minimum supported version of GCC to 5.1 commit fad7cd3310db ("nbd: add the check to prevent overflow in __nbd_ioctl()") raised an issue from the fallback helpers added in commit f0907827a8a9 ("compiler.h: enable builtin overflow checkers and add fallback code") Specifically, the helpers for checking whether the results of a multiplication overflowed (__unsigned_mul_overflow, __signed_add_overflow) use the division operator when !COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW. This is problematic for 64b operands on 32b hosts. Also, because the macro is type agnostic, it is very difficult to write a similarly type generic macro that dispatches to one of: * div64_s64 * div64_u64 * div_s64 * div_u64 Raising the minimum supported versions allows us to remove all of the fallback helpers for !COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW, instead dispatching the compiler builtins. arm64 has already raised the minimum supported GCC version to 5.1, do this for all targets now. See the link below for the previous discussion. Link: https://lore.kernel.org/all/20210909182525.372ee687@canb.auug.org.au/ Link: https://lore.kernel.org/lkml/CAK7LNASs6dvU6D3jL2GG3jW58fXfaj6VNOe55NJnTB8UPuk2pA@mail.gmail.com/ Link: https://github.com/ClangBuiltLinux/linux/issues/1438 Reported-by: Stephen Rothwell Reported-by: Nathan Chancellor Suggested-by: Rasmus Villemoes Signed-off-by: Nick Desaulniers Reviewed-by: Kees Cook Reviewed-by: Nathan Chancellor Signed-off-by: Linus Torvalds --- Documentation/process/changes.rst | 2 +- scripts/min-tool-version.sh | 8 +------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst index d3a8557b66a1..e35ab74a0f80 100644 --- a/Documentation/process/changes.rst +++ b/Documentation/process/changes.rst @@ -29,7 +29,7 @@ you probably needn't concern yourself with pcmciautils. ====================== =============== ======================================== Program Minimal version Command to check the version ====================== =============== ======================================== -GNU C 4.9 gcc --version +GNU C 5.1 gcc --version Clang/LLVM (optional) 10.0.1 clang --version GNU make 3.81 make --version binutils 2.23 ld -v diff --git a/scripts/min-tool-version.sh b/scripts/min-tool-version.sh index 319f92104f56..4edc708baa63 100755 --- a/scripts/min-tool-version.sh +++ b/scripts/min-tool-version.sh @@ -17,13 +17,7 @@ binutils) echo 2.23.0 ;; gcc) - # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63293 - # https://lore.kernel.org/r/20210107111841.GN1551@shell.armlinux.org.uk - if [ "$SRCARCH" = arm64 ]; then - echo 5.1.0 - else - echo 4.9.0 - fi + echo 5.1.0 ;; icc) # temporary From 4eb6bd55cfb22ffc20652732340c4962f3ac9a91 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 10 Sep 2021 16:40:39 -0700 Subject: [PATCH 092/543] compiler.h: drop fallback overflow checkers Once upgrading the minimum supported version of GCC to 5.1, we can drop the fallback code for !COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW. This is effectively a revert of commit f0907827a8a9 ("compiler.h: enable builtin overflow checkers and add fallback code") Link: https://github.com/ClangBuiltLinux/linux/issues/1438#issuecomment-916745801 Suggested-by: Rasmus Villemoes Signed-off-by: Nick Desaulniers Acked-by: Kees Cook Reviewed-by: Nathan Chancellor Signed-off-by: Linus Torvalds --- include/linux/compiler-clang.h | 13 --- include/linux/compiler-gcc.h | 4 - include/linux/overflow.h | 138 +--------------------------- tools/include/linux/compiler-gcc.h | 4 - tools/include/linux/overflow.h | 140 +---------------------------- 5 files changed, 6 insertions(+), 293 deletions(-) diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 49b0ac8b6fd3..3c4de9b6c6e3 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -62,19 +62,6 @@ #define __no_sanitize_coverage #endif -/* - * Not all versions of clang implement the type-generic versions - * of the builtin overflow checkers. Fortunately, clang implements - * __has_builtin allowing us to avoid awkward version - * checks. Unfortunately, we don't know which version of gcc clang - * pretends to be, so the macro may or may not be defined. - */ -#if __has_builtin(__builtin_mul_overflow) && \ - __has_builtin(__builtin_add_overflow) && \ - __has_builtin(__builtin_sub_overflow) -#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 -#endif - #if __has_feature(shadow_call_stack) # define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) #endif diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index cb9217fc60af..3f7f6fa0e051 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -128,10 +128,6 @@ #define __no_sanitize_coverage #endif -#if GCC_VERSION >= 50100 -#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 -#endif - /* * Turn individual warnings and errors on and off locally, depending * on version. diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 0f12345c21fb..4669632bd72b 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -6,12 +6,9 @@ #include /* - * In the fallback code below, we need to compute the minimum and - * maximum values representable in a given type. These macros may also - * be useful elsewhere, so we provide them outside the - * COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block. - * - * It would seem more obvious to do something like + * We need to compute the minimum and maximum values representable in a given + * type. These macros may also be useful elsewhere. It would seem more obvious + * to do something like: * * #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0) * #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0) @@ -54,7 +51,6 @@ static inline bool __must_check __must_check_overflow(bool overflow) return unlikely(overflow); } -#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW /* * For simplicity and code hygiene, the fallback code below insists on * a, b and *d having the same type (similar to the min() and max() @@ -90,134 +86,6 @@ static inline bool __must_check __must_check_overflow(bool overflow) __builtin_mul_overflow(__a, __b, __d); \ })) -#else - - -/* Checking for unsigned overflow is relatively easy without causing UB. */ -#define __unsigned_add_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = __a + __b; \ - *__d < __a; \ -}) -#define __unsigned_sub_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = __a - __b; \ - __a < __b; \ -}) -/* - * If one of a or b is a compile-time constant, this avoids a division. - */ -#define __unsigned_mul_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = __a * __b; \ - __builtin_constant_p(__b) ? \ - __b > 0 && __a > type_max(typeof(__a)) / __b : \ - __a > 0 && __b > type_max(typeof(__b)) / __a; \ -}) - -/* - * For signed types, detecting overflow is much harder, especially if - * we want to avoid UB. But the interface of these macros is such that - * we must provide a result in *d, and in fact we must produce the - * result promised by gcc's builtins, which is simply the possibly - * wrapped-around value. Fortunately, we can just formally do the - * operations in the widest relevant unsigned type (u64) and then - * truncate the result - gcc is smart enough to generate the same code - * with and without the (u64) casts. - */ - -/* - * Adding two signed integers can overflow only if they have the same - * sign, and overflow has happened iff the result has the opposite - * sign. - */ -#define __signed_add_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = (u64)__a + (u64)__b; \ - (((~(__a ^ __b)) & (*__d ^ __a)) \ - & type_min(typeof(__a))) != 0; \ -}) - -/* - * Subtraction is similar, except that overflow can now happen only - * when the signs are opposite. In this case, overflow has happened if - * the result has the opposite sign of a. - */ -#define __signed_sub_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = (u64)__a - (u64)__b; \ - ((((__a ^ __b)) & (*__d ^ __a)) \ - & type_min(typeof(__a))) != 0; \ -}) - -/* - * Signed multiplication is rather hard. gcc always follows C99, so - * division is truncated towards 0. This means that we can write the - * overflow check like this: - * - * (a > 0 && (b > MAX/a || b < MIN/a)) || - * (a < -1 && (b > MIN/a || b < MAX/a) || - * (a == -1 && b == MIN) - * - * The redundant casts of -1 are to silence an annoying -Wtype-limits - * (included in -Wextra) warning: When the type is u8 or u16, the - * __b_c_e in check_mul_overflow obviously selects - * __unsigned_mul_overflow, but unfortunately gcc still parses this - * code and warns about the limited range of __b. - */ - -#define __signed_mul_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - typeof(a) __tmax = type_max(typeof(a)); \ - typeof(a) __tmin = type_min(typeof(a)); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = (u64)__a * (u64)__b; \ - (__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \ - (__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \ - (__b == (typeof(__b))-1 && __a == __tmin); \ -}) - - -#define check_add_overflow(a, b, d) __must_check_overflow( \ - __builtin_choose_expr(is_signed_type(typeof(a)), \ - __signed_add_overflow(a, b, d), \ - __unsigned_add_overflow(a, b, d))) - -#define check_sub_overflow(a, b, d) __must_check_overflow( \ - __builtin_choose_expr(is_signed_type(typeof(a)), \ - __signed_sub_overflow(a, b, d), \ - __unsigned_sub_overflow(a, b, d))) - -#define check_mul_overflow(a, b, d) __must_check_overflow( \ - __builtin_choose_expr(is_signed_type(typeof(a)), \ - __signed_mul_overflow(a, b, d), \ - __unsigned_mul_overflow(a, b, d))) - -#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */ - /** check_shl_overflow() - Calculate a left-shifted value and check overflow * * @a: Value to be shifted diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index 95c072b70d0e..a590a1dfafd9 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -38,7 +38,3 @@ #endif #define __printf(a, b) __attribute__((format(printf, a, b))) #define __scanf(a, b) __attribute__((format(scanf, a, b))) - -#if GCC_VERSION >= 50100 -#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 -#endif diff --git a/tools/include/linux/overflow.h b/tools/include/linux/overflow.h index 8712ff70995f..dcb0c1bf6866 100644 --- a/tools/include/linux/overflow.h +++ b/tools/include/linux/overflow.h @@ -5,12 +5,9 @@ #include /* - * In the fallback code below, we need to compute the minimum and - * maximum values representable in a given type. These macros may also - * be useful elsewhere, so we provide them outside the - * COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block. - * - * It would seem more obvious to do something like + * We need to compute the minimum and maximum values representable in a given + * type. These macros may also be useful elsewhere. It would seem more obvious + * to do something like: * * #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0) * #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0) @@ -36,8 +33,6 @@ #define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) #define type_min(T) ((T)((T)-type_max(T)-(T)1)) - -#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW /* * For simplicity and code hygiene, the fallback code below insists on * a, b and *d having the same type (similar to the min() and max() @@ -73,135 +68,6 @@ __builtin_mul_overflow(__a, __b, __d); \ }) -#else - - -/* Checking for unsigned overflow is relatively easy without causing UB. */ -#define __unsigned_add_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = __a + __b; \ - *__d < __a; \ -}) -#define __unsigned_sub_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = __a - __b; \ - __a < __b; \ -}) -/* - * If one of a or b is a compile-time constant, this avoids a division. - */ -#define __unsigned_mul_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = __a * __b; \ - __builtin_constant_p(__b) ? \ - __b > 0 && __a > type_max(typeof(__a)) / __b : \ - __a > 0 && __b > type_max(typeof(__b)) / __a; \ -}) - -/* - * For signed types, detecting overflow is much harder, especially if - * we want to avoid UB. But the interface of these macros is such that - * we must provide a result in *d, and in fact we must produce the - * result promised by gcc's builtins, which is simply the possibly - * wrapped-around value. Fortunately, we can just formally do the - * operations in the widest relevant unsigned type (u64) and then - * truncate the result - gcc is smart enough to generate the same code - * with and without the (u64) casts. - */ - -/* - * Adding two signed integers can overflow only if they have the same - * sign, and overflow has happened iff the result has the opposite - * sign. - */ -#define __signed_add_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = (u64)__a + (u64)__b; \ - (((~(__a ^ __b)) & (*__d ^ __a)) \ - & type_min(typeof(__a))) != 0; \ -}) - -/* - * Subtraction is similar, except that overflow can now happen only - * when the signs are opposite. In this case, overflow has happened if - * the result has the opposite sign of a. - */ -#define __signed_sub_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = (u64)__a - (u64)__b; \ - ((((__a ^ __b)) & (*__d ^ __a)) \ - & type_min(typeof(__a))) != 0; \ -}) - -/* - * Signed multiplication is rather hard. gcc always follows C99, so - * division is truncated towards 0. This means that we can write the - * overflow check like this: - * - * (a > 0 && (b > MAX/a || b < MIN/a)) || - * (a < -1 && (b > MIN/a || b < MAX/a) || - * (a == -1 && b == MIN) - * - * The redundant casts of -1 are to silence an annoying -Wtype-limits - * (included in -Wextra) warning: When the type is u8 or u16, the - * __b_c_e in check_mul_overflow obviously selects - * __unsigned_mul_overflow, but unfortunately gcc still parses this - * code and warns about the limited range of __b. - */ - -#define __signed_mul_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - typeof(a) __tmax = type_max(typeof(a)); \ - typeof(a) __tmin = type_min(typeof(a)); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = (u64)__a * (u64)__b; \ - (__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \ - (__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \ - (__b == (typeof(__b))-1 && __a == __tmin); \ -}) - - -#define check_add_overflow(a, b, d) \ - __builtin_choose_expr(is_signed_type(typeof(a)), \ - __signed_add_overflow(a, b, d), \ - __unsigned_add_overflow(a, b, d)) - -#define check_sub_overflow(a, b, d) \ - __builtin_choose_expr(is_signed_type(typeof(a)), \ - __signed_sub_overflow(a, b, d), \ - __unsigned_sub_overflow(a, b, d)) - -#define check_mul_overflow(a, b, d) \ - __builtin_choose_expr(is_signed_type(typeof(a)), \ - __signed_mul_overflow(a, b, d), \ - __unsigned_mul_overflow(a, b, d)) - - -#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */ - /** * array_size() - Calculate size of 2-dimensional array. * From adac17e3f61f54927b961d97bd303dd5795a307b Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 10 Sep 2021 16:40:40 -0700 Subject: [PATCH 093/543] mm/ksm: remove old GCC 4.9+ check The minimum supported version of GCC has been raised to GCC 5.1. Signed-off-by: Nick Desaulniers Reviewed-by: Kees Cook Reviewed-by: Nathan Chancellor Signed-off-by: Linus Torvalds --- mm/ksm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/ksm.c b/mm/ksm.c index 025338128cd9..a5716fdec1aa 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -651,10 +651,8 @@ static void remove_node_from_stable_tree(struct stable_node *stable_node) * from &migrate_nodes. This will verify that future list.h changes * don't break STABLE_NODE_DUP_HEAD. Only recent gcc can handle it. */ -#if defined(GCC_VERSION) && GCC_VERSION >= 40903 BUILD_BUG_ON(STABLE_NODE_DUP_HEAD <= &migrate_nodes); BUILD_BUG_ON(STABLE_NODE_DUP_HEAD >= &migrate_nodes + 1); -#endif if (stable_node->head == &migrate_nodes) list_del(&stable_node->list); From c0a5c81ca9bedaf38356a6290bf895313fd7361c Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 10 Sep 2021 16:40:41 -0700 Subject: [PATCH 094/543] Kconfig.debug: drop GCC 5+ version check for DWARF5 Now that the minimum supported version of GCC is 5.1, we no longer need this Kconfig version check for CONFIG_DEBUG_INFO_DWARF5. Signed-off-by: Nick Desaulniers Reviewed-by: Kees Cook Reviewed-by: Nathan Chancellor Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ed4a31e34098..d566f601780f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -295,7 +295,7 @@ config DEBUG_INFO_DWARF4 config DEBUG_INFO_DWARF5 bool "Generate DWARF Version 5 debuginfo" - depends on GCC_VERSION >= 50000 || (CC_IS_CLANG && (AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502))) + depends on !CC_IS_CLANG || (CC_IS_CLANG && (AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502))) depends on !DEBUG_INFO_BTF help Generate DWARF v5 debug info. Requires binutils 2.35.2, gcc 5.0+ (gcc From d20758951f8f28c0ee1b2a8a6bb8189858083895 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 10 Sep 2021 16:40:42 -0700 Subject: [PATCH 095/543] riscv: remove Kconfig check for GCC version for ARCH_RV64I The minimum supported version of GCC is now 5.1. The check wasn't correct as written anyways since GCC_VERSION is 0 when CC=clang. Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: linux-riscv@lists.infradead.org Signed-off-by: Nick Desaulniers Reviewed-by: Kees Cook Reviewed-by: Nathan Chancellor Signed-off-by: Linus Torvalds --- arch/riscv/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index c79955655fa4..5fc1d0cc82e1 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -235,7 +235,7 @@ config ARCH_RV32I config ARCH_RV64I bool "RV64I" select 64BIT - select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && GCC_VERSION >= 50000 + select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && $(cc-option,-fpatchable-function-entry=8) select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL From 6563139d90ad6178a990d051c7980f0998b5d2e8 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 10 Sep 2021 16:40:43 -0700 Subject: [PATCH 096/543] powerpc: remove GCC version check for UPD_CONSTR Now that GCC 5.1 is the minimum supported version, we can drop this workaround for older versions of GCC. This adversely affected clang, too. Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Segher Boessenkool Cc: Christophe Leroy Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Nick Desaulniers Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/asm-const.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/arch/powerpc/include/asm/asm-const.h b/arch/powerpc/include/asm/asm-const.h index 0ce2368bd20f..dbfa5e1e3198 100644 --- a/arch/powerpc/include/asm/asm-const.h +++ b/arch/powerpc/include/asm/asm-const.h @@ -12,16 +12,6 @@ # define ASM_CONST(x) __ASM_CONST(x) #endif -/* - * Inline assembly memory constraint - * - * GCC 4.9 doesn't properly handle pre update memory constraint "m<>" - * - */ -#if defined(GCC_VERSION) && GCC_VERSION < 50000 -#define UPD_CONSTR "" -#else #define UPD_CONSTR "<>" -#endif #endif /* _ASM_POWERPC_ASM_CONST_H */ From 42a7ba1695fcd534216aa3712a6cf42da3340527 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 10 Sep 2021 16:40:44 -0700 Subject: [PATCH 097/543] arm64: remove GCC version check for ARCH_SUPPORTS_INT128 Now that GCC 5.1 is the minimally supported compiler version, this Kconfig check is no longer necessary. Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Nick Desaulniers Reviewed-by: Kees Cook Reviewed-by: Nathan Chancellor Signed-off-by: Linus Torvalds --- arch/arm64/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 077f2ec4eeb2..5c7ae4c3954b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -86,7 +86,7 @@ config ARM64 select ARCH_SUPPORTS_LTO_CLANG_THIN select ARCH_SUPPORTS_CFI_CLANG select ARCH_SUPPORTS_ATOMIC_RMW - select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG) + select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT select ARCH_WANT_DEFAULT_BPF_JIT From 156102fe0bb669f40f2fd27856b21f9fa8157090 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 10 Sep 2021 16:40:45 -0700 Subject: [PATCH 098/543] Makefile: drop GCC < 5 -fno-var-tracking-assignments workaround Now that GCC 5.1 is the minimally supported version, we can drop this workaround for older versions of GCC. Signed-off-by: Nick Desaulniers Reviewed-by: Kees Cook Reviewed-by: Nathan Chancellor Signed-off-by: Linus Torvalds --- Makefile | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Makefile b/Makefile index 2d1e491f7737..f9ef07f573e0 100644 --- a/Makefile +++ b/Makefile @@ -849,12 +849,6 @@ endif DEBUG_CFLAGS := -# Workaround for GCC versions < 5.0 -# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61801 -ifdef CONFIG_CC_IS_GCC -DEBUG_CFLAGS += $(call cc-ifversion, -lt, 0500, $(call cc-option, -fno-var-tracking-assignments)) -endif - ifdef CONFIG_DEBUG_INFO ifdef CONFIG_DEBUG_INFO_SPLIT From 4e59869aa6550657cb148ad49835605660ec9b88 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 10 Sep 2021 16:40:46 -0700 Subject: [PATCH 099/543] compiler-gcc.h: drop checks for older GCC versions Now that GCC 5.1 is the minimally supported default, drop the values we don't use. Signed-off-by: Nick Desaulniers Reviewed-by: Kees Cook Reviewed-by: Nathan Chancellor Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 4 +--- tools/include/linux/compiler-gcc.h | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 3f7f6fa0e051..fd82ce169ce9 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -98,10 +98,8 @@ #if GCC_VERSION >= 70000 #define KASAN_ABI_VERSION 5 -#elif GCC_VERSION >= 50000 +#else #define KASAN_ABI_VERSION 4 -#elif GCC_VERSION >= 40902 -#define KASAN_ABI_VERSION 3 #endif #if __has_attribute(__no_sanitize_address__) diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index a590a1dfafd9..43d9a46d36f0 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -16,9 +16,7 @@ # define __fallthrough __attribute__ ((fallthrough)) #endif -#if GCC_VERSION >= 40300 -# define __compiletime_error(message) __attribute__((error(message))) -#endif /* GCC_VERSION >= 40300 */ +#define __compiletime_error(message) __attribute__((error(message))) /* &a[0] degrades to a pointer: a different type from an array */ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) From 6f20fa2dfa549401860479328371f0d5cee9b114 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 10 Sep 2021 16:40:47 -0700 Subject: [PATCH 100/543] vmlinux.lds.h: remove old check for GCC 4.9 Now that GCC 5.1 is the minimally supported version of GCC, we can effectively revert commit 85c2ce9104eb ("sched, vmlinux.lds: Increase STRUCT_ALIGNMENT to 64 bytes for GCC-4.9") Cc: Peter Zijlstra Signed-off-by: Nick Desaulniers Acked-by: Kees Cook Signed-off-by: Linus Torvalds --- include/asm-generic/vmlinux.lds.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index aa50bf2959fe..f2984af2b85b 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -116,11 +116,7 @@ * GCC 4.5 and later have a 32 bytes section alignment for structures. * Except GCC 4.9, that feels the need to align on 64 bytes. */ -#if __GNUC__ == 4 && __GNUC_MINOR__ == 9 -#define STRUCT_ALIGNMENT 64 -#else #define STRUCT_ALIGNMENT 32 -#endif #define STRUCT_ALIGN() . = ALIGN(STRUCT_ALIGNMENT) /* From 6d2ef226f2f18d530e48ead0cb5704505628b797 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 13 Sep 2021 10:20:01 -0700 Subject: [PATCH 101/543] compiler_attributes.h: drop __has_attribute() support for gcc4 Now that GCC 5.1 is the minimally supported default, the manual workaround for older gcc versions not having __has_attribute() are no longer relevant and can be removed. Signed-off-by: Linus Torvalds --- include/linux/compiler_attributes.h | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 2487be0e7199..ba417a5c80af 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -20,26 +20,6 @@ * Provide links to the documentation of each supported compiler, if it exists. */ -/* - * __has_attribute is supported on gcc >= 5, clang >= 2.9 and icc >= 17. - * In the meantime, to support gcc < 5, we implement __has_attribute - * by hand. - */ -#ifndef __has_attribute -# define __has_attribute(x) __GCC4_has_attribute_##x -# define __GCC4_has_attribute___assume_aligned__ 1 -# define __GCC4_has_attribute___copy__ 0 -# define __GCC4_has_attribute___designated_init__ 0 -# define __GCC4_has_attribute___externally_visible__ 1 -# define __GCC4_has_attribute___no_caller_saved_registers__ 0 -# define __GCC4_has_attribute___noclone__ 1 -# define __GCC4_has_attribute___no_profile_instrument_function__ 0 -# define __GCC4_has_attribute___nonstring__ 0 -# define __GCC4_has_attribute___no_sanitize_address__ 1 -# define __GCC4_has_attribute___no_sanitize_undefined__ 1 -# define __GCC4_has_attribute___fallthrough__ 0 -#endif - /* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alias-function-attribute */ From d9a7e9df731670acdc69e81748941ad338f47fab Mon Sep 17 00:00:00 2001 From: Doug Smythies Date: Sun, 12 Sep 2021 11:50:29 -0700 Subject: [PATCH 102/543] cpufreq: intel_pstate: Override parameters if HWP forced by BIOS If HWP has been already been enabled by BIOS, it may be necessary to override some kernel command line parameters. Once it has been enabled it requires a reset to be disabled. Suggested-by: Rafael J. Wysocki Signed-off-by: Doug Smythies Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 1097f826ad70..8c176b7dae41 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -3205,11 +3205,15 @@ static int __init intel_pstate_init(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return -ENODEV; - if (no_load) - return -ENODEV; - id = x86_match_cpu(hwp_support_ids); if (id) { + bool hwp_forced = intel_pstate_hwp_is_enabled(); + + if (hwp_forced) + pr_info("HWP enabled by BIOS\n"); + else if (no_load) + return -ENODEV; + copy_cpu_funcs(&core_funcs); /* * Avoid enabling HWP for processors without EPP support, @@ -3219,8 +3223,7 @@ static int __init intel_pstate_init(void) * If HWP is enabled already, though, there is no choice but to * deal with it. */ - if ((!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) || - intel_pstate_hwp_is_enabled()) { + if ((!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) || hwp_forced) { hwp_active++; hwp_mode_bdw = id->driver_data; intel_pstate.attr = hwp_cpufreq_attrs; @@ -3235,7 +3238,11 @@ static int __init intel_pstate_init(void) goto hwp_cpu_matched; } + pr_info("HWP not enabled\n"); } else { + if (no_load) + return -ENODEV; + id = x86_match_cpu(intel_pstate_cpu_ids); if (!id) { pr_info("CPU model not supported\n"); @@ -3314,10 +3321,9 @@ static int __init intel_pstate_setup(char *str) else if (!strcmp(str, "passive")) default_driver = &intel_cpufreq; - if (!strcmp(str, "no_hwp")) { - pr_info("HWP disabled\n"); + if (!strcmp(str, "no_hwp")) no_hwp = 1; - } + if (!strcmp(str, "force")) force_load = 1; if (!strcmp(str, "hwp_only")) From df26327ea097eb78e7967c45df6b23010c43c28d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 13 Sep 2021 10:29:44 -0700 Subject: [PATCH 103/543] Drop some straggling mentions of gcc-4.9 as being stale Fix up the admin-guide README file to the new gcc-5.1 requirement, and remove a stale comment about gcc support for the __assume_aligned__ attribute. Signed-off-by: Linus Torvalds --- Documentation/admin-guide/README.rst | 2 +- Documentation/translations/zh_CN/admin-guide/README.rst | 2 +- Documentation/translations/zh_TW/admin-guide/README.rst | 2 +- include/linux/compiler_attributes.h | 1 - 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/README.rst b/Documentation/admin-guide/README.rst index 35314b63008c..caa3c09a5c3f 100644 --- a/Documentation/admin-guide/README.rst +++ b/Documentation/admin-guide/README.rst @@ -259,7 +259,7 @@ Configuring the kernel Compiling the kernel -------------------- - - Make sure you have at least gcc 4.9 available. + - Make sure you have at least gcc 5.1 available. For more information, refer to :ref:`Documentation/process/changes.rst `. Please note that you can still run a.out user programs with this kernel. diff --git a/Documentation/translations/zh_CN/admin-guide/README.rst b/Documentation/translations/zh_CN/admin-guide/README.rst index 669a022f6817..980eb20521cf 100644 --- a/Documentation/translations/zh_CN/admin-guide/README.rst +++ b/Documentation/translations/zh_CN/admin-guide/README.rst @@ -223,7 +223,7 @@ Linux内核5.x版本 编译内核 --------- - - 确保您至少有gcc 4.9可用。 + - 确保您至少有gcc 5.1可用。 有关更多信息,请参阅 :ref:`Documentation/process/changes.rst ` 。 请注意,您仍然可以使用此内核运行a.out用户程序。 diff --git a/Documentation/translations/zh_TW/admin-guide/README.rst b/Documentation/translations/zh_TW/admin-guide/README.rst index b752e50359e6..6ce97edbab37 100644 --- a/Documentation/translations/zh_TW/admin-guide/README.rst +++ b/Documentation/translations/zh_TW/admin-guide/README.rst @@ -226,7 +226,7 @@ Linux內核5.x版本 編譯內核 --------- - - 確保您至少有gcc 4.9可用。 + - 確保您至少有gcc 5.1可用。 有關更多信息,請參閱 :ref:`Documentation/process/changes.rst ` 。 請注意,您仍然可以使用此內核運行a.out用戶程序。 diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index ba417a5c80af..ee19cebabcf5 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -54,7 +54,6 @@ * compiler should see some alignment anyway, when the return value is * massaged by 'flags = ptr & 3; ptr &= ~3;'). * - * Optional: only supported since gcc >= 4.9 * Optional: not supported by icc * * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-assume_005faligned-function-attribute From 099dd788e31b4f426ef49c2785069804925a84e1 Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 13 Sep 2021 14:51:10 -0500 Subject: [PATCH 104/543] cifs: remove pathname for file from SPDX header checkpatch complains about source files with filenames (e.g. in these cases just below the SPDX header in comments at the top of various files in fs/cifs). It also is helpful to change this now so will be less confusing when the parent directory is renamed e.g. from fs/cifs to fs/smb_client (or fs/smbfs) Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/cache.c | 2 +- fs/cifs/cifs_debug.c | 1 - fs/cifs/cifs_fs_sb.h | 1 - fs/cifs/cifs_ioctl.h | 1 - fs/cifs/cifs_spnego.c | 2 +- fs/cifs/cifs_spnego.h | 2 +- fs/cifs/cifs_unicode.c | 1 - fs/cifs/cifsacl.c | 1 - fs/cifs/cifsacl.h | 1 - fs/cifs/cifsencrypt.c | 1 - fs/cifs/cifsfs.c | 1 - fs/cifs/cifsfs.h | 1 - fs/cifs/cifsglob.h | 1 - fs/cifs/cifspdu.h | 1 - fs/cifs/cifsproto.h | 1 - fs/cifs/cifssmb.c | 1 - fs/cifs/connect.c | 1 - fs/cifs/dir.c | 1 - fs/cifs/dns_resolve.c | 1 - fs/cifs/dns_resolve.h | 4 ++-- fs/cifs/export.c | 1 - fs/cifs/file.c | 1 - fs/cifs/fscache.c | 2 +- fs/cifs/fscache.h | 2 +- fs/cifs/inode.c | 1 - fs/cifs/ioctl.c | 1 - fs/cifs/link.c | 1 - fs/cifs/misc.c | 1 - fs/cifs/netmisc.c | 1 - fs/cifs/ntlmssp.h | 1 - fs/cifs/readdir.c | 1 - fs/cifs/rfc1002pdu.h | 1 - fs/cifs/sess.c | 1 - fs/cifs/smb2file.c | 1 - fs/cifs/smb2glob.h | 1 - fs/cifs/smb2inode.c | 1 - fs/cifs/smb2misc.c | 1 - fs/cifs/smb2pdu.c | 1 - fs/cifs/smb2pdu.h | 1 - fs/cifs/smb2proto.h | 1 - fs/cifs/smb2status.h | 1 - fs/cifs/smb2transport.c | 1 - fs/cifs/smberr.h | 1 - fs/cifs/transport.c | 1 - fs/cifs/winucase.c | 1 - fs/cifs/xattr.c | 1 - fs/smbfs_common/smbfsctl.h | 2 +- include/uapi/linux/cifs/cifs_mount.h | 1 - 48 files changed, 8 insertions(+), 49 deletions(-) diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c index 8a3b30ec860c..8be57aaedab6 100644 --- a/fs/cifs/cache.c +++ b/fs/cifs/cache.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/cache.c - CIFS filesystem cache index structure definitions + * CIFS filesystem cache index structure definitions * * Copyright (c) 2010 Novell, Inc. * Authors(s): Suresh Jayaraman (sjayaraman@suse.de> diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 51a824fc926a..de2c12bcfa4b 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * fs/cifs_debug.c * * Copyright (C) International Business Machines Corp., 2000,2005 * diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 4fd788586399..f97407520ea1 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/cifs_fs_sb.h * * Copyright (c) International Business Machines Corp., 2002,2004 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/cifs_ioctl.h b/fs/cifs/cifs_ioctl.h index ef723be358af..b87cbbe6d2d4 100644 --- a/fs/cifs/cifs_ioctl.h +++ b/fs/cifs/cifs_ioctl.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/cifs_ioctl.h * * Structure definitions for io control for cifs/smb3 * diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 8fa26a8530f8..353bd0dd7026 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/cifs_spnego.c -- SPNEGO upcall management for CIFS + * SPNEGO upcall management for CIFS * * Copyright (c) 2007 Red Hat, Inc. * Author(s): Jeff Layton (jlayton@redhat.com) diff --git a/fs/cifs/cifs_spnego.h b/fs/cifs/cifs_spnego.h index 31387d0ea32e..e6a0451877d4 100644 --- a/fs/cifs/cifs_spnego.h +++ b/fs/cifs/cifs_spnego.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/cifs_spnego.h -- SPNEGO upcall management for CIFS + * SPNEGO upcall management for CIFS * * Copyright (c) 2007 Red Hat, Inc. * Author(s): Jeff Layton (jlayton@redhat.com) diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 171ad8b42107..e7582dd79179 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * fs/cifs/cifs_unicode.c * * Copyright (c) International Business Machines Corp., 2000,2009 * Modified by Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 388eb536cff1..ee3aab3dd4ac 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/cifsacl.c * * Copyright (C) International Business Machines Corp., 2007,2008 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h index f8292bcf8594..ccbfc754bd3c 100644 --- a/fs/cifs/cifsacl.h +++ b/fs/cifs/cifsacl.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/cifsacl.h * * Copyright (c) International Business Machines Corp., 2007 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 2e6f40344037..d118282071b3 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/cifsencrypt.c * * Encryption and hashing operations relating to NTLM, NTLMv2. See MS-NLMP * for more detailed information diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 8c20bfa187ac..9fa930dfd78d 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/cifsfs.c * * Copyright (C) International Business Machines Corp., 2002,2008 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index d25a4099b32e..b50da1901ebd 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/cifsfs.h * * Copyright (c) International Business Machines Corp., 2002, 2007 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index c068f7d8d879..7dd9878e26bf 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/cifsglob.h * * Copyright (C) International Business Machines Corp., 2002,2008 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 98e8e5aa0613..d2ff438fd31f 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/cifspdu.h * * Copyright (c) International Business Machines Corp., 2002,2009 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index f9740c21ca3d..54966c1a8eb6 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/cifsproto.h * * Copyright (c) International Business Machines Corp., 2002,2008 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index a8e41c1e80ca..243d17696f06 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/cifssmb.c * * Copyright (C) International Business Machines Corp., 2002,2010 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0db344807ef1..399a0f084573 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/connect.c * * Copyright (C) International Business Machines Corp., 2002,2011 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 5f8a302ffcb2..6e8e7cc26ae2 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/dir.c * * vfs operations that deal with dentries * diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index 8c616aaeb7c4..0458d28d71aa 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/dns_resolve.c * * Copyright (c) 2007 Igor Mammedov * Author(s): Igor Mammedov (niallain@gmail.com) diff --git a/fs/cifs/dns_resolve.h b/fs/cifs/dns_resolve.h index 9fa2807ef79e..afc0df381246 100644 --- a/fs/cifs/dns_resolve.h +++ b/fs/cifs/dns_resolve.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/dns_resolve.h -- DNS Resolver upcall management for CIFS DFS - * Handles host name to IP address resolution + * DNS Resolver upcall management for CIFS DFS + * Handles host name to IP address resolution * * Copyright (c) International Business Machines Corp., 2008 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/export.c b/fs/cifs/export.c index 747a540db954..37c28415df1e 100644 --- a/fs/cifs/export.c +++ b/fs/cifs/export.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/export.c * * Copyright (C) International Business Machines Corp., 2007 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index d0216472f1c6..4d10c9343890 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/file.c * * vfs operations that deal with files * diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c index fab47fa7df74..8eedd20c44ab 100644 --- a/fs/cifs/fscache.c +++ b/fs/cifs/fscache.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/fscache.c - CIFS filesystem cache interface + * CIFS filesystem cache interface * * Copyright (c) 2010 Novell, Inc. * Author(s): Suresh Jayaraman diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h index 82e856b9cf89..9baa1d0f22bd 100644 --- a/fs/cifs/fscache.h +++ b/fs/cifs/fscache.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/fscache.h - CIFS filesystem cache interface definitions + * CIFS filesystem cache interface definitions * * Copyright (c) 2010 Novell, Inc. * Authors(s): Suresh Jayaraman (sjayaraman@suse.de> diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 50c01cff4c84..19af2d0ec8d5 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/inode.c * * Copyright (C) International Business Machines Corp., 2002,2010 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 42c6a0bac6c8..44ae99454fb3 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/ioctl.c * * vfs operations that deal with io control * diff --git a/fs/cifs/link.c b/fs/cifs/link.c index f0a6d63bc08c..852e54ee82c2 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/link.c * * Copyright (C) International Business Machines Corp., 2002,2008 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 9469f1cf0b46..f4313935e734 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/misc.c * * Copyright (C) International Business Machines Corp., 2002,2008 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 0e728aac67e9..fa9fbd6a819c 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * fs/cifs/netmisc.c * * Copyright (c) International Business Machines Corp., 2002,2008 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h index 378133ce8869..25a2b8ef88b9 100644 --- a/fs/cifs/ntlmssp.h +++ b/fs/cifs/ntlmssp.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/ntlmssp.h * * Copyright (c) International Business Machines Corp., 2002,2007 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 54d77c99e21c..1929e80c09ee 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/readdir.c * * Directory search handling * diff --git a/fs/cifs/rfc1002pdu.h b/fs/cifs/rfc1002pdu.h index 137f7c95afd6..ae1d025da294 100644 --- a/fs/cifs/rfc1002pdu.h +++ b/fs/cifs/rfc1002pdu.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/rfc1002pdu.h * * Protocol Data Unit definitions for RFC 1001/1002 support * diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 118403fbeda2..23e02db7923f 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/sess.c * * SMB/CIFS session setup handling routines * diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index c9d8a50062b8..f5dcc4940b6d 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/smb2file.c * * Copyright (C) International Business Machines Corp., 2002, 2011 * Author(s): Steve French (sfrench@us.ibm.com), diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h index d0e9f3782bd9..ca692b2283cd 100644 --- a/fs/cifs/smb2glob.h +++ b/fs/cifs/smb2glob.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/smb2glob.h * * Definitions for various global variables and structures * diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 957b2594f02e..8297703492ee 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/smb2inode.c * * Copyright (C) International Business Machines Corp., 2002, 2011 * Etersoft, 2012 diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 668f77108831..29b5554f6263 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/smb2misc.c * * Copyright (C) International Business Machines Corp., 2002,2011 * Etersoft, 2012 diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index b6d2e3591927..672ae78e866a 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/smb2pdu.c * * Copyright (C) International Business Machines Corp., 2009, 2013 * Etersoft, 2012 diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index e9cac7970b66..f32c99c9ba13 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/smb2pdu.h * * Copyright (c) International Business Machines Corp., 2009, 2013 * Etersoft, 2012 diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 263767f644f8..547945443fa7 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/smb2proto.h * * Copyright (c) International Business Machines Corp., 2002, 2011 * Etersoft, 2012 diff --git a/fs/cifs/smb2status.h b/fs/cifs/smb2status.h index 0215ef36e240..a9e958166fc5 100644 --- a/fs/cifs/smb2status.h +++ b/fs/cifs/smb2status.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/smb2status.h * * SMB2 Status code (network error) definitions * Definitions are from MS-ERREF diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 6f7952ea4941..f59b956f9d25 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/smb2transport.c * * Copyright (C) International Business Machines Corp., 2002, 2011 * Etersoft, 2012 diff --git a/fs/cifs/smberr.h b/fs/cifs/smberr.h index 60189efb3236..aeffdad829e2 100644 --- a/fs/cifs/smberr.h +++ b/fs/cifs/smberr.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/smberr.h * * Copyright (c) International Business Machines Corp., 2002,2004 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 75a95de320cf..b7379329b741 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/transport.c * * Copyright (C) International Business Machines Corp., 2002,2008 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/winucase.c b/fs/cifs/winucase.c index 59b6c577aa0a..2f075b5b50df 100644 --- a/fs/cifs/winucase.c +++ b/fs/cifs/winucase.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * fs/cifs/winucase.c * * Copyright (c) Jeffrey Layton , 2013 * diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 9ed481e79ce0..7d8b72d67c80 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/xattr.c * * Copyright (c) International Business Machines Corp., 2003, 2007 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/smbfs_common/smbfsctl.h b/fs/smbfs_common/smbfsctl.h index d01e8c9d7a31..926f87cd6af0 100644 --- a/fs/smbfs_common/smbfsctl.h +++ b/fs/smbfs_common/smbfsctl.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: LGPL-2.1+ */ /* - * fs/cifs/smbfsctl.h: SMB, CIFS, SMB2 FSCTL definitions + * SMB, CIFS, SMB2 FSCTL definitions * * Copyright (c) International Business Machines Corp., 2002,2013 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/include/uapi/linux/cifs/cifs_mount.h b/include/uapi/linux/cifs/cifs_mount.h index 69829205fdb5..8e87d27b0951 100644 --- a/include/uapi/linux/cifs/cifs_mount.h +++ b/include/uapi/linux/cifs/cifs_mount.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1+ WITH Linux-syscall-note */ /* - * include/uapi/linux/cifs/cifs_mount.h * * Author(s): Scott Lovenberg (scott.lovenberg@gmail.com) * From d0ee23f9d78be5531c4b055ea424ed0b489dfe9b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 13 Sep 2021 15:09:00 -0700 Subject: [PATCH 105/543] tools: compiler-gcc.h: Guard error attribute use with __has_attribute When building objtool with HOSTCC=clang, there are several errors along the lines of orc_dump.c:201:28: error: unknown attribute 'error' ignored [-Werror,-Wunknown-attributes] This occurs after commit 4e59869aa655 ("compiler-gcc.h: drop checks for older GCC versions"), which removed the GCC_VERSION gating. The removed version check just so happened to prevent __compiletime_error() from being defined with clang because it pretends to be GCC 4.2.1 for compatibility but the error attribute was not added to clang until 14.0.0. Commit 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive") and commit a3f8a30f3f00 ("Compiler Attributes: use feature checks instead of version checks") refactored the handling of attributes in the main kernel to avoid situations like this but that refactoring has never been done for the tools directory. Refactoring is a rather large undertaking and this has never been an issue before so instead, just guard the definition of __compiletime_error() with __has_attribute() so that there are no more errors. Fixes: 4e59869aa655 ("compiler-gcc.h: drop checks for older GCC versions") Signed-off-by: Nathan Chancellor Signed-off-by: Linus Torvalds --- tools/include/linux/compiler-gcc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index 43d9a46d36f0..8816f06fc6c7 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -16,7 +16,9 @@ # define __fallthrough __attribute__ ((fallthrough)) #endif -#define __compiletime_error(message) __attribute__((error(message))) +#if __has_attribute(__error__) +# define __compiletime_error(message) __attribute__((error(message))) +#endif /* &a[0] degrades to a pointer: a different type from an array */ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) From 0e6491b559704da720f6da09dd0a52c4df44c514 Mon Sep 17 00:00:00 2001 From: Bixuan Cui Date: Sat, 11 Sep 2021 08:55:57 +0800 Subject: [PATCH 106/543] bpf: Add oversize check before call kvcalloc() Commit 7661809d493b ("mm: don't allow oversized kvmalloc() calls") add the oversize check. When the allocation is larger than what kmalloc() supports, the following warning triggered: WARNING: CPU: 0 PID: 8408 at mm/util.c:597 kvmalloc_node+0x108/0x110 mm/util.c:597 Modules linked in: CPU: 0 PID: 8408 Comm: syz-executor221 Not tainted 5.14.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:kvmalloc_node+0x108/0x110 mm/util.c:597 Call Trace: kvmalloc include/linux/mm.h:806 [inline] kvmalloc_array include/linux/mm.h:824 [inline] kvcalloc include/linux/mm.h:829 [inline] check_btf_line kernel/bpf/verifier.c:9925 [inline] check_btf_info kernel/bpf/verifier.c:10049 [inline] bpf_check+0xd634/0x150d0 kernel/bpf/verifier.c:13759 bpf_prog_load kernel/bpf/syscall.c:2301 [inline] __sys_bpf+0x11181/0x126e0 kernel/bpf/syscall.c:4587 __do_sys_bpf kernel/bpf/syscall.c:4691 [inline] __se_sys_bpf kernel/bpf/syscall.c:4689 [inline] __x64_sys_bpf+0x78/0x90 kernel/bpf/syscall.c:4689 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Reported-by: syzbot+f3e749d4c662818ae439@syzkaller.appspotmail.com Signed-off-by: Bixuan Cui Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210911005557.45518-1-cuibixuan@huawei.com --- kernel/bpf/verifier.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 047ac4b4703b..e76b55917905 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -9912,6 +9912,8 @@ static int check_btf_line(struct bpf_verifier_env *env, nr_linfo = attr->line_info_cnt; if (!nr_linfo) return 0; + if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info)) + return -EINVAL; rec_size = attr->line_info_rec_size; if (rec_size < MIN_BPF_LINEINFO_SIZE || From 4c51de1e8f928a5b05248714d832d7d991ac319a Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 13 Sep 2021 18:29:46 -0500 Subject: [PATCH 107/543] cifs: fix incorrect kernel doc comments Correct kernel-doc comments pointed out by the automated kernel test robot. Reported-by: kernel test robot Signed-off-by: Steve French --- fs/cifs/connect.c | 12 ++++++++++-- fs/cifs/ioctl.c | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 399a0f084573..7881115cfbee 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1089,7 +1089,7 @@ next_pdu: module_put_and_exit(0); } -/** +/* * Returns true if srcaddr isn't specified and rhs isn't specified, or * if srcaddr is specified and matches the IP address of the rhs argument */ @@ -1549,6 +1549,9 @@ static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx) /** * cifs_setup_ipc - helper to setup the IPC tcon for the session + * @ses: smb session to issue the request on + * @ctx: the superblock configuration context to use for building the + * new tree connection for the IPC (interprocess communication RPC) * * A new IPC connection is made and stored in the session * tcon_ipc. The IPC tcon has the same lifetime as the session. @@ -1604,6 +1607,7 @@ out: /** * cifs_free_ipc - helper to release the session IPC tcon + * @ses: smb session to unmount the IPC from * * Needs to be called everytime a session is destroyed. * @@ -1854,6 +1858,8 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx __attribute__((unused)), /** * cifs_get_smb_ses - get a session matching @ctx data from @server + * @server: server to setup the session to + * @ctx: superblock configuration context to use to setup the session * * This function assumes it is being called from cifs_mount() where we * already got a server reference (server refcount +1). See @@ -2064,6 +2070,8 @@ cifs_put_tcon(struct cifs_tcon *tcon) /** * cifs_get_tcon - get a tcon matching @ctx data from @ses + * @ses: smb session to issue the request on + * @ctx: the superblock configuration context to use for building the * * - tcon refcount is the number of mount points using the tcon. * - ses refcount is the number of tcon using the session. @@ -3029,7 +3037,7 @@ build_unc_path_to_root(const struct smb3_fs_context *ctx, return full_path; } -/** +/* * expand_dfs_referral - Perform a dfs referral query and update the cifs_sb * * If a referral is found, cifs_sb->ctx->mount_options will be (re-)allocated diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 44ae99454fb3..0359b604bdbc 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -358,7 +358,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) if (pSMBFile == NULL) break; tcon = tlink_tcon(pSMBFile->tlink); - caps = le64_to_cpu(tcon->fsUnixInfo.Capability); + /* caps = le64_to_cpu(tcon->fsUnixInfo.Capability); */ if (get_user(ExtAttrBits, (int __user *)arg)) { rc = -EFAULT; From 8520e224f547cd070c7c8f97b1fc6d58cff7ccaa Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 14 Sep 2021 01:07:57 +0200 Subject: [PATCH 108/543] bpf, cgroups: Fix cgroup v2 fallback on v1/v2 mixed mode Fix cgroup v1 interference when non-root cgroup v2 BPF programs are used. Back in the days, commit bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup") embedded per-socket cgroup information into sock->sk_cgrp_data and in order to save 8 bytes in struct sock made both mutually exclusive, that is, when cgroup v1 socket tagging (e.g. net_cls/net_prio) is used, then cgroup v2 falls back to the root cgroup in sock_cgroup_ptr() (&cgrp_dfl_root.cgrp). The assumption made was "there is no reason to mix the two and this is in line with how legacy and v2 compatibility is handled" as stated in bd1060a1d671. However, with Kubernetes more widely supporting cgroups v2 as well nowadays, this assumption no longer holds, and the possibility of the v1/v2 mixed mode with the v2 root fallback being hit becomes a real security issue. Many of the cgroup v2 BPF programs are also used for policy enforcement, just to pick _one_ example, that is, to programmatically deny socket related system calls like connect(2) or bind(2). A v2 root fallback would implicitly cause a policy bypass for the affected Pods. In production environments, we have recently seen this case due to various circumstances: i) a different 3rd party agent and/or ii) a container runtime such as [0] in the user's environment configuring legacy cgroup v1 net_cls tags, which triggered implicitly mentioned root fallback. Another case is Kubernetes projects like kind [1] which create Kubernetes nodes in a container and also add cgroup namespaces to the mix, meaning programs which are attached to the cgroup v2 root of the cgroup namespace get attached to a non-root cgroup v2 path from init namespace point of view. And the latter's root is out of reach for agents on a kind Kubernetes node to configure. Meaning, any entity on the node setting cgroup v1 net_cls tag will trigger the bypass despite cgroup v2 BPF programs attached to the namespace root. Generally, this mutual exclusiveness does not hold anymore in today's user environments and makes cgroup v2 usage from BPF side fragile and unreliable. This fix adds proper struct cgroup pointer for the cgroup v2 case to struct sock_cgroup_data in order to address these issues; this implicitly also fixes the tradeoffs being made back then with regards to races and refcount leaks as stated in bd1060a1d671, and removes the fallback, so that cgroup v2 BPF programs always operate as expected. [0] https://github.com/nestybox/sysbox/ [1] https://kind.sigs.k8s.io/ Fixes: bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup") Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Stanislav Fomichev Acked-by: Tejun Heo Link: https://lore.kernel.org/bpf/20210913230759.2313-1-daniel@iogearbox.net --- include/linux/cgroup-defs.h | 107 +++++++++-------------------------- include/linux/cgroup.h | 22 +------ kernel/cgroup/cgroup.c | 50 ++++------------ net/core/netclassid_cgroup.c | 7 +-- net/core/netprio_cgroup.c | 10 +--- 5 files changed, 41 insertions(+), 155 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index e1c705fdfa7c..db2e147e069f 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -752,107 +752,54 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {} * sock_cgroup_data is embedded at sock->sk_cgrp_data and contains * per-socket cgroup information except for memcg association. * - * On legacy hierarchies, net_prio and net_cls controllers directly set - * attributes on each sock which can then be tested by the network layer. - * On the default hierarchy, each sock is associated with the cgroup it was - * created in and the networking layer can match the cgroup directly. - * - * To avoid carrying all three cgroup related fields separately in sock, - * sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer. - * On boot, sock_cgroup_data records the cgroup that the sock was created - * in so that cgroup2 matches can be made; however, once either net_prio or - * net_cls starts being used, the area is overridden to carry prioidx and/or - * classid. The two modes are distinguished by whether the lowest bit is - * set. Clear bit indicates cgroup pointer while set bit prioidx and - * classid. - * - * While userland may start using net_prio or net_cls at any time, once - * either is used, cgroup2 matching no longer works. There is no reason to - * mix the two and this is in line with how legacy and v2 compatibility is - * handled. On mode switch, cgroup references which are already being - * pointed to by socks may be leaked. While this can be remedied by adding - * synchronization around sock_cgroup_data, given that the number of leaked - * cgroups is bound and highly unlikely to be high, this seems to be the - * better trade-off. + * On legacy hierarchies, net_prio and net_cls controllers directly + * set attributes on each sock which can then be tested by the network + * layer. On the default hierarchy, each sock is associated with the + * cgroup it was created in and the networking layer can match the + * cgroup directly. */ struct sock_cgroup_data { - union { -#ifdef __LITTLE_ENDIAN - struct { - u8 is_data : 1; - u8 no_refcnt : 1; - u8 unused : 6; - u8 padding; - u16 prioidx; - u32 classid; - } __packed; -#else - struct { - u32 classid; - u16 prioidx; - u8 padding; - u8 unused : 6; - u8 no_refcnt : 1; - u8 is_data : 1; - } __packed; + struct cgroup *cgroup; /* v2 */ +#ifdef CONFIG_CGROUP_NET_CLASSID + u32 classid; /* v1 */ +#endif +#ifdef CONFIG_CGROUP_NET_PRIO + u16 prioidx; /* v1 */ #endif - u64 val; - }; }; -/* - * There's a theoretical window where the following accessors race with - * updaters and return part of the previous pointer as the prioidx or - * classid. Such races are short-lived and the result isn't critical. - */ static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd) { - /* fallback to 1 which is always the ID of the root cgroup */ - return (skcd->is_data & 1) ? skcd->prioidx : 1; +#ifdef CONFIG_CGROUP_NET_PRIO + return READ_ONCE(skcd->prioidx); +#else + return 1; +#endif } static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd) { - /* fallback to 0 which is the unconfigured default classid */ - return (skcd->is_data & 1) ? skcd->classid : 0; +#ifdef CONFIG_CGROUP_NET_CLASSID + return READ_ONCE(skcd->classid); +#else + return 0; +#endif } -/* - * If invoked concurrently, the updaters may clobber each other. The - * caller is responsible for synchronization. - */ static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd, u16 prioidx) { - struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }}; - - if (sock_cgroup_prioidx(&skcd_buf) == prioidx) - return; - - if (!(skcd_buf.is_data & 1)) { - skcd_buf.val = 0; - skcd_buf.is_data = 1; - } - - skcd_buf.prioidx = prioidx; - WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */ +#ifdef CONFIG_CGROUP_NET_PRIO + WRITE_ONCE(skcd->prioidx, prioidx); +#endif } static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd, u32 classid) { - struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }}; - - if (sock_cgroup_classid(&skcd_buf) == classid) - return; - - if (!(skcd_buf.is_data & 1)) { - skcd_buf.val = 0; - skcd_buf.is_data = 1; - } - - skcd_buf.classid = classid; - WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */ +#ifdef CONFIG_CGROUP_NET_CLASSID + WRITE_ONCE(skcd->classid, classid); +#endif } #else /* CONFIG_SOCK_CGROUP_DATA */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 7bf60454a313..75c151413fda 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -829,33 +829,13 @@ static inline void cgroup_account_cputime_field(struct task_struct *task, */ #ifdef CONFIG_SOCK_CGROUP_DATA -#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) -extern spinlock_t cgroup_sk_update_lock; -#endif - -void cgroup_sk_alloc_disable(void); void cgroup_sk_alloc(struct sock_cgroup_data *skcd); void cgroup_sk_clone(struct sock_cgroup_data *skcd); void cgroup_sk_free(struct sock_cgroup_data *skcd); static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) { -#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) - unsigned long v; - - /* - * @skcd->val is 64bit but the following is safe on 32bit too as we - * just need the lower ulong to be written and read atomically. - */ - v = READ_ONCE(skcd->val); - - if (v & 3) - return &cgrp_dfl_root.cgrp; - - return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp; -#else - return (struct cgroup *)(unsigned long)skcd->val; -#endif + return skcd->cgroup; } #else /* CONFIG_CGROUP_DATA */ diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 881ce1470beb..8afa8690d288 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6572,74 +6572,44 @@ int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v) */ #ifdef CONFIG_SOCK_CGROUP_DATA -#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) - -DEFINE_SPINLOCK(cgroup_sk_update_lock); -static bool cgroup_sk_alloc_disabled __read_mostly; - -void cgroup_sk_alloc_disable(void) -{ - if (cgroup_sk_alloc_disabled) - return; - pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n"); - cgroup_sk_alloc_disabled = true; -} - -#else - -#define cgroup_sk_alloc_disabled false - -#endif - void cgroup_sk_alloc(struct sock_cgroup_data *skcd) { - if (cgroup_sk_alloc_disabled) { - skcd->no_refcnt = 1; - return; - } - /* Don't associate the sock with unrelated interrupted task's cgroup. */ if (in_interrupt()) return; rcu_read_lock(); - while (true) { struct css_set *cset; cset = task_css_set(current); if (likely(cgroup_tryget(cset->dfl_cgrp))) { - skcd->val = (unsigned long)cset->dfl_cgrp; + skcd->cgroup = cset->dfl_cgrp; cgroup_bpf_get(cset->dfl_cgrp); break; } cpu_relax(); } - rcu_read_unlock(); } void cgroup_sk_clone(struct sock_cgroup_data *skcd) { - if (skcd->val) { - if (skcd->no_refcnt) - return; - /* - * We might be cloning a socket which is left in an empty - * cgroup and the cgroup might have already been rmdir'd. - * Don't use cgroup_get_live(). - */ - cgroup_get(sock_cgroup_ptr(skcd)); - cgroup_bpf_get(sock_cgroup_ptr(skcd)); - } + struct cgroup *cgrp = sock_cgroup_ptr(skcd); + + /* + * We might be cloning a socket which is left in an empty + * cgroup and the cgroup might have already been rmdir'd. + * Don't use cgroup_get_live(). + */ + cgroup_get(cgrp); + cgroup_bpf_get(cgrp); } void cgroup_sk_free(struct sock_cgroup_data *skcd) { struct cgroup *cgrp = sock_cgroup_ptr(skcd); - if (skcd->no_refcnt) - return; cgroup_bpf_put(cgrp); cgroup_put(cgrp); } diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index b49c57d35a88..1a6a86693b74 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -71,11 +71,8 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n) struct update_classid_context *ctx = (void *)v; struct socket *sock = sock_from_file(file); - if (sock) { - spin_lock(&cgroup_sk_update_lock); + if (sock) sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid); - spin_unlock(&cgroup_sk_update_lock); - } if (--ctx->batch == 0) { ctx->batch = UPDATE_CLASSID_BATCH; return n + 1; @@ -121,8 +118,6 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, struct css_task_iter it; struct task_struct *p; - cgroup_sk_alloc_disable(); - cs->classid = (u32)value; css_task_iter_start(css, 0, &it); diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 99a431c56f23..8456dfbe2eb4 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -207,8 +207,6 @@ static ssize_t write_priomap(struct kernfs_open_file *of, if (!dev) return -ENODEV; - cgroup_sk_alloc_disable(); - rtnl_lock(); ret = netprio_set_prio(of_css(of), dev, prio); @@ -221,12 +219,10 @@ static ssize_t write_priomap(struct kernfs_open_file *of, static int update_netprio(const void *v, struct file *file, unsigned n) { struct socket *sock = sock_from_file(file); - if (sock) { - spin_lock(&cgroup_sk_update_lock); + + if (sock) sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data, (unsigned long)v); - spin_unlock(&cgroup_sk_update_lock); - } return 0; } @@ -235,8 +231,6 @@ static void net_prio_attach(struct cgroup_taskset *tset) struct task_struct *p; struct cgroup_subsys_state *css; - cgroup_sk_alloc_disable(); - cgroup_taskset_for_each(p, css, tset) { void *v = (void *)(unsigned long)css->id; From d8079d8026f82e4435445297d1b77bba1c4c7960 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 14 Sep 2021 01:07:58 +0200 Subject: [PATCH 109/543] bpf, selftests: Add cgroup v1 net_cls classid helpers Minimal set of helpers for net_cls classid cgroupv1 management in order to set an id, join from a process, initiate setup and teardown. cgroupv2 helpers are left as-is, but reused where possible. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210913230759.2313-2-daniel@iogearbox.net --- tools/testing/selftests/bpf/cgroup_helpers.c | 137 +++++++++++++++++-- tools/testing/selftests/bpf/cgroup_helpers.h | 20 ++- 2 files changed, 143 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 033051717ba5..f3daa44a8266 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -12,27 +12,36 @@ #include #include - #include "cgroup_helpers.h" /* * To avoid relying on the system setup, when setup_cgroup_env is called - * we create a new mount namespace, and cgroup namespace. The cgroup2 - * root is mounted at CGROUP_MOUNT_PATH + * we create a new mount namespace, and cgroup namespace. The cgroupv2 + * root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't + * have cgroupv2 enabled at this point in time. It's easier to create our + * own mount namespace and manage it ourselves. We assume /mnt exists. * - * Unfortunately, most people don't have cgroupv2 enabled at this point in time. - * It's easier to create our own mount namespace and manage it ourselves. - * - * We assume /mnt exists. + * Related cgroupv1 helpers are named *classid*(), since we only use the + * net_cls controller for tagging net_cls.classid. We assume the default + * mount under /sys/fs/cgroup/net_cls, which should be the case for the + * vast majority of users. */ #define WALK_FD_LIMIT 16 + #define CGROUP_MOUNT_PATH "/mnt" +#define CGROUP_MOUNT_DFLT "/sys/fs/cgroup" +#define NETCLS_MOUNT_PATH CGROUP_MOUNT_DFLT "/net_cls" #define CGROUP_WORK_DIR "/cgroup-test-work-dir" + #define format_cgroup_path(buf, path) \ snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \ CGROUP_WORK_DIR, path) +#define format_classid_path(buf) \ + snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \ + CGROUP_WORK_DIR) + /** * enable_all_controllers() - Enable all available cgroup v2 controllers * @@ -139,8 +148,7 @@ static int nftwfunc(const char *filename, const struct stat *statptr, return 0; } - -static int join_cgroup_from_top(char *cgroup_path) +static int join_cgroup_from_top(const char *cgroup_path) { char cgroup_procs_path[PATH_MAX + 1]; pid_t pid = getpid(); @@ -313,3 +321,114 @@ int cgroup_setup_and_join(const char *path) { } return cg_fd; } + +/** + * setup_classid_environment() - Setup the cgroupv1 net_cls environment + * + * After calling this function, cleanup_classid_environment should be called + * once testing is complete. + * + * This function will print an error to stderr and return 1 if it is unable + * to setup the cgroup environment. If setup is successful, 0 is returned. + */ +int setup_classid_environment(void) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_classid_path(cgroup_workdir); + + if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) && + errno != EBUSY) { + log_err("mount cgroup base"); + return 1; + } + + if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) { + log_err("mkdir cgroup net_cls"); + return 1; + } + + if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") && + errno != EBUSY) { + log_err("mount cgroup net_cls"); + return 1; + } + + cleanup_classid_environment(); + + if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) { + log_err("mkdir cgroup work dir"); + return 1; + } + + return 0; +} + +/** + * set_classid() - Set a cgroupv1 net_cls classid + * @id: the numeric classid + * + * Writes the passed classid into the cgroup work dir's net_cls.classid + * file in order to later on trigger socket tagging. + * + * On success, it returns 0, otherwise on failure it returns 1. If there + * is a failure, it prints the error to stderr. + */ +int set_classid(unsigned int id) +{ + char cgroup_workdir[PATH_MAX - 42]; + char cgroup_classid_path[PATH_MAX + 1]; + int fd, rc = 0; + + format_classid_path(cgroup_workdir); + snprintf(cgroup_classid_path, sizeof(cgroup_classid_path), + "%s/net_cls.classid", cgroup_workdir); + + fd = open(cgroup_classid_path, O_WRONLY); + if (fd < 0) { + log_err("Opening cgroup classid: %s", cgroup_classid_path); + return 1; + } + + if (dprintf(fd, "%u\n", id) < 0) { + log_err("Setting cgroup classid"); + rc = 1; + } + + close(fd); + return rc; +} + +/** + * join_classid() - Join a cgroupv1 net_cls classid + * + * This function expects the cgroup work dir to be already created, as we + * join it here. This causes the process sockets to be tagged with the given + * net_cls classid. + * + * On success, it returns 0, otherwise on failure it returns 1. + */ +int join_classid(void) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_classid_path(cgroup_workdir); + return join_cgroup_from_top(cgroup_workdir); +} + +/** + * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment + * + * At call time, it moves the calling process to the root cgroup, and then + * runs the deletion process. + * + * On failure, it will print an error to stderr, and try to continue. + */ +void cleanup_classid_environment(void) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_classid_path(cgroup_workdir); + join_cgroup_from_top(NETCLS_MOUNT_PATH); + nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); +} diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 5fe3d88e4f0d..629da3854b3e 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __CGROUP_HELPERS_H #define __CGROUP_HELPERS_H + #include #include @@ -8,12 +9,21 @@ #define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__) - +/* cgroupv2 related */ int cgroup_setup_and_join(const char *path); int create_and_get_cgroup(const char *path); -int join_cgroup(const char *path); -int setup_cgroup_environment(void); -void cleanup_cgroup_environment(void); unsigned long long get_cgroup_id(const char *path); -#endif +int join_cgroup(const char *path); + +int setup_cgroup_environment(void); +void cleanup_cgroup_environment(void); + +/* cgroupv1 related */ +int set_classid(unsigned int id); +int join_classid(void); + +int setup_classid_environment(void); +void cleanup_classid_environment(void); + +#endif /* __CGROUP_HELPERS_H */ From 43d2b88c29f2d120b4dc22f27b3483eb14bd9815 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 14 Sep 2021 01:07:59 +0200 Subject: [PATCH 110/543] bpf, selftests: Add test case for mixed cgroup v1/v2 Minimal selftest which implements a small BPF policy program to the connect(2) hook which rejects TCP connection requests to port 60123 with EPERM. This is being attached to a non-root cgroup v2 path. The test asserts that this works under cgroup v2-only and under a mixed cgroup v1/v2 environment where net_classid is set in the former case. Before fix: # ./test_progs -t cgroup_v1v2 test_cgroup_v1v2:PASS:server_fd 0 nsec test_cgroup_v1v2:PASS:client_fd 0 nsec test_cgroup_v1v2:PASS:cgroup_fd 0 nsec test_cgroup_v1v2:PASS:server_fd 0 nsec run_test:PASS:skel_open 0 nsec run_test:PASS:prog_attach 0 nsec test_cgroup_v1v2:PASS:cgroup-v2-only 0 nsec run_test:PASS:skel_open 0 nsec run_test:PASS:prog_attach 0 nsec run_test:PASS:join_classid 0 nsec (network_helpers.c:219: errno: None) Unexpected success to connect to server test_cgroup_v1v2:FAIL:cgroup-v1v2 unexpected error: -1 (errno 0) #27 cgroup_v1v2:FAIL Summary: 0/0 PASSED, 0 SKIPPED, 1 FAILED After fix: # ./test_progs -t cgroup_v1v2 #27 cgroup_v1v2:OK Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210913230759.2313-3-daniel@iogearbox.net --- tools/testing/selftests/bpf/network_helpers.c | 27 +++++-- tools/testing/selftests/bpf/network_helpers.h | 1 + .../selftests/bpf/prog_tests/cgroup_v1v2.c | 79 +++++++++++++++++++ .../selftests/bpf/progs/connect4_dropper.c | 26 ++++++ 4 files changed, 127 insertions(+), 6 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c create mode 100644 tools/testing/selftests/bpf/progs/connect4_dropper.c diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 7e9f6375757a..6db1af8fdee7 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -208,11 +208,26 @@ error_close: static int connect_fd_to_addr(int fd, const struct sockaddr_storage *addr, - socklen_t addrlen) + socklen_t addrlen, const bool must_fail) { - if (connect(fd, (const struct sockaddr *)addr, addrlen)) { - log_err("Failed to connect to server"); - return -1; + int ret; + + errno = 0; + ret = connect(fd, (const struct sockaddr *)addr, addrlen); + if (must_fail) { + if (!ret) { + log_err("Unexpected success to connect to server"); + return -1; + } + if (errno != EPERM) { + log_err("Unexpected error from connect to server"); + return -1; + } + } else { + if (ret) { + log_err("Failed to connect to server"); + return -1; + } } return 0; @@ -257,7 +272,7 @@ int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts) strlen(opts->cc) + 1)) goto error_close; - if (connect_fd_to_addr(fd, &addr, addrlen)) + if (connect_fd_to_addr(fd, &addr, addrlen, opts->must_fail)) goto error_close; return fd; @@ -289,7 +304,7 @@ int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms) return -1; } - if (connect_fd_to_addr(client_fd, &addr, len)) + if (connect_fd_to_addr(client_fd, &addr, len, false)) return -1; return 0; diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index da7e132657d5..d198181a5648 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -20,6 +20,7 @@ typedef __u16 __sum16; struct network_helper_opts { const char *cc; int timeout_ms; + bool must_fail; }; /* ipv4 test vector */ diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c new file mode 100644 index 000000000000..ab3b9bc5e6d1 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +#include "connect4_dropper.skel.h" + +#include "cgroup_helpers.h" +#include "network_helpers.h" + +static int run_test(int cgroup_fd, int server_fd, bool classid) +{ + struct network_helper_opts opts = { + .must_fail = true, + }; + struct connect4_dropper *skel; + int fd, err = 0; + + skel = connect4_dropper__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return -1; + + skel->links.connect_v4_dropper = + bpf_program__attach_cgroup(skel->progs.connect_v4_dropper, + cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.connect_v4_dropper, "prog_attach")) { + err = -1; + goto out; + } + + if (classid && !ASSERT_OK(join_classid(), "join_classid")) { + err = -1; + goto out; + } + + fd = connect_to_fd_opts(server_fd, &opts); + if (fd < 0) + err = -1; + else + close(fd); +out: + connect4_dropper__destroy(skel); + return err; +} + +void test_cgroup_v1v2(void) +{ + struct network_helper_opts opts = {}; + int server_fd, client_fd, cgroup_fd; + static const int port = 60123; + + /* Step 1: Check base connectivity works without any BPF. */ + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0); + if (!ASSERT_GE(server_fd, 0, "server_fd")) + return; + client_fd = connect_to_fd_opts(server_fd, &opts); + if (!ASSERT_GE(client_fd, 0, "client_fd")) { + close(server_fd); + return; + } + close(client_fd); + close(server_fd); + + /* Step 2: Check BPF policy prog attached to cgroups drops connectivity. */ + cgroup_fd = test__join_cgroup("/connect_dropper"); + if (!ASSERT_GE(cgroup_fd, 0, "cgroup_fd")) + return; + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0); + if (!ASSERT_GE(server_fd, 0, "server_fd")) { + close(cgroup_fd); + return; + } + ASSERT_OK(run_test(cgroup_fd, server_fd, false), "cgroup-v2-only"); + setup_classid_environment(); + set_classid(42); + ASSERT_OK(run_test(cgroup_fd, server_fd, true), "cgroup-v1v2"); + cleanup_classid_environment(); + close(server_fd); + close(cgroup_fd); +} diff --git a/tools/testing/selftests/bpf/progs/connect4_dropper.c b/tools/testing/selftests/bpf/progs/connect4_dropper.c new file mode 100644 index 000000000000..b565d997810a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/connect4_dropper.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +#include +#include + +#include + +#include +#include + +#define VERDICT_REJECT 0 +#define VERDICT_PROCEED 1 + +SEC("cgroup/connect4") +int connect_v4_dropper(struct bpf_sock_addr *ctx) +{ + if (ctx->type != SOCK_STREAM) + return VERDICT_PROCEED; + if (ctx->user_port == bpf_htons(60123)) + return VERDICT_REJECT; + return VERDICT_PROCEED; +} + +char _license[] SEC("license") = "GPL"; From 41d3a6bd1d37149b18331fc4bb789c5456a7aeb0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 13 Sep 2021 13:08:51 -0600 Subject: [PATCH 111/543] io_uring: pin SQPOLL data before unlocking ring lock We need to re-check sqd->thread after we've dropped the lock. Pin the sqd before doing the lockdep lock dance, and check if the thread is alive after that. It's either NULL or alive, as the SQPOLL thread cannot exit without holding the same sqd->lock. Reported-and-tested-by: syzbot+337de45f13a4fd54d708@syzkaller.appspotmail.com Fixes: fa84693b3c89 ("io_uring: ensure IORING_REGISTER_IOWQ_MAX_WORKERS works with SQPOLL") Signed-off-by: Jens Axboe --- fs/io_uring.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 66a7414c3756..a864a94364c6 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -10563,10 +10563,12 @@ static int io_register_iowq_max_workers(struct io_ring_ctx *ctx, * ordering. Fine to drop uring_lock here, we hold * a ref to the ctx. */ + refcount_inc(&sqd->refs); mutex_unlock(&ctx->uring_lock); mutex_lock(&sqd->lock); mutex_lock(&ctx->uring_lock); - tctx = sqd->thread->io_uring; + if (sqd->thread) + tctx = sqd->thread->io_uring; } } else { tctx = current->io_uring; @@ -10580,16 +10582,20 @@ static int io_register_iowq_max_workers(struct io_ring_ctx *ctx, if (ret) goto err; - if (sqd) + if (sqd) { mutex_unlock(&sqd->lock); + io_put_sq_data(sqd); + } if (copy_to_user(arg, new_count, sizeof(new_count))) return -EFAULT; return 0; err: - if (sqd) + if (sqd) { mutex_unlock(&sqd->lock); + io_put_sq_data(sqd); + } return ret; } From 6a2ea0d34af1ca807d5ba6a8350a037ff3cd35cc Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 17 Aug 2021 16:55:31 -0700 Subject: [PATCH 112/543] scsi: st: Add missing break in switch statement in st_ioctl() Clang + -Wimplicit-fallthrough warns: drivers/scsi/st.c:3831:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] default: ^ drivers/scsi/st.c:3831:2: note: insert 'break;' to avoid fall-through default: ^ break; 1 warning generated. Clang's -Wimplicit-fallthrough is a little bit more pedantic than GCC's, requiring every case block to end in break, return, or fallthrough, rather than allowing implicit fallthroughs to cases that just contain break or return. Add a break so that there is no more warning, as has been done all over the tree already. Link: https://lore.kernel.org/r/20210817235531.172995-1-nathan@kernel.org Fixes: 2e27f576abc6 ("scsi: scsi_ioctl: Call scsi_cmd_ioctl() from scsi_ioctl()") Reviewed-by: Gustavo A. R. Silva Signed-off-by: Nathan Chancellor Signed-off-by: Martin K. Petersen --- drivers/scsi/st.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 9d04929f03a1..ae8636d3780b 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -3823,6 +3823,7 @@ static long st_ioctl(struct file *file, unsigned int cmd_in, unsigned long arg) case CDROM_SEND_PACKET: if (!capable(CAP_SYS_RAWIO)) return -EPERM; + break; default: break; } From 96fafe7c6523886308605d30ec92c7936abe7c2c Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 30 Aug 2021 16:10:50 -0700 Subject: [PATCH 113/543] scsi: elx: efct: Fix void-pointer-to-enum-cast warning for efc_nport_topology The kernel test robot flagged an warning for ".../efc_device.c:932:6: warning: cast to smaller integer type 'enum efc_nport_topology' from 'void *'" For the topology events, the "arg" field is generically defined as a void * and is used to pass different arguments. Most of the arguments are pointers to data structures. But for the EFC_EVT_NPORT_TOPOLOGY_NOTIFY event, the argument is an enum value, and the code is typecasting the void * to an enum generating the warning. Fix by converting the EFC_EVT_NPORT_TOPOLOGY_NOTIFY event to pass a pointer to the enum, thus it's a straight-forward pointer dereference in the event handler. Link: https://lore.kernel.org/r/20210830231050.5951-1-jsmart2021@gmail.com Fixes: 202bfdffae27 ("scsi: elx: libefc: FC node ELS and state handling") Reported-by: kernel test robot Co-developed-by: Ram Vegesna Signed-off-by: Ram Vegesna Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/elx/libefc/efc_device.c | 7 +++---- drivers/scsi/elx/libefc/efc_fabric.c | 3 +-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/elx/libefc/efc_device.c b/drivers/scsi/elx/libefc/efc_device.c index 725ca2a23fb2..52be01333c6e 100644 --- a/drivers/scsi/elx/libefc/efc_device.c +++ b/drivers/scsi/elx/libefc/efc_device.c @@ -928,22 +928,21 @@ __efc_d_wait_topology_notify(struct efc_sm_ctx *ctx, break; case EFC_EVT_NPORT_TOPOLOGY_NOTIFY: { - enum efc_nport_topology topology = - (enum efc_nport_topology)arg; + enum efc_nport_topology *topology = arg; WARN_ON(node->nport->domain->attached); WARN_ON(node->send_ls_acc != EFC_NODE_SEND_LS_ACC_PLOGI); node_printf(node, "topology notification, topology=%d\n", - topology); + *topology); /* At the time the PLOGI was received, the topology was unknown, * so we didn't know which node would perform the domain attach: * 1. The node from which the PLOGI was sent (p2p) or * 2. The node to which the FLOGI was sent (fabric). */ - if (topology == EFC_NPORT_TOPO_P2P) { + if (*topology == EFC_NPORT_TOPO_P2P) { /* if this is p2p, need to attach to the domain using * the d_id from the PLOGI received */ diff --git a/drivers/scsi/elx/libefc/efc_fabric.c b/drivers/scsi/elx/libefc/efc_fabric.c index d397220d9e54..3270ce40196c 100644 --- a/drivers/scsi/elx/libefc/efc_fabric.c +++ b/drivers/scsi/elx/libefc/efc_fabric.c @@ -107,7 +107,6 @@ void efc_fabric_notify_topology(struct efc_node *node) { struct efc_node *tmp_node; - enum efc_nport_topology topology = node->nport->topology; unsigned long index; /* @@ -118,7 +117,7 @@ efc_fabric_notify_topology(struct efc_node *node) if (tmp_node != node) { efc_node_post_event(tmp_node, EFC_EVT_NPORT_TOPOLOGY_NOTIFY, - (void *)topology); + &node->nport->topology); } } } From 59936430e6a6acb0ef943e9306506b2e9c2e45a8 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 30 Aug 2021 16:12:43 -0700 Subject: [PATCH 114/543] scsi: lpfc: Fix CPU to/from endian warnings introduced by ELS processing The kernel test robot reported the following sparse warning: ".../lpfc_els.c:3984:25: sparse: sparse: cast from restricted __be16" For the error being flagged, using be32_to_cpu() on a be16 data type, it was simple enough. But a review of other elements and warnings were also evaluated. This patch corrected several items in the original patch: - Using be32_to_cpu() on a be16 data type - cpu_to_le32() used on a std uint32_t (CPU) data type. Note: This is a byte array, but stored in LE layout by hardware at 32-bit boundaries. So it possibly needed conversion. - Using cpu_to_le32() on a std uint16_t and assigned to a char typeA - Using le32_to_cpu() on a le16 type - Missing cpu_to_le16() on an assignment Link: https://lore.kernel.org/r/20210830231243.6227-1-jsmart2021@gmail.com Fixes: 9064aeb2df8e ("scsi: lpfc: Add EDC ELS support") Reported-by: kernel test robot Co-developed-by: Justin Tee Signed-off-by: Justin Tee Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_els.c | 8 ++++---- drivers/scsi/lpfc/lpfc_hw4.h | 2 +- drivers/scsi/lpfc/lpfc_init.c | 16 ++++++++-------- drivers/scsi/lpfc/lpfc_sli.c | 5 +++-- 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 1254a575fd47..f3fc79b99165 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -4015,11 +4015,11 @@ lpfc_cmpl_els_edc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, be32_to_cpu(pcgd->desc_tag), be32_to_cpu(pcgd->desc_len), be32_to_cpu(pcgd->xmt_signal_capability), - be32_to_cpu(pcgd->xmt_signal_frequency.count), - be32_to_cpu(pcgd->xmt_signal_frequency.units), + be16_to_cpu(pcgd->xmt_signal_frequency.count), + be16_to_cpu(pcgd->xmt_signal_frequency.units), be32_to_cpu(pcgd->rcv_signal_capability), - be32_to_cpu(pcgd->rcv_signal_frequency.count), - be32_to_cpu(pcgd->rcv_signal_frequency.units)); + be16_to_cpu(pcgd->rcv_signal_frequency.count), + be16_to_cpu(pcgd->rcv_signal_frequency.units)); /* Compare driver and Fport capabilities and choose * least common. diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index 79a4872c2edb..7359505e6041 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -1167,7 +1167,7 @@ struct lpfc_mbx_read_object { /* Version 0 */ #define lpfc_mbx_rd_object_rlen_MASK 0x00FFFFFF #define lpfc_mbx_rd_object_rlen_WORD word0 uint32_t rd_object_offset; - uint32_t rd_object_name[LPFC_MBX_OBJECT_NAME_LEN_DW]; + __le32 rd_object_name[LPFC_MBX_OBJECT_NAME_LEN_DW]; #define LPFC_OBJ_NAME_SZ 104 /* 26 x sizeof(uint32_t) is 104. */ uint32_t rd_object_cnt; struct lpfc_mbx_host_buf rd_object_hbuf[4]; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 0ec322f0e3cb..597e5a1ef060 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -5518,7 +5518,7 @@ lpfc_cgn_update_stat(struct lpfc_hba *phba, uint32_t dtag) if (phba->cgn_fpin_frequency && phba->cgn_fpin_frequency != LPFC_FPIN_INIT_FREQ) { value = LPFC_CGN_TIMER_TO_MIN / phba->cgn_fpin_frequency; - cp->cgn_stat_npm = cpu_to_le32(value); + cp->cgn_stat_npm = value; } value = lpfc_cgn_calc_crc32(cp, LPFC_CGN_INFO_SZ, LPFC_CGN_CRC32_SEED); @@ -5547,9 +5547,9 @@ lpfc_cgn_save_evt_cnt(struct lpfc_hba *phba) uint32_t mbps; uint32_t dvalue, wvalue, lvalue, avalue; uint64_t latsum; - uint16_t *ptr; - uint32_t *lptr; - uint16_t *mptr; + __le16 *ptr; + __le32 *lptr; + __le16 *mptr; /* Make sure we have a congestion info buffer */ if (!phba->cgn_i) @@ -5570,7 +5570,7 @@ lpfc_cgn_save_evt_cnt(struct lpfc_hba *phba) if (phba->cgn_fpin_frequency && phba->cgn_fpin_frequency != LPFC_FPIN_INIT_FREQ) { value = LPFC_CGN_TIMER_TO_MIN / phba->cgn_fpin_frequency; - cp->cgn_stat_npm = cpu_to_le32(value); + cp->cgn_stat_npm = value; } /* Read and clear the latency counters for this minute */ @@ -5753,7 +5753,7 @@ lpfc_cgn_save_evt_cnt(struct lpfc_hba *phba) dvalue += le32_to_cpu(cp->cgn_drvr_hr[i]); wvalue += le32_to_cpu(cp->cgn_warn_hr[i]); lvalue += le32_to_cpu(cp->cgn_latency_hr[i]); - mbps += le32_to_cpu(cp->cgn_bw_hr[i]); + mbps += le16_to_cpu(cp->cgn_bw_hr[i]); avalue += le32_to_cpu(cp->cgn_alarm_hr[i]); } if (lvalue) /* Avg of latency averages */ @@ -13411,8 +13411,8 @@ lpfc_init_congestion_buf(struct lpfc_hba *phba) /* last used Index initialized to 0xff already */ - cp->cgn_warn_freq = LPFC_FPIN_INIT_FREQ; - cp->cgn_alarm_freq = LPFC_FPIN_INIT_FREQ; + cp->cgn_warn_freq = cpu_to_le16(LPFC_FPIN_INIT_FREQ); + cp->cgn_alarm_freq = cpu_to_le16(LPFC_FPIN_INIT_FREQ); crc = lpfc_cgn_calc_crc32(cp, LPFC_CGN_INFO_SZ, LPFC_CGN_CRC32_SEED); cp->cgn_info_crc = cpu_to_le32(crc); diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index ffd8a140638c..78ce38d7251c 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -22090,6 +22090,7 @@ lpfc_read_object(struct lpfc_hba *phba, char *rdobject, uint32_t *datap, uint32_t shdr_status, shdr_add_status; union lpfc_sli4_cfg_shdr *shdr; struct lpfc_dmabuf *pcmd; + u32 rd_object_name[LPFC_MBX_OBJECT_NAME_LEN_DW] = {0}; /* sanity check on queue memory */ if (!datap) @@ -22113,10 +22114,10 @@ lpfc_read_object(struct lpfc_hba *phba, char *rdobject, uint32_t *datap, memset((void *)read_object->u.request.rd_object_name, 0, LPFC_OBJ_NAME_SZ); - sprintf((uint8_t *)read_object->u.request.rd_object_name, rdobject); + scnprintf((char *)rd_object_name, sizeof(rd_object_name), rdobject); for (j = 0; j < strlen(rdobject); j++) read_object->u.request.rd_object_name[j] = - cpu_to_le32(read_object->u.request.rd_object_name[j]); + cpu_to_le32(rd_object_name[j]); pcmd = kmalloc(sizeof(*pcmd), GFP_KERNEL); if (pcmd) From 37e384095f20cca728500fe5344cd308aa6fd7ff Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 7 Sep 2021 22:09:27 -0700 Subject: [PATCH 115/543] scsi: lpfc: Fix compilation errors on kernels with no CONFIG_DEBUG_FS The Kernel test robot flagged the following warning: ".../lpfc_init.c:7788:35: error: 'struct lpfc_sli4_hba' has no member named 'c_stat'" Reviewing this issue highlighted that one of the recent patches caused the driver to no longer compile cleanly if CONFIG_DEBUG_FS is not set. Correct the different areas that are failing to compile. Link: https://lore.kernel.org/r/20210908050927.37275-1-jsmart2021@gmail.com Fixes: 02243836ad6f ("scsi: lpfc: Add support for the CM framework") Reviewed-by: Nathan Chancellor Build-tested-by: Nathan Chancellor Co-developed-by: Justin Tee Signed-off-by: Justin Tee Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_init.c | 4 ++-- drivers/scsi/lpfc/lpfc_nvme.c | 2 -- drivers/scsi/lpfc/lpfc_scsi.c | 6 +----- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 597e5a1ef060..195169badb37 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -8277,11 +8277,11 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) return 0; out_free_hba_hdwq_info: - free_percpu(phba->sli4_hba.c_stat); #ifdef CONFIG_SCSI_LPFC_DEBUG_FS + free_percpu(phba->sli4_hba.c_stat); out_free_hba_idle_stat: - kfree(phba->sli4_hba.idle_stat); #endif + kfree(phba->sli4_hba.idle_stat); out_free_hba_eq_info: free_percpu(phba->sli4_hba.eq_info); out_free_hba_cpu_map: diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 73a3568ff17e..479b3eed6208 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -1489,9 +1489,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, struct lpfc_nvme_qhandle *lpfc_queue_info; struct lpfc_nvme_fcpreq_priv *freqpriv; struct nvme_common_command *sqe; -#ifdef CONFIG_SCSI_LPFC_DEBUG_FS uint64_t start = 0; -#endif /* Validate pointers. LLDD fault handling with transport does * have timing races. diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 0fde1e874c7a..63d8ac9f68a7 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -5578,12 +5578,8 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device)); int err, idx; u8 *uuid = NULL; -#ifdef CONFIG_SCSI_LPFC_DEBUG_FS - uint64_t start = 0L; + uint64_t start; - if (phba->ktime_on) - start = ktime_get_ns(); -#endif start = ktime_get_ns(); rdata = lpfc_rport_data_from_scsi_device(cmnd->device); From 5d1e15108b8d058d537f19cdef4170d2ae4eed08 Mon Sep 17 00:00:00 2001 From: Chi Minghao Date: Tue, 31 Aug 2021 04:40:58 -0700 Subject: [PATCH 116/543] scsi: lpfc: Remove unneeded variable Fix the following coccicheck REVIEW: ./drivers/scsi/lpfc/lpfc_scsi.c:1498:9-12 REVIEW Unneeded variable Link: https://lore.kernel.org/r/20210831114058.17817-1-lv.ruyi@zte.com.cn Reported-by: Zeal Robot Reviewed-by: James Smart Signed-off-by: Chi Minghao Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_scsi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 63d8ac9f68a7..befdf864c43b 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -1495,7 +1495,6 @@ static int lpfc_bg_err_opcodes(struct lpfc_hba *phba, struct scsi_cmnd *sc, uint8_t *txop, uint8_t *rxop) { - uint8_t ret = 0; if (sc->prot_flags & SCSI_PROT_IP_CHECKSUM) { switch (scsi_get_prot_op(sc)) { @@ -1548,7 +1547,7 @@ lpfc_bg_err_opcodes(struct lpfc_hba *phba, struct scsi_cmnd *sc, } } - return ret; + return 0; } #endif From 65ef27f7798b57138351d28fd2f61f2afa164400 Mon Sep 17 00:00:00 2001 From: ChanWoo Lee Date: Wed, 1 Sep 2021 11:56:17 +0900 Subject: [PATCH 117/543] scsi: ufs: ufshpb: Remove unused parameters The following parameters are not used in the function. Remove them. *func(): ufshpb_set_hpb_read_to_upiu -> struct ufshpb_lu *hpb -> u32 lpn Link: https://lore.kernel.org/r/20210901025617.31174-1-cw9316.lee@samsung.com Reviewed-by: Daejun Park Reviewed-by: Bart Van Assche Signed-off-by: ChanWoo Lee Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshpb.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c index 02fb51ae8b25..589af5f6b940 100644 --- a/drivers/scsi/ufs/ufshpb.c +++ b/drivers/scsi/ufs/ufshpb.c @@ -333,9 +333,8 @@ ufshpb_get_pos_from_lpn(struct ufshpb_lu *hpb, unsigned long lpn, int *rgn_idx, } static void -ufshpb_set_hpb_read_to_upiu(struct ufs_hba *hba, struct ufshpb_lu *hpb, - struct ufshcd_lrb *lrbp, u32 lpn, __be64 ppn, - u8 transfer_len, int read_id) +ufshpb_set_hpb_read_to_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp, + __be64 ppn, u8 transfer_len, int read_id) { unsigned char *cdb = lrbp->cmd->cmnd; __be64 ppn_tmp = ppn; @@ -703,8 +702,7 @@ int ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) } } - ufshpb_set_hpb_read_to_upiu(hba, hpb, lrbp, lpn, ppn, transfer_len, - read_id); + ufshpb_set_hpb_read_to_upiu(hba, lrbp, ppn, transfer_len, read_id); hpb->stats.hit_cnt++; return 0; From 4e28550829258f7dab97383acaa477bd724c0ff4 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 1 Sep 2021 16:53:36 +0800 Subject: [PATCH 118/543] scsi: iscsi: Adjust iface sysfs attr detection ISCSI_NET_PARAM_IFACE_ENABLE belongs to enum iscsi_net_param instead of iscsi_iface_param so move it to ISCSI_NET_PARAM. Otherwise, when we call into the driver, we might not match and return that we don't want attr visible in sysfs. Found in code review. Link: https://lore.kernel.org/r/20210901085336.2264295-1-libaokun1@huawei.com Fixes: e746f3451ec7 ("scsi: iscsi: Fix iface sysfs attr detection") Reviewed-by: Lee Duncan Signed-off-by: Baokun Li Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index d8b05d8b5470..922e4c7bd88e 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -441,9 +441,7 @@ static umode_t iscsi_iface_attr_is_visible(struct kobject *kobj, struct iscsi_transport *t = iface->transport; int param = -1; - if (attr == &dev_attr_iface_enabled.attr) - param = ISCSI_NET_PARAM_IFACE_ENABLE; - else if (attr == &dev_attr_iface_def_taskmgmt_tmo.attr) + if (attr == &dev_attr_iface_def_taskmgmt_tmo.attr) param = ISCSI_IFACE_PARAM_DEF_TASKMGMT_TMO; else if (attr == &dev_attr_iface_header_digest.attr) param = ISCSI_IFACE_PARAM_HDRDGST_EN; @@ -483,7 +481,9 @@ static umode_t iscsi_iface_attr_is_visible(struct kobject *kobj, if (param != -1) return t->attr_is_visible(ISCSI_IFACE_PARAM, param); - if (attr == &dev_attr_iface_vlan_id.attr) + if (attr == &dev_attr_iface_enabled.attr) + param = ISCSI_NET_PARAM_IFACE_ENABLE; + else if (attr == &dev_attr_iface_vlan_id.attr) param = ISCSI_NET_PARAM_VLAN_ID; else if (attr == &dev_attr_iface_vlan_priority.attr) param = ISCSI_NET_PARAM_VLAN_PRIORITY; From e4953a93104c1fb1ef7989541f9867cc276467f9 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Wed, 1 Sep 2021 20:55:42 +0530 Subject: [PATCH 119/543] scsi: mpt3sas: Call cpu_relax() before calling udelay() Call cpu_relax() while waiting for the current blk-mq polling instance to complete. Link: https://lore.kernel.org/r/20210901152542.27866-1-sreekanth.reddy@broadcom.com Reviewed-by: Bart Van Assche Signed-off-by: Sreekanth Reddy Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 6c82435bc9cc..27eb652b564f 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -1582,8 +1582,10 @@ mpt3sas_base_pause_mq_polling(struct MPT3SAS_ADAPTER *ioc) * wait for current poll to complete. */ for (qid = 0; qid < iopoll_q_count; qid++) { - while (atomic_read(&ioc->io_uring_poll_queues[qid].busy)) + while (atomic_read(&ioc->io_uring_poll_queues[qid].busy)) { + cpu_relax(); udelay(500); + } } } From 265dfe8ebbabae7959060bd1c3f75c2473b697ed Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 6 Sep 2021 17:01:12 +0800 Subject: [PATCH 120/543] scsi: sd: Free scsi_disk device via put_device() After a device is initialized via device_initialize() it should be freed via put_device(). sd_probe() currently gets this wrong, fix it up. Link: https://lore.kernel.org/r/20210906090112.531442-1-ming.lei@redhat.com Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index cbd9999f93a6..a8039beb5a02 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3401,15 +3401,16 @@ static int sd_probe(struct device *dev) } device_initialize(&sdkp->dev); - sdkp->dev.parent = dev; + sdkp->dev.parent = get_device(dev); sdkp->dev.class = &sd_disk_class; dev_set_name(&sdkp->dev, "%s", dev_name(dev)); error = device_add(&sdkp->dev); - if (error) - goto out_free_index; + if (error) { + put_device(&sdkp->dev); + goto out; + } - get_device(dev); dev_set_drvdata(dev, sdkp); gd->major = sd_major((index & 0xf0) >> 4); From 7215e909814fed7cda33c954943a4050d8348204 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Mon, 6 Sep 2021 23:06:42 +0900 Subject: [PATCH 121/543] scsi: sd_zbc: Ensure buffer size is aligned to SECTOR_SIZE Reporting zones on a SCSI device sometimes fail with the following error: [76248.516390] ata16.00: invalid transfer count 131328 [76248.523618] sd 15:0:0:0: [sda] REPORT ZONES start lba 536870912 failed The error (from drivers/ata/libata-scsi.c:ata_scsi_zbc_in_xlat()) indicates that buffer size is not aligned to SECTOR_SIZE. This happens when the __vmalloc() failed. Consider we are reporting 4096 zones, then we will have "bufsize = roundup((4096 + 1) * 64, SECTOR_SIZE)" = (513 * 512) = 262656. Then, __vmalloc() failure halves the bufsize to 131328, which is no longer aligned to SECTOR_SIZE. Use rounddown() to ensure the size is always aligned to SECTOR_SIZE and fix the comment as well. Link: https://lore.kernel.org/r/20210906140642.2267569-1-naohiro.aota@wdc.com Fixes: 23a50861adda ("scsi: sd_zbc: Cleanup sd_zbc_alloc_report_buffer()") Cc: stable@vger.kernel.org # 5.5+ Reviewed-by: Johannes Thumshirn Reviewed-by: Damien Le Moal Reviewed-by: Himanshu Madhani Signed-off-by: Naohiro Aota Signed-off-by: Martin K. Petersen --- drivers/scsi/sd_zbc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index b9757f24b0d6..8197d31a81f9 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -154,8 +154,8 @@ static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp, /* * Report zone buffer size should be at most 64B times the number of - * zones requested plus the 64B reply header, but should be at least - * SECTOR_SIZE for ATA devices. + * zones requested plus the 64B reply header, but should be aligned + * to SECTOR_SIZE for ATA devices. * Make sure that this size does not exceed the hardware capabilities. * Furthermore, since the report zone command cannot be split, make * sure that the allocated buffer can always be mapped by limiting the @@ -174,7 +174,7 @@ static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp, *buflen = bufsize; return buf; } - bufsize >>= 1; + bufsize = rounddown(bufsize >> 1, SECTOR_SIZE); } return NULL; From ef7ae7f746e95c6fa4ec2bcfacb949c36263da78 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 6 Sep 2021 17:18:09 +0200 Subject: [PATCH 122/543] scsi: target: Fix the pgr/alua_support_store functions Commit 356ba2a8bc8d ("scsi: target: tcmu: Make pgr_support and alua_support attributes writable") introduced support for changeable alua_support and pgr_support target attributes. These can only be changed if the backstore is user-backed, otherwise the kernel returns -EINVAL. This triggers a warning in the targetcli/rtslib code when performing a target restore that includes non-userbacked backstores: # targetctl restore Storage Object block/storage1: Cannot set attribute alua_support: [Errno 22] Invalid argument, skipped Storage Object block/storage1: Cannot set attribute pgr_support: [Errno 22] Invalid argument, skipped Fix this warning by returning an error code only if we are really going to flip the PGR/ALUA bit in the transport_flags field, otherwise we will do nothing and return success. Return ENOSYS instead of EINVAL if the pgr/alua attributes can not be changed, this way it will be possible for userspace to understand if the operation failed because an invalid value has been passed to strtobool() or because the attributes are fixed. Fixes: 356ba2a8bc8d ("scsi: target: tcmu: Make pgr_support and alua_support attributes writable") Link: https://lore.kernel.org/r/20210906151809.52811-1-mlombard@redhat.com Reviewed-by: Bodo Stroesser Signed-off-by: Maurizio Lombardi Signed-off-by: Martin K. Petersen --- drivers/target/target_core_configfs.c | 32 +++++++++++++++++---------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 102ec644bc8a..023bd4516a68 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -1110,20 +1110,24 @@ static ssize_t alua_support_store(struct config_item *item, { struct se_dev_attrib *da = to_attrib(item); struct se_device *dev = da->da_dev; - bool flag; + bool flag, oldflag; int ret; + ret = strtobool(page, &flag); + if (ret < 0) + return ret; + + oldflag = !(dev->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA); + if (flag == oldflag) + return count; + if (!(dev->transport->transport_flags_changeable & TRANSPORT_FLAG_PASSTHROUGH_ALUA)) { pr_err("dev[%p]: Unable to change SE Device alua_support:" " alua_support has fixed value\n", dev); - return -EINVAL; + return -ENOSYS; } - ret = strtobool(page, &flag); - if (ret < 0) - return ret; - if (flag) dev->transport_flags &= ~TRANSPORT_FLAG_PASSTHROUGH_ALUA; else @@ -1145,20 +1149,24 @@ static ssize_t pgr_support_store(struct config_item *item, { struct se_dev_attrib *da = to_attrib(item); struct se_device *dev = da->da_dev; - bool flag; + bool flag, oldflag; int ret; + ret = strtobool(page, &flag); + if (ret < 0) + return ret; + + oldflag = !(dev->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR); + if (flag == oldflag) + return count; + if (!(dev->transport->transport_flags_changeable & TRANSPORT_FLAG_PASSTHROUGH_PGR)) { pr_err("dev[%p]: Unable to change SE Device pgr_support:" " pgr_support has fixed value\n", dev); - return -EINVAL; + return -ENOSYS; } - ret = strtobool(page, &flag); - if (ret < 0) - return ret; - if (flag) dev->transport_flags &= ~TRANSPORT_FLAG_PASSTHROUGH_PGR; else From 450907424d9ebcc28fab42a065c3cddce49ee97d Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 7 Sep 2021 09:52:25 -0700 Subject: [PATCH 123/543] scsi: elx: efct: Do not hold lock while calling fc_vport_terminate() Smatch checker reported the following error: drivers/base/power/sysfs.c:833 dpm_sysfs_remove() warn: sleeping in atomic context With a calling sequence of: efct_lio_npiv_drop_nport() <- disables preempt -> fc_vport_terminate() -> device_del() -> dpm_sysfs_remove() Issue is efct_lio_npiv_drop_nport() is making the fc_vport_terminate() call while holding a lock w/ ipl raised. It is unnecessary to hold the lock over this call, shift where the lock is taken. Link: https://lore.kernel.org/r/20210907165225.10821-1-jsmart2021@gmail.com Reported-by: Dan Carpenter Co-developed-by: Ram Vegesna Signed-off-by: Ram Vegesna Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/elx/efct/efct_lio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/elx/efct/efct_lio.c b/drivers/scsi/elx/efct/efct_lio.c index bb3b460dc0bc..4d73e92909ab 100644 --- a/drivers/scsi/elx/efct/efct_lio.c +++ b/drivers/scsi/elx/efct/efct_lio.c @@ -880,11 +880,11 @@ efct_lio_npiv_drop_nport(struct se_wwn *wwn) struct efct *efct = lio_vport->efct; unsigned long flags = 0; - spin_lock_irqsave(&efct->tgt_efct.efct_lio_lock, flags); - if (lio_vport->fc_vport) fc_vport_terminate(lio_vport->fc_vport); + spin_lock_irqsave(&efct->tgt_efct.efct_lio_lock, flags); + list_for_each_entry_safe(vport, next_vport, &efct->tgt_efct.vport_list, list_entry) { if (vport->lio_vport == lio_vport) { From 1f97c29beee774e407839768439b7f51831c3ea1 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 7 Sep 2021 23:00:44 +0200 Subject: [PATCH 124/543] scsi: ncr53c8xx: Remove unused retrieve_from_waiting_list() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop retrieve_from_waiting_list() to avoid this warning: drivers/scsi/ncr53c8xx.c:8000:26: warning: ‘retrieve_from_waiting_list’ defined but not used [-Wunused-function] Link: https://lore.kernel.org/r/YTfS/LH5vCN6afDW@ls3530 Fixes: 1c22e327545c ("scsi: ncr53c8xx: Remove unused code") Reviewed-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Helge Deller Signed-off-by: Martin K. Petersen --- drivers/scsi/ncr53c8xx.c | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/drivers/scsi/ncr53c8xx.c b/drivers/scsi/ncr53c8xx.c index 7a4f5d4dd670..2b8c6fa5e775 100644 --- a/drivers/scsi/ncr53c8xx.c +++ b/drivers/scsi/ncr53c8xx.c @@ -1939,11 +1939,8 @@ static void ncr_start_next_ccb (struct ncb *np, struct lcb * lp, int maxn); static void ncr_put_start_queue(struct ncb *np, struct ccb *cp); static void insert_into_waiting_list(struct ncb *np, struct scsi_cmnd *cmd); -static struct scsi_cmnd *retrieve_from_waiting_list(int to_remove, struct ncb *np, struct scsi_cmnd *cmd); static void process_waiting_list(struct ncb *np, int sts); -#define remove_from_waiting_list(np, cmd) \ - retrieve_from_waiting_list(1, (np), (cmd)) #define requeue_waiting_list(np) process_waiting_list((np), DID_OK) #define reset_waiting_list(np) process_waiting_list((np), DID_RESET) @@ -7997,26 +7994,6 @@ static void insert_into_waiting_list(struct ncb *np, struct scsi_cmnd *cmd) } } -static struct scsi_cmnd *retrieve_from_waiting_list(int to_remove, struct ncb *np, struct scsi_cmnd *cmd) -{ - struct scsi_cmnd **pcmd = &np->waiting_list; - - while (*pcmd) { - if (cmd == *pcmd) { - if (to_remove) { - *pcmd = (struct scsi_cmnd *) cmd->next_wcmd; - cmd->next_wcmd = NULL; - } -#ifdef DEBUG_WAITING_LIST - printk("%s: cmd %lx retrieved from waiting list\n", ncr_name(np), (u_long) cmd); -#endif - return cmd; - } - pcmd = (struct scsi_cmnd **) &(*pcmd)->next_wcmd; - } - return NULL; -} - static void process_waiting_list(struct ncb *np, int sts) { struct scsi_cmnd *waiting_list, *wcmd; From 17dfd54d391ea9f8d136fb137962987cb2c6444c Mon Sep 17 00:00:00 2001 From: jing yangyang Date: Thu, 19 Aug 2021 20:08:05 -0700 Subject: [PATCH 125/543] scsi: megaraid: Fix Coccinelle warning WARNING !A || A && B is equivalent to !A || B This issue was detected with the help of Coccinelle. Link: https://lore.kernel.org/r/20210820030805.12383-1-jing.yangyang@zte.com.cn Reported-by: Zeal Robot Acked-by: Sumit Saxena Signed-off-by: jing yangyang Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index e4298bf4a482..17c87ac8bb51 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -8773,8 +8773,7 @@ int megasas_update_device_list(struct megasas_instance *instance, if (event_type & SCAN_VD_CHANNEL) { if (!instance->requestorId || - (instance->requestorId && - megasas_get_ld_vf_affiliation(instance, 0))) { + megasas_get_ld_vf_affiliation(instance, 0)) { dcmd_ret = megasas_ld_list_query(instance, MR_LD_QUERY_TYPE_EXPOSED_TO_HOST); if (dcmd_ret != DCMD_SUCCESS) From fc13fc07490982c89f5d9d8d671ec29a39cddc85 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 3 Sep 2021 08:11:39 -0600 Subject: [PATCH 126/543] scsi: Remove SCSI CDROM MAINTAINERS entry There's little point in keeping this one separately maintained these days, so just remove the entry and it'll fall under the SCSI subsystem where it belongs. Link: https://lore.kernel.org/r/c5e12bd1-10de-634c-d6b3-dac79ed01af5@kernel.dk Signed-off-by: Jens Axboe Signed-off-by: Martin K. Petersen --- MAINTAINERS | 7 ------- 1 file changed, 7 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index eeb4c70b3d5b..fd12a39f92ef 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16650,13 +16650,6 @@ M: Lubomir Rintel S: Supported F: drivers/char/pcmcia/scr24x_cs.c -SCSI CDROM DRIVER -M: Jens Axboe -L: linux-scsi@vger.kernel.org -S: Maintained -W: http://www.kernel.dk -F: drivers/scsi/sr* - SCSI RDMA PROTOCOL (SRP) INITIATOR M: Bart Van Assche L: linux-rdma@vger.kernel.org From e699a4e1d37314eb842ba9de19a7ccee7f75da10 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 26 Aug 2021 12:57:14 +0100 Subject: [PATCH 127/543] scsi: sr: Fix spelling mistake "does'nt" -> "doesn't" There is a spelling mistake in a literal string. Fix it. Link: https://lore.kernel.org/r/20210826115714.11844-1-colin.king@canonical.com Signed-off-by: Colin Ian King Signed-off-by: Martin K. Petersen --- drivers/scsi/sr_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c index 79d9aa2df528..ddd00efc4882 100644 --- a/drivers/scsi/sr_ioctl.c +++ b/drivers/scsi/sr_ioctl.c @@ -523,7 +523,7 @@ static int sr_read_sector(Scsi_CD *cd, int lba, int blksize, unsigned char *dest return rc; cd->readcd_known = 0; sr_printk(KERN_INFO, cd, - "CDROM does'nt support READ CD (0xbe) command\n"); + "CDROM doesn't support READ CD (0xbe) command\n"); /* fall & retry the other way */ } /* ... if this fails, we switch the blocksize using MODE SELECT */ From 655a68b2203e44912afe462dff9d83d68ac88333 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 2 Sep 2021 23:36:43 +0100 Subject: [PATCH 128/543] scsi: megaraid: Clean up some inconsistent indenting There are a few statements where the indentation is not correct, clean these up. Link: https://lore.kernel.org/r/20210902223643.56979-1-colin.king@canonical.com Signed-off-by: Colin Ian King Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 17c87ac8bb51..39d8754e63ac 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -1916,7 +1916,7 @@ void megasas_set_dynamic_target_properties(struct scsi_device *sdev, raid = MR_LdRaidGet(ld, local_map_ptr); if (raid->capability.ldPiMode == MR_PROT_INFO_TYPE_CONTROLLER) - blk_queue_update_dma_alignment(sdev->request_queue, 0x7); + blk_queue_update_dma_alignment(sdev->request_queue, 0x7); mr_device_priv_data->is_tm_capable = raid->capability.tmCapable; @@ -8033,7 +8033,7 @@ skip_firing_dcmds: if (instance->adapter_type != MFI_SERIES) { megasas_release_fusion(instance); - pd_seq_map_sz = sizeof(struct MR_PD_CFG_SEQ_NUM_SYNC) + + pd_seq_map_sz = sizeof(struct MR_PD_CFG_SEQ_NUM_SYNC) + (sizeof(struct MR_PD_CFG_SEQ) * (MAX_PHYSICAL_DEVICES - 1)); for (i = 0; i < 2 ; i++) { From 04c260bdaeede8c703bddc21099e4da96f2909e2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 2 Sep 2021 23:42:15 +0100 Subject: [PATCH 129/543] scsi: mpt3sas: Clean up some inconsistent indenting There are a couple of statements where the indentation is not correct, clean these up. Remove a redundant break statement. Link: https://lore.kernel.org/r/20210902224215.57286-1-colin.king@canonical.com Signed-off-by: Colin Ian King Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_ctl.c | 2 +- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c index 770b241d7bb2..1b79f01f03a4 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c @@ -2178,7 +2178,7 @@ mpt3sas_send_diag_release(struct MPT3SAS_ADAPTER *ioc, u8 buffer_type, mpt3sas_check_cmd_timeout(ioc, ioc->ctl_cmds.status, mpi_request, sizeof(Mpi2DiagReleaseRequest_t)/4, reset_needed); - *issue_reset = reset_needed; + *issue_reset = reset_needed; rc = -EFAULT; goto out; } diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 2f82b1e629af..d383d4a03436 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -10749,8 +10749,7 @@ _mpt3sas_fw_work(struct MPT3SAS_ADAPTER *ioc, struct fw_event_work *fw_event) case MPI2_EVENT_PCIE_TOPOLOGY_CHANGE_LIST: _scsih_pcie_topology_change_event(ioc, fw_event); ioc->current_event = NULL; - return; - break; + return; } out: fw_event_work_put(fw_event); From 1cbc9ad3eecd492be33b727b4606ae75bc880676 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 31 Aug 2021 17:53:17 +0300 Subject: [PATCH 130/543] scsi: ufs: ufs-pci: Fix Intel LKF link stability Intel LKF can experience link errors. Make fixes to increase link stability, especially when switching to high speed modes. Link: https://lore.kernel.org/r/20210831145317.26306-1-adrian.hunter@intel.com Fixes: b2c57925df1f ("scsi: ufs: ufs-pci: Add support for Intel LKF") Cc: stable@vger.kernel.org Signed-off-by: Adrian Hunter Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd-pci.c | 78 +++++++++++++++++++++++++++++++++++ drivers/scsi/ufs/ufshcd.c | 3 +- drivers/scsi/ufs/ufshcd.h | 1 + 3 files changed, 81 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c index b3bcc5c882da..149c1aa09103 100644 --- a/drivers/scsi/ufs/ufshcd-pci.c +++ b/drivers/scsi/ufs/ufshcd-pci.c @@ -128,6 +128,81 @@ static int ufs_intel_link_startup_notify(struct ufs_hba *hba, return err; } +static int ufs_intel_set_lanes(struct ufs_hba *hba, u32 lanes) +{ + struct ufs_pa_layer_attr pwr_info = hba->pwr_info; + int ret; + + pwr_info.lane_rx = lanes; + pwr_info.lane_tx = lanes; + ret = ufshcd_config_pwr_mode(hba, &pwr_info); + if (ret) + dev_err(hba->dev, "%s: Setting %u lanes, err = %d\n", + __func__, lanes, ret); + return ret; +} + +static int ufs_intel_lkf_pwr_change_notify(struct ufs_hba *hba, + enum ufs_notify_change_status status, + struct ufs_pa_layer_attr *dev_max_params, + struct ufs_pa_layer_attr *dev_req_params) +{ + int err = 0; + + switch (status) { + case PRE_CHANGE: + if (ufshcd_is_hs_mode(dev_max_params) && + (hba->pwr_info.lane_rx != 2 || hba->pwr_info.lane_tx != 2)) + ufs_intel_set_lanes(hba, 2); + memcpy(dev_req_params, dev_max_params, sizeof(*dev_req_params)); + break; + case POST_CHANGE: + if (ufshcd_is_hs_mode(dev_req_params)) { + u32 peer_granularity; + + usleep_range(1000, 1250); + err = ufshcd_dme_peer_get(hba, UIC_ARG_MIB(PA_GRANULARITY), + &peer_granularity); + } + break; + default: + break; + } + + return err; +} + +static int ufs_intel_lkf_apply_dev_quirks(struct ufs_hba *hba) +{ + u32 granularity, peer_granularity; + u32 pa_tactivate, peer_pa_tactivate; + int ret; + + ret = ufshcd_dme_get(hba, UIC_ARG_MIB(PA_GRANULARITY), &granularity); + if (ret) + goto out; + + ret = ufshcd_dme_peer_get(hba, UIC_ARG_MIB(PA_GRANULARITY), &peer_granularity); + if (ret) + goto out; + + ret = ufshcd_dme_get(hba, UIC_ARG_MIB(PA_TACTIVATE), &pa_tactivate); + if (ret) + goto out; + + ret = ufshcd_dme_peer_get(hba, UIC_ARG_MIB(PA_TACTIVATE), &peer_pa_tactivate); + if (ret) + goto out; + + if (granularity == peer_granularity) { + u32 new_peer_pa_tactivate = pa_tactivate + 2; + + ret = ufshcd_dme_peer_set(hba, UIC_ARG_MIB(PA_TACTIVATE), new_peer_pa_tactivate); + } +out: + return ret; +} + #define INTEL_ACTIVELTR 0x804 #define INTEL_IDLELTR 0x808 @@ -351,6 +426,7 @@ static int ufs_intel_lkf_init(struct ufs_hba *hba) struct ufs_host *ufs_host; int err; + hba->nop_out_timeout = 200; hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8; hba->caps |= UFSHCD_CAP_CRYPTO; err = ufs_intel_common_init(hba); @@ -381,6 +457,8 @@ static struct ufs_hba_variant_ops ufs_intel_lkf_hba_vops = { .exit = ufs_intel_common_exit, .hce_enable_notify = ufs_intel_hce_enable_notify, .link_startup_notify = ufs_intel_link_startup_notify, + .pwr_change_notify = ufs_intel_lkf_pwr_change_notify, + .apply_dev_quirks = ufs_intel_lkf_apply_dev_quirks, .resume = ufs_intel_resume, .device_reset = ufs_intel_device_reset, }; diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 3841ab49f556..67889d74761c 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -4776,7 +4776,7 @@ static int ufshcd_verify_dev_init(struct ufs_hba *hba) mutex_lock(&hba->dev_cmd.lock); for (retries = NOP_OUT_RETRIES; retries > 0; retries--) { err = ufshcd_exec_dev_cmd(hba, DEV_CMD_TYPE_NOP, - NOP_OUT_TIMEOUT); + hba->nop_out_timeout); if (!err || err == -ETIMEDOUT) break; @@ -9483,6 +9483,7 @@ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle) hba->host = host; hba->dev = dev; hba->dev_ref_clk_freq = REF_CLK_FREQ_INVAL; + hba->nop_out_timeout = NOP_OUT_TIMEOUT; INIT_LIST_HEAD(&hba->clk_list_head); spin_lock_init(&hba->outstanding_lock); diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 52ea6f350b18..4723f27a55d1 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -858,6 +858,7 @@ struct ufs_hba { /* Device management request data */ struct ufs_dev_cmd dev_cmd; ktime_t last_dme_cmd_tstamp; + int nop_out_timeout; /* Keeps information of the UFS device connected to this host */ struct ufs_dev_info dev_info; From 4521428c48118b0f5f7a637ce7dedd76c29bcdaa Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 11 Sep 2021 14:11:59 +0200 Subject: [PATCH 131/543] scsi: sd: Make sd_spinup_disk() less noisy sd_spinup_disk() is a little bit noisy after commit 848ade90ba9c ("scsi: sd: Do not exit sd_spinup_disk() quietly"): scsi 0:0:0:0: Direct-Access Multiple Card Reader 1.00 PQ: 0 ANSI: 0 sd 0:0:0:0: Attached scsi generic sg0 type 0 sd 0:0:0:0: [sda] Media removed, stopped polling sd 0:0:0:0: [sda] Media removed, stopped polling sd 0:0:0:0: [sda] Attached SCSI removable disk sd 0:0:0:0: [sda] Media removed, stopped polling There's not really a benefit in printing the same message multiple times. Therefore print it only if media_present was previously set. Link: https://lore.kernel.org/r/a2d0a249-6035-9697-626a-e14ec50ef6ee@gmail.com Reviewed-by: Bart Van Assche Signed-off-by: Heiner Kallweit Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index a8039beb5a02..523bf2fdc253 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2124,6 +2124,8 @@ sd_spinup_disk(struct scsi_disk *sdkp) retries = 0; do { + bool media_was_present = sdkp->media_present; + cmd[0] = TEST_UNIT_READY; memset((void *) &cmd[1], 0, 9); @@ -2138,7 +2140,8 @@ sd_spinup_disk(struct scsi_disk *sdkp) * with any more polling. */ if (media_not_present(sdkp, &sshdr)) { - sd_printk(KERN_NOTICE, sdkp, "Media removed, stopped polling\n"); + if (media_was_present) + sd_printk(KERN_NOTICE, sdkp, "Media removed, stopped polling\n"); return; } From 1a0db7744e453844aa2db3f2959aea4a378025ea Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Sat, 11 Sep 2021 18:53:06 +0800 Subject: [PATCH 132/543] scsi: bsg: Fix device unregistration device_initialize() is used to take a refcount on the device. However, put_device() is not called during device teardown. This leads to a leak of private data of the driver core, dev_name(), etc. This is reported by kmemleak at boot time if we compile kernel with DEBUG_TEST_DRIVER_REMOVE. Fix memory leaks during unregistration and implement a release function. Link: https://lore.kernel.org/r/20210911105306.1511-1-yuzenghui@huawei.com Fixes: ead09dd3aed5 ("scsi: bsg: Simplify device registration") Reviewed-by: Johan Hovold Signed-off-by: Zenghui Yu Signed-off-by: Martin K. Petersen --- block/bsg.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/block/bsg.c b/block/bsg.c index 351095193788..882f56bff14f 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -165,13 +165,20 @@ static const struct file_operations bsg_fops = { .llseek = default_llseek, }; +static void bsg_device_release(struct device *dev) +{ + struct bsg_device *bd = container_of(dev, struct bsg_device, device); + + ida_simple_remove(&bsg_minor_ida, MINOR(bd->device.devt)); + kfree(bd); +} + void bsg_unregister_queue(struct bsg_device *bd) { if (bd->queue->kobj.sd) sysfs_remove_link(&bd->queue->kobj, "bsg"); cdev_device_del(&bd->cdev, &bd->device); - ida_simple_remove(&bsg_minor_ida, MINOR(bd->device.devt)); - kfree(bd); + put_device(&bd->device); } EXPORT_SYMBOL_GPL(bsg_unregister_queue); @@ -193,11 +200,13 @@ struct bsg_device *bsg_register_queue(struct request_queue *q, if (ret < 0) { if (ret == -ENOSPC) dev_err(parent, "bsg: too many bsg devices\n"); - goto out_kfree; + kfree(bd); + return ERR_PTR(ret); } bd->device.devt = MKDEV(bsg_major, ret); bd->device.class = bsg_class; bd->device.parent = parent; + bd->device.release = bsg_device_release; dev_set_name(&bd->device, "%s", name); device_initialize(&bd->device); @@ -205,7 +214,7 @@ struct bsg_device *bsg_register_queue(struct request_queue *q, bd->cdev.owner = THIS_MODULE; ret = cdev_device_add(&bd->cdev, &bd->device); if (ret) - goto out_ida_remove; + goto out_put_device; if (q->kobj.sd) { ret = sysfs_create_link(&q->kobj, &bd->device.kobj, "bsg"); @@ -217,10 +226,8 @@ struct bsg_device *bsg_register_queue(struct request_queue *q, out_device_del: cdev_device_del(&bd->cdev, &bd->device); -out_ida_remove: - ida_simple_remove(&bsg_minor_ida, MINOR(bd->device.devt)); -out_kfree: - kfree(bd); +out_put_device: + put_device(&bd->device); return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(bsg_register_queue); From b564171ade70570b7f335fa8ed17adb28409e3ac Mon Sep 17 00:00:00 2001 From: Li Li Date: Fri, 10 Sep 2021 09:42:10 -0700 Subject: [PATCH 133/543] binder: fix freeze race Currently cgroup freezer is used to freeze the application threads, and BINDER_FREEZE is used to freeze the corresponding binder interface. There's already a mechanism in ioctl(BINDER_FREEZE) to wait for any existing transactions to drain out before actually freezing the binder interface. But freezing an app requires 2 steps, freezing the binder interface with ioctl(BINDER_FREEZE) and then freezing the application main threads with cgroupfs. This is not an atomic operation. The following race issue might happen. 1) Binder interface is frozen by ioctl(BINDER_FREEZE); 2) Main thread A initiates a new sync binder transaction to process B; 3) Main thread A is frozen by "echo 1 > cgroup.freeze"; 4) The response from process B reaches the frozen thread, which will unexpectedly fail. This patch provides a mechanism to check if there's any new pending transaction happening between ioctl(BINDER_FREEZE) and freezing the main thread. If there's any, the main thread freezing operation can be rolled back to finish the pending transaction. Furthermore, the response might reach the binder driver before the rollback actually happens. That will still cause failed transaction. As the other process doesn't wait for another response of the response, the response transaction failure can be fixed by treating the response transaction like an oneway/async one, allowing it to reach the frozen thread. And it will be consumed when the thread gets unfrozen later. NOTE: This patch reuses the existing definition of struct binder_frozen_status_info but expands the bit assignments of __u32 member sync_recv. To ensure backward compatibility, bit 0 of sync_recv still indicates there's an outstanding sync binder transaction. This patch adds new information to bit 1 of sync_recv, indicating the binder transaction happens exactly when there's a race. If an existing userspace app runs on a new kernel, a sync binder call will set bit 0 of sync_recv so ioctl(BINDER_GET_FROZEN_INFO) still return the expected value (true). The app just doesn't check bit 1 intentionally so it doesn't have the ability to tell if there's a race. This behavior is aligned with what happens on an old kernel which doesn't set bit 1 at all. A new userspace app can 1) check bit 0 to know if there's a sync binder transaction happened when being frozen - same as before; and 2) check bit 1 to know if that sync binder transaction happened exactly when there's a race - a new information for rollback decision. the same time, confirmed the pending transactions succeeded. Fixes: 432ff1e91694 ("binder: BINDER_FREEZE ioctl") Acked-by: Todd Kjos Cc: stable Signed-off-by: Li Li Test: stress test with apps being frozen and initiating binder calls at Link: https://lore.kernel.org/r/20210910164210.2282716-2-dualli@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 35 ++++++++++++++++++++++++----- drivers/android/binder_internal.h | 2 ++ include/uapi/linux/android/binder.h | 7 ++++++ 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index d9030cb6b1e4..1a68c2f590cf 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -3038,9 +3038,8 @@ static void binder_transaction(struct binder_proc *proc, if (reply) { binder_enqueue_thread_work(thread, tcomplete); binder_inner_proc_lock(target_proc); - if (target_thread->is_dead || target_proc->is_frozen) { - return_error = target_thread->is_dead ? - BR_DEAD_REPLY : BR_FROZEN_REPLY; + if (target_thread->is_dead) { + return_error = BR_DEAD_REPLY; binder_inner_proc_unlock(target_proc); goto err_dead_proc_or_thread; } @@ -4648,6 +4647,22 @@ static int binder_ioctl_get_node_debug_info(struct binder_proc *proc, return 0; } +static bool binder_txns_pending_ilocked(struct binder_proc *proc) +{ + struct rb_node *n; + struct binder_thread *thread; + + if (proc->outstanding_txns > 0) + return true; + + for (n = rb_first(&proc->threads); n; n = rb_next(n)) { + thread = rb_entry(n, struct binder_thread, rb_node); + if (thread->transaction_stack) + return true; + } + return false; +} + static int binder_ioctl_freeze(struct binder_freeze_info *info, struct binder_proc *target_proc) { @@ -4679,8 +4694,13 @@ static int binder_ioctl_freeze(struct binder_freeze_info *info, (!target_proc->outstanding_txns), msecs_to_jiffies(info->timeout_ms)); - if (!ret && target_proc->outstanding_txns) - ret = -EAGAIN; + /* Check pending transactions that wait for reply */ + if (ret >= 0) { + binder_inner_proc_lock(target_proc); + if (binder_txns_pending_ilocked(target_proc)) + ret = -EAGAIN; + binder_inner_proc_unlock(target_proc); + } if (ret < 0) { binder_inner_proc_lock(target_proc); @@ -4696,6 +4716,7 @@ static int binder_ioctl_get_freezer_info( { struct binder_proc *target_proc; bool found = false; + __u32 txns_pending; info->sync_recv = 0; info->async_recv = 0; @@ -4705,7 +4726,9 @@ static int binder_ioctl_get_freezer_info( if (target_proc->pid == info->pid) { found = true; binder_inner_proc_lock(target_proc); - info->sync_recv |= target_proc->sync_recv; + txns_pending = binder_txns_pending_ilocked(target_proc); + info->sync_recv |= target_proc->sync_recv | + (txns_pending << 1); info->async_recv |= target_proc->async_recv; binder_inner_proc_unlock(target_proc); } diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index 810c0b84d3f8..402c4d4362a8 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -378,6 +378,8 @@ struct binder_ref { * binder transactions * (protected by @inner_lock) * @sync_recv: process received sync transactions since last frozen + * bit 0: received sync transaction after being frozen + * bit 1: new pending sync transaction during freezing * (protected by @inner_lock) * @async_recv: process received async transactions since last frozen * (protected by @inner_lock) diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 20e435fe657a..3246f2c74696 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -225,7 +225,14 @@ struct binder_freeze_info { struct binder_frozen_status_info { __u32 pid; + + /* process received sync transactions since last frozen + * bit 0: received sync transaction after being frozen + * bit 1: new pending sync transaction during freezing + */ __u32 sync_recv; + + /* process received async transactions since last frozen */ __u32 async_recv; }; From 5fdb55c1ac9585eb23bb2541d5819224429e103d Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Mon, 30 Aug 2021 12:51:46 -0700 Subject: [PATCH 134/543] binder: make sure fd closes complete During BC_FREE_BUFFER processing, the BINDER_TYPE_FDA object cleanup may close 1 or more fds. The close operations are completed using the task work mechanism -- which means the thread needs to return to userspace or the file object may never be dereferenced -- which can lead to hung processes. Force the binder thread back to userspace if an fd is closed during BC_FREE_BUFFER handling. Fixes: 80cd795630d6 ("binder: fix use-after-free due to ksys_close() during fdget()") Cc: stable Reviewed-by: Martijn Coenen Acked-by: Christian Brauner Signed-off-by: Todd Kjos Link: https://lore.kernel.org/r/20210830195146.587206-1-tkjos@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 1a68c2f590cf..9edacc8b9768 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1852,6 +1852,7 @@ static void binder_deferred_fd_close(int fd) } static void binder_transaction_buffer_release(struct binder_proc *proc, + struct binder_thread *thread, struct binder_buffer *buffer, binder_size_t failed_at, bool is_failure) @@ -2011,8 +2012,16 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, &proc->alloc, &fd, buffer, offset, sizeof(fd)); WARN_ON(err); - if (!err) + if (!err) { binder_deferred_fd_close(fd); + /* + * Need to make sure the thread goes + * back to userspace to complete the + * deferred close + */ + if (thread) + thread->looper_need_return = true; + } } } break; default: @@ -3104,7 +3113,7 @@ err_bad_parent: err_copy_data_failed: binder_free_txn_fixups(t); trace_binder_transaction_failed_buffer_release(t->buffer); - binder_transaction_buffer_release(target_proc, t->buffer, + binder_transaction_buffer_release(target_proc, NULL, t->buffer, buffer_offset, true); if (target_node) binder_dec_node_tmpref(target_node); @@ -3183,7 +3192,9 @@ err_invalid_target_handle: * Cleanup buffer and free it. */ static void -binder_free_buf(struct binder_proc *proc, struct binder_buffer *buffer) +binder_free_buf(struct binder_proc *proc, + struct binder_thread *thread, + struct binder_buffer *buffer) { binder_inner_proc_lock(proc); if (buffer->transaction) { @@ -3211,7 +3222,7 @@ binder_free_buf(struct binder_proc *proc, struct binder_buffer *buffer) binder_node_inner_unlock(buf_node); } trace_binder_transaction_buffer_release(buffer); - binder_transaction_buffer_release(proc, buffer, 0, false); + binder_transaction_buffer_release(proc, thread, buffer, 0, false); binder_alloc_free_buf(&proc->alloc, buffer); } @@ -3413,7 +3424,7 @@ static int binder_thread_write(struct binder_proc *proc, proc->pid, thread->pid, (u64)data_ptr, buffer->debug_id, buffer->transaction ? "active" : "finished"); - binder_free_buf(proc, buffer); + binder_free_buf(proc, thread, buffer); break; } @@ -4106,7 +4117,7 @@ retry: buffer->transaction = NULL; binder_cleanup_transaction(t, "fd fixups failed", BR_FAILED_REPLY); - binder_free_buf(proc, buffer); + binder_free_buf(proc, thread, buffer); binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, "%d:%d %stransaction %d fd fixups failed %d/%d, line %d\n", proc->pid, thread->pid, From 7a8aa39d44564703620d937bb54cdea2d003657f Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 13 Sep 2021 17:05:51 +0100 Subject: [PATCH 135/543] nvmem: core: Add stubs for nvmem_cell_read_variable_le_u32/64 if !CONFIG_NVMEM When I added nvmem_cell_read_variable_le_u32() and nvmem_cell_read_variable_le_u64() I forgot to add the "static inline" stub functions for when CONFIG_NVMEM wasn't defined. Add them now. This was causing problems with randconfig builds that compiled `drivers/soc/qcom/cpr.c`. Fixes: 6feba6a62c57 ("PM: AVS: qcom-cpr: Use nvmem_cell_read_variable_le_u32()") Fixes: a28e824fb827 ("nvmem: core: Add functions to make number reading easy") Reported-by: kernel test robot Reviewed-by: Bjorn Andersson Signed-off-by: Douglas Anderson Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210913160551.12907-1-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/nvmem-consumer.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h index 923dada24eb4..c0c0cefc3b92 100644 --- a/include/linux/nvmem-consumer.h +++ b/include/linux/nvmem-consumer.h @@ -150,6 +150,20 @@ static inline int nvmem_cell_read_u64(struct device *dev, return -EOPNOTSUPP; } +static inline int nvmem_cell_read_variable_le_u32(struct device *dev, + const char *cell_id, + u32 *val) +{ + return -EOPNOTSUPP; +} + +static inline int nvmem_cell_read_variable_le_u64(struct device *dev, + const char *cell_id, + u64 *val) +{ + return -EOPNOTSUPP; +} + static inline struct nvmem_device *nvmem_device_get(struct device *dev, const char *name) { From 212b5d2d3ed9d7db2702e4805f36a346c3985e1d Mon Sep 17 00:00:00 2001 From: Jian Cai Date: Mon, 13 Sep 2021 10:46:13 -0600 Subject: [PATCH 136/543] coresight: syscfg: Fix compiler warning This fixes warnings with -Wimplicit-function-declaration, e.g. drivers/hwtracing/coresight/coresight-syscfg.c:455:15: error: implicit declaration of function 'kzalloc' [-Werror, -Wimplicit-function-declaration] csdev_item = kzalloc(sizeof(struct cscfg_registered_csdev), GFP_KERNEL); Link: https://lore.kernel.org/r/20210830172820.2840433-1-jiancai@google.com Fixes: 85e2414c518a ("coresight: syscfg: Initial coresight system configuration") Reviewed-by: Guenter Roeck Signed-off-by: Jian Cai Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20210913164613.1675791-2-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-syscfg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwtracing/coresight/coresight-syscfg.c b/drivers/hwtracing/coresight/coresight-syscfg.c index fc0760f55c53..43054568430f 100644 --- a/drivers/hwtracing/coresight/coresight-syscfg.c +++ b/drivers/hwtracing/coresight/coresight-syscfg.c @@ -5,6 +5,7 @@ */ #include +#include #include "coresight-config.h" #include "coresight-etm-perf.h" From 92dc0b1f46e12cfabd28d709bb34f7a39431b44f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Sep 2021 14:45:38 +0200 Subject: [PATCH 137/543] staging: greybus: uart: fix tty use after free User space can hold a tty open indefinitely and tty drivers must not release the underlying structures until the last user is gone. Switch to using the tty-port reference counter to manage the life time of the greybus tty state to avoid use after free after a disconnect. Fixes: a18e15175708 ("greybus: more uart work") Cc: stable@vger.kernel.org # 4.9 Reviewed-by: Alex Elder Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20210906124538.22358-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/uart.c | 62 ++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/drivers/staging/greybus/uart.c b/drivers/staging/greybus/uart.c index e6d860a9678e..dc4ed0ff1ae2 100644 --- a/drivers/staging/greybus/uart.c +++ b/drivers/staging/greybus/uart.c @@ -761,6 +761,17 @@ out: gbphy_runtime_put_autosuspend(gb_tty->gbphy_dev); } +static void gb_tty_port_destruct(struct tty_port *port) +{ + struct gb_tty *gb_tty = container_of(port, struct gb_tty, port); + + if (gb_tty->minor != GB_NUM_MINORS) + release_minor(gb_tty); + kfifo_free(&gb_tty->write_fifo); + kfree(gb_tty->buffer); + kfree(gb_tty); +} + static const struct tty_operations gb_ops = { .install = gb_tty_install, .open = gb_tty_open, @@ -786,6 +797,7 @@ static const struct tty_port_operations gb_port_ops = { .dtr_rts = gb_tty_dtr_rts, .activate = gb_tty_port_activate, .shutdown = gb_tty_port_shutdown, + .destruct = gb_tty_port_destruct, }; static int gb_uart_probe(struct gbphy_device *gbphy_dev, @@ -798,17 +810,11 @@ static int gb_uart_probe(struct gbphy_device *gbphy_dev, int retval; int minor; - gb_tty = kzalloc(sizeof(*gb_tty), GFP_KERNEL); - if (!gb_tty) - return -ENOMEM; - connection = gb_connection_create(gbphy_dev->bundle, le16_to_cpu(gbphy_dev->cport_desc->id), gb_uart_request_handler); - if (IS_ERR(connection)) { - retval = PTR_ERR(connection); - goto exit_tty_free; - } + if (IS_ERR(connection)) + return PTR_ERR(connection); max_payload = gb_operation_get_payload_size_max(connection); if (max_payload < sizeof(struct gb_uart_send_data_request)) { @@ -816,13 +822,23 @@ static int gb_uart_probe(struct gbphy_device *gbphy_dev, goto exit_connection_destroy; } + gb_tty = kzalloc(sizeof(*gb_tty), GFP_KERNEL); + if (!gb_tty) { + retval = -ENOMEM; + goto exit_connection_destroy; + } + + tty_port_init(&gb_tty->port); + gb_tty->port.ops = &gb_port_ops; + gb_tty->minor = GB_NUM_MINORS; + gb_tty->buffer_payload_max = max_payload - sizeof(struct gb_uart_send_data_request); gb_tty->buffer = kzalloc(gb_tty->buffer_payload_max, GFP_KERNEL); if (!gb_tty->buffer) { retval = -ENOMEM; - goto exit_connection_destroy; + goto exit_put_port; } INIT_WORK(&gb_tty->tx_work, gb_uart_tx_write_work); @@ -830,7 +846,7 @@ static int gb_uart_probe(struct gbphy_device *gbphy_dev, retval = kfifo_alloc(&gb_tty->write_fifo, GB_UART_WRITE_FIFO_SIZE, GFP_KERNEL); if (retval) - goto exit_buf_free; + goto exit_put_port; gb_tty->credits = GB_UART_FIRMWARE_CREDITS; init_completion(&gb_tty->credits_complete); @@ -844,7 +860,7 @@ static int gb_uart_probe(struct gbphy_device *gbphy_dev, } else { retval = minor; } - goto exit_kfifo_free; + goto exit_put_port; } gb_tty->minor = minor; @@ -853,9 +869,6 @@ static int gb_uart_probe(struct gbphy_device *gbphy_dev, init_waitqueue_head(&gb_tty->wioctl); mutex_init(&gb_tty->mutex); - tty_port_init(&gb_tty->port); - gb_tty->port.ops = &gb_port_ops; - gb_tty->connection = connection; gb_tty->gbphy_dev = gbphy_dev; gb_connection_set_data(connection, gb_tty); @@ -863,7 +876,7 @@ static int gb_uart_probe(struct gbphy_device *gbphy_dev, retval = gb_connection_enable_tx(connection); if (retval) - goto exit_release_minor; + goto exit_put_port; send_control(gb_tty, gb_tty->ctrlout); @@ -890,16 +903,10 @@ static int gb_uart_probe(struct gbphy_device *gbphy_dev, exit_connection_disable: gb_connection_disable(connection); -exit_release_minor: - release_minor(gb_tty); -exit_kfifo_free: - kfifo_free(&gb_tty->write_fifo); -exit_buf_free: - kfree(gb_tty->buffer); +exit_put_port: + tty_port_put(&gb_tty->port); exit_connection_destroy: gb_connection_destroy(connection); -exit_tty_free: - kfree(gb_tty); return retval; } @@ -930,15 +937,10 @@ static void gb_uart_remove(struct gbphy_device *gbphy_dev) gb_connection_disable_rx(connection); tty_unregister_device(gb_tty_driver, gb_tty->minor); - /* FIXME - free transmit / receive buffers */ - gb_connection_disable(connection); - tty_port_destroy(&gb_tty->port); gb_connection_destroy(connection); - release_minor(gb_tty); - kfifo_free(&gb_tty->write_fifo); - kfree(gb_tty->buffer); - kfree(gb_tty); + + tty_port_put(&gb_tty->port); } static int gb_tty_init(void) From 8480ed9c2bbd56fc86524998e5f2e3e22f5038f6 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 27 Aug 2021 14:32:06 +0200 Subject: [PATCH 138/543] xen/balloon: use a kernel thread instead a workqueue Today the Xen ballooning is done via delayed work in a workqueue. This might result in workqueue hangups being reported in case of large amounts of memory are being ballooned in one go (here 16GB): BUG: workqueue lockup - pool cpus=6 node=0 flags=0x0 nice=0 stuck for 64s! Showing busy workqueues and worker pools: workqueue events: flags=0x0 pwq 12: cpus=6 node=0 flags=0x0 nice=0 active=2/256 refcnt=3 in-flight: 229:balloon_process pending: cache_reap workqueue events_freezable_power_: flags=0x84 pwq 12: cpus=6 node=0 flags=0x0 nice=0 active=1/256 refcnt=2 pending: disk_events_workfn workqueue mm_percpu_wq: flags=0x8 pwq 12: cpus=6 node=0 flags=0x0 nice=0 active=1/256 refcnt=2 pending: vmstat_update pool 12: cpus=6 node=0 flags=0x0 nice=0 hung=64s workers=3 idle: 2222 43 This can easily be avoided by using a dedicated kernel thread for doing the ballooning work. Reported-by: Jan Beulich Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20210827123206.15429-1-jgross@suse.com Signed-off-by: Juergen Gross --- drivers/xen/balloon.c | 62 +++++++++++++++++++++++++++++++------------ 1 file changed, 45 insertions(+), 17 deletions(-) diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 671c71245a7b..2d2803883306 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -43,6 +43,8 @@ #include #include #include +#include +#include #include #include #include @@ -115,7 +117,7 @@ static struct ctl_table xen_root[] = { #define EXTENT_ORDER (fls(XEN_PFN_PER_PAGE) - 1) /* - * balloon_process() state: + * balloon_thread() state: * * BP_DONE: done or nothing to do, * BP_WAIT: wait to be rescheduled, @@ -130,6 +132,8 @@ enum bp_state { BP_ECANCELED }; +/* Main waiting point for xen-balloon thread. */ +static DECLARE_WAIT_QUEUE_HEAD(balloon_thread_wq); static DEFINE_MUTEX(balloon_mutex); @@ -144,10 +148,6 @@ static xen_pfn_t frame_list[PAGE_SIZE / sizeof(xen_pfn_t)]; static LIST_HEAD(ballooned_pages); static DECLARE_WAIT_QUEUE_HEAD(balloon_wq); -/* Main work function, always executed in process context. */ -static void balloon_process(struct work_struct *work); -static DECLARE_DELAYED_WORK(balloon_worker, balloon_process); - /* When ballooning out (allocating memory to return to Xen) we don't really want the kernel to try too hard since that can trigger the oom killer. */ #define GFP_BALLOON \ @@ -366,7 +366,7 @@ static void xen_online_page(struct page *page, unsigned int order) static int xen_memory_notifier(struct notifier_block *nb, unsigned long val, void *v) { if (val == MEM_ONLINE) - schedule_delayed_work(&balloon_worker, 0); + wake_up(&balloon_thread_wq); return NOTIFY_OK; } @@ -491,18 +491,43 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) } /* - * As this is a work item it is guaranteed to run as a single instance only. + * Stop waiting if either state is not BP_EAGAIN and ballooning action is + * needed, or if the credit has changed while state is BP_EAGAIN. + */ +static bool balloon_thread_cond(enum bp_state state, long credit) +{ + if (state != BP_EAGAIN) + credit = 0; + + return current_credit() != credit || kthread_should_stop(); +} + +/* + * As this is a kthread it is guaranteed to run as a single instance only. * We may of course race updates of the target counts (which are protected * by the balloon lock), or with changes to the Xen hard limit, but we will * recover from these in time. */ -static void balloon_process(struct work_struct *work) +static int balloon_thread(void *unused) { enum bp_state state = BP_DONE; long credit; + unsigned long timeout; + set_freezable(); + for (;;) { + if (state == BP_EAGAIN) + timeout = balloon_stats.schedule_delay * HZ; + else + timeout = 3600 * HZ; + credit = current_credit(); + + wait_event_interruptible_timeout(balloon_thread_wq, + balloon_thread_cond(state, credit), timeout); + + if (kthread_should_stop()) + return 0; - do { mutex_lock(&balloon_mutex); credit = current_credit(); @@ -529,12 +554,7 @@ static void balloon_process(struct work_struct *work) mutex_unlock(&balloon_mutex); cond_resched(); - - } while (credit && state == BP_DONE); - - /* Schedule more work if there is some still to be done. */ - if (state == BP_EAGAIN) - schedule_delayed_work(&balloon_worker, balloon_stats.schedule_delay * HZ); + } } /* Resets the Xen limit, sets new target, and kicks off processing. */ @@ -542,7 +562,7 @@ void balloon_set_new_target(unsigned long target) { /* No need for lock. Not read-modify-write updates. */ balloon_stats.target_pages = target; - schedule_delayed_work(&balloon_worker, 0); + wake_up(&balloon_thread_wq); } EXPORT_SYMBOL_GPL(balloon_set_new_target); @@ -647,7 +667,7 @@ void free_xenballooned_pages(int nr_pages, struct page **pages) /* The balloon may be too large now. Shrink it if needed. */ if (current_credit()) - schedule_delayed_work(&balloon_worker, 0); + wake_up(&balloon_thread_wq); mutex_unlock(&balloon_mutex); } @@ -679,6 +699,8 @@ static void __init balloon_add_region(unsigned long start_pfn, static int __init balloon_init(void) { + struct task_struct *task; + if (!xen_domain()) return -ENODEV; @@ -722,6 +744,12 @@ static int __init balloon_init(void) } #endif + task = kthread_run(balloon_thread, NULL, "xen-balloon"); + if (IS_ERR(task)) { + pr_err("xen-balloon thread could not be started, ballooning will not work!\n"); + return PTR_ERR(task); + } + /* Init the xen-balloon driver. */ xen_balloon_init(); From 0560204b360a332c321124dbc5cdfd3364533a74 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 3 Sep 2021 10:49:36 +0200 Subject: [PATCH 139/543] PM: base: power: don't try to use non-existing RTC for storing data If there is no legacy RTC device, don't try to use it for storing trace data across suspend/resume. Cc: Signed-off-by: Juergen Gross Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20210903084937.19392-2-jgross@suse.com Signed-off-by: Juergen Gross --- drivers/base/power/trace.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c index a97f33d0c59f..94665037f4a3 100644 --- a/drivers/base/power/trace.c +++ b/drivers/base/power/trace.c @@ -13,6 +13,7 @@ #include #include #include +#include #include @@ -165,6 +166,9 @@ void generate_pm_trace(const void *tracedata, unsigned int user) const char *file = *(const char **)(tracedata + 2); unsigned int user_hash_value, file_hash_value; + if (!x86_platform.legacy.rtc) + return; + user_hash_value = user % USERHASH; file_hash_value = hash_string(lineno, file, FILEHASH); set_magic_time(user_hash_value, file_hash_value, dev_hash_value); @@ -267,6 +271,9 @@ static struct notifier_block pm_trace_nb = { static int __init early_resume_init(void) { + if (!x86_platform.legacy.rtc) + return 0; + hash_value_early_read = read_magic_time(); register_pm_notifier(&pm_trace_nb); return 0; @@ -277,6 +284,9 @@ static int __init late_resume_init(void) unsigned int val = hash_value_early_read; unsigned int user, file, dev; + if (!x86_platform.legacy.rtc) + return 0; + user = val % USERHASH; val = val / USERHASH; file = val % FILEHASH; From 9af4bf2171c1a9e3f2ebb21140c0e34e60b2a22a Mon Sep 17 00:00:00 2001 From: Lee Shawn C Date: Tue, 6 Jul 2021 23:25:41 +0800 Subject: [PATCH 140/543] drm/i915/dp: return proper DPRX link training result After DPRX link training, intel_dp_link_train_phy() did not return the training result properly. If link training failed, i915 driver would not run into link train fallback function. And no hotplug uevent would be received by user space application. Fixes: b30edfd8d0b4 ("drm/i915: Switch to LTTPR non-transparent mode link training") Cc: Ville Syrjala Cc: Imre Deak Cc: Jani Nikula Cc: Cooper Chiou Cc: William Tseng Signed-off-by: Lee Shawn C Reviewed-by: Imre Deak Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20210706152541.25021-1-shawn.c.lee@intel.com (cherry picked from commit dab1b47e57e053b2a02c22ead8e7449f79961335) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp_link_training.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 053a3c2f7267..508a514c5e37 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -848,7 +848,7 @@ intel_dp_link_train_all_phys(struct intel_dp *intel_dp, } if (ret) - intel_dp_link_train_phy(intel_dp, crtc_state, DP_PHY_DPRX); + ret = intel_dp_link_train_phy(intel_dp, crtc_state, DP_PHY_DPRX); if (intel_dp->set_idle_link_train) intel_dp->set_idle_link_train(intel_dp, crtc_state); From c8dead5751b81dfa6b10449b740ed1062ff670c5 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 20 Aug 2021 15:52:59 +0800 Subject: [PATCH 141/543] drm/i915/dp: Use max params for panels < eDP 1.4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Users reported that after commit 2bbd6dba84d4 ("drm/i915: Try to use fast+narrow link on eDP again and fall back to the old max strategy on failure"), the screen starts to have wobbly effect. Commit a5c936add6a2 ("drm/i915/dp: Use slow and wide link training for everything") doesn't help either, that means the affected eDP 1.2 panels only work with max params. So use max params for panels < eDP 1.4 as Windows does to solve the issue. v3: - Do the eDP rev check in intel_edp_init_dpcd() v2: - Check eDP 1.4 instead of DPCD 1.1 to apply max params Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/3714 Fixes: 2bbd6dba84d4 ("drm/i915: Try to use fast+narrow link on eDP again and fall back to the old max strategy on failure") Fixes: a5c936add6a2 ("drm/i915/dp: Use slow and wide link training for everything") Suggested-by: Ville Syrjälä Signed-off-by: Kai-Heng Feng Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210820075301.693099-1-kai.heng.feng@canonical.com (cherry picked from commit d7f213c131adf0bec8b731553eb82990cdac265d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 04175f359fd6..abe3d61b6243 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2445,11 +2445,14 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) */ if (drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_DPCD_REV, intel_dp->edp_dpcd, sizeof(intel_dp->edp_dpcd)) == - sizeof(intel_dp->edp_dpcd)) + sizeof(intel_dp->edp_dpcd)) { drm_dbg_kms(&dev_priv->drm, "eDP DPCD: %*ph\n", (int)sizeof(intel_dp->edp_dpcd), intel_dp->edp_dpcd); + intel_dp->use_max_params = intel_dp->edp_dpcd[0] < DP_EDP_14; + } + /* * This has to be called after intel_dp->edp_dpcd is filled, PSR checks * for SET_POWER_CAPABLE bit in intel_dp->edp_dpcd[1] From 415406380c29694e12b164f05e467659381feca5 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Mon, 23 Aug 2021 09:31:37 -0700 Subject: [PATCH 142/543] drm/i915/guc: drop guc_communication_enabled The function is only used from within GEM_BUG_ON(), which is causing warnings with Wunneeded-internal-declaration in some builds. Since the function is a simple wrapper around a CT function, we can just call the CT function directly instead. Fixes: 1fb12c587152 ("drm/i915/guc: skip disabling CTBs before sanitizing the GuC") Reported-by: kernel test robot Signed-off-by: Daniele Ceraolo Spurio Cc: Matthew Brost Cc: John Harrison Reviewed-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210823163137.19770-1-daniele.ceraolospurio@intel.com (cherry picked from commit 5db1856781e45c9610f7652a19cc656b984235e7) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index b104fb7607eb..86c318516e14 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -172,11 +172,6 @@ void intel_uc_driver_remove(struct intel_uc *uc) __uc_free_load_err_log(uc); } -static inline bool guc_communication_enabled(struct intel_guc *guc) -{ - return intel_guc_ct_enabled(&guc->ct); -} - /* * Events triggered while CT buffers are disabled are logged in the SCRATCH_15 * register using the same bits used in the CT message payload. Since our @@ -210,7 +205,7 @@ static void guc_get_mmio_msg(struct intel_guc *guc) static void guc_handle_mmio_msg(struct intel_guc *guc) { /* we need communication to be enabled to reply to GuC */ - GEM_BUG_ON(!guc_communication_enabled(guc)); + GEM_BUG_ON(!intel_guc_ct_enabled(&guc->ct)); spin_lock_irq(&guc->irq_lock); if (guc->mmio_msg) { @@ -226,7 +221,7 @@ static int guc_enable_communication(struct intel_guc *guc) struct drm_i915_private *i915 = gt->i915; int ret; - GEM_BUG_ON(guc_communication_enabled(guc)); + GEM_BUG_ON(intel_guc_ct_enabled(&guc->ct)); ret = i915_inject_probe_error(i915, -ENXIO); if (ret) @@ -662,7 +657,7 @@ static int __uc_resume(struct intel_uc *uc, bool enable_communication) return 0; /* Make sure we enable communication if and only if it's disabled */ - GEM_BUG_ON(enable_communication == guc_communication_enabled(guc)); + GEM_BUG_ON(enable_communication == intel_guc_ct_enabled(&guc->ct)); if (enable_communication) guc_enable_communication(guc); From 04a3ab6acd54b104838b3f6bd715447631c6e87d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Tue, 31 Aug 2021 14:29:31 +0200 Subject: [PATCH 143/543] drm/i915/gem: Fix the mman selftest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using the I915_MMAP_TYPE_FIXED mmap type requires the TTM backend, so for that mmap type, use __i915_gem_object_create_user() instead of i915_gem_object_create_internal(), as we really want to tests objects mmap-able by user-space. This also means that the out-of-space error happens at object creation and returns -ENXIO rather than -ENOSPC, so fix the code up to expect that on out-of-offset-space errors. Finally only use I915_MMAP_TYPE_FIXED for LMEM and SMEM for now if testing on LMEM-capable devices. For stolen LMEM, we still take the same path as for integrated, as that haven't been moved over to TTM yet, and user-space should not be able to create out of stolen LMEM anyway. v2: - Check the presence of the obj->ops->mmap_offset callback rather than hardcoding the supported mmap regions in can_mmap() (Maarten Lankhorst) Fixes: 7961c5b60f23 ("drm/i915: Add TTM offset argument to mmap.") Cc: Maarten Lankhorst Signed-off-by: Thomas Hellström Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20210831122931.157536-1-thomas.hellstrom@linux.intel.com (cherry picked from commit 450cede7f3804ca7f8b3da210ebefa61c0958f22) Signed-off-by: Jani Nikula --- .../drm/i915/gem/selftests/i915_gem_mman.c | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index b20f5621f62b..a2c34e5a1c54 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -581,6 +581,20 @@ static enum i915_mmap_type default_mapping(struct drm_i915_private *i915) return I915_MMAP_TYPE_GTT; } +static struct drm_i915_gem_object * +create_sys_or_internal(struct drm_i915_private *i915, + unsigned long size) +{ + if (HAS_LMEM(i915)) { + struct intel_memory_region *sys_region = + i915->mm.regions[INTEL_REGION_SMEM]; + + return __i915_gem_object_create_user(i915, size, &sys_region, 1); + } + + return i915_gem_object_create_internal(i915, size); +} + static bool assert_mmap_offset(struct drm_i915_private *i915, unsigned long size, int expected) @@ -589,7 +603,7 @@ static bool assert_mmap_offset(struct drm_i915_private *i915, u64 offset; int ret; - obj = i915_gem_object_create_internal(i915, size); + obj = create_sys_or_internal(i915, size); if (IS_ERR(obj)) return expected && expected == PTR_ERR(obj); @@ -633,6 +647,7 @@ static int igt_mmap_offset_exhaustion(void *arg) struct drm_mm_node *hole, *next; int loop, err = 0; u64 offset; + int enospc = HAS_LMEM(i915) ? -ENXIO : -ENOSPC; /* Disable background reaper */ disable_retire_worker(i915); @@ -683,14 +698,14 @@ static int igt_mmap_offset_exhaustion(void *arg) } /* Too large */ - if (!assert_mmap_offset(i915, 2 * PAGE_SIZE, -ENOSPC)) { + if (!assert_mmap_offset(i915, 2 * PAGE_SIZE, enospc)) { pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n"); err = -EINVAL; goto out; } /* Fill the hole, further allocation attempts should then fail */ - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + obj = create_sys_or_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); pr_err("Unable to create object for reclaimed hole\n"); @@ -703,7 +718,7 @@ static int igt_mmap_offset_exhaustion(void *arg) goto err_obj; } - if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) { + if (!assert_mmap_offset(i915, PAGE_SIZE, enospc)) { pr_err("Unexpectedly succeeded in inserting object into no holes!\n"); err = -EINVAL; goto err_obj; @@ -839,10 +854,9 @@ static int wc_check(struct drm_i915_gem_object *obj) static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); bool no_map; - if (HAS_LMEM(i915)) + if (obj->ops->mmap_offset) return type == I915_MMAP_TYPE_FIXED; else if (type == I915_MMAP_TYPE_FIXED) return false; From 031536665f64aaeb7e7439d96689a4011407abb8 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 Sep 2021 16:20:48 +0200 Subject: [PATCH 144/543] drm/i915: Release ctx->syncobj on final put, not on ctx close MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gem context refcounting is another exercise in least locking design it seems, where most things get destroyed upon context closure (which can race with anything really). Only the actual memory allocation and the locks survive while holding a reference. This tripped up Jason when reimplementing the single timeline feature in commit 00dae4d3d35d4f526929633b76e00b0ab4d3970d Author: Jason Ekstrand Date: Thu Jul 8 10:48:12 2021 -0500 drm/i915: Implement SINGLE_TIMELINE with a syncobj (v4) We could fix the bug by holding ctx->mutex in execbuf and clear the pointer (again while holding the mutex) context_close, but it's cleaner to just make the context object actually invariant over its _entire_ lifetime. This way any other ioctl that's potentially racing, but holding a full reference, can still rely on ctx->syncobj being an immutable pointer. Which without this change, is not the case. Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Fixes: 00dae4d3d35d ("drm/i915: Implement SINGLE_TIMELINE with a syncobj (v4)") Cc: Jason Ekstrand Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Matthew Brost Cc: Matthew Auld Cc: Maarten Lankhorst Cc: "Thomas Hellström" Cc: Lionel Landwerlin Cc: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20210902142057.929669-2-daniel.vetter@ffwll.ch (cherry picked from commit c238980efd3b35af70fc926066cf7440f50a97a9) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index cff72679ad7c..9ccf4b29b82e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -986,6 +986,9 @@ void i915_gem_context_release(struct kref *ref) trace_i915_context_free(ctx); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); + if (ctx->syncobj) + drm_syncobj_put(ctx->syncobj); + mutex_destroy(&ctx->engines_mutex); mutex_destroy(&ctx->lut_mutex); @@ -1205,9 +1208,6 @@ static void context_close(struct i915_gem_context *ctx) if (vm) i915_vm_close(vm); - if (ctx->syncobj) - drm_syncobj_put(ctx->syncobj); - ctx->file_priv = ERR_PTR(-EBADF); /* From 00598d5c69318a1fcb4147878e16754ba9103be6 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Tue, 7 Sep 2021 16:27:04 -0700 Subject: [PATCH 145/543] drm/i915: Get PM ref before accessing HW register Seeing these errors when GT is likely in suspend state- "RPM wakelock ref not held during HW access" Ensure GT is awake before trying to access HW registers. Avoid reading the register if that is not the case. Signed-off-by: Vinay Belgaumkar Fixes: 41e5c17ebfc2 ("drm/i915/guc/slpc: Sysfs hooks for SLPC") Reviewed-by: Tvrtko Ursulin Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210907232704.12982-1-vinay.belgaumkar@intel.com (cherry picked from commit f25e3908b9cd4a3fe819e9bdcdde58f20bacb34c) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_rps.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index d812b27835f8..591a5224287e 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1973,8 +1973,14 @@ u32 intel_rps_read_actual_frequency(struct intel_rps *rps) u32 intel_rps_read_punit_req(struct intel_rps *rps) { struct intel_uncore *uncore = rps_to_uncore(rps); + struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; + intel_wakeref_t wakeref; + u32 freq = 0; - return intel_uncore_read(uncore, GEN6_RPNSWREQ); + with_intel_runtime_pm_if_in_use(rpm, wakeref) + freq = intel_uncore_read(uncore, GEN6_RPNSWREQ); + + return freq; } static u32 intel_rps_get_req(u32 pureq) From 79e9e30a9292a62d25ab75488d3886108db1eaad Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Fri, 3 Sep 2021 00:05:50 -0500 Subject: [PATCH 146/543] serial: 8250: 8250_omap: Fix RX_LVL register offset Commit b67e830d38fa ("serial: 8250: 8250_omap: Fix possible interrupt storm on K3 SoCs") introduced fixup including a register read to RX_LVL, however, we should be using word offset than byte offset since our registers are on 4 byte boundary (port.regshift = 2) for 8250_omap. Fixes: b67e830d38fa ("serial: 8250: 8250_omap: Fix possible interrupt storm on K3 SoCs") Cc: stable Cc: Jan Kiszka Cc: Vignesh Raghavendra Signed-off-by: Nishanth Menon Link: https://lore.kernel.org/r/20210903050550.29050-1-nm@ti.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_omap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 891fd8345e25..73e5f1dbd075 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -106,7 +106,7 @@ #define UART_OMAP_EFR2_TIMEOUT_BEHAVE BIT(6) /* RX FIFO occupancy indicator */ -#define UART_OMAP_RX_LVL 0x64 +#define UART_OMAP_RX_LVL 0x19 struct omap8250_priv { int line; From 74e1eb3b4a1ef2e564b4bdeb6e92afe844e900de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 11 Sep 2021 15:20:17 +0200 Subject: [PATCH 147/543] serial: mvebu-uart: fix driver's tx_empty callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Driver's tx_empty callback should signal when the transmit shift register is empty. So when the last character has been sent. STAT_TX_FIFO_EMP bit signals only that HW transmit FIFO is empty, which happens when the last byte is loaded into transmit shift register. STAT_TX_EMP bit signals when the both HW transmit FIFO and transmit shift register are empty. So replace STAT_TX_FIFO_EMP check by STAT_TX_EMP in mvebu_uart_tx_empty() callback function. Fixes: 30530791a7a0 ("serial: mvebu-uart: initial support for Armada-3700 serial port") Cc: stable Signed-off-by: Pali Rohár Link: https://lore.kernel.org/r/20210911132017.25505-1-pali@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/mvebu-uart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c index 231de29a6452..ab226da75f7b 100644 --- a/drivers/tty/serial/mvebu-uart.c +++ b/drivers/tty/serial/mvebu-uart.c @@ -163,7 +163,7 @@ static unsigned int mvebu_uart_tx_empty(struct uart_port *port) st = readl(port->membase + UART_STAT); spin_unlock_irqrestore(&port->lock, flags); - return (st & STAT_TX_FIFO_EMP) ? TIOCSER_TEMT : 0; + return (st & STAT_TX_EMP) ? TIOCSER_TEMT : 0; } static unsigned int mvebu_uart_get_mctrl(struct uart_port *port) From 13be2efc390acd2a46a69a359f6efc00ca434599 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Mar 2021 12:21:40 +0000 Subject: [PATCH 148/543] rtc: cmos: Disable irq around direct invocation of cmos_interrupt() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As previously noted in commit 66e4f4a9cc38 ("rtc: cmos: Use spin_lock_irqsave() in cmos_interrupt()"): <4>[ 254.192378] WARNING: inconsistent lock state <4>[ 254.192384] 5.12.0-rc1-CI-CI_DRM_9834+ #1 Not tainted <4>[ 254.192396] -------------------------------- <4>[ 254.192400] inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage. <4>[ 254.192409] rtcwake/5309 [HC0[0]:SC0[0]:HE1:SE1] takes: <4>[ 254.192429] ffffffff8263c5f8 (rtc_lock){?...}-{2:2}, at: cmos_interrupt+0x18/0x100 <4>[ 254.192481] {IN-HARDIRQ-W} state was registered at: <4>[ 254.192488] lock_acquire+0xd1/0x3d0 <4>[ 254.192504] _raw_spin_lock+0x2a/0x40 <4>[ 254.192519] cmos_interrupt+0x18/0x100 <4>[ 254.192536] rtc_handler+0x1f/0xc0 <4>[ 254.192553] acpi_ev_fixed_event_detect+0x109/0x13c <4>[ 254.192574] acpi_ev_sci_xrupt_handler+0xb/0x28 <4>[ 254.192596] acpi_irq+0x13/0x30 <4>[ 254.192620] __handle_irq_event_percpu+0x43/0x2c0 <4>[ 254.192641] handle_irq_event_percpu+0x2b/0x70 <4>[ 254.192661] handle_irq_event+0x2f/0x50 <4>[ 254.192680] handle_fasteoi_irq+0x9e/0x150 <4>[ 254.192693] __common_interrupt+0x76/0x140 <4>[ 254.192715] common_interrupt+0x96/0xc0 <4>[ 254.192732] asm_common_interrupt+0x1e/0x40 <4>[ 254.192750] _raw_spin_unlock_irqrestore+0x38/0x60 <4>[ 254.192767] resume_irqs+0xba/0xf0 <4>[ 254.192786] dpm_resume_noirq+0x245/0x3d0 <4>[ 254.192811] suspend_devices_and_enter+0x230/0xaa0 <4>[ 254.192835] pm_suspend.cold.8+0x301/0x34a <4>[ 254.192859] state_store+0x7b/0xe0 <4>[ 254.192879] kernfs_fop_write_iter+0x11d/0x1c0 <4>[ 254.192899] new_sync_write+0x11d/0x1b0 <4>[ 254.192916] vfs_write+0x265/0x390 <4>[ 254.192933] ksys_write+0x5a/0xd0 <4>[ 254.192949] do_syscall_64+0x33/0x80 <4>[ 254.192965] entry_SYSCALL_64_after_hwframe+0x44/0xae <4>[ 254.192986] irq event stamp: 43775 <4>[ 254.192994] hardirqs last enabled at (43775): [] asm_sysvec_apic_timer_interrupt+0x12/0x20 <4>[ 254.193023] hardirqs last disabled at (43774): [] sysvec_apic_timer_interrupt+0xa/0xb0 <4>[ 254.193049] softirqs last enabled at (42548): [] __do_softirq+0x342/0x48e <4>[ 254.193074] softirqs last disabled at (42543): [] irq_exit_rcu+0xad/0xd0 <4>[ 254.193101] other info that might help us debug this: <4>[ 254.193107] Possible unsafe locking scenario: <4>[ 254.193112] CPU0 <4>[ 254.193117] ---- <4>[ 254.193121] lock(rtc_lock); <4>[ 254.193137] <4>[ 254.193142] lock(rtc_lock); <4>[ 254.193156] *** DEADLOCK *** <4>[ 254.193161] 6 locks held by rtcwake/5309: <4>[ 254.193174] #0: ffff888104861430 (sb_writers#5){.+.+}-{0:0}, at: ksys_write+0x5a/0xd0 <4>[ 254.193232] #1: ffff88810f823288 (&of->mutex){+.+.}-{3:3}, at: kernfs_fop_write_iter+0xe7/0x1c0 <4>[ 254.193282] #2: ffff888100cef3c0 (kn->active#285 <7>[ 254.192706] i915 0000:00:02.0: [drm:intel_modeset_setup_hw_state [i915]] [CRTC:51:pipe A] hw state readout: disabled <4>[ 254.193307] ){.+.+}-{0:0}, at: kernfs_fop_write_iter+0xf0/0x1c0 <4>[ 254.193333] #3: ffffffff82649fa8 (system_transition_mutex){+.+.}-{3:3}, at: pm_suspend.cold.8+0xce/0x34a <4>[ 254.193387] #4: ffffffff827a2108 (acpi_scan_lock){+.+.}-{3:3}, at: acpi_suspend_begin+0x47/0x70 <4>[ 254.193433] #5: ffff8881019ea178 (&dev->mutex){....}-{3:3}, at: device_resume+0x68/0x1e0 <4>[ 254.193485] stack backtrace: <4>[ 254.193492] CPU: 1 PID: 5309 Comm: rtcwake Not tainted 5.12.0-rc1-CI-CI_DRM_9834+ #1 <4>[ 254.193514] Hardware name: Google Soraka/Soraka, BIOS MrChromebox-4.10 08/25/2019 <4>[ 254.193524] Call Trace: <4>[ 254.193536] dump_stack+0x7f/0xad <4>[ 254.193567] mark_lock.part.47+0x8ca/0xce0 <4>[ 254.193604] __lock_acquire+0x39b/0x2590 <4>[ 254.193626] ? asm_sysvec_apic_timer_interrupt+0x12/0x20 <4>[ 254.193660] lock_acquire+0xd1/0x3d0 <4>[ 254.193677] ? cmos_interrupt+0x18/0x100 <4>[ 254.193716] _raw_spin_lock+0x2a/0x40 <4>[ 254.193735] ? cmos_interrupt+0x18/0x100 <4>[ 254.193758] cmos_interrupt+0x18/0x100 <4>[ 254.193785] cmos_resume+0x2ac/0x2d0 <4>[ 254.193813] ? acpi_pm_set_device_wakeup+0x1f/0x110 <4>[ 254.193842] ? pnp_bus_suspend+0x10/0x10 <4>[ 254.193864] pnp_bus_resume+0x5e/0x90 <4>[ 254.193885] dpm_run_callback+0x5f/0x240 <4>[ 254.193914] device_resume+0xb2/0x1e0 <4>[ 254.193942] ? pm_dev_err+0x25/0x25 <4>[ 254.193974] dpm_resume+0xea/0x3f0 <4>[ 254.194005] dpm_resume_end+0x8/0x10 <4>[ 254.194030] suspend_devices_and_enter+0x29b/0xaa0 <4>[ 254.194066] pm_suspend.cold.8+0x301/0x34a <4>[ 254.194094] state_store+0x7b/0xe0 <4>[ 254.194124] kernfs_fop_write_iter+0x11d/0x1c0 <4>[ 254.194151] new_sync_write+0x11d/0x1b0 <4>[ 254.194183] vfs_write+0x265/0x390 <4>[ 254.194207] ksys_write+0x5a/0xd0 <4>[ 254.194232] do_syscall_64+0x33/0x80 <4>[ 254.194251] entry_SYSCALL_64_after_hwframe+0x44/0xae <4>[ 254.194274] RIP: 0033:0x7f07d79691e7 <4>[ 254.194293] Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24 <4>[ 254.194312] RSP: 002b:00007ffd9cc2c768 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 <4>[ 254.194337] RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007f07d79691e7 <4>[ 254.194352] RDX: 0000000000000004 RSI: 0000556ebfc63590 RDI: 000000000000000b <4>[ 254.194366] RBP: 0000556ebfc63590 R08: 0000000000000000 R09: 0000000000000004 <4>[ 254.194379] R10: 0000556ebf0ec2a6 R11: 0000000000000246 R12: 0000000000000004 which breaks S3-resume on fi-kbl-soraka presumably as that's slow enough to trigger the alarm during the suspend. Fixes: 6950d046eb6e ("rtc: cmos: Replace spin_lock_irqsave with spin_lock in hard IRQ") References: 66e4f4a9cc38 ("rtc: cmos: Use spin_lock_irqsave() in cmos_interrupt()"): Signed-off-by: Chris Wilson Cc: Xiaofei Tan Cc: Alexandre Belloni Cc: Alessandro Zummo Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20210305122140.28774-1-chris@chris-wilson.co.uk --- drivers/rtc/rtc-cmos.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index eb15067a605e..4eb53412b808 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -1047,7 +1047,9 @@ static void cmos_check_wkalrm(struct device *dev) * ACK the rtc irq here */ if (t_now >= cmos->alarm_expires && cmos_use_acpi_alarm()) { + local_irq_disable(); cmos_interrupt(0, (void *)cmos->rtc); + local_irq_enable(); return; } From 81065b35e2486c024c7aa86caed452e1f01a59d4 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 13 Sep 2021 14:52:39 -0700 Subject: [PATCH 149/543] x86/mce: Avoid infinite loop for copy from user recovery There are two cases for machine check recovery: 1) The machine check was triggered by ring3 (application) code. This is the simpler case. The machine check handler simply queues work to be executed on return to user. That code unmaps the page from all users and arranges to send a SIGBUS to the task that triggered the poison. 2) The machine check was triggered in kernel code that is covered by an exception table entry. In this case the machine check handler still queues a work entry to unmap the page, etc. but this will not be called right away because the #MC handler returns to the fix up code address in the exception table entry. Problems occur if the kernel triggers another machine check before the return to user processes the first queued work item. Specifically, the work is queued using the ->mce_kill_me callback structure in the task struct for the current thread. Attempting to queue a second work item using this same callback results in a loop in the linked list of work functions to call. So when the kernel does return to user, it enters an infinite loop processing the same entry for ever. There are some legitimate scenarios where the kernel may take a second machine check before returning to the user. 1) Some code (e.g. futex) first tries a get_user() with page faults disabled. If this fails, the code retries with page faults enabled expecting that this will resolve the page fault. 2) Copy from user code retries a copy in byte-at-time mode to check whether any additional bytes can be copied. On the other side of the fence are some bad drivers that do not check the return value from individual get_user() calls and may access multiple user addresses without noticing that some/all calls have failed. Fix by adding a counter (current->mce_count) to keep track of repeated machine checks before task_work() is called. First machine check saves the address information and calls task_work_add(). Subsequent machine checks before that task_work call back is executed check that the address is in the same page as the first machine check (since the callback will offline exactly one page). Expected worst case is four machine checks before moving on (e.g. one user access with page faults disabled, then a repeat to the same address with page faults enabled ... repeat in copy tail bytes). Just in case there is some code that loops forever enforce a limit of 10. [ bp: Massage commit message, drop noinstr, fix typo, extend panic messages. ] Fixes: 5567d11c21a1 ("x86/mce: Send #MC singal from task work") Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Cc: Link: https://lkml.kernel.org/r/YT/IJ9ziLqmtqEPu@agluck-desk2.amr.corp.intel.com --- arch/x86/kernel/cpu/mce/core.c | 43 +++++++++++++++++++++++++--------- include/linux/sched.h | 1 + 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 8cb7816d03b4..193204aee880 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1253,6 +1253,9 @@ static void __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *fin static void kill_me_now(struct callback_head *ch) { + struct task_struct *p = container_of(ch, struct task_struct, mce_kill_me); + + p->mce_count = 0; force_sig(SIGBUS); } @@ -1262,6 +1265,7 @@ static void kill_me_maybe(struct callback_head *cb) int flags = MF_ACTION_REQUIRED; int ret; + p->mce_count = 0; pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr); if (!p->mce_ripv) @@ -1290,17 +1294,34 @@ static void kill_me_maybe(struct callback_head *cb) } } -static void queue_task_work(struct mce *m, int kill_current_task) +static void queue_task_work(struct mce *m, char *msg, int kill_current_task) { - current->mce_addr = m->addr; - current->mce_kflags = m->kflags; - current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV); - current->mce_whole_page = whole_page(m); + int count = ++current->mce_count; - if (kill_current_task) - current->mce_kill_me.func = kill_me_now; - else - current->mce_kill_me.func = kill_me_maybe; + /* First call, save all the details */ + if (count == 1) { + current->mce_addr = m->addr; + current->mce_kflags = m->kflags; + current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV); + current->mce_whole_page = whole_page(m); + + if (kill_current_task) + current->mce_kill_me.func = kill_me_now; + else + current->mce_kill_me.func = kill_me_maybe; + } + + /* Ten is likely overkill. Don't expect more than two faults before task_work() */ + if (count > 10) + mce_panic("Too many consecutive machine checks while accessing user data", m, msg); + + /* Second or later call, make sure page address matches the one from first call */ + if (count > 1 && (current->mce_addr >> PAGE_SHIFT) != (m->addr >> PAGE_SHIFT)) + mce_panic("Consecutive machine checks to different user pages", m, msg); + + /* Do not call task_work_add() more than once */ + if (count > 1) + return; task_work_add(current, ¤t->mce_kill_me, TWA_RESUME); } @@ -1438,7 +1459,7 @@ noinstr void do_machine_check(struct pt_regs *regs) /* If this triggers there is no way to recover. Die hard. */ BUG_ON(!on_thread_stack() || !user_mode(regs)); - queue_task_work(&m, kill_current_task); + queue_task_work(&m, msg, kill_current_task); } else { /* @@ -1456,7 +1477,7 @@ noinstr void do_machine_check(struct pt_regs *regs) } if (m.kflags & MCE_IN_KERNEL_COPYIN) - queue_task_work(&m, kill_current_task); + queue_task_work(&m, msg, kill_current_task); } out: mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); diff --git a/include/linux/sched.h b/include/linux/sched.h index 1780260f237b..361c7bc72cbb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1468,6 +1468,7 @@ struct task_struct { mce_whole_page : 1, __mce_reserved : 62; struct callback_head mce_kill_me; + int mce_count; #endif #ifdef CONFIG_KRETPROBES From f81c08f897adafd2ed43f86f00207ff929f0b2eb Mon Sep 17 00:00:00 2001 From: Faizel K B Date: Thu, 2 Sep 2021 17:14:44 +0530 Subject: [PATCH 150/543] usb: testusb: Fix for showing the connection speed testusb' application which uses 'usbtest' driver reports 'unknown speed' from the function 'find_testdev'. The variable 'entry->speed' was not updated from the application. The IOCTL mentioned in the FIXME comment can only report whether the connection is low speed or not. Speed is read using the IOCTL USBDEVFS_GET_SPEED which reports the proper speed grade. The call is implemented in the function 'handle_testdev' where the file descriptor was availble locally. Sample output is given below where 'high speed' is printed as the connected speed. sudo ./testusb -a high speed /dev/bus/usb/001/011 0 /dev/bus/usb/001/011 test 0, 0.000015 secs /dev/bus/usb/001/011 test 1, 0.194208 secs /dev/bus/usb/001/011 test 2, 0.077289 secs /dev/bus/usb/001/011 test 3, 0.170604 secs /dev/bus/usb/001/011 test 4, 0.108335 secs /dev/bus/usb/001/011 test 5, 2.788076 secs /dev/bus/usb/001/011 test 6, 2.594610 secs /dev/bus/usb/001/011 test 7, 2.905459 secs /dev/bus/usb/001/011 test 8, 2.795193 secs /dev/bus/usb/001/011 test 9, 8.372651 secs /dev/bus/usb/001/011 test 10, 6.919731 secs /dev/bus/usb/001/011 test 11, 16.372687 secs /dev/bus/usb/001/011 test 12, 16.375233 secs /dev/bus/usb/001/011 test 13, 2.977457 secs /dev/bus/usb/001/011 test 14 --> 22 (Invalid argument) /dev/bus/usb/001/011 test 17, 0.148826 secs /dev/bus/usb/001/011 test 18, 0.068718 secs /dev/bus/usb/001/011 test 19, 0.125992 secs /dev/bus/usb/001/011 test 20, 0.127477 secs /dev/bus/usb/001/011 test 21 --> 22 (Invalid argument) /dev/bus/usb/001/011 test 24, 4.133763 secs /dev/bus/usb/001/011 test 27, 2.140066 secs /dev/bus/usb/001/011 test 28, 2.120713 secs /dev/bus/usb/001/011 test 29, 0.507762 secs Signed-off-by: Faizel K B Link: https://lore.kernel.org/r/20210902114444.15106-1-faizel.kb@dicortech.com Signed-off-by: Greg Kroah-Hartman --- tools/usb/testusb.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tools/usb/testusb.c b/tools/usb/testusb.c index ee8208b2f946..69c3ead25313 100644 --- a/tools/usb/testusb.c +++ b/tools/usb/testusb.c @@ -265,12 +265,6 @@ nomem: } entry->ifnum = ifnum; - - /* FIXME update USBDEVFS_CONNECTINFO so it tells about high speed etc */ - - fprintf(stderr, "%s speed\t%s\t%u\n", - speed(entry->speed), entry->name, entry->ifnum); - entry->next = testdevs; testdevs = entry; return 0; @@ -299,6 +293,14 @@ static void *handle_testdev (void *arg) return 0; } + status = ioctl(fd, USBDEVFS_GET_SPEED, NULL); + if (status < 0) + fprintf(stderr, "USBDEVFS_GET_SPEED failed %d\n", status); + else + dev->speed = status; + fprintf(stderr, "%s speed\t%s\t%u\n", + speed(dev->speed), dev->name, dev->ifnum); + restart: for (i = 0; i < TEST_CASES; i++) { if (dev->test != -1 && dev->test != i) From 79f528afa93918519574773ea49a444c104bc1bd Mon Sep 17 00:00:00 2001 From: Anton Eidelman Date: Sun, 12 Sep 2021 12:54:57 -0600 Subject: [PATCH 151/543] nvme-multipath: fix ANA state updates when a namespace is not present nvme_update_ana_state() has a deficiency that results in a failure to properly update the ana state for a namespace in the following case: NSIDs in ctrl->namespaces: 1, 3, 4 NSIDs in desc->nsids: 1, 2, 3, 4 Loop iteration 0: ns index = 0, n = 0, ns->head->ns_id = 1, nsid = 1, MATCH. Loop iteration 1: ns index = 1, n = 1, ns->head->ns_id = 3, nsid = 2, NO MATCH. Loop iteration 2: ns index = 2, n = 2, ns->head->ns_id = 4, nsid = 4, MATCH. Where the update to the ANA state of NSID 3 is missed. To fix this increment n and retry the update with the same ns when ns->head->ns_id is higher than nsid, Signed-off-by: Anton Eidelman Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg --- drivers/nvme/host/multipath.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 5d7bc58a27bd..e8ccdd398f78 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -600,14 +600,17 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl, down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) { - unsigned nsid = le32_to_cpu(desc->nsids[n]); - + unsigned nsid; +again: + nsid = le32_to_cpu(desc->nsids[n]); if (ns->head->ns_id < nsid) continue; if (ns->head->ns_id == nsid) nvme_update_ns_ana_state(desc, ns); if (++n == nr_nsids) break; + if (ns->head->ns_id > nsid) + goto again; } up_read(&ctrl->namespaces_rwsem); return 0; From 9817d763dbe15327b9b3ff4404fa6f27f927e744 Mon Sep 17 00:00:00 2001 From: Ruozhu Li Date: Mon, 6 Sep 2021 11:51:34 +0800 Subject: [PATCH 152/543] nvme-rdma: destroy cm id before destroy qp to avoid use after free We should always destroy cm_id before destroy qp to avoid to get cma event after qp was destroyed, which may lead to use after free. In RDMA connection establishment error flow, don't destroy qp in cm event handler.Just report cm_error to upper level, qp will be destroy in nvme_rdma_alloc_queue() after destroy cm id. Signed-off-by: Ruozhu Li Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index a68704e39084..042c594bc57e 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -656,8 +656,8 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags)) return; - nvme_rdma_destroy_queue_ib(queue); rdma_destroy_id(queue->cm_id); + nvme_rdma_destroy_queue_ib(queue); mutex_destroy(&queue->queue_lock); } @@ -1815,14 +1815,10 @@ static int nvme_rdma_conn_established(struct nvme_rdma_queue *queue) for (i = 0; i < queue->queue_size; i++) { ret = nvme_rdma_post_recv(queue, &queue->rsp_ring[i]); if (ret) - goto out_destroy_queue_ib; + return ret; } return 0; - -out_destroy_queue_ib: - nvme_rdma_destroy_queue_ib(queue); - return ret; } static int nvme_rdma_conn_rejected(struct nvme_rdma_queue *queue, @@ -1916,14 +1912,10 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue) if (ret) { dev_err(ctrl->ctrl.device, "rdma_connect_locked failed (%d).\n", ret); - goto out_destroy_queue_ib; + return ret; } return 0; - -out_destroy_queue_ib: - nvme_rdma_destroy_queue_ib(queue); - return ret; } static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, @@ -1954,8 +1946,6 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, case RDMA_CM_EVENT_ROUTE_ERROR: case RDMA_CM_EVENT_CONNECT_ERROR: case RDMA_CM_EVENT_UNREACHABLE: - nvme_rdma_destroy_queue_ib(queue); - fallthrough; case RDMA_CM_EVENT_ADDR_ERROR: dev_dbg(queue->ctrl->ctrl.device, "CM error event %d\n", ev->event); From 70f437fb4395ad4d1d16fab9a1ad9fbc9fc0579b Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 9 Sep 2021 08:54:52 -0700 Subject: [PATCH 153/543] nvme-tcp: fix io_work priority inversion Dispatching requests inline with the .queue_rq() call may block while holding the send_mutex. If the tcp io_work also happens to schedule, it may see the req_list is non-empty, leaving "pending" true and remaining in TASK_RUNNING. Since io_work is of higher scheduling priority, the .queue_rq task may not get a chance to run, blocking forward progress and leading to io timeouts. Instead of checking for pending requests within io_work, let the queueing restart io_work outside the send_mutex lock if there is more work to be done. Fixes: a0fdd1418007f ("nvme-tcp: rerun io_work if req_list is not empty") Reported-by: Samuel Jones Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index e2ab12f3f51c..e4249b7dc056 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -274,6 +274,12 @@ static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue) } while (ret > 0); } +static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue) +{ + return !list_empty(&queue->send_list) || + !llist_empty(&queue->req_list) || queue->more_requests; +} + static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, bool sync, bool last) { @@ -294,9 +300,10 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, nvme_tcp_send_all(queue); queue->more_requests = false; mutex_unlock(&queue->send_mutex); - } else if (last) { - queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); } + + if (last && nvme_tcp_queue_more(queue)) + queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); } static void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue) @@ -906,12 +913,6 @@ done: read_unlock_bh(&sk->sk_callback_lock); } -static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue) -{ - return !list_empty(&queue->send_list) || - !llist_empty(&queue->req_list) || queue->more_requests; -} - static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue) { queue->request = NULL; @@ -1145,8 +1146,7 @@ static void nvme_tcp_io_work(struct work_struct *w) pending = true; else if (unlikely(result < 0)) break; - } else - pending = !llist_empty(&queue->req_list); + } result = nvme_tcp_try_recv(queue); if (result > 0) From f5dfd98a80ff8d50cf4ae2820857d7f5a46cbab9 Mon Sep 17 00:00:00 2001 From: Pavel Hofman Date: Mon, 6 Sep 2021 15:08:22 +0200 Subject: [PATCH 154/543] usb: gadget: u_audio: EP-OUT bInterval in fback frequency The patch increases the bitshift in feedback frequency calculation with EP-OUT bInterval value. Tests have revealed that Win10 and OSX UAC2 drivers require the feedback frequency to be based on the actual packet interval instead of on the USB2 microframe. Otherwise they ignore the feedback value. Linux snd-usb-audio driver detects the applied bitshift automatically. Tested-by: Henrik Enquist Signed-off-by: Pavel Hofman Cc: stable Link: https://lore.kernel.org/r/20210906130822.12256-1-pavel.hofman@ivitera.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_audio.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/u_audio.c b/drivers/usb/gadget/function/u_audio.c index 32ef22857083..ad16163b5ff8 100644 --- a/drivers/usb/gadget/function/u_audio.c +++ b/drivers/usb/gadget/function/u_audio.c @@ -96,11 +96,13 @@ static const struct snd_pcm_hardware uac_pcm_hardware = { }; static void u_audio_set_fback_frequency(enum usb_device_speed speed, + struct usb_ep *out_ep, unsigned long long freq, unsigned int pitch, void *buf) { u32 ff = 0; + const struct usb_endpoint_descriptor *ep_desc; /* * Because the pitch base is 1000000, the final divider here @@ -128,8 +130,13 @@ static void u_audio_set_fback_frequency(enum usb_device_speed speed, * byte fromat (that is Q16.16) * * ff = (freq << 16) / 8000 + * + * Win10 and OSX UAC2 drivers require number of samples per packet + * in order to honor the feedback value. + * Linux snd-usb-audio detects the applied bit-shift automatically. */ - freq <<= 4; + ep_desc = out_ep->desc; + freq <<= 4 + (ep_desc->bInterval - 1); } ff = DIV_ROUND_CLOSEST_ULL((freq * pitch), 1953125); @@ -267,7 +274,7 @@ static void u_audio_iso_fback_complete(struct usb_ep *ep, pr_debug("%s: iso_complete status(%d) %d/%d\n", __func__, status, req->actual, req->length); - u_audio_set_fback_frequency(audio_dev->gadget->speed, + u_audio_set_fback_frequency(audio_dev->gadget->speed, audio_dev->out_ep, params->c_srate, prm->pitch, req->buf); @@ -526,7 +533,7 @@ int u_audio_start_capture(struct g_audio *audio_dev) * be meauserd at start of playback */ prm->pitch = 1000000; - u_audio_set_fback_frequency(audio_dev->gadget->speed, + u_audio_set_fback_frequency(audio_dev->gadget->speed, ep, params->c_srate, prm->pitch, req_fback->buf); From 17956b53ebff6a490baf580a836cbd3eae94892b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 6 Sep 2021 12:42:21 +0300 Subject: [PATCH 155/543] usb: gadget: r8a66597: fix a loop in set_feature() This loop is supposed to loop until if reads something other than CS_IDST or until it times out after 30,000 attempts. But because of the || vs && bug, it will never time out and instead it will loop a minimum of 30,000 times. This bug is quite old but the code is only used in USB_DEVICE_TEST_MODE so it probably doesn't affect regular usage. Fixes: 96fe53ef5498 ("usb: gadget: r8a66597-udc: add support for TEST_MODE") Cc: stable Reviewed-by: Yoshihiro Shimoda Acked-by: Felipe Balbi Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20210906094221.GA10957@kili Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/r8a66597-udc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/r8a66597-udc.c b/drivers/usb/gadget/udc/r8a66597-udc.c index 65cae4883454..38e4d6b505a0 100644 --- a/drivers/usb/gadget/udc/r8a66597-udc.c +++ b/drivers/usb/gadget/udc/r8a66597-udc.c @@ -1250,7 +1250,7 @@ static void set_feature(struct r8a66597 *r8a66597, struct usb_ctrlrequest *ctrl) do { tmp = r8a66597_read(r8a66597, INTSTS0) & CTSQ; udelay(1); - } while (tmp != CS_IDST || timeout-- > 0); + } while (tmp != CS_IDST && timeout-- > 0); if (tmp == CS_IDST) r8a66597_bset(r8a66597, From b69ec50b3e55c4b2a85c8bc46763eaf330605847 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Tue, 7 Sep 2021 08:26:19 +0200 Subject: [PATCH 156/543] usb: cdns3: fix race condition before setting doorbell For DEV_VER_V3 version there exist race condition between clearing ep_sts.EP_STS_TRBERR and setting ep_cmd.EP_CMD_DRDY bit. Setting EP_CMD_DRDY will be ignored by controller when EP_STS_TRBERR is set. So, between these two instructions we have a small time gap in which the EP_STSS_TRBERR can be set. In such case the transfer will not start after setting doorbell. Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") cc: # 5.12.x Tested-by: Aswath Govindraju Reviewed-by: Aswath Govindraju Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20210907062619.34622-1-pawell@gli-login.cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdns3-gadget.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index 5d8c982019af..1f3b4a142212 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -1100,6 +1100,19 @@ static int cdns3_ep_run_stream_transfer(struct cdns3_endpoint *priv_ep, return 0; } +static void cdns3_rearm_drdy_if_needed(struct cdns3_endpoint *priv_ep) +{ + struct cdns3_device *priv_dev = priv_ep->cdns3_dev; + + if (priv_dev->dev_ver < DEV_VER_V3) + return; + + if (readl(&priv_dev->regs->ep_sts) & EP_STS_TRBERR) { + writel(EP_STS_TRBERR, &priv_dev->regs->ep_sts); + writel(EP_CMD_DRDY, &priv_dev->regs->ep_cmd); + } +} + /** * cdns3_ep_run_transfer - start transfer on no-default endpoint hardware * @priv_ep: endpoint object @@ -1351,6 +1364,7 @@ static int cdns3_ep_run_transfer(struct cdns3_endpoint *priv_ep, /*clearing TRBERR and EP_STS_DESCMIS before seting DRDY*/ writel(EP_STS_TRBERR | EP_STS_DESCMIS, &priv_dev->regs->ep_sts); writel(EP_CMD_DRDY, &priv_dev->regs->ep_cmd); + cdns3_rearm_drdy_if_needed(priv_ep); trace_cdns3_doorbell_epx(priv_ep->name, readl(&priv_dev->regs->ep_traddr)); } From 856e6e8e0f9300befa87dde09edb578555c99a82 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 31 Aug 2021 16:42:36 +0800 Subject: [PATCH 157/543] usb: dwc2: check return value after calling platform_get_resource() It will cause null-ptr-deref if platform_get_resource() returns NULL, we need check the return value. Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20210831084236.1359677-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/hcd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index 2a7828971d05..a215ec9e172e 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -5191,6 +5191,10 @@ int dwc2_hcd_init(struct dwc2_hsotg *hsotg) hcd->has_tt = 1; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + retval = -EINVAL; + goto error1; + } hcd->rsrc_start = res->start; hcd->rsrc_len = resource_size(res); From 91fac0741d4817945c6ee0a17591421e7f5ecb86 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 7 Sep 2021 10:23:18 +0200 Subject: [PATCH 158/543] USB: cdc-acm: fix minor-number release If the driver runs out of minor numbers it would release minor 0 and allow another device to claim the minor while still in use. Fortunately, registering the tty class device of the second device would fail (with a stack dump) due to the sysfs name collision so no memory is leaked. Fixes: cae2bc768d17 ("usb: cdc-acm: Decrement tty port's refcount if probe() fail") Cc: stable@vger.kernel.org # 4.19 Cc: Jaejoong Kim Acked-by: Oliver Neukum Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20210907082318.7757-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 7 +++++-- drivers/usb/class/cdc-acm.h | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 8bbd8e29e60d..4e2f1552f4b7 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -726,7 +726,8 @@ static void acm_port_destruct(struct tty_port *port) { struct acm *acm = container_of(port, struct acm, port); - acm_release_minor(acm); + if (acm->minor != ACM_MINOR_INVALID) + acm_release_minor(acm); usb_put_intf(acm->control); kfree(acm->country_codes); kfree(acm); @@ -1323,8 +1324,10 @@ made_compressed_probe: usb_get_intf(acm->control); /* undone in destruct() */ minor = acm_alloc_minor(acm); - if (minor < 0) + if (minor < 0) { + acm->minor = ACM_MINOR_INVALID; goto err_put_port; + } acm->minor = minor; acm->dev = usb_dev; diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h index 8aef5eb769a0..3aa7f0a3ad71 100644 --- a/drivers/usb/class/cdc-acm.h +++ b/drivers/usb/class/cdc-acm.h @@ -22,6 +22,8 @@ #define ACM_TTY_MAJOR 166 #define ACM_TTY_MINORS 256 +#define ACM_MINOR_INVALID ACM_TTY_MINORS + /* * Requests. */ From aad06846a2304e48e7a223fad8971eed16179606 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 7 Sep 2021 05:30:02 -0700 Subject: [PATCH 159/543] usb: ehci: Simplify platform driver registration Use platform_register_drivers() and platform_unregister_drivers() to register and unregister ehci platform drivers. This simplifies the code and prevents the following build errors seen with sparc:allmodconfig. drivers/usb/host/ehci-hcd.c:1301: error: "PLATFORM_DRIVER" redefined drivers/usb/host/ehci-sh.c:173:31: error: 'ehci_hcd_sh_driver' defined but not used Acked-by: Alan Stern Signed-off-by: Guenter Roeck Link: https://lore.kernel.org/r/20210907123002.3951446-1-linux@roeck-us.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-hcd.c | 75 +++++++++++++------------------------ 1 file changed, 27 insertions(+), 48 deletions(-) diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index 6bdc6d6bf74d..1776c05d0a48 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -1278,29 +1279,39 @@ MODULE_LICENSE ("GPL"); #ifdef CONFIG_USB_EHCI_SH #include "ehci-sh.c" -#define PLATFORM_DRIVER ehci_hcd_sh_driver #endif #ifdef CONFIG_PPC_PS3 #include "ehci-ps3.c" -#define PS3_SYSTEM_BUS_DRIVER ps3_ehci_driver #endif #ifdef CONFIG_USB_EHCI_HCD_PPC_OF #include "ehci-ppc-of.c" -#define OF_PLATFORM_DRIVER ehci_hcd_ppc_of_driver #endif #ifdef CONFIG_XPS_USB_HCD_XILINX #include "ehci-xilinx-of.c" -#define XILINX_OF_PLATFORM_DRIVER ehci_hcd_xilinx_of_driver #endif #ifdef CONFIG_SPARC_LEON #include "ehci-grlib.c" -#define PLATFORM_DRIVER ehci_grlib_driver #endif +static struct platform_driver * const platform_drivers[] = { +#ifdef CONFIG_USB_EHCI_SH + &ehci_hcd_sh_driver, +#endif +#ifdef CONFIG_USB_EHCI_HCD_PPC_OF + &ehci_hcd_ppc_of_driver, +#endif +#ifdef CONFIG_XPS_USB_HCD_XILINX + &ehci_hcd_xilinx_of_driver, +#endif +#ifdef CONFIG_SPARC_LEON + &ehci_grlib_driver, +#endif +}; + static int __init ehci_hcd_init(void) { int retval = 0; @@ -1324,47 +1335,23 @@ static int __init ehci_hcd_init(void) ehci_debug_root = debugfs_create_dir("ehci", usb_debug_root); #endif -#ifdef PLATFORM_DRIVER - retval = platform_driver_register(&PLATFORM_DRIVER); + retval = platform_register_drivers(platform_drivers, ARRAY_SIZE(platform_drivers)); if (retval < 0) goto clean0; -#endif -#ifdef PS3_SYSTEM_BUS_DRIVER - retval = ps3_ehci_driver_register(&PS3_SYSTEM_BUS_DRIVER); +#ifdef CONFIG_PPC_PS3 + retval = ps3_ehci_driver_register(&ps3_ehci_driver); if (retval < 0) - goto clean2; + goto clean1; #endif -#ifdef OF_PLATFORM_DRIVER - retval = platform_driver_register(&OF_PLATFORM_DRIVER); - if (retval < 0) - goto clean3; -#endif + return 0; -#ifdef XILINX_OF_PLATFORM_DRIVER - retval = platform_driver_register(&XILINX_OF_PLATFORM_DRIVER); - if (retval < 0) - goto clean4; +#ifdef CONFIG_PPC_PS3 +clean1: #endif - return retval; - -#ifdef XILINX_OF_PLATFORM_DRIVER - /* platform_driver_unregister(&XILINX_OF_PLATFORM_DRIVER); */ -clean4: -#endif -#ifdef OF_PLATFORM_DRIVER - platform_driver_unregister(&OF_PLATFORM_DRIVER); -clean3: -#endif -#ifdef PS3_SYSTEM_BUS_DRIVER - ps3_ehci_driver_unregister(&PS3_SYSTEM_BUS_DRIVER); -clean2: -#endif -#ifdef PLATFORM_DRIVER - platform_driver_unregister(&PLATFORM_DRIVER); + platform_unregister_drivers(platform_drivers, ARRAY_SIZE(platform_drivers)); clean0: -#endif #ifdef CONFIG_DYNAMIC_DEBUG debugfs_remove(ehci_debug_root); ehci_debug_root = NULL; @@ -1376,18 +1363,10 @@ module_init(ehci_hcd_init); static void __exit ehci_hcd_cleanup(void) { -#ifdef XILINX_OF_PLATFORM_DRIVER - platform_driver_unregister(&XILINX_OF_PLATFORM_DRIVER); -#endif -#ifdef OF_PLATFORM_DRIVER - platform_driver_unregister(&OF_PLATFORM_DRIVER); -#endif -#ifdef PLATFORM_DRIVER - platform_driver_unregister(&PLATFORM_DRIVER); -#endif -#ifdef PS3_SYSTEM_BUS_DRIVER - ps3_ehci_driver_unregister(&PS3_SYSTEM_BUS_DRIVER); +#ifdef CONFIG_PPC_PS3 + ps3_ehci_driver_unregister(&ps3_ehci_driver); #endif + platform_unregister_drivers(platform_drivers, ARRAY_SIZE(platform_drivers)); #ifdef CONFIG_DYNAMIC_DEBUG debugfs_remove(ehci_debug_root); #endif From d91adc5322ab53df4b6d1989242bfb6c63163eb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Tue, 31 Aug 2021 08:54:19 +0200 Subject: [PATCH 160/543] Revert "USB: bcma: Add a check for devm_gpiod_get" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit f3de5d857bb2362b00e2a8d4bc886cd49dcb66db. That commit broke USB on all routers that have USB always powered on and don't require toggling any GPIO. It's a majority of devices actually. The original code worked and seemed safe: vcc GPIO is optional and bcma_hci_platform_power_gpio() takes care of checking the pointer before using it. This revert fixes: [ 10.801127] bcma_hcd: probe of bcma0:11 failed with error -2 Fixes: f3de5d857bb2 ("USB: bcma: Add a check for devm_gpiod_get") Cc: stable Cc: Chuhong Yuan Signed-off-by: Rafał Miłecki Link: https://lore.kernel.org/r/20210831065419.18371-1-zajec5@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/bcma-hcd.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/usb/host/bcma-hcd.c b/drivers/usb/host/bcma-hcd.c index 337b425dd4b0..2df52f75f6b3 100644 --- a/drivers/usb/host/bcma-hcd.c +++ b/drivers/usb/host/bcma-hcd.c @@ -406,12 +406,9 @@ static int bcma_hcd_probe(struct bcma_device *core) return -ENOMEM; usb_dev->core = core; - if (core->dev.of_node) { + if (core->dev.of_node) usb_dev->gpio_desc = devm_gpiod_get(&core->dev, "vcc", GPIOD_OUT_HIGH); - if (IS_ERR(usb_dev->gpio_desc)) - return PTR_ERR(usb_dev->gpio_desc); - } switch (core->id.id) { case BCMA_CORE_USB20_HOST: From 8cfac9a6744fcb143cb3e94ce002f09fd17fadbb Mon Sep 17 00:00:00 2001 From: Li Jun Date: Wed, 8 Sep 2021 10:28:19 +0800 Subject: [PATCH 161/543] usb: dwc3: core: balance phy init and exit After we start to do core soft reset while usb role switch, the phy init is invoked at every switch to device mode, but its counter part de-init is missing, this causes the actual phy init can not be done when we really want to re-init phy like system resume, because the counter maintained by phy core is not 0. considering phy init is actually redundant for role switch, so move out the phy init from core soft reset to dwc3 core init where is the only place required. Fixes: f88359e1588b ("usb: dwc3: core: Do core softreset when switch mode") Cc: Tested-by: faqiang.zhu Tested-by: John Stultz #HiKey960 Acked-by: Felipe Balbi Signed-off-by: Li Jun Link: https://lore.kernel.org/r/1631068099-13559-1-git-send-email-jun.li@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 01866dcb953b..0104a80b185e 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -264,19 +264,6 @@ static int dwc3_core_soft_reset(struct dwc3 *dwc) { u32 reg; int retries = 1000; - int ret; - - usb_phy_init(dwc->usb2_phy); - usb_phy_init(dwc->usb3_phy); - ret = phy_init(dwc->usb2_generic_phy); - if (ret < 0) - return ret; - - ret = phy_init(dwc->usb3_generic_phy); - if (ret < 0) { - phy_exit(dwc->usb2_generic_phy); - return ret; - } /* * We're resetting only the device side because, if we're in host mode, @@ -310,9 +297,6 @@ static int dwc3_core_soft_reset(struct dwc3 *dwc) udelay(1); } while (--retries); - phy_exit(dwc->usb3_generic_phy); - phy_exit(dwc->usb2_generic_phy); - return -ETIMEDOUT; done: @@ -982,9 +966,21 @@ static int dwc3_core_init(struct dwc3 *dwc) dwc->phys_ready = true; } + usb_phy_init(dwc->usb2_phy); + usb_phy_init(dwc->usb3_phy); + ret = phy_init(dwc->usb2_generic_phy); + if (ret < 0) + goto err0a; + + ret = phy_init(dwc->usb3_generic_phy); + if (ret < 0) { + phy_exit(dwc->usb2_generic_phy); + goto err0a; + } + ret = dwc3_core_soft_reset(dwc); if (ret) - goto err0a; + goto err1; if (hw_mode == DWC3_GHWPARAMS0_MODE_DRD && !DWC3_VER_IS_WITHIN(DWC3, ANY, 194A)) { From 91bb163e1e4f88092f50dfaa5a816b658753e4b2 Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Thu, 9 Sep 2021 14:45:15 +0400 Subject: [PATCH 162/543] usb: dwc2: gadget: Fix ISOC flow for BDMA and Slave According USB spec each ISOC transaction should be performed in a designated for that transaction interval. On bus errors or delays in operating system scheduling of client software can result in no packet being transferred for a (micro)frame. An error indication should be returned as status to the client software in such a case. Current implementation in case of missed/dropped interval send same data in next possible interval instead of reporting missed isoc. This fix complete requests with -ENODATA if interval elapsed. HSOTG core in BDMA and Slave modes haven't HW support for (micro)frames tracking, this is why SW should care about tracking of (micro)frames. Because of that method and consider operating system scheduling delays, added few additional checking's of elapsed target (micro)frame: 1. Immediately before enabling EP to start transfer. 2. With any transfer completion interrupt. 3. With incomplete isoc in/out interrupt. 4. With EP disabled interrupt because of incomplete transfer. 5. With OUT token received while EP disabled interrupt (for OUT transfers). 6. With NAK replied to IN token interrupt (for IN transfers). As part of ISOC flow, additionally fixed 'current' and 'target' frame calculation functions. In HS mode SOF limits provided by DSTS register is 0x3fff, but in non HS mode this limit is 0x7ff. Tested by internal tool which also using for dwc3 testing. Signed-off-by: Minas Harutyunyan Cc: stable Link: https://lore.kernel.org/r/95d1423adf4b0f68187c9894820c4b7e964a3f7f.1631175721.git.Minas.Harutyunyan@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/gadget.c | 191 +++++++++++++++++++++----------------- 1 file changed, 107 insertions(+), 84 deletions(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 837237e4bc96..f09cbdfac9df 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -115,10 +115,16 @@ static inline bool using_desc_dma(struct dwc2_hsotg *hsotg) */ static inline void dwc2_gadget_incr_frame_num(struct dwc2_hsotg_ep *hs_ep) { + struct dwc2_hsotg *hsotg = hs_ep->parent; + u16 limit = DSTS_SOFFN_LIMIT; + + if (hsotg->gadget.speed != USB_SPEED_HIGH) + limit >>= 3; + hs_ep->target_frame += hs_ep->interval; - if (hs_ep->target_frame > DSTS_SOFFN_LIMIT) { + if (hs_ep->target_frame > limit) { hs_ep->frame_overrun = true; - hs_ep->target_frame &= DSTS_SOFFN_LIMIT; + hs_ep->target_frame &= limit; } else { hs_ep->frame_overrun = false; } @@ -136,10 +142,16 @@ static inline void dwc2_gadget_incr_frame_num(struct dwc2_hsotg_ep *hs_ep) */ static inline void dwc2_gadget_dec_frame_num_by_one(struct dwc2_hsotg_ep *hs_ep) { + struct dwc2_hsotg *hsotg = hs_ep->parent; + u16 limit = DSTS_SOFFN_LIMIT; + + if (hsotg->gadget.speed != USB_SPEED_HIGH) + limit >>= 3; + if (hs_ep->target_frame) hs_ep->target_frame -= 1; else - hs_ep->target_frame = DSTS_SOFFN_LIMIT; + hs_ep->target_frame = limit; } /** @@ -1018,6 +1030,12 @@ static void dwc2_gadget_start_isoc_ddma(struct dwc2_hsotg_ep *hs_ep) dwc2_writel(hsotg, ctrl, depctl); } +static bool dwc2_gadget_target_frame_elapsed(struct dwc2_hsotg_ep *hs_ep); +static void dwc2_hsotg_complete_request(struct dwc2_hsotg *hsotg, + struct dwc2_hsotg_ep *hs_ep, + struct dwc2_hsotg_req *hs_req, + int result); + /** * dwc2_hsotg_start_req - start a USB request from an endpoint's queue * @hsotg: The controller state. @@ -1170,14 +1188,19 @@ static void dwc2_hsotg_start_req(struct dwc2_hsotg *hsotg, } } - if (hs_ep->isochronous && hs_ep->interval == 1) { - hs_ep->target_frame = dwc2_hsotg_read_frameno(hsotg); - dwc2_gadget_incr_frame_num(hs_ep); - - if (hs_ep->target_frame & 0x1) - ctrl |= DXEPCTL_SETODDFR; - else - ctrl |= DXEPCTL_SETEVENFR; + if (hs_ep->isochronous) { + if (!dwc2_gadget_target_frame_elapsed(hs_ep)) { + if (hs_ep->interval == 1) { + if (hs_ep->target_frame & 0x1) + ctrl |= DXEPCTL_SETODDFR; + else + ctrl |= DXEPCTL_SETEVENFR; + } + ctrl |= DXEPCTL_CNAK; + } else { + dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req, -ENODATA); + return; + } } ctrl |= DXEPCTL_EPENA; /* ensure ep enabled */ @@ -1325,12 +1348,16 @@ static bool dwc2_gadget_target_frame_elapsed(struct dwc2_hsotg_ep *hs_ep) u32 target_frame = hs_ep->target_frame; u32 current_frame = hsotg->frame_number; bool frame_overrun = hs_ep->frame_overrun; + u16 limit = DSTS_SOFFN_LIMIT; + + if (hsotg->gadget.speed != USB_SPEED_HIGH) + limit >>= 3; if (!frame_overrun && current_frame >= target_frame) return true; if (frame_overrun && current_frame >= target_frame && - ((current_frame - target_frame) < DSTS_SOFFN_LIMIT / 2)) + ((current_frame - target_frame) < limit / 2)) return true; return false; @@ -1713,11 +1740,9 @@ static struct dwc2_hsotg_req *get_ep_head(struct dwc2_hsotg_ep *hs_ep) */ static void dwc2_gadget_start_next_request(struct dwc2_hsotg_ep *hs_ep) { - u32 mask; struct dwc2_hsotg *hsotg = hs_ep->parent; int dir_in = hs_ep->dir_in; struct dwc2_hsotg_req *hs_req; - u32 epmsk_reg = dir_in ? DIEPMSK : DOEPMSK; if (!list_empty(&hs_ep->queue)) { hs_req = get_ep_head(hs_ep); @@ -1733,9 +1758,6 @@ static void dwc2_gadget_start_next_request(struct dwc2_hsotg_ep *hs_ep) } else { dev_dbg(hsotg->dev, "%s: No more ISOC-OUT requests\n", __func__); - mask = dwc2_readl(hsotg, epmsk_reg); - mask |= DOEPMSK_OUTTKNEPDISMSK; - dwc2_writel(hsotg, mask, epmsk_reg); } } @@ -2306,19 +2328,6 @@ static void dwc2_hsotg_ep0_zlp(struct dwc2_hsotg *hsotg, bool dir_in) dwc2_hsotg_program_zlp(hsotg, hsotg->eps_out[0]); } -static void dwc2_hsotg_change_ep_iso_parity(struct dwc2_hsotg *hsotg, - u32 epctl_reg) -{ - u32 ctrl; - - ctrl = dwc2_readl(hsotg, epctl_reg); - if (ctrl & DXEPCTL_EOFRNUM) - ctrl |= DXEPCTL_SETEVENFR; - else - ctrl |= DXEPCTL_SETODDFR; - dwc2_writel(hsotg, ctrl, epctl_reg); -} - /* * dwc2_gadget_get_xfersize_ddma - get transferred bytes amount from desc * @hs_ep - The endpoint on which transfer went @@ -2439,20 +2448,11 @@ static void dwc2_hsotg_handle_outdone(struct dwc2_hsotg *hsotg, int epnum) dwc2_hsotg_ep0_zlp(hsotg, true); } - /* - * Slave mode OUT transfers do not go through XferComplete so - * adjust the ISOC parity here. - */ - if (!using_dma(hsotg)) { - if (hs_ep->isochronous && hs_ep->interval == 1) - dwc2_hsotg_change_ep_iso_parity(hsotg, DOEPCTL(epnum)); - else if (hs_ep->isochronous && hs_ep->interval > 1) - dwc2_gadget_incr_frame_num(hs_ep); - } - /* Set actual frame number for completed transfers */ - if (!using_desc_dma(hsotg) && hs_ep->isochronous) - req->frame_number = hsotg->frame_number; + if (!using_desc_dma(hsotg) && hs_ep->isochronous) { + req->frame_number = hs_ep->target_frame; + dwc2_gadget_incr_frame_num(hs_ep); + } dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req, result); } @@ -2766,6 +2766,12 @@ static void dwc2_hsotg_complete_in(struct dwc2_hsotg *hsotg, return; } + /* Set actual frame number for completed transfers */ + if (!using_desc_dma(hsotg) && hs_ep->isochronous) { + hs_req->req.frame_number = hs_ep->target_frame; + dwc2_gadget_incr_frame_num(hs_ep); + } + dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req, 0); } @@ -2826,23 +2832,18 @@ static void dwc2_gadget_handle_ep_disabled(struct dwc2_hsotg_ep *hs_ep) dwc2_hsotg_txfifo_flush(hsotg, hs_ep->fifo_index); - if (hs_ep->isochronous) { - dwc2_hsotg_complete_in(hsotg, hs_ep); - return; - } - if ((epctl & DXEPCTL_STALL) && (epctl & DXEPCTL_EPTYPE_BULK)) { int dctl = dwc2_readl(hsotg, DCTL); dctl |= DCTL_CGNPINNAK; dwc2_writel(hsotg, dctl, DCTL); } - return; - } + } else { - if (dctl & DCTL_GOUTNAKSTS) { - dctl |= DCTL_CGOUTNAK; - dwc2_writel(hsotg, dctl, DCTL); + if (dctl & DCTL_GOUTNAKSTS) { + dctl |= DCTL_CGOUTNAK; + dwc2_writel(hsotg, dctl, DCTL); + } } if (!hs_ep->isochronous) @@ -2863,8 +2864,6 @@ static void dwc2_gadget_handle_ep_disabled(struct dwc2_hsotg_ep *hs_ep) /* Update current frame number value. */ hsotg->frame_number = dwc2_hsotg_read_frameno(hsotg); } while (dwc2_gadget_target_frame_elapsed(hs_ep)); - - dwc2_gadget_start_next_request(hs_ep); } /** @@ -2881,8 +2880,8 @@ static void dwc2_gadget_handle_ep_disabled(struct dwc2_hsotg_ep *hs_ep) static void dwc2_gadget_handle_out_token_ep_disabled(struct dwc2_hsotg_ep *ep) { struct dwc2_hsotg *hsotg = ep->parent; + struct dwc2_hsotg_req *hs_req; int dir_in = ep->dir_in; - u32 doepmsk; if (dir_in || !ep->isochronous) return; @@ -2896,28 +2895,39 @@ static void dwc2_gadget_handle_out_token_ep_disabled(struct dwc2_hsotg_ep *ep) return; } - if (ep->interval > 1 && - ep->target_frame == TARGET_FRAME_INITIAL) { + if (ep->target_frame == TARGET_FRAME_INITIAL) { u32 ctrl; ep->target_frame = hsotg->frame_number; - dwc2_gadget_incr_frame_num(ep); + if (ep->interval > 1) { + ctrl = dwc2_readl(hsotg, DOEPCTL(ep->index)); + if (ep->target_frame & 0x1) + ctrl |= DXEPCTL_SETODDFR; + else + ctrl |= DXEPCTL_SETEVENFR; - ctrl = dwc2_readl(hsotg, DOEPCTL(ep->index)); - if (ep->target_frame & 0x1) - ctrl |= DXEPCTL_SETODDFR; - else - ctrl |= DXEPCTL_SETEVENFR; - - dwc2_writel(hsotg, ctrl, DOEPCTL(ep->index)); + dwc2_writel(hsotg, ctrl, DOEPCTL(ep->index)); + } } - dwc2_gadget_start_next_request(ep); - doepmsk = dwc2_readl(hsotg, DOEPMSK); - doepmsk &= ~DOEPMSK_OUTTKNEPDISMSK; - dwc2_writel(hsotg, doepmsk, DOEPMSK); + while (dwc2_gadget_target_frame_elapsed(ep)) { + hs_req = get_ep_head(ep); + if (hs_req) + dwc2_hsotg_complete_request(hsotg, ep, hs_req, -ENODATA); + + dwc2_gadget_incr_frame_num(ep); + /* Update current frame number value. */ + hsotg->frame_number = dwc2_hsotg_read_frameno(hsotg); + } + + if (!ep->req) + dwc2_gadget_start_next_request(ep); + } +static void dwc2_hsotg_ep_stop_xfr(struct dwc2_hsotg *hsotg, + struct dwc2_hsotg_ep *hs_ep); + /** * dwc2_gadget_handle_nak - handle NAK interrupt * @hs_ep: The endpoint on which interrupt is asserted. @@ -2935,7 +2945,9 @@ static void dwc2_gadget_handle_out_token_ep_disabled(struct dwc2_hsotg_ep *ep) static void dwc2_gadget_handle_nak(struct dwc2_hsotg_ep *hs_ep) { struct dwc2_hsotg *hsotg = hs_ep->parent; + struct dwc2_hsotg_req *hs_req; int dir_in = hs_ep->dir_in; + u32 ctrl; if (!dir_in || !hs_ep->isochronous) return; @@ -2977,13 +2989,29 @@ static void dwc2_gadget_handle_nak(struct dwc2_hsotg_ep *hs_ep) dwc2_writel(hsotg, ctrl, DIEPCTL(hs_ep->index)); } - - dwc2_hsotg_complete_request(hsotg, hs_ep, - get_ep_head(hs_ep), 0); } - if (!using_desc_dma(hsotg)) + if (using_desc_dma(hsotg)) + return; + + ctrl = dwc2_readl(hsotg, DIEPCTL(hs_ep->index)); + if (ctrl & DXEPCTL_EPENA) + dwc2_hsotg_ep_stop_xfr(hsotg, hs_ep); + else + dwc2_hsotg_txfifo_flush(hsotg, hs_ep->fifo_index); + + while (dwc2_gadget_target_frame_elapsed(hs_ep)) { + hs_req = get_ep_head(hs_ep); + if (hs_req) + dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req, -ENODATA); + dwc2_gadget_incr_frame_num(hs_ep); + /* Update current frame number value. */ + hsotg->frame_number = dwc2_hsotg_read_frameno(hsotg); + } + + if (!hs_ep->req) + dwc2_gadget_start_next_request(hs_ep); } /** @@ -3048,12 +3076,8 @@ static void dwc2_hsotg_epint(struct dwc2_hsotg *hsotg, unsigned int idx, * need to look at completing IN requests here * if operating slave mode */ - if (hs_ep->isochronous && hs_ep->interval > 1) - dwc2_gadget_incr_frame_num(hs_ep); - - dwc2_hsotg_complete_in(hsotg, hs_ep); - if (ints & DXEPINT_NAKINTRPT) - ints &= ~DXEPINT_NAKINTRPT; + if (!hs_ep->isochronous || !(ints & DXEPINT_NAKINTRPT)) + dwc2_hsotg_complete_in(hsotg, hs_ep); if (idx == 0 && !hs_ep->req) dwc2_hsotg_enqueue_setup(hsotg); @@ -3062,10 +3086,8 @@ static void dwc2_hsotg_epint(struct dwc2_hsotg *hsotg, unsigned int idx, * We're using DMA, we need to fire an OutDone here * as we ignore the RXFIFO. */ - if (hs_ep->isochronous && hs_ep->interval > 1) - dwc2_gadget_incr_frame_num(hs_ep); - - dwc2_hsotg_handle_outdone(hsotg, idx); + if (!hs_ep->isochronous || !(ints & DXEPINT_OUTTKNEPDIS)) + dwc2_hsotg_handle_outdone(hsotg, idx); } } @@ -4085,6 +4107,7 @@ static int dwc2_hsotg_ep_enable(struct usb_ep *ep, mask |= DIEPMSK_NAKMSK; dwc2_writel(hsotg, mask, DIEPMSK); } else { + epctrl |= DXEPCTL_SNAK; mask = dwc2_readl(hsotg, DOEPMSK); mask |= DOEPMSK_OUTTKNEPDISMSK; dwc2_writel(hsotg, mask, DOEPMSK); From 58877b0824da15698bd85a0a9dbfa8c354e6ecb7 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Thu, 9 Sep 2021 12:11:58 +0530 Subject: [PATCH 163/543] usb: core: hcd: Add support for deferring roothub registration It has been observed with certain PCIe USB cards (like Inateck connected to AM64 EVM or J7200 EVM) that as soon as the primary roothub is registered, port status change is handled even before xHC is running leading to cold plug USB devices not detected. For such cases, registering both the root hubs along with the second HCD is required. Add support for deferring roothub registration in usb_add_hcd(), so that both primary and secondary roothubs are registered along with the second HCD. CC: stable@vger.kernel.org # 5.4+ Suggested-by: Mathias Nyman Tested-by: Chris Chiu Acked-by: Alan Stern Signed-off-by: Kishon Vijay Abraham I Link: https://lore.kernel.org/r/20210909064200.16216-2-kishon@ti.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 31 ++++++++++++++++++++++++------- include/linux/usb/hcd.h | 2 ++ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 0f8b7c93310e..99ff2d23be05 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -2775,6 +2775,7 @@ int usb_add_hcd(struct usb_hcd *hcd, { int retval; struct usb_device *rhdev; + struct usb_hcd *shared_hcd; if (!hcd->skip_phy_initialization && usb_hcd_is_primary_hcd(hcd)) { hcd->phy_roothub = usb_phy_roothub_alloc(hcd->self.sysdev); @@ -2935,13 +2936,26 @@ int usb_add_hcd(struct usb_hcd *hcd, goto err_hcd_driver_start; } - /* starting here, usbcore will pay attention to this root hub */ - retval = register_root_hub(hcd); - if (retval != 0) - goto err_register_root_hub; + /* starting here, usbcore will pay attention to the shared HCD roothub */ + shared_hcd = hcd->shared_hcd; + if (!usb_hcd_is_primary_hcd(hcd) && shared_hcd && HCD_DEFER_RH_REGISTER(shared_hcd)) { + retval = register_root_hub(shared_hcd); + if (retval != 0) + goto err_register_root_hub; - if (hcd->uses_new_polling && HCD_POLL_RH(hcd)) - usb_hcd_poll_rh_status(hcd); + if (shared_hcd->uses_new_polling && HCD_POLL_RH(shared_hcd)) + usb_hcd_poll_rh_status(shared_hcd); + } + + /* starting here, usbcore will pay attention to this root hub */ + if (!HCD_DEFER_RH_REGISTER(hcd)) { + retval = register_root_hub(hcd); + if (retval != 0) + goto err_register_root_hub; + + if (hcd->uses_new_polling && HCD_POLL_RH(hcd)) + usb_hcd_poll_rh_status(hcd); + } return retval; @@ -2985,6 +2999,7 @@ EXPORT_SYMBOL_GPL(usb_add_hcd); void usb_remove_hcd(struct usb_hcd *hcd) { struct usb_device *rhdev = hcd->self.root_hub; + bool rh_registered; dev_info(hcd->self.controller, "remove, state %x\n", hcd->state); @@ -2995,6 +3010,7 @@ void usb_remove_hcd(struct usb_hcd *hcd) dev_dbg(hcd->self.controller, "roothub graceful disconnect\n"); spin_lock_irq (&hcd_root_hub_lock); + rh_registered = hcd->rh_registered; hcd->rh_registered = 0; spin_unlock_irq (&hcd_root_hub_lock); @@ -3004,7 +3020,8 @@ void usb_remove_hcd(struct usb_hcd *hcd) cancel_work_sync(&hcd->died_work); mutex_lock(&usb_bus_idr_lock); - usb_disconnect(&rhdev); /* Sets rhdev to NULL */ + if (rh_registered) + usb_disconnect(&rhdev); /* Sets rhdev to NULL */ mutex_unlock(&usb_bus_idr_lock); /* diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 548a028f2dab..2c1fc9212cf2 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -124,6 +124,7 @@ struct usb_hcd { #define HCD_FLAG_RH_RUNNING 5 /* root hub is running? */ #define HCD_FLAG_DEAD 6 /* controller has died? */ #define HCD_FLAG_INTF_AUTHORIZED 7 /* authorize interfaces? */ +#define HCD_FLAG_DEFER_RH_REGISTER 8 /* Defer roothub registration */ /* The flags can be tested using these macros; they are likely to * be slightly faster than test_bit(). @@ -134,6 +135,7 @@ struct usb_hcd { #define HCD_WAKEUP_PENDING(hcd) ((hcd)->flags & (1U << HCD_FLAG_WAKEUP_PENDING)) #define HCD_RH_RUNNING(hcd) ((hcd)->flags & (1U << HCD_FLAG_RH_RUNNING)) #define HCD_DEAD(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEAD)) +#define HCD_DEFER_RH_REGISTER(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEFER_RH_REGISTER)) /* * Specifies if interfaces are authorized by default From b7a0a792f864583207c593b50fd1b752ed89f4c1 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Thu, 9 Sep 2021 12:11:59 +0530 Subject: [PATCH 164/543] xhci: Set HCD flag to defer primary roothub registration Set "HCD_FLAG_DEFER_RH_REGISTER" to hcd->flags in xhci_run() to defer registering primary roothub in usb_add_hcd(). This will make sure both primary roothub and secondary roothub will be registered along with the second HCD. This is required for cold plugged USB devices to be detected in certain PCIe USB cards (like Inateck USB card connected to AM64 EVM or J7200 EVM). CC: stable@vger.kernel.org # 5.4+ Suggested-by: Mathias Nyman Tested-by: Chris Chiu Signed-off-by: Kishon Vijay Abraham I Link: https://lore.kernel.org/r/20210909064200.16216-3-kishon@ti.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index f3dabd02382c..93c38b557afd 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -692,6 +692,7 @@ int xhci_run(struct usb_hcd *hcd) if (ret) xhci_free_command(xhci, command); } + set_bit(HCD_FLAG_DEFER_RH_REGISTER, &hcd->flags); xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Finished xhci_run for USB2 roothub"); From 5cf86349e98b14f505f83aae45a6df2bacc15a7a Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Thu, 9 Sep 2021 12:12:00 +0530 Subject: [PATCH 165/543] usb: core: hcd: Modularize HCD stop configuration in usb_stop_hcd() No functional change. Since configuration to stop HCD is invoked from multiple places, group all of them in usb_stop_hcd(). Tested-by: Chris Chiu Acked-by: Alan Stern Signed-off-by: Kishon Vijay Abraham I Link: https://lore.kernel.org/r/20210909064200.16216-4-kishon@ti.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 99ff2d23be05..7ee6e4cc0d89 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -2760,6 +2760,26 @@ static void usb_put_invalidate_rhdev(struct usb_hcd *hcd) usb_put_dev(rhdev); } +/** + * usb_stop_hcd - Halt the HCD + * @hcd: the usb_hcd that has to be halted + * + * Stop the root-hub polling timer and invoke the HCD's ->stop callback. + */ +static void usb_stop_hcd(struct usb_hcd *hcd) +{ + hcd->rh_pollable = 0; + clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); + del_timer_sync(&hcd->rh_timer); + + hcd->driver->stop(hcd); + hcd->state = HC_STATE_HALT; + + /* In case the HCD restarted the timer, stop it again. */ + clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); + del_timer_sync(&hcd->rh_timer); +} + /** * usb_add_hcd - finish generic HCD structure initialization and register * @hcd: the usb_hcd structure to initialize @@ -2960,13 +2980,7 @@ int usb_add_hcd(struct usb_hcd *hcd, return retval; err_register_root_hub: - hcd->rh_pollable = 0; - clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); - del_timer_sync(&hcd->rh_timer); - hcd->driver->stop(hcd); - hcd->state = HC_STATE_HALT; - clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); - del_timer_sync(&hcd->rh_timer); + usb_stop_hcd(hcd); err_hcd_driver_start: if (usb_hcd_is_primary_hcd(hcd) && hcd->irq > 0) free_irq(irqnum, hcd); @@ -3039,16 +3053,7 @@ void usb_remove_hcd(struct usb_hcd *hcd) * interrupt occurs), but usb_hcd_poll_rh_status() won't invoke * the hub_status_data() callback. */ - hcd->rh_pollable = 0; - clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); - del_timer_sync(&hcd->rh_timer); - - hcd->driver->stop(hcd); - hcd->state = HC_STATE_HALT; - - /* In case the HCD restarted the timer, stop it again. */ - clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); - del_timer_sync(&hcd->rh_timer); + usb_stop_hcd(hcd); if (usb_hcd_is_primary_hcd(hcd)) { if (hcd->irq > 0) From dbe2518b2d8eabffa74dbf7d9fdd7dacddab7fc0 Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Sat, 11 Sep 2021 22:58:30 +0400 Subject: [PATCH 166/543] usb: dwc2: gadget: Fix ISOC transfer complete handling for DDMA When last descriptor in a descriptor list completed with XferComplete interrupt, core switching to handle next descriptor and assert BNA interrupt. Both these interrupts are set while dwc2_hsotg_epint() handler called. Each interrupt should be handled separately: first XferComplete interrupt then BNA interrupt, otherwise last completed transfer will not be giveback to function driver as completed request. Fixes: 729cac693eec ("usb: dwc2: Change ISOC DDMA flow") Cc: stable Signed-off-by: Minas Harutyunyan Link: https://lore.kernel.org/r/a36981accc26cd674c5d8f8da6164344b94ec1fe.1631386531.git.Minas.Harutyunyan@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/gadget.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index f09cbdfac9df..11d85a6e0b0d 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -3067,9 +3067,7 @@ static void dwc2_hsotg_epint(struct dwc2_hsotg *hsotg, unsigned int idx, /* In DDMA handle isochronous requests separately */ if (using_desc_dma(hsotg) && hs_ep->isochronous) { - /* XferCompl set along with BNA */ - if (!(ints & DXEPINT_BNAINTR)) - dwc2_gadget_complete_isoc_request_ddma(hs_ep); + dwc2_gadget_complete_isoc_request_ddma(hs_ep); } else if (dir_in) { /* * We get OutDone from the FIFO, so we only From 595091a1426a3b2625dad322f69fe569dc9d8943 Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Thu, 9 Sep 2021 10:48:10 -0700 Subject: [PATCH 167/543] usb: gadget: f_uac2: Add missing companion descriptor for feedback EP The f_uac2 function fails to enumerate when connected in SuperSpeed due to the feedback endpoint missing the companion descriptor. Add a new ss_epin_fback_desc_comp descriptor and append it behind the ss_epin_fback_desc both in the static definition of the ss_audio_desc structure as well as its dynamic construction in setup_headers(). Fixes: 24f779dac8f3 ("usb: gadget: f_uac2/u_audio: add feedback endpoint support") Cc: stable Signed-off-by: Jack Pham Link: https://lore.kernel.org/r/20210909174811.12534-2-jackp@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_uac2.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index 3c34995276e7..d89c1ebb07f4 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -406,6 +406,14 @@ static struct usb_endpoint_descriptor ss_epin_fback_desc = { .bInterval = 4, }; +static struct usb_ss_ep_comp_descriptor ss_epin_fback_desc_comp = { + .bLength = sizeof(ss_epin_fback_desc_comp), + .bDescriptorType = USB_DT_SS_ENDPOINT_COMP, + .bMaxBurst = 0, + .bmAttributes = 0, + .wBytesPerInterval = cpu_to_le16(4), +}; + /* Audio Streaming IN Interface - Alt0 */ static struct usb_interface_descriptor std_as_in_if0_desc = { @@ -597,6 +605,7 @@ static struct usb_descriptor_header *ss_audio_desc[] = { (struct usb_descriptor_header *)&ss_epout_desc_comp, (struct usb_descriptor_header *)&as_iso_out_desc, (struct usb_descriptor_header *)&ss_epin_fback_desc, + (struct usb_descriptor_header *)&ss_epin_fback_desc_comp, (struct usb_descriptor_header *)&std_as_in_if0_desc, (struct usb_descriptor_header *)&std_as_in_if1_desc, @@ -705,6 +714,7 @@ static void setup_headers(struct f_uac2_opts *opts, { struct usb_ss_ep_comp_descriptor *epout_desc_comp = NULL; struct usb_ss_ep_comp_descriptor *epin_desc_comp = NULL; + struct usb_ss_ep_comp_descriptor *epin_fback_desc_comp = NULL; struct usb_endpoint_descriptor *epout_desc; struct usb_endpoint_descriptor *epin_desc; struct usb_endpoint_descriptor *epin_fback_desc; @@ -730,6 +740,7 @@ static void setup_headers(struct f_uac2_opts *opts, epout_desc_comp = &ss_epout_desc_comp; epin_desc_comp = &ss_epin_desc_comp; epin_fback_desc = &ss_epin_fback_desc; + epin_fback_desc_comp = &ss_epin_fback_desc_comp; ep_int_desc = &ss_ep_int_desc; } @@ -773,8 +784,11 @@ static void setup_headers(struct f_uac2_opts *opts, headers[i++] = USBDHDR(&as_iso_out_desc); - if (EPOUT_FBACK_IN_EN(opts)) + if (EPOUT_FBACK_IN_EN(opts)) { headers[i++] = USBDHDR(epin_fback_desc); + if (epin_fback_desc_comp) + headers[i++] = USBDHDR(epin_fback_desc_comp); + } } if (EPIN_EN(opts)) { From f0e8a206a2a53a919e1709c654cb65d519f7befb Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Thu, 9 Sep 2021 10:48:11 -0700 Subject: [PATCH 168/543] usb: gadget: f_uac2: Populate SS descriptors' wBytesPerInterval For Isochronous endpoints, the SS companion descriptor's wBytesPerInterval field is required to reserve bus time in order to transmit the required payload during the service interval. If left at 0, the UAC2 function is unable to transact data on its playback or capture endpoints in SuperSpeed mode. Since f_uac2 currently does not support any bursting this value can be exactly equal to the calculated wMaxPacketSize. Tested with Windows 10 as a host. Fixes: f8cb3d556be3 ("usb: f_uac2: adds support for SS and SSP") Cc: stable Signed-off-by: Jack Pham Link: https://lore.kernel.org/r/20210909174811.12534-3-jackp@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_uac2.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index d89c1ebb07f4..be864560bfea 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -1178,6 +1178,9 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn) agdev->out_ep_maxpsize = max_t(u16, agdev->out_ep_maxpsize, le16_to_cpu(ss_epout_desc.wMaxPacketSize)); + ss_epin_desc_comp.wBytesPerInterval = ss_epin_desc.wMaxPacketSize; + ss_epout_desc_comp.wBytesPerInterval = ss_epout_desc.wMaxPacketSize; + // HS and SS endpoint addresses are copied from autoconfigured FS descriptors hs_ep_int_desc.bEndpointAddress = fs_ep_int_desc.bEndpointAddress; hs_epout_desc.bEndpointAddress = fs_epout_desc.bEndpointAddress; From 8d753db5c227d1f403c4bc9cae4ae02c862413cd Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 8 Sep 2021 21:55:56 +0200 Subject: [PATCH 169/543] misc: genwqe: Fixes DMA mask setting Commit 505b08777d78 ("misc: genwqe: Use dma_set_mask_and_coherent to simplify code") changed the logic in the code. Instead of a ||, a && should have been used to keep the code the same. Fixes: 505b08777d78 ("misc: genwqe: Use dma_set_mask_and_coherent to simplify code") Cc: stable Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/be49835baa8ba6daba5813b399edf6300f7fdbda.1631130862.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- drivers/misc/genwqe/card_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/genwqe/card_base.c b/drivers/misc/genwqe/card_base.c index 2e1befbd1ad9..693981891870 100644 --- a/drivers/misc/genwqe/card_base.c +++ b/drivers/misc/genwqe/card_base.c @@ -1090,7 +1090,7 @@ static int genwqe_pci_setup(struct genwqe_dev *cd) /* check for 64-bit DMA address supported (DAC) */ /* check for 32-bit DMA address supported (SAC) */ - if (dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64)) || + if (dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64)) && dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32))) { dev_err(&pci_dev->dev, "err: neither DMA32 nor DMA64 supported\n"); From 06e49073dfba24df4b1073a068631b13a0039c34 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 1 Sep 2021 17:38:06 -0700 Subject: [PATCH 170/543] tty: synclink_gt: rename a conflicting function name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'set_signals()' in synclink_gt.c conflicts with an exported symbol in arch/um/, so change set_signals() to set_gtsignals(). Keep the function names similar by also changing get_signals() to get_gtsignals(). ../drivers/tty/synclink_gt.c:442:13: error: conflicting types for ‘set_signals’ static void set_signals(struct slgt_info *info); ^~~~~~~~~~~ In file included from ../include/linux/irqflags.h:16:0, from ../include/linux/spinlock.h:58, from ../include/linux/mm_types.h:9, from ../include/linux/buildid.h:5, from ../include/linux/module.h:14, from ../drivers/tty/synclink_gt.c:46: ../arch/um/include/asm/irqflags.h:6:5: note: previous declaration of ‘set_signals’ was here int set_signals(int enable); ^~~~~~~~~~~ Fixes: 705b6c7b34f2 ("[PATCH] new driver synclink_gt") Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: Paul Fulghum Signed-off-by: Randy Dunlap Link: https://lore.kernel.org/r/20210902003806.17054-1-rdunlap@infradead.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/synclink_gt.c | 44 +++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c index a9acd93e85b7..25c558e65ece 100644 --- a/drivers/tty/synclink_gt.c +++ b/drivers/tty/synclink_gt.c @@ -438,8 +438,8 @@ static void reset_tbufs(struct slgt_info *info); static void tdma_reset(struct slgt_info *info); static bool tx_load(struct slgt_info *info, const char *buf, unsigned int count); -static void get_signals(struct slgt_info *info); -static void set_signals(struct slgt_info *info); +static void get_gtsignals(struct slgt_info *info); +static void set_gtsignals(struct slgt_info *info); static void set_rate(struct slgt_info *info, u32 data_rate); static void bh_transmit(struct slgt_info *info); @@ -720,7 +720,7 @@ static void set_termios(struct tty_struct *tty, struct ktermios *old_termios) if ((old_termios->c_cflag & CBAUD) && !C_BAUD(tty)) { info->signals &= ~(SerialSignal_RTS | SerialSignal_DTR); spin_lock_irqsave(&info->lock,flags); - set_signals(info); + set_gtsignals(info); spin_unlock_irqrestore(&info->lock,flags); } @@ -730,7 +730,7 @@ static void set_termios(struct tty_struct *tty, struct ktermios *old_termios) if (!C_CRTSCTS(tty) || !tty_throttled(tty)) info->signals |= SerialSignal_RTS; spin_lock_irqsave(&info->lock,flags); - set_signals(info); + set_gtsignals(info); spin_unlock_irqrestore(&info->lock,flags); } @@ -1181,7 +1181,7 @@ static inline void line_info(struct seq_file *m, struct slgt_info *info) /* output current serial signal states */ spin_lock_irqsave(&info->lock,flags); - get_signals(info); + get_gtsignals(info); spin_unlock_irqrestore(&info->lock,flags); stat_buf[0] = 0; @@ -1281,7 +1281,7 @@ static void throttle(struct tty_struct * tty) if (C_CRTSCTS(tty)) { spin_lock_irqsave(&info->lock,flags); info->signals &= ~SerialSignal_RTS; - set_signals(info); + set_gtsignals(info); spin_unlock_irqrestore(&info->lock,flags); } } @@ -1306,7 +1306,7 @@ static void unthrottle(struct tty_struct * tty) if (C_CRTSCTS(tty)) { spin_lock_irqsave(&info->lock,flags); info->signals |= SerialSignal_RTS; - set_signals(info); + set_gtsignals(info); spin_unlock_irqrestore(&info->lock,flags); } } @@ -1477,7 +1477,7 @@ static int hdlcdev_open(struct net_device *dev) /* inform generic HDLC layer of current DCD status */ spin_lock_irqsave(&info->lock, flags); - get_signals(info); + get_gtsignals(info); spin_unlock_irqrestore(&info->lock, flags); if (info->signals & SerialSignal_DCD) netif_carrier_on(dev); @@ -2229,7 +2229,7 @@ static void isr_txeom(struct slgt_info *info, unsigned short status) if (info->params.mode != MGSL_MODE_ASYNC && info->drop_rts_on_tx_done) { info->signals &= ~SerialSignal_RTS; info->drop_rts_on_tx_done = false; - set_signals(info); + set_gtsignals(info); } #if SYNCLINK_GENERIC_HDLC @@ -2394,7 +2394,7 @@ static void shutdown(struct slgt_info *info) if (!info->port.tty || info->port.tty->termios.c_cflag & HUPCL) { info->signals &= ~(SerialSignal_RTS | SerialSignal_DTR); - set_signals(info); + set_gtsignals(info); } flush_cond_wait(&info->gpio_wait_q); @@ -2422,7 +2422,7 @@ static void program_hw(struct slgt_info *info) else async_mode(info); - set_signals(info); + set_gtsignals(info); info->dcd_chkcount = 0; info->cts_chkcount = 0; @@ -2430,7 +2430,7 @@ static void program_hw(struct slgt_info *info) info->dsr_chkcount = 0; slgt_irq_on(info, IRQ_DCD | IRQ_CTS | IRQ_DSR | IRQ_RI); - get_signals(info); + get_gtsignals(info); if (info->netcount || (info->port.tty && info->port.tty->termios.c_cflag & CREAD)) @@ -2667,7 +2667,7 @@ static int wait_mgsl_event(struct slgt_info *info, int __user *mask_ptr) spin_lock_irqsave(&info->lock,flags); /* return immediately if state matches requested events */ - get_signals(info); + get_gtsignals(info); s = info->signals; events = mask & @@ -3085,7 +3085,7 @@ static int tiocmget(struct tty_struct *tty) unsigned long flags; spin_lock_irqsave(&info->lock,flags); - get_signals(info); + get_gtsignals(info); spin_unlock_irqrestore(&info->lock,flags); result = ((info->signals & SerialSignal_RTS) ? TIOCM_RTS:0) + @@ -3124,7 +3124,7 @@ static int tiocmset(struct tty_struct *tty, info->signals &= ~SerialSignal_DTR; spin_lock_irqsave(&info->lock,flags); - set_signals(info); + set_gtsignals(info); spin_unlock_irqrestore(&info->lock,flags); return 0; } @@ -3135,7 +3135,7 @@ static int carrier_raised(struct tty_port *port) struct slgt_info *info = container_of(port, struct slgt_info, port); spin_lock_irqsave(&info->lock,flags); - get_signals(info); + get_gtsignals(info); spin_unlock_irqrestore(&info->lock,flags); return (info->signals & SerialSignal_DCD) ? 1 : 0; } @@ -3150,7 +3150,7 @@ static void dtr_rts(struct tty_port *port, int on) info->signals |= SerialSignal_RTS | SerialSignal_DTR; else info->signals &= ~(SerialSignal_RTS | SerialSignal_DTR); - set_signals(info); + set_gtsignals(info); spin_unlock_irqrestore(&info->lock,flags); } @@ -3948,10 +3948,10 @@ static void tx_start(struct slgt_info *info) if (info->params.mode != MGSL_MODE_ASYNC) { if (info->params.flags & HDLC_FLAG_AUTO_RTS) { - get_signals(info); + get_gtsignals(info); if (!(info->signals & SerialSignal_RTS)) { info->signals |= SerialSignal_RTS; - set_signals(info); + set_gtsignals(info); info->drop_rts_on_tx_done = true; } } @@ -4005,7 +4005,7 @@ static void reset_port(struct slgt_info *info) rx_stop(info); info->signals &= ~(SerialSignal_RTS | SerialSignal_DTR); - set_signals(info); + set_gtsignals(info); slgt_irq_off(info, IRQ_ALL | IRQ_MASTER); } @@ -4427,7 +4427,7 @@ static void tx_set_idle(struct slgt_info *info) /* * get state of V24 status (input) signals */ -static void get_signals(struct slgt_info *info) +static void get_gtsignals(struct slgt_info *info) { unsigned short status = rd_reg16(info, SSR); @@ -4489,7 +4489,7 @@ static void msc_set_vcr(struct slgt_info *info) /* * set state of V24 control (output) signals */ -static void set_signals(struct slgt_info *info) +static void set_gtsignals(struct slgt_info *info) { unsigned char val = rd_reg8(info, VCR); if (info->signals & SerialSignal_DTR) From da546d6b748e570aa6e44acaa515cfc43baeaa0d Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Fri, 3 Sep 2021 00:03:25 +0200 Subject: [PATCH 171/543] arm64: dts: qcom: ipq8074: remove USB tx-fifo-resize property tx-fifo-resize is now added by default by the dwc3-qcom driver to the SNPS DWC3 child node. So, lets drop the tx-fifo-resize property from dwc3-qcom nodes as having it there will cause the dwc3-qcom driver to error and abort probe with: [ 1.362938] dwc3-qcom 8af8800.usb: unable to add property [ 1.368405] dwc3-qcom 8af8800.usb: failed to register DWC3 Core, err=-17 Fixes: cefdd52fa045 ("usb: dwc3: dwc3-qcom: Enable tx-fifo-resize property by default") Signed-off-by: Robert Marko Link: https://lore.kernel.org/r/20210902220325.1783567-1-robimarko@gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/ipq8074.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi index a620ac0d0b19..db333001df4d 100644 --- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi @@ -487,7 +487,6 @@ interrupts = ; phys = <&qusb_phy_0>, <&usb0_ssphy>; phy-names = "usb2-phy", "usb3-phy"; - tx-fifo-resize; snps,is-utmi-l1-suspend; snps,hird-threshold = /bits/ 8 <0x0>; snps,dis_u2_susphy_quirk; @@ -528,7 +527,6 @@ interrupts = ; phys = <&qusb_phy_1>, <&usb1_ssphy>; phy-names = "usb2-phy", "usb3-phy"; - tx-fifo-resize; snps,is-utmi-l1-suspend; snps,hird-threshold = /bits/ 8 <0x0>; snps,dis_u2_susphy_quirk; From 7049d853cfb928f50b6041cb4a5c6d6c1d8dd201 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 14 Sep 2021 11:11:19 +0200 Subject: [PATCH 172/543] tty: unexport tty_ldisc_release Initially, tty_ldisc_release() was exported for speakup (spk_tty) while in staging. Later, the call to this function was removed as it was bogus anyway. Remove the export now. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20210914091134.17426-1-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_ldisc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index 756a4bfa6a69..3e4e0b20b4bb 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -812,7 +812,6 @@ void tty_ldisc_release(struct tty_struct *tty) tty_ldisc_debug(tty, "released\n"); } -EXPORT_SYMBOL_GPL(tty_ldisc_release); /** * tty_ldisc_init - ldisc setup for new tty From 25a1433216489de4abc889910f744e952cb6dbae Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 6 Sep 2021 21:35:48 +0900 Subject: [PATCH 173/543] mcb: fix error handling in mcb_alloc_bus() There are two bugs: 1) If ida_simple_get() fails then this code calls put_device(carrier) but we haven't yet called get_device(carrier) and probably that leads to a use after free. 2) After device_initialize() then we need to use put_device() to release the bus. This will free the internal resources tied to the device and call mcb_free_bus() which will free the rest. Fixes: 5d9e2ab9fea4 ("mcb: Implement bus->dev.release callback") Fixes: 18d288198099 ("mcb: Correctly initialize the bus's device") Cc: stable@vger.kernel.org Signed-off-by: Dan Carpenter Signed-off-by: Johannes Thumshirn Link: https://lore.kernel.org/r/32e160cf6864ce77f9d62948338e24db9fd8ead9.1630931319.git.johannes.thumshirn@wdc.com Signed-off-by: Greg Kroah-Hartman --- drivers/mcb/mcb-core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/mcb/mcb-core.c b/drivers/mcb/mcb-core.c index edf4ee6eff25..cf128b3471d7 100644 --- a/drivers/mcb/mcb-core.c +++ b/drivers/mcb/mcb-core.c @@ -275,8 +275,8 @@ struct mcb_bus *mcb_alloc_bus(struct device *carrier) bus_nr = ida_simple_get(&mcb_ida, 0, 0, GFP_KERNEL); if (bus_nr < 0) { - rc = bus_nr; - goto err_free; + kfree(bus); + return ERR_PTR(bus_nr); } bus->bus_nr = bus_nr; @@ -291,12 +291,12 @@ struct mcb_bus *mcb_alloc_bus(struct device *carrier) dev_set_name(&bus->dev, "mcb:%d", bus_nr); rc = device_add(&bus->dev); if (rc) - goto err_free; + goto err_put; return bus; -err_free: - put_device(carrier); - kfree(bus); + +err_put: + put_device(&bus->dev); return ERR_PTR(rc); } EXPORT_SYMBOL_NS_GPL(mcb_alloc_bus, MCB); From 3c3c8e88c8712bfe06cd10d7ca77a94a33610cd6 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 13 Sep 2021 21:01:14 -0500 Subject: [PATCH 174/543] platform/x86: amd-pmc: Increase the response register timeout There have been reports of approximately a 0.9%-1.7% failure rate in SMU communication timeouts with s0i3 entry on some OEM designs. Currently the design in amd-pmc is to try every 100us for up to 20ms. However the GPU driver which also communicates with the SMU using a mailbox register which the driver polls every 1us for up to 2000ms. In the GPU driver this was increased by commit 055162645a40 ("drm/amd/pm: increase time out value when sending msg to SMU") Increase the maximum timeout used by amd-pmc to 2000ms to match this behavior. This has been shown to improve the stability for machines that randomly have failures. Cc: stable@kernel.org Reported-by: Julian Sikorski BugLink: https://gitlab.freedesktop.org/drm/amd/-/issues/1629 Signed-off-by: Mario Limonciello Acked-by: Shyam Sundar S K Link: https://lore.kernel.org/r/20210914020115.655-1-mario.limonciello@amd.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index 3481479a2942..d6a7c896ac86 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -71,7 +71,7 @@ #define AMD_CPU_ID_YC 0x14B5 #define PMC_MSG_DELAY_MIN_US 100 -#define RESPONSE_REGISTER_LOOP_MAX 200 +#define RESPONSE_REGISTER_LOOP_MAX 20000 #define SOC_SUBSYSTEM_IP_MAX 12 #define DELAY_MIN_US 2000 From 550ac9c1aaaaf51fd42e20d461f0b1cdbd55b3d2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Sep 2021 11:08:36 -0700 Subject: [PATCH 175/543] net-caif: avoid user-triggerable WARN_ON(1) syszbot triggers this warning, which looks something we can easily prevent. If we initialize priv->list_field in chnl_net_init(), then always use list_del_init(), we can remove robust_list_del() completely. WARNING: CPU: 0 PID: 3233 at net/caif/chnl_net.c:67 robust_list_del net/caif/chnl_net.c:67 [inline] WARNING: CPU: 0 PID: 3233 at net/caif/chnl_net.c:67 chnl_net_uninit+0xc9/0x2e0 net/caif/chnl_net.c:375 Modules linked in: CPU: 0 PID: 3233 Comm: syz-executor.3 Not tainted 5.14.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:robust_list_del net/caif/chnl_net.c:67 [inline] RIP: 0010:chnl_net_uninit+0xc9/0x2e0 net/caif/chnl_net.c:375 Code: 89 eb e8 3a a3 ba f8 48 89 d8 48 c1 e8 03 42 80 3c 28 00 0f 85 bf 01 00 00 48 81 fb 00 14 4e 8d 48 8b 2b 75 d0 e8 17 a3 ba f8 <0f> 0b 5b 5d 41 5c 41 5d e9 0a a3 ba f8 4c 89 e3 e8 02 a3 ba f8 4c RSP: 0018:ffffc90009067248 EFLAGS: 00010202 RAX: 0000000000008780 RBX: ffffffff8d4e1400 RCX: ffffc9000fd34000 RDX: 0000000000040000 RSI: ffffffff88bb6e49 RDI: 0000000000000003 RBP: ffff88802cd9ee08 R08: 0000000000000000 R09: ffffffff8d0e6647 R10: ffffffff88bb6dc2 R11: 0000000000000000 R12: ffff88803791ae08 R13: dffffc0000000000 R14: 00000000e600ffce R15: ffff888073ed3480 FS: 00007fed10fa0700(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b2c322000 CR3: 00000000164a6000 CR4: 00000000001506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: register_netdevice+0xadf/0x1500 net/core/dev.c:10347 ipcaif_newlink+0x4c/0x260 net/caif/chnl_net.c:468 __rtnl_newlink+0x106d/0x1750 net/core/rtnetlink.c:3458 rtnl_newlink+0x64/0xa0 net/core/rtnetlink.c:3506 rtnetlink_rcv_msg+0x413/0xb80 net/core/rtnetlink.c:5572 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2504 netlink_unicast_kernel net/netlink/af_netlink.c:1314 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1340 netlink_sendmsg+0x86d/0xdb0 net/netlink/af_netlink.c:1929 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:724 __sys_sendto+0x21c/0x320 net/socket.c:2036 __do_sys_sendto net/socket.c:2048 [inline] __se_sys_sendto net/socket.c:2044 [inline] __x64_sys_sendto+0xdd/0x1b0 net/socket.c:2044 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: cc36a070b590 ("net-caif: add CAIF netdevice") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/caif/chnl_net.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 37b67194c0df..414dc5671c45 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -53,20 +53,6 @@ struct chnl_net { enum caif_states state; }; -static void robust_list_del(struct list_head *delete_node) -{ - struct list_head *list_node; - struct list_head *n; - ASSERT_RTNL(); - list_for_each_safe(list_node, n, &chnl_net_list) { - if (list_node == delete_node) { - list_del(list_node); - return; - } - } - WARN_ON(1); -} - static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt) { struct sk_buff *skb; @@ -364,6 +350,7 @@ static int chnl_net_init(struct net_device *dev) ASSERT_RTNL(); priv = netdev_priv(dev); strncpy(priv->name, dev->name, sizeof(priv->name)); + INIT_LIST_HEAD(&priv->list_field); return 0; } @@ -372,7 +359,7 @@ static void chnl_net_uninit(struct net_device *dev) struct chnl_net *priv; ASSERT_RTNL(); priv = netdev_priv(dev); - robust_list_del(&priv->list_field); + list_del_init(&priv->list_field); } static const struct net_device_ops netdev_ops = { @@ -537,7 +524,7 @@ static void __exit chnl_exit_module(void) rtnl_lock(); list_for_each_safe(list_node, _tmp, &chnl_net_list) { dev = list_entry(list_node, struct chnl_net, list_field); - list_del(list_node); + list_del_init(list_node); delete_device(dev); } rtnl_unlock(); From d53c66594dc7606b191bb2976901a691d291a316 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Mon, 30 Aug 2021 15:02:09 +0300 Subject: [PATCH 176/543] habanalabs: fix potential race in interrupt wait ioctl We have a potential race where a user interrupt can be received in between user thread value comparison and before request was added to wait list. This means that if no consecutive interrupt will be received, user thread will timeout and fail. The solution is to add the request to wait list before we perform the comparison. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../habanalabs/common/command_submission.c | 35 +++++++++++-------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 7b0516cf808b..9a8b9191c28c 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -2740,10 +2740,20 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, else interrupt = &hdev->user_interrupt[interrupt_offset]; + /* Add pending user interrupt to relevant list for the interrupt + * handler to monitor + */ + spin_lock_irqsave(&interrupt->wait_list_lock, flags); + list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head); + spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); + + /* We check for completion value as interrupt could have been received + * before we added the node to the wait list + */ if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) { dev_err(hdev->dev, "Failed to copy completion value from user\n"); rc = -EFAULT; - goto free_fence; + goto remove_pending_user_interrupt; } if (completion_value >= target_value) @@ -2752,14 +2762,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, *status = CS_WAIT_STATUS_BUSY; if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED)) - goto free_fence; - - /* Add pending user interrupt to relevant list for the interrupt - * handler to monitor - */ - spin_lock_irqsave(&interrupt->wait_list_lock, flags); - list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head); - spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); + goto remove_pending_user_interrupt; wait_again: /* Wait for interrupt handler to signal completion */ @@ -2770,6 +2773,15 @@ wait_again: * If comparison fails, keep waiting until timeout expires */ if (completion_rc > 0) { + spin_lock_irqsave(&interrupt->wait_list_lock, flags); + /* reinit_completion must be called before we check for user + * completion value, otherwise, if interrupt is received after + * the comparison and before the next wait_for_completion, + * we will reach timeout and fail + */ + reinit_completion(&pend->fence.completion); + spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); + if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) { dev_err(hdev->dev, "Failed to copy completion value from user\n"); rc = -EFAULT; @@ -2780,11 +2792,7 @@ wait_again: if (completion_value >= target_value) { *status = CS_WAIT_STATUS_COMPLETED; } else { - spin_lock_irqsave(&interrupt->wait_list_lock, flags); - reinit_completion(&pend->fence.completion); timeout = completion_rc; - - spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); goto wait_again; } } else if (completion_rc == -ERESTARTSYS) { @@ -2802,7 +2810,6 @@ remove_pending_user_interrupt: list_del(&pend->wait_list_node); spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); -free_fence: kfree(pend); hl_ctx_put(ctx); From beb71ee36e4de93c2b21d916fb94558333d99974 Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Wed, 1 Sep 2021 15:48:04 +0300 Subject: [PATCH 177/543] habanalabs: fix kernel OOPs related to staged cs In case of single staged cs with both first/last indications set, we reach a scenario where in cs_release function flow we don't cancel the TDR work before freeing the cs memory, this lead to kernel OOPs since when the timer expires the work pointer will be freed already. In addition treat wait encaps cs "not found" handle as "OK" for the user in order to keep the user interface for both legacy and encpas signal/wait features the same. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../habanalabs/common/command_submission.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 9a8b9191c28c..deb080830ecb 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -405,7 +405,7 @@ static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs) static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs) { bool next_entry_found = false; - struct hl_cs *next; + struct hl_cs *next, *first_cs; if (!cs_needs_timeout(cs)) return; @@ -415,9 +415,16 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs) /* We need to handle tdr only once for the complete staged submission. * Hence, we choose the CS that reaches this function first which is * the CS marked as 'staged_last'. + * In case single staged cs was submitted which has both first and last + * indications, then "cs_find_first" below will return NULL, since we + * removed the cs node from the list before getting here, + * in such cases just continue with the cs to cancel it's TDR work. */ - if (cs->staged_cs && cs->staged_last) - cs = hl_staged_cs_find_first(hdev, cs->staged_sequence); + if (cs->staged_cs && cs->staged_last) { + first_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence); + if (first_cs) + cs = first_cs; + } spin_unlock(&hdev->cs_mirror_lock); @@ -2026,9 +2033,10 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, spin_unlock(&ctx->sig_mgr.lock); if (!handle_found) { - dev_err(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n", + /* treat as signal CS already finished */ + dev_dbg(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n", signal_seq); - rc = -EINVAL; + rc = 0; goto free_cs_chunk_array; } From 3e08f157c2587fc7ada93abed41aae19bcbf8a6b Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Wed, 30 Dec 2020 08:05:18 +0200 Subject: [PATCH 178/543] habanalabs/gaudi: use direct MSI in single mode Due to FLR scenario when running inside a VM, we must not use indirect MSI because it might cause some issues on VM destroy. In a VM we use single MSI mode in contrary to multi MSI mode which is used in bare-metal. Hence direct MSI should be used in single MSI mode only. Signed-off-by: Omer Shpigelman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 9 ++++++--- .../misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h | 2 ++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 383865be3c2c..5249f8fd4d59 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -5802,6 +5802,7 @@ static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, { struct gaudi_device *gaudi = hdev->asic_specific; struct packet_msg_prot *cq_pkt; + u64 msi_addr; u32 tmp; cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2); @@ -5823,10 +5824,12 @@ static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, cq_pkt->ctl = cpu_to_le32(tmp); cq_pkt->value = cpu_to_le32(1); - if (!gaudi->multi_msi_mode) - msi_vec = 0; + if (gaudi->multi_msi_mode) + msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4; + else + msi_addr = mmPCIE_CORE_MSI_REQ; - cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4); + cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr); } static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val) diff --git a/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h b/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h index ffdfbd9b3220..1a6576666794 100644 --- a/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h +++ b/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h @@ -308,6 +308,8 @@ #define mmPCIE_AUX_FLR_CTRL 0xC07394 #define mmPCIE_AUX_DBI 0xC07490 +#define mmPCIE_CORE_MSI_REQ 0xC04100 + #define mmPSOC_PCI_PLL_NR 0xC72100 #define mmSRAM_W_PLL_NR 0x4C8100 #define mmPSOC_HBM_PLL_NR 0xC74100 From d09ff62c820b5950ab9958e77620a8498efe9386 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Thu, 2 Sep 2021 09:47:53 +0300 Subject: [PATCH 179/543] habanalabs: fail collective wait when not supported As collective wait operation is required only when NIC ports are available, we disable the option to submit a CS in case all the ports are disabled, which is the current situation in the upstream driver. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_submission.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index deb080830ecb..5b7de857fbc1 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -1995,6 +1995,15 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, goto free_cs_chunk_array; } + if (!hdev->nic_ports_mask) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); + dev_err(hdev->dev, + "Collective operations not supported when NIC ports are disabled"); + rc = -EINVAL; + goto free_cs_chunk_array; + } + collective_engine_id = chunk->collective_engine_id; } From fcffb759f7d53b8e6c6e91804eec994205099dd3 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 3 Sep 2021 09:02:03 +0100 Subject: [PATCH 180/543] habanalabs: Fix spelling mistake "FEADBACK" -> "FEEDBACK" There is a spelling mistake in a literal string. Fix it. Signed-off-by: Colin Ian King Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 5249f8fd4d59..14da87b38e83 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -395,7 +395,7 @@ static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = { static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = { { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" }, - { .id = 201, .name = "MON_OBJ_DMA_UP_FEADBACK_RESET" }, + { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" }, { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" }, { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" }, { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" }, From 0a5ff77bf0a94468d541735f919a633f167787e9 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 12 Sep 2021 10:25:49 +0300 Subject: [PATCH 181/543] habanalabs/gaudi: fix LBW RR configuration Couple of fixes to the LBW RR configuration: 1. Add missing configuration of the SM RR registers in the DMA_IF. 2. Remove HBW range that doesn't belong. 3. Add entire gap + DBG area, from end of TPC7 to end of entire DBG space. Signed-off-by: Oded Gabbay --- .../misc/habanalabs/gaudi/gaudi_security.c | 115 ++++++++++-------- 1 file changed, 67 insertions(+), 48 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi_security.c b/drivers/misc/habanalabs/gaudi/gaudi_security.c index cb265c00cf73..25ac87cebd45 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi_security.c +++ b/drivers/misc/habanalabs/gaudi/gaudi_security.c @@ -8,16 +8,21 @@ #include "gaudiP.h" #include "../include/gaudi/asic_reg/gaudi_regs.h" -#define GAUDI_NUMBER_OF_RR_REGS 24 -#define GAUDI_NUMBER_OF_LBW_RANGES 12 +#define GAUDI_NUMBER_OF_LBW_RR_REGS 28 +#define GAUDI_NUMBER_OF_HBW_RR_REGS 24 +#define GAUDI_NUMBER_OF_LBW_RANGES 10 -static u64 gaudi_rr_lbw_hit_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_lbw_hit_aw_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = { + mmDMA_IF_W_S_SOB_HIT_WPROT, mmDMA_IF_W_S_DMA0_HIT_WPROT, mmDMA_IF_W_S_DMA1_HIT_WPROT, + mmDMA_IF_E_S_SOB_HIT_WPROT, mmDMA_IF_E_S_DMA0_HIT_WPROT, mmDMA_IF_E_S_DMA1_HIT_WPROT, + mmDMA_IF_W_N_SOB_HIT_WPROT, mmDMA_IF_W_N_DMA0_HIT_WPROT, mmDMA_IF_W_N_DMA1_HIT_WPROT, + mmDMA_IF_E_N_SOB_HIT_WPROT, mmDMA_IF_E_N_DMA0_HIT_WPROT, mmDMA_IF_E_N_DMA1_HIT_WPROT, mmSIF_RTR_0_LBW_RANGE_PROT_HIT_AW, @@ -38,13 +43,17 @@ static u64 gaudi_rr_lbw_hit_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_7_LBW_RANGE_PROT_HIT_AW, }; -static u64 gaudi_rr_lbw_hit_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_lbw_hit_ar_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = { + mmDMA_IF_W_S_SOB_HIT_RPROT, mmDMA_IF_W_S_DMA0_HIT_RPROT, mmDMA_IF_W_S_DMA1_HIT_RPROT, + mmDMA_IF_E_S_SOB_HIT_RPROT, mmDMA_IF_E_S_DMA0_HIT_RPROT, mmDMA_IF_E_S_DMA1_HIT_RPROT, + mmDMA_IF_W_N_SOB_HIT_RPROT, mmDMA_IF_W_N_DMA0_HIT_RPROT, mmDMA_IF_W_N_DMA1_HIT_RPROT, + mmDMA_IF_E_N_SOB_HIT_RPROT, mmDMA_IF_E_N_DMA0_HIT_RPROT, mmDMA_IF_E_N_DMA1_HIT_RPROT, mmSIF_RTR_0_LBW_RANGE_PROT_HIT_AR, @@ -65,13 +74,17 @@ static u64 gaudi_rr_lbw_hit_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_7_LBW_RANGE_PROT_HIT_AR, }; -static u64 gaudi_rr_lbw_min_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_lbw_min_aw_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = { + mmDMA_IF_W_S_SOB_MIN_WPROT_0, mmDMA_IF_W_S_DMA0_MIN_WPROT_0, mmDMA_IF_W_S_DMA1_MIN_WPROT_0, + mmDMA_IF_E_S_SOB_MIN_WPROT_0, mmDMA_IF_E_S_DMA0_MIN_WPROT_0, mmDMA_IF_E_S_DMA1_MIN_WPROT_0, + mmDMA_IF_W_N_SOB_MIN_WPROT_0, mmDMA_IF_W_N_DMA0_MIN_WPROT_0, mmDMA_IF_W_N_DMA1_MIN_WPROT_0, + mmDMA_IF_E_N_SOB_MIN_WPROT_0, mmDMA_IF_E_N_DMA0_MIN_WPROT_0, mmDMA_IF_E_N_DMA1_MIN_WPROT_0, mmSIF_RTR_0_LBW_RANGE_PROT_MIN_AW_0, @@ -92,13 +105,17 @@ static u64 gaudi_rr_lbw_min_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_7_LBW_RANGE_PROT_MIN_AW_0, }; -static u64 gaudi_rr_lbw_max_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_lbw_max_aw_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = { + mmDMA_IF_W_S_SOB_MAX_WPROT_0, mmDMA_IF_W_S_DMA0_MAX_WPROT_0, mmDMA_IF_W_S_DMA1_MAX_WPROT_0, + mmDMA_IF_E_S_SOB_MAX_WPROT_0, mmDMA_IF_E_S_DMA0_MAX_WPROT_0, mmDMA_IF_E_S_DMA1_MAX_WPROT_0, + mmDMA_IF_W_N_SOB_MAX_WPROT_0, mmDMA_IF_W_N_DMA0_MAX_WPROT_0, mmDMA_IF_W_N_DMA1_MAX_WPROT_0, + mmDMA_IF_E_N_SOB_MAX_WPROT_0, mmDMA_IF_E_N_DMA0_MAX_WPROT_0, mmDMA_IF_E_N_DMA1_MAX_WPROT_0, mmSIF_RTR_0_LBW_RANGE_PROT_MAX_AW_0, @@ -119,13 +136,17 @@ static u64 gaudi_rr_lbw_max_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_7_LBW_RANGE_PROT_MAX_AW_0, }; -static u64 gaudi_rr_lbw_min_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_lbw_min_ar_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = { + mmDMA_IF_W_S_SOB_MIN_RPROT_0, mmDMA_IF_W_S_DMA0_MIN_RPROT_0, mmDMA_IF_W_S_DMA1_MIN_RPROT_0, + mmDMA_IF_E_S_SOB_MIN_RPROT_0, mmDMA_IF_E_S_DMA0_MIN_RPROT_0, mmDMA_IF_E_S_DMA1_MIN_RPROT_0, + mmDMA_IF_W_N_SOB_MIN_RPROT_0, mmDMA_IF_W_N_DMA0_MIN_RPROT_0, mmDMA_IF_W_N_DMA1_MIN_RPROT_0, + mmDMA_IF_E_N_SOB_MIN_RPROT_0, mmDMA_IF_E_N_DMA0_MIN_RPROT_0, mmDMA_IF_E_N_DMA1_MIN_RPROT_0, mmSIF_RTR_0_LBW_RANGE_PROT_MIN_AR_0, @@ -146,13 +167,17 @@ static u64 gaudi_rr_lbw_min_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_7_LBW_RANGE_PROT_MIN_AR_0, }; -static u64 gaudi_rr_lbw_max_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_lbw_max_ar_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = { + mmDMA_IF_W_S_SOB_MAX_RPROT_0, mmDMA_IF_W_S_DMA0_MAX_RPROT_0, mmDMA_IF_W_S_DMA1_MAX_RPROT_0, + mmDMA_IF_E_S_SOB_MAX_RPROT_0, mmDMA_IF_E_S_DMA0_MAX_RPROT_0, mmDMA_IF_E_S_DMA1_MAX_RPROT_0, + mmDMA_IF_W_N_SOB_MAX_RPROT_0, mmDMA_IF_W_N_DMA0_MAX_RPROT_0, mmDMA_IF_W_N_DMA1_MAX_RPROT_0, + mmDMA_IF_E_N_SOB_MAX_RPROT_0, mmDMA_IF_E_N_DMA0_MAX_RPROT_0, mmDMA_IF_E_N_DMA1_MAX_RPROT_0, mmSIF_RTR_0_LBW_RANGE_PROT_MAX_AR_0, @@ -173,7 +198,7 @@ static u64 gaudi_rr_lbw_max_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_7_LBW_RANGE_PROT_MAX_AR_0, }; -static u64 gaudi_rr_hbw_hit_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_hbw_hit_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = { mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_HIT_AW, mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_HIT_AW, mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_HIT_AW, @@ -200,7 +225,7 @@ static u64 gaudi_rr_hbw_hit_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_CTRL_7_RANGE_SEC_HIT_AW }; -static u64 gaudi_rr_hbw_hit_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_hbw_hit_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = { mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_HIT_AR, mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_HIT_AR, mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_HIT_AR, @@ -227,7 +252,7 @@ static u64 gaudi_rr_hbw_hit_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_CTRL_7_RANGE_SEC_HIT_AR }; -static u64 gaudi_rr_hbw_base_low_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_hbw_base_low_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = { mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_BASE_LOW_AW_0, mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_BASE_LOW_AW_0, mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_BASE_LOW_AW_0, @@ -254,7 +279,7 @@ static u64 gaudi_rr_hbw_base_low_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_CTRL_7_RANGE_SEC_BASE_LOW_AW_0 }; -static u64 gaudi_rr_hbw_base_high_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_hbw_base_high_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = { mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_BASE_HIGH_AW_0, mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_BASE_HIGH_AW_0, mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_BASE_HIGH_AW_0, @@ -281,7 +306,7 @@ static u64 gaudi_rr_hbw_base_high_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_CTRL_7_RANGE_SEC_BASE_HIGH_AW_0 }; -static u64 gaudi_rr_hbw_mask_low_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_hbw_mask_low_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = { mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_MASK_LOW_AW_0, mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_MASK_LOW_AW_0, mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_MASK_LOW_AW_0, @@ -308,7 +333,7 @@ static u64 gaudi_rr_hbw_mask_low_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_CTRL_7_RANGE_SEC_MASK_LOW_AW_0 }; -static u64 gaudi_rr_hbw_mask_high_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_hbw_mask_high_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = { mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_MASK_HIGH_AW_0, mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_MASK_HIGH_AW_0, mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_MASK_HIGH_AW_0, @@ -335,7 +360,7 @@ static u64 gaudi_rr_hbw_mask_high_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_CTRL_7_RANGE_SEC_MASK_HIGH_AW_0 }; -static u64 gaudi_rr_hbw_base_low_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_hbw_base_low_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = { mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_BASE_LOW_AR_0, mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_BASE_LOW_AR_0, mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_BASE_LOW_AR_0, @@ -362,7 +387,7 @@ static u64 gaudi_rr_hbw_base_low_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_CTRL_7_RANGE_SEC_BASE_LOW_AR_0 }; -static u64 gaudi_rr_hbw_base_high_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_hbw_base_high_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = { mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_BASE_HIGH_AR_0, mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_BASE_HIGH_AR_0, mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_BASE_HIGH_AR_0, @@ -389,7 +414,7 @@ static u64 gaudi_rr_hbw_base_high_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_CTRL_7_RANGE_SEC_BASE_HIGH_AR_0 }; -static u64 gaudi_rr_hbw_mask_low_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_hbw_mask_low_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = { mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_MASK_LOW_AR_0, mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_MASK_LOW_AR_0, mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_MASK_LOW_AR_0, @@ -416,7 +441,7 @@ static u64 gaudi_rr_hbw_mask_low_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_CTRL_7_RANGE_SEC_MASK_LOW_AR_0 }; -static u64 gaudi_rr_hbw_mask_high_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_hbw_mask_high_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = { mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_MASK_HIGH_AR_0, mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_MASK_HIGH_AR_0, mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_MASK_HIGH_AR_0, @@ -12849,50 +12874,44 @@ static void gaudi_init_range_registers_lbw(struct hl_device *hdev) u32 lbw_rng_end[GAUDI_NUMBER_OF_LBW_RANGES]; int i, j; - lbw_rng_start[0] = (0xFBFE0000 & 0x3FFFFFF) - 1; - lbw_rng_end[0] = (0xFBFFF000 & 0x3FFFFFF) + 1; + lbw_rng_start[0] = (0xFC0E8000 & 0x3FFFFFF) - 1; /* 0x000E7FFF */ + lbw_rng_end[0] = (0xFC11FFFF & 0x3FFFFFF) + 1; /* 0x00120000 */ - lbw_rng_start[1] = (0xFC0E8000 & 0x3FFFFFF) - 1; - lbw_rng_end[1] = (0xFC120000 & 0x3FFFFFF) + 1; + lbw_rng_start[1] = (0xFC1E8000 & 0x3FFFFFF) - 1; /* 0x001E7FFF */ + lbw_rng_end[1] = (0xFC48FFFF & 0x3FFFFFF) + 1; /* 0x00490000 */ - lbw_rng_start[2] = (0xFC1E8000 & 0x3FFFFFF) - 1; - lbw_rng_end[2] = (0xFC48FFFF & 0x3FFFFFF) + 1; + lbw_rng_start[2] = (0xFC600000 & 0x3FFFFFF) - 1; /* 0x005FFFFF */ + lbw_rng_end[2] = (0xFCC48FFF & 0x3FFFFFF) + 1; /* 0x00C49000 */ - lbw_rng_start[3] = (0xFC600000 & 0x3FFFFFF) - 1; - lbw_rng_end[3] = (0xFCC48FFF & 0x3FFFFFF) + 1; + lbw_rng_start[3] = (0xFCC4A000 & 0x3FFFFFF) - 1; /* 0x00C49FFF */ + lbw_rng_end[3] = (0xFCCDFFFF & 0x3FFFFFF) + 1; /* 0x00CE0000 */ - lbw_rng_start[4] = (0xFCC4A000 & 0x3FFFFFF) - 1; - lbw_rng_end[4] = (0xFCCDFFFF & 0x3FFFFFF) + 1; + lbw_rng_start[4] = (0xFCCE4000 & 0x3FFFFFF) - 1; /* 0x00CE3FFF */ + lbw_rng_end[4] = (0xFCD1FFFF & 0x3FFFFFF) + 1; /* 0x00D20000 */ - lbw_rng_start[5] = (0xFCCE4000 & 0x3FFFFFF) - 1; - lbw_rng_end[5] = (0xFCD1FFFF & 0x3FFFFFF) + 1; + lbw_rng_start[5] = (0xFCD24000 & 0x3FFFFFF) - 1; /* 0x00D23FFF */ + lbw_rng_end[5] = (0xFCD5FFFF & 0x3FFFFFF) + 1; /* 0x00D60000 */ - lbw_rng_start[6] = (0xFCD24000 & 0x3FFFFFF) - 1; - lbw_rng_end[6] = (0xFCD5FFFF & 0x3FFFFFF) + 1; + lbw_rng_start[6] = (0xFCD64000 & 0x3FFFFFF) - 1; /* 0x00D63FFF */ + lbw_rng_end[6] = (0xFCD9FFFF & 0x3FFFFFF) + 1; /* 0x00DA0000 */ - lbw_rng_start[7] = (0xFCD64000 & 0x3FFFFFF) - 1; - lbw_rng_end[7] = (0xFCD9FFFF & 0x3FFFFFF) + 1; + lbw_rng_start[7] = (0xFCDA4000 & 0x3FFFFFF) - 1; /* 0x00DA3FFF */ + lbw_rng_end[7] = (0xFCDDFFFF & 0x3FFFFFF) + 1; /* 0x00DE0000 */ - lbw_rng_start[8] = (0xFCDA4000 & 0x3FFFFFF) - 1; - lbw_rng_end[8] = (0xFCDDFFFF & 0x3FFFFFF) + 1; + lbw_rng_start[8] = (0xFCDE4000 & 0x3FFFFFF) - 1; /* 0x00DE3FFF */ + lbw_rng_end[8] = (0xFCE05FFF & 0x3FFFFFF) + 1; /* 0x00E06000 */ - lbw_rng_start[9] = (0xFCDE4000 & 0x3FFFFFF) - 1; - lbw_rng_end[9] = (0xFCE05FFF & 0x3FFFFFF) + 1; + lbw_rng_start[9] = (0xFCFC9000 & 0x3FFFFFF) - 1; /* 0x00FC8FFF */ + lbw_rng_end[9] = (0xFFFFFFFE & 0x3FFFFFF) + 1; /* 0x03FFFFFF */ - lbw_rng_start[10] = (0xFEC43000 & 0x3FFFFFF) - 1; - lbw_rng_end[10] = (0xFEC43FFF & 0x3FFFFFF) + 1; - - lbw_rng_start[11] = (0xFE484000 & 0x3FFFFFF) - 1; - lbw_rng_end[11] = (0xFE484FFF & 0x3FFFFFF) + 1; - - for (i = 0 ; i < GAUDI_NUMBER_OF_RR_REGS ; i++) { + for (i = 0 ; i < GAUDI_NUMBER_OF_LBW_RR_REGS ; i++) { WREG32(gaudi_rr_lbw_hit_aw_regs[i], (1 << GAUDI_NUMBER_OF_LBW_RANGES) - 1); WREG32(gaudi_rr_lbw_hit_ar_regs[i], (1 << GAUDI_NUMBER_OF_LBW_RANGES) - 1); } - for (i = 0 ; i < GAUDI_NUMBER_OF_RR_REGS ; i++) + for (i = 0 ; i < GAUDI_NUMBER_OF_LBW_RR_REGS ; i++) for (j = 0 ; j < GAUDI_NUMBER_OF_LBW_RANGES ; j++) { WREG32(gaudi_rr_lbw_min_aw_regs[i] + (j << 2), lbw_rng_start[j]); @@ -12939,12 +12958,12 @@ static void gaudi_init_range_registers_hbw(struct hl_device *hdev) * 6th range is the host */ - for (i = 0 ; i < GAUDI_NUMBER_OF_RR_REGS ; i++) { + for (i = 0 ; i < GAUDI_NUMBER_OF_HBW_RR_REGS ; i++) { WREG32(gaudi_rr_hbw_hit_aw_regs[i], 0x1F); WREG32(gaudi_rr_hbw_hit_ar_regs[i], 0x1D); } - for (i = 0 ; i < GAUDI_NUMBER_OF_RR_REGS ; i++) { + for (i = 0 ; i < GAUDI_NUMBER_OF_HBW_RR_REGS ; i++) { WREG32(gaudi_rr_hbw_base_low_aw_regs[i], dram_addr_lo); WREG32(gaudi_rr_hbw_base_low_ar_regs[i], dram_addr_lo); From 3d3200ae167ba048a29e0c815987a3fdc90fc8d2 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Thu, 9 Sep 2021 09:56:37 +0300 Subject: [PATCH 182/543] habanalabs: rate limit multi CS completion errors As user can send wrong arguments to multi CS API, we rate limit the amount of errors dumped to dmesg, in addition we change the severity to warning. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_submission.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 5b7de857fbc1..a4ed91ed991d 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -2630,7 +2630,8 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) * completed after the poll function. */ if (!mcs_data.completion_bitmap) { - dev_err(hdev->dev, "Multi-CS got completion on wait but no CS completed\n"); + dev_warn_ratelimited(hdev->dev, + "Multi-CS got completion on wait but no CS completed\n"); rc = -EFAULT; } } From 42254c2a4991b98ca3f86040a1a7b7b32a0c8c4a Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Sun, 12 Sep 2021 14:30:35 +0300 Subject: [PATCH 183/543] habanalabs: fix wait offset handling Add handling for case where the user doesn't set wait offset, and keeps it as 0. In such a case the driver will decrement one from this zero value which will cause the code to wait for wrong number of signals. The solution is to treat this case as in legacy wait cs, and wait for the next signal. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/hw_queue.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c index 76b7de8f1406..0743319b10c7 100644 --- a/drivers/misc/habanalabs/common/hw_queue.c +++ b/drivers/misc/habanalabs/common/hw_queue.c @@ -437,6 +437,7 @@ void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev, struct hl_cs_compl *cs_cmpl) { struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl; + u32 offset = 0; cs_cmpl->hw_sob = handle->hw_sob; @@ -446,9 +447,13 @@ void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev, * set offset 1 for example he mean to wait only for the first * signal only, which will be pre_sob_val, and if he set offset 2 * then the value required is (pre_sob_val + 1) and so on... + * if user set wait offset to 0, then treat it as legacy wait cs, + * wait for the next signal. */ - cs_cmpl->sob_val = handle->pre_sob_val + - (job->encaps_sig_wait_offset - 1); + if (job->encaps_sig_wait_offset) + offset = job->encaps_sig_wait_offset - 1; + + cs_cmpl->sob_val = handle->pre_sob_val + offset; } static int init_wait_cs(struct hl_device *hdev, struct hl_cs *cs, From c8fee41957f036cbc8e840bd91e2087731df7f7e Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Sun, 12 Sep 2021 15:49:00 +0300 Subject: [PATCH 184/543] habanalabs: expose a single cs seq in staged submissions Staged submission consists of multiple command submissions. In order to be explicit, driver should return a single cs sequence for every cs in the submission, or else user may try to wait on an internal CS rather than waiting for the whole submission. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_submission.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index a4ed91ed991d..91b57544f7c6 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -1295,6 +1295,12 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, if (rc) goto free_cs_object; + /* If this is a staged submission we must return the staged sequence + * rather than the internal CS sequence + */ + if (cs->staged_cs) + *cs_seq = cs->staged_sequence; + /* Validate ALL the CS chunks before submitting the CS */ for (i = 0 ; i < num_chunks ; i++) { struct hl_cs_chunk *chunk = &cs_chunk_array[i]; From 59583f747664046aaae5588d56d5954fab66cce8 Mon Sep 17 00:00:00 2001 From: Andreas Larsson Date: Wed, 8 Sep 2021 09:48:22 +0200 Subject: [PATCH 185/543] sparc32: page align size in arch_dma_alloc Commit 53b7670e5735 ("sparc: factor the dma coherent mapping into helper") lost the page align for the calls to dma_make_coherent and srmmu_unmapiorange. The latter cannot handle a non page aligned len argument. Signed-off-by: Andreas Larsson Reviewed-by: Sam Ravnborg Signed-off-by: Christoph Hellwig --- arch/sparc/kernel/ioport.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 8e1d72a16759..7ceae24b0ca9 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -356,7 +356,9 @@ err_nomem: void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { - if (!sparc_dma_free_resource(cpu_addr, PAGE_ALIGN(size))) + size = PAGE_ALIGN(size); + + if (!sparc_dma_free_resource(cpu_addr, size)) return; dma_make_coherent(dma_addr, size); From 4f884f3962767877d7aabbc1ec124d2c307a4257 Mon Sep 17 00:00:00 2001 From: zhenggy Date: Tue, 14 Sep 2021 09:51:15 +0800 Subject: [PATCH 186/543] tcp: fix tp->undo_retrans accounting in tcp_sacktag_one() Commit 10d3be569243 ("tcp-tso: do not split TSO packets at retransmit time") may directly retrans a multiple segments TSO/GSO packet without split, Since this commit, we can no longer assume that a retransmitted packet is a single segment. This patch fixes the tp->undo_retrans accounting in tcp_sacktag_one() that use the actual segments(pcount) of the retransmitted packet. Before that commit (10d3be569243), the assumption underlying the tp->undo_retrans-- seems correct. Fixes: 10d3be569243 ("tcp-tso: do not split TSO packets at retransmit time") Signed-off-by: zhenggy Reviewed-by: Eric Dumazet Acked-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3f7bd7ae7d7a..141e85e6422b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1346,7 +1346,7 @@ static u8 tcp_sacktag_one(struct sock *sk, if (dup_sack && (sacked & TCPCB_RETRANS)) { if (tp->undo_marker && tp->undo_retrans > 0 && after(end_seq, tp->undo_marker)) - tp->undo_retrans--; + tp->undo_retrans = max_t(int, 0, tp->undo_retrans - pcount); if ((sacked & TCPCB_SACKED_ACKED) && before(start_seq, state->reord)) state->reord = start_seq; From d198b27762644c71362e43a7533f89c92b115bcf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Sep 2021 22:18:51 -0700 Subject: [PATCH 187/543] Revert "Revert "ipv4: fix memory leaks in ip_cmsg_send() callers"" This reverts commit d7807a9adf4856171f8441f13078c33941df48ab. As mentioned in https://lkml.org/lkml/2021/9/13/1819 5 years old commit 919483096bfe ("ipv4: fix memory leaks in ip_cmsg_send() callers") was a correct fix. ip_cmsg_send() can loop over multiple cmsghdr() If IP_RETOPTS has been successful, but following cmsghdr generates an error, we do not free ipc.ok If IP_RETOPTS is not successful, we have freed the allocated temporary space, not the one currently in ipc.opt. Sure, code could be refactored, but let's not bring back old bugs. Fixes: d7807a9adf48 ("Revert "ipv4: fix memory leaks in ip_cmsg_send() callers"") Signed-off-by: Eric Dumazet Cc: Yajun Deng Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 2 +- net/ipv4/ping.c | 5 +++-- net/ipv4/raw.c | 5 +++-- net/ipv4/udp.c | 5 +++-- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 7cef9987ab4a..b297bb28556e 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -279,7 +279,7 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, case IP_RETOPTS: err = cmsg->cmsg_len - sizeof(struct cmsghdr); - /* Our caller is responsible for freeing ipc->opt when err = 0 */ + /* Our caller is responsible for freeing ipc->opt */ err = ip_options_get(net, &ipc->opt, KERNEL_SOCKPTR(CMSG_DATA(cmsg)), err < 40 ? err : 40); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index c588f9f2f46c..1e44a43acfe2 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -727,9 +727,10 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_controllen) { err = ip_cmsg_send(sk, msg, &ipc, false); - if (unlikely(err)) + if (unlikely(err)) { + kfree(ipc.opt); return err; - + } if (ipc.opt) free = 1; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 1c98063a3ae8..bb446e60cf58 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -562,9 +562,10 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_controllen) { err = ip_cmsg_send(sk, msg, &ipc, false); - if (unlikely(err)) + if (unlikely(err)) { + kfree(ipc.opt); goto out; - + } if (ipc.opt) free = 1; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d5f5981d7a43..8851c9463b4b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1122,9 +1122,10 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (err > 0) err = ip_cmsg_send(sk, msg, &ipc, sk->sk_family == AF_INET6); - if (unlikely(err < 0)) + if (unlikely(err < 0)) { + kfree(ipc.opt); return err; - + } if (ipc.opt) free = 1; connected = 0; From 8fb0f47a9d7acf620d0fd97831b69da9bc5e22ed Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 10 Sep 2021 11:18:36 -0600 Subject: [PATCH 188/543] iov_iter: add helper to save iov_iter state In an ideal world, when someone is passed an iov_iter and returns X bytes, then X bytes would have been consumed/advanced from the iov_iter. But we have use cases that always consume the entire iterator, a few examples of that are iomap and bdev O_DIRECT. This means we cannot rely on the state of the iov_iter once we've called ->read_iter() or ->write_iter(). This would be easier if we didn't always have to deal with truncate of the iov_iter, as rewinding would be trivial without that. We recently added a commit to track the truncate state, but that grew the iov_iter by 8 bytes and wasn't the best solution. Implement a helper to save enough of the iov_iter state to sanely restore it after we've called the read/write iterator helpers. This currently only works for IOVEC/BVEC/KVEC as that's all we need, support for other iterator types are left as an exercise for the reader. Link: https://lore.kernel.org/linux-fsdevel/CAHk-=wiacKV4Gh-MYjteU0LwNBSGpWrK-Ov25HdqB1ewinrFPg@mail.gmail.com/ Signed-off-by: Jens Axboe --- include/linux/uio.h | 15 +++++++++++++++ lib/iov_iter.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/include/linux/uio.h b/include/linux/uio.h index 5265024e8b90..984c4ab74859 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -27,6 +27,12 @@ enum iter_type { ITER_DISCARD, }; +struct iov_iter_state { + size_t iov_offset; + size_t count; + unsigned long nr_segs; +}; + struct iov_iter { u8 iter_type; bool data_source; @@ -55,6 +61,14 @@ static inline enum iter_type iov_iter_type(const struct iov_iter *i) return i->iter_type; } +static inline void iov_iter_save_state(struct iov_iter *iter, + struct iov_iter_state *state) +{ + state->iov_offset = iter->iov_offset; + state->count = iter->count; + state->nr_segs = iter->nr_segs; +} + static inline bool iter_is_iovec(const struct iov_iter *i) { return iov_iter_type(i) == ITER_IOVEC; @@ -233,6 +247,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start); int iov_iter_npages(const struct iov_iter *i, int maxpages); +void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state); const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index f2d50d69a6c3..755c10c5138c 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1972,3 +1972,39 @@ int import_single_range(int rw, void __user *buf, size_t len, return 0; } EXPORT_SYMBOL(import_single_range); + +/** + * iov_iter_restore() - Restore a &struct iov_iter to the same state as when + * iov_iter_save_state() was called. + * + * @i: &struct iov_iter to restore + * @state: state to restore from + * + * Used after iov_iter_save_state() to bring restore @i, if operations may + * have advanced it. + * + * Note: only works on ITER_IOVEC, ITER_BVEC, and ITER_KVEC + */ +void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state) +{ + if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i)) && + !iov_iter_is_kvec(i)) + return; + i->iov_offset = state->iov_offset; + i->count = state->count; + /* + * For the *vec iters, nr_segs + iov is constant - if we increment + * the vec, then we also decrement the nr_segs count. Hence we don't + * need to track both of these, just one is enough and we can deduct + * the other from that. ITER_KVEC and ITER_IOVEC are the same struct + * size, so we can just increment the iov pointer as they are unionzed. + * ITER_BVEC _may_ be the same size on some archs, but on others it is + * not. Be safe and handle it separately. + */ + BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec)); + if (iov_iter_is_bvec(i)) + i->bvec -= state->nr_segs - i->nr_segs; + else + i->iov -= state->nr_segs - i->nr_segs; + i->nr_segs = state->nr_segs; +} From 44df58d441a94de40d52fca67dc60790daee4266 Mon Sep 17 00:00:00 2001 From: Xiaoguang Wang Date: Tue, 14 Sep 2021 22:38:52 +0800 Subject: [PATCH 189/543] io_uring: fix missing sigmask restore in io_cqring_wait() Move get_timespec() section in io_cqring_wait() before the sigmask saving, otherwise we'll fail to restore sigmask once get_timespec() returns error. Fixes: c73ebb685fb6 ("io_uring: add timeout support for io_uring_enter()") Signed-off-by: Xiaoguang Wang Link: https://lore.kernel.org/r/20210914143852.9663-1-xiaoguang.wang@linux.alibaba.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index a864a94364c6..94afd087e97b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7518,6 +7518,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, break; } while (1); + if (uts) { + struct timespec64 ts; + + if (get_timespec64(&ts, uts)) + return -EFAULT; + timeout = timespec64_to_jiffies(&ts); + } + if (sig) { #ifdef CONFIG_COMPAT if (in_compat_syscall()) @@ -7531,14 +7539,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, return ret; } - if (uts) { - struct timespec64 ts; - - if (get_timespec64(&ts, uts)) - return -EFAULT; - timeout = timespec64_to_jiffies(&ts); - } - init_waitqueue_func_entry(&iowq.wq, io_wake_function); iowq.wq.private = current; INIT_LIST_HEAD(&iowq.wq.entry); From 1619b69edce14c4a4665fa8ff4c587dcc77202a9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 14 Sep 2021 22:17:23 +1000 Subject: [PATCH 190/543] powerpc/boot: Fix build failure since GCC 4.9 removal Stephen reported that the build was broken since commit 6d2ef226f2f1 ("compiler_attributes.h: drop __has_attribute() support for gcc4"), with errors such as: include/linux/compiler_attributes.h:296:5: warning: "__has_attribute" is not defined, evaluates to 0 [-Wundef] 296 | #if __has_attribute(__warning__) | ^~~~~~~~~~~~~~~ make[2]: *** [arch/powerpc/boot/Makefile:225: arch/powerpc/boot/crt0.o] Error 1 But we expect __has_attribute() to always be defined now that we've stopped using GCC 4. Linus debugged it to the point of reading the GCC sources, and noticing that the problem is that __has_attribute() is not defined when preprocessing assembly files, which is what we're doing here. Our assembly files don't include, or need, compiler_attributes.h, but they are getting it unconditionally from the -include in BOOT_CFLAGS, which is then added in its entirety to BOOT_AFLAGS. That -include was added in commit 77433830ed16 ("powerpc: boot: include compiler_attributes.h") so that we'd have "fallthrough" and other attributes defined for the C files in arch/powerpc/boot. But it's not needed for assembly files. The minimal fix is to move the addition to BOOT_CFLAGS of -include compiler_attributes.h until after we've copied BOOT_CFLAGS into BOOT_AFLAGS. That avoids including compiler_attributes.h for asm files, but makes no other change to BOOT_CFLAGS or BOOT_AFLAGS. Reported-by: Stephen Rothwell Debugged-by: Linus Torvalds Signed-off-by: Michael Ellerman Tested-by: Guenter Roeck Signed-off-by: Linus Torvalds --- arch/powerpc/boot/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 6900d0ac2421..089ee3ea55c8 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -35,7 +35,6 @@ endif BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -O2 -msoft-float -mno-altivec -mno-vsx \ -pipe -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \ - -include $(srctree)/include/linux/compiler_attributes.h \ $(LINUXINCLUDE) ifdef CONFIG_PPC64_BOOT_WRAPPER @@ -70,6 +69,7 @@ ifeq ($(call cc-option-yn, -fstack-protector),y) BOOTCFLAGS += -fno-stack-protector endif +BOOTCFLAGS += -include $(srctree)/include/linux/compiler_attributes.h BOOTCFLAGS += -I$(objtree)/$(obj) -I$(srctree)/$(obj) DTC_FLAGS ?= -p 1024 From 9c7b0ba887513b6681e7883d306df0a0fd580afa Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 14 Sep 2021 16:12:52 +0100 Subject: [PATCH 191/543] io_uring: auto-removal for direct open/accept It might be inconvenient that direct open/accept deviates from the update semantics and fails if the slot is taken instead of removing a file sitting there. Implement this auto-removal. Note that removal might need to allocate and so may fail. However, if an empty slot is specified, it's guaraneed to not fail on the fd installation side for valid userspace programs. It's needed for users who can't tolerate such failures, e.g. accept where the other end never retries. Suggested-by: Franz-B. Tuneke Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/c896f14ea46b0eaa6c09d93149e665c2c37979b4.1631632300.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 52 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 94afd087e97b..ae489ab275dd 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8287,11 +8287,27 @@ static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file, #endif } +static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, + struct io_rsrc_node *node, void *rsrc) +{ + struct io_rsrc_put *prsrc; + + prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL); + if (!prsrc) + return -ENOMEM; + + prsrc->tag = *io_get_tag_slot(data, idx); + prsrc->rsrc = rsrc; + list_add(&prsrc->list, &node->rsrc_list); + return 0; +} + static int io_install_fixed_file(struct io_kiocb *req, struct file *file, unsigned int issue_flags, u32 slot_index) { struct io_ring_ctx *ctx = req->ctx; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + bool needs_switch = false; struct io_fixed_file *file_slot; int ret = -EBADF; @@ -8307,9 +8323,22 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file, slot_index = array_index_nospec(slot_index, ctx->nr_user_files); file_slot = io_fixed_file_slot(&ctx->file_table, slot_index); - ret = -EBADF; - if (file_slot->file_ptr) - goto err; + + if (file_slot->file_ptr) { + struct file *old_file; + + ret = io_rsrc_node_switch_start(ctx); + if (ret) + goto err; + + old_file = (struct file *)(file_slot->file_ptr & FFS_MASK); + ret = io_queue_rsrc_removal(ctx->file_data, slot_index, + ctx->rsrc_node, old_file); + if (ret) + goto err; + file_slot->file_ptr = 0; + needs_switch = true; + } *io_get_tag_slot(ctx->file_data, slot_index) = 0; io_fixed_file_set(file_slot, file); @@ -8321,27 +8350,14 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file, ret = 0; err: + if (needs_switch) + io_rsrc_node_switch(ctx, ctx->file_data); io_ring_submit_unlock(ctx, !force_nonblock); if (ret) fput(file); return ret; } -static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, - struct io_rsrc_node *node, void *rsrc) -{ - struct io_rsrc_put *prsrc; - - prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL); - if (!prsrc) - return -ENOMEM; - - prsrc->tag = *io_get_tag_slot(data, idx); - prsrc->rsrc = rsrc; - list_add(&prsrc->list, &node->rsrc_list); - return 0; -} - static int __io_sqe_files_update(struct io_ring_ctx *ctx, struct io_uring_rsrc_update2 *up, unsigned nr_args) From 4ad3ea1c69354328edcccb83c8a4d7d2f55e3c6a Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 24 Aug 2021 15:54:25 -0700 Subject: [PATCH 192/543] drm/i915/selftests: Do not use import_obj uninitialized MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clang warns a couple of times: drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:63:6: warning: variable 'import_obj' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] if (import != &obj->base) { ^~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:80:22: note: uninitialized use occurs here i915_gem_object_put(import_obj); ^~~~~~~~~~ drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:63:2: note: remove the 'if' if its condition is always false if (import != &obj->base) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:38:46: note: initialize the variable 'import_obj' to silence this warning struct drm_i915_gem_object *obj, *import_obj; ^ = NULL Shuffle the import_obj initialization above these if statements so that it is not used uninitialized. Fixes: d7b2cb380b3a ("drm/i915/gem: Correct the locking and pin pattern for dma-buf (v8)") Reported-by: Dan Carpenter Reviewed-by: Thomas Hellström Signed-off-by: Nathan Chancellor Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210824225427.2065517-2-nathan@kernel.org (cherry picked from commit 4796054b381a586f4177a24e3d8b5a6a0a32ce62) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index ffae7df5e4d7..532c7955b300 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -59,13 +59,13 @@ static int igt_dmabuf_import_self(void *arg) err = PTR_ERR(import); goto out_dmabuf; } + import_obj = to_intel_bo(import); if (import != &obj->base) { pr_err("i915_gem_prime_import created a new object!\n"); err = -EINVAL; goto out_import; } - import_obj = to_intel_bo(import); i915_gem_object_lock(import_obj, NULL); err = __i915_gem_object_get_pages(import_obj); @@ -176,6 +176,7 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915, err = PTR_ERR(import); goto out_dmabuf; } + import_obj = to_intel_bo(import); if (import == &obj->base) { pr_err("i915_gem_prime_import reused gem object!\n"); @@ -183,8 +184,6 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915, goto out_import; } - import_obj = to_intel_bo(import); - i915_gem_object_lock(import_obj, NULL); err = __i915_gem_object_get_pages(import_obj); if (err) { From 347c4db2afc7f9cf536144d167579ccf1e9bf028 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 24 Aug 2021 15:54:26 -0700 Subject: [PATCH 193/543] drm/i915/selftests: Always initialize err in igt_dmabuf_import_same_driver_lmem() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clang warns: drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:127:13: warning: variable 'err' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] } else if (PTR_ERR(import) != -EOPNOTSUPP) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:138:9: note: uninitialized use occurs here return err; ^~~ drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:127:9: note: remove the 'if' if its condition is always true } else if (PTR_ERR(import) != -EOPNOTSUPP) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:95:9: note: initialize the variable 'err' to silence this warning int err; ^ = 0 The test is expected to pass if i915_gem_prime_import() returns -EOPNOTSUPP so initialize err to zero in this case. Fixes: cdb35d1ed6d2 ("drm/i915/gem: Migrate to system at dma-buf attach time (v7)") Reported-by: Dan Carpenter Reviewed-by: Thomas Hellström Signed-off-by: Nathan Chancellor Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210824225427.2065517-3-nathan@kernel.org (cherry picked from commit 46f20a353b80d02492655d99714f0566018a17e8) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index 532c7955b300..4a6bb64c3a35 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -128,6 +128,8 @@ static int igt_dmabuf_import_same_driver_lmem(void *arg) pr_err("i915_gem_prime_import failed with the wrong err=%ld\n", PTR_ERR(import)); err = PTR_ERR(import); + } else { + err = 0; } dma_buf_put(dmabuf); From 7889367d7795b3b1766e33ac1ae8a5fbc292108b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 24 Aug 2021 15:54:27 -0700 Subject: [PATCH 194/543] drm/i915: Enable -Wsometimes-uninitialized This warning helps catch uninitialized variables. It should have been enabled at the same time as commit b2423184ac33 ("drm/i915: Enable -Wuninitialized") but I did not realize they were disabled separately. Enable it now that i915 is clean so that it stays that way. Reviewed-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210824225427.2065517-4-nathan@kernel.org (cherry picked from commit 43192617f7816bb74584c1df06f57363afd15337) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 642a5b5a1b81..335ba9f43d8f 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -19,7 +19,6 @@ subdir-ccflags-y += $(call cc-disable-warning, missing-field-initializers) subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable) # clang warnings subdir-ccflags-y += $(call cc-disable-warning, sign-compare) -subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized) subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides) subdir-ccflags-y += $(call cc-disable-warning, frame-address) subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror From cdef1196608892b9a46caa5f2b64095a7f0be60c Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 14 Sep 2021 16:56:23 +0000 Subject: [PATCH 195/543] cpufreq: schedutil: Destroy mutex before kobject_put() frees the memory Since commit e5c6b312ce3c ("cpufreq: schedutil: Use kobject release() method to free sugov_tunables") kobject_put() has kfree()d the attr_set before gov_attr_set_put() returns. kobject_put() isn't the last user of attr_set in gov_attr_set_put(), the subsequent mutex_destroy() triggers a use-after-free: | BUG: KASAN: use-after-free in mutex_is_locked+0x20/0x60 | Read of size 8 at addr ffff000800ca4250 by task cpuhp/2/20 | | CPU: 2 PID: 20 Comm: cpuhp/2 Not tainted 5.15.0-rc1 #12369 | Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno Development | Platform, BIOS EDK II Jul 30 2018 | Call trace: | dump_backtrace+0x0/0x380 | show_stack+0x1c/0x30 | dump_stack_lvl+0x8c/0xb8 | print_address_description.constprop.0+0x74/0x2b8 | kasan_report+0x1f4/0x210 | kasan_check_range+0xfc/0x1a4 | __kasan_check_read+0x38/0x60 | mutex_is_locked+0x20/0x60 | mutex_destroy+0x80/0x100 | gov_attr_set_put+0xfc/0x150 | sugov_exit+0x78/0x190 | cpufreq_offline.isra.0+0x2c0/0x660 | cpuhp_cpufreq_offline+0x14/0x24 | cpuhp_invoke_callback+0x430/0x6d0 | cpuhp_thread_fun+0x1b0/0x624 | smpboot_thread_fn+0x5e0/0xa6c | kthread+0x3a0/0x450 | ret_from_fork+0x10/0x20 Swap the order of the calls. Fixes: e5c6b312ce3c ("cpufreq: schedutil: Use kobject release() method to free sugov_tunables") Cc: 4.7+ # 4.7+ Signed-off-by: James Morse Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor_attr_set.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_governor_attr_set.c b/drivers/cpufreq/cpufreq_governor_attr_set.c index 66b05a326910..a6f365b9cc1a 100644 --- a/drivers/cpufreq/cpufreq_governor_attr_set.c +++ b/drivers/cpufreq/cpufreq_governor_attr_set.c @@ -74,8 +74,8 @@ unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *l if (count) return count; - kobject_put(&attr_set->kobj); mutex_destroy(&attr_set->update_lock); + kobject_put(&attr_set->kobj); return 0; } EXPORT_SYMBOL_GPL(gov_attr_set_put); From 5d329e1286b0a040264e239b80257c937f6e685f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 14 Sep 2021 11:08:37 -0600 Subject: [PATCH 196/543] io_uring: allow retry for O_NONBLOCK if async is supported A common complaint is that using O_NONBLOCK files with io_uring can be a bit of a pain. Be a bit nicer and allow normal retry IFF the file does support async behavior. This makes it possible to use io_uring more reliably with O_NONBLOCK files, for use cases where it either isn't possible or feasible to modify the file flags. Cc: stable@vger.kernel.org Reported-and-tested-by: Dan Melnic Signed-off-by: Jens Axboe --- fs/io_uring.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index ae489ab275dd..3077f85a2638 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2843,7 +2843,8 @@ static bool io_file_supports_nowait(struct io_kiocb *req, int rw) return __io_file_supports_nowait(req->file, rw); } -static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) +static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, + int rw) { struct io_ring_ctx *ctx = req->ctx; struct kiocb *kiocb = &req->rw.kiocb; @@ -2865,8 +2866,13 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (unlikely(ret)) return ret; - /* don't allow async punt for O_NONBLOCK or RWF_NOWAIT */ - if ((kiocb->ki_flags & IOCB_NOWAIT) || (file->f_flags & O_NONBLOCK)) + /* + * If the file is marked O_NONBLOCK, still allow retry for it if it + * supports async. Otherwise it's impossible to use O_NONBLOCK files + * reliably. If not, or it IOCB_NOWAIT is set, don't retry. + */ + if ((kiocb->ki_flags & IOCB_NOWAIT) || + ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req, rw))) req->flags |= REQ_F_NOWAIT; ioprio = READ_ONCE(sqe->ioprio); @@ -3349,7 +3355,7 @@ static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { if (unlikely(!(req->file->f_mode & FMODE_READ))) return -EBADF; - return io_prep_rw(req, sqe); + return io_prep_rw(req, sqe, READ); } /* @@ -3542,7 +3548,7 @@ static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { if (unlikely(!(req->file->f_mode & FMODE_WRITE))) return -EBADF; - return io_prep_rw(req, sqe); + return io_prep_rw(req, sqe, WRITE); } static int io_write(struct io_kiocb *req, unsigned int issue_flags) From 6a4746ba06191e23d30230738e94334b26590a8a Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sat, 11 Sep 2021 10:40:08 +0300 Subject: [PATCH 197/543] ipc: remove memcg accounting for sops objects in do_semtimedop() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Linus proposes to revert an accounting for sops objects in do_semtimedop() because it's really just a temporary buffer for a single semtimedop() system call. This object can consume up to 2 pages, syscall is sleeping one, size and duration can be controlled by user, and this allocation can be repeated by many thread at the same time. However Shakeel Butt pointed that there are much more popular objects with the same life time and similar memory consumption, the accounting of which was decided to be rejected for performance reasons. Considering at least 2 pages for task_struct and 2 pages for the kernel stack, a back of the envelope calculation gives a footprint amplification of <1.5 so this temporal buffer can be safely ignored. The factor would IMO be interesting if it was >> 2 (from the PoV of excessive (ab)use, fine-grained accounting seems to be currently unfeasible due to performance impact). Link: https://lore.kernel.org/lkml/90e254df-0dfe-f080-011e-b7c53ee7fd20@virtuozzo.com/ Fixes: 18319498fdd4 ("memcg: enable accounting of ipc resources") Signed-off-by: Vasily Averin Acked-by: Michal Hocko Reviewed-by: Michal Koutný Acked-by: Shakeel Butt Signed-off-by: Linus Torvalds --- ipc/sem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ipc/sem.c b/ipc/sem.c index f833238df1ce..6693daf4fe11 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -2238,7 +2238,7 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops, return -EINVAL; if (nsops > SEMOPM_FAST) { - sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL_ACCOUNT); + sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL); if (sops == NULL) return -ENOMEM; } From 8b4bd256674720709a9d858a219fcac6f2f253b5 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Thu, 9 Sep 2021 10:56:12 +0200 Subject: [PATCH 198/543] thermal/drivers/int340x: Do not set a wrong tcc offset on resume After upgrading to Linux 5.13.3 I noticed my laptop would shutdown due to overheat (when it should not). It turned out this was due to commit fe6a6de6692e ("thermal/drivers/int340x/processor_thermal: Fix tcc setting"). What happens is this drivers uses a global variable to keep track of the tcc offset (tcc_offset_save) and uses it on resume. The issue is this variable is initialized to 0, but is only set in tcc_offset_degree_celsius_store, i.e. when the tcc offset is explicitly set by userspace. If that does not happen, the resume path will set the offset to 0 (in my case the h/w default being 3, the offset would become too low after a suspend/resume cycle). The issue did not arise before commit fe6a6de6692e, as the function setting the offset would return if the offset was 0. This is no longer the case (rightfully). Fix this by not applying the offset if it wasn't saved before, reverting back to the old logic. A better approach will come later, but this will be easier to apply to stable kernels. The logic to restore the offset after a resume was there long before commit fe6a6de6692e, but as a value of 0 was considered invalid I'm referencing the commit that made the issue possible in the Fixes tag instead. Fixes: fe6a6de6692e ("thermal/drivers/int340x/processor_thermal: Fix tcc setting") Cc: stable@vger.kernel.org Cc: Srinivas Pandruvada Signed-off-by: Antoine Tenart Signed-off-by: Daniel Lezcano Reviewed-by: Srinivas Pandruvada Tested-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20210909085613.5577-2-atenart@kernel.org --- .../thermal/intel/int340x_thermal/processor_thermal_device.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c index 0f0038af2ad4..fb64acfd5e07 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c @@ -107,7 +107,7 @@ static int tcc_offset_update(unsigned int tcc) return 0; } -static unsigned int tcc_offset_save; +static int tcc_offset_save = -1; static ssize_t tcc_offset_degree_celsius_store(struct device *dev, struct device_attribute *attr, const char *buf, @@ -352,7 +352,8 @@ int proc_thermal_resume(struct device *dev) proc_dev = dev_get_drvdata(dev); proc_thermal_read_ppcc(proc_dev); - tcc_offset_update(tcc_offset_save); + if (tcc_offset_save >= 0) + tcc_offset_update(tcc_offset_save); return 0; } From 67a44e659888569a133a8f858c8230e9d7aad1d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ernst=20Sj=C3=B6strand?= Date: Thu, 2 Sep 2021 09:50:27 +0200 Subject: [PATCH 199/543] drm/amd/amdgpu: Increase HWIP_MAX_INSTANCE to 10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Seems like newer cards can have even more instances now. Found by UBSAN: array-index-out-of-bounds in drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c:318:29 index 8 is out of range for type 'uint32_t *[8]' Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1697 Cc: stable@vger.kernel.org Signed-off-by: Ernst Sjöstrand Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index dc3c6b3a00e5..d356e329e6f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -758,7 +758,7 @@ enum amd_hw_ip_block_type { MAX_HWIP }; -#define HWIP_MAX_INSTANCE 8 +#define HWIP_MAX_INSTANCE 10 struct amd_powerplay { void *pp_handle; From 5598d7c21a0bcab900f281dca4efbb1f80add0fe Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Mon, 6 Sep 2021 07:55:01 +0800 Subject: [PATCH 200/543] drm/amd/pm: fix the issue of uploading powerplay table fix the issue of uploading powerplay table due to the dependancy of rlc. Signed-off-by: Kenneth Feng Reviewed-by: Jack Gui Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 3ab1ce4d3419..04863a797115 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1404,7 +1404,7 @@ static int smu_disable_dpms(struct smu_context *smu) */ if (smu->uploading_custom_pp_table && (adev->asic_type >= CHIP_NAVI10) && - (adev->asic_type <= CHIP_DIMGREY_CAVEFISH)) + (adev->asic_type <= CHIP_BEIGE_GOBY)) return smu_disable_all_features_with_exception(smu, true, SMU_FEATURE_COUNT); From c92db8d64f9e0313e7ecdc9500db93a5040c9370 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 7 Sep 2021 09:37:52 +0200 Subject: [PATCH 201/543] drm/amdgpu: fix use after free during BO move MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memory backing old_mem is already freed at that point, move the check a bit more up. Signed-off-by: Christian König Fixes: bfa3357ef9ab ("drm/ttm: allocate resource object instead of embedding it v2") Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1699 Acked-by: Nirmoy Das Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 38dade421d46..94126dc39688 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -515,6 +515,15 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, goto out; } + if (bo->type == ttm_bo_type_device && + new_mem->mem_type == TTM_PL_VRAM && + old_mem->mem_type != TTM_PL_VRAM) { + /* amdgpu_bo_fault_reserve_notify will re-set this if the CPU + * accesses the BO after it's moved. + */ + abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + } + if (adev->mman.buffer_funcs_enabled) { if (((old_mem->mem_type == TTM_PL_SYSTEM && new_mem->mem_type == TTM_PL_VRAM) || @@ -545,15 +554,6 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, return r; } - if (bo->type == ttm_bo_type_device && - new_mem->mem_type == TTM_PL_VRAM && - old_mem->mem_type != TTM_PL_VRAM) { - /* amdgpu_bo_fault_reserve_notify will re-set this if the CPU - * accesses the BO after it's moved. - */ - abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - } - out: /* update statistics */ atomic64_add(bo->base.size, &adev->num_bytes_moved); From 7bbee36d71502ab9a341505da89a017c7ae2e6b2 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Tue, 7 Sep 2021 14:19:34 +0000 Subject: [PATCH 202/543] amd/display: downgrade validation failure log level In amdgpu_dm_atomic_check, dc_validate_global_state is called. On failure this logs a warning to the kernel journal. However warnings shouldn't be used for atomic test-only commit failures: user-space might be perfoming a lot of atomic test-only commits to find the best hardware configuration. Downgrade the log to a regular DRM atomic message. While at it, use the new device-aware logging infrastructure. This fixes error messages in the kernel when running gamescope [1]. [1]: https://github.com/Plagman/gamescope/issues/245 Reviewed-by: Nicholas Kazlauskas Signed-off-by: Simon Ser Cc: Alex Deucher Cc: Harry Wentland Cc: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 9b1fc54555ee..d55e61d8aa00 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -10492,7 +10492,8 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, goto fail; status = dc_validate_global_state(dc, dm_state->context, false); if (status != DC_OK) { - DC_LOG_WARNING("DC global validation failure: %s (%d)", + drm_dbg_atomic(dev, + "DC global validation failure: %s (%d)", dc_status_to_str(status), status); ret = -EINVAL; goto fail; From b04ce53eac2fc326290817a6f64a440b5bffd2e3 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Thu, 2 Sep 2021 13:27:56 +0200 Subject: [PATCH 203/543] drm/amdgpu: use IS_ERR for debugfs APIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit debugfs APIs returns encoded error so use IS_ERR for checking return value. v2: return PTR_ERR(ent) References: https://gitlab.freedesktop.org/drm/amd/-/issues/1686 Signed-off-by: Nirmoy Das Reviewed-by: Christian König Reviewed-By: Shashank Sharma Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 10 ++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 277128846dd1..463b9c0283f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1544,20 +1544,18 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) struct dentry *ent; int r, i; - - ent = debugfs_create_file("amdgpu_preempt_ib", 0600, root, adev, &fops_ib_preempt); - if (!ent) { + if (IS_ERR(ent)) { DRM_ERROR("unable to create amdgpu_preempt_ib debugsfs file\n"); - return -EIO; + return PTR_ERR(ent); } ent = debugfs_create_file("amdgpu_force_sclk", 0200, root, adev, &fops_sclk_set); - if (!ent) { + if (IS_ERR(ent)) { DRM_ERROR("unable to create amdgpu_set_sclk debugsfs file\n"); - return -EIO; + return PTR_ERR(ent); } /* Register debugfs entries for amdgpu_ttm */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 7b634a1517f9..0554576d3695 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -428,8 +428,8 @@ int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, ent = debugfs_create_file(name, S_IFREG | S_IRUGO, root, ring, &amdgpu_debugfs_ring_fops); - if (!ent) - return -ENOMEM; + if (IS_ERR(ent)) + return PTR_ERR(ent); i_size_write(ent->d_inode, ring->ring_size + 12); ring->ent = ent; From 77e02cf57b6cff9919949defb7fd9b8ac16399a2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 14 Sep 2021 13:23:22 -0700 Subject: [PATCH 204/543] memblock: introduce saner 'memblock_free_ptr()' interface The boot-time allocation interface for memblock is a mess, with 'memblock_alloc()' returning a virtual pointer, but then you are supposed to free it with 'memblock_free()' that takes a _physical_ address. Not only is that all kinds of strange and illogical, but it actually causes bugs, when people then use it like a normal allocation function, and it fails spectacularly on a NULL pointer: https://lore.kernel.org/all/20210912140820.GD25450@xsang-OptiPlex-9020/ or just random memory corruption if the debug checks don't catch it: https://lore.kernel.org/all/61ab2d0c-3313-aaab-514c-e15b7aa054a0@suse.cz/ I really don't want to apply patches that treat the symptoms, when the fundamental cause is this horribly confusing interface. I started out looking at just automating a sane replacement sequence, but because of this mix or virtual and physical addresses, and because people have used the "__pa()" macro that can take either a regular kernel pointer, or just the raw "unsigned long" address, it's all quite messy. So this just introduces a new saner interface for freeing a virtual address that was allocated using 'memblock_alloc()', and that was kept as a regular kernel pointer. And then it converts a couple of users that are obvious and easy to test, including the 'xbc_nodes' case in lib/bootconfig.c that caused problems. Reported-by: kernel test robot Fixes: 40caa127f3c7 ("init: bootconfig: Remove all bootconfig data when the init memory is removed") Cc: Steven Rostedt Cc: Mike Rapoport Cc: Andrew Morton Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Vlastimil Babka Signed-off-by: Linus Torvalds --- arch/x86/kernel/setup_percpu.c | 2 +- arch/x86/mm/kasan_init_64.c | 6 ++---- arch/x86/mm/numa.c | 2 +- arch/x86/mm/numa_emulation.c | 3 +-- drivers/base/arch_numa.c | 2 +- drivers/macintosh/smu.c | 2 +- include/linux/memblock.h | 1 + init/main.c | 2 +- kernel/printk/printk.c | 4 ++-- lib/bootconfig.c | 2 +- mm/memblock.c | 16 +++++++++++++++- 11 files changed, 27 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 78a32b956e81..5afd98559193 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -135,7 +135,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) static void __init pcpu_fc_free(void *ptr, size_t size) { - memblock_free(__pa(ptr), size); + memblock_free_ptr(ptr, size); } static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 1a50434c8a4d..ef885370719a 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -49,8 +49,7 @@ static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr, p = early_alloc(PMD_SIZE, nid, false); if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL)) return; - else if (p) - memblock_free(__pa(p), PMD_SIZE); + memblock_free_ptr(p, PMD_SIZE); } p = early_alloc(PAGE_SIZE, nid, true); @@ -86,8 +85,7 @@ static void __init kasan_populate_pud(pud_t *pud, unsigned long addr, p = early_alloc(PUD_SIZE, nid, false); if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL)) return; - else if (p) - memblock_free(__pa(p), PUD_SIZE); + memblock_free_ptr(p, PUD_SIZE); } p = early_alloc(PAGE_SIZE, nid, true); diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index a1b5c71099e6..1e9b93b088db 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -355,7 +355,7 @@ void __init numa_reset_distance(void) /* numa_distance could be 1LU marking allocation failure, test cnt */ if (numa_distance_cnt) - memblock_free(__pa(numa_distance), size); + memblock_free_ptr(numa_distance, size); numa_distance_cnt = 0; numa_distance = NULL; /* enable table creation */ } diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index 737491b13728..e801e30089c4 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -517,8 +517,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) } /* free the copied physical distance table */ - if (phys_dist) - memblock_free(__pa(phys_dist), phys_size); + memblock_free_ptr(phys_dist, phys_size); return; no_emu: diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index 46c503486e96..00fb4120a5b3 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -264,7 +264,7 @@ void __init numa_free_distance(void) size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]); - memblock_free(__pa(numa_distance), size); + memblock_free_ptr(numa_distance, size); numa_distance_cnt = 0; numa_distance = NULL; } diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index 94fb63a7b357..fe63d5ee201b 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -570,7 +570,7 @@ fail_msg_node: fail_db_node: of_node_put(smu->db_node); fail_bootmem: - memblock_free(__pa(smu), sizeof(struct smu_device)); + memblock_free_ptr(smu, sizeof(struct smu_device)); smu = NULL; fail_np: of_node_put(np); diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b066024c62e3..34de69b3b8ba 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -118,6 +118,7 @@ int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); int memblock_clear_nomap(phys_addr_t base, phys_addr_t size); void memblock_free_all(void); +void memblock_free_ptr(void *ptr, size_t size); void reset_node_managed_pages(pg_data_t *pgdat); void reset_all_zones_managed_pages(void); diff --git a/init/main.c b/init/main.c index 5c9a48df90e1..3f7216934441 100644 --- a/init/main.c +++ b/init/main.c @@ -924,7 +924,7 @@ static void __init print_unknown_bootoptions(void) end += sprintf(end, " %s", *p); pr_notice("Unknown command line parameters:%s\n", unknown_options); - memblock_free(__pa(unknown_options), len); + memblock_free_ptr(unknown_options, len); } asmlinkage __visible void __init __no_sanitize_address start_kernel(void) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 825277e1e742..a8d0a58deebc 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1166,9 +1166,9 @@ void __init setup_log_buf(int early) return; err_free_descs: - memblock_free(__pa(new_descs), new_descs_size); + memblock_free_ptr(new_descs, new_descs_size); err_free_log_buf: - memblock_free(__pa(new_log_buf), new_log_buf_len); + memblock_free_ptr(new_log_buf, new_log_buf_len); } static bool __read_mostly ignore_loglevel; diff --git a/lib/bootconfig.c b/lib/bootconfig.c index f8419cff1147..5ae248b29373 100644 --- a/lib/bootconfig.c +++ b/lib/bootconfig.c @@ -792,7 +792,7 @@ void __init xbc_destroy_all(void) xbc_data = NULL; xbc_data_size = 0; xbc_node_num = 0; - memblock_free(__pa(xbc_nodes), sizeof(struct xbc_node) * XBC_NODE_MAX); + memblock_free_ptr(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX); xbc_nodes = NULL; brace_index = 0; } diff --git a/mm/memblock.c b/mm/memblock.c index 0ab5a749bfa6..184dcd2e5d99 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -472,7 +472,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type, kfree(old_array); else if (old_array != memblock_memory_init_regions && old_array != memblock_reserved_init_regions) - memblock_free(__pa(old_array), old_alloc_size); + memblock_free_ptr(old_array, old_alloc_size); /* * Reserve the new array if that comes from the memblock. Otherwise, we @@ -795,6 +795,20 @@ int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) return memblock_remove_range(&memblock.memory, base, size); } +/** + * memblock_free_ptr - free boot memory allocation + * @ptr: starting address of the boot memory allocation + * @size: size of the boot memory block in bytes + * + * Free boot memory block previously allocated by memblock_alloc_xx() API. + * The freeing memory will not be released to the buddy allocator. + */ +void __init_memblock memblock_free_ptr(void *ptr, size_t size) +{ + if (ptr) + memblock_free(__pa(ptr), size); +} + /** * memblock_free - free boot memory block * @base: phys starting address of the boot memory block From 3ca706c189db861b2ca2019a0901b94050ca49d8 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 7 Sep 2021 18:03:02 +0800 Subject: [PATCH 205/543] drm/ttm: fix type mismatch error on sparc64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On sparc64, __fls() returns an "int", but the drm TTM code expected it to be "unsigned long" as on x86. As a result, on sparc (and arc, and m68k) you get build errors because 'min()' checks that the types match. As suggested by Linus, it can use min_t instead of min to force the type to be "unsigned int". Suggested-by: Linus Torvalds Signed-off-by: Huang Rui Reviewed-by: Christian König Cc: Alex Deucher Cc: David Airlie Cc: Daniel Vetter Cc: Guenter Roeck Signed-off-by: Linus Torvalds --- drivers/gpu/drm/ttm/ttm_pool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index cb38b1a17b09..82cbb29a05aa 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -383,7 +383,8 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, else gfp_flags |= GFP_HIGHUSER; - for (order = min(MAX_ORDER - 1UL, __fls(num_pages)); num_pages; + for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages)); + num_pages; order = min_t(unsigned int, order, __fls(num_pages))) { bool apply_caching = false; struct ttm_pool_type *pt; From 783a40a1b3ac7f3714d2776fa8ac8cce3535e4f6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Sep 2021 09:06:55 +0200 Subject: [PATCH 206/543] block: check if a profile is actually registered in blk_integrity_unregister While clearing the profile itself is harmless, we really should not clear the stable writes flag if it wasn't set due to a registered integrity profile. Reported-by: Lihong Kou Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Link: https://lore.kernel.org/r/20210914070657.87677-2-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-integrity.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 69a12177dfb6..48bfb53aa857 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -426,8 +426,12 @@ EXPORT_SYMBOL(blk_integrity_register); */ void blk_integrity_unregister(struct gendisk *disk) { + struct blk_integrity *bi = &disk->queue->integrity; + + if (!bi->profile) + return; blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, disk->queue); - memset(&disk->queue->integrity, 0, sizeof(struct blk_integrity)); + memset(bi, 0, sizeof(*bi)); } EXPORT_SYMBOL(blk_integrity_unregister); From 3df49967f6f1d2121b0c27c381ca1c8386b1dab9 Mon Sep 17 00:00:00 2001 From: Lihong Kou Date: Tue, 14 Sep 2021 09:06:56 +0200 Subject: [PATCH 207/543] block: flush the integrity workqueue in blk_integrity_unregister When the integrity profile is unregistered there can still be integrity reads queued up which could see a NULL verify_fn as shown by the race window below: CPU0 CPU1 process_one_work nvme_validate_ns bio_integrity_verify_fn nvme_update_ns_info nvme_update_disk_info blk_integrity_unregister ---set queue->integrity as 0 bio_integrity_process --access bi->profile->verify_fn(bi is a pointer of queue->integity) Before calling blk_integrity_unregister in nvme_update_disk_info, we must make sure that there is no work item in the kintegrityd_wq. Just call blk_flush_integrity to flush the work queue so the bug can be resolved. Signed-off-by: Lihong Kou [hch: split up and shortened the changelog] Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Link: https://lore.kernel.org/r/20210914070657.87677-3-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-integrity.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 48bfb53aa857..16d5d5338392 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -430,6 +430,9 @@ void blk_integrity_unregister(struct gendisk *disk) if (!bi->profile) return; + + /* ensure all bios are off the integrity workqueue */ + blk_flush_integrity(); blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, disk->queue); memset(bi, 0, sizeof(*bi)); } From 9da4c7276ec5d167972d8f6670d64b59838b4ed2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Sep 2021 09:06:57 +0200 Subject: [PATCH 208/543] nvme: remove the call to nvme_update_disk_info in nvme_ns_remove There is no need to explicitly unregister the integrity profile when deleting the gendisk. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Link: https://lore.kernel.org/r/20210914070657.87677-4-hch@lst.de Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 7efb31b87f37..c72283ebf0cc 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3856,8 +3856,6 @@ static void nvme_ns_remove(struct nvme_ns *ns) nvme_cdev_del(&ns->cdev, &ns->cdev_device); del_gendisk(ns->disk); blk_cleanup_queue(ns->queue); - if (blk_get_integrity(ns->disk)) - blk_integrity_unregister(ns->disk); down_write(&ns->ctrl->namespaces_rwsem); list_del_init(&ns->list); From 52ce14c134a003fee03d8fc57442c05a55b53715 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sun, 12 Sep 2021 22:05:23 +0300 Subject: [PATCH 209/543] bnx2x: Fix enabling network interfaces without VFs This function is called to enable SR-IOV when available, not enabling interfaces without VFs was a regression. Fixes: 65161c35554f ("bnx2x: Fix missing error code in bnx2x_iov_init_one()") Signed-off-by: Adrian Bunk Reported-by: YunQiang Su Tested-by: YunQiang Su Cc: stable@vger.kernel.org Acked-by: Shai Malin Link: https://lore.kernel.org/r/20210912190523.27991-1-bunk@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index f255fd0b16db..6fbf735fca31 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -1224,7 +1224,7 @@ int bnx2x_iov_init_one(struct bnx2x *bp, int int_mode_param, /* SR-IOV capability was enabled but there are no VFs*/ if (iov->total == 0) { - err = -EINVAL; + err = 0; goto failed; } From 7366c23ff492ad260776a3ee1aaabba9fc773a8b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 13 Sep 2021 15:06:05 -0700 Subject: [PATCH 210/543] ptp: dp83640: don't define PAGE0 Building dp83640.c on arch/parisc/ produces a build warning for PAGE0 being redefined. Since the macro is not used in the dp83640 driver, just make it a comment for documentation purposes. In file included from ../drivers/net/phy/dp83640.c:23: ../drivers/net/phy/dp83640_reg.h:8: warning: "PAGE0" redefined 8 | #define PAGE0 0x0000 from ../drivers/net/phy/dp83640.c:11: ../arch/parisc/include/asm/page.h:187: note: this is the location of the previous definition 187 | #define PAGE0 ((struct zeropage *)__PAGE_OFFSET) Fixes: cb646e2b02b2 ("ptp: Added a clock driver for the National Semiconductor PHYTER.") Signed-off-by: Randy Dunlap Reported-by: Geert Uytterhoeven Cc: Richard Cochran Cc: John Stultz Cc: Heiner Kallweit Cc: Russell King Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20210913220605.19682-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83640_reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/dp83640_reg.h b/drivers/net/phy/dp83640_reg.h index 21aa24c741b9..daae7fa58fb8 100644 --- a/drivers/net/phy/dp83640_reg.h +++ b/drivers/net/phy/dp83640_reg.h @@ -5,7 +5,7 @@ #ifndef HAVE_DP83640_REGISTERS #define HAVE_DP83640_REGISTERS -#define PAGE0 0x0000 +/* #define PAGE0 0x0000 */ #define PHYCR2 0x001c /* PHY Control Register 2 */ #define PAGE4 0x0004 From f68aa100d815b5b4467fd1c3abbe3b99d65fd028 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 3 Sep 2021 10:49:37 +0200 Subject: [PATCH 211/543] xen: reset legacy rtc flag for PV domU A Xen PV guest doesn't have a legacy RTC device, so reset the legacy RTC flag. Otherwise the following WARN splat will occur at boot: [ 1.333404] WARNING: CPU: 1 PID: 1 at /home/gross/linux/head/drivers/rtc/rtc-mc146818-lib.c:25 mc146818_get_time+0x1be/0x210 [ 1.333404] Modules linked in: [ 1.333404] CPU: 1 PID: 1 Comm: swapper/0 Tainted: G W 5.14.0-rc7-default+ #282 [ 1.333404] RIP: e030:mc146818_get_time+0x1be/0x210 [ 1.333404] Code: c0 64 01 c5 83 fd 45 89 6b 14 7f 06 83 c5 64 89 6b 14 41 83 ec 01 b8 02 00 00 00 44 89 63 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 <0f> 0b 48 c7 c7 30 0e ef 82 4c 89 e6 e8 71 2a 24 00 48 c7 c0 ff ff [ 1.333404] RSP: e02b:ffffc90040093df8 EFLAGS: 00010002 [ 1.333404] RAX: 00000000000000ff RBX: ffffc90040093e34 RCX: 0000000000000000 [ 1.333404] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 000000000000000d [ 1.333404] RBP: ffffffff82ef0e30 R08: ffff888005013e60 R09: 0000000000000000 [ 1.333404] R10: ffffffff82373e9b R11: 0000000000033080 R12: 0000000000000200 [ 1.333404] R13: 0000000000000000 R14: 0000000000000002 R15: ffffffff82cdc6d4 [ 1.333404] FS: 0000000000000000(0000) GS:ffff88807d440000(0000) knlGS:0000000000000000 [ 1.333404] CS: 10000e030 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1.333404] CR2: 0000000000000000 CR3: 000000000260a000 CR4: 0000000000050660 [ 1.333404] Call Trace: [ 1.333404] ? wakeup_sources_sysfs_init+0x30/0x30 [ 1.333404] ? rdinit_setup+0x2b/0x2b [ 1.333404] early_resume_init+0x23/0xa4 [ 1.333404] ? cn_proc_init+0x36/0x36 [ 1.333404] do_one_initcall+0x3e/0x200 [ 1.333404] kernel_init_freeable+0x232/0x28e [ 1.333404] ? rest_init+0xd0/0xd0 [ 1.333404] kernel_init+0x16/0x120 [ 1.333404] ret_from_fork+0x1f/0x30 Cc: Fixes: 8d152e7a5c7537 ("x86/rtc: Replace paravirt rtc check with platform legacy quirk") Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20210903084937.19392-3-jgross@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pv.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 753f63734c13..349f780a1567 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1214,6 +1214,11 @@ static void __init xen_dom0_set_legacy_features(void) x86_platform.legacy.rtc = 1; } +static void __init xen_domu_set_legacy_features(void) +{ + x86_platform.legacy.rtc = 0; +} + /* First C function to be called on Xen boot */ asmlinkage __visible void __init xen_start_kernel(void) { @@ -1359,6 +1364,8 @@ asmlinkage __visible void __init xen_start_kernel(void) add_preferred_console("xenboot", 0, NULL); if (pci_xen) x86_init.pci.arch_init = pci_xen_init; + x86_platform.set_legacy_features = + xen_domu_set_legacy_features; } else { const struct dom0_vga_console_info *info = (void *)((char *)xen_start_info + From 36c9b5929b7094ea19a78827c0ede20d2e0e6c9c Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 8 Sep 2021 09:36:40 +0200 Subject: [PATCH 212/543] xen: fix usage of pmd_populate in mremap for pv guests Commit 0881ace292b662 ("mm/mremap: use pmd/pud_poplulate to update page table entries") introduced a regression when running as Xen PV guest. Today pmd_populate() for Xen PV assumes that the PFN inserted is referencing a not yet used page table. In case of move_normal_pmd() this is not true, resulting in WARN splats like: [34321.304270] ------------[ cut here ]------------ [34321.304277] WARNING: CPU: 0 PID: 23628 at arch/x86/xen/multicalls.c:102 xen_mc_flush+0x176/0x1a0 [34321.304288] Modules linked in: [34321.304291] CPU: 0 PID: 23628 Comm: apt-get Not tainted 5.14.1-20210906-doflr-mac80211debug+ #1 [34321.304294] Hardware name: MSI MS-7640/890FXA-GD70 (MS-7640) , BIOS V1.8B1 09/13/2010 [34321.304296] RIP: e030:xen_mc_flush+0x176/0x1a0 [34321.304300] Code: 89 45 18 48 c1 e9 3f 48 89 ce e9 20 ff ff ff e8 60 03 00 00 66 90 5b 5d 41 5c 41 5d c3 48 c7 45 18 ea ff ff ff be 01 00 00 00 <0f> 0b 8b 55 00 48 c7 c7 10 97 aa 82 31 db 49 c7 c5 38 97 aa 82 65 [34321.304303] RSP: e02b:ffffc90000a97c90 EFLAGS: 00010002 [34321.304305] RAX: ffff88807d416398 RBX: ffff88807d416350 RCX: ffff88807d416398 [34321.304306] RDX: 0000000000000001 RSI: 0000000000000001 RDI: deadbeefdeadf00d [34321.304308] RBP: ffff88807d416300 R08: aaaaaaaaaaaaaaaa R09: ffff888006160cc0 [34321.304309] R10: deadbeefdeadf00d R11: ffffea000026a600 R12: 0000000000000000 [34321.304310] R13: ffff888012f6b000 R14: 0000000012f6b000 R15: 0000000000000001 [34321.304320] FS: 00007f5071177800(0000) GS:ffff88807d400000(0000) knlGS:0000000000000000 [34321.304322] CS: 10000e030 DS: 0000 ES: 0000 CR0: 0000000080050033 [34321.304323] CR2: 00007f506f542000 CR3: 00000000160cc000 CR4: 0000000000000660 [34321.304326] Call Trace: [34321.304331] xen_alloc_pte+0x294/0x320 [34321.304334] move_pgt_entry+0x165/0x4b0 [34321.304339] move_page_tables+0x6fa/0x8d0 [34321.304342] move_vma.isra.44+0x138/0x500 [34321.304345] __x64_sys_mremap+0x296/0x410 [34321.304348] do_syscall_64+0x3a/0x80 [34321.304352] entry_SYSCALL_64_after_hwframe+0x44/0xae [34321.304355] RIP: 0033:0x7f507196301a [34321.304358] Code: 73 01 c3 48 8b 0d 76 0e 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 49 89 ca b8 19 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 46 0e 0c 00 f7 d8 64 89 01 48 [34321.304360] RSP: 002b:00007ffda1eecd38 EFLAGS: 00000246 ORIG_RAX: 0000000000000019 [34321.304362] RAX: ffffffffffffffda RBX: 000056205f950f30 RCX: 00007f507196301a [34321.304363] RDX: 0000000001a00000 RSI: 0000000001900000 RDI: 00007f506dc56000 [34321.304364] RBP: 0000000001a00000 R08: 0000000000000010 R09: 0000000000000004 [34321.304365] R10: 0000000000000001 R11: 0000000000000246 R12: 00007f506dc56060 [34321.304367] R13: 00007f506dc56000 R14: 00007f506dc56060 R15: 000056205f950f30 [34321.304368] ---[ end trace a19885b78fe8f33e ]--- [34321.304370] 1 of 2 multicall(s) failed: cpu 0 [34321.304371] call 2: op=12297829382473034410 arg=[aaaaaaaaaaaaaaaa] result=-22 Fix that by modifying xen_alloc_ptpage() to only pin the page table in case it wasn't pinned already. Fixes: 0881ace292b662 ("mm/mremap: use pmd/pud_poplulate to update page table entries") Cc: Reported-by: Sander Eikelenboom Tested-by: Sander Eikelenboom Signed-off-by: Juergen Gross Link: https://lore.kernel.org/r/20210908073640.11299-1-jgross@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/mmu_pv.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 1df5f01529e5..8d751939c6f3 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1518,14 +1518,17 @@ static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, if (pinned) { struct page *page = pfn_to_page(pfn); - if (static_branch_likely(&xen_struct_pages_ready)) + pinned = false; + if (static_branch_likely(&xen_struct_pages_ready)) { + pinned = PagePinned(page); SetPagePinned(page); + } xen_mc_batch(); __set_pfn_prot(pfn, PAGE_KERNEL_RO); - if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS) + if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS && !pinned) __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); xen_mc_issue(PARAVIRT_LAZY_MMU); From 45da234467f381239d87536c86597149f189d375 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 7 Sep 2021 14:17:13 +0200 Subject: [PATCH 213/543] xen/pvcalls: backend can be a module It's not clear to me why only the frontend has been tristate. Switch the backend to be, too. Signed-off-by: Jan Beulich Acked-by: Stefano Stabellini Link: https://lore.kernel.org/r/54a6070c-92bb-36a3-2fc0-de9ccca438c5@suse.com Signed-off-by: Juergen Gross --- drivers/xen/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 5f1ce59b44b9..a37eb52fb401 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -214,7 +214,7 @@ config XEN_PVCALLS_FRONTEND implements them. config XEN_PVCALLS_BACKEND - bool "XEN PV Calls backend driver" + tristate "XEN PV Calls backend driver" depends on INET && XEN && XEN_BACKEND help Experimental backend for the Xen PV Calls protocol From ce6a80d1b2f923b1839655a1cda786293feaa085 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 7 Sep 2021 14:04:25 +0200 Subject: [PATCH 214/543] swiotlb-xen: avoid double free Of the two paths leading to the "error" label in xen_swiotlb_init() one didn't allocate anything, while the other did already free what was allocated. Fixes: b82776005369 ("xen/swiotlb: Use the swiotlb_late_init_with_tbl to init Xen-SWIOTLB late when PV PCI is used") Signed-off-by: Jan Beulich Cc: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/ce9c2adb-8a52-6293-982a-0d6ece943ac6@suse.com Signed-off-by: Juergen Gross --- drivers/xen/swiotlb-xen.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 24d11861ac7d..99d518526eaf 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -216,7 +216,6 @@ error: goto retry; } pr_err("%s (rc:%d)\n", xen_swiotlb_error(m_ret), rc); - free_pages((unsigned long)start, order); return rc; } From 4c092c59015f7adf0f07685f869edb96d997a756 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 7 Sep 2021 14:04:47 +0200 Subject: [PATCH 215/543] swiotlb-xen: fix late init retry The commit referenced below removed the assignment of "bytes" from xen_swiotlb_init() without - like done for xen_swiotlb_init_early() - adding an assignment on the retry path, thus leading to excessively sized allocations upon retries. Fixes: 2d29960af0be ("swiotlb: dynamically allocate io_tlb_default_mem") Signed-off-by: Jan Beulich Cc: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/778299d6-9cfd-1c13-026e-25ee5d14ecb3@suse.com Signed-off-by: Juergen Gross --- drivers/xen/swiotlb-xen.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 99d518526eaf..dbb18dc956f3 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -211,8 +211,8 @@ error: if (repeat--) { /* Min is 2MB */ nslabs = max(1024UL, (nslabs >> 1)); - pr_info("Lowering to %luMB\n", - (nslabs << IO_TLB_SHIFT) >> 20); + bytes = nslabs << IO_TLB_SHIFT; + pr_info("Lowering to %luMB\n", bytes >> 20); goto retry; } pr_err("%s (rc:%d)\n", xen_swiotlb_error(m_ret), rc); From d9a688add3d412c840e31060847a6a297b552316 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 7 Sep 2021 14:05:12 +0200 Subject: [PATCH 216/543] swiotlb-xen: maintain slab count properly Generic swiotlb code makes sure to keep the slab count a multiple of the number of slabs per segment. Yet even without checking whether any such assumption is made elsewhere, it is easy to see that xen_swiotlb_fixup() might alter unrelated memory when calling xen_create_contiguous_region() for the last segment, when that's not a full one - the function acts on full order-N regions, not individual pages. Align the slab count suitably when halving it for a retry. Add a build time check and a runtime one. Replace the no longer useful local variable "slabs" by an "order" one calculated just once, outside of the loop. Re-use "order" for calculating "dma_bits", and change the type of the latter as well as the one of "i" while touching this anyway. Signed-off-by: Jan Beulich Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/dc054cb0-bec4-4db0-fc06-c9fc957b6e66@suse.com Signed-off-by: Juergen Gross --- drivers/xen/swiotlb-xen.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index dbb18dc956f3..08fc694f05b7 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -106,27 +106,26 @@ static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr) static int xen_swiotlb_fixup(void *buf, unsigned long nslabs) { - int i, rc; - int dma_bits; + int rc; + unsigned int order = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT); + unsigned int i, dma_bits = order + PAGE_SHIFT; dma_addr_t dma_handle; phys_addr_t p = virt_to_phys(buf); - dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT; + BUILD_BUG_ON(IO_TLB_SEGSIZE & (IO_TLB_SEGSIZE - 1)); + BUG_ON(nslabs % IO_TLB_SEGSIZE); i = 0; do { - int slabs = min(nslabs - i, (unsigned long)IO_TLB_SEGSIZE); - do { rc = xen_create_contiguous_region( - p + (i << IO_TLB_SHIFT), - get_order(slabs << IO_TLB_SHIFT), + p + (i << IO_TLB_SHIFT), order, dma_bits, &dma_handle); } while (rc && dma_bits++ < MAX_DMA_BITS); if (rc) return rc; - i += slabs; + i += IO_TLB_SEGSIZE; } while (i < nslabs); return 0; } @@ -210,7 +209,7 @@ retry: error: if (repeat--) { /* Min is 2MB */ - nslabs = max(1024UL, (nslabs >> 1)); + nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE)); bytes = nslabs << IO_TLB_SHIFT; pr_info("Lowering to %luMB\n", bytes >> 20); goto retry; @@ -245,7 +244,7 @@ retry: memblock_free(__pa(start), PAGE_ALIGN(bytes)); if (repeat--) { /* Min is 2MB */ - nslabs = max(1024UL, (nslabs >> 1)); + nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE)); bytes = nslabs << IO_TLB_SHIFT; pr_info("Lowering to %luMB\n", bytes >> 20); goto retry; From 79ca5f778aafbd69727d577b58d913c9ce8400be Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 7 Sep 2021 14:05:54 +0200 Subject: [PATCH 217/543] swiotlb-xen: suppress certain init retries Only on the 2nd of the paths leading to xen_swiotlb_init()'s "error" label it is useful to retry the allocation; the first one did already iterate through all possible order values. Signed-off-by: Jan Beulich Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/56477481-87da-4962-9661-5e1b277efde0@suse.com Signed-off-by: Juergen Gross --- drivers/xen/swiotlb-xen.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 08fc694f05b7..6520d05e62ef 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -184,7 +184,7 @@ retry: order--; } if (!start) - goto error; + goto exit; if (order != get_order(bytes)) { pr_warn("Warning: only able to allocate %ld MB for software IO TLB\n", (PAGE_SIZE << order) >> 20); @@ -214,6 +214,7 @@ error: pr_info("Lowering to %luMB\n", bytes >> 20); goto retry; } +exit: pr_err("%s (rc:%d)\n", xen_swiotlb_error(m_ret), rc); return rc; } From cabb7f89b24ed40c4640dac3bca044452ab0b386 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 7 Sep 2021 14:06:37 +0200 Subject: [PATCH 218/543] swiotlb-xen: limit init retries Due to the use of max(1024, ...) there's no point retrying (and issuing bogus log messages) when the number of slabs is already no larger than this minimum value. Signed-off-by: Jan Beulich Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/984fa426-2b7b-4b77-5ce8-766619575b7f@suse.com Signed-off-by: Juergen Gross --- drivers/xen/swiotlb-xen.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 6520d05e62ef..d30dc5e68a22 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -207,7 +207,7 @@ retry: swiotlb_set_max_segment(PAGE_SIZE); return 0; error: - if (repeat--) { + if (nslabs > 1024 && repeat--) { /* Min is 2MB */ nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE)); bytes = nslabs << IO_TLB_SHIFT; @@ -243,7 +243,7 @@ retry: rc = xen_swiotlb_fixup(start, nslabs); if (rc) { memblock_free(__pa(start), PAGE_ALIGN(bytes)); - if (repeat--) { + if (nslabs > 1024 && repeat--) { /* Min is 2MB */ nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE)); bytes = nslabs << IO_TLB_SHIFT; From 68573c1b5c4d0ebd90c24c6055abb53c474a8fc2 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 7 Sep 2021 14:06:55 +0200 Subject: [PATCH 219/543] swiotlb-xen: drop leftover __ref Commit a98f565462f0 ("xen-swiotlb: split xen_swiotlb_init") should not only have added __init to the split off function, but also should have dropped __ref from the one left. Signed-off-by: Jan Beulich Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/7cd163e1-fe13-270b-384c-2708e8273d34@suse.com Signed-off-by: Juergen Gross --- drivers/xen/swiotlb-xen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index d30dc5e68a22..f3d81b2697b7 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -154,7 +154,7 @@ static const char *xen_swiotlb_error(enum xen_swiotlb_err err) #define DEFAULT_NSLABS ALIGN(SZ_64M >> IO_TLB_SHIFT, IO_TLB_SEGSIZE) -int __ref xen_swiotlb_init(void) +int xen_swiotlb_init(void) { enum xen_swiotlb_err m_ret = XEN_SWIOTLB_UNKNOWN; unsigned long bytes = swiotlb_size_or_default(); From 7fd880a38cfefb2d54a912e4ae809110adec1c94 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 7 Sep 2021 14:07:21 +0200 Subject: [PATCH 220/543] swiotlb-xen: arrange to have buffer info logged I consider it unhelpful that address and size of the buffer aren't put in the log file; it makes diagnosing issues needlessly harder. The majority of callers of swiotlb_init() also passes 1 for the "verbose" parameter. Signed-off-by: Jan Beulich Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/2e3c8e68-36b2-4ae9-b829-bf7f75d39d47@suse.com Signed-off-by: Juergen Gross --- drivers/xen/swiotlb-xen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index f3d81b2697b7..afe8c7eb31b9 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -253,7 +253,7 @@ retry: panic("%s (rc:%d)", xen_swiotlb_error(XEN_SWIOTLB_EFIXUP), rc); } - if (swiotlb_init_with_tbl(start, nslabs, false)) + if (swiotlb_init_with_tbl(start, nslabs, true)) panic("Cannot allocate SWIOTLB buffer"); swiotlb_set_max_segment(PAGE_SIZE); } From d859ed25b24289c87a97889653596f8088367e16 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 7 Sep 2021 14:07:47 +0200 Subject: [PATCH 221/543] swiotlb-xen: drop DEFAULT_NSLABS It was introduced by 4035b43da6da ("xen-swiotlb: remove xen_set_nslabs") and then not removed by 2d29960af0be ("swiotlb: dynamically allocate io_tlb_default_mem"). Signed-off-by: Jan Beulich Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/15259326-209a-1d11-338c-5018dc38abe8@suse.com Signed-off-by: Juergen Gross --- drivers/xen/swiotlb-xen.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index afe8c7eb31b9..9c9ba500ef23 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -152,8 +152,6 @@ static const char *xen_swiotlb_error(enum xen_swiotlb_err err) return ""; } -#define DEFAULT_NSLABS ALIGN(SZ_64M >> IO_TLB_SHIFT, IO_TLB_SEGSIZE) - int xen_swiotlb_init(void) { enum xen_swiotlb_err m_ret = XEN_SWIOTLB_UNKNOWN; From c006a06508db4841d256d82f42da392d6391f3d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 13 Sep 2021 15:40:56 +0200 Subject: [PATCH 222/543] powerpc/xics: Set the IRQ chip data for the ICS native backend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ICS native driver relies on the IRQ chip data to find the struct 'ics_native' describing the ICS controller but it was removed by commit 248af248a8f4 ("powerpc/xics: Rename the map handler in a check handler"). Revert this change to fix the Microwatt SoC platform. Fixes: 248af248a8f4 ("powerpc/xics: Rename the map handler in a check handler") Signed-off-by: Cédric Le Goater Tested-by: Gustavo Romero Reviewed-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210913134056.3761960-1-clg@kaod.org --- arch/powerpc/sysdev/xics/xics-common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 5c1a157a83b8..244a727c6ba4 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -348,9 +348,9 @@ static int xics_host_map(struct irq_domain *domain, unsigned int virq, if (xics_ics->check(xics_ics, hwirq)) return -EINVAL; - /* No chip data for the XICS domain */ + /* Let the ICS be the chip data for the XICS domain. For ICS native */ irq_domain_set_info(domain, virq, hwirq, xics_ics->chip, - NULL, handle_fasteoi_irq, NULL, NULL); + xics_ics, handle_fasteoi_irq, NULL, NULL); return 0; } From a8b92b8c1eac8d655a97b1e90f4d83c25d9b9a18 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 9 Sep 2021 16:59:42 +0200 Subject: [PATCH 223/543] s390/pci_mmio: fully validate the VMA before calling follow_pte() We should not walk/touch page tables outside of VMA boundaries when holding only the mmap sem in read mode. Evil user space can modify the VMA layout just before this function runs and e.g., trigger races with page table removal code since commit dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap"). find_vma() does not check if the address is >= the VMA start address; use vma_lookup() instead. Reviewed-by: Niklas Schnelle Reviewed-by: Liam R. Howlett Fixes: dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap") Signed-off-by: David Hildenbrand Signed-off-by: Vasily Gorbik --- arch/s390/pci/pci_mmio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index ae683aa623ac..c5b35ea129cf 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -159,7 +159,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, mmap_read_lock(current->mm); ret = -EINVAL; - vma = find_vma(current->mm, mmio_addr); + vma = vma_lookup(current->mm, mmio_addr); if (!vma) goto out_unlock_mmap; if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) @@ -298,7 +298,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, mmap_read_lock(current->mm); ret = -EINVAL; - vma = find_vma(current->mm, mmio_addr); + vma = vma_lookup(current->mm, mmio_addr); if (!vma) goto out_unlock_mmap; if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) From d76b14f3971a0638b6cd0da289f8b48acee287d0 Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Thu, 9 Sep 2021 12:20:56 +0200 Subject: [PATCH 224/543] s390/sclp: fix Secure-IPL facility detection Prevent out-of-range access if the returned SCLP SCCB response is smaller in size than the address of the Secure-IPL flag. Fixes: c9896acc7851 ("s390/ipl: Provide has_secure sysfs attribute") Cc: stable@vger.kernel.org # 5.2+ Signed-off-by: Alexander Egorenkov Reviewed-by: Christian Borntraeger Signed-off-by: Vasily Gorbik --- drivers/s390/char/sclp_early.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 2f3515fa242a..f3d5c7f4c13d 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -45,13 +45,14 @@ static void __init sclp_early_facilities_detect(void) sclp.has_gisaf = !!(sccb->fac118 & 0x08); sclp.has_hvs = !!(sccb->fac119 & 0x80); sclp.has_kss = !!(sccb->fac98 & 0x01); - sclp.has_sipl = !!(sccb->cbl & 0x4000); if (sccb->fac85 & 0x02) S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP; if (sccb->fac91 & 0x40) S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_GUEST; if (sccb->cpuoff > 134) sclp.has_diag318 = !!(sccb->byte_134 & 0x80); + if (sccb->cpuoff > 137) + sclp.has_sipl = !!(sccb->cbl & 0x4000); sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2; sclp.rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2; sclp.rzm <<= 20; From 4b26ceac103be7ad71cd4cccf01f97b6017008a6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 1 Sep 2021 14:48:07 +0200 Subject: [PATCH 225/543] s390: update defconfigs Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/configs/debug_defconfig | 8 +++++--- arch/s390/configs/defconfig | 5 ++++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 37b6115ed80e..6aad18ee131d 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -10,6 +10,7 @@ CONFIG_BPF_JIT=y CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_LSM=y CONFIG_PREEMPT=y +CONFIG_SCHED_CORE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASKSTATS=y @@ -503,6 +504,7 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MICROSOFT is not set +# CONFIG_NET_VENDOR_LITEX is not set # CONFIG_NET_VENDOR_MARVELL is not set CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m @@ -661,7 +663,6 @@ CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y CONFIG_NFSD_V4_SECURITY_LABEL=y CONFIG_CIFS=m -CONFIG_CIFS_WEAK_PW_HASH=y CONFIG_CIFS_UPCALL=y CONFIG_CIFS_XATTR=y CONFIG_CIFS_POSIX=y @@ -720,6 +721,8 @@ CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_BLAKE2S=m +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA3=m @@ -774,7 +777,6 @@ CONFIG_RANDOM32_SELFTEST=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 CONFIG_DMA_API_DEBUG=y -CONFIG_STRING_SELFTEST=y CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO=y @@ -853,12 +855,12 @@ CONFIG_FAIL_FUNCTION=y CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y CONFIG_LKDTM=m CONFIG_TEST_MIN_HEAP=y -CONFIG_TEST_SORT=y CONFIG_KPROBES_SANITY_TEST=y CONFIG_RBTREE_TEST=y CONFIG_INTERVAL_TREE_TEST=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y +CONFIG_STRING_SELFTEST=y CONFIG_TEST_BITOPS=m CONFIG_TEST_BPF=m CONFIG_TEST_LIVEPATCH=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 56a1cc85c5d7..f08b161c9446 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -8,6 +8,7 @@ CONFIG_BPF_SYSCALL=y CONFIG_BPF_JIT=y CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_LSM=y +CONFIG_SCHED_CORE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASKSTATS=y @@ -494,6 +495,7 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MICROSOFT is not set +# CONFIG_NET_VENDOR_LITEX is not set # CONFIG_NET_VENDOR_MARVELL is not set CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m @@ -648,7 +650,6 @@ CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y CONFIG_NFSD_V4_SECURITY_LABEL=y CONFIG_CIFS=m -CONFIG_CIFS_WEAK_PW_HASH=y CONFIG_CIFS_UPCALL=y CONFIG_CIFS_XATTR=y CONFIG_CIFS_POSIX=y @@ -708,6 +709,8 @@ CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_BLAKE2S=m +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA3=m From 948e50551b9a07c3ab6b9d208bb8f16fa1b2ad41 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 7 Sep 2021 07:27:11 +0200 Subject: [PATCH 226/543] s390/ap: fix kernel doc comments Get rid of warnings like: drivers/s390/crypto/ap_bus.c:216: warning: bad line: drivers/s390/crypto/ap_bus.c:444: warning: Function parameter or member 'floating' not described in 'ap_interrupt_handler' Reviewed-by: Harald Freudenberger Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.c | 3 ++- drivers/s390/crypto/ap_queue.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index f433428057d9..d9b804943d19 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -213,7 +213,6 @@ static inline int ap_fetch_qci_info(struct ap_config_info *info) * ap_init_qci_info(): Allocate and query qci config info. * Does also update the static variables ap_max_domain_id * and ap_max_adapter_id if this info is available. - */ static void __init ap_init_qci_info(void) { @@ -439,6 +438,7 @@ static enum hrtimer_restart ap_poll_timeout(struct hrtimer *unused) /** * ap_interrupt_handler() - Schedule ap_tasklet on interrupt * @airq: pointer to adapter interrupt descriptor + * @floating: ignored */ static void ap_interrupt_handler(struct airq_struct *airq, bool floating) { @@ -1786,6 +1786,7 @@ static inline void ap_scan_adapter(int ap) /** * ap_scan_bus(): Scan the AP bus for new devices * Runs periodically, workqueue timer (ap_config_time) + * @unused: Unused pointer. */ static void ap_scan_bus(struct work_struct *unused) { diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index d70c4d3d0907..9ea48bf0ee40 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -20,7 +20,7 @@ static void __ap_flush_queue(struct ap_queue *aq); /** * ap_queue_enable_irq(): Enable interrupt support on this AP queue. - * @qid: The AP queue number + * @aq: The AP queue * @ind: the notification indicator byte * * Enables interruption on AP queue via ap_aqic(). Based on the return @@ -311,7 +311,7 @@ static enum ap_sm_wait ap_sm_read_write(struct ap_queue *aq) /** * ap_sm_reset(): Reset an AP queue. - * @qid: The AP queue number + * @aq: The AP queue * * Submit the Reset command to an AP queue. */ From f5711f9df9242446feccf2bdb6fdc06a72ca1010 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Sep 2021 14:41:35 +0200 Subject: [PATCH 227/543] s390: remove WARN_DYNAMIC_STACK s390 is the only architecture which allows to set the -mwarn-dynamicstack compile option. This however will also always generate a warning with system call stack randomization, which uses alloca to generate some random sized stack frame. On the other hand Linus just enabled "-Werror" by default with commit 3fe617ccafd6 ("Enable '-Werror' by default for all kernel builds"), which means compiles will always fail by default. So instead of playing once again whack-a-mole for something which is s390 specific, simply remove this option. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 10 ---------- arch/s390/Makefile | 7 ------- 2 files changed, 17 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 2bd90c51efd3..b86de61b8caa 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -685,16 +685,6 @@ config STACK_GUARD The minimum size for the stack guard should be 256 for 31 bit and 512 for 64 bit. -config WARN_DYNAMIC_STACK - def_bool n - prompt "Emit compiler warnings for function with dynamic stack usage" - help - This option enables the compiler option -mwarn-dynamicstack. If the - compiler supports this options generates warnings for functions - that dynamically allocate stack space using alloca. - - Say N if you are unsure. - endmenu menu "I/O subsystem" diff --git a/arch/s390/Makefile b/arch/s390/Makefile index a3cf33ad009f..450b351dfa8e 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -85,13 +85,6 @@ cflags-$(CONFIG_CHECK_STACK) += -mstack-guard=$(CONFIG_STACK_GUARD) endif endif -ifdef CONFIG_WARN_DYNAMIC_STACK - ifneq ($(call cc-option,-mwarn-dynamicstack),) - KBUILD_CFLAGS += -mwarn-dynamicstack - KBUILD_CFLAGS_DECOMPRESSOR += -mwarn-dynamicstack - endif -endif - ifdef CONFIG_EXPOLINE ifneq ($(call cc-option,$(CC_FLAGS_MARCH) -mindirect-branch=thunk),) CC_FLAGS_EXPOLINE := -mindirect-branch=thunk From 3782326577d4b02f3d9940e1c96c3e9b31cf5309 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Wed, 15 Sep 2021 01:19:32 -0700 Subject: [PATCH 228/543] Revert "of: property: fw_devlink: Add support for "phy-handle" property" This reverts commit cf4b94c8530d14017fbddae26aad064ddc42edd4. Some PHYs pointed to by "phy-handle" will never bind to a driver until a consumer attaches to it. And when the consumer attaches to it, they get forcefully bound to a generic PHY driver. In such cases, parsing the phy-handle property and creating a device link will prevent the consumer from ever probing. We don't want that. So revert support for "phy-handle" property until we come up with a better mechanism for binding PHYs to generic drivers before a consumer tries to attach to it. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20210915081933.485112-1-saravanak@google.com Signed-off-by: Rob Herring --- drivers/of/property.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/of/property.c b/drivers/of/property.c index 3fd74bb34819..a3483484a5a2 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -1291,7 +1291,6 @@ DEFINE_SIMPLE_PROP(pwms, "pwms", "#pwm-cells") DEFINE_SIMPLE_PROP(resets, "resets", "#reset-cells") DEFINE_SIMPLE_PROP(leds, "leds", NULL) DEFINE_SIMPLE_PROP(backlight, "backlight", NULL) -DEFINE_SIMPLE_PROP(phy_handle, "phy-handle", NULL) DEFINE_SUFFIX_PROP(regulators, "-supply", NULL) DEFINE_SUFFIX_PROP(gpio, "-gpio", "#gpio-cells") @@ -1380,7 +1379,6 @@ static const struct supplier_bindings of_supplier_bindings[] = { { .parse_prop = parse_resets, }, { .parse_prop = parse_leds, }, { .parse_prop = parse_backlight, }, - { .parse_prop = parse_phy_handle, }, { .parse_prop = parse_gpio_compat, }, { .parse_prop = parse_interrupts, }, { .parse_prop = parse_regulators, }, From cd65869512ab5668a5d16f789bc4da1319c435c4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 10 Sep 2021 11:19:14 -0600 Subject: [PATCH 229/543] io_uring: use iov_iter state save/restore helpers Get rid of the need to do re-expand and revert on an iterator when we encounter a short IO, or failure that warrants a retry. Use the new state save/restore helpers instead. We keep the iov_iter_state persistent across retries, if we need to restart the read or write operation. If there's a pending retry, the operation will always exit with the state correctly saved. Signed-off-by: Jens Axboe --- fs/io_uring.c | 82 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 61 insertions(+), 21 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 855ea544807f..25bda8a5a4e5 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -712,6 +712,7 @@ struct io_async_rw { struct iovec fast_iov[UIO_FASTIOV]; const struct iovec *free_iovec; struct iov_iter iter; + struct iov_iter_state iter_state; size_t bytes_done; struct wait_page_queue wpq; }; @@ -2608,8 +2609,7 @@ static bool io_resubmit_prep(struct io_kiocb *req) if (!rw) return !io_req_prep_async(req); - /* may have left rw->iter inconsistent on -EIOCBQUEUED */ - iov_iter_revert(&rw->iter, req->result - iov_iter_count(&rw->iter)); + iov_iter_restore(&rw->iter, &rw->iter_state); return true; } @@ -3310,12 +3310,17 @@ static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec, if (!force && !io_op_defs[req->opcode].needs_async_setup) return 0; if (!req->async_data) { + struct io_async_rw *iorw; + if (io_alloc_async_data(req)) { kfree(iovec); return -ENOMEM; } io_req_map_rw(req, iovec, fast_iov, iter); + iorw = req->async_data; + /* we've copied and mapped the iter, ensure state is saved */ + iov_iter_save_state(&iorw->iter, &iorw->iter_state); } return 0; } @@ -3334,6 +3339,7 @@ static inline int io_rw_prep_async(struct io_kiocb *req, int rw) iorw->free_iovec = iov; if (iov) req->flags |= REQ_F_NEED_CLEANUP; + iov_iter_save_state(&iorw->iter, &iorw->iter_state); return 0; } @@ -3437,19 +3443,28 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) struct kiocb *kiocb = &req->rw.kiocb; struct iov_iter __iter, *iter = &__iter; struct io_async_rw *rw = req->async_data; - ssize_t io_size, ret, ret2; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + struct iov_iter_state __state, *state; + ssize_t ret, ret2; if (rw) { iter = &rw->iter; + state = &rw->iter_state; + /* + * We come here from an earlier attempt, restore our state to + * match in case it doesn't. It's cheap enough that we don't + * need to make this conditional. + */ + iov_iter_restore(iter, state); iovec = NULL; } else { ret = io_import_iovec(READ, req, &iovec, iter, !force_nonblock); if (ret < 0) return ret; + state = &__state; + iov_iter_save_state(iter, state); } - io_size = iov_iter_count(iter); - req->result = io_size; + req->result = iov_iter_count(iter); /* Ensure we clear previously set non-block flag */ if (!force_nonblock) @@ -3463,7 +3478,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) return ret ?: -EAGAIN; } - ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), io_size); + ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), req->result); if (unlikely(ret)) { kfree(iovec); return ret; @@ -3479,30 +3494,49 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) /* no retry on NONBLOCK nor RWF_NOWAIT */ if (req->flags & REQ_F_NOWAIT) goto done; - /* some cases will consume bytes even on error returns */ - iov_iter_reexpand(iter, iter->count + iter->truncated); - iov_iter_revert(iter, io_size - iov_iter_count(iter)); ret = 0; } else if (ret == -EIOCBQUEUED) { goto out_free; - } else if (ret <= 0 || ret == io_size || !force_nonblock || + } else if (ret <= 0 || ret == req->result || !force_nonblock || (req->flags & REQ_F_NOWAIT) || !need_read_all(req)) { /* read all, failed, already did sync or don't want to retry */ goto done; } + /* + * Don't depend on the iter state matching what was consumed, or being + * untouched in case of error. Restore it and we'll advance it + * manually if we need to. + */ + iov_iter_restore(iter, state); + ret2 = io_setup_async_rw(req, iovec, inline_vecs, iter, true); if (ret2) return ret2; iovec = NULL; rw = req->async_data; - /* now use our persistent iterator, if we aren't already */ - iter = &rw->iter; + /* + * Now use our persistent iterator and state, if we aren't already. + * We've restored and mapped the iter to match. + */ + if (iter != &rw->iter) { + iter = &rw->iter; + state = &rw->iter_state; + } do { - io_size -= ret; + /* + * We end up here because of a partial read, either from + * above or inside this loop. Advance the iter by the bytes + * that were consumed. + */ + iov_iter_advance(iter, ret); + if (!iov_iter_count(iter)) + break; rw->bytes_done += ret; + iov_iter_save_state(iter, state); + /* if we can retry, do so with the callbacks armed */ if (!io_rw_should_retry(req)) { kiocb->ki_flags &= ~IOCB_WAITQ; @@ -3520,7 +3554,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) return 0; /* we got some bytes, but not all. retry. */ kiocb->ki_flags &= ~IOCB_WAITQ; - } while (ret > 0 && ret < io_size); + iov_iter_restore(iter, state); + } while (ret > 0); done: kiocb_done(kiocb, ret, issue_flags); out_free: @@ -3543,19 +3578,24 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) struct kiocb *kiocb = &req->rw.kiocb; struct iov_iter __iter, *iter = &__iter; struct io_async_rw *rw = req->async_data; - ssize_t ret, ret2, io_size; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + struct iov_iter_state __state, *state; + ssize_t ret, ret2; if (rw) { iter = &rw->iter; + state = &rw->iter_state; + iov_iter_restore(iter, state); iovec = NULL; } else { ret = io_import_iovec(WRITE, req, &iovec, iter, !force_nonblock); if (ret < 0) return ret; + state = &__state; + iov_iter_save_state(iter, state); } - io_size = iov_iter_count(iter); - req->result = io_size; + req->result = iov_iter_count(iter); + ret2 = 0; /* Ensure we clear previously set non-block flag */ if (!force_nonblock) @@ -3572,7 +3612,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) (req->flags & REQ_F_ISREG)) goto copy_iov; - ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), io_size); + ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result); if (unlikely(ret)) goto out_free; @@ -3619,9 +3659,9 @@ done: kiocb_done(kiocb, ret2, issue_flags); } else { copy_iov: - /* some cases will consume bytes even on error returns */ - iov_iter_reexpand(iter, iter->count + iter->truncated); - iov_iter_revert(iter, io_size - iov_iter_count(iter)); + iov_iter_restore(iter, state); + if (ret2 > 0) + iov_iter_advance(iter, ret2); ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false); return ret ?: -EAGAIN; } From 7dedd3e18077f996a10c47250ac85d080e5f474e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 10 Sep 2021 11:19:58 -0600 Subject: [PATCH 230/543] Revert "iov_iter: track truncated size" This reverts commit 2112ff5ce0c1128fe7b4d19cfe7f2b8ce5b595fa. We no longer need to track the truncation count, the one user that did need it has been converted to using iov_iter_restore() instead. Signed-off-by: Jens Axboe --- include/linux/uio.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/include/linux/uio.h b/include/linux/uio.h index 984c4ab74859..207101a9c5c3 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -53,7 +53,6 @@ struct iov_iter { }; loff_t xarray_start; }; - size_t truncated; }; static inline enum iter_type iov_iter_type(const struct iov_iter *i) @@ -270,10 +269,8 @@ static inline void iov_iter_truncate(struct iov_iter *i, u64 count) * conversion in assignement is by definition greater than all * values of size_t, including old i->count. */ - if (i->count > count) { - i->truncated += i->count - count; + if (i->count > count) i->count = count; - } } /* @@ -282,7 +279,6 @@ static inline void iov_iter_truncate(struct iov_iter *i, u64 count) */ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) { - i->truncated -= count - i->count; i->count = count; } From b66ceaf324b394428bb47054140ddf03d8172e64 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 15 Sep 2021 11:00:05 +0100 Subject: [PATCH 231/543] io_uring: move iopoll reissue into regular IO path 230d50d448acb ("io_uring: move reissue into regular IO path") made non-IOPOLL I/O to not retry from ki_complete handler. Follow it steps and do the same for IOPOLL. Same problems, same implementation, same -EAGAIN assumptions. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/f80dfee2d5fa7678f0052a8ab3cfca9496a112ca.1631699928.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 25bda8a5a4e5..81b0c2723c72 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -736,7 +736,6 @@ enum { REQ_F_BUFFER_SELECTED_BIT, REQ_F_COMPLETE_INLINE_BIT, REQ_F_REISSUE_BIT, - REQ_F_DONT_REISSUE_BIT, REQ_F_CREDS_BIT, REQ_F_REFCOUNT_BIT, REQ_F_ARM_LTIMEOUT_BIT, @@ -783,8 +782,6 @@ enum { REQ_F_COMPLETE_INLINE = BIT(REQ_F_COMPLETE_INLINE_BIT), /* caller should reissue async */ REQ_F_REISSUE = BIT(REQ_F_REISSUE_BIT), - /* don't attempt request reissue, see io_rw_reissue() */ - REQ_F_DONT_REISSUE = BIT(REQ_F_DONT_REISSUE_BIT), /* supports async reads */ REQ_F_NOWAIT_READ = BIT(REQ_F_NOWAIT_READ_BIT), /* supports async writes */ @@ -2440,13 +2437,6 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, req = list_first_entry(done, struct io_kiocb, inflight_entry); list_del(&req->inflight_entry); - if (READ_ONCE(req->result) == -EAGAIN && - !(req->flags & REQ_F_DONT_REISSUE)) { - req->iopoll_completed = 0; - io_req_task_queue_reissue(req); - continue; - } - __io_cqring_fill_event(ctx, req->user_data, req->result, io_put_rw_kbuf(req)); (*nr_events)++; @@ -2709,10 +2699,9 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) if (kiocb->ki_flags & IOCB_WRITE) kiocb_end_write(req); if (unlikely(res != req->result)) { - if (!(res == -EAGAIN && io_rw_should_reissue(req) && - io_resubmit_prep(req))) { - req_set_fail(req); - req->flags |= REQ_F_DONT_REISSUE; + if (res == -EAGAIN && io_rw_should_reissue(req)) { + req->flags |= REQ_F_REISSUE; + return; } } @@ -2926,7 +2915,6 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret, { struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); struct io_async_rw *io = req->async_data; - bool check_reissue = kiocb->ki_complete == io_complete_rw; /* add previously done IO, if any */ if (io && io->bytes_done > 0) { @@ -2938,19 +2926,27 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret, if (req->flags & REQ_F_CUR_POS) req->file->f_pos = kiocb->ki_pos; - if (ret >= 0 && check_reissue) + if (ret >= 0 && (kiocb->ki_complete == io_complete_rw)) __io_complete_rw(req, ret, 0, issue_flags); else io_rw_done(kiocb, ret); - if (check_reissue && (req->flags & REQ_F_REISSUE)) { + if (req->flags & REQ_F_REISSUE) { req->flags &= ~REQ_F_REISSUE; if (io_resubmit_prep(req)) { io_req_task_queue_reissue(req); } else { + unsigned int cflags = io_put_rw_kbuf(req); + struct io_ring_ctx *ctx = req->ctx; + req_set_fail(req); - __io_req_complete(req, issue_flags, ret, - io_put_rw_kbuf(req)); + if (issue_flags & IO_URING_F_NONBLOCK) { + mutex_lock(&ctx->uring_lock); + __io_req_complete(req, issue_flags, ret, cflags); + mutex_unlock(&ctx->uring_lock); + } else { + __io_req_complete(req, issue_flags, ret, cflags); + } } } } From b89a05b21f46150ac10a962aa50109250b56b03b Mon Sep 17 00:00:00 2001 From: Baptiste Lepers Date: Mon, 6 Sep 2021 11:53:10 +1000 Subject: [PATCH 232/543] events: Reuse value read using READ_ONCE instead of re-reading it In perf_event_addr_filters_apply, the task associated with the event (event->ctx->task) is read using READ_ONCE at the beginning of the function, checked, and then re-read from event->ctx->task, voiding all guarantees of the checks. Reuse the value that was read by READ_ONCE to ensure the consistency of the task struct throughout the function. Fixes: 375637bc52495 ("perf/core: Introduce address range filtering") Signed-off-by: Baptiste Lepers Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210906015310.12802-1-baptiste.lepers@gmail.com --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 744e8726c5b2..0c000cb01eeb 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10193,7 +10193,7 @@ static void perf_event_addr_filters_apply(struct perf_event *event) return; if (ifh->nr_file_filters) { - mm = get_task_mm(event->ctx->task); + mm = get_task_mm(task); if (!mm) goto restart; From 7687201e37fabf2b7cf2b828f7ca46bf30e2948f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 9 Sep 2021 12:59:17 +0200 Subject: [PATCH 233/543] locking/rwbase: Properly match set_and_save_state() to restore_state() Noticed while looking at the readers race. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Acked-by: Will Deacon Link: https://lkml.kernel.org/r/20210909110203.828203010@infradead.org --- kernel/locking/rwbase_rt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c index 4ba15088e640..542b0170e4f5 100644 --- a/kernel/locking/rwbase_rt.c +++ b/kernel/locking/rwbase_rt.c @@ -220,7 +220,7 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb, for (; atomic_read(&rwb->readers);) { /* Optimized out for rwlocks */ if (rwbase_signal_pending_state(state, current)) { - __set_current_state(TASK_RUNNING); + rwbase_restore_current_state(); __rwbase_write_unlock(rwb, 0, flags); return -EINTR; } From 616be87eac9fa2ab2dca1069712f7236e50f3bf6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 9 Sep 2021 12:59:18 +0200 Subject: [PATCH 234/543] locking/rwbase: Extract __rwbase_write_trylock() The code in rwbase_write_lock() is a little non-obvious vs the read+set 'trylock', extract the sequence into a helper function to clarify the code. This also provides a single site to fix fast-path ordering. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Link: https://lkml.kernel.org/r/YUCq3L+u44NDieEJ@hirez.programming.kicks-ass.net --- kernel/locking/rwbase_rt.c | 46 ++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c index 542b0170e4f5..48fbbccdce3f 100644 --- a/kernel/locking/rwbase_rt.c +++ b/kernel/locking/rwbase_rt.c @@ -196,6 +196,19 @@ static inline void rwbase_write_downgrade(struct rwbase_rt *rwb) __rwbase_write_unlock(rwb, WRITER_BIAS - 1, flags); } +static inline bool __rwbase_write_trylock(struct rwbase_rt *rwb) +{ + /* Can do without CAS because we're serialized by wait_lock. */ + lockdep_assert_held(&rwb->rtmutex.wait_lock); + + if (!atomic_read(&rwb->readers)) { + atomic_set(&rwb->readers, WRITER_BIAS); + return 1; + } + + return 0; +} + static int __sched rwbase_write_lock(struct rwbase_rt *rwb, unsigned int state) { @@ -210,34 +223,30 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb, atomic_sub(READER_BIAS, &rwb->readers); raw_spin_lock_irqsave(&rtm->wait_lock, flags); - /* - * set_current_state() for rw_semaphore - * current_save_and_set_rtlock_wait_state() for rwlock - */ - rwbase_set_and_save_current_state(state); + if (__rwbase_write_trylock(rwb)) + goto out_unlock; - /* Block until all readers have left the critical section. */ - for (; atomic_read(&rwb->readers);) { + rwbase_set_and_save_current_state(state); + for (;;) { /* Optimized out for rwlocks */ if (rwbase_signal_pending_state(state, current)) { rwbase_restore_current_state(); __rwbase_write_unlock(rwb, 0, flags); return -EINTR; } + + if (__rwbase_write_trylock(rwb)) + break; + raw_spin_unlock_irqrestore(&rtm->wait_lock, flags); - - /* - * Schedule and wait for the readers to leave the critical - * section. The last reader leaving it wakes the waiter. - */ - if (atomic_read(&rwb->readers) != 0) - rwbase_schedule(); - set_current_state(state); + rwbase_schedule(); raw_spin_lock_irqsave(&rtm->wait_lock, flags); - } - atomic_set(&rwb->readers, WRITER_BIAS); + set_current_state(state); + } rwbase_restore_current_state(); + +out_unlock: raw_spin_unlock_irqrestore(&rtm->wait_lock, flags); return 0; } @@ -253,8 +262,7 @@ static inline int rwbase_write_trylock(struct rwbase_rt *rwb) atomic_sub(READER_BIAS, &rwb->readers); raw_spin_lock_irqsave(&rtm->wait_lock, flags); - if (!atomic_read(&rwb->readers)) { - atomic_set(&rwb->readers, WRITER_BIAS); + if (__rwbase_write_trylock(rwb)) { raw_spin_unlock_irqrestore(&rtm->wait_lock, flags); return 1; } From 81121524f1c798c9481bd7900450b72ee7ac2eef Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Thu, 9 Sep 2021 12:59:19 +0200 Subject: [PATCH 235/543] locking/rwbase: Take care of ordering guarantee for fastpath reader Readers of rwbase can lock and unlock without taking any inner lock, if that happens, we need the ordering provided by atomic operations to satisfy the ordering semantics of lock/unlock. Without that, considering the follow case: { X = 0 initially } CPU 0 CPU 1 ===== ===== rt_write_lock(); X = 1 rt_write_unlock(): atomic_add(READER_BIAS - WRITER_BIAS, ->readers); // ->readers is READER_BIAS. rt_read_lock(): if ((r = atomic_read(->readers)) < 0) // True atomic_try_cmpxchg(->readers, r, r + 1); // succeed. r1 = X; // r1 may be 0, because nothing prevent the reordering // of "X=1" and atomic_add() on CPU 1. Therefore audit every usage of atomic operations that may happen in a fast path, and add necessary barriers. Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20210909110203.953991276@infradead.org --- kernel/locking/rwbase_rt.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c index 48fbbccdce3f..88191f6e252c 100644 --- a/kernel/locking/rwbase_rt.c +++ b/kernel/locking/rwbase_rt.c @@ -41,6 +41,12 @@ * The risk of writer starvation is there, but the pathological use cases * which trigger it are not necessarily the typical RT workloads. * + * Fast-path orderings: + * The lock/unlock of readers can run in fast paths: lock and unlock are only + * atomic ops, and there is no inner lock to provide ACQUIRE and RELEASE + * semantics of rwbase_rt. Atomic ops should thus provide _acquire() + * and _release() (or stronger). + * * Common code shared between RT rw_semaphore and rwlock */ @@ -53,6 +59,7 @@ static __always_inline int rwbase_read_trylock(struct rwbase_rt *rwb) * set. */ for (r = atomic_read(&rwb->readers); r < 0;) { + /* Fully-ordered if cmpxchg() succeeds, provides ACQUIRE */ if (likely(atomic_try_cmpxchg(&rwb->readers, &r, r + 1))) return 1; } @@ -162,6 +169,8 @@ static __always_inline void rwbase_read_unlock(struct rwbase_rt *rwb, /* * rwb->readers can only hit 0 when a writer is waiting for the * active readers to leave the critical section. + * + * dec_and_test() is fully ordered, provides RELEASE. */ if (unlikely(atomic_dec_and_test(&rwb->readers))) __rwbase_read_unlock(rwb, state); @@ -172,7 +181,11 @@ static inline void __rwbase_write_unlock(struct rwbase_rt *rwb, int bias, { struct rt_mutex_base *rtm = &rwb->rtmutex; - atomic_add(READER_BIAS - bias, &rwb->readers); + /* + * _release() is needed in case that reader is in fast path, pairing + * with atomic_try_cmpxchg() in rwbase_read_trylock(), provides RELEASE + */ + (void)atomic_add_return_release(READER_BIAS - bias, &rwb->readers); raw_spin_unlock_irqrestore(&rtm->wait_lock, flags); rwbase_rtmutex_unlock(rtm); } @@ -201,7 +214,11 @@ static inline bool __rwbase_write_trylock(struct rwbase_rt *rwb) /* Can do without CAS because we're serialized by wait_lock. */ lockdep_assert_held(&rwb->rtmutex.wait_lock); - if (!atomic_read(&rwb->readers)) { + /* + * _acquire is needed in case the reader is in the fast path, pairing + * with rwbase_read_unlock(), provides ACQUIRE. + */ + if (!atomic_read_acquire(&rwb->readers)) { atomic_set(&rwb->readers, WRITER_BIAS); return 1; } From 8914a7a247e065438a0ec86a58c1c359223d2c9e Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Wed, 15 Sep 2021 21:45:54 +0800 Subject: [PATCH 236/543] selftests: be sure to make khdr before other targets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LKP/0Day reported some building errors about kvm, and errors message are not always same: - lib/x86_64/processor.c:1083:31: error: ‘KVM_CAP_NESTED_STATE’ undeclared (first use in this function); did you mean ‘KVM_CAP_PIT_STATE2’? - lib/test_util.c:189:30: error: ‘MAP_HUGE_16KB’ undeclared (first use in this function); did you mean ‘MAP_HUGE_16GB’? Although kvm relies on the khdr, they still be built in parallel when -j is specified. In this case, it will cause compiling errors. Here we mark target khdr as NOTPARALLEL to make it be always built first. CC: Philip Li Reported-by: kernel test robot Signed-off-by: Li Zhijian Signed-off-by: Shuah Khan --- tools/testing/selftests/lib.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index fa2ac0e56b43..fe7ee2b0f29c 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -48,6 +48,7 @@ ARCH ?= $(SUBARCH) # When local build is done, headers are installed in the default # INSTALL_HDR_PATH usr/include. .PHONY: khdr +.NOTPARALLEL: khdr: ifndef KSFT_KHDR_INSTALL_DONE ifeq (1,$(DEFAULT_INSTALL_HDR_PATH)) From 80be5998ad6339e3e804a772723390cb50b96428 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 15 Sep 2021 09:53:38 +0900 Subject: [PATCH 237/543] tools/bootconfig: Define memblock_free_ptr() to fix build error The lib/bootconfig.c file is shared with the 'bootconfig' tooling, and as a result, the changes incommit 77e02cf57b6c ("memblock: introduce saner 'memblock_free_ptr()' interface") need to also be reflected in the tooling header file. So define the new memblock_free_ptr() wrapper, and remove unused __pa() and memblock_free(). Fixes: 77e02cf57b6c ("memblock: introduce saner 'memblock_free_ptr()' interface") Signed-off-by: Masami Hiramatsu Signed-off-by: Linus Torvalds --- tools/bootconfig/include/linux/memblock.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/bootconfig/include/linux/memblock.h b/tools/bootconfig/include/linux/memblock.h index 7862f217d85d..f2e506f7d57f 100644 --- a/tools/bootconfig/include/linux/memblock.h +++ b/tools/bootconfig/include/linux/memblock.h @@ -4,9 +4,8 @@ #include -#define __pa(addr) (addr) #define SMP_CACHE_BYTES 0 #define memblock_alloc(size, align) malloc(size) -#define memblock_free(paddr, size) free(paddr) +#define memblock_free_ptr(paddr, size) free(paddr) #endif From 6f5ddde41069fcd1f0993ec76c9dbbf9d021fd4d Mon Sep 17 00:00:00 2001 From: Yanfei Xu Date: Wed, 15 Sep 2021 15:24:26 +0800 Subject: [PATCH 238/543] blkcg: fix memory leak in blk_iolatency_init BUG: memory leak unreferenced object 0xffff888129acdb80 (size 96): comm "syz-executor.1", pid 12661, jiffies 4294962682 (age 15.220s) hex dump (first 32 bytes): 20 47 c9 85 ff ff ff ff 20 d4 8e 29 81 88 ff ff G...... ..).... 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmalloc include/linux/slab.h:591 [inline] [] kzalloc include/linux/slab.h:721 [inline] [] blk_iolatency_init+0x28/0x190 block/blk-iolatency.c:724 [] blkcg_init_queue+0xb4/0x1c0 block/blk-cgroup.c:1185 [] blk_alloc_queue+0x22a/0x2e0 block/blk-core.c:566 [] blk_mq_init_queue_data block/blk-mq.c:3100 [inline] [] __blk_mq_alloc_disk+0x25/0xd0 block/blk-mq.c:3124 [] loop_add+0x1c3/0x360 drivers/block/loop.c:2344 [] loop_control_get_free drivers/block/loop.c:2501 [inline] [] loop_control_ioctl+0x17e/0x2e0 drivers/block/loop.c:2516 [] vfs_ioctl fs/ioctl.c:51 [inline] [] __do_sys_ioctl fs/ioctl.c:874 [inline] [] __se_sys_ioctl fs/ioctl.c:860 [inline] [] __x64_sys_ioctl+0xfc/0x140 fs/ioctl.c:860 [] do_syscall_x64 arch/x86/entry/common.c:50 [inline] [] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 [] entry_SYSCALL_64_after_hwframe+0x44/0xae Once blk_throtl_init() queue init failed, blkcg_iolatency_exit() will not be invoked for cleanup. That leads a memory leak. Swap the blk_throtl_init() and blk_iolatency_init() calls can solve this. Reported-by: syzbot+01321b15cc98e6bf96d6@syzkaller.appspotmail.com Fixes: 19688d7f9592 (block/blk-cgroup: Swap the blk_throtl_init() and blk_iolatency_init() calls) Signed-off-by: Yanfei Xu Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20210915072426.4022924-1-yanfei.xu@windriver.com Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 3c88a79a319b..1ded4181a6de 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1182,10 +1182,6 @@ int blkcg_init_queue(struct request_queue *q) if (preloaded) radix_tree_preload_end(); - ret = blk_iolatency_init(q); - if (ret) - goto err_destroy_all; - ret = blk_ioprio_init(q); if (ret) goto err_destroy_all; @@ -1194,6 +1190,12 @@ int blkcg_init_queue(struct request_queue *q) if (ret) goto err_destroy_all; + ret = blk_iolatency_init(q); + if (ret) { + blk_throtl_exit(q); + goto err_destroy_all; + } + return 0; err_destroy_all: From 858560b27645e7e97aca37ee8f232cccd658fbd2 Mon Sep 17 00:00:00 2001 From: Li Jinlin Date: Tue, 14 Sep 2021 12:26:05 +0800 Subject: [PATCH 239/543] blk-cgroup: fix UAF by grabbing blkcg lock before destroying blkg pd KASAN reports a use-after-free report when doing fuzz test: [693354.104835] ================================================================== [693354.105094] BUG: KASAN: use-after-free in bfq_io_set_weight_legacy+0xd3/0x160 [693354.105336] Read of size 4 at addr ffff888be0a35664 by task sh/1453338 [693354.105607] CPU: 41 PID: 1453338 Comm: sh Kdump: loaded Not tainted 4.18.0-147 [693354.105610] Hardware name: Huawei 2288H V5/BC11SPSCB0, BIOS 0.81 07/02/2018 [693354.105612] Call Trace: [693354.105621] dump_stack+0xf1/0x19b [693354.105626] ? show_regs_print_info+0x5/0x5 [693354.105634] ? printk+0x9c/0xc3 [693354.105638] ? cpumask_weight+0x1f/0x1f [693354.105648] print_address_description+0x70/0x360 [693354.105654] kasan_report+0x1b2/0x330 [693354.105659] ? bfq_io_set_weight_legacy+0xd3/0x160 [693354.105665] ? bfq_io_set_weight_legacy+0xd3/0x160 [693354.105670] bfq_io_set_weight_legacy+0xd3/0x160 [693354.105675] ? bfq_cpd_init+0x20/0x20 [693354.105683] cgroup_file_write+0x3aa/0x510 [693354.105693] ? ___slab_alloc+0x507/0x540 [693354.105698] ? cgroup_file_poll+0x60/0x60 [693354.105702] ? 0xffffffff89600000 [693354.105708] ? usercopy_abort+0x90/0x90 [693354.105716] ? mutex_lock+0xef/0x180 [693354.105726] kernfs_fop_write+0x1ab/0x280 [693354.105732] ? cgroup_file_poll+0x60/0x60 [693354.105738] vfs_write+0xe7/0x230 [693354.105744] ksys_write+0xb0/0x140 [693354.105749] ? __ia32_sys_read+0x50/0x50 [693354.105760] do_syscall_64+0x112/0x370 [693354.105766] ? syscall_return_slowpath+0x260/0x260 [693354.105772] ? do_page_fault+0x9b/0x270 [693354.105779] ? prepare_exit_to_usermode+0xf9/0x1a0 [693354.105784] ? enter_from_user_mode+0x30/0x30 [693354.105793] entry_SYSCALL_64_after_hwframe+0x65/0xca [693354.105875] Allocated by task 1453337: [693354.106001] kasan_kmalloc+0xa0/0xd0 [693354.106006] kmem_cache_alloc_node_trace+0x108/0x220 [693354.106010] bfq_pd_alloc+0x96/0x120 [693354.106015] blkcg_activate_policy+0x1b7/0x2b0 [693354.106020] bfq_create_group_hierarchy+0x1e/0x80 [693354.106026] bfq_init_queue+0x678/0x8c0 [693354.106031] blk_mq_init_sched+0x1f8/0x460 [693354.106037] elevator_switch_mq+0xe1/0x240 [693354.106041] elevator_switch+0x25/0x40 [693354.106045] elv_iosched_store+0x1a1/0x230 [693354.106049] queue_attr_store+0x78/0xb0 [693354.106053] kernfs_fop_write+0x1ab/0x280 [693354.106056] vfs_write+0xe7/0x230 [693354.106060] ksys_write+0xb0/0x140 [693354.106064] do_syscall_64+0x112/0x370 [693354.106069] entry_SYSCALL_64_after_hwframe+0x65/0xca [693354.106114] Freed by task 1453336: [693354.106225] __kasan_slab_free+0x130/0x180 [693354.106229] kfree+0x90/0x1b0 [693354.106233] blkcg_deactivate_policy+0x12c/0x220 [693354.106238] bfq_exit_queue+0xf5/0x110 [693354.106241] blk_mq_exit_sched+0x104/0x130 [693354.106245] __elevator_exit+0x45/0x60 [693354.106249] elevator_switch_mq+0xd6/0x240 [693354.106253] elevator_switch+0x25/0x40 [693354.106257] elv_iosched_store+0x1a1/0x230 [693354.106261] queue_attr_store+0x78/0xb0 [693354.106264] kernfs_fop_write+0x1ab/0x280 [693354.106268] vfs_write+0xe7/0x230 [693354.106271] ksys_write+0xb0/0x140 [693354.106275] do_syscall_64+0x112/0x370 [693354.106280] entry_SYSCALL_64_after_hwframe+0x65/0xca [693354.106329] The buggy address belongs to the object at ffff888be0a35580 which belongs to the cache kmalloc-1k of size 1024 [693354.106736] The buggy address is located 228 bytes inside of 1024-byte region [ffff888be0a35580, ffff888be0a35980) [693354.107114] The buggy address belongs to the page: [693354.107273] page:ffffea002f828c00 count:1 mapcount:0 mapping:ffff888107c17080 index:0x0 compound_mapcount: 0 [693354.107606] flags: 0x17ffffc0008100(slab|head) [693354.107760] raw: 0017ffffc0008100 ffffea002fcbc808 ffffea0030bd3a08 ffff888107c17080 [693354.108020] raw: 0000000000000000 00000000001c001c 00000001ffffffff 0000000000000000 [693354.108278] page dumped because: kasan: bad access detected [693354.108511] Memory state around the buggy address: [693354.108671] ffff888be0a35500: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [693354.116396] ffff888be0a35580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [693354.124473] >ffff888be0a35600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [693354.132421] ^ [693354.140284] ffff888be0a35680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [693354.147912] ffff888be0a35700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [693354.155281] ================================================================== blkgs are protected by both queue and blkcg locks and holding either should stabilize them. However, the path of destroying blkg policy data is only protected by queue lock in blkcg_activate_policy()/blkcg_deactivate_policy(). Other tasks can get the blkg policy data before the blkg policy data is destroyed, and use it after destroyed, which will result in a use-after-free. CPU0 CPU1 blkcg_deactivate_policy spin_lock_irq(&q->queue_lock) bfq_io_set_weight_legacy spin_lock_irq(&blkcg->lock) blkg_to_bfqg(blkg) pd_to_bfqg(blkg->pd[pol->plid]) ^^^^^^blkg->pd[pol->plid] != NULL bfqg != NULL pol->pd_free_fn(blkg->pd[pol->plid]) pd_to_bfqg(blkg->pd[pol->plid]) bfqg_put(bfqg) kfree(bfqg) blkg->pd[pol->plid] = NULL spin_unlock_irq(q->queue_lock); bfq_group_set_weight(bfqg, val, 0) bfqg->entity.new_weight ^^^^^^trigger uaf here spin_unlock_irq(&blkcg->lock); Fix by grabbing the matching blkcg lock before trying to destroy blkg policy data. Suggested-by: Tejun Heo Signed-off-by: Li Jinlin Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20210914042605.3260596-1-lijinlin3@huawei.com Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 1ded4181a6de..38b9f7684952 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1366,10 +1366,14 @@ enomem: /* alloc failed, nothing's initialized yet, free everything */ spin_lock_irq(&q->queue_lock); list_for_each_entry(blkg, &q->blkg_list, q_node) { + struct blkcg *blkcg = blkg->blkcg; + + spin_lock(&blkcg->lock); if (blkg->pd[pol->plid]) { pol->pd_free_fn(blkg->pd[pol->plid]); blkg->pd[pol->plid] = NULL; } + spin_unlock(&blkcg->lock); } spin_unlock_irq(&q->queue_lock); ret = -ENOMEM; @@ -1401,12 +1405,16 @@ void blkcg_deactivate_policy(struct request_queue *q, __clear_bit(pol->plid, q->blkcg_pols); list_for_each_entry(blkg, &q->blkg_list, q_node) { + struct blkcg *blkcg = blkg->blkcg; + + spin_lock(&blkcg->lock); if (blkg->pd[pol->plid]) { if (pol->pd_offline_fn) pol->pd_offline_fn(blkg->pd[pol->plid]); pol->pd_free_fn(blkg->pd[pol->plid]); blkg->pd[pol->plid] = NULL; } + spin_unlock(&blkcg->lock); } spin_unlock_irq(&q->queue_lock); From f6b5f1a56987de837f8e25cd560847106b8632a8 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 14 Sep 2021 20:52:24 -0700 Subject: [PATCH 240/543] compiler.h: Introduce absolute_pointer macro absolute_pointer() disassociates a pointer from its originating symbol type and context. Use it to prevent compiler warnings/errors such as drivers/net/ethernet/i825xx/82596.c: In function 'i82596_probe': arch/m68k/include/asm/string.h:72:25: error: '__builtin_memcpy' reading 6 bytes from a region of size 0 [-Werror=stringop-overread] Such warnings may be reported by gcc 11.x for string and memory operations on fixed addresses. Suggested-by: Linus Torvalds Signed-off-by: Guenter Roeck Reviewed-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- include/linux/compiler.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index b67261a1e3e9..3d5af56337bd 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -188,6 +188,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, (typeof(ptr)) (__ptr + (off)); }) #endif +#define absolute_pointer(val) RELOC_HIDE((void *)(val), 0) + #ifndef OPTIMIZER_HIDE_VAR /* Make the optimizer believe the variable can be manipulated arbitrarily. */ #define OPTIMIZER_HIDE_VAR(var) \ From dff2d13114f0beec448da9b3716204eb34b0cf41 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 14 Sep 2021 20:52:25 -0700 Subject: [PATCH 241/543] net: i825xx: Use absolute_pointer for memcpy from fixed memory location gcc 11.x reports the following compiler warning/error. drivers/net/ethernet/i825xx/82596.c: In function 'i82596_probe': arch/m68k/include/asm/string.h:72:25: error: '__builtin_memcpy' reading 6 bytes from a region of size 0 [-Werror=stringop-overread] Use absolute_pointer() to work around the problem. Cc: Geert Uytterhoeven Signed-off-by: Guenter Roeck Reviewed-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- drivers/net/ethernet/i825xx/82596.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c index b8a40146b895..b482f6f633bd 100644 --- a/drivers/net/ethernet/i825xx/82596.c +++ b/drivers/net/ethernet/i825xx/82596.c @@ -1144,7 +1144,7 @@ static struct net_device * __init i82596_probe(void) err = -ENODEV; goto out; } - memcpy(eth_addr, (void *) 0xfffc1f2c, ETH_ALEN); /* YUCK! Get addr from NOVRAM */ + memcpy(eth_addr, absolute_pointer(0xfffc1f2c), ETH_ALEN); /* YUCK! Get addr from NOVRAM */ dev->base_addr = MVME_I596_BASE; dev->irq = (unsigned) MVME16x_IRQ_I596; goto found; From 3cb8b1537f8a89a681d2548ded5526280846f6db Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 14 Sep 2021 20:52:26 -0700 Subject: [PATCH 242/543] alpha: Move setup.h out of uapi Most of the contents of setup.h have no value for userspace applications. The file was probably moved to uapi accidentally. Keep the file in uapi to define the alpha-specific COMMAND_LINE_SIZE. Move all other defines to arch/alpha/include/asm/setup.h. Suggested-by: Linus Torvalds Signed-off-by: Guenter Roeck Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/setup.h | 43 +++++++++++++++++++++++++++++ arch/alpha/include/uapi/asm/setup.h | 42 ++-------------------------- 2 files changed, 46 insertions(+), 39 deletions(-) create mode 100644 arch/alpha/include/asm/setup.h diff --git a/arch/alpha/include/asm/setup.h b/arch/alpha/include/asm/setup.h new file mode 100644 index 000000000000..58fe3f45a235 --- /dev/null +++ b/arch/alpha/include/asm/setup.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ALPHA_SETUP_H +#define __ALPHA_SETUP_H + +#include + +/* + * We leave one page for the initial stack page, and one page for + * the initial process structure. Also, the console eats 3 MB for + * the initial bootloader (one of which we can reclaim later). + */ +#define BOOT_PCB 0x20000000 +#define BOOT_ADDR 0x20000000 +/* Remove when official MILO sources have ELF support: */ +#define BOOT_SIZE (16*1024) + +#ifdef CONFIG_ALPHA_LEGACY_START_ADDRESS +#define KERNEL_START_PHYS 0x300000 /* Old bootloaders hardcoded this. */ +#else +#define KERNEL_START_PHYS 0x1000000 /* required: Wildfire/Titan/Marvel */ +#endif + +#define KERNEL_START (PAGE_OFFSET+KERNEL_START_PHYS) +#define SWAPPER_PGD KERNEL_START +#define INIT_STACK (PAGE_OFFSET+KERNEL_START_PHYS+0x02000) +#define EMPTY_PGT (PAGE_OFFSET+KERNEL_START_PHYS+0x04000) +#define EMPTY_PGE (PAGE_OFFSET+KERNEL_START_PHYS+0x08000) +#define ZERO_PGE (PAGE_OFFSET+KERNEL_START_PHYS+0x0A000) + +#define START_ADDR (PAGE_OFFSET+KERNEL_START_PHYS+0x10000) + +/* + * This is setup by the secondary bootstrap loader. Because + * the zero page is zeroed out as soon as the vm system is + * initialized, we need to copy things out into a more permanent + * place. + */ +#define PARAM ZERO_PGE +#define COMMAND_LINE ((char *)(PARAM + 0x0000)) +#define INITRD_START (*(unsigned long *) (PARAM+0x100)) +#define INITRD_SIZE (*(unsigned long *) (PARAM+0x108)) + +#endif diff --git a/arch/alpha/include/uapi/asm/setup.h b/arch/alpha/include/uapi/asm/setup.h index 13b7ee465b0e..f881ea5947cb 100644 --- a/arch/alpha/include/uapi/asm/setup.h +++ b/arch/alpha/include/uapi/asm/setup.h @@ -1,43 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __ALPHA_SETUP_H -#define __ALPHA_SETUP_H +#ifndef _UAPI__ALPHA_SETUP_H +#define _UAPI__ALPHA_SETUP_H #define COMMAND_LINE_SIZE 256 -/* - * We leave one page for the initial stack page, and one page for - * the initial process structure. Also, the console eats 3 MB for - * the initial bootloader (one of which we can reclaim later). - */ -#define BOOT_PCB 0x20000000 -#define BOOT_ADDR 0x20000000 -/* Remove when official MILO sources have ELF support: */ -#define BOOT_SIZE (16*1024) - -#ifdef CONFIG_ALPHA_LEGACY_START_ADDRESS -#define KERNEL_START_PHYS 0x300000 /* Old bootloaders hardcoded this. */ -#else -#define KERNEL_START_PHYS 0x1000000 /* required: Wildfire/Titan/Marvel */ -#endif - -#define KERNEL_START (PAGE_OFFSET+KERNEL_START_PHYS) -#define SWAPPER_PGD KERNEL_START -#define INIT_STACK (PAGE_OFFSET+KERNEL_START_PHYS+0x02000) -#define EMPTY_PGT (PAGE_OFFSET+KERNEL_START_PHYS+0x04000) -#define EMPTY_PGE (PAGE_OFFSET+KERNEL_START_PHYS+0x08000) -#define ZERO_PGE (PAGE_OFFSET+KERNEL_START_PHYS+0x0A000) - -#define START_ADDR (PAGE_OFFSET+KERNEL_START_PHYS+0x10000) - -/* - * This is setup by the secondary bootstrap loader. Because - * the zero page is zeroed out as soon as the vm system is - * initialized, we need to copy things out into a more permanent - * place. - */ -#define PARAM ZERO_PGE -#define COMMAND_LINE ((char*)(PARAM + 0x0000)) -#define INITRD_START (*(unsigned long *) (PARAM+0x100)) -#define INITRD_SIZE (*(unsigned long *) (PARAM+0x108)) - -#endif +#endif /* _UAPI__ALPHA_SETUP_H */ From ebdc20d7bc74e8b6a242ff1f30e9017ffca9092c Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 14 Sep 2021 20:52:27 -0700 Subject: [PATCH 243/543] alpha: Use absolute_pointer to define COMMAND_LINE alpha:allmodconfig fails to build with the following error when using gcc 11.x. arch/alpha/kernel/setup.c: In function 'setup_arch': arch/alpha/kernel/setup.c:493:13: error: 'strcmp' reading 1 or more bytes from a region of size 0 Avoid the problem by declaring COMMAND_LINE as absolute_pointer(). Signed-off-by: Guenter Roeck Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/setup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/include/asm/setup.h b/arch/alpha/include/asm/setup.h index 58fe3f45a235..262aab99e391 100644 --- a/arch/alpha/include/asm/setup.h +++ b/arch/alpha/include/asm/setup.h @@ -36,7 +36,7 @@ * place. */ #define PARAM ZERO_PGE -#define COMMAND_LINE ((char *)(PARAM + 0x0000)) +#define COMMAND_LINE ((char *)(absolute_pointer(PARAM + 0x0000))) #define INITRD_START (*(unsigned long *) (PARAM+0x100)) #define INITRD_SIZE (*(unsigned long *) (PARAM+0x108)) From fc7c028dcdbfe981bca75d2a7b95f363eb691ef3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 6 Sep 2021 16:06:04 -0700 Subject: [PATCH 244/543] sparc: avoid stringop-overread errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sparc mdesc code does pointer games with 'struct mdesc_hdr', but didn't describe to the compiler how that header is then followed by the data that the header describes. As a result, gcc is now unhappy since it does stricter pointer range tracking, and doesn't understand about how these things work. This results in various errors like: arch/sparc/kernel/mdesc.c: In function ‘mdesc_node_by_name’: arch/sparc/kernel/mdesc.c:647:22: error: ‘strcmp’ reading 1 or more bytes from a region of size 0 [-Werror=stringop-overread] 647 | if (!strcmp(names + ep[ret].name_offset, name)) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which are easily avoided by just describing 'struct mdesc_hdr' better, and making the node_block() helper function look into that unsized data[] that follows the header. This makes the sparc64 build happy again at least for my cross-compiler version (gcc version 11.2.1). Link: https://lore.kernel.org/lkml/CAHk-=wi4NW3NC0xWykkw=6LnjQD6D_rtRtxY9g8gQAJXtQMi8A@mail.gmail.com/ Cc: Guenter Roeck Cc: David S. Miller Signed-off-by: Linus Torvalds --- arch/sparc/kernel/mdesc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 8e645ddac58e..30f171b7b00c 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -39,6 +39,7 @@ struct mdesc_hdr { u32 node_sz; /* node block size */ u32 name_sz; /* name block size */ u32 data_sz; /* data block size */ + char data[]; } __attribute__((aligned(16))); struct mdesc_elem { @@ -612,7 +613,7 @@ EXPORT_SYMBOL(mdesc_get_node_info); static struct mdesc_elem *node_block(struct mdesc_hdr *mdesc) { - return (struct mdesc_elem *) (mdesc + 1); + return (struct mdesc_elem *) mdesc->data; } static void *name_block(struct mdesc_hdr *mdesc) From b7213ffa0e585feb1aee3e7173e965e66ee0abaa Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 15 Sep 2021 13:56:37 -0700 Subject: [PATCH 245/543] qnx4: avoid stringop-overread errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The qnx4 directory entries are 64-byte blocks that have different contents depending on the a status byte that is in the last byte of the block. In particular, a directory entry can be either a "link info" entry with a 48-byte name and pointers to the real inode information, or an "inode entry" with a smaller 16-byte name and the full inode information. But the code was written to always just treat the directory name as if it was part of that "inode entry", and just extend the name to the longer case if the status byte said it was a link entry. That work just fine and gives the right results, but now that gcc is tracking data structure accesses much more, the code can trigger a compiler error about using up to 48 bytes (the long name) in a structure that only has that shorter name in it: fs/qnx4/dir.c: In function ‘qnx4_readdir’: fs/qnx4/dir.c:51:32: error: ‘strnlen’ specified bound 48 exceeds source size 16 [-Werror=stringop-overread] 51 | size = strnlen(de->di_fname, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from fs/qnx4/qnx4.h:3, from fs/qnx4/dir.c:16: include/uapi/linux/qnx4_fs.h:45:25: note: source object declared here 45 | char di_fname[QNX4_SHORT_NAME_MAX]; | ^~~~~~~~ which is because the source code doesn't really make this whole "one of two different types" explicit. Fix this by introducing a very explicit union of the two types, and basically explaining to the compiler what is really going on. Signed-off-by: Linus Torvalds --- fs/qnx4/dir.c | 51 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c index a6ee23aadd28..2a66844b7ff8 100644 --- a/fs/qnx4/dir.c +++ b/fs/qnx4/dir.c @@ -15,13 +15,27 @@ #include #include "qnx4.h" +/* + * A qnx4 directory entry is an inode entry or link info + * depending on the status field in the last byte. The + * first byte is where the name start either way, and a + * zero means it's empty. + */ +union qnx4_directory_entry { + struct { + char de_name; + char de_pad[62]; + char de_status; + }; + struct qnx4_inode_entry inode; + struct qnx4_link_info link; +}; + static int qnx4_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); unsigned int offset; struct buffer_head *bh; - struct qnx4_inode_entry *de; - struct qnx4_link_info *le; unsigned long blknum; int ix, ino; int size; @@ -38,27 +52,30 @@ static int qnx4_readdir(struct file *file, struct dir_context *ctx) } ix = (ctx->pos >> QNX4_DIR_ENTRY_SIZE_BITS) % QNX4_INODES_PER_BLOCK; for (; ix < QNX4_INODES_PER_BLOCK; ix++, ctx->pos += QNX4_DIR_ENTRY_SIZE) { + union qnx4_directory_entry *de; + const char *name; + offset = ix * QNX4_DIR_ENTRY_SIZE; - de = (struct qnx4_inode_entry *) (bh->b_data + offset); - if (!de->di_fname[0]) + de = (union qnx4_directory_entry *) (bh->b_data + offset); + + if (!de->de_name) continue; - if (!(de->di_status & (QNX4_FILE_USED|QNX4_FILE_LINK))) + if (!(de->de_status & (QNX4_FILE_USED|QNX4_FILE_LINK))) continue; - if (!(de->di_status & QNX4_FILE_LINK)) - size = QNX4_SHORT_NAME_MAX; - else - size = QNX4_NAME_MAX; - size = strnlen(de->di_fname, size); - QNX4DEBUG((KERN_INFO "qnx4_readdir:%.*s\n", size, de->di_fname)); - if (!(de->di_status & QNX4_FILE_LINK)) + if (!(de->de_status & QNX4_FILE_LINK)) { + size = sizeof(de->inode.di_fname); + name = de->inode.di_fname; ino = blknum * QNX4_INODES_PER_BLOCK + ix - 1; - else { - le = (struct qnx4_link_info*)de; - ino = ( le32_to_cpu(le->dl_inode_blk) - 1 ) * + } else { + size = sizeof(de->link.dl_fname); + name = de->link.dl_fname; + ino = ( le32_to_cpu(de->link.dl_inode_blk) - 1 ) * QNX4_INODES_PER_BLOCK + - le->dl_inode_ndx; + de->link.dl_inode_ndx; } - if (!dir_emit(ctx, de->di_fname, size, ino, DT_UNKNOWN)) { + size = strnlen(name, size); + QNX4DEBUG((KERN_INFO "qnx4_readdir:%.*s\n", size, name)); + if (!dir_emit(ctx, name, size, ino, DT_UNKNOWN)) { brelse(bh); return 0; } From 34331739e19fd6a293d488add28832ad49c9fc54 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 10 Aug 2021 09:40:36 -0700 Subject: [PATCH 246/543] fpga: machxo2-spi: Return an error on failure Earlier successes leave 'ret' in a non error state, so these errors are not reported. Set ret to -EINVAL before going to the error handler. This addresses two issues reported by smatch: drivers/fpga/machxo2-spi.c:229 machxo2_write_init() warn: missing error code 'ret' drivers/fpga/machxo2-spi.c:316 machxo2_write_complete() warn: missing error code 'ret' [mdf@kernel.org: Reworded commit message] Fixes: 88fb3a002330 ("fpga: lattice machxo2: Add Lattice MachXO2 support") Reported-by: Dan Carpenter Signed-off-by: Tom Rix Signed-off-by: Moritz Fischer --- drivers/fpga/machxo2-spi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/fpga/machxo2-spi.c b/drivers/fpga/machxo2-spi.c index 1afb41aa20d7..b4a530a31302 100644 --- a/drivers/fpga/machxo2-spi.c +++ b/drivers/fpga/machxo2-spi.c @@ -225,8 +225,10 @@ static int machxo2_write_init(struct fpga_manager *mgr, goto fail; get_status(spi, &status); - if (test_bit(FAIL, &status)) + if (test_bit(FAIL, &status)) { + ret = -EINVAL; goto fail; + } dump_status_reg(&status); spi_message_init(&msg); @@ -313,6 +315,7 @@ static int machxo2_write_complete(struct fpga_manager *mgr, dump_status_reg(&status); if (!test_bit(DONE, &status)) { machxo2_cleanup(mgr); + ret = -EINVAL; goto fail; } From a1e4470823d99e75b596748086e120dea169ed3c Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 13 Aug 2021 14:40:42 +0800 Subject: [PATCH 247/543] fpga: machxo2-spi: Fix missing error code in machxo2_write_complete() The error code is missing in this code scenario, add the error code '-EINVAL' to the return value 'ret'. Eliminate the follow smatch warning: drivers/fpga/machxo2-spi.c:341 machxo2_write_complete() warn: missing error code 'ret'. [mdf@kernel.org: Reworded commit message] Fixes: 88fb3a002330 ("fpga: lattice machxo2: Add Lattice MachXO2 support") Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Moritz Fischer --- drivers/fpga/machxo2-spi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/fpga/machxo2-spi.c b/drivers/fpga/machxo2-spi.c index b4a530a31302..ea2ec3c6815c 100644 --- a/drivers/fpga/machxo2-spi.c +++ b/drivers/fpga/machxo2-spi.c @@ -338,6 +338,7 @@ static int machxo2_write_complete(struct fpga_manager *mgr, break; if (++refreshloop == MACHXO2_MAX_REFRESH_LOOP) { machxo2_cleanup(mgr); + ret = -EINVAL; goto fail; } } while (1); From 00e1a5d21b4ff514593554167b28a8caeda1497f Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 13 Sep 2021 16:13:26 -0500 Subject: [PATCH 248/543] PCI/VPD: Defer VPD sizing until first access 7bac54497c3e ("PCI/VPD: Determine VPD size in pci_vpd_init()") reads VPD at enumeration-time to find the size. But this is quite slow, and we don't need the size until we actually need data from VPD. Dave reported a boot slowdown of more than two minutes [1]. Defer the VPD sizing until a driver or the user (via sysfs) requests information from VPD. If devices are quirked because VPD is known not to work, don't bother even looking for the VPD capability. The VPD will not be accessible at all. [1] https://lore.kernel.org/r/20210913141818.GA27911@codemonkey.org.uk/ Link: https://lore.kernel.org/r/20210914215543.GA1437800@bjorn-Precision-5520 Fixes: 7bac54497c3e ("PCI/VPD: Determine VPD size in pci_vpd_init()") Signed-off-by: Bjorn Helgaas --- drivers/pci/vpd.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/pci/vpd.c b/drivers/pci/vpd.c index 25557b272a4f..4be24890132e 100644 --- a/drivers/pci/vpd.c +++ b/drivers/pci/vpd.c @@ -99,6 +99,24 @@ error: return off ?: PCI_VPD_SZ_INVALID; } +static bool pci_vpd_available(struct pci_dev *dev) +{ + struct pci_vpd *vpd = &dev->vpd; + + if (!vpd->cap) + return false; + + if (vpd->len == 0) { + vpd->len = pci_vpd_size(dev); + if (vpd->len == PCI_VPD_SZ_INVALID) { + vpd->cap = 0; + return false; + } + } + + return true; +} + /* * Wait for last operation to complete. * This code has to spin since there is no other notification from the PCI @@ -145,7 +163,7 @@ static ssize_t pci_vpd_read(struct pci_dev *dev, loff_t pos, size_t count, loff_t end = pos + count; u8 *buf = arg; - if (!vpd->cap) + if (!pci_vpd_available(dev)) return -ENODEV; if (pos < 0) @@ -206,7 +224,7 @@ static ssize_t pci_vpd_write(struct pci_dev *dev, loff_t pos, size_t count, loff_t end = pos + count; int ret = 0; - if (!vpd->cap) + if (!pci_vpd_available(dev)) return -ENODEV; if (pos < 0 || (pos & 3) || (count & 3)) @@ -242,14 +260,11 @@ static ssize_t pci_vpd_write(struct pci_dev *dev, loff_t pos, size_t count, void pci_vpd_init(struct pci_dev *dev) { + if (dev->vpd.len == PCI_VPD_SZ_INVALID) + return; + dev->vpd.cap = pci_find_capability(dev, PCI_CAP_ID_VPD); mutex_init(&dev->vpd.lock); - - if (!dev->vpd.len) - dev->vpd.len = pci_vpd_size(dev); - - if (dev->vpd.len == PCI_VPD_SZ_INVALID) - dev->vpd.cap = 0; } static ssize_t vpd_read(struct file *filp, struct kobject *kobj, @@ -294,13 +309,14 @@ const struct attribute_group pci_dev_vpd_attr_group = { void *pci_vpd_alloc(struct pci_dev *dev, unsigned int *size) { - unsigned int len = dev->vpd.len; + unsigned int len; void *buf; int cnt; - if (!dev->vpd.cap) + if (!pci_vpd_available(dev)) return ERR_PTR(-ENODEV); + len = dev->vpd.len; buf = kmalloc(len, GFP_KERNEL); if (!buf) return ERR_PTR(-ENOMEM); From 6bd65974dedd1e8638e6665e92ccbf26e8a67cbf Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 13 Sep 2021 18:23:59 +0100 Subject: [PATCH 249/543] PCI/ACPI: Don't reset a fwnode set by OF Commit 375553a93201 ("PCI: Setup ACPI fwnode early and at the same time with OF") added a call to pci_set_acpi_fwnode() in pci_setup_device(), which unconditionally clears any fwnode previously set by pci_set_of_node(). pci_set_acpi_fwnode() looks for ACPI_COMPANION(), which only returns the existing fwnode if it was set by ACPI_COMPANION_SET(). If it was set by OF instead, ACPI_COMPANION() returns NULL and pci_set_acpi_fwnode() accidentally clears the fwnode. To fix this, look for any fwnode instead of just ACPI companions. Fixes a virtio-iommu boot regression in v5.15-rc1. Fixes: 375553a93201 ("PCI: Setup ACPI fwnode early and at the same time with OF") Link: https://lore.kernel.org/r/20210913172358.1775381-1-jean-philippe@linaro.org Signed-off-by: Jean-Philippe Brucker Signed-off-by: Bjorn Helgaas Acked-by: Rob Herring Acked-by: Rafael J. Wysocki --- drivers/pci/pci-acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index a1b1e2a01632..0f40943a9a18 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -937,7 +937,7 @@ static struct acpi_device *acpi_pci_find_companion(struct device *dev); void pci_set_acpi_fwnode(struct pci_dev *dev) { - if (!ACPI_COMPANION(&dev->dev) && !pci_dev_is_added(dev)) + if (!dev_fwnode(&dev->dev) && !pci_dev_is_added(dev)) ACPI_COMPANION_SET(&dev->dev, acpi_pci_find_companion(&dev->dev)); } From 60b78ed088ebe1a872ee1320b6c5ad6ee2c4bd9a Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 3 Sep 2021 14:33:11 +0800 Subject: [PATCH 250/543] PCI: Add AMD GPU multi-function power dependencies Some AMD GPUs have built-in USB xHCI and USB Type-C UCSI controllers with power dependencies between the GPU and the other functions as in 6d2e369f0d4c ("PCI: Add NVIDIA GPU multi-function power dependencies"). Add device link support for the AMD integrated USB xHCI and USB Type-C UCSI controllers. Without this, runtime power management, including GPU resume and temp and fan sensors don't work correctly. Reported-at: https://gitlab.freedesktop.org/drm/amd/-/issues/1704 Link: https://lore.kernel.org/r/20210903063311.3606226-1-evan.quan@amd.com Signed-off-by: Evan Quan Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org --- drivers/pci/quirks.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index e5089af8ad90..4537d1ea14fd 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -5435,7 +5435,7 @@ DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, PCI_CLASS_MULTIMEDIA_HD_AUDIO, 8, quirk_gpu_hda); /* - * Create device link for NVIDIA GPU with integrated USB xHCI Host + * Create device link for GPUs with integrated USB xHCI Host * controller to VGA. */ static void quirk_gpu_usb(struct pci_dev *usb) @@ -5444,9 +5444,11 @@ static void quirk_gpu_usb(struct pci_dev *usb) } DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, PCI_CLASS_SERIAL_USB, 8, quirk_gpu_usb); +DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, PCI_ANY_ID, + PCI_CLASS_SERIAL_USB, 8, quirk_gpu_usb); /* - * Create device link for NVIDIA GPU with integrated Type-C UCSI controller + * Create device link for GPUs with integrated Type-C UCSI controller * to VGA. Currently there is no class code defined for UCSI device over PCI * so using UNKNOWN class for now and it will be updated when UCSI * over PCI gets a class code. @@ -5459,6 +5461,9 @@ static void quirk_gpu_usb_typec_ucsi(struct pci_dev *ucsi) DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, PCI_CLASS_SERIAL_UNKNOWN, 8, quirk_gpu_usb_typec_ucsi); +DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, PCI_ANY_ID, + PCI_CLASS_SERIAL_UNKNOWN, 8, + quirk_gpu_usb_typec_ucsi); /* * Enable the NVIDIA GPU integrated HDA controller if the BIOS left it From e042a4533fc346a655de7f1b8ac1fa01a2ed96e5 Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Thu, 9 Sep 2021 15:02:21 -0500 Subject: [PATCH 251/543] MAINTAINERS: Add Nirmal Patel as VMD maintainer Change my email to my unaffiliated address and move me to reviewer status. Link: https://lore.kernel.org/r/20210909200221.29981-1-jonathan.derrick@intel.com Signed-off-by: Jon Derrick Signed-off-by: Jon Derrick Signed-off-by: Bjorn Helgaas Cc: Nirmal Patel --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index eeb4c70b3d5b..ca6d6fde85cf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14342,7 +14342,8 @@ F: Documentation/devicetree/bindings/pci/intel,ixp4xx-pci.yaml F: drivers/pci/controller/pci-ixp4xx.c PCI DRIVER FOR INTEL VOLUME MANAGEMENT DEVICE (VMD) -M: Jonathan Derrick +M: Nirmal Patel +R: Jonathan Derrick L: linux-pci@vger.kernel.org S: Supported F: drivers/pci/controller/vmd.c From 6a52e73368038f47f6618623d75061dc263b26ae Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 14 Sep 2021 16:43:31 +0300 Subject: [PATCH 252/543] net: dsa: destroy the phylink instance on any error in dsa_slave_phy_setup DSA supports connecting to a phy-handle, and has a fallback to a non-OF based method of connecting to an internal PHY on the switch's own MDIO bus, if no phy-handle and no fixed-link nodes were present. The -ENODEV error code from the first attempt (phylink_of_phy_connect) is what triggers the second attempt (phylink_connect_phy). However, when the first attempt returns a different error code than -ENODEV, this results in an unbalance of calls to phylink_create and phylink_destroy by the time we exit the function. The phylink instance has leaked. There are many other error codes that can be returned by phylink_of_phy_connect. For example, phylink_validate returns -EINVAL. So this is a practical issue too. Fixes: aab9c4067d23 ("net: dsa: Plug in PHYLINK support") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20210914134331.2303380-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/dsa/slave.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 662ff531d4e2..a2bf2d8ac65b 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1854,13 +1854,11 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) * use the switch internal MDIO bus instead */ ret = dsa_slave_phy_connect(slave_dev, dp->index, phy_flags); - if (ret) { - netdev_err(slave_dev, - "failed to connect to port %d: %d\n", - dp->index, ret); - phylink_destroy(dp->pl); - return ret; - } + } + if (ret) { + netdev_err(slave_dev, "failed to connect to PHY: %pe\n", + ERR_PTR(ret)); + phylink_destroy(dp->pl); } return ret; From 301de697d869be6564aebeb5ab811c84c0a7abed Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 14 Sep 2021 17:05:15 +0300 Subject: [PATCH 253/543] Revert "net: phy: Uniform PHY driver access" This reverts commit 3ac8eed62596387214869319379c1fcba264d8c6, which did more than it said on the box, and not only it replaced to_phy_driver with phydev->drv, but it also removed the "!drv" check, without actually explaining why that is fine. That patch in fact breaks suspend/resume on any system which has PHY devices with no drivers bound. The stack trace is: Unable to handle kernel NULL pointer dereference at virtual address 00000000000000e8 pc : mdio_bus_phy_suspend+0xd8/0xec lr : dpm_run_callback+0x38/0x90 Call trace: mdio_bus_phy_suspend+0xd8/0xec dpm_run_callback+0x38/0x90 __device_suspend+0x108/0x3cc dpm_suspend+0x140/0x210 dpm_suspend_start+0x7c/0xa0 suspend_devices_and_enter+0x13c/0x540 pm_suspend+0x2a4/0x330 Examples why that assumption is not fine: - There is an MDIO bus with a PHY device that doesn't have a specific PHY driver loaded, because mdiobus_register() automatically creates a PHY device for it but there is no specific PHY driver in the system. Normally under those circumstances, the generic PHY driver will be bound lazily to it (at phy_attach_direct time). But some Ethernet drivers attach to their PHY at .ndo_open time. Until then it, the to-be-driven-by-genphy PHY device will not have a driver. The blamed patch amounts to saying "you need to open all net devices before the system can suspend, to avoid the NULL pointer dereference". - There is any raw MDIO device which has 'plausible' values in the PHY ID registers 2 and 3, which is located on an MDIO bus whose driver does not set bus->phy_mask = ~0 (which prevents auto-scanning of PHY devices). An example could be a MAC's internal MDIO bus with PCS devices on it, for serial links such as SGMII. PHY devices will get created for those PCSes too, due to that MDIO bus auto-scanning, and although those PHY devices are not used, they do not bother anybody either. PCS devices are usually managed in Linux as raw MDIO devices. Nonetheless, they do not have a PHY driver, nor does anybody attempt to connect to them (because they are not a PHY), and therefore this patch breaks that. The goal itself of the patch is questionable, so I am going for a straight revert. to_phy_driver does not seem to have a need to be replaced by phydev->drv, in fact that might even trigger code paths which were not given too deep of a thought. For instance: phy_probe populates phydev->drv at the beginning, but does not clean it up on any error (including EPROBE_DEFER). So if the phydev driver requests probe deferral, phydev->drv will remain populated despite there being no driver bound. If a system suspend starts in between the initial probe deferral request and the subsequent probe retry, we will be calling the phydev->drv->suspend method, but _before_ any phydev->drv->probe call has succeeded. That is to say, if the phydev->drv is allocating any driver-private data structure in ->probe, it pretty much expects that data structure to be available in ->suspend. But it may not. That is a pretty insane environment to present to PHY drivers. In the code structure before the blamed patch, mdio_bus_phy_may_suspend would just say "no, don't suspend" to any PHY device which does not have a driver pointer _in_the_device_structure_ (not the phydev->drv). That would essentially ensure that ->suspend will never get called for a device that has not yet successfully completed probe. This is the code structure the patch is returning to, via the revert. Fixes: 3ac8eed62596 ("net: phy: Uniform PHY driver access") Signed-off-by: Vladimir Oltean Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/20210914140515.2311548-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy_device.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 9e2891d8e8dd..ba5ad86ec826 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -233,9 +233,11 @@ static DEFINE_MUTEX(phy_fixup_lock); static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) { + struct device_driver *drv = phydev->mdio.dev.driver; + struct phy_driver *phydrv = to_phy_driver(drv); struct net_device *netdev = phydev->attached_dev; - if (!phydev->drv->suspend) + if (!drv || !phydrv->suspend) return false; /* PHY not attached? May suspend if the PHY has not already been From a57d8c217aadac75530b8e7ffb3a3e1b7bfd0330 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 14 Sep 2021 16:47:26 +0300 Subject: [PATCH 254/543] net: dsa: flush switchdev workqueue before tearing down CPU/DSA ports Sometimes when unbinding the mv88e6xxx driver on Turris MOX, these error messages appear: mv88e6085 d0032004.mdio-mii:12: port 1 failed to delete be:79:b4:9e:9e:96 vid 1 from fdb: -2 mv88e6085 d0032004.mdio-mii:12: port 1 failed to delete be:79:b4:9e:9e:96 vid 0 from fdb: -2 mv88e6085 d0032004.mdio-mii:12: port 1 failed to delete d8:58:d7:00:ca:6d vid 100 from fdb: -2 mv88e6085 d0032004.mdio-mii:12: port 1 failed to delete d8:58:d7:00:ca:6d vid 1 from fdb: -2 mv88e6085 d0032004.mdio-mii:12: port 1 failed to delete d8:58:d7:00:ca:6d vid 0 from fdb: -2 (and similarly for other ports) What happens is that DSA has a policy "even if there are bugs, let's at least not leak memory" and dsa_port_teardown() clears the dp->fdbs and dp->mdbs lists, which are supposed to be empty. But deleting that cleanup code, the warnings go away. => the FDB and MDB lists (used for refcounting on shared ports, aka CPU and DSA ports) will eventually be empty, but are not empty by the time we tear down those ports. Aka we are deleting them too soon. The addresses that DSA complains about are host-trapped addresses: the local addresses of the ports, and the MAC address of the bridge device. The problem is that offloading those entries happens from a deferred work item scheduled by the SWITCHDEV_FDB_DEL_TO_DEVICE handler, and this races with the teardown of the CPU and DSA ports where the refcounting is kept. In fact, not only it races, but fundamentally speaking, if we iterate through the port list linearly, we might end up tearing down the shared ports even before we delete a DSA user port which has a bridge upper. So as it turns out, we need to first tear down the user ports (and the unused ones, for no better place of doing that), then the shared ports (the CPU and DSA ports). In between, we need to ensure that all work items scheduled by our switchdev handlers (which only run for user ports, hence the reason why we tear them down first) have finished. Fixes: 161ca59d39e9 ("net: dsa: reference count the MDB entries at the cross-chip notifier level") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20210914134726.2305133-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 5 +++++ net/dsa/dsa.c | 5 +++++ net/dsa/dsa2.c | 46 +++++++++++++++++++++++++++++++--------------- net/dsa/dsa_priv.h | 1 + 4 files changed, 42 insertions(+), 15 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index f9a17145255a..258867eff230 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -447,6 +447,11 @@ static inline bool dsa_port_is_user(struct dsa_port *dp) return dp->type == DSA_PORT_TYPE_USER; } +static inline bool dsa_port_is_unused(struct dsa_port *dp) +{ + return dp->type == DSA_PORT_TYPE_UNUSED; +} + static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p) { return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_UNUSED; diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 1dc45e40f961..41f36ad8b0ec 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -345,6 +345,11 @@ bool dsa_schedule_work(struct work_struct *work) return queue_work(dsa_owq, work); } +void dsa_flush_workqueue(void) +{ + flush_workqueue(dsa_owq); +} + int dsa_devlink_param_get(struct devlink *dl, u32 id, struct devlink_param_gset_ctx *ctx) { diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 1b2b25d7bd02..eef13cd20f19 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -897,6 +897,33 @@ static void dsa_switch_teardown(struct dsa_switch *ds) ds->setup = false; } +/* First tear down the non-shared, then the shared ports. This ensures that + * all work items scheduled by our switchdev handlers for user ports have + * completed before we destroy the refcounting kept on the shared ports. + */ +static void dsa_tree_teardown_ports(struct dsa_switch_tree *dst) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_is_user(dp) || dsa_port_is_unused(dp)) + dsa_port_teardown(dp); + + dsa_flush_workqueue(); + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp)) + dsa_port_teardown(dp); +} + +static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + dsa_switch_teardown(dp->ds); +} + static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) { struct dsa_port *dp; @@ -923,26 +950,13 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) return 0; teardown: - list_for_each_entry(dp, &dst->ports, list) - dsa_port_teardown(dp); + dsa_tree_teardown_ports(dst); - list_for_each_entry(dp, &dst->ports, list) - dsa_switch_teardown(dp->ds); + dsa_tree_teardown_switches(dst); return err; } -static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst) -{ - struct dsa_port *dp; - - list_for_each_entry(dp, &dst->ports, list) - dsa_port_teardown(dp); - - list_for_each_entry(dp, &dst->ports, list) - dsa_switch_teardown(dp->ds); -} - static int dsa_tree_setup_master(struct dsa_switch_tree *dst) { struct dsa_port *dp; @@ -1052,6 +1066,8 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst) dsa_tree_teardown_master(dst); + dsa_tree_teardown_ports(dst); + dsa_tree_teardown_switches(dst); dsa_tree_teardown_cpu_ports(dst); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 33ab7d7af9eb..a5c9bc7b66c6 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -170,6 +170,7 @@ void dsa_tag_driver_put(const struct dsa_device_ops *ops); const struct dsa_device_ops *dsa_find_tagger_by_name(const char *buf); bool dsa_schedule_work(struct work_struct *work); +void dsa_flush_workqueue(void); const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops); static inline int dsa_tag_protocol_overhead(const struct dsa_device_ops *ops) From 90cc7bed1ed19f869ae7221a6b41887fe762a6a3 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 16 Sep 2021 08:35:42 +0200 Subject: [PATCH 255/543] parisc: Use absolute_pointer() to define PAGE0 Use absolute_pointer() wrapper for PAGE0 to avoid this compiler warning: arch/parisc/kernel/setup.c: In function 'start_parisc': error: '__builtin_memcmp_eq' specified bound 8 exceeds source size 0 Signed-off-by: Helge Deller Co-Developed-by: Guenter Roeck Suggested-by: Linus Torvalds --- arch/parisc/include/asm/page.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h index d00313d1274e..0561568f7b48 100644 --- a/arch/parisc/include/asm/page.h +++ b/arch/parisc/include/asm/page.h @@ -184,7 +184,7 @@ extern int npmem_ranges; #include #include -#define PAGE0 ((struct zeropage *)__PAGE_OFFSET) +#define PAGE0 ((struct zeropage *)absolute_pointer(__PAGE_OFFSET)) /* DEFINITION OF THE ZERO-PAGE (PAG0) */ /* based on work by Jason Eckhardt (jason@equator.com) */ From 5297cfa6bdf93e3889f78f9b482e2a595a376083 Mon Sep 17 00:00:00 2001 From: Sai Krishna Potthuri Date: Wed, 18 Aug 2021 12:53:14 +0530 Subject: [PATCH 256/543] EDAC/synopsys: Fix wrong value type assignment for edac_mode dimm->edac_mode contains values of type enum edac_type - not the corresponding capability flags. Fix that. Issue caught by Coverity check "enumerated type mixed with another type." [ bp: Rewrite commit message, add tags. ] Fixes: ae9b56e3996d ("EDAC, synps: Add EDAC support for zynq ddr ecc controller") Signed-off-by: Sai Krishna Potthuri Signed-off-by: Shubhrajyoti Datta Signed-off-by: Borislav Petkov Cc: Link: https://lkml.kernel.org/r/20210818072315.15149-1-shubhrajyoti.datta@xilinx.com --- drivers/edac/synopsys_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c index 7e7146b22c16..7d08627e738b 100644 --- a/drivers/edac/synopsys_edac.c +++ b/drivers/edac/synopsys_edac.c @@ -782,7 +782,7 @@ static void init_csrows(struct mem_ctl_info *mci) for (j = 0; j < csi->nr_channels; j++) { dimm = csi->channels[j]->dimm; - dimm->edac_mode = EDAC_FLAG_SECDED; + dimm->edac_mode = EDAC_SECDED; dimm->mtype = p_data->get_mtype(priv->baseaddr); dimm->nr_pages = (size >> PAGE_SHIFT) / csi->nr_channels; dimm->grain = SYNPS_EDAC_ERR_GRAIN; From 78edefc05e41352099ffb8f06f8d9b2d091e29cd Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 20 Aug 2021 22:18:23 +0200 Subject: [PATCH 257/543] drm/etnaviv: return context from etnaviv_iommu_context_get Being able to have the refcount manipulation in an assignment makes it much easier to parse the code. Cc: stable@vger.kernel.org # 5.4 Signed-off-by: Lucas Stach Tested-by: Michael Walle Tested-by: Marek Vasut Reviewed-by: Christian Gmeiner --- drivers/gpu/drm/etnaviv/etnaviv_buffer.c | 3 +-- drivers/gpu/drm/etnaviv/etnaviv_gem.c | 3 +-- drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 3 +-- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 6 ++---- drivers/gpu/drm/etnaviv/etnaviv_mmu.h | 4 +++- 5 files changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c index 76d38561c910..cf741c5c82d2 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c @@ -397,8 +397,7 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state, if (switch_mmu_context) { struct etnaviv_iommu_context *old_context = gpu->mmu_context; - etnaviv_iommu_context_get(mmu_context); - gpu->mmu_context = mmu_context; + gpu->mmu_context = etnaviv_iommu_context_get(mmu_context); etnaviv_iommu_context_put(old_context); } diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index 8f1b5af47dd6..f0b2540e60e4 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -294,8 +294,7 @@ struct etnaviv_vram_mapping *etnaviv_gem_mapping_get( list_del(&mapping->obj_node); } - etnaviv_iommu_context_get(mmu_context); - mapping->context = mmu_context; + mapping->context = etnaviv_iommu_context_get(mmu_context); mapping->use = 1; ret = etnaviv_iommu_map_gem(mmu_context, etnaviv_obj, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index 4dd7d9d541c0..486259e154af 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -532,8 +532,7 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, goto err_submit_objects; submit->ctx = file->driver_priv; - etnaviv_iommu_context_get(submit->ctx->mmu); - submit->mmu_context = submit->ctx->mmu; + submit->mmu_context = etnaviv_iommu_context_get(submit->ctx->mmu); submit->exec_state = args->exec_state; submit->flags = args->flags; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index c297fffe06eb..6722efcf858a 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1371,12 +1371,10 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit) } if (!gpu->mmu_context) { - etnaviv_iommu_context_get(submit->mmu_context); - gpu->mmu_context = submit->mmu_context; + gpu->mmu_context = etnaviv_iommu_context_get(submit->mmu_context); etnaviv_gpu_start_fe_idleloop(gpu); } else { - etnaviv_iommu_context_get(gpu->mmu_context); - submit->prev_mmu_context = gpu->mmu_context; + submit->prev_mmu_context = etnaviv_iommu_context_get(gpu->mmu_context); } if (submit->nr_pmrs) { diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h index d1d6902fd13b..e4a0b7d09c2e 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h @@ -105,9 +105,11 @@ void etnaviv_iommu_dump(struct etnaviv_iommu_context *ctx, void *buf); struct etnaviv_iommu_context * etnaviv_iommu_context_init(struct etnaviv_iommu_global *global, struct etnaviv_cmdbuf_suballoc *suballoc); -static inline void etnaviv_iommu_context_get(struct etnaviv_iommu_context *ctx) +static inline struct etnaviv_iommu_context * +etnaviv_iommu_context_get(struct etnaviv_iommu_context *ctx) { kref_get(&ctx->refcount); + return ctx; } void etnaviv_iommu_context_put(struct etnaviv_iommu_context *ctx); void etnaviv_iommu_restore(struct etnaviv_gpu *gpu, From cda7532916f7bc860b36a1806cb8352e6f63dacb Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 20 Aug 2021 22:18:24 +0200 Subject: [PATCH 258/543] drm/etnaviv: put submit prev MMU context when it exists The prev context is the MMU context at the time of the job queueing in hardware. As a job might be queued multiple times due to recovery after a GPU hang, we need to make sure to put the stale prev MMU context from a prior queuing, to avoid the reference and thus the MMU context leaking. Cc: stable@vger.kernel.org # 5.4 Signed-off-by: Lucas Stach Tested-by: Michael Walle Tested-by: Marek Vasut Reviewed-by: Christian Gmeiner --- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 6722efcf858a..0f700400937e 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1374,6 +1374,8 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit) gpu->mmu_context = etnaviv_iommu_context_get(submit->mmu_context); etnaviv_gpu_start_fe_idleloop(gpu); } else { + if (submit->prev_mmu_context) + etnaviv_iommu_context_put(submit->prev_mmu_context); submit->prev_mmu_context = etnaviv_iommu_context_get(gpu->mmu_context); } From 23e0f5a57d0ecec86e1fc82194acd94aede21a46 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 20 Aug 2021 22:18:25 +0200 Subject: [PATCH 259/543] drm/etnaviv: stop abusing mmu_context as FE running marker While the DMA frontend can only be active when the MMU context is set, the reverse isn't necessarily true, as the frontend can be stopped while the MMU state is kept. Stop treating mmu_context being set as a indication that the frontend is running and instead add a explicit property. Cc: stable@vger.kernel.org # 5.4 Signed-off-by: Lucas Stach Tested-by: Michael Walle Tested-by: Marek Vasut Reviewed-by: Christian Gmeiner --- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 10 ++++++++-- drivers/gpu/drm/etnaviv/etnaviv_gpu.h | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 0f700400937e..24d514e70f98 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -569,6 +569,8 @@ static int etnaviv_hw_reset(struct etnaviv_gpu *gpu) /* We rely on the GPU running, so program the clock */ etnaviv_gpu_update_clock(gpu); + gpu->fe_running = false; + return 0; } @@ -637,6 +639,8 @@ void etnaviv_gpu_start_fe(struct etnaviv_gpu *gpu, u32 address, u16 prefetch) VIVS_MMUv2_SEC_COMMAND_CONTROL_ENABLE | VIVS_MMUv2_SEC_COMMAND_CONTROL_PREFETCH(prefetch)); } + + gpu->fe_running = true; } static void etnaviv_gpu_start_fe_idleloop(struct etnaviv_gpu *gpu) @@ -1370,7 +1374,7 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit) goto out_unlock; } - if (!gpu->mmu_context) { + if (!gpu->fe_running) { gpu->mmu_context = etnaviv_iommu_context_get(submit->mmu_context); etnaviv_gpu_start_fe_idleloop(gpu); } else { @@ -1579,7 +1583,7 @@ int etnaviv_gpu_wait_idle(struct etnaviv_gpu *gpu, unsigned int timeout_ms) static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu) { - if (gpu->initialized && gpu->mmu_context) { + if (gpu->initialized && gpu->fe_running) { /* Replace the last WAIT with END */ mutex_lock(&gpu->lock); etnaviv_buffer_end(gpu); @@ -1594,6 +1598,8 @@ static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu) etnaviv_iommu_context_put(gpu->mmu_context); gpu->mmu_context = NULL; + + gpu->fe_running = false; } gpu->exec_state = -1; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index 8ea48697d132..1c75c8ed5bce 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -101,6 +101,7 @@ struct etnaviv_gpu { struct workqueue_struct *wq; struct drm_gpu_scheduler sched; bool initialized; + bool fe_running; /* 'ring'-buffer: */ struct etnaviv_cmdbuf buffer; From 8f3eea9d01d7b0f95b0fe04187c0059019ada85b Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 20 Aug 2021 22:18:26 +0200 Subject: [PATCH 260/543] drm/etnaviv: keep MMU context across runtime suspend/resume The MMU state may be kept across a runtime suspend/resume cycle, as we avoid a full hardware reset to keep the latency of the runtime PM small. Don't pretend that the MMU state is lost in driver state. The MMU context is pushed out when new HW jobs with a different context are coming in. The only exception to this is when the GPU is unbound, in which case we need to make sure to also free the last active context. Cc: stable@vger.kernel.org # 5.4 Reported-by: Michael Walle Signed-off-by: Lucas Stach Tested-by: Michael Walle Tested-by: Marek Vasut Reviewed-by: Christian Gmeiner --- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 24d514e70f98..0757dcbe6913 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1596,9 +1596,6 @@ static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu) */ etnaviv_gpu_wait_idle(gpu, 100); - etnaviv_iommu_context_put(gpu->mmu_context); - gpu->mmu_context = NULL; - gpu->fe_running = false; } @@ -1747,6 +1744,9 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master, etnaviv_gpu_hw_suspend(gpu); #endif + if (gpu->mmu_context) + etnaviv_iommu_context_put(gpu->mmu_context); + if (gpu->initialized) { etnaviv_cmdbuf_free(&gpu->buffer); etnaviv_iommu_global_fini(gpu); From 725cbc7884c37f3b4f1777bc1aea6432cded8ca5 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 20 Aug 2021 22:18:27 +0200 Subject: [PATCH 261/543] drm/etnaviv: exec and MMU state is lost when resetting the GPU When the GPU is reset both the current exec state, as well as all MMU state is lost. Move the driver side state tracking into the reset function to keep hardware and software state from diverging. Cc: stable@vger.kernel.org # 5.4 Signed-off-by: Lucas Stach Tested-by: Michael Walle Tested-by: Marek Vasut Reviewed-by: Christian Gmeiner --- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 0757dcbe6913..3d64bc3cc604 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -570,6 +570,8 @@ static int etnaviv_hw_reset(struct etnaviv_gpu *gpu) etnaviv_gpu_update_clock(gpu); gpu->fe_running = false; + gpu->exec_state = -1; + gpu->mmu_context = NULL; return 0; } @@ -836,7 +838,6 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu) /* Now program the hardware */ mutex_lock(&gpu->lock); etnaviv_gpu_hw_init(gpu); - gpu->exec_state = -1; mutex_unlock(&gpu->lock); pm_runtime_mark_last_busy(gpu->dev); @@ -1061,8 +1062,6 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu) spin_unlock(&gpu->event_spinlock); etnaviv_gpu_hw_init(gpu); - gpu->exec_state = -1; - gpu->mmu_context = NULL; mutex_unlock(&gpu->lock); pm_runtime_mark_last_busy(gpu->dev); From f978a5302f5566480c58ffae64a16d34456801bd Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 20 Aug 2021 22:18:28 +0200 Subject: [PATCH 262/543] drm/etnaviv: fix MMU context leak on GPU reset After a reset the GPU is no longer using the MMU context and may be restarted with a different context. While the mmu_state proeprly was cleared, the context wasn't unreferenced, leading to a memory leak. Cc: stable@vger.kernel.org # 5.4 Reported-by: Michael Walle Signed-off-by: Lucas Stach Tested-by: Michael Walle Tested-by: Marek Vasut Reviewed-by: Christian Gmeiner --- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 3d64bc3cc604..bc2bdfcbc082 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -571,6 +571,8 @@ static int etnaviv_hw_reset(struct etnaviv_gpu *gpu) gpu->fe_running = false; gpu->exec_state = -1; + if (gpu->mmu_context) + etnaviv_iommu_context_put(gpu->mmu_context); gpu->mmu_context = NULL; return 0; From d6408538f091fb22d47f792d4efa58143d56c3fb Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 20 Aug 2021 22:18:29 +0200 Subject: [PATCH 263/543] drm/etnaviv: reference MMU context when setting up hardware state Move the refcount manipulation of the MMU context to the point where the hardware state is programmed. At that point it is also known if a previous MMU state is still there, or the state needs to be reprogrammed with a potentially different context. Cc: stable@vger.kernel.org # 5.4 Signed-off-by: Lucas Stach Tested-by: Michael Walle Tested-by: Marek Vasut Reviewed-by: Christian Gmeiner --- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 24 +++++++++++----------- drivers/gpu/drm/etnaviv/etnaviv_iommu.c | 4 ++++ drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c | 8 ++++++++ 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index bc2bdfcbc082..cc5b07f86346 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -647,17 +647,19 @@ void etnaviv_gpu_start_fe(struct etnaviv_gpu *gpu, u32 address, u16 prefetch) gpu->fe_running = true; } -static void etnaviv_gpu_start_fe_idleloop(struct etnaviv_gpu *gpu) +static void etnaviv_gpu_start_fe_idleloop(struct etnaviv_gpu *gpu, + struct etnaviv_iommu_context *context) { - u32 address = etnaviv_cmdbuf_get_va(&gpu->buffer, - &gpu->mmu_context->cmdbuf_mapping); u16 prefetch; + u32 address; /* setup the MMU */ - etnaviv_iommu_restore(gpu, gpu->mmu_context); + etnaviv_iommu_restore(gpu, context); /* Start command processor */ prefetch = etnaviv_buffer_init(gpu); + address = etnaviv_cmdbuf_get_va(&gpu->buffer, + &gpu->mmu_context->cmdbuf_mapping); etnaviv_gpu_start_fe(gpu, address, prefetch); } @@ -1375,14 +1377,12 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit) goto out_unlock; } - if (!gpu->fe_running) { - gpu->mmu_context = etnaviv_iommu_context_get(submit->mmu_context); - etnaviv_gpu_start_fe_idleloop(gpu); - } else { - if (submit->prev_mmu_context) - etnaviv_iommu_context_put(submit->prev_mmu_context); - submit->prev_mmu_context = etnaviv_iommu_context_get(gpu->mmu_context); - } + if (!gpu->fe_running) + etnaviv_gpu_start_fe_idleloop(gpu, submit->mmu_context); + + if (submit->prev_mmu_context) + etnaviv_iommu_context_put(submit->prev_mmu_context); + submit->prev_mmu_context = etnaviv_iommu_context_get(gpu->mmu_context); if (submit->nr_pmrs) { gpu->event[event[1]].sync_point = &sync_point_perfmon_sample_pre; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c index 1a7c89a67bea..afe5dd6a9925 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c @@ -92,6 +92,10 @@ static void etnaviv_iommuv1_restore(struct etnaviv_gpu *gpu, struct etnaviv_iommuv1_context *v1_context = to_v1_context(context); u32 pgtable; + if (gpu->mmu_context) + etnaviv_iommu_context_put(gpu->mmu_context); + gpu->mmu_context = etnaviv_iommu_context_get(context); + /* set base addresses */ gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_RA, context->global->memory_base); gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_FE, context->global->memory_base); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c index f8bf488e9d71..d664ae29ae20 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c @@ -172,6 +172,10 @@ static void etnaviv_iommuv2_restore_nonsec(struct etnaviv_gpu *gpu, if (gpu_read(gpu, VIVS_MMUv2_CONTROL) & VIVS_MMUv2_CONTROL_ENABLE) return; + if (gpu->mmu_context) + etnaviv_iommu_context_put(gpu->mmu_context); + gpu->mmu_context = etnaviv_iommu_context_get(context); + prefetch = etnaviv_buffer_config_mmuv2(gpu, (u32)v2_context->mtlb_dma, (u32)context->global->bad_page_dma); @@ -192,6 +196,10 @@ static void etnaviv_iommuv2_restore_sec(struct etnaviv_gpu *gpu, if (gpu_read(gpu, VIVS_MMUv2_SEC_CONTROL) & VIVS_MMUv2_SEC_CONTROL_ENABLE) return; + if (gpu->mmu_context) + etnaviv_iommu_context_put(gpu->mmu_context); + gpu->mmu_context = etnaviv_iommu_context_get(context); + gpu_write(gpu, VIVS_MMUv2_PTA_ADDRESS_LOW, lower_32_bits(context->global->v2.pta_dma)); gpu_write(gpu, VIVS_MMUv2_PTA_ADDRESS_HIGH, From f2faea8b64125852fa9acc6771c07fc0311a039b Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 20 Aug 2021 22:18:30 +0200 Subject: [PATCH 264/543] drm/etnaviv: add missing MMU context put when reaping MMU mapping When we forcefully evict a mapping from the the address space and thus the MMU context, the MMU context is leaked, as the mapping no longer points to it, so it doesn't get freed when the GEM object is destroyed. Add the mssing context put to fix the leak. Cc: stable@vger.kernel.org # 5.4 Signed-off-by: Lucas Stach Tested-by: Michael Walle Tested-by: Marek Vasut Reviewed-by: Christian Gmeiner --- drivers/gpu/drm/etnaviv/etnaviv_mmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c index dab1b58006d8..9fb1a2aadbcb 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c @@ -199,6 +199,7 @@ static int etnaviv_iommu_find_iova(struct etnaviv_iommu_context *context, */ list_for_each_entry_safe(m, n, &list, scan_node) { etnaviv_iommu_remove_mapping(context, m); + etnaviv_iommu_context_put(m->context); m->context = NULL; list_del_init(&m->mmu_node); list_del_init(&m->scan_node); From 1511df6f5e9ef32826f20db2ee81f8527154dc14 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 7 Sep 2021 11:58:59 +0200 Subject: [PATCH 265/543] s390/bpf: Fix branch shortening during codegen pass EMIT6_PCREL() macro assumes that the previous pass generated 6 bytes of code, which is not the case if branch shortening took place. Fix by using jit->prg, like all the other EMIT6_PCREL_*() macros. Reported-by: Johan Almbladh Fixes: 4e9b4a6883dd ("s390/bpf: Use relative long branches") Signed-off-by: Ilya Leoshkevich Signed-off-by: Vasily Gorbik --- arch/s390/net/bpf_jit_comp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 88419263a89a..c3bd630e9b43 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -248,8 +248,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask) \ ({ \ - /* Branch instruction needs 6 bytes */ \ - int rel = (addrs[(i) + (off) + 1] - (addrs[(i) + 1] - 6)) / 2;\ + int rel = (addrs[(i) + (off) + 1] - jit->prg) / 2; \ _EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\ REG_SET_SEEN(b1); \ REG_SET_SEEN(b2); \ From 6e61dc9da0b7a0d91d57c2e20b5ea4fd2d4e7e53 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 7 Sep 2021 13:41:16 +0200 Subject: [PATCH 266/543] s390/bpf: Fix 64-bit subtraction of the -0x80000000 constant The JIT uses agfi for subtracting constants, but -(-0x80000000) cannot be represented as a 32-bit signed binary integer. Fix by using algfi in this particular case. Reported-by: Johan Almbladh Fixes: 054623105728 ("s390/bpf: Add s390x eBPF JIT compiler backend") Reviewed-by: Heiko Carstens Signed-off-by: Ilya Leoshkevich Signed-off-by: Vasily Gorbik --- arch/s390/net/bpf_jit_comp.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index c3bd630e9b43..245f98d5f690 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -794,8 +794,13 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, case BPF_ALU64 | BPF_SUB | BPF_K: /* dst = dst - imm */ if (!imm) break; - /* agfi %dst,-imm */ - EMIT6_IMM(0xc2080000, dst_reg, -imm); + if (imm == -0x80000000) { + /* algfi %dst,0x80000000 */ + EMIT6_IMM(0xc20a0000, dst_reg, 0x80000000); + } else { + /* agfi %dst,-imm */ + EMIT6_IMM(0xc2080000, dst_reg, -imm); + } break; /* * BPF_MUL From db7bee653859ef7179be933e7d1384644f795f26 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Mon, 6 Sep 2021 15:04:14 +0200 Subject: [PATCH 267/543] s390/bpf: Fix optimizing out zero-extensions Currently the JIT completely removes things like `reg32 += 0`, however, the BPF_ALU semantics requires the target register to be zero-extended in such cases. Fix by optimizing out only the arithmetic operation, but not the subsequent zero-extension. Reported-by: Johan Almbladh Fixes: 054623105728 ("s390/bpf: Add s390x eBPF JIT compiler backend") Reviewed-by: Heiko Carstens Signed-off-by: Ilya Leoshkevich Signed-off-by: Vasily Gorbik --- arch/s390/net/bpf_jit_comp.c | 58 +++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 245f98d5f690..840d8594437d 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -760,10 +760,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4(0xb9080000, dst_reg, src_reg); break; case BPF_ALU | BPF_ADD | BPF_K: /* dst = (u32) dst + (u32) imm */ - if (!imm) - break; - /* alfi %dst,imm */ - EMIT6_IMM(0xc20b0000, dst_reg, imm); + if (imm != 0) { + /* alfi %dst,imm */ + EMIT6_IMM(0xc20b0000, dst_reg, imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_ADD | BPF_K: /* dst = dst + imm */ @@ -785,10 +785,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4(0xb9090000, dst_reg, src_reg); break; case BPF_ALU | BPF_SUB | BPF_K: /* dst = (u32) dst - (u32) imm */ - if (!imm) - break; - /* alfi %dst,-imm */ - EMIT6_IMM(0xc20b0000, dst_reg, -imm); + if (imm != 0) { + /* alfi %dst,-imm */ + EMIT6_IMM(0xc20b0000, dst_reg, -imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_SUB | BPF_K: /* dst = dst - imm */ @@ -815,10 +815,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4(0xb90c0000, dst_reg, src_reg); break; case BPF_ALU | BPF_MUL | BPF_K: /* dst = (u32) dst * (u32) imm */ - if (imm == 1) - break; - /* msfi %r5,imm */ - EMIT6_IMM(0xc2010000, dst_reg, imm); + if (imm != 1) { + /* msfi %r5,imm */ + EMIT6_IMM(0xc2010000, dst_reg, imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_MUL | BPF_K: /* dst = dst * imm */ @@ -871,6 +871,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, if (BPF_OP(insn->code) == BPF_MOD) /* lhgi %dst,0 */ EMIT4_IMM(0xa7090000, dst_reg, 0); + else + EMIT_ZERO(dst_reg); break; } /* lhi %w0,0 */ @@ -1003,10 +1005,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4(0xb9820000, dst_reg, src_reg); break; case BPF_ALU | BPF_XOR | BPF_K: /* dst = (u32) dst ^ (u32) imm */ - if (!imm) - break; - /* xilf %dst,imm */ - EMIT6_IMM(0xc0070000, dst_reg, imm); + if (imm != 0) { + /* xilf %dst,imm */ + EMIT6_IMM(0xc0070000, dst_reg, imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */ @@ -1037,10 +1039,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, src_reg, 0); break; case BPF_ALU | BPF_LSH | BPF_K: /* dst = (u32) dst << (u32) imm */ - if (imm == 0) - break; - /* sll %dst,imm(%r0) */ - EMIT4_DISP(0x89000000, dst_reg, REG_0, imm); + if (imm != 0) { + /* sll %dst,imm(%r0) */ + EMIT4_DISP(0x89000000, dst_reg, REG_0, imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_LSH | BPF_K: /* dst = dst << imm */ @@ -1062,10 +1064,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, src_reg, 0); break; case BPF_ALU | BPF_RSH | BPF_K: /* dst = (u32) dst >> (u32) imm */ - if (imm == 0) - break; - /* srl %dst,imm(%r0) */ - EMIT4_DISP(0x88000000, dst_reg, REG_0, imm); + if (imm != 0) { + /* srl %dst,imm(%r0) */ + EMIT4_DISP(0x88000000, dst_reg, REG_0, imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_RSH | BPF_K: /* dst = dst >> imm */ @@ -1087,10 +1089,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, src_reg, 0); break; case BPF_ALU | BPF_ARSH | BPF_K: /* ((s32) dst >> imm */ - if (imm == 0) - break; - /* sra %dst,imm(%r0) */ - EMIT4_DISP(0x8a000000, dst_reg, REG_0, imm); + if (imm != 0) { + /* sra %dst,imm(%r0) */ + EMIT4_DISP(0x8a000000, dst_reg, REG_0, imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_ARSH | BPF_K: /* ((s64) dst) >>= imm */ From 54607282fae6148641a08d81a6e0953b541249c7 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 16 Sep 2021 10:44:06 +0200 Subject: [PATCH 268/543] EDAC/dmc520: Assign the proper type to dimm->edac_mode dimm->edac_mode contains values of type enum edac_type - not the corresponding capability flags. Fix that. Fixes: 1088750d7839 ("EDAC: Add EDAC driver for DMC520") Signed-off-by: Borislav Petkov Cc: Link: https://lkml.kernel.org/r/20210916085258.7544-1-bp@alien8.de --- drivers/edac/dmc520_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/dmc520_edac.c b/drivers/edac/dmc520_edac.c index fc1153ab1ebb..b8a7d9594afd 100644 --- a/drivers/edac/dmc520_edac.c +++ b/drivers/edac/dmc520_edac.c @@ -464,7 +464,7 @@ static void dmc520_init_csrow(struct mem_ctl_info *mci) dimm->grain = pvt->mem_width_in_bytes; dimm->dtype = dt; dimm->mtype = mt; - dimm->edac_mode = EDAC_FLAG_SECDED; + dimm->edac_mode = EDAC_SECDED; dimm->nr_pages = pages_per_rank / csi->nr_channels; } } From 5aeb05b27f81269a2bf2e15eab9fc0f9a400d3a8 Mon Sep 17 00:00:00 2001 From: Laurentiu Tudor Date: Wed, 15 Sep 2021 11:09:39 +0300 Subject: [PATCH 269/543] software node: balance refcount for managed software nodes software_node_notify(), on KOBJ_REMOVE drops the refcount twice on managed software nodes, thus leading to underflow errors. Balance the refcount by bumping it in the device_create_managed_software_node() function. The error [1] was encountered after adding a .shutdown() op to our fsl-mc-bus driver. [1] pc : refcount_warn_saturate+0xf8/0x150 lr : refcount_warn_saturate+0xf8/0x150 sp : ffff80001009b920 x29: ffff80001009b920 x28: ffff1a2420318000 x27: 0000000000000000 x26: ffffccac15e7a038 x25: 0000000000000008 x24: ffffccac168e0030 x23: ffff1a2428a82000 x22: 0000000000080000 x21: ffff1a24287b5000 x20: 0000000000000001 x19: ffff1a24261f4400 x18: ffffffffffffffff x17: 6f72645f726f7272 x16: 0000000000000000 x15: ffff80009009b607 x14: 0000000000000000 x13: ffffccac16602670 x12: 0000000000000a17 x11: 000000000000035d x10: ffffccac16602670 x9 : ffffccac16602670 x8 : 00000000ffffefff x7 : ffffccac1665a670 x6 : ffffccac1665a670 x5 : 0000000000000000 x4 : 0000000000000000 x3 : 00000000ffffffff x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff1a2420318000 Call trace: refcount_warn_saturate+0xf8/0x150 kobject_put+0x10c/0x120 software_node_notify+0xd8/0x140 device_platform_notify+0x4c/0xb4 device_del+0x188/0x424 fsl_mc_device_remove+0x2c/0x4c rebofind sp.c__fsl_mc_device_remove+0x14/0x2c device_for_each_child+0x5c/0xac dprc_remove+0x9c/0xc0 fsl_mc_driver_remove+0x28/0x64 __device_release_driver+0x188/0x22c device_release_driver+0x30/0x50 bus_remove_device+0x128/0x134 device_del+0x16c/0x424 fsl_mc_bus_remove+0x8c/0x114 fsl_mc_bus_shutdown+0x14/0x20 platform_shutdown+0x28/0x40 device_shutdown+0x15c/0x330 __do_sys_reboot+0x218/0x2a0 __arm64_sys_reboot+0x28/0x34 invoke_syscall+0x48/0x114 el0_svc_common+0x40/0xdc do_el0_svc+0x2c/0x94 el0_svc+0x2c/0x54 el0t_64_sync_handler+0xa8/0x12c el0t_64_sync+0x198/0x19c ---[ end trace 32eb1c71c7d86821 ]--- Fixes: 151f6ff78cdf ("software node: Provide replacement for device_add_properties()") Reported-by: Jon Nettleton Suggested-by: Heikki Krogerus Reviewed-by: Heikki Krogerus Signed-off-by: Laurentiu Tudor Cc: 5.12+ # 5.12+ [ rjw: Fix up the software_node_notify() invocation ] Signed-off-by: Rafael J. Wysocki --- drivers/base/swnode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c index 7bd0f3cfb7eb..c46f6a8e14d2 100644 --- a/drivers/base/swnode.c +++ b/drivers/base/swnode.c @@ -1116,6 +1116,9 @@ int device_create_managed_software_node(struct device *dev, to_swnode(fwnode)->managed = true; set_secondary_fwnode(dev, fwnode); + if (device_is_registered(dev)) + software_node_notify(dev); + return 0; } EXPORT_SYMBOL_GPL(device_create_managed_software_node); From 98dc68f8b0c2248bdc3688c90d2499247b9432e4 Mon Sep 17 00:00:00 2001 From: Xiang wangx Date: Thu, 16 Sep 2021 20:24:42 +0800 Subject: [PATCH 270/543] selftests: nci: replace unsigned int with int Should not use comparison of unsigned expressions < 0. Signed-off-by: Xiang wangx Signed-off-by: David S. Miller --- tools/testing/selftests/nci/nci_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/nci/nci_dev.c b/tools/testing/selftests/nci/nci_dev.c index e1bf55dabdf6..162c41e9bcae 100644 --- a/tools/testing/selftests/nci/nci_dev.c +++ b/tools/testing/selftests/nci/nci_dev.c @@ -746,7 +746,7 @@ int read_write_nci_cmd(int nfc_sock, int virtual_fd, const __u8 *cmd, __u32 cmd_ const __u8 *rsp, __u32 rsp_len) { char buf[256]; - unsigned int len; + int len; send(nfc_sock, &cmd[3], cmd_len - 3, 0); len = read(virtual_fd, buf, cmd_len); From 84fb7dfc7463afcba61281f36535576a7f7b0626 Mon Sep 17 00:00:00 2001 From: Adam Borowski Date: Sun, 12 Sep 2021 23:23:21 +0200 Subject: [PATCH 271/543] net: wan: wanxl: define CROSS_COMPILE_M68K It was used but never set. The hardcoded value from before the dawn of time was non-standard; the usual name for cross-tools is $TRIPLET-$TOOL Signed-off-by: Adam Borowski Signed-off-by: David S. Miller --- drivers/net/wan/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wan/Makefile b/drivers/net/wan/Makefile index f6b92efffc94..480bcd1f6c1c 100644 --- a/drivers/net/wan/Makefile +++ b/drivers/net/wan/Makefile @@ -34,6 +34,8 @@ obj-$(CONFIG_SLIC_DS26522) += slic_ds26522.o clean-files := wanxlfw.inc $(obj)/wanxl.o: $(obj)/wanxlfw.inc +CROSS_COMPILE_M68K = m68k-linux-gnu- + ifeq ($(CONFIG_WANXL_BUILD_FIRMWARE),y) ifeq ($(ARCH),m68k) M68KCC = $(CC) From 7c3a0a018e672a9723a79b128227272562300055 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 15 Sep 2021 07:47:27 +0300 Subject: [PATCH 272/543] net/{mlx5|nfp|bnxt}: Remove unnecessary RTNL lock assert Remove the assert from the callback priv lookup function since it does not require RTNL lock and is already protected by flow_indr_block_lock. This will avoid warnings from being emitted to dmesg if the driver registers its callback after an ingress qdisc was created for a netdevice. The warnings started after the following patch was merged: commit 74fc4f828769 ("net: Fix offloading indirect devices dependency on qdisc order creation") Signed-off-by: Eli Cohen Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 3 --- drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 3 --- drivers/net/ethernet/netronome/nfp/flower/offload.c | 3 --- 3 files changed, 9 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 46fae1acbeed..e6a4a768b10b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -1884,9 +1884,6 @@ bnxt_tc_indr_block_cb_lookup(struct bnxt *bp, struct net_device *netdev) { struct bnxt_flower_indr_block_cb_priv *cb_priv; - /* All callback list access should be protected by RTNL. */ - ASSERT_RTNL(); - list_for_each_entry(cb_priv, &bp->tc_indr_block_list, list) if (cb_priv->tunnel_netdev == netdev) return cb_priv; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index 51a4d80f7fa3..de03684528bb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -300,9 +300,6 @@ mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv, { struct mlx5e_rep_indr_block_priv *cb_priv; - /* All callback list access should be protected by RTNL. */ - ASSERT_RTNL(); - list_for_each_entry(cb_priv, &rpriv->uplink_priv.tc_indr_block_priv_list, list) diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 556c3495211d..64c0ef57ad42 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -1767,9 +1767,6 @@ nfp_flower_indr_block_cb_priv_lookup(struct nfp_app *app, struct nfp_flower_indr_block_cb_priv *cb_priv; struct nfp_flower_priv *priv = app->priv; - /* All callback list access should be protected by RTNL. */ - ASSERT_RTNL(); - list_for_each_entry(cb_priv, &priv->indr_block_cb_priv, list) if (cb_priv->netdev == netdev) return cb_priv; From 40ee363c844fcb6ae0f1f5cfea68aed7e268c2f4 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 15 Sep 2021 10:19:07 -0700 Subject: [PATCH 273/543] igc: fix tunnel offloading Checking tunnel offloading, it turns out that offloading doesn't work as expected. The following script allows to reproduce the issue. Call it as `testscript DEVICE LOCALIP REMOTEIP NETMASK' === SNIP === if [ $# -ne 4 ] then echo "Usage $0 DEVICE LOCALIP REMOTEIP NETMASK" exit 1 fi DEVICE="$1" LOCAL_ADDRESS="$2" REMOTE_ADDRESS="$3" NWMASK="$4" echo "Driver: $(ethtool -i ${DEVICE} | awk '/^driver:/{print $2}') " ethtool -k "${DEVICE}" | grep tx-udp echo echo "Set up NIC and tunnel..." ip addr add "${LOCAL_ADDRESS}/${NWMASK}" dev "${DEVICE}" ip link set "${DEVICE}" up sleep 2 ip link add vxlan1 type vxlan id 42 \ remote "${REMOTE_ADDRESS}" \ local "${LOCAL_ADDRESS}" \ dstport 0 \ dev "${DEVICE}" ip addr add fc00::1/64 dev vxlan1 ip link set vxlan1 up sleep 2 rm -f vxlan.pcap echo "Running tcpdump and iperf3..." ( nohup tcpdump -i any -w vxlan.pcap >/dev/null 2>&1 ) & sleep 2 iperf3 -c fc00::2 >/dev/null pkill tcpdump echo echo -n "Max. Paket Size: " tcpdump -r vxlan.pcap -nnle 2>/dev/null \ | grep "${LOCAL_ADDRESS}.*> ${REMOTE_ADDRESS}.*OTV" \ | awk '{print $8}' | awk -F ':' '{print $1}' \ | sort -n | tail -1 echo ip link del vxlan1 ip addr del ${LOCAL_ADDRESS}/${NWMASK} dev "${DEVICE}" === SNAP === The expected outcome is Max. Paket Size: 64904 This is what you see on igb, the code igc has been taken from. However, on igc the output is Max. Paket Size: 1516 so the GSO aggregate packets are segmented by the kernel before calling igc_xmit_frame. Inside the subsequent call to igc_tso, the check for skb_is_gso(skb) fails and the function returns prematurely. It turns out that this occurs because the feature flags aren't set entirely correctly in igc_probe. In contrast to the original code from igb_probe, igc_probe neglects to set the flags required to allow tunnel offloading. Setting the same flags as igb fixes the issue on igc. Fixes: 34428dff3679 ("igc: Add GSO partial support") Signed-off-by: Paolo Abeni Tested-by: Corinna Vinschen Acked-by: Sasha Neftin Tested-by: Nechama Kraus Signed-off-by: Tony Nguyen Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/igc/igc_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index b877efae61df..0e19b4d02e62 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6350,7 +6350,9 @@ static int igc_probe(struct pci_dev *pdev, if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; - netdev->vlan_features |= netdev->features; + netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID; + netdev->mpls_features |= NETIF_F_HW_CSUM; + netdev->hw_enc_features |= netdev->vlan_features; /* MTU range: 68 - 9216 */ netdev->min_mtu = ETH_MIN_MTU; From ee8a9600b5391f434905c46bec7f77d34505083e Mon Sep 17 00:00:00 2001 From: David Thompson Date: Wed, 15 Sep 2021 14:08:48 -0400 Subject: [PATCH 274/543] mlxbf_gige: clear valid_polarity upon open The network interface managed by the mlxbf_gige driver can get into a problem state where traffic does not flow. In this state, the interface will be up and enabled, but will stop processing received packets. This problem state will happen if three specific conditions occur: 1) driver has received more than (N * RxRingSize) packets but less than (N+1 * RxRingSize) packets, where N is an odd number Note: the command "ethtool -g " will display the current receive ring size, which currently defaults to 128 2) the driver's interface was disabled via "ifconfig oob_net0 down" during the window described in #1. 3) the driver's interface is re-enabled via "ifconfig oob_net0 up" This patch ensures that the driver's "valid_polarity" field is cleared during the open() method so that it always matches the receive polarity used by hardware. Without this fix, the driver needs to be unloaded and reloaded to correct this problem state. Fixes: f92e1869d74e ("Add Mellanox BlueField Gigabit Ethernet driver") Reviewed-by: Asmaa Mnebhi Signed-off-by: David Thompson Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index 3e85b17f5857..6704f5c1aa32 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -142,6 +142,13 @@ static int mlxbf_gige_open(struct net_device *netdev) err = mlxbf_gige_clean_port(priv); if (err) goto free_irqs; + + /* Clear driver's valid_polarity to match hardware, + * since the above call to clean_port() resets the + * receive polarity used by hardware. + */ + priv->valid_polarity = 0; + err = mlxbf_gige_rx_init(priv); if (err) goto free_irqs; From b3a7b268c147119a9776185b4f37e1555ead9d68 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 7 Sep 2021 09:53:03 -0400 Subject: [PATCH 275/543] drm/amd/display: Add NULL checks for vblank workqueue [Why] If we're running a headless config with 0 links then the vblank workqueue will be NULL - causing a NULL pointer exception during any commit. [How] Guard access to the workqueue if it's NULL and don't queue or flush work if it is. Reported-by: Mike Lothian BugLink: https://gitlab.freedesktop.org/drm/amd/-/issues/1700 Fixes: 58aa1c50e5a231 ("drm/amd/display: Use vblank control events for PSR enable/disable") Signed-off-by: Nicholas Kazlauskas Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 32 +++++++++++-------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d55e61d8aa00..14ba5eeb974f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6024,21 +6024,23 @@ static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable) return 0; #if defined(CONFIG_DRM_AMD_DC_DCN) - work = kzalloc(sizeof(*work), GFP_ATOMIC); - if (!work) - return -ENOMEM; + if (dm->vblank_control_workqueue) { + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return -ENOMEM; - INIT_WORK(&work->work, vblank_control_worker); - work->dm = dm; - work->acrtc = acrtc; - work->enable = enable; + INIT_WORK(&work->work, vblank_control_worker); + work->dm = dm; + work->acrtc = acrtc; + work->enable = enable; - if (acrtc_state->stream) { - dc_stream_retain(acrtc_state->stream); - work->stream = acrtc_state->stream; + if (acrtc_state->stream) { + dc_stream_retain(acrtc_state->stream); + work->stream = acrtc_state->stream; + } + + queue_work(dm->vblank_control_workqueue, &work->work); } - - queue_work(dm->vblank_control_workqueue, &work->work); #endif return 0; @@ -8648,7 +8650,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, * If PSR or idle optimizations are enabled then flush out * any pending work before hardware programming. */ - flush_workqueue(dm->vblank_control_workqueue); + if (dm->vblank_control_workqueue) + flush_workqueue(dm->vblank_control_workqueue); #endif bundle->stream_update.stream = acrtc_state->stream; @@ -8983,7 +8986,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) /* if there mode set or reset, disable eDP PSR */ if (mode_set_reset_required) { #if defined(CONFIG_DRM_AMD_DC_DCN) - flush_workqueue(dm->vblank_control_workqueue); + if (dm->vblank_control_workqueue) + flush_workqueue(dm->vblank_control_workqueue); #endif amdgpu_dm_psr_disable_all(dm); } From 2a54d110bd4393fe412ef2c9c2d05fcd92785d1a Mon Sep 17 00:00:00 2001 From: Anson Jacob Date: Thu, 9 Sep 2021 12:48:29 -0400 Subject: [PATCH 276/543] drm/amd/display: dc_assert_fp_enabled assert only if FPU is not enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Assert only when FPU is not enabled. Fixes: 0ea7ee821701 ("drm/amd/display: Add DC_FP helper to check FPU state") Signed-off-by: Anson Jacob Cc: Christian König Cc: Hersen Wu Cc: Harry Wentland Cc: Rodrigo Siqueira Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c index c9f47d167472..b1bf80da3a55 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c @@ -62,7 +62,7 @@ inline void dc_assert_fp_enabled(void) depth = *pcpu; put_cpu_ptr(&fpu_recursion_depth); - ASSERT(depth > 1); + ASSERT(depth >= 1); } /** From 8f48ba303dfb15dc354e95a3ade59dea4614123a Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Wed, 8 Sep 2021 13:34:26 +0800 Subject: [PATCH 277/543] drm/amdgpu: fix sysfs_emit/sysfs_emit_at warnings(v2) sysfs_emit and sysfs_emit_at requrie a page boundary aligned buf address. Make them happy! v2: use an inline function. Warning Log: [ 492.545174] invalid sysfs_emit_at: buf:00000000f19bdfde at:0 [ 492.546416] WARNING: CPU: 7 PID: 1304 at fs/sysfs/file.c:765 sysfs_emit_at+0x4a/0xa0 [ 492.654805] Call Trace: [ 492.655353] ? smu_cmn_get_metrics_table+0x40/0x50 [amdgpu] [ 492.656780] vangogh_print_clk_levels+0x369/0x410 [amdgpu] [ 492.658245] vangogh_common_print_clk_levels+0x77/0x80 [amdgpu] [ 492.659733] ? preempt_schedule_common+0x18/0x30 [ 492.660713] smu_print_ppclk_levels+0x65/0x90 [amdgpu] [ 492.662107] amdgpu_get_pp_od_clk_voltage+0x13d/0x190 [amdgpu] [ 492.663620] dev_attr_show+0x1d/0x40 Signed-off-by: Lang Yu Acked-by: Huang Rui Signed-off-by: Alex Deucher --- .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 8 ++++++-- drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 4 +++- .../drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 4 +++- drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 16 ++++++++++------ drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 2 ++ .../gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 12 ++++++++---- .../gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c | 6 ++++-- drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h | 13 +++++++++++++ 8 files changed, 49 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index e343cc218990..082f01893f3d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -771,8 +771,12 @@ static int arcturus_print_clk_levels(struct smu_context *smu, struct smu_11_0_dpm_context *dpm_context = NULL; uint32_t gen_speed, lane_width; - if (amdgpu_ras_intr_triggered()) - return sysfs_emit(buf, "unavailable\n"); + smu_cmn_get_sysfs_buf(&buf, &size); + + if (amdgpu_ras_intr_triggered()) { + size += sysfs_emit_at(buf, size, "unavailable\n"); + return size; + } dpm_context = smu_dpm->dpm_context; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index a5fc5d7cb6c7..aec7cb21cc0f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -1279,6 +1279,8 @@ static int navi10_print_clk_levels(struct smu_context *smu, struct smu_11_0_overdrive_table *od_settings = smu->od_settings; uint32_t min_value, max_value; + smu_cmn_get_sysfs_buf(&buf, &size); + switch (clk_type) { case SMU_GFXCLK: case SMU_SCLK: @@ -1392,7 +1394,7 @@ static int navi10_print_clk_levels(struct smu_context *smu, case SMU_OD_RANGE: if (!smu->od_enabled || !od_table || !od_settings) break; - size = sysfs_emit(buf, "%s:\n", "OD_RANGE"); + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_LIMITS)) { navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_GFXCLKFMIN, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 5e292c3f5050..d7519688065f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -1058,6 +1058,8 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu, uint32_t min_value, max_value; uint32_t smu_version; + smu_cmn_get_sysfs_buf(&buf, &size); + switch (clk_type) { case SMU_GFXCLK: case SMU_SCLK: @@ -1180,7 +1182,7 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu, if (!smu->od_enabled || !od_table || !od_settings) break; - size = sysfs_emit(buf, "%s:\n", "OD_RANGE"); + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); if (sienna_cichlid_is_od_feature_supported(od_settings, SMU_11_0_7_ODCAP_GFXCLK_LIMITS)) { sienna_cichlid_get_od_setting_range(od_settings, SMU_11_0_7_ODSETTING_GFXCLKFMIN, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 3a3421452e57..f6ef0ce6e9e2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -589,10 +589,12 @@ static int vangogh_print_legacy_clk_levels(struct smu_context *smu, if (ret) return ret; + smu_cmn_get_sysfs_buf(&buf, &size); + switch (clk_type) { case SMU_OD_SCLK: if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - size = sysfs_emit(buf, "%s:\n", "OD_SCLK"); + size += sysfs_emit_at(buf, size, "%s:\n", "OD_SCLK"); size += sysfs_emit_at(buf, size, "0: %10uMhz\n", (smu->gfx_actual_hard_min_freq > 0) ? smu->gfx_actual_hard_min_freq : smu->gfx_default_hard_min_freq); size += sysfs_emit_at(buf, size, "1: %10uMhz\n", @@ -601,7 +603,7 @@ static int vangogh_print_legacy_clk_levels(struct smu_context *smu, break; case SMU_OD_CCLK: if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - size = sysfs_emit(buf, "CCLK_RANGE in Core%d:\n", smu->cpu_core_id_select); + size += sysfs_emit_at(buf, size, "CCLK_RANGE in Core%d:\n", smu->cpu_core_id_select); size += sysfs_emit_at(buf, size, "0: %10uMhz\n", (smu->cpu_actual_soft_min_freq > 0) ? smu->cpu_actual_soft_min_freq : smu->cpu_default_soft_min_freq); size += sysfs_emit_at(buf, size, "1: %10uMhz\n", @@ -610,7 +612,7 @@ static int vangogh_print_legacy_clk_levels(struct smu_context *smu, break; case SMU_OD_RANGE: if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - size = sysfs_emit(buf, "%s:\n", "OD_RANGE"); + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n", smu->gfx_default_hard_min_freq, smu->gfx_default_soft_max_freq); size += sysfs_emit_at(buf, size, "CCLK: %7uMhz %10uMhz\n", @@ -688,10 +690,12 @@ static int vangogh_print_clk_levels(struct smu_context *smu, if (ret) return ret; + smu_cmn_get_sysfs_buf(&buf, &size); + switch (clk_type) { case SMU_OD_SCLK: if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - size = sysfs_emit(buf, "%s:\n", "OD_SCLK"); + size += sysfs_emit_at(buf, size, "%s:\n", "OD_SCLK"); size += sysfs_emit_at(buf, size, "0: %10uMhz\n", (smu->gfx_actual_hard_min_freq > 0) ? smu->gfx_actual_hard_min_freq : smu->gfx_default_hard_min_freq); size += sysfs_emit_at(buf, size, "1: %10uMhz\n", @@ -700,7 +704,7 @@ static int vangogh_print_clk_levels(struct smu_context *smu, break; case SMU_OD_CCLK: if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - size = sysfs_emit(buf, "CCLK_RANGE in Core%d:\n", smu->cpu_core_id_select); + size += sysfs_emit_at(buf, size, "CCLK_RANGE in Core%d:\n", smu->cpu_core_id_select); size += sysfs_emit_at(buf, size, "0: %10uMhz\n", (smu->cpu_actual_soft_min_freq > 0) ? smu->cpu_actual_soft_min_freq : smu->cpu_default_soft_min_freq); size += sysfs_emit_at(buf, size, "1: %10uMhz\n", @@ -709,7 +713,7 @@ static int vangogh_print_clk_levels(struct smu_context *smu, break; case SMU_OD_RANGE: if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - size = sysfs_emit(buf, "%s:\n", "OD_RANGE"); + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n", smu->gfx_default_hard_min_freq, smu->gfx_default_soft_max_freq); size += sysfs_emit_at(buf, size, "CCLK: %7uMhz %10uMhz\n", diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index 5aa175e12a78..145f13b8c977 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -497,6 +497,8 @@ static int renoir_print_clk_levels(struct smu_context *smu, if (ret) return ret; + smu_cmn_get_sysfs_buf(&buf, &size); + switch (clk_type) { case SMU_OD_RANGE: if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index ab652028e003..5019903db492 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -733,15 +733,19 @@ static int aldebaran_print_clk_levels(struct smu_context *smu, uint32_t freq_values[3] = {0}; uint32_t min_clk, max_clk; - if (amdgpu_ras_intr_triggered()) - return sysfs_emit(buf, "unavailable\n"); + smu_cmn_get_sysfs_buf(&buf, &size); + + if (amdgpu_ras_intr_triggered()) { + size += sysfs_emit_at(buf, size, "unavailable\n"); + return size; + } dpm_context = smu_dpm->dpm_context; switch (type) { case SMU_OD_SCLK: - size = sysfs_emit(buf, "%s:\n", "GFXCLK"); + size += sysfs_emit_at(buf, size, "%s:\n", "GFXCLK"); fallthrough; case SMU_SCLK: ret = aldebaran_get_current_clk_freq_by_table(smu, SMU_GFXCLK, &now); @@ -795,7 +799,7 @@ static int aldebaran_print_clk_levels(struct smu_context *smu, break; case SMU_OD_MCLK: - size = sysfs_emit(buf, "%s:\n", "MCLK"); + size += sysfs_emit_at(buf, size, "%s:\n", "MCLK"); fallthrough; case SMU_MCLK: ret = aldebaran_get_current_clk_freq_by_table(smu, SMU_UCLK, &now); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c index 627ba2eec7fd..a403657151ba 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c @@ -1052,16 +1052,18 @@ static int yellow_carp_print_clk_levels(struct smu_context *smu, int i, size = 0, ret = 0; uint32_t cur_value = 0, value = 0, count = 0; + smu_cmn_get_sysfs_buf(&buf, &size); + switch (clk_type) { case SMU_OD_SCLK: - size = sysfs_emit(buf, "%s:\n", "OD_SCLK"); + size += sysfs_emit_at(buf, size, "%s:\n", "OD_SCLK"); size += sysfs_emit_at(buf, size, "0: %10uMhz\n", (smu->gfx_actual_hard_min_freq > 0) ? smu->gfx_actual_hard_min_freq : smu->gfx_default_hard_min_freq); size += sysfs_emit_at(buf, size, "1: %10uMhz\n", (smu->gfx_actual_soft_max_freq > 0) ? smu->gfx_actual_soft_max_freq : smu->gfx_default_soft_max_freq); break; case SMU_OD_RANGE: - size = sysfs_emit(buf, "%s:\n", "OD_RANGE"); + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n", smu->gfx_default_hard_min_freq, smu->gfx_default_soft_max_freq); break; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h index 16993daa2ae0..4054d9493e77 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h @@ -110,5 +110,18 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev); int smu_cmn_set_mp1_state(struct smu_context *smu, enum pp_mp1_state mp1_state); +/* + * Helper function to make sysfs_emit_at() happy. Align buf to + * the current page boundary and record the offset. + */ +static inline void smu_cmn_get_sysfs_buf(char **buf, int *offset) +{ + if (!*buf || !offset) + return; + + *offset = offset_in_page(*buf); + *buf -= *offset; +} + #endif #endif From 8492d3a07d3c7a0c69df0dec2ae835f5557b8835 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Tue, 24 Aug 2021 17:02:57 +0800 Subject: [PATCH 278/543] drm/amdgpu: update SMU PPSMC for cyan skilfish Add some PPSMC MSGs for cyan skilfish. Signed-off-by: Lang Yu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/inc/smu_v11_8_ppsmc.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_8_ppsmc.h b/drivers/gpu/drm/amd/pm/inc/smu_v11_8_ppsmc.h index 6e6088760b18..909a86aa60f3 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v11_8_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_8_ppsmc.h @@ -65,6 +65,13 @@ #define PPSMC_MSG_SetDriverTableVMID 0x34 #define PPSMC_MSG_SetSoftMinCclk 0x35 #define PPSMC_MSG_SetSoftMaxCclk 0x36 -#define PPSMC_Message_Count 0x37 +#define PPSMC_MSG_GetGfxFrequency 0x37 +#define PPSMC_MSG_GetGfxVid 0x38 +#define PPSMC_MSG_ForceGfxFreq 0x39 +#define PPSMC_MSG_UnForceGfxFreq 0x3A +#define PPSMC_MSG_ForceGfxVid 0x3B +#define PPSMC_MSG_UnforceGfxVid 0x3C +#define PPSMC_MSG_GetEnabledSmuFeatures 0x3D +#define PPSMC_Message_Count 0x3E #endif From c007e17c8476cb3c1032864f60936f2b7586010b Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Fri, 27 Aug 2021 14:16:31 +0800 Subject: [PATCH 279/543] drm/amdgpu: update SMU driver interface for cyan skilfish(v3) Add SmuMetrics_t definition for cyan skilfish. v2: update SmuMetrics_t definition. v3: cleanup and rearrange the order of fields. Signed-off-by: Lang Yu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- .../pm/inc/smu11_driver_if_cyan_skillfish.h | 86 ++++++++----------- 1 file changed, 35 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_cyan_skillfish.h b/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_cyan_skillfish.h index 8a08ecc34c69..4884a4e1f261 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_cyan_skillfish.h +++ b/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_cyan_skillfish.h @@ -33,63 +33,47 @@ #define TABLE_PMSTATUSLOG 3 // Called by Tools for Agm logging #define TABLE_DPMCLOCKS 4 // Called by Driver; defined here, but not used, for backward compatible #define TABLE_MOMENTARY_PM 5 // Called by Tools; defined here, but not used, for backward compatible -#define TABLE_COUNT 6 +#define TABLE_SMU_METRICS 6 // Called by Driver +#define TABLE_COUNT 7 -#define NUM_DSPCLK_LEVELS 8 -#define NUM_SOCCLK_DPM_LEVELS 8 -#define NUM_DCEFCLK_DPM_LEVELS 4 -#define NUM_FCLK_DPM_LEVELS 4 -#define NUM_MEMCLK_DPM_LEVELS 4 +typedef struct SmuMetricsTable_t { + //CPU status + uint16_t CoreFrequency[6]; //[MHz] + uint32_t CorePower[6]; //[mW] + uint16_t CoreTemperature[6]; //[centi-Celsius] + uint16_t L3Frequency[2]; //[MHz] + uint16_t L3Temperature[2]; //[centi-Celsius] + uint16_t C0Residency[6]; //Percentage -#define NUMBER_OF_PSTATES 8 -#define NUMBER_OF_CORES 8 + // GFX status + uint16_t GfxclkFrequency; //[MHz] + uint16_t GfxTemperature; //[centi-Celsius] -typedef enum { - S3_TYPE_ENTRY, - S5_TYPE_ENTRY, -} Sleep_Type_e; + // SOC IP info + uint16_t SocclkFrequency; //[MHz] + uint16_t VclkFrequency; //[MHz] + uint16_t DclkFrequency; //[MHz] + uint16_t MemclkFrequency; //[MHz] -typedef enum { - GFX_OFF = 0, - GFX_ON = 1, -} GFX_Mode_e; + // power, VF info for CPU/GFX telemetry rails, and then socket power total + uint32_t Voltage[2]; //[mV] indices: VDDCR_VDD, VDDCR_GFX + uint32_t Current[2]; //[mA] indices: VDDCR_VDD, VDDCR_GFX + uint32_t Power[2]; //[mW] indices: VDDCR_VDD, VDDCR_GFX + uint32_t CurrentSocketPower; //[mW] -typedef enum { - CPU_P0 = 0, - CPU_P1, - CPU_P2, - CPU_P3, - CPU_P4, - CPU_P5, - CPU_P6, - CPU_P7 -} CPU_PState_e; + uint16_t SocTemperature; //[centi-Celsius] + uint16_t EdgeTemperature; + uint16_t ThrottlerStatus; + uint16_t Spare; -typedef enum { - CPU_CORE0 = 0, - CPU_CORE1, - CPU_CORE2, - CPU_CORE3, - CPU_CORE4, - CPU_CORE5, - CPU_CORE6, - CPU_CORE7 -} CORE_ID_e; +} SmuMetricsTable_t; -typedef enum { - DF_DPM0 = 0, - DF_DPM1, - DF_DPM2, - DF_DPM3, - DF_PState_Count -} DF_PState_e; - -typedef enum { - GFX_DPM0 = 0, - GFX_DPM1, - GFX_DPM2, - GFX_DPM3, - GFX_PState_Count -} GFX_PState_e; +typedef struct SmuMetrics_t { + SmuMetricsTable_t Current; + SmuMetricsTable_t Average; + uint32_t SampleStartTime; + uint32_t SampleStopTime; + uint32_t Accnt; +} SmuMetrics_t; #endif From 3061fe937ea9990524e73af6d04baca60ad5b137 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Fri, 27 Aug 2021 15:03:50 +0800 Subject: [PATCH 280/543] drm/amdgpu: add some pptable funcs for cyan skilfish(v3) Add print_clk_levels and read_sensor pptable funcs for cyan skilfish. v2: keep consitency and add get_gpu_metrics callback. v3: use sysfs_emit_at() in sysfs show function. Signed-off-by: Lang Yu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- .../amd/pm/swsmu/smu11/cyan_skillfish_ppt.c | 347 ++++++++++++++++++ 1 file changed, 347 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c index b05f9541accc..e1fab030cfc5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c @@ -44,6 +44,12 @@ #undef pr_info #undef pr_debug +#define FEATURE_MASK(feature) (1ULL << feature) +#define SMC_DPM_FEATURE ( \ + FEATURE_MASK(FEATURE_FCLK_DPM_BIT) | \ + FEATURE_MASK(FEATURE_SOC_DPM_BIT) | \ + FEATURE_MASK(FEATURE_GFX_DPM_BIT)) + static struct cmn2asic_msg_mapping cyan_skillfish_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 0), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 0), @@ -52,14 +58,354 @@ static struct cmn2asic_msg_mapping cyan_skillfish_message_map[SMU_MSG_MAX_COUNT] MSG_MAP(SetDriverDramAddrLow, PPSMC_MSG_SetDriverTableDramAddrLow, 0), MSG_MAP(TransferTableSmu2Dram, PPSMC_MSG_TransferTableSmu2Dram, 0), MSG_MAP(TransferTableDram2Smu, PPSMC_MSG_TransferTableDram2Smu, 0), + MSG_MAP(GetEnabledSmuFeatures, PPSMC_MSG_GetEnabledSmuFeatures, 0), + MSG_MAP(RequestGfxclk, PPSMC_MSG_RequestGfxclk, 0), + MSG_MAP(ForceGfxVid, PPSMC_MSG_ForceGfxVid, 0), + MSG_MAP(UnforceGfxVid, PPSMC_MSG_UnforceGfxVid, 0), }; +static struct cmn2asic_mapping cyan_skillfish_table_map[SMU_TABLE_COUNT] = { + TAB_MAP_VALID(SMU_METRICS), +}; + +static int cyan_skillfish_tables_init(struct smu_context *smu) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *tables = smu_table->tables; + + SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, + sizeof(SmuMetrics_t), + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM); + + smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL); + if (!smu_table->metrics_table) + goto err0_out; + + smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_2); + smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL); + if (!smu_table->gpu_metrics_table) + goto err1_out; + + smu_table->metrics_time = 0; + + return 0; + +err1_out: + smu_table->gpu_metrics_table_size = 0; + kfree(smu_table->metrics_table); +err0_out: + return -ENOMEM; +} + +static int cyan_skillfish_init_smc_tables(struct smu_context *smu) +{ + int ret = 0; + + ret = cyan_skillfish_tables_init(smu); + if (ret) + return ret; + + return smu_v11_0_init_smc_tables(smu); +} + +static int cyan_skillfish_finit_smc_tables(struct smu_context *smu) +{ + struct smu_table_context *smu_table = &smu->smu_table; + + kfree(smu_table->metrics_table); + smu_table->metrics_table = NULL; + + kfree(smu_table->gpu_metrics_table); + smu_table->gpu_metrics_table = NULL; + smu_table->gpu_metrics_table_size = 0; + + smu_table->metrics_time = 0; + + return 0; +} + +static int +cyan_skillfish_get_smu_metrics_data(struct smu_context *smu, + MetricsMember_t member, + uint32_t *value) +{ + struct smu_table_context *smu_table = &smu->smu_table; + SmuMetrics_t *metrics = (SmuMetrics_t *)smu_table->metrics_table; + int ret = 0; + + mutex_lock(&smu->metrics_lock); + + ret = smu_cmn_get_metrics_table_locked(smu, NULL, false); + if (ret) { + mutex_unlock(&smu->metrics_lock); + return ret; + } + + switch (member) { + case METRICS_CURR_GFXCLK: + *value = metrics->Current.GfxclkFrequency; + break; + case METRICS_CURR_SOCCLK: + *value = metrics->Current.SocclkFrequency; + break; + case METRICS_CURR_VCLK: + *value = metrics->Current.VclkFrequency; + break; + case METRICS_CURR_DCLK: + *value = metrics->Current.DclkFrequency; + break; + case METRICS_CURR_UCLK: + *value = metrics->Current.MemclkFrequency; + break; + case METRICS_AVERAGE_SOCKETPOWER: + *value = (metrics->Current.CurrentSocketPower << 8) / + 1000; + break; + case METRICS_TEMPERATURE_EDGE: + *value = metrics->Current.GfxTemperature / 100 * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + break; + case METRICS_TEMPERATURE_HOTSPOT: + *value = metrics->Current.SocTemperature / 100 * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + break; + case METRICS_VOLTAGE_VDDSOC: + *value = metrics->Current.Voltage[0]; + break; + case METRICS_VOLTAGE_VDDGFX: + *value = metrics->Current.Voltage[1]; + break; + case METRICS_THROTTLER_STATUS: + *value = metrics->Current.ThrottlerStatus; + break; + default: + *value = UINT_MAX; + break; + } + + mutex_unlock(&smu->metrics_lock); + + return ret; +} + +static int cyan_skillfish_read_sensor(struct smu_context *smu, + enum amd_pp_sensors sensor, + void *data, + uint32_t *size) +{ + int ret = 0; + + if (!data || !size) + return -EINVAL; + + mutex_lock(&smu->sensor_lock); + + switch (sensor) { + case AMDGPU_PP_SENSOR_GFX_SCLK: + ret = cyan_skillfish_get_smu_metrics_data(smu, + METRICS_CURR_GFXCLK, + (uint32_t *)data); + *(uint32_t *)data *= 100; + *size = 4; + break; + case AMDGPU_PP_SENSOR_GFX_MCLK: + ret = cyan_skillfish_get_smu_metrics_data(smu, + METRICS_CURR_UCLK, + (uint32_t *)data); + *(uint32_t *)data *= 100; + *size = 4; + break; + case AMDGPU_PP_SENSOR_GPU_POWER: + ret = cyan_skillfish_get_smu_metrics_data(smu, + METRICS_AVERAGE_SOCKETPOWER, + (uint32_t *)data); + *size = 4; + break; + case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: + ret = cyan_skillfish_get_smu_metrics_data(smu, + METRICS_TEMPERATURE_HOTSPOT, + (uint32_t *)data); + *size = 4; + break; + case AMDGPU_PP_SENSOR_EDGE_TEMP: + ret = cyan_skillfish_get_smu_metrics_data(smu, + METRICS_TEMPERATURE_EDGE, + (uint32_t *)data); + *size = 4; + break; + case AMDGPU_PP_SENSOR_VDDNB: + ret = cyan_skillfish_get_smu_metrics_data(smu, + METRICS_VOLTAGE_VDDSOC, + (uint32_t *)data); + *size = 4; + break; + case AMDGPU_PP_SENSOR_VDDGFX: + ret = cyan_skillfish_get_smu_metrics_data(smu, + METRICS_VOLTAGE_VDDGFX, + (uint32_t *)data); + *size = 4; + break; + default: + ret = -EOPNOTSUPP; + break; + } + + mutex_unlock(&smu->sensor_lock); + + return ret; +} + +static int cyan_skillfish_get_current_clk_freq(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t *value) +{ + MetricsMember_t member_type; + + switch (clk_type) { + case SMU_GFXCLK: + case SMU_SCLK: + member_type = METRICS_CURR_GFXCLK; + break; + case SMU_FCLK: + case SMU_MCLK: + member_type = METRICS_CURR_UCLK; + break; + case SMU_SOCCLK: + member_type = METRICS_CURR_SOCCLK; + break; + case SMU_VCLK: + member_type = METRICS_CURR_VCLK; + break; + case SMU_DCLK: + member_type = METRICS_CURR_DCLK; + break; + default: + return -EINVAL; + } + + return cyan_skillfish_get_smu_metrics_data(smu, member_type, value); +} + +static int cyan_skillfish_print_clk_levels(struct smu_context *smu, + enum smu_clk_type clk_type, + char *buf) +{ + int ret = 0, size = 0; + uint32_t cur_value = 0; + + smu_cmn_get_sysfs_buf(&buf, &size); + + switch (clk_type) { + case SMU_GFXCLK: + case SMU_SCLK: + case SMU_FCLK: + case SMU_MCLK: + case SMU_SOCCLK: + case SMU_VCLK: + case SMU_DCLK: + ret = cyan_skillfish_get_current_clk_freq(smu, clk_type, &cur_value); + if (ret) + return ret; + size += sysfs_emit_at(buf, size, "0: %uMhz *\n", cur_value); + break; + default: + dev_warn(smu->adev->dev, "Unsupported clock type\n"); + return ret; + } + + return size; +} + +static bool cyan_skillfish_is_dpm_running(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + int ret = 0; + uint32_t feature_mask[2]; + uint64_t feature_enabled; + + /* we need to re-init after suspend so return false */ + if (adev->in_suspend) + return false; + + ret = smu_cmn_get_enabled_32_bits_mask(smu, feature_mask, 2); + + if (ret) + return false; + + feature_enabled = (uint64_t)feature_mask[0] | + ((uint64_t)feature_mask[1] << 32); + + return !!(feature_enabled & SMC_DPM_FEATURE); +} + +static ssize_t cyan_skillfish_get_gpu_metrics(struct smu_context *smu, + void **table) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct gpu_metrics_v2_2 *gpu_metrics = + (struct gpu_metrics_v2_2 *)smu_table->gpu_metrics_table; + SmuMetrics_t metrics; + int i, ret = 0; + + ret = smu_cmn_get_metrics_table(smu, &metrics, true); + if (ret) + return ret; + + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 2); + + gpu_metrics->temperature_gfx = metrics.Current.GfxTemperature; + gpu_metrics->temperature_soc = metrics.Current.SocTemperature; + + gpu_metrics->average_socket_power = metrics.Current.CurrentSocketPower; + gpu_metrics->average_soc_power = metrics.Current.Power[0]; + gpu_metrics->average_gfx_power = metrics.Current.Power[1]; + + gpu_metrics->average_gfxclk_frequency = metrics.Average.GfxclkFrequency; + gpu_metrics->average_socclk_frequency = metrics.Average.SocclkFrequency; + gpu_metrics->average_uclk_frequency = metrics.Average.MemclkFrequency; + gpu_metrics->average_fclk_frequency = metrics.Average.MemclkFrequency; + gpu_metrics->average_vclk_frequency = metrics.Average.VclkFrequency; + gpu_metrics->average_dclk_frequency = metrics.Average.DclkFrequency; + + gpu_metrics->current_gfxclk = metrics.Current.GfxclkFrequency; + gpu_metrics->current_socclk = metrics.Current.SocclkFrequency; + gpu_metrics->current_uclk = metrics.Current.MemclkFrequency; + gpu_metrics->current_fclk = metrics.Current.MemclkFrequency; + gpu_metrics->current_vclk = metrics.Current.VclkFrequency; + gpu_metrics->current_dclk = metrics.Current.DclkFrequency; + + for (i = 0; i < 6; i++) { + gpu_metrics->temperature_core[i] = metrics.Current.CoreTemperature[i]; + gpu_metrics->average_core_power[i] = metrics.Average.CorePower[i]; + gpu_metrics->current_coreclk[i] = metrics.Current.CoreFrequency[i]; + } + + for (i = 0; i < 2; i++) { + gpu_metrics->temperature_l3[i] = metrics.Current.L3Temperature[i]; + gpu_metrics->current_l3clk[i] = metrics.Current.L3Frequency[i]; + } + + gpu_metrics->throttle_status = metrics.Current.ThrottlerStatus; + gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); + + *table = (void *)gpu_metrics; + + return sizeof(struct gpu_metrics_v2_2); +} + static const struct pptable_funcs cyan_skillfish_ppt_funcs = { .check_fw_status = smu_v11_0_check_fw_status, .check_fw_version = smu_v11_0_check_fw_version, .init_power = smu_v11_0_init_power, .fini_power = smu_v11_0_fini_power, + .init_smc_tables = cyan_skillfish_init_smc_tables, + .fini_smc_tables = cyan_skillfish_finit_smc_tables, + .read_sensor = cyan_skillfish_read_sensor, + .print_clk_levels = cyan_skillfish_print_clk_levels, + .is_dpm_running = cyan_skillfish_is_dpm_running, + .get_gpu_metrics = cyan_skillfish_get_gpu_metrics, .register_irq_handler = smu_v11_0_register_irq_handler, .notify_memory_pool_location = smu_v11_0_notify_memory_pool_location, .send_smc_msg_with_param = smu_cmn_send_smc_msg_with_param, @@ -72,5 +418,6 @@ void cyan_skillfish_set_ppt_funcs(struct smu_context *smu) { smu->ppt_funcs = &cyan_skillfish_ppt_funcs; smu->message_map = cyan_skillfish_message_map; + smu->table_map = cyan_skillfish_table_map; smu->is_apu = true; } From abd0a16ac72c98c46e7a1a91d591121b9c95cf97 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Fri, 27 Aug 2021 15:20:51 +0800 Subject: [PATCH 281/543] drm/amdgpu: add manual sclk/vddc setting support for cyan skilfish(v3) Add manual sclk/vddc setting supoort via pp_od_clk_voltage sysfs to maintain consistency with other asics. As cyan skillfish doesn't support DPM, there is only a single frequency and voltage to adjust. v2: maintain consistency and add command guide. v3: adjust user settings storage and coding style. Command guide: echo vc point sclk vddc > pp_od_clk_voltage "vc" - sclk voltage curve "point" - must be 0 "sclk" - target value of sclk(MHz), should be in safe range "vddc" - target value of vddc(mV), a 6.25(mV) stepping is recommended and should be in safe range (the real vddc is an approximation of target value) echo c > pp_od_clk_voltage "c" - commit the changes of sclk and vddc, only after the commit command, the target values set by "vc" command will take effect echo r > pp_od_clk_voltage "r" - reset sclk and vddc to default value, a subsequent commit command is needed to take effect Example: 1) Check default sclk and vddc $ cat pp_od_clk_voltage OD_SCLK: 0: 1800Mhz * OD_VDDC: 0: 862mV * OD_RANGE: SCLK: 1000Mhz 2000Mhz VDDC: 700mV 1129mV 2) Set sclk to 1500MHz and vddc to 700mV $ echo vc 0 1500 700 > pp_od_clk_voltage $ echo c > pp_od_clk_voltage $ cat pp_od_clk_voltage OD_SCLK: 0: 1500Mhz * OD_VDDC: 0: 693mV * OD_RANGE: SCLK: 1000Mhz 2000Mhz VDDC: 700mV 1129mV 3) Reset sclk and vddc to default $ echo r > pp_od_clk_voltage $ echo c > pp_od_clk_voltage $ cat pp_od_clk_voltage OD_SCLK: 0: 1800Mhz * OD_VDDC: 0: 874mV * OD_RANGE: SCLK: 1000Mhz 2000Mhz VDDC: 700mV 1129mV NOTE: We don't specify an explicit safe range, you can set any values between min and max at your own risk. Enjoy! Signed-off-by: Lang Yu Reviewed-by: Lijo Lazar Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/inc/smu_types.h | 5 +- .../amd/pm/swsmu/smu11/cyan_skillfish_ppt.c | 134 ++++++++++++++++++ 2 files changed, 138 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/inc/smu_types.h b/drivers/gpu/drm/amd/pm/inc/smu_types.h index 6f1b1b50d527..18b862a90fbe 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_types.h @@ -226,7 +226,10 @@ __SMU_DUMMY_MAP(SetUclkDpmMode), \ __SMU_DUMMY_MAP(LightSBR), \ __SMU_DUMMY_MAP(GfxDriverResetRecovery), \ - __SMU_DUMMY_MAP(BoardPowerCalibration), + __SMU_DUMMY_MAP(BoardPowerCalibration), \ + __SMU_DUMMY_MAP(RequestGfxclk), \ + __SMU_DUMMY_MAP(ForceGfxVid), \ + __SMU_DUMMY_MAP(UnforceGfxVid), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c index e1fab030cfc5..3d4c65bc29dc 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c @@ -44,6 +44,21 @@ #undef pr_info #undef pr_debug +/* unit: MHz */ +#define CYAN_SKILLFISH_SCLK_MIN 1000 +#define CYAN_SKILLFISH_SCLK_MAX 2000 +#define CYAN_SKILLFISH_SCLK_DEFAULT 1800 + +/* unit: mV */ +#define CYAN_SKILLFISH_VDDC_MIN 700 +#define CYAN_SKILLFISH_VDDC_MAX 1129 +#define CYAN_SKILLFISH_VDDC_MAGIC 5118 // 0x13fe + +static struct gfx_user_settings { + uint32_t sclk; + uint32_t vddc; +} cyan_skillfish_user_settings; + #define FEATURE_MASK(feature) (1ULL << feature) #define SMC_DPM_FEATURE ( \ FEATURE_MASK(FEATURE_FCLK_DPM_BIT) | \ @@ -297,6 +312,27 @@ static int cyan_skillfish_print_clk_levels(struct smu_context *smu, smu_cmn_get_sysfs_buf(&buf, &size); switch (clk_type) { + case SMU_OD_SCLK: + ret = cyan_skillfish_get_smu_metrics_data(smu, METRICS_CURR_GFXCLK, &cur_value); + if (ret) + return ret; + size += sysfs_emit_at(buf, size,"%s:\n", "OD_SCLK"); + size += sysfs_emit_at(buf, size, "0: %uMhz *\n", cur_value); + break; + case SMU_OD_VDDC_CURVE: + ret = cyan_skillfish_get_smu_metrics_data(smu, METRICS_VOLTAGE_VDDGFX, &cur_value); + if (ret) + return ret; + size += sysfs_emit_at(buf, size,"%s:\n", "OD_VDDC"); + size += sysfs_emit_at(buf, size, "0: %umV *\n", cur_value); + break; + case SMU_OD_RANGE: + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n", + CYAN_SKILLFISH_SCLK_MIN, CYAN_SKILLFISH_SCLK_MAX); + size += sysfs_emit_at(buf, size, "VDDC: %7umV %10umV\n", + CYAN_SKILLFISH_VDDC_MIN, CYAN_SKILLFISH_VDDC_MAX); + break; case SMU_GFXCLK: case SMU_SCLK: case SMU_FCLK: @@ -394,6 +430,103 @@ static ssize_t cyan_skillfish_get_gpu_metrics(struct smu_context *smu, return sizeof(struct gpu_metrics_v2_2); } +static int cyan_skillfish_od_edit_dpm_table(struct smu_context *smu, + enum PP_OD_DPM_TABLE_COMMAND type, + long input[], uint32_t size) +{ + int ret = 0; + uint32_t vid; + + switch (type) { + case PP_OD_EDIT_VDDC_CURVE: + if (size != 3 || input[0] != 0) { + dev_err(smu->adev->dev, "Invalid parameter!\n"); + return -EINVAL; + } + + if (input[1] <= CYAN_SKILLFISH_SCLK_MIN || + input[1] > CYAN_SKILLFISH_SCLK_MAX) { + dev_err(smu->adev->dev, "Invalid sclk! Valid sclk range: %uMHz - %uMhz\n", + CYAN_SKILLFISH_SCLK_MIN, CYAN_SKILLFISH_SCLK_MAX); + return -EINVAL; + } + + if (input[2] <= CYAN_SKILLFISH_VDDC_MIN || + input[2] > CYAN_SKILLFISH_VDDC_MAX) { + dev_err(smu->adev->dev, "Invalid vddc! Valid vddc range: %umV - %umV\n", + CYAN_SKILLFISH_VDDC_MIN, CYAN_SKILLFISH_VDDC_MAX); + return -EINVAL; + } + + cyan_skillfish_user_settings.sclk = input[1]; + cyan_skillfish_user_settings.vddc = input[2]; + + break; + case PP_OD_RESTORE_DEFAULT_TABLE: + if (size != 0) { + dev_err(smu->adev->dev, "Invalid parameter!\n"); + return -EINVAL; + } + + cyan_skillfish_user_settings.sclk = CYAN_SKILLFISH_SCLK_DEFAULT; + cyan_skillfish_user_settings.vddc = CYAN_SKILLFISH_VDDC_MAGIC; + + break; + case PP_OD_COMMIT_DPM_TABLE: + if (size != 0) { + dev_err(smu->adev->dev, "Invalid parameter!\n"); + return -EINVAL; + } + + if (cyan_skillfish_user_settings.sclk < CYAN_SKILLFISH_SCLK_MIN || + cyan_skillfish_user_settings.sclk > CYAN_SKILLFISH_SCLK_MAX) { + dev_err(smu->adev->dev, "Invalid sclk! Valid sclk range: %uMHz - %uMhz\n", + CYAN_SKILLFISH_SCLK_MIN, CYAN_SKILLFISH_SCLK_MAX); + return -EINVAL; + } + + if ((cyan_skillfish_user_settings.vddc != CYAN_SKILLFISH_VDDC_MAGIC) && + (cyan_skillfish_user_settings.vddc < CYAN_SKILLFISH_VDDC_MIN || + cyan_skillfish_user_settings.vddc > CYAN_SKILLFISH_VDDC_MAX)) { + dev_err(smu->adev->dev, "Invalid vddc! Valid vddc range: %umV - %umV\n", + CYAN_SKILLFISH_VDDC_MIN, CYAN_SKILLFISH_VDDC_MAX); + return -EINVAL; + } + + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_RequestGfxclk, + cyan_skillfish_user_settings.sclk, NULL); + if (ret) { + dev_err(smu->adev->dev, "Set sclk failed!\n"); + return ret; + } + + if (cyan_skillfish_user_settings.vddc == CYAN_SKILLFISH_VDDC_MAGIC) { + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_UnforceGfxVid, NULL); + if (ret) { + dev_err(smu->adev->dev, "Unforce vddc failed!\n"); + return ret; + } + } else { + /* + * PMFW accepts SVI2 VID code, convert voltage to VID: + * vid = (uint32_t)((1.55 - voltage) * 160.0 + 0.00001) + */ + vid = (1550 - cyan_skillfish_user_settings.vddc) * 160 / 1000; + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ForceGfxVid, vid, NULL); + if (ret) { + dev_err(smu->adev->dev, "Force vddc failed!\n"); + return ret; + } + } + + break; + default: + return -EOPNOTSUPP; + } + + return ret; +} + static const struct pptable_funcs cyan_skillfish_ppt_funcs = { .check_fw_status = smu_v11_0_check_fw_status, @@ -406,6 +539,7 @@ static const struct pptable_funcs cyan_skillfish_ppt_funcs = { .print_clk_levels = cyan_skillfish_print_clk_levels, .is_dpm_running = cyan_skillfish_is_dpm_running, .get_gpu_metrics = cyan_skillfish_get_gpu_metrics, + .od_edit_dpm_table = cyan_skillfish_od_edit_dpm_table, .register_irq_handler = smu_v11_0_register_irq_handler, .notify_memory_pool_location = smu_v11_0_notify_memory_pool_location, .send_smc_msg_with_param = smu_cmn_send_smc_msg_with_param, From fb932dfeb87411a8a01c995576198bfc302df339 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 31 Aug 2021 17:42:15 -0400 Subject: [PATCH 282/543] drm/amdkfd: make needs_pcie_atomics FW-version dependent On some GPUs the PCIe atomic requirement for KFD depends on the MEC firmware version. Add a firmware version check for this. The minimum firmware version that works without atomics can be updated in the device_info structure for each GPU type. Move PCIe atomic detection from kgd2kfd_probe into kgd2kfd_device_init because the MEC firmware is not loaded yet at the probe stage. Signed-off-by: Felix Kuehling Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 44 ++++++++++++++++--------- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + 2 files changed, 29 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 16a57b70cc1a..30fde852af19 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -468,6 +468,7 @@ static const struct kfd_device_info navi10_device_info = { .needs_iommu_device = false, .supports_cwsr = true, .needs_pci_atomics = true, + .no_atomic_fw_version = 145, .num_sdma_engines = 2, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -487,6 +488,7 @@ static const struct kfd_device_info navi12_device_info = { .needs_iommu_device = false, .supports_cwsr = true, .needs_pci_atomics = true, + .no_atomic_fw_version = 145, .num_sdma_engines = 2, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -506,6 +508,7 @@ static const struct kfd_device_info navi14_device_info = { .needs_iommu_device = false, .supports_cwsr = true, .needs_pci_atomics = true, + .no_atomic_fw_version = 145, .num_sdma_engines = 2, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -525,6 +528,7 @@ static const struct kfd_device_info sienna_cichlid_device_info = { .needs_iommu_device = false, .supports_cwsr = true, .needs_pci_atomics = true, + .no_atomic_fw_version = 92, .num_sdma_engines = 4, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -544,6 +548,7 @@ static const struct kfd_device_info navy_flounder_device_info = { .needs_iommu_device = false, .supports_cwsr = true, .needs_pci_atomics = true, + .no_atomic_fw_version = 92, .num_sdma_engines = 2, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -562,7 +567,8 @@ static const struct kfd_device_info vangogh_device_info = { .mqd_size_aligned = MQD_SIZE_ALIGNED, .needs_iommu_device = false, .supports_cwsr = true, - .needs_pci_atomics = false, + .needs_pci_atomics = true, + .no_atomic_fw_version = 92, .num_sdma_engines = 1, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, @@ -582,6 +588,7 @@ static const struct kfd_device_info dimgrey_cavefish_device_info = { .needs_iommu_device = false, .supports_cwsr = true, .needs_pci_atomics = true, + .no_atomic_fw_version = 92, .num_sdma_engines = 2, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -601,6 +608,7 @@ static const struct kfd_device_info beige_goby_device_info = { .needs_iommu_device = false, .supports_cwsr = true, .needs_pci_atomics = true, + .no_atomic_fw_version = 92, .num_sdma_engines = 1, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -619,7 +627,8 @@ static const struct kfd_device_info yellow_carp_device_info = { .mqd_size_aligned = MQD_SIZE_ALIGNED, .needs_iommu_device = false, .supports_cwsr = true, - .needs_pci_atomics = false, + .needs_pci_atomics = true, + .no_atomic_fw_version = 92, .num_sdma_engines = 1, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, @@ -708,20 +717,6 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, if (!kfd) return NULL; - /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps. - * 32 and 64-bit requests are possible and must be - * supported. - */ - kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kgd); - if (device_info->needs_pci_atomics && - !kfd->pci_atomic_requested) { - dev_info(kfd_device, - "skipped device %x:%x, PCI rejects atomics\n", - pdev->vendor, pdev->device); - kfree(kfd); - return NULL; - } - kfd->kgd = kgd; kfd->device_info = device_info; kfd->pdev = pdev; @@ -821,6 +816,23 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd - kfd->vm_info.first_vmid_kfd + 1; + /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps. + * 32 and 64-bit requests are possible and must be + * supported. + */ + kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->kgd); + if (!kfd->pci_atomic_requested && + kfd->device_info->needs_pci_atomics && + (!kfd->device_info->no_atomic_fw_version || + kfd->mec_fw_version < kfd->device_info->no_atomic_fw_version)) { + dev_info(kfd_device, + "skipped device %x:%x, PCI rejects atomics %d<%d\n", + kfd->pdev->vendor, kfd->pdev->device, + kfd->mec_fw_version, + kfd->device_info->no_atomic_fw_version); + return false; + } + /* Verify module parameters regarding mapped process number*/ if ((hws_max_conc_proc < 0) || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index ab83b0de6b22..6d8f9bb2d905 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -207,6 +207,7 @@ struct kfd_device_info { bool supports_cwsr; bool needs_iommu_device; bool needs_pci_atomics; + uint32_t no_atomic_fw_version; unsigned int num_sdma_engines; unsigned int num_xgmi_sdma_engines; unsigned int num_sdma_queues_per_engine; From 9987fbb368038d41bfdcda2a3f7f4945d7daa9a5 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Mon, 16 Aug 2021 15:57:12 -0400 Subject: [PATCH 283/543] drm/amd/display: Get backlight from PWM if DMCU is not initialized On Carrizo/Stoney systems we set backlight through panel_cntl, i.e. directly via the PWM registers, if DMCU is not initialized. We always read it back through ABM registers which leads to a mismatch and forces atomic_commit to program the backlight each time. Instead make sure we use the same logic for backlight readback, i.e. read it from panel_cntl if DMCU is not initialized. We also need to remove some extraneous and incorrect calculations at the end of dce_get_16_bit_backlight_from_pwm. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1666 Cc: stable@vger.kernel.org Reviewed-by: Josip Pavic Acked-by: Mikita Lipski Signed-off-by: Harry Wentland Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 16 ++++++++++++---- .../gpu/drm/amd/display/dc/dce/dce_panel_cntl.c | 10 ---------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 8bd7f42a8053..1e44b13c1c7d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2586,13 +2586,21 @@ static struct abm *get_abm_from_stream_res(const struct dc_link *link) int dc_link_get_backlight_level(const struct dc_link *link) { - struct abm *abm = get_abm_from_stream_res(link); + struct panel_cntl *panel_cntl = link->panel_cntl; + struct dc *dc = link->ctx->dc; + struct dmcu *dmcu = dc->res_pool->dmcu; + bool fw_set_brightness = true; - if (abm == NULL || abm->funcs->get_current_backlight == NULL) + if (dmcu) + fw_set_brightness = dmcu->funcs->is_dmcu_initialized(dmcu); + + if (!fw_set_brightness && panel_cntl->funcs->get_current_backlight) + return panel_cntl->funcs->get_current_backlight(panel_cntl); + else if (abm != NULL && abm->funcs->get_current_backlight != NULL) + return (int) abm->funcs->get_current_backlight(abm); + else return DC_ERROR_UNEXPECTED; - - return (int) abm->funcs->get_current_backlight(abm); } int dc_link_get_target_backlight_pwm(const struct dc_link *link) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c index e92339235863..e8570060d007 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c @@ -49,7 +49,6 @@ static unsigned int dce_get_16_bit_backlight_from_pwm(struct panel_cntl *panel_cntl) { uint64_t current_backlight; - uint32_t round_result; uint32_t bl_period, bl_int_count; uint32_t bl_pwm, fractional_duty_cycle_en; uint32_t bl_period_mask, bl_pwm_mask; @@ -84,15 +83,6 @@ static unsigned int dce_get_16_bit_backlight_from_pwm(struct panel_cntl *panel_c current_backlight = div_u64(current_backlight, bl_period); current_backlight = (current_backlight + 1) >> 1; - current_backlight = (uint64_t)(current_backlight) * bl_period; - - round_result = (uint32_t)(current_backlight & 0xFFFFFFFF); - - round_result = (round_result >> (bl_int_count-1)) & 1; - - current_backlight >>= bl_int_count; - current_backlight += round_result; - return (uint32_t)(current_backlight); } From 90517c9838602846daa0feec7b37382fed61b001 Mon Sep 17 00:00:00 2001 From: Hersen Wu Date: Wed, 25 Aug 2021 16:27:47 -0400 Subject: [PATCH 284/543] drm/amd/display: dsc mst 2 4K displays go dark with 2 lane HBR3 [Why] call stack of amdgpu dsc mst pbn, slot num calculation is as below: -compute_bpp_x16_from_target_bandwidth -decide_dsc_target_bpp_x16 -setup_dsc_config -dc_dsc_compute_bandwidth_range -compute_mst_dsc_configs_for_link -compute_mst_dsc_configs_for_state from pbn -> dsc target bpp_x16 bpp_x16 is calulated by compute_bpp_x16_from_target_bandwidth. Beside pixel clock and bpp, num_slices_h and bpp_increment_div will also affect bpp_x16. from dsc target bpp_x16 -> pbn within dm_update_mst_vcpi_slots_for_dsc, pbn = drm_dp_calc_pbn_mode(clock, bpp_x16, true); drm_dp_calc_pbn_mode(int clock, int bpp, bool dsc) { return DIV_ROUND_UP_ULL(mul_u32_u32(clock * (bpp / 16), 64 * 1006), 8 * 54 * 1000 * 1000); } bpp / 16 trunc digits after decimal point. This will cause calculation delta. drm_dp_calc_pbn_mode does not have other informations, like num_slices_h, bpp_increment_div. therefore, it does not do revese calcuation properly from bpp_x16 to pbn. pbn from drm_dp_calc_pbn_mode is less than pbn from compute_mst_dsc_configs_for_state. This cause not enough mst slot allocated to display. display could not visually light up. [How] pass pbn from compute_mst_dsc_configs_for_state to dm_update_mst_vcpi_slots_for_dsc Cc: stable@vger.kernel.org Reviewed-by: Scott Foster Acked-by: Mikita Lipski Signed-off-by: Hersen Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 22 ++++++++++++++----- .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 18 +++++++-------- .../display/amdgpu_dm/amdgpu_dm_mst_types.h | 11 +++++++++- 3 files changed, 34 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 14ba5eeb974f..deb8010efc69 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6794,14 +6794,15 @@ const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs = { #if defined(CONFIG_DRM_AMD_DC_DCN) static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, - struct dc_state *dc_state) + struct dc_state *dc_state, + struct dsc_mst_fairness_vars *vars) { struct dc_stream_state *stream = NULL; struct drm_connector *connector; struct drm_connector_state *new_con_state; struct amdgpu_dm_connector *aconnector; struct dm_connector_state *dm_conn_state; - int i, j, clock, bpp; + int i, j, clock; int vcpi, pbn_div, pbn = 0; for_each_new_connector_in_state(state, connector, new_con_state, i) { @@ -6840,9 +6841,15 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, } pbn_div = dm_mst_get_pbn_divider(stream->link); - bpp = stream->timing.dsc_cfg.bits_per_pixel; clock = stream->timing.pix_clk_100hz / 10; - pbn = drm_dp_calc_pbn_mode(clock, bpp, true); + /* pbn is calculated by compute_mst_dsc_configs_for_state*/ + for (j = 0; j < dc_state->stream_count; j++) { + if (vars[j].aconnector == aconnector) { + pbn = vars[j].pbn; + break; + } + } + vcpi = drm_dp_mst_atomic_enable_dsc(state, aconnector->port, pbn, pbn_div, @@ -10247,6 +10254,9 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, int ret, i; bool lock_and_validation_needed = false; struct dm_crtc_state *dm_old_crtc_state; +#if defined(CONFIG_DRM_AMD_DC_DCN) + struct dsc_mst_fairness_vars vars[MAX_PIPES]; +#endif trace_amdgpu_dm_atomic_check_begin(state); @@ -10477,10 +10487,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, goto fail; #if defined(CONFIG_DRM_AMD_DC_DCN) - if (!compute_mst_dsc_configs_for_state(state, dm_state->context)) + if (!compute_mst_dsc_configs_for_state(state, dm_state->context, vars)) goto fail; - ret = dm_update_mst_vcpi_slots_for_dsc(state, dm_state->context); + ret = dm_update_mst_vcpi_slots_for_dsc(state, dm_state->context, vars); if (ret) goto fail; #endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 1bcba6943fd7..7af0d58c231b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -518,12 +518,7 @@ struct dsc_mst_fairness_params { uint32_t num_slices_h; uint32_t num_slices_v; uint32_t bpp_overwrite; -}; - -struct dsc_mst_fairness_vars { - int pbn; - bool dsc_enabled; - int bpp_x16; + struct amdgpu_dm_connector *aconnector; }; static int kbps_to_peak_pbn(int kbps) @@ -750,12 +745,12 @@ static void try_disable_dsc(struct drm_atomic_state *state, static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, struct dc_state *dc_state, - struct dc_link *dc_link) + struct dc_link *dc_link, + struct dsc_mst_fairness_vars *vars) { int i; struct dc_stream_state *stream; struct dsc_mst_fairness_params params[MAX_PIPES]; - struct dsc_mst_fairness_vars vars[MAX_PIPES]; struct amdgpu_dm_connector *aconnector; int count = 0; bool debugfs_overwrite = false; @@ -776,6 +771,7 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, params[count].timing = &stream->timing; params[count].sink = stream->sink; aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; + params[count].aconnector = aconnector; params[count].port = aconnector->port; params[count].clock_force_enable = aconnector->dsc_settings.dsc_force_enable; if (params[count].clock_force_enable == DSC_CLK_FORCE_ENABLE) @@ -798,6 +794,7 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, } /* Try no compression */ for (i = 0; i < count; i++) { + vars[i].aconnector = params[i].aconnector; vars[i].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); vars[i].dsc_enabled = false; vars[i].bpp_x16 = 0; @@ -851,7 +848,8 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, } bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, - struct dc_state *dc_state) + struct dc_state *dc_state, + struct dsc_mst_fairness_vars *vars) { int i, j; struct dc_stream_state *stream; @@ -882,7 +880,7 @@ bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, return false; mutex_lock(&aconnector->mst_mgr.lock); - if (!compute_mst_dsc_configs_for_link(state, dc_state, stream->link)) { + if (!compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars)) { mutex_unlock(&aconnector->mst_mgr.lock); return false; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index b38bd68121ce..900d3f7a8498 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -39,8 +39,17 @@ void dm_dp_create_fake_mst_encoders(struct amdgpu_device *adev); #if defined(CONFIG_DRM_AMD_DC_DCN) + +struct dsc_mst_fairness_vars { + int pbn; + bool dsc_enabled; + int bpp_x16; + struct amdgpu_dm_connector *aconnector; +}; + bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, - struct dc_state *dc_state); + struct dc_state *dc_state, + struct dsc_mst_fairness_vars *vars); #endif #endif From 4e00a434a08e0654a4dd9347485d9ec85deee1ef Mon Sep 17 00:00:00 2001 From: Qingqing Zhuo Date: Fri, 27 Aug 2021 06:58:38 -0400 Subject: [PATCH 285/543] drm/amd/display: Fix unstable HPCP compliance on Chrome Barcelo [Why] Intermittently, there presents two occurrences of 0 stream commits in a single HPD event. Current HDCP sequence does not consider such scenerio, and will thus disable HDCP. [How] Add condition check to include stream remove and re-enable case for HDCP enable. Reviewed-by: Bhawanpreet Lakha Acked-by: Mikita Lipski Signed-off-by: Qingqing Zhuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index deb8010efc69..5975233dfa77 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8067,8 +8067,26 @@ static bool is_content_protection_different(struct drm_connector_state *state, state->content_protection == DRM_MODE_CONTENT_PROTECTION_ENABLED) state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED; - /* Check if something is connected/enabled, otherwise we start hdcp but nothing is connected/enabled - * hot-plug, headless s3, dpms + /* Stream removed and re-enabled + * + * Can sometimes overlap with the HPD case, + * thus set update_hdcp to false to avoid + * setting HDCP multiple times. + * + * Handles: DESIRED -> DESIRED (Special case) + */ + if (!(old_state->crtc && old_state->crtc->enabled) && + state->crtc && state->crtc->enabled && + connector->state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED) { + dm_con_state->update_hdcp = false; + return true; + } + + /* Hot-plug, headless s3, dpms + * + * Only start HDCP if the display is connected/enabled. + * update_hdcp flag will be set to false until the next + * HPD comes in. * * Handles: DESIRED -> DESIRED (Special case) */ From 71ae30997a8f1791835167d3ceb8d1fab32407db Mon Sep 17 00:00:00 2001 From: Meenakshikumar Somasundaram Date: Mon, 30 Aug 2021 14:01:10 -0400 Subject: [PATCH 286/543] drm/amd/display: Link training retry fix for abort case [Why] If link training is aborted, it shall be retried if sink is present. [How] Check hpd status to find out whether sink is present or not. If sink is present, then link training shall be tried again with same settings. Otherwise, link training shall be aborted. Reviewed-by: Jimmy Kizito Acked-by: Mikita Lipski Signed-off-by: Meenakshikumar Somasundaram Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 330edd666b7d..ceda98a63ff3 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1840,9 +1840,13 @@ bool perform_link_training_with_retries( dp_disable_link_phy(link, signal); /* Abort link training if failure due to sink being unplugged. */ - if (status == LINK_TRAINING_ABORT) - break; - else if (do_fallback) { + if (status == LINK_TRAINING_ABORT) { + enum dc_connection_type type = dc_connection_none; + + dc_link_detect_sink(link, &type); + if (type == dc_connection_none) + break; + } else if (do_fallback) { decide_fallback_link_setting(*link_setting, ¤t_setting, status); /* Fail link training if reduced link bandwidth no longer meets * stream requirements. From fefc01f042f44ede373ee66773b8238dd8fdcb55 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Tue, 7 Sep 2021 11:13:02 -0400 Subject: [PATCH 287/543] drm/amdkfd: separate kfd_iommu_resume from kfd_resume Separate kfd_iommu_resume from kfd_resume for fine-tuning of amdgpu device init/resume/reset/recovery sequence. v2: squash in fix for !CONFIG_HSA_AMD Bug: https://bugzilla.kernel.org/show_bug.cgi?id=211277 Signed-off-by: James Zhu Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 6 ++++++ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 12 ++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index ec028cf963f5..089c7b967008 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -327,6 +327,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, const struct kgd2kfd_shared_resources *gpu_resources); void kgd2kfd_device_exit(struct kfd_dev *kfd); void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm); +int kgd2kfd_resume_iommu(struct kfd_dev *kfd); int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm); int kgd2kfd_pre_reset(struct kfd_dev *kfd); int kgd2kfd_post_reset(struct kfd_dev *kfd); @@ -365,6 +366,11 @@ static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) { } +static int __maybe_unused kgd2kfd_resume_iommu(struct kfd_dev *kfd) +{ + return 0; +} + static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) { return 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 30fde852af19..98d1b3ab3a46 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -1069,17 +1069,21 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) return ret; } -static int kfd_resume(struct kfd_dev *kfd) +int kgd2kfd_resume_iommu(struct kfd_dev *kfd) { int err = 0; err = kfd_iommu_resume(kfd); - if (err) { + if (err) dev_err(kfd_device, "Failed to resume IOMMU for device %x:%x\n", kfd->pdev->vendor, kfd->pdev->device); - return err; - } + return err; +} + +static int kfd_resume(struct kfd_dev *kfd) +{ + int err = 0; err = kfd->dqm->ops.start(kfd->dqm); if (err) { From 8066008482e533e91934bee49765bf8b4a7c40db Mon Sep 17 00:00:00 2001 From: James Zhu Date: Tue, 7 Sep 2021 11:27:31 -0400 Subject: [PATCH 288/543] drm/amdgpu: add amdgpu_amdkfd_resume_iommu Add amdgpu_amdkfd_resume_iommu for amdgpu. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=211277 Signed-off-by: James Zhu Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 10 ++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + 2 files changed, 11 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 3003ee1c9487..1d41c2c00623 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -192,6 +192,16 @@ void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm) kgd2kfd_suspend(adev->kfd.dev, run_pm); } +int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev) +{ + int r = 0; + + if (adev->kfd.dev) + r = kgd2kfd_resume_iommu(adev->kfd.dev); + + return r; +} + int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm) { int r = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 089c7b967008..3bc52b2c604f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -137,6 +137,7 @@ int amdgpu_amdkfd_init(void); void amdgpu_amdkfd_fini(void); void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm); +int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev); int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm); void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, const void *ih_ring_entry); From f02abeb0779700c308e661a412451b38962b8a0b Mon Sep 17 00:00:00 2001 From: James Zhu Date: Tue, 7 Sep 2021 11:32:22 -0400 Subject: [PATCH 289/543] drm/amdgpu: move iommu_resume before ip init/resume Separate iommu_resume from kfd_resume, and move it before other amdgpu ip init/resume. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=211277 Signed-off-by: James Zhu Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 41c6b3aacd37..ab3794c42d36 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2394,6 +2394,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) goto init_failed; + r = amdgpu_amdkfd_resume_iommu(adev); + if (r) + goto init_failed; + r = amdgpu_device_ip_hw_init_phase1(adev); if (r) goto init_failed; @@ -3148,6 +3152,10 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev) { int r; + r = amdgpu_amdkfd_resume_iommu(adev); + if (r) + return r; + r = amdgpu_device_ip_resume_phase1(adev); if (r) return r; @@ -4601,6 +4609,10 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, dev_warn(tmp_adev->dev, "asic atom init failed!"); } else { dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); + r = amdgpu_amdkfd_resume_iommu(tmp_adev); + if (r) + goto out; + r = amdgpu_device_ip_resume_phase1(tmp_adev); if (r) goto out; From 93def70cf8b23de5049d101b7dd5367864694bd3 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Mon, 13 Sep 2021 10:08:23 +0200 Subject: [PATCH 290/543] drm/radeon: pass drm dev radeon_agp_head_init directly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass drm dev directly as rdev->ddev gets initialized later on at radeon_device_init(). Bug: https://bugzilla.kernel.org/show_bug.cgi?id=214375 Signed-off-by: Nirmoy Das Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_kms.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 0473583dcdac..482fb0ae6cb5 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -119,7 +119,7 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) #endif if (pci_find_capability(pdev, PCI_CAP_ID_AGP)) - rdev->agp = radeon_agp_head_init(rdev->ddev); + rdev->agp = radeon_agp_head_init(dev); if (rdev->agp) { rdev->agp->agp_mtrr = arch_phys_wc_add( rdev->agp->agp_info.aper_base, From 8b514e898ee7f861eb8863c647d258f71053af40 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 9 Sep 2021 11:01:00 +0800 Subject: [PATCH 291/543] drm/amd/pm: fix runpm hang when amdgpu loaded prior to sound driver Current RUNPM mechanism relies on PMFW to master the timing for BACO in/exit. And that needs cooperation from sound driver for dstate change notification for function 1(audio). Otherwise(on sound driver missing), BACO cannot be kicked in correctly and hang will be observed on RUNPM exit. By switching back to legacy message way on sound driver missing, we are able to fix the runpm hang observed for the scenario below: amdgpu driver loaded -> runpm suspend kicked -> sound driver loaded Signed-off-by: Evan Quan Reported-and-tested-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Lijo Lazar Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 24 +++++++++++++++++-- .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 4 ++-- drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 21 ++++++++++++++++ drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h | 2 ++ 4 files changed, 47 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index aec7cb21cc0f..b1ad451af06b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -2274,7 +2274,27 @@ static int navi10_baco_enter(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - if (adev->in_runpm) + /* + * This aims the case below: + * amdgpu driver loaded -> runpm suspend kicked -> sound driver loaded + * + * For NAVI10 and later ASICs, we rely on PMFW to handle the runpm. To + * make that possible, PMFW needs to acknowledge the dstate transition + * process for both gfx(function 0) and audio(function 1) function of + * the ASIC. + * + * The PCI device's initial runpm status is RUNPM_SUSPENDED. So as the + * device representing the audio function of the ASIC. And that means + * even if the sound driver(snd_hda_intel) was not loaded yet, it's still + * possible runpm suspend kicked on the ASIC. However without the dstate + * transition notification from audio function, pmfw cannot handle the + * BACO in/exit correctly. And that will cause driver hang on runpm + * resuming. + * + * To address this, we revert to legacy message way(driver masters the + * timing for BACO in/exit) on sound driver missing. + */ + if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) return smu_v11_0_baco_set_armd3_sequence(smu, BACO_SEQ_BACO); else return smu_v11_0_baco_enter(smu); @@ -2284,7 +2304,7 @@ static int navi10_baco_exit(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - if (adev->in_runpm) { + if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) { /* Wait for PMFW handling for the Dstate change */ msleep(10); return smu_v11_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index d7519688065f..ca57221e3962 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -2189,7 +2189,7 @@ static int sienna_cichlid_baco_enter(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - if (adev->in_runpm) + if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) return smu_v11_0_baco_set_armd3_sequence(smu, BACO_SEQ_BACO); else return smu_v11_0_baco_enter(smu); @@ -2199,7 +2199,7 @@ static int sienna_cichlid_baco_exit(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - if (adev->in_runpm) { + if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) { /* Wait for PMFW handling for the Dstate change */ msleep(10); return smu_v11_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 66711ab24c15..843d2cbfc71d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -1053,3 +1053,24 @@ int smu_cmn_set_mp1_state(struct smu_context *smu, return ret; } + +bool smu_cmn_is_audio_func_enabled(struct amdgpu_device *adev) +{ + struct pci_dev *p = NULL; + bool snd_driver_loaded; + + /* + * If the ASIC comes with no audio function, we always assume + * it is "enabled". + */ + p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus), + adev->pdev->bus->number, 1); + if (!p) + return true; + + snd_driver_loaded = pci_is_enabled(p) ? true : false; + + pci_dev_put(p); + + return snd_driver_loaded; +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h index 4054d9493e77..beea03810bca 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h @@ -123,5 +123,7 @@ static inline void smu_cmn_get_sysfs_buf(char **buf, int *offset) *buf -= *offset; } +bool smu_cmn_is_audio_func_enabled(struct amdgpu_device *adev); + #endif #endif From 114518ff3b30a3f0611f384fb58e0a968fdf7f5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Thu, 9 Sep 2021 18:56:28 +0200 Subject: [PATCH 292/543] drm/amdgpu: Drop inline from amdgpu_ras_eeprom_max_record_count MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was unusual; normally, inline functions are declared static as well, and defined in a header file if used by multiple compilation units. The latter would be more involved in this case, so just drop the inline declaration for now. Fixes compile failure building for ppc64le on RHEL 8: In file included from ../drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h:32, from ../drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:33: ../drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c: In function ‘amdgpu_ras_recovery_init’: ../drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h:90:17: error: inlining failed in call to ‘always_inline’ ‘amdgpu_ras_eeprom_max_record_count’: function body not available 90 | inline uint32_t amdgpu_ras_eeprom_max_record_count(void); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:1985:34: note: called from here 1985 | max_eeprom_records_len = amdgpu_ras_eeprom_max_record_count(); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fixes: c84d46707ebb "drm/amdgpu: validate bad page threshold in ras(v3)" Reviewed-by: Lyude Paul Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index dc44c946a244..98732518543e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -757,7 +757,7 @@ Out: return res; } -inline uint32_t amdgpu_ras_eeprom_max_record_count(void) +uint32_t amdgpu_ras_eeprom_max_record_count(void) { return RAS_MAX_RECORD_COUNT; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h index f95fc61b3021..6bb00578bfbb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -120,7 +120,7 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control, int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control, struct eeprom_table_record *records, const u32 num); -inline uint32_t amdgpu_ras_eeprom_max_record_count(void); +uint32_t amdgpu_ras_eeprom_max_record_count(void); void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control); From b287e4946873d706f94d95bdb2bf099dc8902181 Mon Sep 17 00:00:00 2001 From: Paul Menzel Date: Mon, 13 Sep 2021 10:34:11 +0200 Subject: [PATCH 293/543] drm/amdgpu: Demote TMZ unsupported log message from warning to info As the user cannot do anything about the unsupported Trusted Memory Zone (TMZ) feature, do not warn about it, but make it informational, so demote the log level from warning to info. Signed-off-by: Paul Menzel Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index c7797eac83c3..9ff600a38559 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -598,7 +598,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) break; default: adev->gmc.tmz_enabled = false; - dev_warn(adev->dev, + dev_info(adev->dev, "Trusted Memory Zone (TMZ) feature not supported\n"); break; } From cd51a57eb59fd56f3fe7ce9cadef444451bcf804 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Fri, 10 Sep 2021 15:37:41 +0000 Subject: [PATCH 294/543] amd/display: enable panel orientation quirks This patch allows panel orientation quirks from DRM core to be used. They attach a DRM connector property "panel orientation" which indicates in which direction the panel has been mounted. Some machines have the internal screen mounted with a rotation. Since the panel orientation quirks need the native mode from the EDID, check for it in amdgpu_dm_connector_ddc_get_modes. Signed-off-by: Simon Ser Cc: Alex Deucher Cc: Harry Wentland Cc: Nicholas Kazlauskas Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5975233dfa77..00b10879baf7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7528,6 +7528,32 @@ static void amdgpu_dm_connector_add_common_modes(struct drm_encoder *encoder, } } +static void amdgpu_set_panel_orientation(struct drm_connector *connector) +{ + struct drm_encoder *encoder; + struct amdgpu_encoder *amdgpu_encoder; + const struct drm_display_mode *native_mode; + + if (connector->connector_type != DRM_MODE_CONNECTOR_eDP && + connector->connector_type != DRM_MODE_CONNECTOR_LVDS) + return; + + encoder = amdgpu_dm_connector_to_encoder(connector); + if (!encoder) + return; + + amdgpu_encoder = to_amdgpu_encoder(encoder); + + native_mode = &amdgpu_encoder->native_mode; + if (native_mode->hdisplay == 0 || native_mode->vdisplay == 0) + return; + + drm_connector_set_panel_orientation_with_quirk(connector, + DRM_MODE_PANEL_ORIENTATION_UNKNOWN, + native_mode->hdisplay, + native_mode->vdisplay); +} + static void amdgpu_dm_connector_ddc_get_modes(struct drm_connector *connector, struct edid *edid) { @@ -7556,6 +7582,8 @@ static void amdgpu_dm_connector_ddc_get_modes(struct drm_connector *connector, * restored here. */ amdgpu_dm_update_freesync_caps(connector, edid); + + amdgpu_set_panel_orientation(connector); } else { amdgpu_dm_connector->num_modes = 0; } From a70939851f9ced298dc7d523374b8c4d05239caf Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Mon, 13 Sep 2021 14:56:44 -0400 Subject: [PATCH 295/543] drm/amd/display: Fix white screen page fault for gpuvm [Why] The "base_addr_is_mc_addr" field was added for dcn3.1 support but pa_config was never updated to set it to false. Uninitialized memory causes it to be set to true which results in address mistranslation and white screen. [How] Use memset to ensure all fields are initialized to 0 by default. Fixes: 64b1d0e8d500 ("drm/amd/display: Add DCN3.1 HWSEQ") Signed-off-by: Nicholas Kazlauskas Acked-by: Alex Deucher Acked-by: Aaron Liu Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 00b10879baf7..66c799f5c7cf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -998,6 +998,8 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_ uint32_t agp_base, agp_bot, agp_top; PHYSICAL_ADDRESS_LOC page_table_start, page_table_end, page_table_base; + memset(pa_config, 0, sizeof(*pa_config)); + logical_addr_low = min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18; pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); From 92554cbe0a36494f6dd760bc25ce5e5cdc60fc47 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 14 Sep 2021 10:50:47 -0400 Subject: [PATCH 296/543] drm/amdgpu/display: add a proper license to dc_link_dp.c Was missing. Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index ceda98a63ff3..f6dbc5a74757 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1,4 +1,26 @@ -/* Copyright 2015 Advanced Micro Devices, Inc. */ +/* + * Copyright 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + */ #include "dm_services.h" #include "dc.h" #include "dc_link_dp.h" From e22e509c1cd90b48ae31099905418de74515e56f Mon Sep 17 00:00:00 2001 From: Alim Akhtar Date: Wed, 15 Sep 2021 20:18:44 +0900 Subject: [PATCH 297/543] dt-bindings: ufs: Add bindings for Samsung ufs host This patch adds DT bindings for Samsung ufs hci Signed-off-by: Alim Akhtar Signed-off-by: Chanho Park Link: https://lore.kernel.org/r/20210915111844.42752-1-chanho61.park@samsung.com Signed-off-by: Rob Herring --- .../bindings/ufs/samsung,exynos-ufs.yaml | 89 +++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 Documentation/devicetree/bindings/ufs/samsung,exynos-ufs.yaml diff --git a/Documentation/devicetree/bindings/ufs/samsung,exynos-ufs.yaml b/Documentation/devicetree/bindings/ufs/samsung,exynos-ufs.yaml new file mode 100644 index 000000000000..b9ca8ef4f2be --- /dev/null +++ b/Documentation/devicetree/bindings/ufs/samsung,exynos-ufs.yaml @@ -0,0 +1,89 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/ufs/samsung,exynos-ufs.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Samsung SoC series UFS host controller Device Tree Bindings + +maintainers: + - Alim Akhtar + +description: | + Each Samsung UFS host controller instance should have its own node. + This binding define Samsung specific binding other then what is used + in the common ufshcd bindings + [1] Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt + +properties: + + compatible: + enum: + - samsung,exynos7-ufs + + reg: + items: + - description: HCI register + - description: vendor specific register + - description: unipro register + - description: UFS protector register + + reg-names: + items: + - const: hci + - const: vs_hci + - const: unipro + - const: ufsp + + clocks: + items: + - description: ufs link core clock + - description: unipro main clock + + clock-names: + items: + - const: core_clk + - const: sclk_unipro_main + + interrupts: + maxItems: 1 + + phys: + maxItems: 1 + + phy-names: + const: ufs-phy + +required: + - compatible + - reg + - interrupts + - phys + - phy-names + - clocks + - clock-names + +additionalProperties: false + +examples: + - | + #include + #include + + ufs: ufs@15570000 { + compatible = "samsung,exynos7-ufs"; + reg = <0x15570000 0x100>, + <0x15570100 0x100>, + <0x15571000 0x200>, + <0x15572000 0x300>; + reg-names = "hci", "vs_hci", "unipro", "ufsp"; + interrupts = ; + clocks = <&clock_fsys1 ACLK_UFS20_LINK>, + <&clock_fsys1 SCLK_UFSUNIPRO20_USER>; + clock-names = "core_clk", "sclk_unipro_main"; + pinctrl-names = "default"; + pinctrl-0 = <&ufs_rst_n &ufs_refclk_out>; + phys = <&ufs_phy>; + phy-names = "ufs-phy"; + }; +... From e35ac9d0b56e9efefaeeb84b635ea26c2839ea86 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 9 Sep 2021 17:53:56 +0100 Subject: [PATCH 298/543] arm64/sve: Use correct size when reinitialising SVE state When we need a buffer for SVE register state we call sve_alloc() to make sure that one is there. In order to avoid repeated allocations and frees we keep the buffer around unless we change vector length and just memset() it to ensure a clean register state. The function that deals with this takes the task to operate on as an argument, however in the case where we do a memset() we initialise using the SVE state size for the current task rather than the task passed as an argument. This is only an issue in the case where we are setting the register state for a task via ptrace and the task being configured has a different vector length to the task tracing it. In the case where the buffer is larger in the traced process we will leak old state from the traced process to itself, in the case where the buffer is smaller in the traced process we will overflow the buffer and corrupt memory. Fixes: bc0ee4760364 ("arm64/sve: Core task context handling") Cc: # 4.15.x Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210909165356.10675-1-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 5a294f20e9de..ff4962750b3d 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -513,7 +513,7 @@ size_t sve_state_size(struct task_struct const *task) void sve_alloc(struct task_struct *task) { if (task->thread.sve_state) { - memset(task->thread.sve_state, 0, sve_state_size(current)); + memset(task->thread.sve_state, 0, sve_state_size(task)); return; } From 861dc4f52e6992c933998fb4dd03fefe1fa5ce27 Mon Sep 17 00:00:00 2001 From: Lv Ruyi Date: Wed, 1 Sep 2021 18:11:26 -0700 Subject: [PATCH 299/543] arm64/kernel: remove duplicate include in process.c Remove all but the first include of linux/sched.h from process.c Reported-by: Zeal Robot Signed-off-by: Lv Ruyi Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20210902011126.29828-1-lv.ruyi@zte.com.cn Signed-off-by: Catalin Marinas --- arch/arm64/kernel/process.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 19100fe8f7e4..1a1213cca173 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include From 9fcb2e93f41c07a400885325e7dbdfceba6efaec Mon Sep 17 00:00:00 2001 From: Dan Li Date: Tue, 14 Sep 2021 17:44:02 +0800 Subject: [PATCH 300/543] arm64: Mark __stack_chk_guard as __ro_after_init __stack_chk_guard is setup once while init stage and never changed after that. Although the modification of this variable at runtime will usually cause the kernel to crash (so does the attacker), it should be marked as __ro_after_init, and it should not affect performance if it is placed in the ro_after_init section. Signed-off-by: Dan Li Acked-by: Mark Rutland Link: https://lore.kernel.org/r/1631612642-102881-1-git-send-email-ashimida@linux.alibaba.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 1a1213cca173..40adb8cdbf5a 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -57,7 +57,7 @@ #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK) #include -unsigned long __stack_chk_guard __read_mostly; +unsigned long __stack_chk_guard __ro_after_init; EXPORT_SYMBOL(__stack_chk_guard); #endif From db71f8fb44956714249a526647c143bac5bb96a1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 16 Sep 2021 11:14:47 -0700 Subject: [PATCH 301/543] 3com 3c515: make it compile on 64-bit architectures This driver isn't enabled most places because of the ISA config dependency, but alpha still has it. And I think the 'Jensen' actually did have an ISA slot. However, it doesn't build cleanly, because the "Vortex bus master" code just casts the skb->data pointer to 'int': outl((int) (skb->data), ioaddr + Wn7_MasterAddr); which is all kinds of broken. Even on a good old traditional PC/AT it would be broken because the high bits will be random kernel address bits, but presumably the hardware ignores those bits. I mean, it's ISA. We're talking 16MB dma limits. The "good old days". Make the build happy with this kind of craziness by using the proper isa_virt_to_bus() handling that the full bus master code uses anyway (the Vortex bus mastering is a limited special case). Who knows, this might even work. Reported-by: Guenter Roeck Signed-off-by: Linus Torvalds --- drivers/net/ethernet/3com/3c515.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c index 8d90fed5d33e..6f0ea2facea9 100644 --- a/drivers/net/ethernet/3com/3c515.c +++ b/drivers/net/ethernet/3com/3c515.c @@ -1050,7 +1050,7 @@ static netdev_tx_t corkscrew_start_xmit(struct sk_buff *skb, #ifdef VORTEX_BUS_MASTER if (vp->bus_master) { /* Set the bus-master controller to transfer the packet. */ - outl((int) (skb->data), ioaddr + Wn7_MasterAddr); + outl(isa_virt_to_bus(skb->data), ioaddr + Wn7_MasterAddr); outw((skb->len + 3) & ~3, ioaddr + Wn7_MasterLen); vp->tx_skb = skb; outw(StartDMADown, ioaddr + EL3_CMD); From 35a3f4ef0ab543daa1725b0c963eb8c05e3376f8 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 8 Sep 2021 22:00:33 -0700 Subject: [PATCH 302/543] alpha: Declare virt_to_phys and virt_to_bus parameter as pointer to volatile Some drivers pass a pointer to volatile data to virt_to_bus() and virt_to_phys(), and that works fine. One exception is alpha. This results in a number of compile errors such as drivers/net/wan/lmc/lmc_main.c: In function 'lmc_softreset': drivers/net/wan/lmc/lmc_main.c:1782:50: error: passing argument 1 of 'virt_to_bus' discards 'volatile' qualifier from pointer target type drivers/atm/ambassador.c: In function 'do_loader_command': drivers/atm/ambassador.c:1747:58: error: passing argument 1 of 'virt_to_bus' discards 'volatile' qualifier from pointer target type Declare the parameter of virt_to_phys and virt_to_bus as pointer to volatile to fix the problem. Signed-off-by: Guenter Roeck Acked-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/io.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index 0fab5ac90775..c9cb554fbe54 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -60,7 +60,7 @@ extern inline void set_hae(unsigned long new_hae) * Change virtual addresses to physical addresses and vv. */ #ifdef USE_48_BIT_KSEG -static inline unsigned long virt_to_phys(void *address) +static inline unsigned long virt_to_phys(volatile void *address) { return (unsigned long)address - IDENT_ADDR; } @@ -70,7 +70,7 @@ static inline void * phys_to_virt(unsigned long address) return (void *) (address + IDENT_ADDR); } #else -static inline unsigned long virt_to_phys(void *address) +static inline unsigned long virt_to_phys(volatile void *address) { unsigned long phys = (unsigned long)address; @@ -106,7 +106,7 @@ static inline void * phys_to_virt(unsigned long address) extern unsigned long __direct_map_base; extern unsigned long __direct_map_size; -static inline unsigned long __deprecated virt_to_bus(void *address) +static inline unsigned long __deprecated virt_to_bus(volatile void *address) { unsigned long phys = virt_to_phys(address); unsigned long bus = phys + __direct_map_base; From b60cee5bae733f49ba33840804c159a8e474cfda Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 9 Sep 2021 11:47:14 -0700 Subject: [PATCH 303/543] cpufreq: vexpress: Drop unused variable arm:allmodconfig fails to build with the following error. drivers/cpufreq/vexpress-spc-cpufreq.c:454:13: error: unused variable 'cur_cluster' Remove the unused variable. Fixes: bb8c26d9387f ("cpufreq: vexpress: Set CPUFREQ_IS_COOLING_DEV flag") Cc: Viresh Kumar Signed-off-by: Guenter Roeck Reviewed-by: Kees Cook Signed-off-by: Linus Torvalds --- drivers/cpufreq/vexpress-spc-cpufreq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c index 284b6bd040b1..d295f405c4bb 100644 --- a/drivers/cpufreq/vexpress-spc-cpufreq.c +++ b/drivers/cpufreq/vexpress-spc-cpufreq.c @@ -451,7 +451,6 @@ static int ve_spc_cpufreq_init(struct cpufreq_policy *policy) static int ve_spc_cpufreq_exit(struct cpufreq_policy *policy) { struct device *cpu_dev; - int cur_cluster = cpu_to_cluster(policy->cpu); cpu_dev = get_cpu_device(policy->cpu); if (!cpu_dev) { From 39a71f712d8a13728febd8f3cb3f6db7e1fa7221 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 15 Sep 2021 15:28:06 -0600 Subject: [PATCH 304/543] selftests:kvm: fix get_warnings_count() ignoring fscanf() return warn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix get_warnings_count() to check fscanf() return value to get rid of the following warning: x86_64/mmio_warning_test.c: In function ‘get_warnings_count’: x86_64/mmio_warning_test.c:85:2: warning: ignoring return value of ‘fscanf’ declared with attribute ‘warn_unused_result’ [-Wunused-result] 85 | fscanf(f, "%d", &warnings); | ^~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Shuah Khan Acked-by: Paolo Bonzini Signed-off-by: Shuah Khan --- tools/testing/selftests/kvm/x86_64/mmio_warning_test.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c index e6480fd5c4bd..8039e1eff938 100644 --- a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c +++ b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c @@ -82,7 +82,8 @@ int get_warnings_count(void) FILE *f; f = popen("dmesg | grep \"WARNING:\" | wc -l", "r"); - fscanf(f, "%d", &warnings); + if (fscanf(f, "%d", &warnings) < 1) + warnings = 0; fclose(f); return warnings; From 3a4f0cc693cd3d80e66a255f0bff0e2c0461eef1 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 15 Sep 2021 15:28:07 -0600 Subject: [PATCH 305/543] selftests:kvm: fix get_trans_hugepagesz() ignoring fscanf() return warn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix get_trans_hugepagesz() to check fscanf() return value to get rid of the following warning: lib/test_util.c: In function ‘get_trans_hugepagesz’: lib/test_util.c:138:2: warning: ignoring return value of ‘fscanf’ declared with attribute ‘warn_unused_result’ [-Wunused-result] 138 | fscanf(f, "%ld", &size); | ^~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Shuah Khan Acked-by: Paolo Bonzini Signed-off-by: Shuah Khan --- tools/testing/selftests/kvm/lib/test_util.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index af1031fed97f..938cd423643e 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -129,13 +129,16 @@ size_t get_trans_hugepagesz(void) { size_t size; FILE *f; + int ret; TEST_ASSERT(thp_configured(), "THP is not configured in host kernel"); f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r"); TEST_ASSERT(f != NULL, "Error in opening transparent_hugepage/hpage_pmd_size"); - fscanf(f, "%ld", &size); + ret = fscanf(f, "%ld", &size); + ret = fscanf(f, "%ld", &size); + TEST_ASSERT(ret < 1, "Error reading transparent_hugepage/hpage_pmd_size"); fclose(f); return size; From 20175d5eac5bb94a7a3719ef275337fc9abf26ac Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 15 Sep 2021 15:28:08 -0600 Subject: [PATCH 306/543] selftests: kvm: move get_run_delay() into lib/test_util get_run_delay() is defined static in xen_shinfo_test and steal_time test. Move it to lib and remove code duplication. Signed-off-by: Shuah Khan Acked-by: Paolo Bonzini Signed-off-by: Shuah Khan --- tools/testing/selftests/kvm/include/test_util.h | 1 + tools/testing/selftests/kvm/lib/test_util.c | 15 +++++++++++++++ tools/testing/selftests/kvm/steal_time.c | 15 --------------- .../selftests/kvm/x86_64/xen_shinfo_test.c | 15 --------------- 4 files changed, 16 insertions(+), 30 deletions(-) diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index d79be15dd3d2..c7409b9b4e5b 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -102,6 +102,7 @@ const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i); size_t get_backing_src_pagesz(uint32_t i); void backing_src_help(void); enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name); +long get_run_delay(void); /* * Whether or not the given source type is shared memory (as opposed to diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 938cd423643e..f80dd38a38b2 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "linux/kernel.h" @@ -303,3 +304,17 @@ enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name) TEST_FAIL("Unknown backing src type: %s", type_name); return -1; } + +long get_run_delay(void) +{ + char path[64]; + long val[2]; + FILE *fp; + + sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid)); + fp = fopen(path, "r"); + fscanf(fp, "%ld %ld ", &val[0], &val[1]); + fclose(fp); + + return val[1]; +} diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index ecec30865a74..51fe95a5c36a 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include @@ -217,20 +216,6 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpuid) #endif -static long get_run_delay(void) -{ - char path[64]; - long val[2]; - FILE *fp; - - sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid)); - fp = fopen(path, "r"); - fscanf(fp, "%ld %ld ", &val[0], &val[1]); - fclose(fp); - - return val[1]; -} - static void *do_steal_time(void *arg) { struct timespec ts, stop; diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index 117bf49a3d79..eda0d2a51224 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -14,7 +14,6 @@ #include #include #include -#include #define VCPU_ID 5 @@ -98,20 +97,6 @@ static void guest_code(void) GUEST_DONE(); } -static long get_run_delay(void) -{ - char path[64]; - long val[2]; - FILE *fp; - - sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid)); - fp = fopen(path, "r"); - fscanf(fp, "%ld %ld ", &val[0], &val[1]); - fclose(fp); - - return val[1]; -} - static int cmp_timespec(struct timespec *a, struct timespec *b) { if (a->tv_sec > b->tv_sec) From f5013d412a43662b63f3d5f3a804d63213acd471 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 15 Sep 2021 15:28:09 -0600 Subject: [PATCH 307/543] selftests: kvm: fix get_run_delay() ignoring fscanf() return warn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix get_run_delay() to check fscanf() return value to get rid of the following warning. When fscanf() fails return MIN_RUN_DELAY_NS from get_run_delay(). Move MIN_RUN_DELAY_NS from steal_time.c to test_util.h so get_run_delay() and steal_time.c can use it. lib/test_util.c: In function ‘get_run_delay’: lib/test_util.c:316:2: warning: ignoring return value of ‘fscanf’ declared with attribute ‘warn_unused_result’ [-Wunused-result] 316 | fscanf(fp, "%ld %ld ", &val[0], &val[1]); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Shuah Khan Acked-by: Paolo Bonzini Signed-off-by: Shuah Khan --- tools/testing/selftests/kvm/include/test_util.h | 2 ++ tools/testing/selftests/kvm/lib/test_util.c | 4 +++- tools/testing/selftests/kvm/steal_time.c | 1 - 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index c7409b9b4e5b..451fed5ce8e7 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -95,6 +95,8 @@ struct vm_mem_backing_src_alias { uint32_t flag; }; +#define MIN_RUN_DELAY_NS 200000UL + bool thp_configured(void); size_t get_trans_hugepagesz(void); size_t get_def_hugetlb_pagesz(void); diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index f80dd38a38b2..a9107bfae402 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -313,7 +313,9 @@ long get_run_delay(void) sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid)); fp = fopen(path, "r"); - fscanf(fp, "%ld %ld ", &val[0], &val[1]); + /* Return MIN_RUN_DELAY_NS upon failure just to be safe */ + if (fscanf(fp, "%ld %ld ", &val[0], &val[1]) < 2) + val[1] = MIN_RUN_DELAY_NS; fclose(fp); return val[1]; diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index 51fe95a5c36a..2172d65b85e4 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -19,7 +19,6 @@ #define NR_VCPUS 4 #define ST_GPA_BASE (1 << 30) -#define MIN_RUN_DELAY_NS 200000UL static void *st_gva[NR_VCPUS]; static uint64_t guest_stolen_time[NR_VCPUS]; From 040b8907ccf1c78d020aca29800036565d761d73 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Apr 2020 23:31:24 +0200 Subject: [PATCH 308/543] drm/rockchip: cdn-dp-core: Make cdn_dp_core_resume __maybe_unused With the new static annotation, the compiler warns when the functions are actually unused: drivers/gpu/drm/rockchip/cdn-dp-core.c:1123:12: error: 'cdn_dp_resume' defined but not used [-Werror=unused-function] 1123 | static int cdn_dp_resume(struct device *dev) | ^~~~~~~~~~~~~ Mark them __maybe_unused to suppress that warning as well. [ Not so 'new' static annotations any more, and I removed the part of the patch that added __maybe_unused to cdn_dp_suspend(), because it's used by the shutdown/remove code. So only the resume function ends up possibly unused if CONFIG_PM isn't set - Linus ] Fixes: 7c49abb4c2f8 ("drm/rockchip: cdn-dp-core: Make cdn_dp_core_suspend/resume static") Signed-off-by: Arnd Bergmann Reviewed-by: Enric Balletbo i Serra Signed-off-by: Linus Torvalds --- drivers/gpu/drm/rockchip/cdn-dp-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.c b/drivers/gpu/drm/rockchip/cdn-dp-core.c index 8ab3247dbc4a..13c6b857158f 100644 --- a/drivers/gpu/drm/rockchip/cdn-dp-core.c +++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c @@ -1123,7 +1123,7 @@ static int cdn_dp_suspend(struct device *dev) return ret; } -static int cdn_dp_resume(struct device *dev) +static __maybe_unused int cdn_dp_resume(struct device *dev) { struct cdn_dp_device *dp = dev_get_drvdata(dev); From 3c0d2a46c0141913dc6fd126c57d0615677d946e Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 8 Sep 2021 20:57:43 -0700 Subject: [PATCH 309/543] net: 6pack: Fix tx timeout and slot time tx timeout and slot time are currently specified in units of HZ. On Alpha, HZ is defined as 1024. When building alpha:allmodconfig, this results in the following error message. drivers/net/hamradio/6pack.c: In function 'sixpack_open': drivers/net/hamradio/6pack.c:71:41: error: unsigned conversion from 'int' to 'unsigned char' changes value from '256' to '0' In the 6PACK protocol, tx timeout is specified in units of 10 ms and transmitted over the wire: https://www.linux-ax25.org/wiki/6PACK Defining a value dependent on HZ doesn't really make sense, and presumably comes from the (very historical) situation where HZ was originally 100. Note that the SIXP_SLOTTIME use explicitly is about 10ms granularity: mod_timer(&sp->tx_t, jiffies + ((when + 1) * HZ) / 100); and the SIXP_TXDELAY walue is sent as a byte over the wire. Signed-off-by: Guenter Roeck Signed-off-by: Linus Torvalds --- drivers/net/hamradio/6pack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 8fe8887d506a..6192244b304a 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -68,9 +68,9 @@ #define SIXP_DAMA_OFF 0 /* default level 2 parameters */ -#define SIXP_TXDELAY (HZ/4) /* in 1 s */ +#define SIXP_TXDELAY 25 /* 250 ms */ #define SIXP_PERSIST 50 /* in 256ths */ -#define SIXP_SLOTTIME (HZ/10) /* in 1 s */ +#define SIXP_SLOTTIME 10 /* 100 ms */ #define SIXP_INIT_RESYNC_TIMEOUT (3*HZ/2) /* in 1 s */ #define SIXP_RESYNC_TIMEOUT 5*HZ /* in 1 s */ From e9a9970bf520c99e530d8f1fa5b5c22671fad4ef Mon Sep 17 00:00:00 2001 From: Russ Weight Date: Thu, 16 Sep 2021 14:07:33 -0700 Subject: [PATCH 310/543] fpga: dfl: Avoid reads to AFU CSRs during enumeration CSR address space for Accelerator Functional Units (AFU) is not available during the early Device Feature List (DFL) enumeration. Early access to this space results in invalid data and port errors. This change adds a condition to prevent an early read from the AFU CSR space. Fixes: 1604986c3e6b ("fpga: dfl: expose feature revision from struct dfl_device") Cc: stable@vger.kernel.org Signed-off-by: Russ Weight Signed-off-by: Moritz Fischer --- drivers/fpga/dfl.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/fpga/dfl.c b/drivers/fpga/dfl.c index c99b78ee008a..f86666cf2c6a 100644 --- a/drivers/fpga/dfl.c +++ b/drivers/fpga/dfl.c @@ -1019,16 +1019,18 @@ create_feature_instance(struct build_feature_devs_info *binfo, { unsigned int irq_base, nr_irqs; struct dfl_feature_info *finfo; + u8 revision = 0; int ret; - u8 revision; u64 v; - v = readq(binfo->ioaddr + ofst); - revision = FIELD_GET(DFH_REVISION, v); + if (fid != FEATURE_ID_AFU) { + v = readq(binfo->ioaddr + ofst); + revision = FIELD_GET(DFH_REVISION, v); - /* read feature size and id if inputs are invalid */ - size = size ? size : feature_size(v); - fid = fid ? fid : feature_id(v); + /* read feature size and id if inputs are invalid */ + size = size ? size : feature_size(v); + fid = fid ? fid : feature_id(v); + } if (binfo->len - ofst < size) return -EINVAL; From a11de92523f75a8140cf8eea3ce9b628f7a3cc77 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 16 Sep 2021 11:55:01 +0200 Subject: [PATCH 311/543] dt-bindings: net: dsa: sja1105: update nxp,sja1105.yaml reference Changeset 62568bdbe6f6 ("dt-bindings: net: dsa: sja1105: convert to YAML schema") renamed: Documentation/devicetree/bindings/net/dsa/sja1105.txt to: Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml. Update its cross-reference accordingly. Fixes: 62568bdbe6f6 ("dt-bindings: net: dsa: sja1105: convert to YAML schema") Reviewed-by: Vladimir Oltean Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/994ce6c6358746ff600459822b9f6e336db933c9.1631785820.git.mchehab+huawei@kernel.org --- Documentation/networking/dsa/sja1105.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/dsa/sja1105.rst b/Documentation/networking/dsa/sja1105.rst index 564caeebe2b2..29b1bae0cf00 100644 --- a/Documentation/networking/dsa/sja1105.rst +++ b/Documentation/networking/dsa/sja1105.rst @@ -296,7 +296,7 @@ not available. Device Tree bindings and board design ===================================== -This section references ``Documentation/devicetree/bindings/net/dsa/sja1105.txt`` +This section references ``Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml`` and aims to showcase some potential switch caveats. RMII PHY role and out-of-band signaling From c8087adc8865c76500dbc072a46b61d35f6c908b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 16 Sep 2021 11:55:02 +0200 Subject: [PATCH 312/543] dt-bindings: arm: mediatek: mmsys: update mediatek,mmsys.yaml reference Changeset cba3c40d1f97 ("dt-bindings: arm: mediatek: mmsys: convert to YAML format") renamed: Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt to: Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml. Update its cross-reference accordingly. Fixes: cba3c40d1f97 ("dt-bindings: arm: mediatek: mmsys: convert to YAML format") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/a87eb079a73e8ab41cdf6e40e80b1d1f868da6bd.1631785820.git.mchehab+huawei@kernel.org --- .../devicetree/bindings/display/mediatek/mediatek,disp.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,disp.txt b/Documentation/devicetree/bindings/display/mediatek/mediatek,disp.txt index fbb59c9ddda6..78044c340e20 100644 --- a/Documentation/devicetree/bindings/display/mediatek/mediatek,disp.txt +++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,disp.txt @@ -9,7 +9,7 @@ function block. All DISP device tree nodes must be siblings to the central MMSYS_CONFIG node. For a description of the MMSYS_CONFIG binding, see -Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt. +Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml. DISP function blocks ==================== From e8f69b16ee776da88589b5271e3f46020efc8f6c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 17 Sep 2021 12:12:04 +0200 Subject: [PATCH 313/543] net: hso: fix muxed tty registration If resource allocation and registration fail for a muxed tty device (e.g. if there are no more minor numbers) the driver should not try to deregister the never-registered (or already-deregistered) tty. Fix up the error handling to avoid dereferencing a NULL pointer when attempting to remove the character device. Fixes: 72dc1c096c70 ("HSO: add option hso driver") Cc: stable@vger.kernel.org # 2.6.27 Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/usb/hso.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index a57251ba5991..f97813a4e8d1 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2719,14 +2719,14 @@ struct hso_device *hso_create_mux_serial_device(struct usb_interface *interface, serial = kzalloc(sizeof(*serial), GFP_KERNEL); if (!serial) - goto exit; + goto err_free_dev; hso_dev->port_data.dev_serial = serial; serial->parent = hso_dev; if (hso_serial_common_create (serial, 1, CTRL_URB_RX_SIZE, CTRL_URB_TX_SIZE)) - goto exit; + goto err_free_serial; serial->tx_data_length--; serial->write_data = hso_mux_serial_write_data; @@ -2742,11 +2742,9 @@ struct hso_device *hso_create_mux_serial_device(struct usb_interface *interface, /* done, return it */ return hso_dev; -exit: - if (serial) { - tty_unregister_device(tty_drv, serial->minor); - kfree(serial); - } +err_free_serial: + kfree(serial); +err_free_dev: kfree(hso_dev); return NULL; From 23ca067b3295d935835b71f743235f9e5ab31cc5 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 1 Sep 2021 10:44:03 +0200 Subject: [PATCH 314/543] mm: Fully initialize invalidate_lock, amend lock class later The function __init_rwsem() is not part of the official API, it just a helper function used by init_rwsem(). Changing the lock's class and name should be done by using lockdep_set_class_and_name() after the has been fully initialized. The overhead of the additional class struct and setting it twice is negligible and it works across all locks. Fully initialize the lock with init_rwsem() and then set the custom class and name for the lock. Fixes: 730633f0b7f95 ("mm: Protect operations adding pages to page cache with invalidate_lock") Link: https://lore.kernel.org/r/20210901084403.g4fezi23cixemlhh@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Jan Kara --- fs/inode.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 37710ca863b5..ed0cab8a32db 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -190,8 +190,10 @@ int inode_init_always(struct super_block *sb, struct inode *inode) mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE); mapping->private_data = NULL; mapping->writeback_index = 0; - __init_rwsem(&mapping->invalidate_lock, "mapping.invalidate_lock", - &sb->s_type->invalidate_lock_key); + init_rwsem(&mapping->invalidate_lock); + lockdep_set_class_and_name(&mapping->invalidate_lock, + &sb->s_type->invalidate_lock_key, + "mapping.invalidate_lock"); inode->i_private = NULL; inode->i_mapping = mapping; INIT_HLIST_HEAD(&inode->i_dentry); /* buggered by rcu freeing */ From dc9660590d106bb58d145233fffca4efadad3655 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 16 Sep 2021 16:41:02 +0200 Subject: [PATCH 315/543] regulator: max14577: Revert "regulator: max14577: Add proper module aliases strings" This reverts commit 0da6736ecd10b45e535b100acd58df2db4c099d8. The MODULE_DEVICE_TABLE already creates proper alias. Having another MODULE_ALIAS causes the alias to be duplicated: $ modinfo max14577-regulator.ko alias: platform:max77836-regulator alias: platform:max14577-regulator description: Maxim 14577/77836 regulator driver alias: platform:max77836-regulator alias: platform:max14577-regulator Cc: Marek Szyprowski Fixes: 0da6736ecd10 ("regulator: max14577: Add proper module aliases strings") Signed-off-by: Krzysztof Kozlowski Tested-by: Marek Szyprowski Link: https://lore.kernel.org/r/20210916144102.120980-1-krzysztof.kozlowski@canonical.com Signed-off-by: Mark Brown --- drivers/regulator/max14577-regulator.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/regulator/max14577-regulator.c b/drivers/regulator/max14577-regulator.c index 1d78b455cc48..e34face736f4 100644 --- a/drivers/regulator/max14577-regulator.c +++ b/drivers/regulator/max14577-regulator.c @@ -269,5 +269,3 @@ module_exit(max14577_regulator_exit); MODULE_AUTHOR("Krzysztof Kozlowski "); MODULE_DESCRIPTION("Maxim 14577/77836 regulator driver"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:max14577-regulator"); -MODULE_ALIAS("platform:max77836-regulator"); From 3c9cfb5269f76d447dbadb67835368f3111a91d7 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 17 Sep 2021 14:17:35 +0300 Subject: [PATCH 316/543] net: update NXP copyright text NXP Legal insists that the following are not fine: - Saying "NXP Semiconductors" instead of "NXP", since the company's registered name is "NXP" - Putting a "(c)" sign in the copyright string - Putting a comma in the copyright string The only accepted copyright string format is "Copyright NXP". This patch changes the copyright headers in the networking files that were sent by me, or derived from code sent by me. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix.c | 2 +- drivers/net/dsa/ocelot/felix.h | 2 +- drivers/net/dsa/ocelot/felix_vsc9959.c | 2 +- drivers/net/dsa/sja1105/sja1105_clocking.c | 2 +- drivers/net/dsa/sja1105/sja1105_devlink.c | 2 +- drivers/net/dsa/sja1105/sja1105_flower.c | 2 +- drivers/net/dsa/sja1105/sja1105_mdio.c | 2 +- drivers/net/dsa/sja1105/sja1105_spi.c | 2 +- drivers/net/dsa/sja1105/sja1105_static_config.c | 2 +- drivers/net/dsa/sja1105/sja1105_static_config.h | 2 +- drivers/net/dsa/sja1105/sja1105_vl.c | 2 +- drivers/net/dsa/sja1105/sja1105_vl.h | 2 +- drivers/net/ethernet/freescale/enetc/enetc_ierb.c | 2 +- drivers/net/ethernet/freescale/enetc/enetc_ierb.h | 2 +- drivers/net/ethernet/mscc/ocelot_devlink.c | 2 +- drivers/net/ethernet/mscc/ocelot_mrp.c | 2 +- drivers/net/ethernet/mscc/ocelot_net.c | 2 +- drivers/net/pcs/pcs-xpcs-nxp.c | 2 +- include/linux/dsa/ocelot.h | 2 +- include/linux/packing.h | 2 +- lib/packing.c | 2 +- net/dsa/tag_ocelot.c | 2 +- net/dsa/tag_ocelot_8021q.c | 2 +- tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh | 2 +- 24 files changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 3656e67af789..a3a9636430d6 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright 2019-2021 NXP Semiconductors +/* Copyright 2019-2021 NXP * * This is an umbrella module for all network switches that are * register-compatible with Ocelot and that perform I/O to their host CPU diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h index 5854bab43327..54024b6f9498 100644 --- a/drivers/net/dsa/ocelot/felix.h +++ b/drivers/net/dsa/ocelot/felix.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright 2019 NXP Semiconductors +/* Copyright 2019 NXP */ #ifndef _MSCC_FELIX_H #define _MSCC_FELIX_H diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index f966a253d1c7..9e2ac8e46619 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0 OR MIT) /* Copyright 2017 Microsemi Corporation - * Copyright 2018-2019 NXP Semiconductors + * Copyright 2018-2019 NXP */ #include #include diff --git a/drivers/net/dsa/sja1105/sja1105_clocking.c b/drivers/net/dsa/sja1105/sja1105_clocking.c index 387a1f2f161c..5bbf1707f2af 100644 --- a/drivers/net/dsa/sja1105/sja1105_clocking.c +++ b/drivers/net/dsa/sja1105/sja1105_clocking.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: BSD-3-Clause -/* Copyright (c) 2016-2018, NXP Semiconductors +/* Copyright 2016-2018 NXP * Copyright (c) 2018-2019, Vladimir Oltean */ #include diff --git a/drivers/net/dsa/sja1105/sja1105_devlink.c b/drivers/net/dsa/sja1105/sja1105_devlink.c index 05c7f4ca3b1a..0569ff066634 100644 --- a/drivers/net/dsa/sja1105/sja1105_devlink.c +++ b/drivers/net/dsa/sja1105/sja1105_devlink.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2018-2019, Vladimir Oltean - * Copyright 2020 NXP Semiconductors + * Copyright 2020 NXP */ #include "sja1105.h" diff --git a/drivers/net/dsa/sja1105/sja1105_flower.c b/drivers/net/dsa/sja1105/sja1105_flower.c index 6c10ffa968ce..72b9b39b0989 100644 --- a/drivers/net/dsa/sja1105/sja1105_flower.c +++ b/drivers/net/dsa/sja1105/sja1105_flower.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright 2020, NXP Semiconductors +/* Copyright 2020 NXP */ #include "sja1105.h" #include "sja1105_vl.h" diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c index 705d3900e43a..215dd17ca790 100644 --- a/drivers/net/dsa/sja1105/sja1105_mdio.c +++ b/drivers/net/dsa/sja1105/sja1105_mdio.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright 2021, NXP Semiconductors +/* Copyright 2021 NXP */ #include #include diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c index d60a530d0272..d3c9ad6d39d4 100644 --- a/drivers/net/dsa/sja1105/sja1105_spi.c +++ b/drivers/net/dsa/sja1105/sja1105_spi.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: BSD-3-Clause -/* Copyright (c) 2016-2018, NXP Semiconductors +/* Copyright 2016-2018 NXP * Copyright (c) 2018, Sensor-Technik Wiedemann GmbH * Copyright (c) 2018-2019, Vladimir Oltean */ diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c index 7a422ef4deb6..baba204ad62f 100644 --- a/drivers/net/dsa/sja1105/sja1105_static_config.c +++ b/drivers/net/dsa/sja1105/sja1105_static_config.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: BSD-3-Clause -/* Copyright (c) 2016-2018, NXP Semiconductors +/* Copyright 2016-2018 NXP * Copyright (c) 2018-2019, Vladimir Oltean */ #include "sja1105_static_config.h" diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.h b/drivers/net/dsa/sja1105/sja1105_static_config.h index bce0f5c03d0b..6a372d5f22ae 100644 --- a/drivers/net/dsa/sja1105/sja1105_static_config.h +++ b/drivers/net/dsa/sja1105/sja1105_static_config.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause */ -/* Copyright (c) 2016-2018, NXP Semiconductors +/* Copyright 2016-2018 NXP * Copyright (c) 2018-2019, Vladimir Oltean */ #ifndef _SJA1105_STATIC_CONFIG_H diff --git a/drivers/net/dsa/sja1105/sja1105_vl.c b/drivers/net/dsa/sja1105/sja1105_vl.c index ec7b65daec20..6802f4057cc0 100644 --- a/drivers/net/dsa/sja1105/sja1105_vl.c +++ b/drivers/net/dsa/sja1105/sja1105_vl.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright 2020, NXP Semiconductors +/* Copyright 2020 NXP */ #include #include diff --git a/drivers/net/dsa/sja1105/sja1105_vl.h b/drivers/net/dsa/sja1105/sja1105_vl.h index 173d78963fed..51fba0dce91a 100644 --- a/drivers/net/dsa/sja1105/sja1105_vl.h +++ b/drivers/net/dsa/sja1105/sja1105_vl.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright 2020, NXP Semiconductors +/* Copyright 2020 NXP */ #ifndef _SJA1105_VL_H #define _SJA1105_VL_H diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ierb.c b/drivers/net/ethernet/freescale/enetc/enetc_ierb.c index ee1468e3eaa3..91f02c505028 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ierb.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ierb.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) -/* Copyright 2021 NXP Semiconductors +/* Copyright 2021 NXP * * The Integrated Endpoint Register Block (IERB) is configured by pre-boot * software and is supposed to be to ENETC what a NVRAM is to a 'real' PCIe diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ierb.h b/drivers/net/ethernet/freescale/enetc/enetc_ierb.h index b3b774e0998a..c2ce47c4be9f 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ierb.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_ierb.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ -/* Copyright 2021 NXP Semiconductors */ +/* Copyright 2021 NXP */ #include #include diff --git a/drivers/net/ethernet/mscc/ocelot_devlink.c b/drivers/net/ethernet/mscc/ocelot_devlink.c index edafbd37d12c..b8737efd2a85 100644 --- a/drivers/net/ethernet/mscc/ocelot_devlink.c +++ b/drivers/net/ethernet/mscc/ocelot_devlink.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: (GPL-2.0 OR MIT) -/* Copyright 2020-2021 NXP Semiconductors +/* Copyright 2020-2021 NXP */ #include #include "ocelot.h" diff --git a/drivers/net/ethernet/mscc/ocelot_mrp.c b/drivers/net/ethernet/mscc/ocelot_mrp.c index 08b481a93460..4b0941f09f71 100644 --- a/drivers/net/ethernet/mscc/ocelot_mrp.c +++ b/drivers/net/ethernet/mscc/ocelot_mrp.c @@ -2,7 +2,7 @@ /* Microsemi Ocelot Switch driver * * Copyright (c) 2017, 2019 Microsemi Corporation - * Copyright 2020-2021 NXP Semiconductors + * Copyright 2020-2021 NXP */ #include diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index c0c465a4a981..e54b9fb2a97a 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -5,7 +5,7 @@ * mscc_ocelot_switch_lib. * * Copyright (c) 2017, 2019 Microsemi Corporation - * Copyright 2020-2021 NXP Semiconductors + * Copyright 2020-2021 NXP */ #include diff --git a/drivers/net/pcs/pcs-xpcs-nxp.c b/drivers/net/pcs/pcs-xpcs-nxp.c index 984c9f7f16a8..d16fc58cd48d 100644 --- a/drivers/net/pcs/pcs-xpcs-nxp.c +++ b/drivers/net/pcs/pcs-xpcs-nxp.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright 2021 NXP Semiconductors +/* Copyright 2021 NXP */ #include #include "pcs-xpcs.h" diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h index c6bc45ae5e03..435777a0073c 100644 --- a/include/linux/dsa/ocelot.h +++ b/include/linux/dsa/ocelot.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 - * Copyright 2019-2021 NXP Semiconductors + * Copyright 2019-2021 NXP */ #ifndef _NET_DSA_TAG_OCELOT_H diff --git a/include/linux/packing.h b/include/linux/packing.h index 54667735cc67..8d6571feb95d 100644 --- a/include/linux/packing.h +++ b/include/linux/packing.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright (c) 2016-2018, NXP Semiconductors + * Copyright 2016-2018 NXP * Copyright (c) 2018-2019, Vladimir Oltean */ #ifndef _LINUX_PACKING_H diff --git a/lib/packing.c b/lib/packing.c index 6ed72dccfdb5..9a72f4bbf0e2 100644 --- a/lib/packing.c +++ b/lib/packing.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 -/* Copyright (c) 2016-2018, NXP Semiconductors +/* Copyright 2016-2018 NXP * Copyright (c) 2018-2019, Vladimir Oltean */ #include diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index d37ab98e7fe1..8025ed778d33 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright 2019 NXP Semiconductors +/* Copyright 2019 NXP */ #include #include diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c index 3038a257ba05..59072930cb02 100644 --- a/net/dsa/tag_ocelot_8021q.c +++ b/net/dsa/tag_ocelot_8021q.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright 2020-2021 NXP Semiconductors +/* Copyright 2020-2021 NXP * * An implementation of the software-defined tag_8021q.c tagger format, which * also preserves full functionality under a vlan_filtering bridge. It does diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh index beee0d5646a6..f7d84549cc3e 100755 --- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh +++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh @@ -1,6 +1,6 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -# Copyright 2020 NXP Semiconductors +# Copyright 2020 NXP WAIT_TIME=1 NUM_NETIFS=4 From 02319bf15acf54004216e40ac9c171437f24be24 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 16 Sep 2021 14:33:35 -0700 Subject: [PATCH 317/543] net: dsa: bcm_sf2: Fix array overrun in bcm_sf2_num_active_ports() After d12e1c464988 ("net: dsa: b53: Set correct number of ports in the DSA struct") we stopped setting dsa_switch::num_ports to DSA_MAX_PORTS, which created an off by one error between the statically allocated bcm_sf2_priv::port_sts array (of size DSA_MAX_PORTS). When dsa_is_cpu_port() is used, we end-up accessing an out of bounds member and causing a NPD. Fix this by iterating with the appropriate port count using ds->num_ports. Fixes: d12e1c464988 ("net: dsa: b53: Set correct number of ports in the DSA struct") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 6ce9ec1283e0..b6c4b3adb171 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -68,7 +68,7 @@ static unsigned int bcm_sf2_num_active_ports(struct dsa_switch *ds) struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); unsigned int port, count = 0; - for (port = 0; port < ARRAY_SIZE(priv->port_sts); port++) { + for (port = 0; port < ds->num_ports; port++) { if (dsa_is_cpu_port(ds, port)) continue; if (priv->port_sts[port].enabled) From a9b3043de47b7f8cbe38c36aee572526665b6315 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Sun, 12 Sep 2021 11:56:26 +0900 Subject: [PATCH 318/543] ksmbd: transport_rdma: Don't include rwlock.h directly rwlock.h specifically asks to not be included directly. In fact, the proper spinlock.h include isn't needed either, it comes with the huge pile that kthread.h ends up pulling in, so just drop it entirely. Signed-off-by: Mike Galbraith Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/transport_rdma.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c index 52b2556e76b1..3a7fa23ba850 100644 --- a/fs/ksmbd/transport_rdma.c +++ b/fs/ksmbd/transport_rdma.c @@ -20,7 +20,6 @@ #define SUBMOD_NAME "smb_direct" #include -#include #include #include #include From 89c485c7a3ecbc2ebd568f9c9c2edf3a8cf7485b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 16 Sep 2021 17:24:54 -0400 Subject: [PATCH 319/543] NLM: Fix svcxdr_encode_owner() Dai Ngo reports that, since the XDR overhaul, the NLM server crashes when the TEST procedure wants to return NLM_DENIED. There is a bug in svcxdr_encode_owner() that none of our standard test cases found. Replace the open-coded function with a call to an appropriate pre-fabricated XDR helper. Reported-by: Dai Ngo Fixes: a6a63ca5652e ("lockd: Common NLM XDR helpers") Signed-off-by: Chuck Lever --- fs/lockd/svcxdr.h | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/fs/lockd/svcxdr.h b/fs/lockd/svcxdr.h index c69a0bb76c94..4f1a451da5ba 100644 --- a/fs/lockd/svcxdr.h +++ b/fs/lockd/svcxdr.h @@ -134,18 +134,9 @@ svcxdr_decode_owner(struct xdr_stream *xdr, struct xdr_netobj *obj) static inline bool svcxdr_encode_owner(struct xdr_stream *xdr, const struct xdr_netobj *obj) { - unsigned int quadlen = XDR_QUADLEN(obj->len); - __be32 *p; - - if (xdr_stream_encode_u32(xdr, obj->len) < 0) + if (obj->len > XDR_MAX_NETOBJ) return false; - p = xdr_reserve_space(xdr, obj->len); - if (!p) - return false; - p[quadlen - 1] = 0; /* XDR pad */ - memcpy(p, obj->data, obj->len); - - return true; + return xdr_stream_encode_opaque(xdr, obj->data, obj->len) > 0; } #endif /* _LOCKD_SVCXDR_H_ */ From 02579b2ff8b0becfb51d85a975908ac4ab15fba8 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Thu, 16 Sep 2021 14:22:12 -0400 Subject: [PATCH 320/543] nfsd: back channel stuck in SEQ4_STATUS_CB_PATH_DOWN When the back channel enters SEQ4_STATUS_CB_PATH_DOWN state, the client recovers by sending BIND_CONN_TO_SESSION but the server fails to recover the back channel and leaves it as NFSD4_CB_DOWN. Fix by enhancing nfsd4_bind_conn_to_session to probe the back channel by calling nfsd4_probe_callback. Signed-off-by: Dai Ngo Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4b6d60b46b0a..ba8bab17175e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3570,7 +3570,7 @@ static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_s } static __be32 nfsd4_match_existing_connection(struct svc_rqst *rqst, - struct nfsd4_session *session, u32 req) + struct nfsd4_session *session, u32 req, struct nfsd4_conn **conn) { struct nfs4_client *clp = session->se_client; struct svc_xprt *xpt = rqst->rq_xprt; @@ -3593,6 +3593,8 @@ static __be32 nfsd4_match_existing_connection(struct svc_rqst *rqst, else status = nfserr_inval; spin_unlock(&clp->cl_lock); + if (status == nfs_ok && conn) + *conn = c; return status; } @@ -3617,8 +3619,16 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, status = nfserr_wrong_cred; if (!nfsd4_mach_creds_match(session->se_client, rqstp)) goto out; - status = nfsd4_match_existing_connection(rqstp, session, bcts->dir); - if (status == nfs_ok || status == nfserr_inval) + status = nfsd4_match_existing_connection(rqstp, session, + bcts->dir, &conn); + if (status == nfs_ok) { + if (bcts->dir == NFS4_CDFC4_FORE_OR_BOTH || + bcts->dir == NFS4_CDFC4_BACK) + conn->cn_flags |= NFS4_CDFC4_BACK; + nfsd4_probe_callback(session->se_client); + goto out; + } + if (status == nfserr_inval) goto out; status = nfsd4_map_bcts_dir(&bcts->dir); if (status) From bbc9a6eb5eec03dcafee266b19f56295e3b2aa8f Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 17 Aug 2021 07:55:40 +0800 Subject: [PATCH 321/543] btrfs: replace BUG_ON() in btrfs_csum_one_bio() with proper error handling There is a BUG_ON() in btrfs_csum_one_bio() to catch code logic error. It has indeed caught several bugs during subpage development. But the BUG_ON() itself will bring down the whole system which is an overkill. Replace it with a WARN() and exit gracefully, so that it won't crash the whole system while we can still catch the code logic error. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file-item.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 2673c6ba7a4e..0b9401a5afd3 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -665,7 +665,18 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio, if (!ordered) { ordered = btrfs_lookup_ordered_extent(inode, offset); - BUG_ON(!ordered); /* Logic error */ + /* + * The bio range is not covered by any ordered extent, + * must be a code logic error. + */ + if (unlikely(!ordered)) { + WARN(1, KERN_WARNING + "no ordered extent for root %llu ino %llu offset %llu\n", + inode->root->root_key.objectid, + btrfs_ino(inode), offset); + kvfree(sums); + return BLK_STS_IOERR; + } } nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, From acbee9aff8aea4b9b66ab3d5cee6b8dbc153dc38 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 8 Sep 2021 16:29:26 +0100 Subject: [PATCH 322/543] btrfs: fix transaction handle leak after verity rollback failure During a verity rollback, if we fail to update the inode or delete the orphan, we abort the transaction and return without releasing our transaction handle. Fix that by releasing the handle. Fixes: 146054090b0859 ("btrfs: initial fsverity support") Fixes: 705242538ff348 ("btrfs: verity metadata orphan items") Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/verity.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/verity.c b/fs/btrfs/verity.c index 28d443d3ef93..4968535dfff0 100644 --- a/fs/btrfs/verity.c +++ b/fs/btrfs/verity.c @@ -451,7 +451,7 @@ static int del_orphan(struct btrfs_trans_handle *trans, struct btrfs_inode *inod */ static int rollback_verity(struct btrfs_inode *inode) { - struct btrfs_trans_handle *trans; + struct btrfs_trans_handle *trans = NULL; struct btrfs_root *root = inode->root; int ret; @@ -473,6 +473,7 @@ static int rollback_verity(struct btrfs_inode *inode) trans = btrfs_start_transaction(root, 2); if (IS_ERR(trans)) { ret = PTR_ERR(trans); + trans = NULL; btrfs_handle_fs_error(root->fs_info, ret, "failed to start transaction in verity rollback %llu", (u64)inode->vfs_inode.i_ino); @@ -490,8 +491,9 @@ static int rollback_verity(struct btrfs_inode *inode) btrfs_abort_transaction(trans, ret); goto out; } - btrfs_end_transaction(trans); out: + if (trans) + btrfs_end_transaction(trans); return ret; } From 6b225baababf1e3d41a4250e802cbd193e1343fb Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 8 Sep 2021 19:05:44 +0100 Subject: [PATCH 323/543] btrfs: fix mount failure due to past and transient device flush error When we get an error flushing one device, during a super block commit, we record the error in the device structure, in the field 'last_flush_error'. This is used to later check if we should error out the super block commit, depending on whether the number of flush errors is greater than or equals to the maximum tolerated device failures for a raid profile. However if we get a transient device flush error, unmount the filesystem and later try to mount it, we can fail the mount because we treat that past error as critical and consider the device is missing. Even if it's very likely that the error will happen again, as it's probably due to a hardware related problem, there may be cases where the error might not happen again. One example is during testing, and a test case like the new generic/648 from fstests always triggers this. The test cases generic/019 and generic/475 also trigger this scenario, but very sporadically. When this happens we get an error like this: $ mount /dev/sdc /mnt mount: /mnt wrong fs type, bad option, bad superblock on /dev/sdc, missing codepage or helper program, or other error. $ dmesg (...) [12918.886926] BTRFS warning (device sdc): chunk 13631488 missing 1 devices, max tolerance is 0 for writable mount [12918.888293] BTRFS warning (device sdc): writable mount is not allowed due to too many missing devices [12918.890853] BTRFS error (device sdc): open_ctree failed The failure happens because when btrfs_check_rw_degradable() is called at mount time, or at remount from RO to RW time, is sees a non zero value in a device's ->last_flush_error attribute, and therefore considers that the device is 'missing'. Fix this by setting a device's ->last_flush_error to zero when we close a device, making sure the error is not seen on the next mount attempt. We only need to track flush errors during the current mount, so that we never commit a super block if such errors happened. Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 464485aa7318..2ec3b8ac8fa3 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1137,6 +1137,19 @@ static void btrfs_close_one_device(struct btrfs_device *device) atomic_set(&device->dev_stats_ccnt, 0); extent_io_tree_release(&device->alloc_state); + /* + * Reset the flush error record. We might have a transient flush error + * in this mount, and if so we aborted the current transaction and set + * the fs to an error state, guaranteeing no super blocks can be further + * committed. However that error might be transient and if we unmount the + * filesystem and mount it again, we should allow the mount to succeed + * (btrfs_check_rw_degradable() should not fail) - if after mounting the + * filesystem again we still get flush errors, then we will again abort + * any transaction and set the error state, guaranteeing no commits of + * unsafe super blocks. + */ + device->last_flush_error = 0; + /* Verify the device is back in a pristine state */ ASSERT(!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state)); ASSERT(!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)); From 0619b7901473c380abc05d45cf9c70bee0707db3 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 16 Sep 2021 20:43:29 +0800 Subject: [PATCH 324/543] btrfs: prevent __btrfs_dump_space_info() to underflow its free space It's not uncommon where __btrfs_dump_space_info() gets called under over-commit situations. In that case free space would underflow as total allocated space is not enough to handle all the over-committed space. Such underflow values can sometimes cause confusion for users enabled enospc_debug mount option, and takes some seconds for developers to convert the underflow value to signed result. Just output the free space as s64 to avoid such problem. Reported-by: Eli V Link: https://lore.kernel.org/linux-btrfs/CAJtFHUSy4zgyhf-4d9T+KdJp9w=UgzC2A0V=VtmaeEpcGgm1-Q@mail.gmail.com/ CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Anand Jain Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/space-info.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 5ada02e0e629..aa5be0b24987 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -414,9 +414,10 @@ static void __btrfs_dump_space_info(struct btrfs_fs_info *fs_info, { lockdep_assert_held(&info->lock); - btrfs_info(fs_info, "space_info %llu has %llu free, is %sfull", + /* The free space could be negative in case of overcommit */ + btrfs_info(fs_info, "space_info %llu has %lld free, is %sfull", info->flags, - info->total_bytes - btrfs_space_info_used(info, true), + (s64)(info->total_bytes - btrfs_space_info_used(info, true)), info->full ? "" : "not "); btrfs_info(fs_info, "space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu zone_unusable=%llu", From 31c8025fac3d8bbff7ce4602338d88efc2d7972c Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Fri, 17 Sep 2021 14:14:23 +0100 Subject: [PATCH 325/543] of: restricted dma: Fix condition for rmem init of_dma_set_restricted_buffer fails to handle negative return values from of_property_count_elems_of_size, e.g. when the property does not exist. This results in an attempt to assign a non-existent reserved memory region to the device and a warning being printed. Fix the condition to take negative values into account. Fixes: f3cfd136aef0 ("of: restricted dma: Don't fail device probe on rmem init failure") Cc: Will Deacon Signed-off-by: David Brazdil Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210917131423.2760155-1-dbrazdil@google.com Signed-off-by: Rob Herring --- drivers/of/device.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/of/device.c b/drivers/of/device.c index 5b043ee30824..b0800c260f64 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -85,7 +85,11 @@ of_dma_set_restricted_buffer(struct device *dev, struct device_node *np) break; } - if (i != count && of_reserved_mem_device_init_by_idx(dev, of_node, i)) + /* + * Attempt to initialize a restricted-dma-pool region if one was found. + * Note that count can hold a negative error code. + */ + if (i < count && of_reserved_mem_device_init_by_idx(dev, of_node, i)) dev_warn(dev, "failed to initialise \"restricted-dma-pool\" memory node\n"); } From 55c21d57eafb7b379bb7b3e93baf9ca2695895b0 Mon Sep 17 00:00:00 2001 From: David Heidelberg Date: Sun, 12 Sep 2021 18:51:20 +0200 Subject: [PATCH 326/543] dt-bindings: arm: Fix Toradex compatible typo Fix board compatible typo reported by dtbs_check. Fixes: f4d1577e9bc6 ("dt-bindings: arm: Convert Tegra board/soc bindings to json-schema") Signed-off-by: David Heidelberg Link: https://lore.kernel.org/r/20210912165120.188490-1-david@ixit.cz Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/arm/tegra.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/arm/tegra.yaml b/Documentation/devicetree/bindings/arm/tegra.yaml index b962fa6d649c..d79d36ac0c44 100644 --- a/Documentation/devicetree/bindings/arm/tegra.yaml +++ b/Documentation/devicetree/bindings/arm/tegra.yaml @@ -54,7 +54,7 @@ properties: - const: toradex,apalis_t30 - const: nvidia,tegra30 - items: - - const: toradex,apalis_t30-eval-v1.1 + - const: toradex,apalis_t30-v1.1-eval - const: toradex,apalis_t30-eval - const: toradex,apalis_t30-v1.1 - const: toradex,apalis_t30 From e3fc065682ebbbd15b0ce0036800f4acbf765d46 Mon Sep 17 00:00:00 2001 From: Rohith Surabattula Date: Fri, 17 Sep 2021 18:14:26 +0000 Subject: [PATCH 327/543] cifs: Deferred close performance improvements During unlink/rename instead of closing all the deferred handles under tcon, close only handles under the requested dentry. Signed-off-by: Rohith Surabattula Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 3 +++ fs/cifs/inode.c | 6 +++--- fs/cifs/misc.c | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 54966c1a8eb6..d0f85b666662 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -267,6 +267,9 @@ extern void cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode); extern void cifs_close_all_deferred_files(struct cifs_tcon *cifs_tcon); +extern void cifs_close_deferred_file_under_dentry(struct cifs_tcon *cifs_tcon, + const char *path); + extern struct TCP_Server_Info *cifs_get_tcp_session(struct smb3_fs_context *ctx); extern void cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 19af2d0ec8d5..82848412ad85 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1624,7 +1624,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) goto unlink_out; } - cifs_close_deferred_file(CIFS_I(inode)); + cifs_close_deferred_file_under_dentry(tcon, full_path); if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))) { rc = CIFSPOSIXDelFile(xid, tcon, full_path, @@ -2113,9 +2113,9 @@ cifs_rename2(struct user_namespace *mnt_userns, struct inode *source_dir, goto cifs_rename_exit; } - cifs_close_deferred_file(CIFS_I(d_inode(source_dentry))); + cifs_close_deferred_file_under_dentry(tcon, from_name); if (d_inode(target_dentry) != NULL) - cifs_close_deferred_file(CIFS_I(d_inode(target_dentry))); + cifs_close_deferred_file_under_dentry(tcon, to_name); rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry, to_name); diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index f4313935e734..05138f92d905 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -780,6 +780,43 @@ cifs_close_all_deferred_files(struct cifs_tcon *tcon) kfree(tmp_list); } } +void +cifs_close_deferred_file_under_dentry(struct cifs_tcon *tcon, const char *path) +{ + struct cifsFileInfo *cfile; + struct list_head *tmp; + struct file_list *tmp_list, *tmp_next_list; + struct list_head file_head; + void *page; + const char *full_path; + + INIT_LIST_HEAD(&file_head); + page = alloc_dentry_path(); + spin_lock(&tcon->open_file_lock); + list_for_each(tmp, &tcon->openFileList) { + cfile = list_entry(tmp, struct cifsFileInfo, tlist); + full_path = build_path_from_dentry(cfile->dentry, page); + if (strstr(full_path, path)) { + if (delayed_work_pending(&cfile->deferred)) { + if (cancel_delayed_work(&cfile->deferred)) { + tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC); + if (tmp_list == NULL) + break; + tmp_list->cfile = cfile; + list_add_tail(&tmp_list->list, &file_head); + } + } + } + } + spin_unlock(&tcon->open_file_lock); + + list_for_each_entry_safe(tmp_list, tmp_next_list, &file_head, list) { + _cifsFileInfo_put(tmp_list->cfile, true, false); + list_del(&tmp_list->list); + kfree(tmp_list); + } + free_dentry_path(page); +} /* parses DFS refferal V3 structure * caller is responsible for freeing target_nodes From 71826b068884050d5fdd37fda857ba1539c513d3 Mon Sep 17 00:00:00 2001 From: Rohith Surabattula Date: Fri, 17 Sep 2021 17:29:42 +0000 Subject: [PATCH 328/543] cifs: Fix soft lockup during fsstress Below traces are observed during fsstress and system got hung. [ 130.698396] watchdog: BUG: soft lockup - CPU#6 stuck for 26s! Cc: stable@vger.kernel.org # 5.13+ Signed-off-by: Rohith Surabattula Signed-off-by: Steve French --- fs/cifs/misc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 05138f92d905..03da00eb7c04 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -735,7 +735,7 @@ cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode) if (cancel_delayed_work(&cfile->deferred)) { tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC); if (tmp_list == NULL) - continue; + break; tmp_list->cfile = cfile; list_add_tail(&tmp_list->list, &file_head); } @@ -766,7 +766,7 @@ cifs_close_all_deferred_files(struct cifs_tcon *tcon) if (cancel_delayed_work(&cfile->deferred)) { tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC); if (tmp_list == NULL) - continue; + break; tmp_list->cfile = cfile; list_add_tail(&tmp_list->list, &file_head); } From 35866f3f779aef5e7ba84e4d1023fe2e2a0e219e Mon Sep 17 00:00:00 2001 From: Rohith Surabattula Date: Fri, 17 Sep 2021 16:50:40 -0500 Subject: [PATCH 329/543] cifs: Not to defer close on file when lock is set Close file immediately when lock is set. Cc: stable@vger.kernel.org # 5.13+ Signed-off-by: Rohith Surabattula Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 1 + fs/cifs/file.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 7dd9878e26bf..e916470468ea 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1399,6 +1399,7 @@ struct cifsInodeInfo { #define CIFS_INO_INVALID_MAPPING (4) /* pagecache is invalid */ #define CIFS_INO_LOCK (5) /* lock bit for synchronization */ #define CIFS_INO_MODIFIED_ATTR (6) /* Indicate change in mtime/ctime */ +#define CIFS_INO_CLOSE_ON_LOCK (7) /* Not to defer the close when lock is set */ unsigned long flags; spinlock_t writers_lock; unsigned int writers; /* Number of writers on this inode */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 4d10c9343890..6796fc73b304 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -882,6 +882,7 @@ int cifs_close(struct inode *inode, struct file *file) dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL); if ((cinode->oplock == CIFS_CACHE_RHW_FLG) && cinode->lease_granted && + !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) && dclose) { if (test_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) { inode->i_ctime = inode->i_mtime = current_time(inode); @@ -1864,6 +1865,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock) cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag, tcon->ses->server); cifs_sb = CIFS_FILE_SB(file); + set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags); if (cap_unix(tcon->ses) && (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && From f58eae6c5fa882d6d0a6b7587a099602a59d57b5 Mon Sep 17 00:00:00 2001 From: Hyunchul Lee Date: Fri, 17 Sep 2021 22:14:08 +0900 Subject: [PATCH 330/543] ksmbd: prevent out of share access Because of .., files outside the share directory could be accessed. To prevent this, normalize the given path and remove all . and .. components. In addition to the usual large set of regression tests (smbtorture and xfstests), ran various tests on this to specifically check path name validation including libsmb2 tests to verify path normalization: ./examples/smb2-ls-async smb://172.30.1.15/homes2/../ ./examples/smb2-ls-async smb://172.30.1.15/homes2/foo/../ ./examples/smb2-ls-async smb://172.30.1.15/homes2/foo/../../ ./examples/smb2-ls-async smb://172.30.1.15/homes2/foo/../ ./examples/smb2-ls-async smb://172.30.1.15/homes2/foo/..bar/ ./examples/smb2-ls-async smb://172.30.1.15/homes2/foo/bar../ ./examples/smb2-ls-async smb://172.30.1.15/homes2/foo/bar.. ./examples/smb2-ls-async smb://172.30.1.15/homes2/foo/bar../../../../ Signed-off-by: Hyunchul Lee Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/misc.c | 76 ++++++++++++++++++++++++++++++++++++++++------ fs/ksmbd/misc.h | 3 +- fs/ksmbd/smb2pdu.c | 14 ++++++--- 3 files changed, 77 insertions(+), 16 deletions(-) diff --git a/fs/ksmbd/misc.c b/fs/ksmbd/misc.c index 0b307ca28a19..3eac3c01749f 100644 --- a/fs/ksmbd/misc.c +++ b/fs/ksmbd/misc.c @@ -191,19 +191,77 @@ int get_nlink(struct kstat *st) return nlink; } -void ksmbd_conv_path_to_unix(char *path) +char *ksmbd_conv_path_to_unix(char *path) { + size_t path_len, remain_path_len, out_path_len; + char *out_path, *out_next; + int i, pre_dotdot_cnt = 0, slash_cnt = 0; + bool is_last; + strreplace(path, '\\', '/'); -} + path_len = strlen(path); + remain_path_len = path_len; + if (path_len == 0) + return ERR_PTR(-EINVAL); -void ksmbd_strip_last_slash(char *path) -{ - int len = strlen(path); + out_path = kzalloc(path_len + 2, GFP_KERNEL); + if (!out_path) + return ERR_PTR(-ENOMEM); + out_path_len = 0; + out_next = out_path; - while (len && path[len - 1] == '/') { - path[len - 1] = '\0'; - len--; - } + do { + char *name = path + path_len - remain_path_len; + char *next = strchrnul(name, '/'); + size_t name_len = next - name; + + is_last = !next[0]; + if (name_len == 2 && name[0] == '.' && name[1] == '.') { + pre_dotdot_cnt++; + /* handle the case that path ends with "/.." */ + if (is_last) + goto follow_dotdot; + } else { + if (pre_dotdot_cnt) { +follow_dotdot: + slash_cnt = 0; + for (i = out_path_len - 1; i >= 0; i--) { + if (out_path[i] == '/' && + ++slash_cnt == pre_dotdot_cnt + 1) + break; + } + + if (i < 0 && + slash_cnt != pre_dotdot_cnt) { + kfree(out_path); + return ERR_PTR(-EINVAL); + } + + out_next = &out_path[i+1]; + *out_next = '\0'; + out_path_len = i + 1; + + } + + if (name_len != 0 && + !(name_len == 1 && name[0] == '.') && + !(name_len == 2 && name[0] == '.' && name[1] == '.')) { + next[0] = '\0'; + sprintf(out_next, "%s/", name); + out_next += name_len + 1; + out_path_len += name_len + 1; + next[0] = '/'; + } + pre_dotdot_cnt = 0; + } + + remain_path_len -= name_len + 1; + } while (!is_last); + + if (out_path_len > 0) + out_path[out_path_len-1] = '\0'; + path[path_len] = '\0'; + return out_path; } void ksmbd_conv_path_to_windows(char *path) diff --git a/fs/ksmbd/misc.h b/fs/ksmbd/misc.h index af8717d4d85b..b7b10139ada2 100644 --- a/fs/ksmbd/misc.h +++ b/fs/ksmbd/misc.h @@ -16,8 +16,7 @@ int ksmbd_validate_filename(char *filename); int parse_stream_name(char *filename, char **stream_name, int *s_type); char *convert_to_nt_pathname(char *filename, char *sharepath); int get_nlink(struct kstat *st); -void ksmbd_conv_path_to_unix(char *path); -void ksmbd_strip_last_slash(char *path); +char *ksmbd_conv_path_to_unix(char *path); void ksmbd_conv_path_to_windows(char *path); char *ksmbd_extract_sharename(char *treename); char *convert_to_unix_name(struct ksmbd_share_config *share, char *name); diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index c86164dc70bb..46e0275a77a8 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -634,7 +634,7 @@ static char * smb2_get_name(struct ksmbd_share_config *share, const char *src, const int maxlen, struct nls_table *local_nls) { - char *name, *unixname; + char *name, *norm_name, *unixname; name = smb_strndup_from_utf16(src, maxlen, 1, local_nls); if (IS_ERR(name)) { @@ -643,11 +643,15 @@ smb2_get_name(struct ksmbd_share_config *share, const char *src, } /* change it to absolute unix name */ - ksmbd_conv_path_to_unix(name); - ksmbd_strip_last_slash(name); - - unixname = convert_to_unix_name(share, name); + norm_name = ksmbd_conv_path_to_unix(name); + if (IS_ERR(norm_name)) { + kfree(name); + return norm_name; + } kfree(name); + + unixname = convert_to_unix_name(share, norm_name); + kfree(norm_name); if (!unixname) { pr_err("can not convert absolute name\n"); return ERR_PTR(-ENOMEM); From 6d56262c3d224699b29b9bb6b4ace8bab7d692c2 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 18 Sep 2021 18:45:12 +0900 Subject: [PATCH 331/543] ksmbd: add validation for FILE_FULL_EA_INFORMATION of smb2_get_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add validation to check whether req->InputBufferLength is smaller than smb2_ea_info_req structure size. Cc: Ronnie Sahlberg Cc: Ralph Böhme Cc: Steve French Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 46e0275a77a8..6304c9bda479 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -4045,6 +4045,10 @@ static int smb2_get_ea(struct ksmbd_work *work, struct ksmbd_file *fp, path = &fp->filp->f_path; /* single EA entry is requested with given user.* name */ if (req->InputBufferLength) { + if (le32_to_cpu(req->InputBufferLength) < + sizeof(struct smb2_ea_info_req)) + return -EINVAL; + ea_req = (struct smb2_ea_info_req *)req->Buffer; } else { /* need to send all EAs, if no specific EA is requested*/ From efafec27c5658ed987e720130772f8933c685e87 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 18 Sep 2021 10:05:06 -0700 Subject: [PATCH 332/543] spi: Fix tegra20 build with CONFIG_PM=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without CONFIG_PM enabled, the SET_RUNTIME_PM_OPS() macro ends up being empty, and the only use of tegra_slink_runtime_{resume,suspend} goes away, resulting in drivers/spi/spi-tegra20-slink.c:1200:12: error: ‘tegra_slink_runtime_resume’ defined but not used [-Werror=unused-function] 1200 | static int tegra_slink_runtime_resume(struct device *dev) | ^~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/spi/spi-tegra20-slink.c:1188:12: error: ‘tegra_slink_runtime_suspend’ defined but not used [-Werror=unused-function] 1188 | static int tegra_slink_runtime_suspend(struct device *dev) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ mark the functions __maybe_unused to make the build happy. This hits the alpha allmodconfig build (and others). Reported-by: Guenter Roeck Signed-off-by: Linus Torvalds --- drivers/spi/spi-tegra20-slink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c index ebd27f883033..56c6feb2ba3e 100644 --- a/drivers/spi/spi-tegra20-slink.c +++ b/drivers/spi/spi-tegra20-slink.c @@ -1185,7 +1185,7 @@ static int tegra_slink_resume(struct device *dev) } #endif -static int tegra_slink_runtime_suspend(struct device *dev) +static int __maybe_unused tegra_slink_runtime_suspend(struct device *dev) { struct spi_master *master = dev_get_drvdata(dev); struct tegra_slink_data *tspi = spi_master_get_devdata(master); @@ -1197,7 +1197,7 @@ static int tegra_slink_runtime_suspend(struct device *dev) return 0; } -static int tegra_slink_runtime_resume(struct device *dev) +static int __maybe_unused tegra_slink_runtime_resume(struct device *dev) { struct spi_master *master = dev_get_drvdata(dev); struct tegra_slink_data *tspi = spi_master_get_devdata(master); From cc9d3aaa5331577a8658e25473a27ba5949023d8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 18 Sep 2021 10:57:10 -0700 Subject: [PATCH 333/543] alpha: make 'Jensen' IO functions build again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Jensen IO functions are overly copmplicated because some of the IO addresses refer to special 'local IO' ports, and they get accessed differently. That then makes gcc not actually inline them, and since they were marked "extern inline" when included through the regular path, and then only marked "inline" when included from sys_jensen.c, you never necessarily got a body for the IO functions at all. The intent of the sys_jensen.c code is to actually get the non-inlined copy generated, so remove the 'inline' from the magic macro that is supposed to sort this all out. Also, do not mix 'extern inline' functions (that may or may not be inlined and will not generate a function body if they are not) with 'static inline' (that _will_ generate a function body when not inlined). Because gcc will complain about this situation: error: ‘jensen_bus_outb’ is static but used in inline function ‘jensen_outb’ which is not static because gcc basically doesn't know whether to generate a body for that static inline function or not for that call site. So make all of these use that __EXTERN_INLINE marker. Gcc will generally not inline these things on use, and then generate the function body out-of-line in sys_jensen.c. This makes the core IO functions build for the alpha Jensen config. Not that the rest then builds, because it turns out Jensen also doesn't enable PCI, which then makes other drievrs very unhappy, but that's a separate issue. Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/jensen.h | 8 ++++---- arch/alpha/kernel/sys_jensen.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/alpha/include/asm/jensen.h b/arch/alpha/include/asm/jensen.h index 916895155a88..1c4131453db2 100644 --- a/arch/alpha/include/asm/jensen.h +++ b/arch/alpha/include/asm/jensen.h @@ -111,18 +111,18 @@ __EXTERN_INLINE void jensen_set_hae(unsigned long addr) * convinced that I need one of the newer machines. */ -static inline unsigned int jensen_local_inb(unsigned long addr) +__EXTERN_INLINE unsigned int jensen_local_inb(unsigned long addr) { return 0xff & *(vuip)((addr << 9) + EISA_VL82C106); } -static inline void jensen_local_outb(u8 b, unsigned long addr) +__EXTERN_INLINE void jensen_local_outb(u8 b, unsigned long addr) { *(vuip)((addr << 9) + EISA_VL82C106) = b; mb(); } -static inline unsigned int jensen_bus_inb(unsigned long addr) +__EXTERN_INLINE unsigned int jensen_bus_inb(unsigned long addr) { long result; @@ -131,7 +131,7 @@ static inline unsigned int jensen_bus_inb(unsigned long addr) return __kernel_extbl(result, addr & 3); } -static inline void jensen_bus_outb(u8 b, unsigned long addr) +__EXTERN_INLINE void jensen_bus_outb(u8 b, unsigned long addr) { jensen_set_hae(0); *(vuip)((addr << 7) + EISA_IO + 0x00) = b * 0x01010101; diff --git a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c index e5d870ff225f..436914c4851b 100644 --- a/arch/alpha/kernel/sys_jensen.c +++ b/arch/alpha/kernel/sys_jensen.c @@ -17,7 +17,7 @@ #include -#define __EXTERN_INLINE inline +#define __EXTERN_INLINE #include #include #undef __EXTERN_INLINE From cd395d529faf46bd7fd799852a659ca1bd650a27 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 18 Sep 2021 11:15:01 -0700 Subject: [PATCH 334/543] tgafb: clarify dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TGA boards were based on the DECchip 21030 PCI graphics accelerator used mainly for alpha, and existed in a TURBOchannel (TC) version for the DECstation (MIPS) workstations. However, the config option for the TGA code is a bit confused, and says depends on FB && (ALPHA || TC) because people didn't really want to enable the option for random PCI environments, so the "ALPHA" stands in for that case (while the TC case is then the MIPS DECstation case). So that config dependency is kind of a mixture of architecture and bus choices. But it's incorrect, in that there were non-PCI-based alpha hardware, and then the driver just causes warnings: drivers/video/fbdev/tgafb.c:1532:13: error: ‘tgafb_unregister’ defined but not used [-Werror=unused-function] 1532 | static void tgafb_unregister(struct device *dev) | ^~~~~~~~~~~~~~~~ drivers/video/fbdev/tgafb.c:1387:12: error: ‘tgafb_register’ defined but not used [-Werror=unused-function] 1387 | static int tgafb_register(struct device *dev) | ^~~~~~~~~~~~~~ so let's make the config option dependencies a bit more explict: depends on FB depends on PCI || TC depends on ALPHA || TC where that first "FB" is the software configuration dependency, the second "PCI || TC" is the hardware bus dependency, while that final "ALPHA || TC" dependency is the "don't bother asking except for these situations. We could make that third case have "COMPILE_TEST" as an option, and mark the register/unregister functions as __maybe_unused, but I'm not sure it's really worth it. Signed-off-by: Linus Torvalds --- drivers/video/fbdev/Kconfig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index d33c5cd684c0..b26b79dfcac9 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -582,7 +582,9 @@ config FB_HP300 config FB_TGA tristate "TGA/SFB+ framebuffer support" - depends on FB && (ALPHA || TC) + depends on FB + depends on PCI || TC + depends on ALPHA || TC select FB_CFB_FILLRECT select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT From 7efbcc8c075c5e9ef69b2379b75b58a699f23eb3 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Sat, 11 Sep 2021 10:08:53 +0530 Subject: [PATCH 335/543] perf annotate: Fix fused instr logic for assembly functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some x86 microarchitectures fuse a subset of cmp/test/ALU instructions with branch instructions, and thus perf annotate highlight such valid pairs as fused. When annotated with source, perf uses struct disasm_line to contain either source or instruction line from objdump output. Usually, a C statement generates multiple instructions which include such cmp/test/ALU + branch instruction pairs. But in case of assembly function, each individual assembly source line generate one instruction. The 'perf annotate' instruction fusion logic assumes the previous disasm_line as the previous instruction line, which is wrong because, for assembly function, previous disasm_line contains source line. And thus perf fails to highlight valid fused instruction pairs for assembly functions. Fix it by searching backward until we find an instruction line and consider that disasm_line as fused with current branch instruction. Before: │ cmpq %rcx, RIP+8(%rsp) 0.00 │ cmp %rcx,0x88(%rsp) │ je .Lerror_bad_iret <--- Source line 0.14 │ ┌──je b4 <--- Instruction line │ │movl %ecx, %eax After: │ cmpq %rcx, RIP+8(%rsp) 0.00 │ ┌──cmp %rcx,0x88(%rsp) │ │je .Lerror_bad_iret 0.14 │ ├──je b4 │ │movl %ecx, %eax Reviewed-by: Jin Yao Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Kim Phillips Cc: Mark Rutland Cc: Namhyung Kim Link: https //lore.kernel.org/r/20210911043854.8373-1-ravi.bangoria@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browser.c | 33 ++++++++++++++++++++++--------- tools/perf/ui/browser.h | 2 +- tools/perf/ui/browsers/annotate.c | 24 +++++++++++++++------- 3 files changed, 42 insertions(+), 17 deletions(-) diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 781afe42e90e..fa5bd5c20e96 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -757,25 +757,40 @@ void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column, } void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column, - unsigned int row, bool arrow_down) + unsigned int row, int diff, bool arrow_down) { - unsigned int end_row; + int end_row; - if (row >= browser->top_idx) - end_row = row - browser->top_idx; - else + if (diff <= 0) return; SLsmg_set_char_set(1); if (arrow_down) { + if (row + diff <= browser->top_idx) + return; + + end_row = row + diff - browser->top_idx; ui_browser__gotorc(browser, end_row, column - 1); - SLsmg_write_char(SLSMG_ULCORN_CHAR); - ui_browser__gotorc(browser, end_row, column); - SLsmg_draw_hline(2); - ui_browser__gotorc(browser, end_row + 1, column - 1); SLsmg_write_char(SLSMG_LTEE_CHAR); + + while (--end_row >= 0 && end_row > (int)(row - browser->top_idx)) { + ui_browser__gotorc(browser, end_row, column - 1); + SLsmg_draw_vline(1); + } + + end_row = (int)(row - browser->top_idx); + if (end_row >= 0) { + ui_browser__gotorc(browser, end_row, column - 1); + SLsmg_write_char(SLSMG_ULCORN_CHAR); + ui_browser__gotorc(browser, end_row, column); + SLsmg_draw_hline(2); + } } else { + if (row < browser->top_idx) + return; + + end_row = row - browser->top_idx; ui_browser__gotorc(browser, end_row, column - 1); SLsmg_write_char(SLSMG_LTEE_CHAR); ui_browser__gotorc(browser, end_row, column); diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h index 3678eb88f119..510ce4554050 100644 --- a/tools/perf/ui/browser.h +++ b/tools/perf/ui/browser.h @@ -51,7 +51,7 @@ void ui_browser__write_graph(struct ui_browser *browser, int graph); void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column, u64 start, u64 end); void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column, - unsigned int row, bool arrow_down); + unsigned int row, int diff, bool arrow_down); void __ui_browser__show_title(struct ui_browser *browser, const char *title); void ui_browser__show_title(struct ui_browser *browser, const char *title); int ui_browser__show(struct ui_browser *browser, const char *title, diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index ef4da4295bf7..e81c2493efdf 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -125,13 +125,20 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int ab->selection = al; } -static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor) +static int is_fused(struct annotate_browser *ab, struct disasm_line *cursor) { struct disasm_line *pos = list_prev_entry(cursor, al.node); const char *name; + int diff = 1; + + while (pos && pos->al.offset == -1) { + pos = list_prev_entry(pos, al.node); + if (!ab->opts->hide_src_code) + diff++; + } if (!pos) - return false; + return 0; if (ins__is_lock(&pos->ins)) name = pos->ops.locked.ins.name; @@ -139,9 +146,11 @@ static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor) name = pos->ins.name; if (!name || !cursor->ins.name) - return false; + return 0; - return ins__is_fused(ab->arch, name, cursor->ins.name); + if (ins__is_fused(ab->arch, name, cursor->ins.name)) + return diff; + return 0; } static void annotate_browser__draw_current_jump(struct ui_browser *browser) @@ -155,6 +164,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) struct annotation *notes = symbol__annotation(sym); u8 pcnt_width = annotation__pcnt_width(notes); int width; + int diff = 0; /* PLT symbols contain external offsets */ if (strstr(sym->name, "@plt")) @@ -205,11 +215,11 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) pcnt_width + 2 + notes->widths.addr + width, from, to); - if (is_fused(ab, cursor)) { + diff = is_fused(ab, cursor); + if (diff > 0) { ui_browser__mark_fused(browser, pcnt_width + 3 + notes->widths.addr + width, - from - 1, - to > from); + from - diff, diff, to > from); } } From ff6f41fbcee9830f88413cbb08dc45e543243b55 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Sat, 11 Sep 2021 16:30:53 +0300 Subject: [PATCH 336/543] perf script: Fix ip display when type != attr->type set_print_ip_opts() was not being called when type != attr->type because there is not a one-to-one relationship between output types and attr->type. That resulted in ip not printing. The attr_type() function is removed, and the match of attr->type to output type is corrected. Example on ADL using taskset to select an atom cpu: # perf record -e cpu_atom/cpu-cycles/ taskset 0x1000 uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.003 MB perf.data (7 samples) ] Before: # perf script | head taskset 428 [-01] 10394.179041: 1 cpu_atom/cpu-cycles/: taskset 428 [-01] 10394.179043: 1 cpu_atom/cpu-cycles/: taskset 428 [-01] 10394.179044: 11 cpu_atom/cpu-cycles/: taskset 428 [-01] 10394.179045: 407 cpu_atom/cpu-cycles/: taskset 428 [-01] 10394.179046: 16789 cpu_atom/cpu-cycles/: taskset 428 [-01] 10394.179052: 676300 cpu_atom/cpu-cycles/: uname 428 [-01] 10394.179278: 4079859 cpu_atom/cpu-cycles/: After: # perf script | head taskset 428 10394.179041: 1 cpu_atom/cpu-cycles/: ffffffff95a0bb97 __intel_pmu_enable_all.constprop.48+0x47 ([kernel.kallsyms]) taskset 428 10394.179043: 1 cpu_atom/cpu-cycles/: ffffffff95a0bb97 __intel_pmu_enable_all.constprop.48+0x47 ([kernel.kallsyms]) taskset 428 10394.179044: 11 cpu_atom/cpu-cycles/: ffffffff95a0bb97 __intel_pmu_enable_all.constprop.48+0x47 ([kernel.kallsyms]) taskset 428 10394.179045: 407 cpu_atom/cpu-cycles/: ffffffff95a0bb97 __intel_pmu_enable_all.constprop.48+0x47 ([kernel.kallsyms]) taskset 428 10394.179046: 16789 cpu_atom/cpu-cycles/: ffffffff95a0bb97 __intel_pmu_enable_all.constprop.48+0x47 ([kernel.kallsyms]) taskset 428 10394.179052: 676300 cpu_atom/cpu-cycles/: 7f829ef73800 cfree+0x0 (/lib/libc-2.32.so) uname 428 10394.179278: 4079859 cpu_atom/cpu-cycles/: ffffffff95bae912 vma_interval_tree_remove+0x1f2 ([kernel.kallsyms]) Signed-off-by: Adrian Hunter Reviewed-by: Kan Liang Cc: Jin Yao Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20210911133053.15682-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 0e824f7d8b19..6211d0b84b7a 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -368,16 +368,6 @@ static inline int output_type(unsigned int type) return OUTPUT_TYPE_OTHER; } -static inline unsigned int attr_type(unsigned int type) -{ - switch (type) { - case OUTPUT_TYPE_SYNTH: - return PERF_TYPE_SYNTH; - default: - return type; - } -} - static bool output_set_by_user(void) { int j; @@ -556,6 +546,18 @@ static void set_print_ip_opts(struct perf_event_attr *attr) output[type].print_ip_opts |= EVSEL__PRINT_SRCLINE; } +static struct evsel *find_first_output_type(struct evlist *evlist, + unsigned int type) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (output_type(evsel->core.attr.type) == (int)type) + return evsel; + } + return NULL; +} + /* * verify all user requested events exist and the samples * have the expected data @@ -567,7 +569,7 @@ static int perf_session__check_output_opt(struct perf_session *session) struct evsel *evsel; for (j = 0; j < OUTPUT_TYPE_MAX; ++j) { - evsel = perf_session__find_first_evtype(session, attr_type(j)); + evsel = find_first_output_type(session->evlist, j); /* * even if fields is set to 0 (ie., show nothing) event must From 57f0ff059e3daa4e70a811cb1d31a49968262d20 Mon Sep 17 00:00:00 2001 From: Michael Petlan Date: Mon, 19 Jul 2021 16:53:32 +0200 Subject: [PATCH 337/543] perf machine: Initialize srcline string member in add_location struct It's later supposed to be either a correct address or NULL. Without the initialization, it may contain an undefined value which results in the following segmentation fault: # perf top --sort comm -g --ignore-callees=do_idle terminates with: #0 0x00007ffff56b7685 in __strlen_avx2 () from /lib64/libc.so.6 #1 0x00007ffff55e3802 in strdup () from /lib64/libc.so.6 #2 0x00005555558cb139 in hist_entry__init (callchain_size=, sample_self=true, template=0x7fffde7fb110, he=0x7fffd801c250) at util/hist.c:489 #3 hist_entry__new (template=template@entry=0x7fffde7fb110, sample_self=sample_self@entry=true) at util/hist.c:564 #4 0x00005555558cb4ba in hists__findnew_entry (hists=hists@entry=0x5555561d9e38, entry=entry@entry=0x7fffde7fb110, al=al@entry=0x7fffde7fb420, sample_self=sample_self@entry=true) at util/hist.c:657 #5 0x00005555558cba1b in __hists__add_entry (hists=hists@entry=0x5555561d9e38, al=0x7fffde7fb420, sym_parent=, bi=bi@entry=0x0, mi=mi@entry=0x0, sample=sample@entry=0x7fffde7fb4b0, sample_self=true, ops=0x0, block_info=0x0) at util/hist.c:288 #6 0x00005555558cbb70 in hists__add_entry (sample_self=true, sample=0x7fffde7fb4b0, mi=0x0, bi=0x0, sym_parent=, al=, hists=0x5555561d9e38) at util/hist.c:1056 #7 iter_add_single_cumulative_entry (iter=0x7fffde7fb460, al=) at util/hist.c:1056 #8 0x00005555558cc8a4 in hist_entry_iter__add (iter=iter@entry=0x7fffde7fb460, al=al@entry=0x7fffde7fb420, max_stack_depth=, arg=arg@entry=0x7fffffff7db0) at util/hist.c:1231 #9 0x00005555557cdc9a in perf_event__process_sample (machine=, sample=0x7fffde7fb4b0, evsel=, event=, tool=0x7fffffff7db0) at builtin-top.c:842 #10 deliver_event (qe=, qevent=) at builtin-top.c:1202 #11 0x00005555558a9318 in do_flush (show_progress=false, oe=0x7fffffff80e0) at util/ordered-events.c:244 #12 __ordered_events__flush (oe=oe@entry=0x7fffffff80e0, how=how@entry=OE_FLUSH__TOP, timestamp=timestamp@entry=0) at util/ordered-events.c:323 #13 0x00005555558a9789 in __ordered_events__flush (timestamp=, how=, oe=) at util/ordered-events.c:339 #14 ordered_events__flush (how=OE_FLUSH__TOP, oe=0x7fffffff80e0) at util/ordered-events.c:341 #15 ordered_events__flush (oe=oe@entry=0x7fffffff80e0, how=how@entry=OE_FLUSH__TOP) at util/ordered-events.c:339 #16 0x00005555557cd631 in process_thread (arg=0x7fffffff7db0) at builtin-top.c:1114 #17 0x00007ffff7bb817a in start_thread () from /lib64/libpthread.so.0 #18 0x00007ffff5656dc3 in clone () from /lib64/libc.so.6 If you look at the frame #2, the code is: 488 if (he->srcline) { 489 he->srcline = strdup(he->srcline); 490 if (he->srcline == NULL) 491 goto err_rawdata; 492 } If he->srcline is not NULL (it is not NULL if it is uninitialized rubbish), it gets strdupped and strdupping a rubbish random string causes the problem. Also, if you look at the commit 1fb7d06a509e, it adds the srcline property into the struct, but not initializing it everywhere needed. Committer notes: Now I see, when using --ignore-callees=do_idle we end up here at line 2189 in add_callchain_ip(): 2181 if (al.sym != NULL) { 2182 if (perf_hpp_list.parent && !*parent && 2183 symbol__match_regex(al.sym, &parent_regex)) 2184 *parent = al.sym; 2185 else if (have_ignore_callees && root_al && 2186 symbol__match_regex(al.sym, &ignore_callees_regex)) { 2187 /* Treat this symbol as the root, 2188 forgetting its callees. */ 2189 *root_al = al; 2190 callchain_cursor_reset(cursor); 2191 } 2192 } And the al that doesn't have the ->srcline field initialized will be copied to the root_al, so then, back to: 1211 int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, 1212 int max_stack_depth, void *arg) 1213 { 1214 int err, err2; 1215 struct map *alm = NULL; 1216 1217 if (al) 1218 alm = map__get(al->map); 1219 1220 err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent, 1221 iter->evsel, al, max_stack_depth); 1222 if (err) { 1223 map__put(alm); 1224 return err; 1225 } 1226 1227 err = iter->ops->prepare_entry(iter, al); 1228 if (err) 1229 goto out; 1230 1231 err = iter->ops->add_single_entry(iter, al); 1232 if (err) 1233 goto out; 1234 That al at line 1221 is what hist_entry_iter__add() (called from sample__resolve_callchain()) saw as 'root_al', and then: iter->ops->add_single_entry(iter, al); will go on with al->srcline with a bogus value, I'll add the above sequence to the cset and apply, thanks! Signed-off-by: Michael Petlan CC: Milian Wolff Cc: Jiri Olsa Fixes: 1fb7d06a509e ("perf report Use srcline from callchain for hist entries") Link: https //lore.kernel.org/r/20210719145332.29747-1-mpetlan@redhat.com Reported-by: Juri Lelli Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index da19be7da284..44e40bad0e33 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2149,6 +2149,7 @@ static int add_callchain_ip(struct thread *thread, al.filtered = 0; al.sym = NULL; + al.srcline = NULL; if (!cpumode) { thread__find_cpumode_addr_location(thread, ip, &al); } else { From aba5daeb645181ee5a046bc00c231fd045882aaa Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 17 Sep 2021 22:44:40 -0700 Subject: [PATCH 338/543] libperf evsel: Make use of FD robust. FD uses xyarray__entry that may return NULL if an index is out of bounds. If NULL is returned then a segv happens as FD unconditionally dereferences the pointer. This was happening in a case of with perf iostat as shown below. The fix is to make FD an "int*" rather than an int and handle the NULL case as either invalid input or a closed fd. $ sudo gdb --args perf stat --iostat list ... Breakpoint 1, perf_evsel__alloc_fd (evsel=0x5555560951a0, ncpus=1, nthreads=1) at evsel.c:50 50 { (gdb) bt #0 perf_evsel__alloc_fd (evsel=0x5555560951a0, ncpus=1, nthreads=1) at evsel.c:50 #1 0x000055555585c188 in evsel__open_cpu (evsel=0x5555560951a0, cpus=0x555556093410, threads=0x555556086fb0, start_cpu=0, end_cpu=1) at util/evsel.c:1792 #2 0x000055555585cfb2 in evsel__open (evsel=0x5555560951a0, cpus=0x0, threads=0x555556086fb0) at util/evsel.c:2045 #3 0x000055555585d0db in evsel__open_per_thread (evsel=0x5555560951a0, threads=0x555556086fb0) at util/evsel.c:2065 #4 0x00005555558ece64 in create_perf_stat_counter (evsel=0x5555560951a0, config=0x555555c34700 , target=0x555555c2f1c0 , cpu=0) at util/stat.c:590 #5 0x000055555578e927 in __run_perf_stat (argc=1, argv=0x7fffffffe4a0, run_idx=0) at builtin-stat.c:833 #6 0x000055555578f3c6 in run_perf_stat (argc=1, argv=0x7fffffffe4a0, run_idx=0) at builtin-stat.c:1048 #7 0x0000555555792ee5 in cmd_stat (argc=1, argv=0x7fffffffe4a0) at builtin-stat.c:2534 #8 0x0000555555835ed3 in run_builtin (p=0x555555c3f540 , argc=3, argv=0x7fffffffe4a0) at perf.c:313 #9 0x0000555555836154 in handle_internal_command (argc=3, argv=0x7fffffffe4a0) at perf.c:365 #10 0x000055555583629f in run_argv (argcp=0x7fffffffe2ec, argv=0x7fffffffe2e0) at perf.c:409 #11 0x0000555555836692 in main (argc=3, argv=0x7fffffffe4a0) at perf.c:539 ... (gdb) c Continuing. Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (uncore_iio_0/event=0x83,umask=0x04,ch_mask=0xF,fc_mask=0x07/). /bin/dmesg | grep -i perf may provide additional information. Program received signal SIGSEGV, Segmentation fault. 0x00005555559b03ea in perf_evsel__close_fd_cpu (evsel=0x5555560951a0, cpu=1) at evsel.c:166 166 if (FD(evsel, cpu, thread) >= 0) v3. fixes a bug in perf_evsel__run_ioctl where the sense of a branch was backward. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210918054440.2350466-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evsel.c | 64 +++++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 23 deletions(-) diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index d8886720e83d..8441e3e1aaac 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -43,7 +43,7 @@ void perf_evsel__delete(struct perf_evsel *evsel) free(evsel); } -#define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y)) +#define FD(e, x, y) ((int *) xyarray__entry(e->fd, x, y)) #define MMAP(e, x, y) (e->mmap ? ((struct perf_mmap *) xyarray__entry(e->mmap, x, y)) : NULL) int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) @@ -54,7 +54,10 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) int cpu, thread; for (cpu = 0; cpu < ncpus; cpu++) { for (thread = 0; thread < nthreads; thread++) { - FD(evsel, cpu, thread) = -1; + int *fd = FD(evsel, cpu, thread); + + if (fd) + *fd = -1; } } } @@ -80,7 +83,7 @@ sys_perf_event_open(struct perf_event_attr *attr, static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *group_fd) { struct perf_evsel *leader = evsel->leader; - int fd; + int *fd; if (evsel == leader) { *group_fd = -1; @@ -95,10 +98,10 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *grou return -ENOTCONN; fd = FD(leader, cpu, thread); - if (fd == -1) + if (fd == NULL || *fd == -1) return -EBADF; - *group_fd = fd; + *group_fd = *fd; return 0; } @@ -138,7 +141,11 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, for (cpu = 0; cpu < cpus->nr; cpu++) { for (thread = 0; thread < threads->nr; thread++) { - int fd, group_fd; + int fd, group_fd, *evsel_fd; + + evsel_fd = FD(evsel, cpu, thread); + if (evsel_fd == NULL) + return -EINVAL; err = get_group_fd(evsel, cpu, thread, &group_fd); if (err < 0) @@ -151,7 +158,7 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, if (fd < 0) return -errno; - FD(evsel, cpu, thread) = fd; + *evsel_fd = fd; } } @@ -163,9 +170,12 @@ static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu) int thread; for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) { - if (FD(evsel, cpu, thread) >= 0) - close(FD(evsel, cpu, thread)); - FD(evsel, cpu, thread) = -1; + int *fd = FD(evsel, cpu, thread); + + if (fd && *fd >= 0) { + close(*fd); + *fd = -1; + } } } @@ -209,13 +219,12 @@ void perf_evsel__munmap(struct perf_evsel *evsel) for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { - int fd = FD(evsel, cpu, thread); - struct perf_mmap *map = MMAP(evsel, cpu, thread); + int *fd = FD(evsel, cpu, thread); - if (fd < 0) + if (fd == NULL || *fd < 0) continue; - perf_mmap__munmap(map); + perf_mmap__munmap(MMAP(evsel, cpu, thread)); } } @@ -239,15 +248,16 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages) for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { - int fd = FD(evsel, cpu, thread); - struct perf_mmap *map = MMAP(evsel, cpu, thread); + int *fd = FD(evsel, cpu, thread); + struct perf_mmap *map; - if (fd < 0) + if (fd == NULL || *fd < 0) continue; + map = MMAP(evsel, cpu, thread); perf_mmap__init(map, NULL, false, NULL); - ret = perf_mmap__mmap(map, &mp, fd, cpu); + ret = perf_mmap__mmap(map, &mp, *fd, cpu); if (ret) { perf_evsel__munmap(evsel); return ret; @@ -260,7 +270,9 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages) void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread) { - if (FD(evsel, cpu, thread) < 0 || MMAP(evsel, cpu, thread) == NULL) + int *fd = FD(evsel, cpu, thread); + + if (fd == NULL || *fd < 0 || MMAP(evsel, cpu, thread) == NULL) return NULL; return MMAP(evsel, cpu, thread)->base; @@ -295,17 +307,18 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, struct perf_counts_values *count) { size_t size = perf_evsel__read_size(evsel); + int *fd = FD(evsel, cpu, thread); memset(count, 0, sizeof(*count)); - if (FD(evsel, cpu, thread) < 0) + if (fd == NULL || *fd < 0) return -EINVAL; if (MMAP(evsel, cpu, thread) && !perf_mmap__read_self(MMAP(evsel, cpu, thread), count)) return 0; - if (readn(FD(evsel, cpu, thread), count->values, size) <= 0) + if (readn(*fd, count->values, size) <= 0) return -errno; return 0; @@ -318,8 +331,13 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int thread; for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { - int fd = FD(evsel, cpu, thread), - err = ioctl(fd, ioc, arg); + int err; + int *fd = FD(evsel, cpu, thread); + + if (fd == NULL || *fd < 0) + return -1; + + err = ioctl(*fd, ioc, arg); if (err) return err; From 219d720e6df71c2607d7120d6b9281614863e5b1 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 14 Sep 2021 10:00:04 -0700 Subject: [PATCH 339/543] perf bpf: Ignore deprecation warning when using libbpf's btf__get_from_id() Perf code re-implements libbpf's btf__load_from_kernel_by_id() API as a weak function, presumably to dynamically link against old version of libbpf shared library. Unfortunately this causes compilation warning when perf is compiled against libbpf v0.6+. For now, just ignore deprecation warning, but there might be a better solution, depending on perf's needs. Signed-off-by: Andrii Nakryiko Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: kernel-team@fb.com LPU-Reference: 20210914170004.4185659-1-andrii@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-event.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 683f6d63560e..1a7112a87736 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -24,7 +24,10 @@ struct btf * __weak btf__load_from_kernel_by_id(__u32 id) { struct btf *btf; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" int err = btf__get_from_id(id, &btf); +#pragma GCC diagnostic pop return err ? ERR_PTR(err) : btf; } From ab41f75ee6a06fa9b947a59c8f9de92370463e40 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 18 Sep 2021 14:12:39 -0700 Subject: [PATCH 340/543] alpha: mark 'Jensen' platform as no longer broken Ok, it almost certainly is still broken on actual hardware, but the immediate reason for it having been marked BROKEN was a build error that is fixed by just making sure the low-level IO header file is included sufficiently early that the __EXTERN_INLINE hackery takes effect. This was marked broken back in 2017 by commit 1883c9f49d02 ("alpha: mark jensen as broken"), but Ulrich Teichert made me look at it as part of my cross-build work to make sure -Werror actually does the right thing. There are lots of alpha configurations that do not build cleanly, but now it's no longer because Jensen wouldn't be buildable. That said, because the Jensen platform doesn't force PCI to be enabled (Jensen only had EISA), it ends up being somewhat interesting as a source of odd configs. Reported-by: Ulrich Teichert Signed-off-by: Linus Torvalds --- arch/alpha/Kconfig | 1 - arch/alpha/kernel/sys_jensen.c | 10 +++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 02e5b673bdad..11e99358aa58 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -199,7 +199,6 @@ config ALPHA_EIGER config ALPHA_JENSEN bool "Jensen" - depends on BROKEN select HAVE_EISA help DEC PC 150 AXP (aka Jensen): This is a very old Digital system - one diff --git a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c index 436914c4851b..5c9c88428124 100644 --- a/arch/alpha/kernel/sys_jensen.c +++ b/arch/alpha/kernel/sys_jensen.c @@ -7,6 +7,11 @@ * * Code supporting the Jensen. */ +#define __EXTERN_INLINE +#include +#include +#undef __EXTERN_INLINE + #include #include #include @@ -17,11 +22,6 @@ #include -#define __EXTERN_INLINE -#include -#include -#undef __EXTERN_INLINE - #include #include #include From d4d016caa4b85b9aa98d7ec8c84e928621a614bc Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 18 Sep 2021 14:45:48 -0700 Subject: [PATCH 341/543] alpha: move __udiv_qrnnd library function to arch/alpha/lib/ We already had the implementation for __udiv_qrnnd (unsigned divide for multi-precision arithmetic) as part of the alpha math emulation code. But you can disable the math emulation code - even if you shouldn't - and then the MPI code that actually wants this functionality (and is needed by various crypto functions) will fail to build. So move the extended-precision divide code to be a regular library function, just like all the regular division code is. That way ie is available regardless of math-emulation. Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/asm-prototypes.h | 1 + arch/alpha/lib/Makefile | 1 + arch/alpha/{math-emu/qrnnd.S => lib/udiv-qrnnd.S} | 2 ++ arch/alpha/math-emu/Makefile | 2 +- arch/alpha/math-emu/math.c | 2 -- 5 files changed, 5 insertions(+), 3 deletions(-) rename arch/alpha/{math-emu/qrnnd.S => lib/udiv-qrnnd.S} (98%) diff --git a/arch/alpha/include/asm/asm-prototypes.h b/arch/alpha/include/asm/asm-prototypes.h index b34cc1f06720..c8ae46fc2e74 100644 --- a/arch/alpha/include/asm/asm-prototypes.h +++ b/arch/alpha/include/asm/asm-prototypes.h @@ -16,3 +16,4 @@ extern void __divlu(void); extern void __remlu(void); extern void __divqu(void); extern void __remqu(void); +extern unsigned long __udiv_qrnnd(unsigned long *, unsigned long, unsigned long , unsigned long); diff --git a/arch/alpha/lib/Makefile b/arch/alpha/lib/Makefile index 854d5e79979e..1cc74f7b50ef 100644 --- a/arch/alpha/lib/Makefile +++ b/arch/alpha/lib/Makefile @@ -14,6 +14,7 @@ ev6-$(CONFIG_ALPHA_EV6) := ev6- ev67-$(CONFIG_ALPHA_EV67) := ev67- lib-y = __divqu.o __remqu.o __divlu.o __remlu.o \ + udiv-qrnnd.o \ udelay.o \ $(ev6-y)memset.o \ $(ev6-y)memcpy.o \ diff --git a/arch/alpha/math-emu/qrnnd.S b/arch/alpha/lib/udiv-qrnnd.S similarity index 98% rename from arch/alpha/math-emu/qrnnd.S rename to arch/alpha/lib/udiv-qrnnd.S index d6373ec1bff9..b887aa5428e5 100644 --- a/arch/alpha/math-emu/qrnnd.S +++ b/arch/alpha/lib/udiv-qrnnd.S @@ -25,6 +25,7 @@ # along with GCC; see the file COPYING. If not, write to the # Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, # MA 02111-1307, USA. +#include .set noreorder .set noat @@ -161,3 +162,4 @@ $Odd: ret $31,($26),1 .end __udiv_qrnnd +EXPORT_SYMBOL(__udiv_qrnnd) diff --git a/arch/alpha/math-emu/Makefile b/arch/alpha/math-emu/Makefile index 6eda0973e183..3206402a8792 100644 --- a/arch/alpha/math-emu/Makefile +++ b/arch/alpha/math-emu/Makefile @@ -7,4 +7,4 @@ ccflags-y := -w obj-$(CONFIG_MATHEMU) += math-emu.o -math-emu-objs := math.o qrnnd.o +math-emu-objs := math.o diff --git a/arch/alpha/math-emu/math.c b/arch/alpha/math-emu/math.c index f7cef66af88d..4212258f3cfd 100644 --- a/arch/alpha/math-emu/math.c +++ b/arch/alpha/math-emu/math.c @@ -403,5 +403,3 @@ alpha_fp_emul_imprecise (struct pt_regs *regs, unsigned long write_mask) egress: return si_code; } - -EXPORT_SYMBOL(__udiv_qrnnd); From d62d5aed3354ae57d57fae1e59948913f360d4eb Mon Sep 17 00:00:00 2001 From: Ariel Marcovitch Date: Wed, 1 Sep 2021 17:52:12 +0300 Subject: [PATCH 342/543] checkkconfigsymbols.py: Forbid passing 'HEAD' to --commit As opposed to the --diff option, --commit can get ref names instead of commit hashes. When using the --commit option, the script resets the working directory to the commit before the given ref, by adding '~' to the end of the ref. However, the 'HEAD' ref is relative, and so when the working directory is reset to 'HEAD~', 'HEAD' points to what was 'HEAD~'. Then when the script resets to 'HEAD' it actually stays in the same commit. In this case, the script won't report any cases because there is no diff between the cases of the two refs. Prevent the user from using HEAD refs. A better solution might be to resolve the refs before doing the reset, but for now just disallow such refs. Signed-off-by: Ariel Marcovitch Signed-off-by: Masahiro Yamada --- scripts/checkkconfigsymbols.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/checkkconfigsymbols.py b/scripts/checkkconfigsymbols.py index b9b0f15e5880..c57c990c3244 100755 --- a/scripts/checkkconfigsymbols.py +++ b/scripts/checkkconfigsymbols.py @@ -102,6 +102,9 @@ def parse_options(): "continue.") if args.commit: + if args.commit.startswith('HEAD'): + sys.exit("The --commit option can't use the HEAD ref") + args.find = False if args.ignore: From aa0f5ea12e477b2940a57fcda4908627e0beed0f Mon Sep 17 00:00:00 2001 From: Ariel Marcovitch Date: Wed, 1 Sep 2021 19:49:52 +0300 Subject: [PATCH 343/543] checkkconfigsymbols.py: Remove skipping of help lines in parse_kconfig_file When parsing Kconfig files to find symbol definitions and references, lines after a 'help' line are skipped until a new config definition starts. However, Kconfig statements can actually be after a help section, as long as these have shallower indentation. These are skipped by the parser. This means that symbols referenced in this kind of statements are ignored by this function and thus are not considered undefined references in case the symbol is not defined. Remove the 'skip' logic entirely, as it is not needed if we just use the STMT regex to find the end of help lines. However, this means that keywords that appear as part of the help message (i.e. with the same indentation as the help lines) it will be considered as a reference/definition. This can happen now as well, but only with REGEX_KCONFIG_DEF lines. Also, the keyword must have a SYMBOL after it, which probably means that someone referenced a config in the help so it seems like a bonus :) The real solution is to keep track of the indentation when a the first help line in encountered and then handle DEF and STMT lines only if the indentation is shallower. Signed-off-by: Ariel Marcovitch Signed-off-by: Masahiro Yamada --- scripts/checkkconfigsymbols.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/scripts/checkkconfigsymbols.py b/scripts/checkkconfigsymbols.py index c57c990c3244..217d21abc86e 100755 --- a/scripts/checkkconfigsymbols.py +++ b/scripts/checkkconfigsymbols.py @@ -34,7 +34,6 @@ REGEX_SOURCE_SYMBOL = re.compile(SOURCE_SYMBOL) REGEX_KCONFIG_DEF = re.compile(DEF) REGEX_KCONFIG_EXPR = re.compile(EXPR) REGEX_KCONFIG_STMT = re.compile(STMT) -REGEX_KCONFIG_HELP = re.compile(r"^\s+help\s*$") REGEX_FILTER_SYMBOLS = re.compile(r"[A-Za-z0-9]$") REGEX_NUMERIC = re.compile(r"0[xX][0-9a-fA-F]+|[0-9]+") REGEX_QUOTES = re.compile("(\"(.*?)\")") @@ -435,7 +434,6 @@ def parse_kconfig_file(kfile): lines = [] defined = [] references = [] - skip = False if not os.path.exists(kfile): return defined, references @@ -451,12 +449,6 @@ def parse_kconfig_file(kfile): if REGEX_KCONFIG_DEF.match(line): symbol_def = REGEX_KCONFIG_DEF.findall(line) defined.append(symbol_def[0]) - skip = False - elif REGEX_KCONFIG_HELP.match(line): - skip = True - elif skip: - # ignore content of help messages - pass elif REGEX_KCONFIG_STMT.match(line): line = REGEX_QUOTES.sub("", line) symbols = get_symbols_in_line(line) From ec783c7cb2495c5a3b8ca10db8056d43c528f940 Mon Sep 17 00:00:00 2001 From: Kortan Date: Wed, 8 Sep 2021 11:28:48 +0800 Subject: [PATCH 344/543] gen_compile_commands: fix missing 'sys' package We need to import the 'sys' package since the script has called sys.exit() method. Fixes: 6ad7cbc01527 ("Makefile: Add clang-tidy and static analyzer support to makefile") Signed-off-by: Kortan Reviewed-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- scripts/clang-tools/gen_compile_commands.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/clang-tools/gen_compile_commands.py b/scripts/clang-tools/gen_compile_commands.py index 0033eedce003..1d1bde1fd45e 100755 --- a/scripts/clang-tools/gen_compile_commands.py +++ b/scripts/clang-tools/gen_compile_commands.py @@ -13,6 +13,7 @@ import logging import os import re import subprocess +import sys _DEFAULT_OUTPUT = 'compile_commands.json' _DEFAULT_LOG_LEVEL = 'WARNING' From 4e70b646bae578bb51c89204b5b81ef499436482 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 16 Sep 2021 10:43:53 +0200 Subject: [PATCH 345/543] sh: Add missing FORCE prerequisites in Makefile make: arch/sh/boot/Makefile:87: FORCE prerequisite is missing Add the missing FORCE prerequisites for all build targets identified by "make help". Fixes: e1f86d7b4b2a5213 ("kbuild: warn if FORCE is missing for if_changed(_dep,_rule) and filechk") Signed-off-by: Geert Uytterhoeven Signed-off-by: Masahiro Yamada --- arch/sh/boot/Makefile | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/sh/boot/Makefile b/arch/sh/boot/Makefile index 58592dfa5cb6..c081e7e2d6e7 100644 --- a/arch/sh/boot/Makefile +++ b/arch/sh/boot/Makefile @@ -80,30 +80,30 @@ $(obj)/vmlinux.bin.xz: $(obj)/vmlinux.bin FORCE $(obj)/vmlinux.bin.lzo: $(obj)/vmlinux.bin FORCE $(call if_changed,lzo) -$(obj)/uImage.bz2: $(obj)/vmlinux.bin.bz2 +$(obj)/uImage.bz2: $(obj)/vmlinux.bin.bz2 FORCE $(call if_changed,uimage,bzip2) -$(obj)/uImage.gz: $(obj)/vmlinux.bin.gz +$(obj)/uImage.gz: $(obj)/vmlinux.bin.gz FORCE $(call if_changed,uimage,gzip) -$(obj)/uImage.lzma: $(obj)/vmlinux.bin.lzma +$(obj)/uImage.lzma: $(obj)/vmlinux.bin.lzma FORCE $(call if_changed,uimage,lzma) -$(obj)/uImage.xz: $(obj)/vmlinux.bin.xz +$(obj)/uImage.xz: $(obj)/vmlinux.bin.xz FORCE $(call if_changed,uimage,xz) -$(obj)/uImage.lzo: $(obj)/vmlinux.bin.lzo +$(obj)/uImage.lzo: $(obj)/vmlinux.bin.lzo FORCE $(call if_changed,uimage,lzo) -$(obj)/uImage.bin: $(obj)/vmlinux.bin +$(obj)/uImage.bin: $(obj)/vmlinux.bin FORCE $(call if_changed,uimage,none) OBJCOPYFLAGS_vmlinux.srec := -I binary -O srec -$(obj)/vmlinux.srec: $(obj)/compressed/vmlinux +$(obj)/vmlinux.srec: $(obj)/compressed/vmlinux FORCE $(call if_changed,objcopy) OBJCOPYFLAGS_uImage.srec := -I binary -O srec -$(obj)/uImage.srec: $(obj)/uImage +$(obj)/uImage.srec: $(obj)/uImage FORCE $(call if_changed,objcopy) $(obj)/uImage: $(obj)/uImage.$(suffix-y) From 7c80144626db460bc3969027810a540741865fb1 Mon Sep 17 00:00:00 2001 From: Ramji Jiyani Date: Thu, 16 Sep 2021 09:21:22 +0000 Subject: [PATCH 346/543] kbuild: Fix comment typo in scripts/Makefile.modpost Change comment "create one .mod.c file pr. module" to "create one .mod.c file per module" Signed-off-by: Ramji Jiyani Signed-off-by: Masahiro Yamada --- scripts/Makefile.modpost | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index eef56d629799..48585c4d04ad 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -13,7 +13,7 @@ # Stage 2 is handled by this file and does the following # 1) Find all modules listed in modules.order # 2) modpost is then used to -# 3) create one .mod.c file pr. module +# 3) create one .mod.c file per module # 4) create one Module.symvers file with CRC for all exported symbols # Step 3 is used to place certain information in the module's ELF From 7fa6a2746616c8de4c40b748c2bb0656e00624ff Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 16 Sep 2021 11:40:16 -0700 Subject: [PATCH 347/543] x86/build: Do not add -falign flags unconditionally for clang clang does not support -falign-jumps and only recently gained support for -falign-loops. When one of the configuration options that adds these flags is enabled, clang warns and all cc-{disable-warning,option} that follow fail because -Werror gets added to test for the presence of this warning: clang-14: warning: optimization flag '-falign-jumps=0' is not supported [-Wignored-optimization-argument] To resolve this, add a couple of cc-option calls when building with clang; gcc has supported these options since 3.2 so there is no point in testing for their support. -falign-functions was implemented in clang-7, -falign-loops was implemented in clang-14, and -falign-jumps has not been implemented yet. Link: https://lore.kernel.org/r/YSQE2f5teuvKLkON@Ryzen-9-3900X.localdomain/ Link: https://lore.kernel.org/r/20210824022640.2170859-2-nathan@kernel.org/ Reported-by: kernel test robot Reviewed-by: Nick Desaulniers Acked-by: Borislav Petkov Signed-off-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- arch/x86/Makefile_32.cpu | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index e7355f8b51c2..94834c4b5e5e 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -4,6 +4,12 @@ tune = $(call cc-option,-mtune=$(1),$(2)) +ifdef CONFIG_CC_IS_CLANG +align := -falign-functions=0 $(call cc-option,-falign-jumps=0) $(call cc-option,-falign-loops=0) +else +align := -falign-functions=0 -falign-jumps=0 -falign-loops=0 +endif + cflags-$(CONFIG_M486SX) += -march=i486 cflags-$(CONFIG_M486) += -march=i486 cflags-$(CONFIG_M586) += -march=i586 @@ -19,11 +25,11 @@ cflags-$(CONFIG_MK6) += -march=k6 # They make zero difference whatsosever to performance at this time. cflags-$(CONFIG_MK7) += -march=athlon cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon) -cflags-$(CONFIG_MCRUSOE) += -march=i686 -falign-functions=0 -falign-jumps=0 -falign-loops=0 -cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) -falign-functions=0 -falign-jumps=0 -falign-loops=0 +cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align) +cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align) cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586) -cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) -falign-functions=0 -falign-jumps=0 -falign-loops=0 +cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align) cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) cflags-$(CONFIG_MVIAC7) += -march=i686 cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2) From 0664684e1ebd7875e120d0cecd525bac4805f8ed Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 16 Sep 2021 11:40:17 -0700 Subject: [PATCH 348/543] kbuild: Add -Werror=ignored-optimization-argument to CLANG_FLAGS Similar to commit 589834b3a009 ("kbuild: Add -Werror=unknown-warning-option to CLANG_FLAGS"). Clang ignores certain GCC flags that it has not implemented, only emitting a warning: $ echo | clang -fsyntax-only -falign-jumps -x c - clang-14: warning: optimization flag '-falign-jumps' is not supported [-Wignored-optimization-argument] When one of these flags gets added to KBUILD_CFLAGS unconditionally, all subsequent cc-{disable-warning,option} calls fail because -Werror was added to these invocations to turn the above warning and the equivalent -W flag warning into errors. To catch the presence of these flags earlier, turn -Wignored-optimization-argument into an error so that the flags can either be implemented or ignored via cc-option and there are no more weird errors. Reviewed-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- scripts/Makefile.clang | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/Makefile.clang b/scripts/Makefile.clang index 4cce8fd0779c..51fc23e2e9e5 100644 --- a/scripts/Makefile.clang +++ b/scripts/Makefile.clang @@ -29,7 +29,12 @@ CLANG_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE)) else CLANG_FLAGS += -fintegrated-as endif +# By default, clang only warns when it encounters an unknown warning flag or +# certain optimization flags it knows it has not implemented. +# Make it behave more like gcc by erroring when these flags are encountered +# so they can be implemented or wrapped in cc-option. CLANG_FLAGS += -Werror=unknown-warning-option +CLANG_FLAGS += -Werror=ignored-optimization-argument KBUILD_CFLAGS += $(CLANG_FLAGS) KBUILD_AFLAGS += $(CLANG_FLAGS) export CLANG_FLAGS From cf9579976f724ad517cc15b7caadea728c7e245c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 17 Sep 2021 16:34:32 +0300 Subject: [PATCH 349/543] net: mdio: introduce a shutdown method to mdio device drivers MDIO-attached devices might have interrupts and other things that might need quiesced when we kexec into a new kernel. Things are even more creepy when those interrupt lines are shared, and in that case it is absolutely mandatory to disable all interrupt sources. Moreover, MDIO devices might be DSA switches, and DSA needs its own shutdown method to unlink from the DSA master, which is a new requirement that appeared after commit 2f1e8ea726e9 ("net: dsa: link interfaces with the DSA master to get rid of lockdep warnings"). So introduce a ->shutdown method in the MDIO device driver structure. Signed-off-by: Vladimir Oltean Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/mdio_device.c | 11 +++++++++++ include/linux/mdio.h | 3 +++ 2 files changed, 14 insertions(+) diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c index c94cb5382dc9..250742ffdfd9 100644 --- a/drivers/net/phy/mdio_device.c +++ b/drivers/net/phy/mdio_device.c @@ -179,6 +179,16 @@ static int mdio_remove(struct device *dev) return 0; } +static void mdio_shutdown(struct device *dev) +{ + struct mdio_device *mdiodev = to_mdio_device(dev); + struct device_driver *drv = mdiodev->dev.driver; + struct mdio_driver *mdiodrv = to_mdio_driver(drv); + + if (mdiodrv->shutdown) + mdiodrv->shutdown(mdiodev); +} + /** * mdio_driver_register - register an mdio_driver with the MDIO layer * @drv: new mdio_driver to register @@ -193,6 +203,7 @@ int mdio_driver_register(struct mdio_driver *drv) mdiodrv->driver.bus = &mdio_bus_type; mdiodrv->driver.probe = mdio_probe; mdiodrv->driver.remove = mdio_remove; + mdiodrv->driver.shutdown = mdio_shutdown; retval = driver_register(&mdiodrv->driver); if (retval) { diff --git a/include/linux/mdio.h b/include/linux/mdio.h index ffb787d5ebde..5e6dc38f418e 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -80,6 +80,9 @@ struct mdio_driver { /* Clears up any memory if needed */ void (*remove)(struct mdio_device *mdiodev); + + /* Quiesces the device on system shutdown, turns off interrupts etc */ + void (*shutdown)(struct mdio_device *mdiodev); }; static inline struct mdio_driver * From 0650bf52b31ff35dc6430fc2e37969c36baba724 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 17 Sep 2021 16:34:33 +0300 Subject: [PATCH 350/543] net: dsa: be compatible with masters which unregister on shutdown Lino reports that on his system with bcmgenet as DSA master and KSZ9897 as a switch, rebooting or shutting down never works properly. What does the bcmgenet driver have special to trigger this, that other DSA masters do not? It has an implementation of ->shutdown which simply calls its ->remove implementation. Otherwise said, it unregisters its network interface on shutdown. This message can be seen in a loop, and it hangs the reboot process there: unregister_netdevice: waiting for eth0 to become free. Usage count = 3 So why 3? A usage count of 1 is normal for a registered network interface, and any virtual interface which links itself as an upper of that will increment it via dev_hold. In the case of DSA, this is the call path: dsa_slave_create -> netdev_upper_dev_link -> __netdev_upper_dev_link -> __netdev_adjacent_dev_insert -> dev_hold So a DSA switch with 3 interfaces will result in a usage count elevated by two, and netdev_wait_allrefs will wait until they have gone away. Other stacked interfaces, like VLAN, watch NETDEV_UNREGISTER events and delete themselves, but DSA cannot just vanish and go poof, at most it can unbind itself from the switch devices, but that must happen strictly earlier compared to when the DSA master unregisters its net_device, so reacting on the NETDEV_UNREGISTER event is way too late. It seems that it is a pretty established pattern to have a driver's ->shutdown hook redirect to its ->remove hook, so the same code is executed regardless of whether the driver is unbound from the device, or the system is just shutting down. As Florian puts it, it is quite a big hammer for bcmgenet to unregister its net_device during shutdown, but having a common code path with the driver unbind helps ensure it is well tested. So DSA, for better or for worse, has to live with that and engage in an arms race of implementing the ->shutdown hook too, from all individual drivers, and do something sane when paired with masters that unregister their net_device there. The only sane thing to do, of course, is to unlink from the master. However, complications arise really quickly. The pattern of redirecting ->shutdown to ->remove is not unique to bcmgenet or even to net_device drivers. In fact, SPI controllers do it too (see dspi_shutdown -> dspi_remove), and presumably, I2C controllers and MDIO controllers do it too (this is something I have not researched too deeply, but even if this is not the case today, it is certainly plausible to happen in the future, and must be taken into consideration). Since DSA switches might be SPI devices, I2C devices, MDIO devices, the insane implication is that for the exact same DSA switch device, we might have both ->shutdown and ->remove getting called. So we need to do something with that insane environment. The pattern I've come up with is "if this, then not that", so if either ->shutdown or ->remove gets called, we set the device's drvdata to NULL, and in the other hook, we check whether the drvdata is NULL and just do nothing. This is probably not necessary for platform devices, just for devices on buses, but I would really insist for consistency among drivers, because when code is copy-pasted, it is not always copy-pasted from the best sources. So depending on whether the DSA switch's ->remove or ->shutdown will get called first, we cannot really guarantee even for the same driver if rebooting will result in the same code path on all platforms. But nonetheless, we need to do something minimally reasonable on ->shutdown too to fix the bug. Of course, the ->remove will do more (a full teardown of the tree, with all data structures freed, and this is why the bug was not caught for so long). The new ->shutdown method is kept separate from dsa_unregister_switch not because we couldn't have unregistered the switch, but simply in the interest of doing something quick and to the point. The big question is: does the DSA switch's ->shutdown get called earlier than the DSA master's ->shutdown? If not, there is still a risk that we might still trigger the WARN_ON in unregister_netdevice that says we are attempting to unregister a net_device which has uppers. That's no good. Although the reference to the master net_device won't physically go away even if DSA's ->shutdown comes afterwards, remember we have a dev_hold on it. The answer to that question lies in this comment above device_link_add: * A side effect of the link creation is re-ordering of dpm_list and the * devices_kset list by moving the consumer device and all devices depending * on it to the ends of these lists (that does not happen to devices that have * not been registered when this function is called). so the fact that DSA uses device_link_add towards its master is not exactly for nothing. device_shutdown() walks devices_kset from the back, so this is our guarantee that DSA's shutdown happens before the master's shutdown. Fixes: 2f1e8ea726e9 ("net: dsa: link interfaces with the DSA master to get rid of lockdep warnings") Link: https://lore.kernel.org/netdev/20210909095324.12978-1-LinoSanfilippo@gmx.de/ Reported-by: Lino Sanfilippo Signed-off-by: Vladimir Oltean Tested-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_mdio.c | 21 ++++++++- drivers/net/dsa/b53/b53_mmap.c | 13 ++++++ drivers/net/dsa/b53/b53_priv.h | 5 +++ drivers/net/dsa/b53/b53_spi.c | 13 ++++++ drivers/net/dsa/b53/b53_srab.c | 21 ++++++++- drivers/net/dsa/bcm_sf2.c | 12 ++++++ drivers/net/dsa/dsa_loop.c | 22 +++++++++- drivers/net/dsa/lan9303-core.c | 6 +++ drivers/net/dsa/lan9303.h | 1 + drivers/net/dsa/lan9303_i2c.c | 24 +++++++++-- drivers/net/dsa/lan9303_mdio.c | 15 +++++++ drivers/net/dsa/lantiq_gswip.c | 18 ++++++++ drivers/net/dsa/microchip/ksz8795_spi.c | 11 ++++- drivers/net/dsa/microchip/ksz9477_i2c.c | 14 +++++- drivers/net/dsa/microchip/ksz9477_spi.c | 8 +++- drivers/net/dsa/mt7530.c | 18 ++++++++ drivers/net/dsa/mv88e6060.c | 18 ++++++++ drivers/net/dsa/mv88e6xxx/chip.c | 22 +++++++++- drivers/net/dsa/ocelot/felix_vsc9959.c | 20 ++++++++- drivers/net/dsa/ocelot/seville_vsc9953.c | 20 ++++++++- drivers/net/dsa/qca/ar9331.c | 18 ++++++++ drivers/net/dsa/qca8k.c | 18 ++++++++ drivers/net/dsa/realtek-smi-core.c | 20 ++++++++- drivers/net/dsa/sja1105/sja1105_main.c | 21 ++++++++- drivers/net/dsa/vitesse-vsc73xx-core.c | 6 +++ drivers/net/dsa/vitesse-vsc73xx-platform.c | 22 +++++++++- drivers/net/dsa/vitesse-vsc73xx-spi.c | 22 +++++++++- drivers/net/dsa/vitesse-vsc73xx.h | 1 + include/net/dsa.h | 1 + net/dsa/dsa2.c | 50 ++++++++++++++++++++++ 30 files changed, 457 insertions(+), 24 deletions(-) diff --git a/drivers/net/dsa/b53/b53_mdio.c b/drivers/net/dsa/b53/b53_mdio.c index a533a90e3904..a7aeb3c132c9 100644 --- a/drivers/net/dsa/b53/b53_mdio.c +++ b/drivers/net/dsa/b53/b53_mdio.c @@ -351,9 +351,25 @@ static int b53_mdio_probe(struct mdio_device *mdiodev) static void b53_mdio_remove(struct mdio_device *mdiodev) { struct b53_device *dev = dev_get_drvdata(&mdiodev->dev); - struct dsa_switch *ds = dev->ds; - dsa_unregister_switch(ds); + if (!dev) + return; + + b53_switch_remove(dev); + + dev_set_drvdata(&mdiodev->dev, NULL); +} + +static void b53_mdio_shutdown(struct mdio_device *mdiodev) +{ + struct b53_device *dev = dev_get_drvdata(&mdiodev->dev); + + if (!dev) + return; + + b53_switch_shutdown(dev); + + dev_set_drvdata(&mdiodev->dev, NULL); } static const struct of_device_id b53_of_match[] = { @@ -373,6 +389,7 @@ MODULE_DEVICE_TABLE(of, b53_of_match); static struct mdio_driver b53_mdio_driver = { .probe = b53_mdio_probe, .remove = b53_mdio_remove, + .shutdown = b53_mdio_shutdown, .mdiodrv.driver = { .name = "bcm53xx", .of_match_table = b53_of_match, diff --git a/drivers/net/dsa/b53/b53_mmap.c b/drivers/net/dsa/b53/b53_mmap.c index 82680e083cc2..ae4c79d39bc0 100644 --- a/drivers/net/dsa/b53/b53_mmap.c +++ b/drivers/net/dsa/b53/b53_mmap.c @@ -316,9 +316,21 @@ static int b53_mmap_remove(struct platform_device *pdev) if (dev) b53_switch_remove(dev); + platform_set_drvdata(pdev, NULL); + return 0; } +static void b53_mmap_shutdown(struct platform_device *pdev) +{ + struct b53_device *dev = platform_get_drvdata(pdev); + + if (dev) + b53_switch_shutdown(dev); + + platform_set_drvdata(pdev, NULL); +} + static const struct of_device_id b53_mmap_of_table[] = { { .compatible = "brcm,bcm3384-switch" }, { .compatible = "brcm,bcm6328-switch" }, @@ -331,6 +343,7 @@ MODULE_DEVICE_TABLE(of, b53_mmap_of_table); static struct platform_driver b53_mmap_driver = { .probe = b53_mmap_probe, .remove = b53_mmap_remove, + .shutdown = b53_mmap_shutdown, .driver = { .name = "b53-switch", .of_match_table = b53_mmap_of_table, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 5d068acf7cf8..959a52d41f0a 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -228,6 +228,11 @@ static inline void b53_switch_remove(struct b53_device *dev) dsa_unregister_switch(dev->ds); } +static inline void b53_switch_shutdown(struct b53_device *dev) +{ + dsa_switch_shutdown(dev->ds); +} + #define b53_build_op(type_op_size, val_type) \ static inline int b53_##type_op_size(struct b53_device *dev, u8 page, \ u8 reg, val_type val) \ diff --git a/drivers/net/dsa/b53/b53_spi.c b/drivers/net/dsa/b53/b53_spi.c index ecb9f7f6b335..01e37b75471e 100644 --- a/drivers/net/dsa/b53/b53_spi.c +++ b/drivers/net/dsa/b53/b53_spi.c @@ -321,9 +321,21 @@ static int b53_spi_remove(struct spi_device *spi) if (dev) b53_switch_remove(dev); + spi_set_drvdata(spi, NULL); + return 0; } +static void b53_spi_shutdown(struct spi_device *spi) +{ + struct b53_device *dev = spi_get_drvdata(spi); + + if (dev) + b53_switch_shutdown(dev); + + spi_set_drvdata(spi, NULL); +} + static const struct of_device_id b53_spi_of_match[] = { { .compatible = "brcm,bcm5325" }, { .compatible = "brcm,bcm5365" }, @@ -344,6 +356,7 @@ static struct spi_driver b53_spi_driver = { }, .probe = b53_spi_probe, .remove = b53_spi_remove, + .shutdown = b53_spi_shutdown, }; module_spi_driver(b53_spi_driver); diff --git a/drivers/net/dsa/b53/b53_srab.c b/drivers/net/dsa/b53/b53_srab.c index 3f4249de70c5..4591bb1c05d2 100644 --- a/drivers/net/dsa/b53/b53_srab.c +++ b/drivers/net/dsa/b53/b53_srab.c @@ -629,17 +629,34 @@ static int b53_srab_probe(struct platform_device *pdev) static int b53_srab_remove(struct platform_device *pdev) { struct b53_device *dev = platform_get_drvdata(pdev); - struct b53_srab_priv *priv = dev->priv; - b53_srab_intr_set(priv, false); + if (!dev) + return 0; + + b53_srab_intr_set(dev->priv, false); b53_switch_remove(dev); + platform_set_drvdata(pdev, NULL); + return 0; } +static void b53_srab_shutdown(struct platform_device *pdev) +{ + struct b53_device *dev = platform_get_drvdata(pdev); + + if (!dev) + return; + + b53_switch_shutdown(dev); + + platform_set_drvdata(pdev, NULL); +} + static struct platform_driver b53_srab_driver = { .probe = b53_srab_probe, .remove = b53_srab_remove, + .shutdown = b53_srab_shutdown, .driver = { .name = "b53-srab-switch", .of_match_table = b53_srab_of_match, diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index b6c4b3adb171..7578a5c38df5 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1512,6 +1512,9 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev) { struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); + if (!priv) + return 0; + priv->wol_ports_mask = 0; /* Disable interrupts */ bcm_sf2_intr_disable(priv); @@ -1523,6 +1526,8 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev) if (priv->type == BCM7278_DEVICE_ID) reset_control_assert(priv->rcdev); + platform_set_drvdata(pdev, NULL); + return 0; } @@ -1530,6 +1535,9 @@ static void bcm_sf2_sw_shutdown(struct platform_device *pdev) { struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); + if (!priv) + return; + /* For a kernel about to be kexec'd we want to keep the GPHY on for a * successful MDIO bus scan to occur. If we did turn off the GPHY * before (e.g: port_disable), this will also power it back on. @@ -1538,6 +1546,10 @@ static void bcm_sf2_sw_shutdown(struct platform_device *pdev) */ if (priv->hw_params.num_gphy == 1) bcm_sf2_gphy_enable_set(priv->dev->ds, true); + + dsa_switch_shutdown(priv->dev->ds); + + platform_set_drvdata(pdev, NULL); } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c index bfdf3324aac3..e638e3eea911 100644 --- a/drivers/net/dsa/dsa_loop.c +++ b/drivers/net/dsa/dsa_loop.c @@ -340,10 +340,29 @@ static int dsa_loop_drv_probe(struct mdio_device *mdiodev) static void dsa_loop_drv_remove(struct mdio_device *mdiodev) { struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev); - struct dsa_loop_priv *ps = ds->priv; + struct dsa_loop_priv *ps; + + if (!ds) + return; + + ps = ds->priv; dsa_unregister_switch(ds); dev_put(ps->netdev); + + dev_set_drvdata(&mdiodev->dev, NULL); +} + +static void dsa_loop_drv_shutdown(struct mdio_device *mdiodev) +{ + struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev); + + if (!ds) + return; + + dsa_switch_shutdown(ds); + + dev_set_drvdata(&mdiodev->dev, NULL); } static struct mdio_driver dsa_loop_drv = { @@ -352,6 +371,7 @@ static struct mdio_driver dsa_loop_drv = { }, .probe = dsa_loop_drv_probe, .remove = dsa_loop_drv_remove, + .shutdown = dsa_loop_drv_shutdown, }; #define NUM_FIXED_PHYS (DSA_LOOP_NUM_PORTS - 2) diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index d7ce281570b5..89f920289ae2 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -1379,6 +1379,12 @@ int lan9303_remove(struct lan9303 *chip) } EXPORT_SYMBOL(lan9303_remove); +void lan9303_shutdown(struct lan9303 *chip) +{ + dsa_switch_shutdown(chip->ds); +} +EXPORT_SYMBOL(lan9303_shutdown); + MODULE_AUTHOR("Juergen Borleis "); MODULE_DESCRIPTION("Core driver for SMSC/Microchip LAN9303 three port ethernet switch"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/dsa/lan9303.h b/drivers/net/dsa/lan9303.h index 11f590b64701..c7f73efa50f0 100644 --- a/drivers/net/dsa/lan9303.h +++ b/drivers/net/dsa/lan9303.h @@ -10,3 +10,4 @@ extern const struct lan9303_phy_ops lan9303_indirect_phy_ops; int lan9303_probe(struct lan9303 *chip, struct device_node *np); int lan9303_remove(struct lan9303 *chip); +void lan9303_shutdown(struct lan9303 *chip); diff --git a/drivers/net/dsa/lan9303_i2c.c b/drivers/net/dsa/lan9303_i2c.c index 9bffaef65a04..8ca4713310fa 100644 --- a/drivers/net/dsa/lan9303_i2c.c +++ b/drivers/net/dsa/lan9303_i2c.c @@ -67,13 +67,28 @@ static int lan9303_i2c_probe(struct i2c_client *client, static int lan9303_i2c_remove(struct i2c_client *client) { - struct lan9303_i2c *sw_dev; + struct lan9303_i2c *sw_dev = i2c_get_clientdata(client); - sw_dev = i2c_get_clientdata(client); if (!sw_dev) - return -ENODEV; + return 0; - return lan9303_remove(&sw_dev->chip); + lan9303_remove(&sw_dev->chip); + + i2c_set_clientdata(client, NULL); + + return 0; +} + +static void lan9303_i2c_shutdown(struct i2c_client *client) +{ + struct lan9303_i2c *sw_dev = i2c_get_clientdata(client); + + if (!sw_dev) + return; + + lan9303_shutdown(&sw_dev->chip); + + i2c_set_clientdata(client, NULL); } /*-------------------------------------------------------------------------*/ @@ -97,6 +112,7 @@ static struct i2c_driver lan9303_i2c_driver = { }, .probe = lan9303_i2c_probe, .remove = lan9303_i2c_remove, + .shutdown = lan9303_i2c_shutdown, .id_table = lan9303_i2c_id, }; module_i2c_driver(lan9303_i2c_driver); diff --git a/drivers/net/dsa/lan9303_mdio.c b/drivers/net/dsa/lan9303_mdio.c index 9cbe80460b53..bbb7032409ba 100644 --- a/drivers/net/dsa/lan9303_mdio.c +++ b/drivers/net/dsa/lan9303_mdio.c @@ -138,6 +138,20 @@ static void lan9303_mdio_remove(struct mdio_device *mdiodev) return; lan9303_remove(&sw_dev->chip); + + dev_set_drvdata(&mdiodev->dev, NULL); +} + +static void lan9303_mdio_shutdown(struct mdio_device *mdiodev) +{ + struct lan9303_mdio *sw_dev = dev_get_drvdata(&mdiodev->dev); + + if (!sw_dev) + return; + + lan9303_shutdown(&sw_dev->chip); + + dev_set_drvdata(&mdiodev->dev, NULL); } /*-------------------------------------------------------------------------*/ @@ -155,6 +169,7 @@ static struct mdio_driver lan9303_mdio_driver = { }, .probe = lan9303_mdio_probe, .remove = lan9303_mdio_remove, + .shutdown = lan9303_mdio_shutdown, }; mdio_module_driver(lan9303_mdio_driver); diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 267324889dd6..3ff4b7e177f3 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -2184,6 +2184,9 @@ static int gswip_remove(struct platform_device *pdev) struct gswip_priv *priv = platform_get_drvdata(pdev); int i; + if (!priv) + return 0; + /* disable the switch */ gswip_mdio_mask(priv, GSWIP_MDIO_GLOB_ENABLE, 0, GSWIP_MDIO_GLOB); @@ -2197,9 +2200,23 @@ static int gswip_remove(struct platform_device *pdev) for (i = 0; i < priv->num_gphy_fw; i++) gswip_gphy_fw_remove(priv, &priv->gphy_fw[i]); + platform_set_drvdata(pdev, NULL); + return 0; } +static void gswip_shutdown(struct platform_device *pdev) +{ + struct gswip_priv *priv = platform_get_drvdata(pdev); + + if (!priv) + return; + + dsa_switch_shutdown(priv->ds); + + platform_set_drvdata(pdev, NULL); +} + static const struct gswip_hw_info gswip_xrx200 = { .max_ports = 7, .cpu_port = 6, @@ -2223,6 +2240,7 @@ MODULE_DEVICE_TABLE(of, gswip_of_match); static struct platform_driver gswip_driver = { .probe = gswip_probe, .remove = gswip_remove, + .shutdown = gswip_shutdown, .driver = { .name = "gswip", .of_match_table = gswip_of_match, diff --git a/drivers/net/dsa/microchip/ksz8795_spi.c b/drivers/net/dsa/microchip/ksz8795_spi.c index ea7550d1b634..866767b70d65 100644 --- a/drivers/net/dsa/microchip/ksz8795_spi.c +++ b/drivers/net/dsa/microchip/ksz8795_spi.c @@ -94,6 +94,8 @@ static int ksz8795_spi_remove(struct spi_device *spi) if (dev) ksz_switch_remove(dev); + spi_set_drvdata(spi, NULL); + return 0; } @@ -101,8 +103,15 @@ static void ksz8795_spi_shutdown(struct spi_device *spi) { struct ksz_device *dev = spi_get_drvdata(spi); - if (dev && dev->dev_ops->shutdown) + if (!dev) + return; + + if (dev->dev_ops->shutdown) dev->dev_ops->shutdown(dev); + + dsa_switch_shutdown(dev->ds); + + spi_set_drvdata(spi, NULL); } static const struct of_device_id ksz8795_dt_ids[] = { diff --git a/drivers/net/dsa/microchip/ksz9477_i2c.c b/drivers/net/dsa/microchip/ksz9477_i2c.c index 4e053a25d077..f3afb8b8c4cc 100644 --- a/drivers/net/dsa/microchip/ksz9477_i2c.c +++ b/drivers/net/dsa/microchip/ksz9477_i2c.c @@ -56,7 +56,10 @@ static int ksz9477_i2c_remove(struct i2c_client *i2c) { struct ksz_device *dev = i2c_get_clientdata(i2c); - ksz_switch_remove(dev); + if (dev) + ksz_switch_remove(dev); + + i2c_set_clientdata(i2c, NULL); return 0; } @@ -65,8 +68,15 @@ static void ksz9477_i2c_shutdown(struct i2c_client *i2c) { struct ksz_device *dev = i2c_get_clientdata(i2c); - if (dev && dev->dev_ops->shutdown) + if (!dev) + return; + + if (dev->dev_ops->shutdown) dev->dev_ops->shutdown(dev); + + dsa_switch_shutdown(dev->ds); + + i2c_set_clientdata(i2c, NULL); } static const struct i2c_device_id ksz9477_i2c_id[] = { diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c index 15bc11b3cda4..e3cb0e6c9f6f 100644 --- a/drivers/net/dsa/microchip/ksz9477_spi.c +++ b/drivers/net/dsa/microchip/ksz9477_spi.c @@ -72,6 +72,8 @@ static int ksz9477_spi_remove(struct spi_device *spi) if (dev) ksz_switch_remove(dev); + spi_set_drvdata(spi, NULL); + return 0; } @@ -79,8 +81,10 @@ static void ksz9477_spi_shutdown(struct spi_device *spi) { struct ksz_device *dev = spi_get_drvdata(spi); - if (dev && dev->dev_ops->shutdown) - dev->dev_ops->shutdown(dev); + if (dev) + dsa_switch_shutdown(dev->ds); + + spi_set_drvdata(spi, NULL); } static const struct of_device_id ksz9477_dt_ids[] = { diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index d0cba2d1cd68..094737e5084a 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -3286,6 +3286,9 @@ mt7530_remove(struct mdio_device *mdiodev) struct mt7530_priv *priv = dev_get_drvdata(&mdiodev->dev); int ret = 0; + if (!priv) + return; + ret = regulator_disable(priv->core_pwr); if (ret < 0) dev_err(priv->dev, @@ -3301,11 +3304,26 @@ mt7530_remove(struct mdio_device *mdiodev) dsa_unregister_switch(priv->ds); mutex_destroy(&priv->reg_mutex); + + dev_set_drvdata(&mdiodev->dev, NULL); +} + +static void mt7530_shutdown(struct mdio_device *mdiodev) +{ + struct mt7530_priv *priv = dev_get_drvdata(&mdiodev->dev); + + if (!priv) + return; + + dsa_switch_shutdown(priv->ds); + + dev_set_drvdata(&mdiodev->dev, NULL); } static struct mdio_driver mt7530_mdio_driver = { .probe = mt7530_probe, .remove = mt7530_remove, + .shutdown = mt7530_shutdown, .mdiodrv.driver = { .name = "mt7530", .of_match_table = mt7530_of_match, diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index 24b8219fd607..a4c6eb9a52d0 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -290,7 +290,24 @@ static void mv88e6060_remove(struct mdio_device *mdiodev) { struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev); + if (!ds) + return; + dsa_unregister_switch(ds); + + dev_set_drvdata(&mdiodev->dev, NULL); +} + +static void mv88e6060_shutdown(struct mdio_device *mdiodev) +{ + struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev); + + if (!ds) + return; + + dsa_switch_shutdown(ds); + + dev_set_drvdata(&mdiodev->dev, NULL); } static const struct of_device_id mv88e6060_of_match[] = { @@ -303,6 +320,7 @@ static const struct of_device_id mv88e6060_of_match[] = { static struct mdio_driver mv88e6060_driver = { .probe = mv88e6060_probe, .remove = mv88e6060_remove, + .shutdown = mv88e6060_shutdown, .mdiodrv.driver = { .name = "mv88e6060", .of_match_table = mv88e6060_of_match, diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index c45ca2473743..fb10422d2c33 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -6389,7 +6389,12 @@ out: static void mv88e6xxx_remove(struct mdio_device *mdiodev) { struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev); - struct mv88e6xxx_chip *chip = ds->priv; + struct mv88e6xxx_chip *chip; + + if (!ds) + return; + + chip = ds->priv; if (chip->info->ptp_support) { mv88e6xxx_hwtstamp_free(chip); @@ -6410,6 +6415,20 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev) mv88e6xxx_g1_irq_free(chip); else mv88e6xxx_irq_poll_free(chip); + + dev_set_drvdata(&mdiodev->dev, NULL); +} + +static void mv88e6xxx_shutdown(struct mdio_device *mdiodev) +{ + struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev); + + if (!ds) + return; + + dsa_switch_shutdown(ds); + + dev_set_drvdata(&mdiodev->dev, NULL); } static const struct of_device_id mv88e6xxx_of_match[] = { @@ -6433,6 +6452,7 @@ MODULE_DEVICE_TABLE(of, mv88e6xxx_of_match); static struct mdio_driver mv88e6xxx_driver = { .probe = mv88e6xxx_probe, .remove = mv88e6xxx_remove, + .shutdown = mv88e6xxx_shutdown, .mdiodrv.driver = { .name = "mv88e6085", .of_match_table = mv88e6xxx_of_match, diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 9e2ac8e46619..11b42fd812e4 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1472,9 +1472,10 @@ err_pci_enable: static void felix_pci_remove(struct pci_dev *pdev) { - struct felix *felix; + struct felix *felix = pci_get_drvdata(pdev); - felix = pci_get_drvdata(pdev); + if (!felix) + return; dsa_unregister_switch(felix->ds); @@ -1482,6 +1483,20 @@ static void felix_pci_remove(struct pci_dev *pdev) kfree(felix); pci_disable_device(pdev); + + pci_set_drvdata(pdev, NULL); +} + +static void felix_pci_shutdown(struct pci_dev *pdev) +{ + struct felix *felix = pci_get_drvdata(pdev); + + if (!felix) + return; + + dsa_switch_shutdown(felix->ds); + + pci_set_drvdata(pdev, NULL); } static struct pci_device_id felix_ids[] = { @@ -1498,6 +1513,7 @@ static struct pci_driver felix_vsc9959_pci_driver = { .id_table = felix_ids, .probe = felix_pci_probe, .remove = felix_pci_remove, + .shutdown = felix_pci_shutdown, }; module_pci_driver(felix_vsc9959_pci_driver); diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c index deae923c8b7a..de1d34a1f1e4 100644 --- a/drivers/net/dsa/ocelot/seville_vsc9953.c +++ b/drivers/net/dsa/ocelot/seville_vsc9953.c @@ -1245,18 +1245,33 @@ err_alloc_felix: static int seville_remove(struct platform_device *pdev) { - struct felix *felix; + struct felix *felix = platform_get_drvdata(pdev); - felix = platform_get_drvdata(pdev); + if (!felix) + return 0; dsa_unregister_switch(felix->ds); kfree(felix->ds); kfree(felix); + platform_set_drvdata(pdev, NULL); + return 0; } +static void seville_shutdown(struct platform_device *pdev) +{ + struct felix *felix = platform_get_drvdata(pdev); + + if (!felix) + return; + + dsa_switch_shutdown(felix->ds); + + platform_set_drvdata(pdev, NULL); +} + static const struct of_device_id seville_of_match[] = { { .compatible = "mscc,vsc9953-switch" }, { }, @@ -1266,6 +1281,7 @@ MODULE_DEVICE_TABLE(of, seville_of_match); static struct platform_driver seville_vsc9953_driver = { .probe = seville_probe, .remove = seville_remove, + .shutdown = seville_shutdown, .driver = { .name = "mscc_seville", .of_match_table = of_match_ptr(seville_of_match), diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c index 563d8a279030..a6bfb6abc51a 100644 --- a/drivers/net/dsa/qca/ar9331.c +++ b/drivers/net/dsa/qca/ar9331.c @@ -1083,6 +1083,9 @@ static void ar9331_sw_remove(struct mdio_device *mdiodev) struct ar9331_sw_priv *priv = dev_get_drvdata(&mdiodev->dev); unsigned int i; + if (!priv) + return; + for (i = 0; i < ARRAY_SIZE(priv->port); i++) { struct ar9331_sw_port *port = &priv->port[i]; @@ -1094,6 +1097,20 @@ static void ar9331_sw_remove(struct mdio_device *mdiodev) dsa_unregister_switch(&priv->ds); reset_control_assert(priv->sw_reset); + + dev_set_drvdata(&mdiodev->dev, NULL); +} + +static void ar9331_sw_shutdown(struct mdio_device *mdiodev) +{ + struct ar9331_sw_priv *priv = dev_get_drvdata(&mdiodev->dev); + + if (!priv) + return; + + dsa_switch_shutdown(&priv->ds); + + dev_set_drvdata(&mdiodev->dev, NULL); } static const struct of_device_id ar9331_sw_of_match[] = { @@ -1104,6 +1121,7 @@ static const struct of_device_id ar9331_sw_of_match[] = { static struct mdio_driver ar9331_sw_mdio_driver = { .probe = ar9331_sw_probe, .remove = ar9331_sw_remove, + .shutdown = ar9331_sw_shutdown, .mdiodrv.driver = { .name = AR9331_SW_NAME, .of_match_table = ar9331_sw_of_match, diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index bda5a9bf4f52..a984f06f6f04 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -1880,10 +1880,27 @@ qca8k_sw_remove(struct mdio_device *mdiodev) struct qca8k_priv *priv = dev_get_drvdata(&mdiodev->dev); int i; + if (!priv) + return; + for (i = 0; i < QCA8K_NUM_PORTS; i++) qca8k_port_set_status(priv, i, 0); dsa_unregister_switch(priv->ds); + + dev_set_drvdata(&mdiodev->dev, NULL); +} + +static void qca8k_sw_shutdown(struct mdio_device *mdiodev) +{ + struct qca8k_priv *priv = dev_get_drvdata(&mdiodev->dev); + + if (!priv) + return; + + dsa_switch_shutdown(priv->ds); + + dev_set_drvdata(&mdiodev->dev, NULL); } #ifdef CONFIG_PM_SLEEP @@ -1940,6 +1957,7 @@ static const struct of_device_id qca8k_of_match[] = { static struct mdio_driver qca8kmdio_driver = { .probe = qca8k_sw_probe, .remove = qca8k_sw_remove, + .shutdown = qca8k_sw_shutdown, .mdiodrv.driver = { .name = "qca8k", .of_match_table = qca8k_of_match, diff --git a/drivers/net/dsa/realtek-smi-core.c b/drivers/net/dsa/realtek-smi-core.c index 8e49d4f85d48..dd2f0d6208b3 100644 --- a/drivers/net/dsa/realtek-smi-core.c +++ b/drivers/net/dsa/realtek-smi-core.c @@ -464,16 +464,33 @@ static int realtek_smi_probe(struct platform_device *pdev) static int realtek_smi_remove(struct platform_device *pdev) { - struct realtek_smi *smi = dev_get_drvdata(&pdev->dev); + struct realtek_smi *smi = platform_get_drvdata(pdev); + + if (!smi) + return 0; dsa_unregister_switch(smi->ds); if (smi->slave_mii_bus) of_node_put(smi->slave_mii_bus->dev.of_node); gpiod_set_value(smi->reset, 1); + platform_set_drvdata(pdev, NULL); + return 0; } +static void realtek_smi_shutdown(struct platform_device *pdev) +{ + struct realtek_smi *smi = platform_get_drvdata(pdev); + + if (!smi) + return; + + dsa_switch_shutdown(smi->ds); + + platform_set_drvdata(pdev, NULL); +} + static const struct of_device_id realtek_smi_of_match[] = { { .compatible = "realtek,rtl8366rb", @@ -495,6 +512,7 @@ static struct platform_driver realtek_smi_driver = { }, .probe = realtek_smi_probe, .remove = realtek_smi_remove, + .shutdown = realtek_smi_shutdown, }; module_platform_driver(realtek_smi_driver); diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 2f8cc6686c38..7c0db80eff00 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -3335,13 +3335,29 @@ static int sja1105_probe(struct spi_device *spi) static int sja1105_remove(struct spi_device *spi) { struct sja1105_private *priv = spi_get_drvdata(spi); - struct dsa_switch *ds = priv->ds; - dsa_unregister_switch(ds); + if (!priv) + return 0; + + dsa_unregister_switch(priv->ds); + + spi_set_drvdata(spi, NULL); return 0; } +static void sja1105_shutdown(struct spi_device *spi) +{ + struct sja1105_private *priv = spi_get_drvdata(spi); + + if (!priv) + return; + + dsa_switch_shutdown(priv->ds); + + spi_set_drvdata(spi, NULL); +} + static const struct of_device_id sja1105_dt_ids[] = { { .compatible = "nxp,sja1105e", .data = &sja1105e_info }, { .compatible = "nxp,sja1105t", .data = &sja1105t_info }, @@ -3365,6 +3381,7 @@ static struct spi_driver sja1105_driver = { }, .probe = sja1105_probe, .remove = sja1105_remove, + .shutdown = sja1105_shutdown, }; module_spi_driver(sja1105_driver); diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c index 19ce4aa0973b..a4b1447ff055 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-core.c +++ b/drivers/net/dsa/vitesse-vsc73xx-core.c @@ -1225,6 +1225,12 @@ int vsc73xx_remove(struct vsc73xx *vsc) } EXPORT_SYMBOL(vsc73xx_remove); +void vsc73xx_shutdown(struct vsc73xx *vsc) +{ + dsa_switch_shutdown(vsc->ds); +} +EXPORT_SYMBOL(vsc73xx_shutdown); + MODULE_AUTHOR("Linus Walleij "); MODULE_DESCRIPTION("Vitesse VSC7385/7388/7395/7398 driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/dsa/vitesse-vsc73xx-platform.c b/drivers/net/dsa/vitesse-vsc73xx-platform.c index 2a57f337b2a2..fe4b154a0a57 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-platform.c +++ b/drivers/net/dsa/vitesse-vsc73xx-platform.c @@ -116,7 +116,26 @@ static int vsc73xx_platform_remove(struct platform_device *pdev) { struct vsc73xx_platform *vsc_platform = platform_get_drvdata(pdev); - return vsc73xx_remove(&vsc_platform->vsc); + if (!vsc_platform) + return 0; + + vsc73xx_remove(&vsc_platform->vsc); + + platform_set_drvdata(pdev, NULL); + + return 0; +} + +static void vsc73xx_platform_shutdown(struct platform_device *pdev) +{ + struct vsc73xx_platform *vsc_platform = platform_get_drvdata(pdev); + + if (!vsc_platform) + return; + + vsc73xx_shutdown(&vsc_platform->vsc); + + platform_set_drvdata(pdev, NULL); } static const struct vsc73xx_ops vsc73xx_platform_ops = { @@ -144,6 +163,7 @@ MODULE_DEVICE_TABLE(of, vsc73xx_of_match); static struct platform_driver vsc73xx_platform_driver = { .probe = vsc73xx_platform_probe, .remove = vsc73xx_platform_remove, + .shutdown = vsc73xx_platform_shutdown, .driver = { .name = "vsc73xx-platform", .of_match_table = vsc73xx_of_match, diff --git a/drivers/net/dsa/vitesse-vsc73xx-spi.c b/drivers/net/dsa/vitesse-vsc73xx-spi.c index 81eca4a5781d..645398901e05 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-spi.c +++ b/drivers/net/dsa/vitesse-vsc73xx-spi.c @@ -163,7 +163,26 @@ static int vsc73xx_spi_remove(struct spi_device *spi) { struct vsc73xx_spi *vsc_spi = spi_get_drvdata(spi); - return vsc73xx_remove(&vsc_spi->vsc); + if (!vsc_spi) + return 0; + + vsc73xx_remove(&vsc_spi->vsc); + + spi_set_drvdata(spi, NULL); + + return 0; +} + +static void vsc73xx_spi_shutdown(struct spi_device *spi) +{ + struct vsc73xx_spi *vsc_spi = spi_get_drvdata(spi); + + if (!vsc_spi) + return; + + vsc73xx_shutdown(&vsc_spi->vsc); + + spi_set_drvdata(spi, NULL); } static const struct vsc73xx_ops vsc73xx_spi_ops = { @@ -191,6 +210,7 @@ MODULE_DEVICE_TABLE(of, vsc73xx_of_match); static struct spi_driver vsc73xx_spi_driver = { .probe = vsc73xx_spi_probe, .remove = vsc73xx_spi_remove, + .shutdown = vsc73xx_spi_shutdown, .driver = { .name = "vsc73xx-spi", .of_match_table = vsc73xx_of_match, diff --git a/drivers/net/dsa/vitesse-vsc73xx.h b/drivers/net/dsa/vitesse-vsc73xx.h index 7478f8d4e0a9..30b951504e65 100644 --- a/drivers/net/dsa/vitesse-vsc73xx.h +++ b/drivers/net/dsa/vitesse-vsc73xx.h @@ -27,3 +27,4 @@ struct vsc73xx_ops { int vsc73xx_is_addr_valid(u8 block, u8 subblock); int vsc73xx_probe(struct vsc73xx *vsc); int vsc73xx_remove(struct vsc73xx *vsc); +void vsc73xx_shutdown(struct vsc73xx *vsc); diff --git a/include/net/dsa.h b/include/net/dsa.h index 258867eff230..6e29c0e080f6 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -1046,6 +1046,7 @@ static inline int dsa_ndo_eth_ioctl(struct net_device *dev, struct ifreq *ifr, void dsa_unregister_switch(struct dsa_switch *ds); int dsa_register_switch(struct dsa_switch *ds); +void dsa_switch_shutdown(struct dsa_switch *ds); struct dsa_switch *dsa_switch_find(int tree_index, int sw_index); #ifdef CONFIG_PM_SLEEP int dsa_switch_suspend(struct dsa_switch *ds); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index eef13cd20f19..fa88e58705f0 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -1562,3 +1562,53 @@ void dsa_unregister_switch(struct dsa_switch *ds) mutex_unlock(&dsa2_mutex); } EXPORT_SYMBOL_GPL(dsa_unregister_switch); + +/* If the DSA master chooses to unregister its net_device on .shutdown, DSA is + * blocking that operation from completion, due to the dev_hold taken inside + * netdev_upper_dev_link. Unlink the DSA slave interfaces from being uppers of + * the DSA master, so that the system can reboot successfully. + */ +void dsa_switch_shutdown(struct dsa_switch *ds) +{ + struct net_device *master, *slave_dev; + LIST_HEAD(unregister_list); + struct dsa_port *dp; + + mutex_lock(&dsa2_mutex); + rtnl_lock(); + + list_for_each_entry(dp, &ds->dst->ports, list) { + if (dp->ds != ds) + continue; + + if (!dsa_port_is_user(dp)) + continue; + + master = dp->cpu_dp->master; + slave_dev = dp->slave; + + netdev_upper_dev_unlink(master, slave_dev); + /* Just unlinking ourselves as uppers of the master is not + * sufficient. When the master net device unregisters, that will + * also call dev_close, which we will catch as NETDEV_GOING_DOWN + * and trigger a dev_close on our own devices (dsa_slave_close). + * In turn, that will call dev_mc_unsync on the master's net + * device. If the master is also a DSA switch port, this will + * trigger dsa_slave_set_rx_mode which will call dev_mc_sync on + * its own master. Lockdep will complain about the fact that + * all cascaded masters have the same dsa_master_addr_list_lock_key, + * which it normally would not do if the cascaded masters would + * be in a proper upper/lower relationship, which we've just + * destroyed. + * To suppress the lockdep warnings, let's actually unregister + * the DSA slave interfaces too, to avoid the nonsensical + * multicast address list synchronization on shutdown. + */ + unregister_netdevice_queue(slave_dev, &unregister_list); + } + unregister_netdevice_many(&unregister_list); + + rtnl_unlock(); + mutex_unlock(&dsa2_mutex); +} +EXPORT_SYMBOL_GPL(dsa_switch_shutdown); From 46baae56e1001a771a5d132aa883cb5605013ae2 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 17 Sep 2021 16:34:34 +0300 Subject: [PATCH 351/543] net: dsa: hellcreek: be compatible with masters which unregister on shutdown Since commit 2f1e8ea726e9 ("net: dsa: link interfaces with the DSA master to get rid of lockdep warnings"), DSA gained a requirement which it did not fulfill, which is to unlink itself from the DSA master at shutdown time. Since the hellcreek driver was introduced after the bad commit, it has never worked with DSA masters which decide to unregister their net_device on shutdown, effectively hanging the reboot process. Hellcreek is a platform device driver, so we probably cannot have the oddities of ->shutdown and ->remove getting both called for the exact same struct device. But to be in line with the pattern from the other device drivers which are on slow buses, implement the same "if this then not that" pattern of either running the ->shutdown or the ->remove hook. The driver's current ->remove implementation makes that very easy because it already zeroes out its device_drvdata on ->remove. Fixes: e4b27ebc780f ("net: dsa: Add DSA driver for Hirschmann Hellcreek switches") Link: https://lore.kernel.org/netdev/20210909095324.12978-1-LinoSanfilippo@gmx.de/ Reported-by: Lino Sanfilippo Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Acked-by: Kurt Kanzenbach Signed-off-by: David S. Miller --- drivers/net/dsa/hirschmann/hellcreek.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c index 542cfc4ccb08..354655f9ed00 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.c +++ b/drivers/net/dsa/hirschmann/hellcreek.c @@ -1916,6 +1916,9 @@ static int hellcreek_remove(struct platform_device *pdev) { struct hellcreek *hellcreek = platform_get_drvdata(pdev); + if (!hellcreek) + return 0; + hellcreek_hwtstamp_free(hellcreek); hellcreek_ptp_free(hellcreek); dsa_unregister_switch(hellcreek->ds); @@ -1924,6 +1927,18 @@ static int hellcreek_remove(struct platform_device *pdev) return 0; } +static void hellcreek_shutdown(struct platform_device *pdev) +{ + struct hellcreek *hellcreek = platform_get_drvdata(pdev); + + if (!hellcreek) + return; + + dsa_switch_shutdown(hellcreek->ds); + + platform_set_drvdata(pdev, NULL); +} + static const struct hellcreek_platform_data de1soc_r1_pdata = { .name = "r4c30", .num_ports = 4, @@ -1946,6 +1961,7 @@ MODULE_DEVICE_TABLE(of, hellcreek_of_match); static struct platform_driver hellcreek_driver = { .probe = hellcreek_probe, .remove = hellcreek_remove, + .shutdown = hellcreek_shutdown, .driver = { .name = "hellcreek", .of_match_table = hellcreek_of_match, From fe4053078cd0f02a3fa140c43660f327702a9f10 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 17 Sep 2021 16:34:35 +0300 Subject: [PATCH 352/543] net: dsa: microchip: ksz8863: be compatible with masters which unregister on shutdown Since commit 2f1e8ea726e9 ("net: dsa: link interfaces with the DSA master to get rid of lockdep warnings"), DSA gained a requirement which it did not fulfill, which is to unlink itself from the DSA master at shutdown time. Since the Microchip sub-driver for KSZ8863 was introduced after the bad commit, it has never worked with DSA masters which decide to unregister their net_device on shutdown, effectively hanging the reboot process. To fix that, we need to call dsa_switch_shutdown. Since this driver expects the MDIO bus to be backed by mdio_bitbang, I don't think there is currently any MDIO bus driver which implements its ->shutdown by redirecting it to ->remove, but in any case, to be compatible with that pattern, it is necessary to implement an "if this then not that" scheme, to avoid ->remove and ->shutdown from being called both for the same struct device. Fixes: 60a364760002 ("net: dsa: microchip: Add Microchip KSZ8863 SMI based driver support") Link: https://lore.kernel.org/netdev/20210909095324.12978-1-LinoSanfilippo@gmx.de/ Reported-by: Lino Sanfilippo Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz8863_smi.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/dsa/microchip/ksz8863_smi.c b/drivers/net/dsa/microchip/ksz8863_smi.c index 11293485138c..5883fa7edda2 100644 --- a/drivers/net/dsa/microchip/ksz8863_smi.c +++ b/drivers/net/dsa/microchip/ksz8863_smi.c @@ -191,6 +191,18 @@ static void ksz8863_smi_remove(struct mdio_device *mdiodev) if (dev) ksz_switch_remove(dev); + + dev_set_drvdata(&mdiodev->dev, NULL); +} + +static void ksz8863_smi_shutdown(struct mdio_device *mdiodev) +{ + struct ksz_device *dev = dev_get_drvdata(&mdiodev->dev); + + if (dev) + dsa_switch_shutdown(dev->ds); + + dev_set_drvdata(&mdiodev->dev, NULL); } static const struct of_device_id ksz8863_dt_ids[] = { @@ -203,6 +215,7 @@ MODULE_DEVICE_TABLE(of, ksz8863_dt_ids); static struct mdio_driver ksz8863_driver = { .probe = ksz8863_smi_probe, .remove = ksz8863_smi_remove, + .shutdown = ksz8863_smi_shutdown, .mdiodrv.driver = { .name = "ksz8863-switch", .of_match_table = ksz8863_dt_ids, From a68e9da48568a0adf5dc817ef81971c0d1aa0672 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 17 Sep 2021 16:34:36 +0300 Subject: [PATCH 353/543] net: dsa: xrs700x: be compatible with masters which unregister on shutdown Since commit 2f1e8ea726e9 ("net: dsa: link interfaces with the DSA master to get rid of lockdep warnings"), DSA gained a requirement which it did not fulfill, which is to unlink itself from the DSA master at shutdown time. Since the Arrow SpeedChips XRS700x driver was introduced after the bad commit, it has never worked with DSA masters which decide to unregister their net_device on shutdown, effectively hanging the reboot process. To fix that, we need to call dsa_switch_shutdown. These devices can be connected by I2C or by MDIO, and if I search for I2C or MDIO bus drivers that implement their ->shutdown by redirecting it to ->remove I don't see any, however this does not mean it would not be possible. To be compatible with that pattern, it is necessary to implement an "if this then not that" scheme, to avoid ->remove and ->shutdown from being called both for the same struct device. Fixes: ee00b24f32eb ("net: dsa: add Arrow SpeedChips XRS700x driver") Link: https://lore.kernel.org/netdev/20210909095324.12978-1-LinoSanfilippo@gmx.de/ Reported-by: Lino Sanfilippo Signed-off-by: Vladimir Oltean Reviewed-by: George McCollister Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/xrs700x/xrs700x.c | 6 ++++++ drivers/net/dsa/xrs700x/xrs700x.h | 1 + drivers/net/dsa/xrs700x/xrs700x_i2c.c | 18 ++++++++++++++++++ drivers/net/dsa/xrs700x/xrs700x_mdio.c | 18 ++++++++++++++++++ 4 files changed, 43 insertions(+) diff --git a/drivers/net/dsa/xrs700x/xrs700x.c b/drivers/net/dsa/xrs700x/xrs700x.c index 130abb0f1438..469420941054 100644 --- a/drivers/net/dsa/xrs700x/xrs700x.c +++ b/drivers/net/dsa/xrs700x/xrs700x.c @@ -822,6 +822,12 @@ void xrs700x_switch_remove(struct xrs700x *priv) } EXPORT_SYMBOL(xrs700x_switch_remove); +void xrs700x_switch_shutdown(struct xrs700x *priv) +{ + dsa_switch_shutdown(priv->ds); +} +EXPORT_SYMBOL(xrs700x_switch_shutdown); + MODULE_AUTHOR("George McCollister "); MODULE_DESCRIPTION("Arrow SpeedChips XRS700x DSA driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/dsa/xrs700x/xrs700x.h b/drivers/net/dsa/xrs700x/xrs700x.h index ff62cf61b091..4d58257471d2 100644 --- a/drivers/net/dsa/xrs700x/xrs700x.h +++ b/drivers/net/dsa/xrs700x/xrs700x.h @@ -40,3 +40,4 @@ struct xrs700x { struct xrs700x *xrs700x_switch_alloc(struct device *base, void *devpriv); int xrs700x_switch_register(struct xrs700x *priv); void xrs700x_switch_remove(struct xrs700x *priv); +void xrs700x_switch_shutdown(struct xrs700x *priv); diff --git a/drivers/net/dsa/xrs700x/xrs700x_i2c.c b/drivers/net/dsa/xrs700x/xrs700x_i2c.c index 489d9385b4f0..6deae388a0d6 100644 --- a/drivers/net/dsa/xrs700x/xrs700x_i2c.c +++ b/drivers/net/dsa/xrs700x/xrs700x_i2c.c @@ -109,11 +109,28 @@ static int xrs700x_i2c_remove(struct i2c_client *i2c) { struct xrs700x *priv = i2c_get_clientdata(i2c); + if (!priv) + return 0; + xrs700x_switch_remove(priv); + i2c_set_clientdata(i2c, NULL); + return 0; } +static void xrs700x_i2c_shutdown(struct i2c_client *i2c) +{ + struct xrs700x *priv = i2c_get_clientdata(i2c); + + if (!priv) + return; + + xrs700x_switch_shutdown(priv); + + i2c_set_clientdata(i2c, NULL); +} + static const struct i2c_device_id xrs700x_i2c_id[] = { { "xrs700x-switch", 0 }, {}, @@ -137,6 +154,7 @@ static struct i2c_driver xrs700x_i2c_driver = { }, .probe = xrs700x_i2c_probe, .remove = xrs700x_i2c_remove, + .shutdown = xrs700x_i2c_shutdown, .id_table = xrs700x_i2c_id, }; diff --git a/drivers/net/dsa/xrs700x/xrs700x_mdio.c b/drivers/net/dsa/xrs700x/xrs700x_mdio.c index 44f58bee04a4..d01cf1073d49 100644 --- a/drivers/net/dsa/xrs700x/xrs700x_mdio.c +++ b/drivers/net/dsa/xrs700x/xrs700x_mdio.c @@ -136,7 +136,24 @@ static void xrs700x_mdio_remove(struct mdio_device *mdiodev) { struct xrs700x *priv = dev_get_drvdata(&mdiodev->dev); + if (!priv) + return; + xrs700x_switch_remove(priv); + + dev_set_drvdata(&mdiodev->dev, NULL); +} + +static void xrs700x_mdio_shutdown(struct mdio_device *mdiodev) +{ + struct xrs700x *priv = dev_get_drvdata(&mdiodev->dev); + + if (!priv) + return; + + xrs700x_switch_shutdown(priv); + + dev_set_drvdata(&mdiodev->dev, NULL); } static const struct of_device_id __maybe_unused xrs700x_mdio_dt_ids[] = { @@ -155,6 +172,7 @@ static struct mdio_driver xrs700x_mdio_driver = { }, .probe = xrs700x_mdio_probe, .remove = xrs700x_mdio_remove, + .shutdown = xrs700x_mdio_shutdown, }; mdio_module_driver(xrs700x_mdio_driver); From 3ede7f84c7c21f93c5eac611d60eba3f2c765e0f Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 17 Sep 2021 08:27:10 +0200 Subject: [PATCH 354/543] xen-netback: correct success/error reporting for the SKB-with-fraglist case When re-entering the main loop of xenvif_tx_check_gop() a 2nd time, the special considerations for the head of the SKB no longer apply. Don't mistakenly report ERROR to the frontend for the first entry in the list, even if - from all I can tell - this shouldn't matter much as the overall transmit will need to be considered failed anyway. Signed-off-by: Jan Beulich Reviewed-by: Paul Durrant Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 39a01c2a3058..32d5bc4919d8 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -499,7 +499,7 @@ check_frags: * the header's copy failed, and they are * sharing a slot, send an error */ - if (i == 0 && sharedslot) + if (i == 0 && !first_shinfo && sharedslot) xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR); else From afd92d82c9d715fb97565408755acad81573591a Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 17 Sep 2021 16:34:06 +0800 Subject: [PATCH 355/543] virtio-net: fix pages leaking when building skb in big mode We try to use build_skb() if we had sufficient tailroom. But we forget to release the unused pages chained via private in big mode which will leak pages. Fixing this by release the pages after building the skb in big mode. Cc: Xuan Zhuo Fixes: fb32856b16ad ("virtio-net: page_to_skb() use build_skb when there's sufficient tailroom") Signed-off-by: Jason Wang Reviewed-by: Xuan Zhuo Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 271d38c1d9f8..79bd2585ec6b 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -423,6 +423,10 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, skb_reserve(skb, p - buf); skb_put(skb, len); + + page = (struct page *)page->private; + if (page) + give_pages(rq, page); goto ok; } From 7237a494decfa17d0b9d0076e6cee3235719de90 Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Fri, 17 Sep 2021 13:22:05 +0300 Subject: [PATCH 356/543] enetc: Fix illegal access when reading affinity_hint irq_set_affinity_hit() stores a reference to the cpumask_t parameter in the irq descriptor, and that reference can be accessed later from irq_affinity_hint_proc_show(). Since the cpu_mask parameter passed to irq_set_affinity_hit() has only temporary storage (it's on the stack memory), later accesses to it are illegal. Thus reads from the corresponding procfs affinity_hint file can result in paging request oops. The issue is fixed by the get_cpu_mask() helper, which provides a permanent storage for the cpumask_t parameter. Fixes: d4fd0404c1c9 ("enetc: Introduce basic PF and VF ENETC ethernet drivers") Signed-off-by: Claudiu Manoil Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 3ca93adb9662..7f90c27c0e79 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1879,7 +1879,6 @@ static void enetc_clear_bdrs(struct enetc_ndev_priv *priv) static int enetc_setup_irqs(struct enetc_ndev_priv *priv) { struct pci_dev *pdev = priv->si->pdev; - cpumask_t cpu_mask; int i, j, err; for (i = 0; i < priv->bdr_int_num; i++) { @@ -1908,9 +1907,7 @@ static int enetc_setup_irqs(struct enetc_ndev_priv *priv) enetc_wr(hw, ENETC_SIMSITRV(idx), entry); } - cpumask_clear(&cpu_mask); - cpumask_set_cpu(i % num_online_cpus(), &cpu_mask); - irq_set_affinity_hint(irq, &cpu_mask); + irq_set_affinity_hint(irq, get_cpu_mask(i % num_online_cpus())); } return 0; From 9f7afa05c9522b086327929ae622facab0f0f72b Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Fri, 17 Sep 2021 13:22:06 +0300 Subject: [PATCH 357/543] enetc: Fix uninitialized struct dim_sample field usage The only struct dim_sample member that does not get initialized by dim_update_sample() is comp_ctr. (There is special API to initialize comp_ctr: dim_update_sample_with_comps(), and it is currently used only for RDMA.) comp_ctr is used to compute curr_stats->cmps and curr_stats->cpe_ratio (see dim_calc_stats()) which in turn are consumed by the rdma_dim_*() API. Therefore, functionally, the net_dim*() API consumers are not affected. Nevertheless, fix the computation of statistics based on an uninitialized variable, even if the mentioned statistics are not used at the moment. Fixes: ae0e6a5d1627 ("enetc: Add adaptive interrupt coalescing") Signed-off-by: Claudiu Manoil Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 7f90c27c0e79..042327b9981f 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -419,7 +419,7 @@ static void enetc_rx_dim_work(struct work_struct *w) static void enetc_rx_net_dim(struct enetc_int_vector *v) { - struct dim_sample dim_sample; + struct dim_sample dim_sample = {}; v->comp_cnt++; From 87758511075ec961486fe78d7548dd709b524433 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 17 Sep 2021 14:05:47 -0700 Subject: [PATCH 358/543] igc: fix build errors for PTP When IGC=y and PTP_1588_CLOCK=m, the ptp_*() interface family is not available to the igc driver. Make this driver depend on PTP_1588_CLOCK_OPTIONAL so that it will build without errors. Various igc commits have used ptp_*() functions without checking that PTP_1588_CLOCK is enabled. Fix all of these here. Fixes these build errors: ld: drivers/net/ethernet/intel/igc/igc_main.o: in function `igc_msix_other': igc_main.c:(.text+0x6494): undefined reference to `ptp_clock_event' ld: igc_main.c:(.text+0x64ef): undefined reference to `ptp_clock_event' ld: igc_main.c:(.text+0x6559): undefined reference to `ptp_clock_event' ld: drivers/net/ethernet/intel/igc/igc_ethtool.o: in function `igc_ethtool_get_ts_info': igc_ethtool.c:(.text+0xc7a): undefined reference to `ptp_clock_index' ld: drivers/net/ethernet/intel/igc/igc_ptp.o: in function `igc_ptp_feature_enable_i225': igc_ptp.c:(.text+0x330): undefined reference to `ptp_find_pin' ld: igc_ptp.c:(.text+0x36f): undefined reference to `ptp_find_pin' ld: drivers/net/ethernet/intel/igc/igc_ptp.o: in function `igc_ptp_init': igc_ptp.c:(.text+0x11cd): undefined reference to `ptp_clock_register' ld: drivers/net/ethernet/intel/igc/igc_ptp.o: in function `igc_ptp_stop': igc_ptp.c:(.text+0x12dd): undefined reference to `ptp_clock_unregister' ld: drivers/platform/x86/dell/dell-wmi-privacy.o: in function `dell_privacy_wmi_probe': Fixes: 64433e5bf40ab ("igc: Enable internal i225 PPS") Fixes: 60dbede0c4f3d ("igc: Add support for ethtool GET_TS_INFO command") Fixes: 87938851b6efb ("igc: enable auxiliary PHC functions for the i225") Fixes: 5f2958052c582 ("igc: Add basic skeleton for PTP") Signed-off-by: Randy Dunlap Cc: Ederson de Souza Cc: Tony Nguyen Cc: Vinicius Costa Gomes Cc: Jeff Kirsher Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Jesse Brandeburg Cc: intel-wired-lan@lists.osuosl.org Acked-by: Vinicius Costa Gomes Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index b0b6f90deb7d..ed8ea63bb172 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -335,6 +335,7 @@ config IGC tristate "Intel(R) Ethernet Controller I225-LM/I225-V support" default n depends on PCI + depends on PTP_1588_CLOCK_OPTIONAL help This driver supports Intel(R) Ethernet Controller I225-LM/I225-V family of adapters. From 48e6d083b3aa006052db687fb26eeceef1d325b6 Mon Sep 17 00:00:00 2001 From: Alejandro Concepcion-Rodriguez Date: Fri, 17 Sep 2021 18:49:59 +0000 Subject: [PATCH 359/543] docs: net: dsa: sja1105: fix reference to sja1105.txt The file sja1105.txt was converted to nxp,sja1105.yaml. Signed-off-by: Alejandro Concepcion-Rodriguez Signed-off-by: David S. Miller --- Documentation/networking/dsa/sja1105.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/dsa/sja1105.rst b/Documentation/networking/dsa/sja1105.rst index 564caeebe2b2..29b1bae0cf00 100644 --- a/Documentation/networking/dsa/sja1105.rst +++ b/Documentation/networking/dsa/sja1105.rst @@ -296,7 +296,7 @@ not available. Device Tree bindings and board design ===================================== -This section references ``Documentation/devicetree/bindings/net/dsa/sja1105.txt`` +This section references ``Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml`` and aims to showcase some potential switch caveats. RMII PHY role and out-of-band signaling From 2dcb96bacce36021c2f3eaae0cef607b5bb71ede Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 18 Sep 2021 14:42:35 +0200 Subject: [PATCH 360/543] net: core: Correct the sock::sk_lock.owned lockdep annotations lock_sock_fast() and lock_sock_nested() contain lockdep annotations for the sock::sk_lock.owned 'mutex'. sock::sk_lock.owned is not a regular mutex. It is just lockdep wise equivalent. In fact it's an open coded trivial mutex implementation with some interesting features. sock::sk_lock.slock is a regular spinlock protecting the 'mutex' representation sock::sk_lock.owned which is a plain boolean. If 'owned' is true, then some other task holds the 'mutex', otherwise it is uncontended. As this locking construct is obviously endangered by lock ordering issues as any other locking primitive it got lockdep annotated via a dedicated dependency map sock::sk_lock.dep_map which has to be updated at the lock and unlock sites. lock_sock_nested() is a straight forward 'mutex' lock operation: might_sleep(); spin_lock_bh(sock::sk_lock.slock) while (!try_lock(sock::sk_lock.owned)) { spin_unlock_bh(sock::sk_lock.slock); wait_for_release(); spin_lock_bh(sock::sk_lock.slock); } The lockdep annotation for sock::sk_lock.owned is for unknown reasons _after_ the lock has been acquired, i.e. after the code block above and after releasing sock::sk_lock.slock, but inside the bottom halves disabled region: spin_unlock(sock::sk_lock.slock); mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); local_bh_enable(); The placement after the unlock is obvious because otherwise the mutex_acquire() would nest into the spin lock held region. But that's from the lockdep perspective still the wrong place: 1) The mutex_acquire() is issued _after_ the successful acquisition which is pointless because in a dead lock scenario this point is never reached which means that if the deadlock is the first instance of exposing the wrong lock order lockdep does not have a chance to detect it. 2) It only works because lockdep is rather lax on the context from which the mutex_acquire() is issued. Acquiring a mutex inside a bottom halves and therefore non-preemptible region is obviously invalid, except for a trylock which is clearly not the case here. This 'works' stops working on RT enabled kernels where the bottom halves serialization is done via a local lock, which exposes this misplacement because the 'mutex' and the local lock nest the wrong way around and lockdep complains rightfully about a lock inversion. The placement is wrong since the initial commit a5b5bb9a053a ("[PATCH] lockdep: annotate sk_locks") which introduced this. Fix it by moving the mutex_acquire() in front of the actual lock acquisition, which is what the regular mutex_lock() operation does as well. lock_sock_fast() is not that straight forward. It looks at the first glance like a convoluted trylock operation: spin_lock_bh(sock::sk_lock.slock) if (!sock::sk_lock.owned) return false; while (!try_lock(sock::sk_lock.owned)) { spin_unlock_bh(sock::sk_lock.slock); wait_for_release(); spin_lock_bh(sock::sk_lock.slock); } spin_unlock(sock::sk_lock.slock); mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); local_bh_enable(); return true; But that's not the case: lock_sock_fast() is an interesting optimization for short critical sections which can run with bottom halves disabled and sock::sk_lock.slock held. This allows to shortcut the 'mutex' operation in the non contended case by preventing other lockers to acquire sock::sk_lock.owned because they are blocked on sock::sk_lock.slock, which in turn avoids the overhead of doing the heavy processing in release_sock() including waking up wait queue waiters. In the contended case, i.e. when sock::sk_lock.owned == true the behavior is the same as lock_sock_nested(). Semantically this shortcut means, that the task acquired the 'mutex' even if it does not touch the sock::sk_lock.owned field in the non-contended case. Not telling lockdep about this shortcut acquisition is hiding potential lock ordering violations in the fast path. As a consequence the same reasoning as for the above lock_sock_nested() case vs. the placement of the lockdep annotation applies. The current placement of the lockdep annotation was just copied from the original lock_sock(), now renamed to lock_sock_nested(), implementation. Fix this by moving the mutex_acquire() in front of the actual lock acquisition and adding the corresponding mutex_release() into unlock_sock_fast(). Also document the fast path return case with a comment. Reported-by: Sebastian Siewior Signed-off-by: Thomas Gleixner Cc: netdev@vger.kernel.org Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 1 + net/core/sock.c | 37 +++++++++++++++++++++++-------------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 66a9a90f9558..c005c3c750e8 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1640,6 +1640,7 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow) release_sock(sk); __release(&sk->sk_lock.slock); } else { + mutex_release(&sk->sk_lock.dep_map, _RET_IP_); spin_unlock_bh(&sk->sk_lock.slock); } } diff --git a/net/core/sock.c b/net/core/sock.c index 62627e868e03..512e629f9780 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3179,17 +3179,15 @@ EXPORT_SYMBOL(sock_init_data); void lock_sock_nested(struct sock *sk, int subclass) { + /* The sk_lock has mutex_lock() semantics here. */ + mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); + might_sleep(); spin_lock_bh(&sk->sk_lock.slock); if (sk->sk_lock.owned) __lock_sock(sk); sk->sk_lock.owned = 1; - spin_unlock(&sk->sk_lock.slock); - /* - * The sk_lock has mutex_lock() semantics here: - */ - mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); - local_bh_enable(); + spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL(lock_sock_nested); @@ -3227,24 +3225,35 @@ EXPORT_SYMBOL(release_sock); */ bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock) { + /* The sk_lock has mutex_lock() semantics here. */ + mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); + might_sleep(); spin_lock_bh(&sk->sk_lock.slock); - if (!sk->sk_lock.owned) + if (!sk->sk_lock.owned) { /* - * Note : We must disable BH + * Fast path return with bottom halves disabled and + * sock::sk_lock.slock held. + * + * The 'mutex' is not contended and holding + * sock::sk_lock.slock prevents all other lockers to + * proceed so the corresponding unlock_sock_fast() can + * avoid the slow path of release_sock() completely and + * just release slock. + * + * From a semantical POV this is equivalent to 'acquiring' + * the 'mutex', hence the corresponding lockdep + * mutex_release() has to happen in the fast path of + * unlock_sock_fast(). */ return false; + } __lock_sock(sk); sk->sk_lock.owned = 1; - spin_unlock(&sk->sk_lock.slock); - /* - * The sk_lock has mutex_lock() semantics here: - */ - mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); __acquire(&sk->sk_lock.slock); - local_bh_enable(); + spin_unlock_bh(&sk->sk_lock.slock); return true; } EXPORT_SYMBOL(lock_sock_fast); From 163957c43d96c2409d9d9d2e94823f7300f6e52c Mon Sep 17 00:00:00 2001 From: Colin Foster Date: Fri, 17 Sep 2021 08:39:04 -0700 Subject: [PATCH 361/543] net: mscc: ocelot: remove buggy and useless write to ANA_PFC_PFC_CFG A useless write to ANA_PFC_PFC_CFG was left in while refactoring ocelot to phylink. Since priority flow control is disabled, writing the speed has no effect. Further, it was using ethtool.h SPEED_ instead of OCELOT_SPEED_ macros, which are incorrectly offset for GENMASK. Lastly, for priority flow control to properly function, some scenarios would rely on the rate adaptation from the PCS while the MAC speed would be fixed. So it isn't used, and even if it was, neither "speed" nor "mac_speed" are necessarily the correct values to be used. Fixes: e6e12df625f2 ("net: mscc: ocelot: convert to phylink") Signed-off-by: Colin Foster Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index c581b955efb3..08be0440af28 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -569,10 +569,6 @@ void ocelot_phylink_mac_link_up(struct ocelot *ocelot, int port, ocelot_port_writel(ocelot_port, DEV_CLOCK_CFG_LINK_SPEED(speed), DEV_CLOCK_CFG); - /* No PFC */ - ocelot_write_gix(ocelot, ANA_PFC_PFC_CFG_FC_LINK_SPEED(speed), - ANA_PFC_PFC_CFG, port); - /* Core: Enable port for frame transfer */ ocelot_fields_write(ocelot, port, QSYS_SWITCH_PORT_MODE_PORT_ENA, 1); From ba68e9941984792f7e8a7be90b8245eb0d2b4d7b Mon Sep 17 00:00:00 2001 From: Colin Foster Date: Fri, 17 Sep 2021 08:39:05 -0700 Subject: [PATCH 362/543] net: mscc: ocelot: remove buggy duplicate write to DEV_CLOCK_CFG When updating ocelot to use phylink, a second write to DEV_CLOCK_CFG was mistakenly left in. It used the variable "speed" which, previously, would would have been assigned a value of OCELOT_SPEED_1000. In phylink the variable is be SPEED_1000, which is invalid for the DEV_CLOCK_LINK_SPEED macro. Removing it as unnecessary and buggy. Fixes: e6e12df625f2 ("net: mscc: ocelot: convert to phylink") Signed-off-by: Colin Foster Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 08be0440af28..729ba826ba17 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -563,12 +563,6 @@ void ocelot_phylink_mac_link_up(struct ocelot *ocelot, int port, ocelot_port_writel(ocelot_port, DEV_MAC_ENA_CFG_RX_ENA | DEV_MAC_ENA_CFG_TX_ENA, DEV_MAC_ENA_CFG); - /* Take MAC, Port, Phy (intern) and PCS (SGMII/Serdes) clock out of - * reset - */ - ocelot_port_writel(ocelot_port, DEV_CLOCK_CFG_LINK_SPEED(speed), - DEV_CLOCK_CFG); - /* Core: Enable port for frame transfer */ ocelot_fields_write(ocelot, port, QSYS_SWITCH_PORT_MODE_PORT_ENA, 1); From fdb475838539cb516caeeeaed06b4b5bc62c9179 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 17 Sep 2021 11:20:58 +0200 Subject: [PATCH 363/543] net: freescale: drop unneeded MODULE_ALIAS The MODULE_DEVICE_TABLE already creates proper alias for platform driver. Having another MODULE_ALIAS causes the alias to be duplicated. Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 80bd5c629fa0..ec87b370bba1 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -4176,5 +4176,4 @@ static struct platform_driver fec_driver = { module_platform_driver(fec_driver); -MODULE_ALIAS("platform:"DRIVER_NAME); MODULE_LICENSE("GPL"); From fd292c189a979838622d5e03e15fa688c81dd50b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 17 Sep 2021 17:29:16 +0300 Subject: [PATCH 364/543] net: dsa: tear down devlink port regions when tearing down the devlink port on error Commit 86f8b1c01a0a ("net: dsa: Do not make user port errors fatal") decided it was fine to ignore errors on certain ports that fail to probe, and go on with the ports that do probe fine. Commit fb6ec87f7229 ("net: dsa: Fix type was not set for devlink port") noticed that devlink_port_type_eth_set(dlp, dp->slave); does not get called, and devlink notices after a timeout of 3600 seconds and prints a WARN_ON. So it went ahead to unregister the devlink port. And because there exists an UNUSED port flavour, we actually re-register the devlink port as UNUSED. Commit 08156ba430b4 ("net: dsa: Add devlink port regions support to DSA") added devlink port regions, which are set up by the driver and not by DSA. When we trigger the devlink port deregistration and reregistration as unused, devlink now prints another WARN_ON, from here: devlink_port_unregister: WARN_ON(!list_empty(&devlink_port->region_list)); So the port still has regions, which makes sense, because they were set up by the driver, and the driver doesn't know we're unregistering the devlink port. Somebody needs to tear them down, and optionally (actually it would be nice, to be consistent) set them up again for the new devlink port. But DSA's layering stays in our way quite badly here. The options I've considered are: 1. Introduce a function in devlink to just change a port's type and flavour. No dice, devlink keeps a lot of state, it really wants the port to not be registered when you set its parameters, so changing anything can only be done by destroying what we currently have and recreating it. 2. Make DSA cache the parameters passed to dsa_devlink_port_region_create, and the region returned, keep those in a list, then when the devlink port unregister needs to take place, the existing devlink regions are destroyed by DSA, and we replay the creation of new regions using the cached parameters. Problem: mv88e6xxx keeps the region pointers in chip->ports[port].region, and these will remain stale after DSA frees them. There are many things DSA can do, but updating mv88e6xxx's private pointers is not one of them. 3. Just let the driver do it (i.e. introduce a very specific method called ds->ops->port_reinit_as_unused, which unregisters its devlink port devlink regions, then the old devlink port, then registers the new one, then the devlink port regions for it). While it does work, as opposed to the others, it's pretty horrible from an API perspective and we can do better. 4. Introduce a new pair of methods, ->port_setup and ->port_teardown, which in the case of mv88e6xxx must register and unregister the devlink port regions. Call these 2 methods when the port must be reinitialized as unused. Naturally, I went for the 4th approach. Fixes: 08156ba430b4 ("net: dsa: Add devlink port regions support to DSA") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 16 ++++++- drivers/net/dsa/mv88e6xxx/devlink.c | 73 ++++------------------------- drivers/net/dsa/mv88e6xxx/devlink.h | 6 ++- include/net/dsa.h | 8 ++++ net/dsa/dsa2.c | 51 ++++++++++++++++++-- 5 files changed, 81 insertions(+), 73 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index fb10422d2c33..8ab0be793811 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3071,7 +3071,7 @@ static void mv88e6xxx_teardown(struct dsa_switch *ds) { mv88e6xxx_teardown_devlink_params(ds); dsa_devlink_resources_unregister(ds); - mv88e6xxx_teardown_devlink_regions(ds); + mv88e6xxx_teardown_devlink_regions_global(ds); } static int mv88e6xxx_setup(struct dsa_switch *ds) @@ -3215,7 +3215,7 @@ unlock: if (err) goto out_resources; - err = mv88e6xxx_setup_devlink_regions(ds); + err = mv88e6xxx_setup_devlink_regions_global(ds); if (err) goto out_params; @@ -3229,6 +3229,16 @@ out_resources: return err; } +static int mv88e6xxx_port_setup(struct dsa_switch *ds, int port) +{ + return mv88e6xxx_setup_devlink_regions_port(ds, port); +} + +static void mv88e6xxx_port_teardown(struct dsa_switch *ds, int port) +{ + mv88e6xxx_teardown_devlink_regions_port(ds, port); +} + /* prod_id for switch families which do not have a PHY model number */ static const u16 family_prod_id_table[] = { [MV88E6XXX_FAMILY_6341] = MV88E6XXX_PORT_SWITCH_ID_PROD_6341, @@ -6116,6 +6126,8 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = { .change_tag_protocol = mv88e6xxx_change_tag_protocol, .setup = mv88e6xxx_setup, .teardown = mv88e6xxx_teardown, + .port_setup = mv88e6xxx_port_setup, + .port_teardown = mv88e6xxx_port_teardown, .phylink_validate = mv88e6xxx_validate, .phylink_mac_link_state = mv88e6xxx_serdes_pcs_get_state, .phylink_mac_config = mv88e6xxx_mac_config, diff --git a/drivers/net/dsa/mv88e6xxx/devlink.c b/drivers/net/dsa/mv88e6xxx/devlink.c index 0c0f5ea6680c..381068395c63 100644 --- a/drivers/net/dsa/mv88e6xxx/devlink.c +++ b/drivers/net/dsa/mv88e6xxx/devlink.c @@ -647,26 +647,25 @@ static struct mv88e6xxx_region mv88e6xxx_regions[] = { }, }; -static void -mv88e6xxx_teardown_devlink_regions_global(struct mv88e6xxx_chip *chip) +void mv88e6xxx_teardown_devlink_regions_global(struct dsa_switch *ds) { + struct mv88e6xxx_chip *chip = ds->priv; int i; for (i = 0; i < ARRAY_SIZE(mv88e6xxx_regions); i++) dsa_devlink_region_destroy(chip->regions[i]); } -static void -mv88e6xxx_teardown_devlink_regions_port(struct mv88e6xxx_chip *chip, - int port) +void mv88e6xxx_teardown_devlink_regions_port(struct dsa_switch *ds, int port) { + struct mv88e6xxx_chip *chip = ds->priv; + dsa_devlink_region_destroy(chip->ports[port].region); } -static int mv88e6xxx_setup_devlink_regions_port(struct dsa_switch *ds, - struct mv88e6xxx_chip *chip, - int port) +int mv88e6xxx_setup_devlink_regions_port(struct dsa_switch *ds, int port) { + struct mv88e6xxx_chip *chip = ds->priv; struct devlink_region *region; region = dsa_devlink_port_region_create(ds, @@ -681,40 +680,10 @@ static int mv88e6xxx_setup_devlink_regions_port(struct dsa_switch *ds, return 0; } -static void -mv88e6xxx_teardown_devlink_regions_ports(struct mv88e6xxx_chip *chip) -{ - int port; - - for (port = 0; port < mv88e6xxx_num_ports(chip); port++) - mv88e6xxx_teardown_devlink_regions_port(chip, port); -} - -static int mv88e6xxx_setup_devlink_regions_ports(struct dsa_switch *ds, - struct mv88e6xxx_chip *chip) -{ - int port; - int err; - - for (port = 0; port < mv88e6xxx_num_ports(chip); port++) { - err = mv88e6xxx_setup_devlink_regions_port(ds, chip, port); - if (err) - goto out; - } - - return 0; - -out: - while (port-- > 0) - mv88e6xxx_teardown_devlink_regions_port(chip, port); - - return err; -} - -static int mv88e6xxx_setup_devlink_regions_global(struct dsa_switch *ds, - struct mv88e6xxx_chip *chip) +int mv88e6xxx_setup_devlink_regions_global(struct dsa_switch *ds) { bool (*cond)(struct mv88e6xxx_chip *chip); + struct mv88e6xxx_chip *chip = ds->priv; struct devlink_region_ops *ops; struct devlink_region *region; u64 size; @@ -753,30 +722,6 @@ out: return PTR_ERR(region); } -int mv88e6xxx_setup_devlink_regions(struct dsa_switch *ds) -{ - struct mv88e6xxx_chip *chip = ds->priv; - int err; - - err = mv88e6xxx_setup_devlink_regions_global(ds, chip); - if (err) - return err; - - err = mv88e6xxx_setup_devlink_regions_ports(ds, chip); - if (err) - mv88e6xxx_teardown_devlink_regions_global(chip); - - return err; -} - -void mv88e6xxx_teardown_devlink_regions(struct dsa_switch *ds) -{ - struct mv88e6xxx_chip *chip = ds->priv; - - mv88e6xxx_teardown_devlink_regions_ports(chip); - mv88e6xxx_teardown_devlink_regions_global(chip); -} - int mv88e6xxx_devlink_info_get(struct dsa_switch *ds, struct devlink_info_req *req, struct netlink_ext_ack *extack) diff --git a/drivers/net/dsa/mv88e6xxx/devlink.h b/drivers/net/dsa/mv88e6xxx/devlink.h index 3d72db3dcf95..65ce6a6858b9 100644 --- a/drivers/net/dsa/mv88e6xxx/devlink.h +++ b/drivers/net/dsa/mv88e6xxx/devlink.h @@ -12,8 +12,10 @@ int mv88e6xxx_devlink_param_get(struct dsa_switch *ds, u32 id, struct devlink_param_gset_ctx *ctx); int mv88e6xxx_devlink_param_set(struct dsa_switch *ds, u32 id, struct devlink_param_gset_ctx *ctx); -int mv88e6xxx_setup_devlink_regions(struct dsa_switch *ds); -void mv88e6xxx_teardown_devlink_regions(struct dsa_switch *ds); +int mv88e6xxx_setup_devlink_regions_global(struct dsa_switch *ds); +void mv88e6xxx_teardown_devlink_regions_global(struct dsa_switch *ds); +int mv88e6xxx_setup_devlink_regions_port(struct dsa_switch *ds, int port); +void mv88e6xxx_teardown_devlink_regions_port(struct dsa_switch *ds, int port); int mv88e6xxx_devlink_info_get(struct dsa_switch *ds, struct devlink_info_req *req, diff --git a/include/net/dsa.h b/include/net/dsa.h index 6e29c0e080f6..d784e76113b8 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -585,8 +585,16 @@ struct dsa_switch_ops { int (*change_tag_protocol)(struct dsa_switch *ds, int port, enum dsa_tag_protocol proto); + /* Optional switch-wide initialization and destruction methods */ int (*setup)(struct dsa_switch *ds); void (*teardown)(struct dsa_switch *ds); + + /* Per-port initialization and destruction methods. Mandatory if the + * driver registers devlink port regions, optional otherwise. + */ + int (*port_setup)(struct dsa_switch *ds, int port); + void (*port_teardown)(struct dsa_switch *ds, int port); + u32 (*get_phy_flags)(struct dsa_switch *ds, int port); /* diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index fa88e58705f0..f14897d9b31d 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -429,6 +429,7 @@ static int dsa_port_setup(struct dsa_port *dp) { struct devlink_port *dlp = &dp->devlink_port; bool dsa_port_link_registered = false; + struct dsa_switch *ds = dp->ds; bool dsa_port_enabled = false; int err = 0; @@ -438,6 +439,12 @@ static int dsa_port_setup(struct dsa_port *dp) INIT_LIST_HEAD(&dp->fdbs); INIT_LIST_HEAD(&dp->mdbs); + if (ds->ops->port_setup) { + err = ds->ops->port_setup(ds, dp->index); + if (err) + return err; + } + switch (dp->type) { case DSA_PORT_TYPE_UNUSED: dsa_port_disable(dp); @@ -480,8 +487,11 @@ static int dsa_port_setup(struct dsa_port *dp) dsa_port_disable(dp); if (err && dsa_port_link_registered) dsa_port_link_unregister_of(dp); - if (err) + if (err) { + if (ds->ops->port_teardown) + ds->ops->port_teardown(ds, dp->index); return err; + } dp->setup = true; @@ -533,11 +543,15 @@ static int dsa_port_devlink_setup(struct dsa_port *dp) static void dsa_port_teardown(struct dsa_port *dp) { struct devlink_port *dlp = &dp->devlink_port; + struct dsa_switch *ds = dp->ds; struct dsa_mac_addr *a, *tmp; if (!dp->setup) return; + if (ds->ops->port_teardown) + ds->ops->port_teardown(ds, dp->index); + devlink_port_type_clear(dlp); switch (dp->type) { @@ -581,6 +595,36 @@ static void dsa_port_devlink_teardown(struct dsa_port *dp) dp->devlink_port_setup = false; } +/* Destroy the current devlink port, and create a new one which has the UNUSED + * flavour. At this point, any call to ds->ops->port_setup has been already + * balanced out by a call to ds->ops->port_teardown, so we know that any + * devlink port regions the driver had are now unregistered. We then call its + * ds->ops->port_setup again, in order for the driver to re-create them on the + * new devlink port. + */ +static int dsa_port_reinit_as_unused(struct dsa_port *dp) +{ + struct dsa_switch *ds = dp->ds; + int err; + + dsa_port_devlink_teardown(dp); + dp->type = DSA_PORT_TYPE_UNUSED; + err = dsa_port_devlink_setup(dp); + if (err) + return err; + + if (ds->ops->port_setup) { + /* On error, leave the devlink port registered, + * dsa_switch_teardown will clean it up later. + */ + err = ds->ops->port_setup(ds, dp->index); + if (err) + return err; + } + + return 0; +} + static int dsa_devlink_info_get(struct devlink *dl, struct devlink_info_req *req, struct netlink_ext_ack *extack) @@ -938,12 +982,9 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) list_for_each_entry(dp, &dst->ports, list) { err = dsa_port_setup(dp); if (err) { - dsa_port_devlink_teardown(dp); - dp->type = DSA_PORT_TYPE_UNUSED; - err = dsa_port_devlink_setup(dp); + err = dsa_port_reinit_as_unused(dp); if (err) goto teardown; - continue; } } From 029497e66bdc762e001880e4c85a91f35a54b1e2 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Sun, 19 Sep 2021 13:57:25 +0200 Subject: [PATCH 365/543] net: bgmac-bcma: handle deferred probe error due to mac-address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Due to the inclusion of nvmem handling into the mac-address getter function of_get_mac_address() by commit d01f449c008a ("of_net: add NVMEM support to of_get_mac_address") it is now possible to get a -EPROBE_DEFER return code. Which did cause bgmac to assign a random ethernet address. This exact issue happened on my Meraki MR32. The nvmem provider is an EEPROM (at24c64) which gets instantiated once the module driver is loaded... This happens once the filesystem becomes available. With this patch, bgmac_probe() will propagate the -EPROBE_DEFER error. Then the driver subsystem will reschedule the probe at a later time. Cc: Petr Štetiar Cc: Michael Walle Fixes: d01f449c008a ("of_net: add NVMEM support to of_get_mac_address") Signed-off-by: Christian Lamparter Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bgmac-bcma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c index 85fa0ab7201c..9513cfb5ba58 100644 --- a/drivers/net/ethernet/broadcom/bgmac-bcma.c +++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c @@ -129,6 +129,8 @@ static int bgmac_probe(struct bcma_device *core) bcma_set_drvdata(core, bgmac); err = of_get_mac_address(bgmac->dev->of_node, bgmac->net_dev->dev_addr); + if (err == -EPROBE_DEFER) + return err; /* If no MAC address assigned via device tree, check SPROM */ if (err) { From 48514a22333099b93c33ea1c7c97dad3f7a611ce Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 17 Sep 2021 13:26:14 -0600 Subject: [PATCH 366/543] selftests: net: af_unix: Fix incorrect args in test result msg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the args to fprintf(). Splitting the message ends up passing incorrect arg for "sigurg %d" and an extra arg overall. The test result message ends up incorrect. test_unix_oob.c: In function ‘main’: test_unix_oob.c:274:43: warning: format ‘%d’ expects argument of type ‘int’, but argument 3 has type ‘char *’ [-Wformat=] 274 | fprintf(stderr, "Test 3 failed, sigurg %d len %d OOB %c ", | ~^ | | | int | %s 275 | "atmark %d\n", signal_recvd, len, oob, atmark); | ~~~~~~~~~~~~~ | | | char * test_unix_oob.c:274:19: warning: too many arguments for format [-Wformat-extra-args] 274 | fprintf(stderr, "Test 3 failed, sigurg %d len %d OOB %c ", Signed-off-by: Shuah Khan Signed-off-by: David S. Miller --- tools/testing/selftests/net/af_unix/test_unix_oob.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c index 0f3e3763f4f8..3dece8b29253 100644 --- a/tools/testing/selftests/net/af_unix/test_unix_oob.c +++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c @@ -271,8 +271,9 @@ main(int argc, char **argv) read_oob(pfd, &oob); if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) { - fprintf(stderr, "Test 3 failed, sigurg %d len %d OOB %c ", - "atmark %d\n", signal_recvd, len, oob, atmark); + fprintf(stderr, + "Test 3 failed, sigurg %d len %d OOB %c atmark %d\n", + signal_recvd, len, oob, atmark); die(1); } From 72a3c58d18fd780eecd80178bb2132ce741a0a74 Mon Sep 17 00:00:00 2001 From: Lama Kayal Date: Sun, 19 Sep 2021 14:55:45 +0300 Subject: [PATCH 367/543] net/mlx4_en: Resolve bad operstate value Any link state change that's done prior to net device registration isn't reflected on the state, thus the operational state is left obsolete, with 'UNKNOWN' status. To resolve the issue, query link state from FW upon open operations to ensure operational state is updated. Fixes: c27a02cd94d6 ("mlx4_en: Add driver for Mellanox ConnectX 10GbE NIC") Signed-off-by: Lama Kayal Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx4/en_netdev.c | 47 ++++++++++++------- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 - 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index a2f61a87cef8..35154635ec3a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1269,7 +1269,6 @@ static void mlx4_en_do_set_rx_mode(struct work_struct *work) if (!netif_carrier_ok(dev)) { if (!mlx4_en_QUERY_PORT(mdev, priv->port)) { if (priv->port_state.link_state) { - priv->last_link_state = MLX4_DEV_EVENT_PORT_UP; netif_carrier_on(dev); en_dbg(LINK, priv, "Link Up\n"); } @@ -1557,26 +1556,36 @@ static void mlx4_en_service_task(struct work_struct *work) mutex_unlock(&mdev->state_lock); } -static void mlx4_en_linkstate(struct work_struct *work) +static void mlx4_en_linkstate(struct mlx4_en_priv *priv) +{ + struct mlx4_en_port_state *port_state = &priv->port_state; + struct mlx4_en_dev *mdev = priv->mdev; + struct net_device *dev = priv->dev; + bool up; + + if (mlx4_en_QUERY_PORT(mdev, priv->port)) + port_state->link_state = MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN; + + up = port_state->link_state == MLX4_PORT_STATE_DEV_EVENT_PORT_UP; + if (up == netif_carrier_ok(dev)) + netif_carrier_event(dev); + if (!up) { + en_info(priv, "Link Down\n"); + netif_carrier_off(dev); + } else { + en_info(priv, "Link Up\n"); + netif_carrier_on(dev); + } +} + +static void mlx4_en_linkstate_work(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, linkstate_task); struct mlx4_en_dev *mdev = priv->mdev; - int linkstate = priv->link_state; mutex_lock(&mdev->state_lock); - /* If observable port state changed set carrier state and - * report to system log */ - if (priv->last_link_state != linkstate) { - if (linkstate == MLX4_DEV_EVENT_PORT_DOWN) { - en_info(priv, "Link Down\n"); - netif_carrier_off(priv->dev); - } else { - en_info(priv, "Link Up\n"); - netif_carrier_on(priv->dev); - } - } - priv->last_link_state = linkstate; + mlx4_en_linkstate(priv); mutex_unlock(&mdev->state_lock); } @@ -2079,9 +2088,11 @@ static int mlx4_en_open(struct net_device *dev) mlx4_en_clear_stats(dev); err = mlx4_en_start_port(dev); - if (err) + if (err) { en_err(priv, "Failed starting port:%d\n", priv->port); - + goto out; + } + mlx4_en_linkstate(priv); out: mutex_unlock(&mdev->state_lock); return err; @@ -3168,7 +3179,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, spin_lock_init(&priv->stats_lock); INIT_WORK(&priv->rx_mode_task, mlx4_en_do_set_rx_mode); INIT_WORK(&priv->restart_task, mlx4_en_restart); - INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate); + INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate_work); INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats); INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task); #ifdef CONFIG_RFS_ACCEL diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index f3d1a20201ef..6bf558c5ec10 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -552,7 +552,6 @@ struct mlx4_en_priv { struct mlx4_hwq_resources res; int link_state; - int last_link_state; bool port_up; int port; int registered; From e30cd812dffadc58241ae378e48728e6a161becd Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 17 Sep 2021 15:53:56 -0600 Subject: [PATCH 368/543] selftests: net: af_unix: Fix makefile to use TEST_GEN_PROGS Makefile uses TEST_PROGS instead of TEST_GEN_PROGS to define executables. TEST_PROGS is for shell scripts that need to be installed and run by the common lib.mk framework. The common framework doesn't touch TEST_PROGS when it does build and clean. As a result "make kselftest-clean" and "make clean" fail to remove executables. Run and install work because the common framework runs and installs TEST_PROGS. Build works because the Makefile defines "all" rule which is unnecessary if TEST_GEN_PROGS is used. Use TEST_GEN_PROGS so the common framework can handle build/run/ install/clean properly. Signed-off-by: Shuah Khan Signed-off-by: David S. Miller --- tools/testing/selftests/net/af_unix/Makefile | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index cfc7f4f97fd1..df341648f818 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,5 +1,2 @@ -##TEST_GEN_FILES := test_unix_oob -TEST_PROGS := test_unix_oob +TEST_GEN_PROGS := test_unix_oob include ../../lib.mk - -all: $(TEST_PROGS) From b1044a9b8100a0cc5c9d2e1e2f9ca4bb8e32b23a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 19 Sep 2021 10:06:46 -0700 Subject: [PATCH 369/543] Revert drm/vc4 hdmi runtime PM changes This reverts commits 9984d6664ce9 ("drm/vc4: hdmi: Make sure the controller is powered in detect") 411efa18e4b0 ("drm/vc4: hdmi: Move the HSM clock enable to runtime_pm") as Michael Stapelberg reports that the new runtime PM changes cause his Raspberry Pi 3 to hang on boot, probably due to interactions with other changes in the DRM tree (because a bisect points to the merge in commit e058a84bfddc: "Merge tag 'drm-next-2021-07-01' of git://.../drm"). Revert these two commits until it's been resolved. Link: https://lore.kernel.org/all/871r5mp7h2.fsf@midna.i-did-not-set--mail-host-address--so-tickle-me/ Reported-and-tested-by: Michael Stapelberg Cc: Maxime Ripard Cc: Dave Stevenson Cc: Dave Airlie Signed-off-by: Linus Torvalds --- drivers/gpu/drm/vc4/vc4_hdmi.c | 44 ++++++++-------------------------- 1 file changed, 10 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 4a1115043114..1acf64222aa3 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -167,8 +167,6 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force) struct vc4_hdmi *vc4_hdmi = connector_to_vc4_hdmi(connector); bool connected = false; - WARN_ON(pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev)); - if (vc4_hdmi->hpd_gpio && gpiod_get_value_cansleep(vc4_hdmi->hpd_gpio)) { connected = true; @@ -189,12 +187,10 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force) } } - pm_runtime_put(&vc4_hdmi->pdev->dev); return connector_status_connected; } cec_phys_addr_invalidate(vc4_hdmi->cec_adap); - pm_runtime_put(&vc4_hdmi->pdev->dev); return connector_status_disconnected; } @@ -635,6 +631,7 @@ static void vc4_hdmi_encoder_post_crtc_powerdown(struct drm_encoder *encoder, vc4_hdmi->variant->phy_disable(vc4_hdmi); clk_disable_unprepare(vc4_hdmi->pixel_bvb_clock); + clk_disable_unprepare(vc4_hdmi->hsm_clock); clk_disable_unprepare(vc4_hdmi->pixel_clock); ret = pm_runtime_put(&vc4_hdmi->pdev->dev); @@ -947,6 +944,13 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder, return; } + ret = clk_prepare_enable(vc4_hdmi->hsm_clock); + if (ret) { + DRM_ERROR("Failed to turn on HSM clock: %d\n", ret); + clk_disable_unprepare(vc4_hdmi->pixel_clock); + return; + } + vc4_hdmi_cec_update_clk_div(vc4_hdmi); if (pixel_rate > 297000000) @@ -959,6 +963,7 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder, ret = clk_set_min_rate(vc4_hdmi->pixel_bvb_clock, bvb_rate); if (ret) { DRM_ERROR("Failed to set pixel bvb clock rate: %d\n", ret); + clk_disable_unprepare(vc4_hdmi->hsm_clock); clk_disable_unprepare(vc4_hdmi->pixel_clock); return; } @@ -966,6 +971,7 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder, ret = clk_prepare_enable(vc4_hdmi->pixel_bvb_clock); if (ret) { DRM_ERROR("Failed to turn on pixel bvb clock: %d\n", ret); + clk_disable_unprepare(vc4_hdmi->hsm_clock); clk_disable_unprepare(vc4_hdmi->pixel_clock); return; } @@ -2114,29 +2120,6 @@ static int vc5_hdmi_init_resources(struct vc4_hdmi *vc4_hdmi) return 0; } -#ifdef CONFIG_PM -static int vc4_hdmi_runtime_suspend(struct device *dev) -{ - struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); - - clk_disable_unprepare(vc4_hdmi->hsm_clock); - - return 0; -} - -static int vc4_hdmi_runtime_resume(struct device *dev) -{ - struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); - int ret; - - ret = clk_prepare_enable(vc4_hdmi->hsm_clock); - if (ret) - return ret; - - return 0; -} -#endif - static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) { const struct vc4_hdmi_variant *variant = of_device_get_match_data(dev); @@ -2391,18 +2374,11 @@ static const struct of_device_id vc4_hdmi_dt_match[] = { {} }; -static const struct dev_pm_ops vc4_hdmi_pm_ops = { - SET_RUNTIME_PM_OPS(vc4_hdmi_runtime_suspend, - vc4_hdmi_runtime_resume, - NULL) -}; - struct platform_driver vc4_hdmi_driver = { .probe = vc4_hdmi_dev_probe, .remove = vc4_hdmi_dev_remove, .driver = { .name = "vc4_hdmi", .of_match_table = vc4_hdmi_dt_match, - .pm = &vc4_hdmi_pm_ops, }, }; From 31ad37bd6faf871c070650f72ac9488ceeeceeb0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 19 Sep 2021 10:11:53 -0700 Subject: [PATCH 370/543] Revert "drm/vc4: hdmi: Remove drm_encoder->crtc usage" This reverts commit 27da370e0fb343a0baf308f503bb3e5dcdfe3362. Sudip Mukherjee reports that this broke pulseaudio with a NULL pointer dereference in vc4_hdmi_audio_prepare(), bisected it to this commit, and confirmed that a revert fixed the problem. Revert the problematic commit until fixed. Link: https://lore.kernel.org/all/CADVatmPB9-oKd=ypvj25UYysVo6EZhQ6bCM7EvztQBMyiZfAyw@mail.gmail.com/ Link: https://lore.kernel.org/all/CADVatmN5EpRshGEPS_JozbFQRXg5w_8LFB3OMP1Ai-ghxd3w4g@mail.gmail.com/ Reported-and-tested-by: Sudip Mukherjee Cc: Maxime Ripard Cc: Emma Anholt Cc: Dave Airlie Signed-off-by: Linus Torvalds --- drivers/gpu/drm/vc4/vc4_hdmi.c | 40 +++++++++++----------------------- 1 file changed, 13 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 1acf64222aa3..b4b4653fe301 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -432,7 +432,7 @@ static void vc4_hdmi_set_avi_infoframe(struct drm_encoder *encoder) struct vc4_hdmi_encoder *vc4_encoder = to_vc4_hdmi_encoder(encoder); struct drm_connector *connector = &vc4_hdmi->connector; struct drm_connector_state *cstate = connector->state; - struct drm_crtc *crtc = cstate->crtc; + struct drm_crtc *crtc = encoder->crtc; const struct drm_display_mode *mode = &crtc->state->adjusted_mode; union hdmi_infoframe frame; int ret; @@ -537,11 +537,8 @@ static bool vc4_hdmi_supports_scrambling(struct drm_encoder *encoder, static void vc4_hdmi_enable_scrambling(struct drm_encoder *encoder) { + struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode; struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); - struct drm_connector *connector = &vc4_hdmi->connector; - struct drm_connector_state *cstate = connector->state; - struct drm_crtc *crtc = cstate->crtc; - struct drm_display_mode *mode = &crtc->state->adjusted_mode; if (!vc4_hdmi_supports_scrambling(encoder, mode)) return; @@ -562,18 +559,17 @@ static void vc4_hdmi_enable_scrambling(struct drm_encoder *encoder) static void vc4_hdmi_disable_scrambling(struct drm_encoder *encoder) { struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); - struct drm_connector *connector = &vc4_hdmi->connector; - struct drm_connector_state *cstate = connector->state; + struct drm_crtc *crtc = encoder->crtc; /* - * At boot, connector->state will be NULL. Since we don't know the + * At boot, encoder->crtc will be NULL. Since we don't know the * state of the scrambler and in order to avoid any * inconsistency, let's disable it all the time. */ - if (cstate && !vc4_hdmi_supports_scrambling(encoder, &cstate->crtc->mode)) + if (crtc && !vc4_hdmi_supports_scrambling(encoder, &crtc->mode)) return; - if (cstate && !vc4_hdmi_mode_needs_scrambling(&cstate->crtc->mode)) + if (crtc && !vc4_hdmi_mode_needs_scrambling(&crtc->mode)) return; if (delayed_work_pending(&vc4_hdmi->scrambling_work)) @@ -895,9 +891,7 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder, vc4_hdmi_encoder_get_connector_state(encoder, state); struct vc4_hdmi_connector_state *vc4_conn_state = conn_state_to_vc4_hdmi_conn_state(conn_state); - struct drm_crtc_state *crtc_state = - drm_atomic_get_new_crtc_state(state, conn_state->crtc); - struct drm_display_mode *mode = &crtc_state->adjusted_mode; + struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode; struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); unsigned long bvb_rate, pixel_rate, hsm_rate; int ret; @@ -991,11 +985,7 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder, static void vc4_hdmi_encoder_pre_crtc_enable(struct drm_encoder *encoder, struct drm_atomic_state *state) { - struct drm_connector_state *conn_state = - vc4_hdmi_encoder_get_connector_state(encoder, state); - struct drm_crtc_state *crtc_state = - drm_atomic_get_new_crtc_state(state, conn_state->crtc); - struct drm_display_mode *mode = &crtc_state->adjusted_mode; + struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode; struct vc4_hdmi_encoder *vc4_encoder = to_vc4_hdmi_encoder(encoder); struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); @@ -1018,11 +1008,7 @@ static void vc4_hdmi_encoder_pre_crtc_enable(struct drm_encoder *encoder, static void vc4_hdmi_encoder_post_crtc_enable(struct drm_encoder *encoder, struct drm_atomic_state *state) { - struct drm_connector_state *conn_state = - vc4_hdmi_encoder_get_connector_state(encoder, state); - struct drm_crtc_state *crtc_state = - drm_atomic_get_new_crtc_state(state, conn_state->crtc); - struct drm_display_mode *mode = &crtc_state->adjusted_mode; + struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode; struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); struct vc4_hdmi_encoder *vc4_encoder = to_vc4_hdmi_encoder(encoder); bool hsync_pos = mode->flags & DRM_MODE_FLAG_PHSYNC; @@ -1210,8 +1196,8 @@ static void vc4_hdmi_audio_set_mai_clock(struct vc4_hdmi *vc4_hdmi, static void vc4_hdmi_set_n_cts(struct vc4_hdmi *vc4_hdmi, unsigned int samplerate) { - struct drm_connector *connector = &vc4_hdmi->connector; - struct drm_crtc *crtc = connector->state->crtc; + struct drm_encoder *encoder = &vc4_hdmi->encoder.base.base; + struct drm_crtc *crtc = encoder->crtc; const struct drm_display_mode *mode = &crtc->state->adjusted_mode; u32 n, cts; u64 tmp; @@ -1244,13 +1230,13 @@ static inline struct vc4_hdmi *dai_to_hdmi(struct snd_soc_dai *dai) static int vc4_hdmi_audio_startup(struct device *dev, void *data) { struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); - struct drm_connector *connector = &vc4_hdmi->connector; + struct drm_encoder *encoder = &vc4_hdmi->encoder.base.base; /* * If the HDMI encoder hasn't probed, or the encoder is * currently in DVI mode, treat the codec dai as missing. */ - if (!connector->state || !(HDMI_READ(HDMI_RAM_PACKET_CONFIG) & + if (!encoder->crtc || !(HDMI_READ(HDMI_RAM_PACKET_CONFIG) & VC4_HDMI_RAM_PACKET_ENABLE)) return -ENODEV; From 9caea0007601d3bc6debec04f8b4cd6f4c2394be Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 19 Sep 2021 10:36:09 -0700 Subject: [PATCH 371/543] parisc: Declare pci_iounmap() parisc version only when CONFIG_PCI enabled Linus noticed odd declaration rules for pci_iounmap() in iomap.h and pci_iomap.h, where it dependend on either NO_GENERIC_PCI_IOPORT_MAP or GENERIC_IOMAP when CONFIG_PCI was disabled. Testing on parisc seems to indicate that we need pci_iounmap() only when CONFIG_PCI is enabled, so the declaration of pci_iounmap() can be moved cleanly into pci_iomap.h in sync with the declarations of pci_iomap(). Link: https://lore.kernel.org/all/CAHk-=wjRrh98pZoQ+AzfWmsTZacWxTJKXZ9eKU2X_0+jM=O8nw@mail.gmail.com/ Signed-off-by: Helge Deller Suggested-by: Linus Torvalds Fixes: 97a29d59fc22 ("[PARISC] fix compile break caused by iomap: make IOPORT/PCI mapping functions conditional") Cc: Arnd Bergmann Cc: Guenter Roeck Cc: Ulrich Teichert Cc: James Bottomley Signed-off-by: Linus Torvalds --- arch/parisc/lib/iomap.c | 4 +++- include/asm-generic/iomap.h | 10 ---------- include/asm-generic/pci_iomap.h | 3 +++ 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/arch/parisc/lib/iomap.c b/arch/parisc/lib/iomap.c index f03adb1999e7..367f6397bda7 100644 --- a/arch/parisc/lib/iomap.c +++ b/arch/parisc/lib/iomap.c @@ -513,12 +513,15 @@ void ioport_unmap(void __iomem *addr) } } +#ifdef CONFIG_PCI void pci_iounmap(struct pci_dev *dev, void __iomem * addr) { if (!INDIRECT_ADDR(addr)) { iounmap(addr); } } +EXPORT_SYMBOL(pci_iounmap); +#endif EXPORT_SYMBOL(ioread8); EXPORT_SYMBOL(ioread16); @@ -544,4 +547,3 @@ EXPORT_SYMBOL(iowrite16_rep); EXPORT_SYMBOL(iowrite32_rep); EXPORT_SYMBOL(ioport_map); EXPORT_SYMBOL(ioport_unmap); -EXPORT_SYMBOL(pci_iounmap); diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h index 9b3eb6d86200..08237ae8b840 100644 --- a/include/asm-generic/iomap.h +++ b/include/asm-generic/iomap.h @@ -110,16 +110,6 @@ static inline void __iomem *ioremap_np(phys_addr_t offset, size_t size) } #endif -#ifdef CONFIG_PCI -/* Destroy a virtual mapping cookie for a PCI BAR (memory or IO) */ -struct pci_dev; -extern void pci_iounmap(struct pci_dev *dev, void __iomem *); -#elif defined(CONFIG_GENERIC_IOMAP) -struct pci_dev; -static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) -{ } -#endif - #include #endif diff --git a/include/asm-generic/pci_iomap.h b/include/asm-generic/pci_iomap.h index df636c6d8e6c..5a2f9bf53384 100644 --- a/include/asm-generic/pci_iomap.h +++ b/include/asm-generic/pci_iomap.h @@ -18,6 +18,7 @@ extern void __iomem *pci_iomap_range(struct pci_dev *dev, int bar, extern void __iomem *pci_iomap_wc_range(struct pci_dev *dev, int bar, unsigned long offset, unsigned long maxlen); +extern void pci_iounmap(struct pci_dev *dev, void __iomem *); /* Create a virtual mapping cookie for a port on a given PCI device. * Do not call this directly, it exists to make it easier for architectures * to override */ @@ -50,6 +51,8 @@ static inline void __iomem *pci_iomap_wc_range(struct pci_dev *dev, int bar, { return NULL; } +static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) +{ } #endif #endif /* __ASM_GENERIC_PCI_IOMAP_H */ From 4fef6115903afed5a7f21b387ad132b4c8838bc7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 19 Sep 2021 10:37:00 -0700 Subject: [PATCH 372/543] alpha: enable GENERIC_PCI_IOMAP unconditionally With the previous commit (9caea0007601: "parisc: Declare pci_iounmap() parisc version only when CONFIG_PCI enabled") we can now enable GENERIC_PCI_IOMAP unconditionally on alpha, and if PCI is not enabled we will just get the nice empty helper functions that allow mixed-bus drivers to build. Example driver: the old 3com/3c59x.c driver works with either the PCI or the EISA version of the 3x59x card, but wouldn't build in an EISA-only configuration because of missing pci_iomap() and pci_iounmap() dummy wrappers. Most of the other PCI infrastructure just becomes empty wrappers even without GENERIC_PCI_IOMAP, and it's not obvious that the pci_iomap functionality shouldn't do the same, but this works. Cc: Ulrich Teichert Signed-off-by: Linus Torvalds --- arch/alpha/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 11e99358aa58..4e87783c90ad 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -20,7 +20,7 @@ config ALPHA select NEED_SG_DMA_LENGTH select VIRT_TO_BUS select GENERIC_IRQ_PROBE - select GENERIC_PCI_IOMAP if PCI + select GENERIC_PCI_IOMAP select AUTO_IRQ_AFFINITY if SMP select GENERIC_IRQ_SHOW select ARCH_WANT_IPC_PARSE_VERSION From bc1abb9e55cedaeac4602426f8fc83fb3a5b1c35 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 19 Sep 2021 10:49:42 -0700 Subject: [PATCH 373/543] dmascc: use proper 'virt_to_bus()' rather than casting to 'int' The old dmascc driver depends on the legacy ISA_DMA_API, and blindly just casts the kernel virtual address to 'int' for set_dma_addr(). That works only incidentally, and because the high bits of the address will be ignored anyway. And on 64-bit architectures it causes warnings. Admittedly, 64-bit architectures with ISA are basically dead - I think the only example of this is alpha, and nobody would ever use the dmascc driver there. But hey, the fix is easy enough, the end result is cleaner, and it's yet another configuration that now builds without warnings. If somebody actually uses this driver on an alpha and this fixes it for you, please email me. Because that is just incredibly bizarre. Signed-off-by: Linus Torvalds --- drivers/net/hamradio/dmascc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c index b50b7fafd8d6..f4c3efc3e074 100644 --- a/drivers/net/hamradio/dmascc.c +++ b/drivers/net/hamradio/dmascc.c @@ -973,7 +973,7 @@ static inline void tx_on(struct scc_priv *priv) flags = claim_dma_lock(); set_dma_mode(priv->param.dma, DMA_MODE_WRITE); set_dma_addr(priv->param.dma, - (int) priv->tx_buf[priv->tx_tail] + n); + virt_to_bus(priv->tx_buf[priv->tx_tail]) + n); set_dma_count(priv->param.dma, priv->tx_len[priv->tx_tail] - n); release_dma_lock(flags); @@ -1020,7 +1020,7 @@ static inline void rx_on(struct scc_priv *priv) flags = claim_dma_lock(); set_dma_mode(priv->param.dma, DMA_MODE_READ); set_dma_addr(priv->param.dma, - (int) priv->rx_buf[priv->rx_head]); + virt_to_bus(priv->rx_buf[priv->rx_head])); set_dma_count(priv->param.dma, BUF_SIZE); release_dma_lock(flags); enable_dma(priv->param.dma); @@ -1233,7 +1233,7 @@ static void special_condition(struct scc_priv *priv, int rc) if (priv->param.dma >= 0) { flags = claim_dma_lock(); set_dma_addr(priv->param.dma, - (int) priv->rx_buf[priv->rx_head]); + virt_to_bus(priv->rx_buf[priv->rx_head])); set_dma_count(priv->param.dma, BUF_SIZE); release_dma_lock(flags); } else { From 316e8d79a0959c302b0c462ab64b069599f10eef Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 19 Sep 2021 17:13:35 -0700 Subject: [PATCH 374/543] pci_iounmap'2: Electric Boogaloo: try to make sense of it all Nathan Chancellor reports that the recent change to pci_iounmap in commit 9caea0007601 ("parisc: Declare pci_iounmap() parisc version only when CONFIG_PCI enabled") causes build errors on arm64. It took me about two hours to convince myself that I think I know what the logic of that mess of #ifdef's in the header file really aim to do, and rewrite it to be easier to follow. Famous last words. Anyway, the code has now been lifted from that grotty header file into lib/pci_iomap.c, and has fairly extensive comments about what the logic is. It also avoids indirecting through another confusing (and badly named) helper function that has other preprocessor config conditionals. Let's see what odd architecture did something else strange in this area to break things. But my arm64 cross build is clean. Fixes: 9caea0007601 ("parisc: Declare pci_iounmap() parisc version only when CONFIG_PCI enabled") Reported-by: Nathan Chancellor Cc: Helge Deller Cc: Arnd Bergmann Cc: Guenter Roeck Cc: Ulrich Teichert Cc: James Bottomley Signed-off-by: Linus Torvalds --- include/asm-generic/io.h | 26 +++--------------------- lib/pci_iomap.c | 43 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 23 deletions(-) diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index e93375c710b9..cc7338f9e0d1 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -1023,16 +1023,7 @@ static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) port &= IO_SPACE_LIMIT; return (port > MMIO_UPPER_LIMIT) ? NULL : PCI_IOBASE + port; } -#define __pci_ioport_unmap __pci_ioport_unmap -static inline void __pci_ioport_unmap(void __iomem *p) -{ - uintptr_t start = (uintptr_t) PCI_IOBASE; - uintptr_t addr = (uintptr_t) p; - - if (addr >= start && addr < start + IO_SPACE_LIMIT) - return; - iounmap(p); -} +#define ARCH_HAS_GENERIC_IOPORT_MAP #endif #ifndef ioport_unmap @@ -1048,21 +1039,10 @@ extern void ioport_unmap(void __iomem *p); #endif /* CONFIG_HAS_IOPORT_MAP */ #ifndef CONFIG_GENERIC_IOMAP -struct pci_dev; -extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max); - -#ifndef __pci_ioport_unmap -static inline void __pci_ioport_unmap(void __iomem *p) {} -#endif - #ifndef pci_iounmap -#define pci_iounmap pci_iounmap -static inline void pci_iounmap(struct pci_dev *dev, void __iomem *p) -{ - __pci_ioport_unmap(p); -} +#define ARCH_WANTS_GENERIC_PCI_IOUNMAP +#endif #endif -#endif /* CONFIG_GENERIC_IOMAP */ #ifndef xlate_dev_mem_ptr #define xlate_dev_mem_ptr xlate_dev_mem_ptr diff --git a/lib/pci_iomap.c b/lib/pci_iomap.c index 2d3eb1cb73b8..ce39ce9f3526 100644 --- a/lib/pci_iomap.c +++ b/lib/pci_iomap.c @@ -134,4 +134,47 @@ void __iomem *pci_iomap_wc(struct pci_dev *dev, int bar, unsigned long maxlen) return pci_iomap_wc_range(dev, bar, 0, maxlen); } EXPORT_SYMBOL_GPL(pci_iomap_wc); + +/* + * pci_iounmap() somewhat illogically comes from lib/iomap.c for the + * CONFIG_GENERIC_IOMAP case, because that's the code that knows about + * the different IOMAP ranges. + * + * But if the architecture does not use the generic iomap code, and if + * it has _not_ defined it's own private pci_iounmap function, we define + * it here. + * + * NOTE! This default implementation assumes that if the architecture + * support ioport mapping (HAS_IOPORT_MAP), the ioport mapping will + * be fixed to the range [ PCI_IOBASE, PCI_IOBASE+IO_SPACE_LIMIT [, + * and does not need unmapping with 'ioport_unmap()'. + * + * If you have different rules for your architecture, you need to + * implement your own pci_iounmap() that knows the rules for where + * and how IO vs MEM get mapped. + * + * This code is odd, and the ARCH_HAS/ARCH_WANTS #define logic comes + * from legacy header file behavior. In particular, + * it would seem to make sense to do the iounmap(p) for the non-IO-space + * case here regardless, but that's not what the old header file code + * did. Probably incorrectly, but this is meant to be bug-for-bug + * compatible. + */ +#if defined(ARCH_WANTS_GENERIC_PCI_IOUNMAP) + +void pci_iounmap(struct pci_dev *dev, void __iomem *p) +{ +#ifdef ARCH_HAS_GENERIC_IOPORT_MAP + uintptr_t start = (uintptr_t) PCI_IOBASE; + uintptr_t addr = (uintptr_t) p; + + if (addr >= start && addr < start + IO_SPACE_LIMIT) + return; + iounmap(p); +#endif +} +EXPORT_SYMBOL(pci_iounmap); + +#endif /* ARCH_WANTS_GENERIC_PCI_IOUNMAP */ + #endif /* CONFIG_PCI */ From e4e737bb5c170df6135a127739a9e6148ee3da82 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 19 Sep 2021 17:28:22 -0700 Subject: [PATCH 375/543] Linux 5.15-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 34a0afc3a8eb..5e7c1d854441 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Opossums on Parade # *DOCUMENTATION* From b51593c4cd739dff7fc40bbed368572d98b19ae8 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 17 Sep 2021 09:13:23 -0400 Subject: [PATCH 376/543] init/do_mounts.c: Harden split_fs_names() against buffer overflow split_fs_names() currently takes comma separate list of filesystems and converts it into individual filesystem strings. Pleaces these strings in the input buffer passed by caller and returns number of strings. If caller manages to pass input string bigger than buffer, then we can write beyond the buffer. Or if string just fits buffer, we will still write beyond the buffer as we append a '\0' byte at the end. Pass size of input buffer to split_fs_names() and put enough checks in place so such buffer overrun possibilities do not occur. This patch does few things. - Add a parameter "size" to split_fs_names(). This specifies size of input buffer. - Use strlcpy() (instead of strcpy()) so that we can't go beyond buffer size. If input string "names" is larger than passed in buffer, input string will be truncated to fit in buffer. - Stop appending extra '\0' character at the end and avoid one possibility of going beyond the input buffer size. - Do not use extra loop to count number of strings. - Previously if one passed "rootfstype=foo,,bar", split_fs_names() will return only 1 string "foo" (and "bar" will be truncated due to extra ,). After this patch, now split_fs_names() will return 3 strings ("foo", zero-sized-string, and "bar"). Callers of split_fs_names() have been modified to check for zero sized string and skip to next one. Reported-by: xu xin Signed-off-by: Vivek Goyal Reviewed-by: Jan Kara Signed-off-by: Al Viro --- init/do_mounts.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/init/do_mounts.c b/init/do_mounts.c index 2ed30ff6c906..9207bde9ca3f 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -338,20 +338,19 @@ __setup("rootflags=", root_data_setup); __setup("rootfstype=", fs_names_setup); __setup("rootdelay=", root_delay_setup); -static int __init split_fs_names(char *page, char *names) +/* This can return zero length strings. Caller should check */ +static int __init split_fs_names(char *page, size_t size, char *names) { - int count = 0; + int count = 1; char *p = page; - strcpy(p, root_fs_names); + strlcpy(p, root_fs_names, size); while (*p++) { - if (p[-1] == ',') + if (p[-1] == ',') { p[-1] = '\0'; + count++; + } } - *p = '\0'; - - for (p = page; *p; p += strlen(p)+1) - count++; return count; } @@ -404,12 +403,16 @@ void __init mount_block_root(char *name, int flags) scnprintf(b, BDEVNAME_SIZE, "unknown-block(%u,%u)", MAJOR(ROOT_DEV), MINOR(ROOT_DEV)); if (root_fs_names) - num_fs = split_fs_names(fs_names, root_fs_names); + num_fs = split_fs_names(fs_names, PAGE_SIZE, root_fs_names); else num_fs = list_bdev_fs_names(fs_names, PAGE_SIZE); retry: for (i = 0, p = fs_names; i < num_fs; i++, p += strlen(p)+1) { - int err = do_mount_root(name, p, flags, root_mount_data); + int err; + + if (!*p) + continue; + err = do_mount_root(name, p, flags, root_mount_data); switch (err) { case 0: goto out; @@ -543,10 +546,12 @@ static int __init mount_nodev_root(void) fs_names = (void *)__get_free_page(GFP_KERNEL); if (!fs_names) return -EINVAL; - num_fs = split_fs_names(fs_names, root_fs_names); + num_fs = split_fs_names(fs_names, PAGE_SIZE, root_fs_names); for (i = 0, fstype = fs_names; i < num_fs; i++, fstype += strlen(fstype) + 1) { + if (!*fstype) + continue; if (!fs_is_nodev(fstype)) continue; err = do_mount_root(root_device_name, fstype, root_mountflags, From 40c8ee67cfc49d00a13ccbf542e307b6b5421ad3 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 14 Sep 2021 12:12:10 +0300 Subject: [PATCH 377/543] init: don't panic if mount_nodev_root failed Attempt to mount 9p file system as root gives the following kernel panic: 9pnet_virtio: no channels available for device root Kernel panic - not syncing: VFS: Unable to mount root "root" (9p), err=-2 CPU: 2 PID: 1 Comm: swapper/0 Not tainted 5.15.0-rc1+ #127 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x45/0x59 panic+0x1e2/0x44b ? __warn_printk+0xf3/0xf3 ? free_unref_page+0x2d4/0x4a0 ? trace_hardirqs_on+0x32/0x120 ? free_unref_page+0x2d4/0x4a0 mount_root+0x189/0x1e0 prepare_namespace+0x136/0x165 kernel_init_freeable+0x3b8/0x3cb ? rest_init+0x2e0/0x2e0 kernel_init+0x19/0x130 ret_from_fork+0x1f/0x30 Kernel Offset: disabled ---[ end Kernel panic - not syncing: VFS: Unable to mount root "root" (9p), err=-2 ]--- QEMU command line: "qemu-system-x86_64 -append root=/dev/root rw rootfstype=9p rootflags=trans=virtio ..." This error is because root_device_name is truncated in prepare_namespace() from being "/dev/root" to be "root" prior to call to mount_nodev_root(). As a solution, don't treat errors in mount_nodev_root() as errors that require panics and allow failback to the mount flow that existed before patch citied in Fixes tag. Fixes: f9259be6a9e7 ("init: allow mounting arbitrary non-blockdevice filesystems as root") Signed-off-by: Leon Romanovsky Reviewed-by: Christoph Hellwig Signed-off-by: Al Viro --- init/do_mounts.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/init/do_mounts.c b/init/do_mounts.c index 9207bde9ca3f..762b534978d9 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -558,9 +558,6 @@ static int __init mount_nodev_root(void) root_mount_data); if (!err) break; - if (err != -EACCES && err != -EINVAL) - panic("VFS: Unable to mount root \"%s\" (%s), err=%d\n", - root_device_name, fstype, err); } free_page((unsigned long)fs_names); From 3765996e4f0b8a755cab215a08df744490c76052 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Sat, 18 Sep 2021 16:52:32 +0800 Subject: [PATCH 378/543] napi: fix race inside napi_enable The process will cause napi.state to contain NAPI_STATE_SCHED and not in the poll_list, which will cause napi_disable() to get stuck. The prefix "NAPI_STATE_" is removed in the figure below, and NAPI_STATE_HASHED is ignored in napi.state. CPU0 | CPU1 | napi.state =============================================================================== napi_disable() | | SCHED | NPSVC napi_enable() | | { | | smp_mb__before_atomic(); | | clear_bit(SCHED, &n->state); | | NPSVC | napi_schedule_prep() | SCHED | NPSVC | napi_poll() | | napi_complete_done() | | { | | if (n->state & (NPSVC | | (1) | _BUSY_POLL))) | | return false; | | ................ | | } | SCHED | NPSVC | | clear_bit(NPSVC, &n->state); | | SCHED } | | | | napi_schedule_prep() | | SCHED | MISSED (2) (1) Here return direct. Because of NAPI_STATE_NPSVC exists. (2) NAPI_STATE_SCHED exists. So not add napi.poll_list to sd->poll_list Since NAPI_STATE_SCHED already exists and napi is not in the sd->poll_list queue, NAPI_STATE_SCHED cannot be cleared and will always exist. 1. This will cause this queue to no longer receive packets. 2. If you encounter napi_disable under the protection of rtnl_lock, it will cause the entire rtnl_lock to be locked, affecting the overall system. This patch uses cmpxchg to implement napi_enable(), which ensures that there will be no race due to the separation of clear two bits. Fixes: 2d8bff12699abc ("netpoll: Close race condition between poll_one_napi and napi_disable") Signed-off-by: Xuan Zhuo Reviewed-by: Dust Li Signed-off-by: David S. Miller --- net/core/dev.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 74fd402d26dd..7ee9fecd3aff 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6923,12 +6923,16 @@ EXPORT_SYMBOL(napi_disable); */ void napi_enable(struct napi_struct *n) { - BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); - smp_mb__before_atomic(); - clear_bit(NAPI_STATE_SCHED, &n->state); - clear_bit(NAPI_STATE_NPSVC, &n->state); - if (n->dev->threaded && n->thread) - set_bit(NAPI_STATE_THREADED, &n->state); + unsigned long val, new; + + do { + val = READ_ONCE(n->state); + BUG_ON(!test_bit(NAPI_STATE_SCHED, &val)); + + new = val & ~(NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC); + if (n->dev->threaded && n->thread) + new |= NAPIF_STATE_THREADED; + } while (cmpxchg(&n->state, val, new) != val); } EXPORT_SYMBOL(napi_enable); From 563f23b002534176f49524b5ca0e1d94d8906c40 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 17 Sep 2021 16:02:18 +0300 Subject: [PATCH 379/543] nexthop: Fix division by zero while replacing a resilient group The resilient nexthop group torture tests in fib_nexthop.sh exposed a possible division by zero while replacing a resilient group [1]. The division by zero occurs when the data path sees a resilient nexthop group with zero buckets. The tests replace a resilient nexthop group in a loop while traffic is forwarded through it. The tests do not specify the number of buckets while performing the replacement, resulting in the kernel allocating a stub resilient table (i.e, 'struct nh_res_table') with zero buckets. This table should never be visible to the data path, but the old nexthop group (i.e., 'oldg') might still be used by the data path when the stub table is assigned to it. Fix this by only assigning the stub table to the old nexthop group after making sure the group is no longer used by the data path. Tested with fib_nexthops.sh: Tests passed: 222 Tests failed: 0 [1] divide error: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 1850 Comm: ping Not tainted 5.14.0-custom-10271-ga86eb53057fe #1107 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-4.fc34 04/01/2014 RIP: 0010:nexthop_select_path+0x2d2/0x1a80 [...] Call Trace: fib_select_multipath+0x79b/0x1530 fib_select_path+0x8fb/0x1c10 ip_route_output_key_hash_rcu+0x1198/0x2da0 ip_route_output_key_hash+0x190/0x340 ip_route_output_flow+0x21/0x120 raw_sendmsg+0x91d/0x2e10 inet_sendmsg+0x9e/0xe0 __sys_sendto+0x23d/0x360 __x64_sys_sendto+0xe1/0x1b0 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Cc: stable@vger.kernel.org Fixes: 283a72a5599e ("nexthop: Add implementation of resilient next-hop groups") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/nexthop.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 75ca4b6e484f..0e75fd3e57b4 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1982,6 +1982,8 @@ static int replace_nexthop_grp(struct net *net, struct nexthop *old, rcu_assign_pointer(old->nh_grp, newg); if (newg->resilient) { + /* Make sure concurrent readers are not using 'oldg' anymore. */ + synchronize_net(); rcu_assign_pointer(oldg->res_table, tmp_table); rcu_assign_pointer(oldg->spare->res_table, tmp_table); } From 5bed8b0704c9ecccc8f4a2c377d7c8e21090a82e Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 20 Sep 2021 02:51:52 -0400 Subject: [PATCH 380/543] bnxt_en: Fix TX timeout when TX ring size is set to the smallest The smallest TX ring size we support must fit a TX SKB with MAX_SKB_FRAGS + 1. Because the first TX BD for a packet is always a long TX BD, we need an extra TX BD to fit this packet. Define BNXT_MIN_TX_DESC_CNT with this value to make this more clear. The current code uses a minimum that is off by 1. Fix it using this constant. The tx_wake_thresh to determine when to wake up the TX queue is half the ring size but we must have at least BNXT_MIN_TX_DESC_CNT for the next packet which may have maximum fragments. So the comparison of the available TX BDs with tx_wake_thresh should be >= instead of > in the current code. Otherwise, at the smallest ring size, we will never wake up the TX queue and will cause TX timeout. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++---- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 5 +++++ drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 +- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 037767b370d5..62f84cc91e4d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -391,7 +391,7 @@ static bool bnxt_txr_netif_try_stop_queue(struct bnxt *bp, * netif_tx_queue_stopped(). */ smp_mb(); - if (bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh) { + if (bnxt_tx_avail(bp, txr) >= bp->tx_wake_thresh) { netif_tx_wake_queue(txq); return false; } @@ -764,7 +764,7 @@ next_tx_int: smp_mb(); if (unlikely(netif_tx_queue_stopped(txq)) && - bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh && + bnxt_tx_avail(bp, txr) >= bp->tx_wake_thresh && READ_ONCE(txr->dev_state) != BNXT_DEV_STATE_CLOSING) netif_tx_wake_queue(txq); } @@ -2416,7 +2416,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, if (TX_CMP_TYPE(txcmp) == CMP_TYPE_TX_L2_CMP) { tx_pkts++; /* return full budget so NAPI will complete. */ - if (unlikely(tx_pkts > bp->tx_wake_thresh)) { + if (unlikely(tx_pkts >= bp->tx_wake_thresh)) { rx_pkts = budget; raw_cons = NEXT_RAW_CMP(raw_cons); if (budget) @@ -3640,7 +3640,7 @@ static int bnxt_init_tx_rings(struct bnxt *bp) u16 i; bp->tx_wake_thresh = max_t(int, bp->tx_ring_size / 2, - MAX_SKB_FRAGS + 1); + BNXT_MIN_TX_DESC_CNT); for (i = 0; i < bp->tx_nr_rings; i++) { struct bnxt_tx_ring_info *txr = &bp->tx_ring[i]; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index ec046e7a2484..19fe6478e9b4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -629,6 +629,11 @@ struct nqe_cn { #define BNXT_MAX_RX_JUM_DESC_CNT (RX_DESC_CNT * MAX_RX_AGG_PAGES - 1) #define BNXT_MAX_TX_DESC_CNT (TX_DESC_CNT * MAX_TX_PAGES - 1) +/* Minimum TX BDs for a TX packet with MAX_SKB_FRAGS + 1. We need one extra + * BD because the first TX BD is always a long BD. + */ +#define BNXT_MIN_TX_DESC_CNT (MAX_SKB_FRAGS + 2) + #define RX_RING(x) (((x) & ~(RX_DESC_CNT - 1)) >> (BNXT_PAGE_SHIFT - 4)) #define RX_IDX(x) ((x) & (RX_DESC_CNT - 1)) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index b056e3c29bbd..7260910e75fb 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -798,7 +798,7 @@ static int bnxt_set_ringparam(struct net_device *dev, if ((ering->rx_pending > BNXT_MAX_RX_DESC_CNT) || (ering->tx_pending > BNXT_MAX_TX_DESC_CNT) || - (ering->tx_pending <= MAX_SKB_FRAGS)) + (ering->tx_pending < BNXT_MIN_TX_DESC_CNT)) return -EINVAL; if (netif_running(dev)) From 211f323768a25b30c106fd38f15a0f62c7c2b5f4 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 17 Sep 2021 11:18:47 +0200 Subject: [PATCH 381/543] USB: serial: mos7840: remove duplicated 0xac24 device ID 0xac24 device ID is already defined and used via BANDB_DEVICE_ID_USO9ML2_4. Remove the duplicate from the list. Fixes: 27f1281d5f72 ("USB: serial: Extra device/vendor ID for mos7840 driver") Signed-off-by: Krzysztof Kozlowski Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/mos7840.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index d7fe33ca73e4..925067a7978d 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -107,7 +107,6 @@ #define BANDB_DEVICE_ID_USOPTL4_2P 0xBC02 #define BANDB_DEVICE_ID_USOPTL4_4 0xAC44 #define BANDB_DEVICE_ID_USOPTL4_4P 0xBC03 -#define BANDB_DEVICE_ID_USOPTL2_4 0xAC24 /* Interrupt Routine Defines */ @@ -186,7 +185,6 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_2P) }, { USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_4) }, { USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_4P) }, - { USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL2_4) }, {} /* terminating entry */ }; MODULE_DEVICE_TABLE(usb, id_table); From 1ca200a8c6f079950a04ea3c3380fe8cf78e95a2 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 17 Sep 2021 11:18:48 +0200 Subject: [PATCH 382/543] USB: serial: option: remove duplicate USB device ID The device ZTE 0x0094 is already on the list. Signed-off-by: Krzysztof Kozlowski Fixes: b9e44fe5ecda ("USB: option: cleanup zte 3g-dongle's pid in option.c") Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index a79f51e35115..02a35f26ee82 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1658,7 +1658,6 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0060, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0070, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0073, 0xff, 0xff, 0xff) }, - { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0094, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0130, 0xff, 0xff, 0xff), .driver_info = RSVD(1) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0133, 0xff, 0xff, 0xff), From 42a99a0be307562c1bfef32bad8f89aa3c428edd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 20 Sep 2021 11:57:49 +0200 Subject: [PATCH 383/543] ptp: ocp: add COMMON_CLK dependency Without CONFIG_COMMON_CLK, this fails to link: arm-linux-gnueabi-ld: drivers/ptp/ptp_ocp.o: in function `ptp_ocp_register_i2c': ptp_ocp.c:(.text+0xcc0): undefined reference to `__clk_hw_register_fixed_rate' arm-linux-gnueabi-ld: ptp_ocp.c:(.text+0xcf4): undefined reference to `devm_clk_hw_register_clkdev' arm-linux-gnueabi-ld: drivers/ptp/ptp_ocp.o: in function `ptp_ocp_detach': ptp_ocp.c:(.text+0x1c24): undefined reference to `clk_hw_unregister_fixed_rate' Fixes: a7e1abad13f3 ("ptp: Add clock driver for the OpenCompute TimeCard.") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/ptp/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index f02bedf41264..458218f88c5e 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -174,6 +174,7 @@ config PTP_1588_CLOCK_OCP depends on I2C && MTD depends on SERIAL_8250 depends on !S390 + depends on COMMON_CLK select NET_DEVLINK help This driver adds support for an OpenCompute time card. From aa3233ea7bdb6c4004f5032a3a07417ea51dc409 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 20 Sep 2021 11:55:09 +0200 Subject: [PATCH 384/543] staging: r8188eu: fix -Wrestrict warnings Adding back the nonstandard ioctl commands caused -Wrestrict warnings when building with 'make W=1': drivers/staging/r8188eu/os_dep/ioctl_linux.c: In function 'rtw_mp_read_rf': drivers/staging/r8188eu/os_dep/ioctl_linux.c:5515:27: error: 'sprintf' argument 3 overlaps destination object 'extra' [-Werror=restrict] 5515 | sprintf(extra, "%s %d", extra, strtou); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/staging/r8188eu/os_dep/ioctl_linux.c:5470:54: note: destination object referenced by 'restrict'-qualified argument 1 was declared here 5470 | struct iw_point *wrqu, char *extra) | ~~~~~~^~~~~ Change these to the same construct used elsewhere in that driver, with an offset to the string to make the warning go away. The ioctl commands were previously removed, and it's unlikely that anything is actually using them, so ideally I would prefer to have them removed again. The lack of range checking of the 'extra' output buffer is also slightly worrying, but I did not check whether this could cause harm. Fixes: 2b42bd58b321 ("staging: r8188eu: introduce new os_dep dir for RTL8188eu driver") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20210920095525.1150678-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/os_dep/ioctl_linux.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/staging/r8188eu/os_dep/ioctl_linux.c b/drivers/staging/r8188eu/os_dep/ioctl_linux.c index 81d4255d1785..1fd375076001 100644 --- a/drivers/staging/r8188eu/os_dep/ioctl_linux.c +++ b/drivers/staging/r8188eu/os_dep/ioctl_linux.c @@ -5372,8 +5372,8 @@ static int rtw_mp_read_reg(struct net_device *dev, pnext++; if (*pnext != '\0') { - strtout = simple_strtoul(pnext, &ptmp, 16); - sprintf(extra, "%s %d", extra, strtout); + strtout = simple_strtoul(pnext, &ptmp, 16); + sprintf(extra + strlen(extra), " %d", strtout); } else { break; } @@ -5405,7 +5405,7 @@ static int rtw_mp_read_reg(struct net_device *dev, pnext++; if (*pnext != '\0') { strtout = simple_strtoul(pnext, &ptmp, 16); - sprintf(extra, "%s %d", extra, strtout); + sprintf(extra + strlen(extra), " %d", strtout); } else { break; } @@ -5512,7 +5512,7 @@ static int rtw_mp_read_rf(struct net_device *dev, pnext++; if (*pnext != '\0') { strtou = simple_strtoul(pnext, &ptmp, 16); - sprintf(extra, "%s %d", extra, strtou); + sprintf(extra + strlen(extra), " %d", strtou); } else { break; } From e184cec5e29d8eb3c3435b12a9074b75e2d69e4a Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Wed, 15 Sep 2021 21:52:06 +0800 Subject: [PATCH 385/543] net: hns3: fix change RSS 'hfunc' ineffective issue When user change rss 'hfunc' without set rss 'hkey' by ethtool -X command, the driver will ignore the 'hfunc' for the hkey is NULL. It's unreasonable. So fix it. Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support") Fixes: 374ad291762a ("net: hns3: Add RSS general configuration support for VF") Signed-off-by: Jian Shen Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hclge_main.c | 45 ++++++++++------ .../hisilicon/hns3/hns3vf/hclgevf_main.c | 52 ++++++++++++------- 2 files changed, 64 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index f1e46ba799f9..36c8741445e8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -4741,6 +4741,24 @@ static int hclge_get_rss(struct hnae3_handle *handle, u32 *indir, return 0; } +static int hclge_parse_rss_hfunc(struct hclge_vport *vport, const u8 hfunc, + u8 *hash_algo) +{ + switch (hfunc) { + case ETH_RSS_HASH_TOP: + *hash_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ; + return 0; + case ETH_RSS_HASH_XOR: + *hash_algo = HCLGE_RSS_HASH_ALGO_SIMPLE; + return 0; + case ETH_RSS_HASH_NO_CHANGE: + *hash_algo = vport->rss_algo; + return 0; + default: + return -EINVAL; + } +} + static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir, const u8 *key, const u8 hfunc) { @@ -4750,30 +4768,27 @@ static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir, u8 hash_algo; int ret, i; + ret = hclge_parse_rss_hfunc(vport, hfunc, &hash_algo); + if (ret) { + dev_err(&hdev->pdev->dev, "invalid hfunc type %u\n", hfunc); + return ret; + } + /* Set the RSS Hash Key if specififed by the user */ if (key) { - switch (hfunc) { - case ETH_RSS_HASH_TOP: - hash_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ; - break; - case ETH_RSS_HASH_XOR: - hash_algo = HCLGE_RSS_HASH_ALGO_SIMPLE; - break; - case ETH_RSS_HASH_NO_CHANGE: - hash_algo = vport->rss_algo; - break; - default: - return -EINVAL; - } - ret = hclge_set_rss_algo_key(hdev, hash_algo, key); if (ret) return ret; /* Update the shadow RSS key with user specified qids */ memcpy(vport->rss_hash_key, key, HCLGE_RSS_KEY_SIZE); - vport->rss_algo = hash_algo; + } else { + ret = hclge_set_rss_algo_key(hdev, hash_algo, + vport->rss_hash_key); + if (ret) + return ret; } + vport->rss_algo = hash_algo; /* Update the shadow RSS table with user specified qids */ for (i = 0; i < ae_dev->dev_specs.rss_ind_tbl_size; i++) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index a69e892277b3..5fdac8685f95 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -816,40 +816,56 @@ static int hclgevf_get_rss(struct hnae3_handle *handle, u32 *indir, u8 *key, return 0; } +static int hclgevf_parse_rss_hfunc(struct hclgevf_dev *hdev, const u8 hfunc, + u8 *hash_algo) +{ + switch (hfunc) { + case ETH_RSS_HASH_TOP: + *hash_algo = HCLGEVF_RSS_HASH_ALGO_TOEPLITZ; + return 0; + case ETH_RSS_HASH_XOR: + *hash_algo = HCLGEVF_RSS_HASH_ALGO_SIMPLE; + return 0; + case ETH_RSS_HASH_NO_CHANGE: + *hash_algo = hdev->rss_cfg.hash_algo; + return 0; + default: + return -EINVAL; + } +} + static int hclgevf_set_rss(struct hnae3_handle *handle, const u32 *indir, const u8 *key, const u8 hfunc) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); struct hclgevf_rss_cfg *rss_cfg = &hdev->rss_cfg; + u8 hash_algo; int ret, i; if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) { + ret = hclgevf_parse_rss_hfunc(hdev, hfunc, &hash_algo); + if (ret) + return ret; + /* Set the RSS Hash Key if specififed by the user */ if (key) { - switch (hfunc) { - case ETH_RSS_HASH_TOP: - rss_cfg->hash_algo = - HCLGEVF_RSS_HASH_ALGO_TOEPLITZ; - break; - case ETH_RSS_HASH_XOR: - rss_cfg->hash_algo = - HCLGEVF_RSS_HASH_ALGO_SIMPLE; - break; - case ETH_RSS_HASH_NO_CHANGE: - break; - default: - return -EINVAL; - } - - ret = hclgevf_set_rss_algo_key(hdev, rss_cfg->hash_algo, - key); - if (ret) + ret = hclgevf_set_rss_algo_key(hdev, hash_algo, key); + if (ret) { + dev_err(&hdev->pdev->dev, + "invalid hfunc type %u\n", hfunc); return ret; + } /* Update the shadow RSS key with user specified qids */ memcpy(rss_cfg->rss_hash_key, key, HCLGEVF_RSS_KEY_SIZE); + } else { + ret = hclgevf_set_rss_algo_key(hdev, hash_algo, + rss_cfg->rss_hash_key); + if (ret) + return ret; } + rss_cfg->hash_algo = hash_algo; } /* update the shadow RSS table with user specified qids */ From 91bc0d5272d3a4dc3d4fd2a74387c7e7361bbe96 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Wed, 15 Sep 2021 21:52:07 +0800 Subject: [PATCH 386/543] net: hns3: fix inconsistent vf id print The vf id from ethtool is added 1 before configured to driver. So it's necessary to minus 1 when printing it, in order to keep consistent with user's configuration. Fixes: dd74f815dd41 ("net: hns3: Add support for rule add/delete for flow director") Signed-off-by: Jian Shen Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 36c8741445e8..c0f25ea043b0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -6642,10 +6642,13 @@ static int hclge_fd_parse_ring_cookie(struct hclge_dev *hdev, u64 ring_cookie, u8 vf = ethtool_get_flow_spec_ring_vf(ring_cookie); u16 tqps; + /* To keep consistent with user's configuration, minus 1 when + * printing 'vf', because vf id from ethtool is added 1 for vf. + */ if (vf > hdev->num_req_vfs) { dev_err(&hdev->pdev->dev, - "Error: vf id (%u) > max vf num (%u)\n", - vf, hdev->num_req_vfs); + "Error: vf id (%u) should be less than %u\n", + vf - 1, hdev->num_req_vfs); return -EINVAL; } From 311c0aaa9b4bb8dc65f22634e15963316b17c921 Mon Sep 17 00:00:00 2001 From: Jiaran Zhang Date: Wed, 15 Sep 2021 21:52:08 +0800 Subject: [PATCH 387/543] net: hns3: fix misuse vf id and vport id in some logs vport_id include PF and VFs, vport_id = 0 means PF, other values mean VFs. So the actual vf id is equal to vport_id minus 1. Some VF print logs are actually vport, and logs of vf id actually use vport id, so this patch fixes them. Fixes: ac887be5b0fe ("net: hns3: change print level of RAS error log from warning to error") Fixes: adcf738b804b ("net: hns3: cleanup some print format warning") Signed-off-by: Jiaran Zhang Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c | 8 ++++---- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 10 ++++++---- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 2 +- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index 718c16d686fa..bb9b026ae88e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -2445,12 +2445,12 @@ static void hclge_handle_over_8bd_err(struct hclge_dev *hdev, return; } - dev_err(dev, "PPU_PF_ABNORMAL_INT_ST over_8bd_no_fe found, vf_id(%u), queue_id(%u)\n", + dev_err(dev, "PPU_PF_ABNORMAL_INT_ST over_8bd_no_fe found, vport(%u), queue_id(%u)\n", vf_id, q_id); if (vf_id) { if (vf_id >= hdev->num_alloc_vport) { - dev_err(dev, "invalid vf id(%u)\n", vf_id); + dev_err(dev, "invalid vport(%u)\n", vf_id); return; } @@ -2463,8 +2463,8 @@ static void hclge_handle_over_8bd_err(struct hclge_dev *hdev, ret = hclge_inform_reset_assert_to_vf(&hdev->vport[vf_id]); if (ret) - dev_err(dev, "inform reset to vf(%u) failed %d!\n", - hdev->vport->vport_id, ret); + dev_err(dev, "inform reset to vport(%u) failed %d!\n", + vf_id, ret); } else { set_bit(HNAE3_FUNC_RESET, reset_requests); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index c0f25ea043b0..afc88a41a89c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -3661,7 +3661,8 @@ static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset) if (ret) { dev_err(&hdev->pdev->dev, "set vf(%u) rst failed %d!\n", - vport->vport_id, ret); + vport->vport_id - HCLGE_VF_VPORT_START_NUM, + ret); return ret; } @@ -3676,7 +3677,8 @@ static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset) if (ret) dev_warn(&hdev->pdev->dev, "inform reset to vf(%u) failed %d!\n", - vport->vport_id, ret); + vport->vport_id - HCLGE_VF_VPORT_START_NUM, + ret); } return 0; @@ -11467,11 +11469,11 @@ static void hclge_clear_resetting_state(struct hclge_dev *hdev) struct hclge_vport *vport = &hdev->vport[i]; int ret; - /* Send cmd to clear VF's FUNC_RST_ING */ + /* Send cmd to clear vport's FUNC_RST_ING */ ret = hclge_set_vf_rst(hdev, vport->vport_id, false); if (ret) dev_warn(&hdev->pdev->dev, - "clear vf(%u) rst failed %d!\n", + "clear vport(%u) rst failed %d!\n", vport->vport_id, ret); } } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index 2ce5302c5956..07aa6ada4fdb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -566,7 +566,7 @@ static int hclge_reset_vf(struct hclge_vport *vport) struct hclge_dev *hdev = vport->back; dev_warn(&hdev->pdev->dev, "PF received VF reset request from VF %u!", - vport->vport_id); + vport->vport_id - HCLGE_VF_VPORT_START_NUM); return hclge_func_reset_cmd(hdev, vport->vport_id); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 78d5bf1ea561..44618cc4cca1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -581,7 +581,7 @@ int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate) ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { dev_err(&hdev->pdev->dev, - "vf%u, qs%u failed to set tx_rate:%d, ret=%d\n", + "vport%u, qs%u failed to set tx_rate:%d, ret=%d\n", vport->vport_id, shap_cfg_cmd->qs_id, max_tx_rate, ret); return ret; From 63b1279d9905100a14da9e043de7b28e99dba3f8 Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Wed, 15 Sep 2021 21:52:09 +0800 Subject: [PATCH 388/543] net: hns3: check queue id range before using The input parameters may not be reliable. Before using the queue id, we should check this parameter. Otherwise, memory overwriting may occur. Fixes: d34100184685 ("net: hns3: refactor the mailbox message between PF and VF") Signed-off-by: Yufeng Mo Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index 07aa6ada4fdb..65d78ee4d65a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -590,9 +590,17 @@ static void hclge_get_queue_id_in_pf(struct hclge_vport *vport, struct hclge_mbx_vf_to_pf_cmd *mbx_req, struct hclge_respond_to_vf_msg *resp_msg) { + struct hnae3_handle *handle = &vport->nic; + struct hclge_dev *hdev = vport->back; u16 queue_id, qid_in_pf; memcpy(&queue_id, mbx_req->msg.data, sizeof(queue_id)); + if (queue_id >= handle->kinfo.num_tqps) { + dev_err(&hdev->pdev->dev, "Invalid queue id(%u) from VF %u\n", + queue_id, mbx_req->mbx_src_vfid); + return; + } + qid_in_pf = hclge_covert_handle_qid_global(&vport->nic, queue_id); memcpy(resp_msg->data, &qid_in_pf, sizeof(qid_in_pf)); resp_msg->len = sizeof(qid_in_pf); From ef39d632608e66f428c1246836fd060cf4818d67 Mon Sep 17 00:00:00 2001 From: liaoguojia Date: Wed, 15 Sep 2021 21:52:10 +0800 Subject: [PATCH 389/543] net: hns3: check vlan id before using it The input parameters may not be reliable, so check the vlan id before using it, otherwise may set wrong vlan id into hardware. Fixes: dc8131d846d4 ("net: hns3: Fix for packet loss due wrong filter config in VLAN tbls") Signed-off-by: liaoguojia Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index afc88a41a89c..14e9daf09f8c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -9817,6 +9817,9 @@ static int hclge_set_vlan_filter_hw(struct hclge_dev *hdev, __be16 proto, if (is_kill && !vlan_id) return 0; + if (vlan_id >= VLAN_N_VID) + return -EINVAL; + ret = hclge_set_vf_vlan_common(hdev, vport_id, is_kill, vlan_id); if (ret) { dev_err(&hdev->pdev->dev, From 5126b9d3d4acdebc12b9d436282f88d8a1b5146c Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Wed, 15 Sep 2021 21:52:11 +0800 Subject: [PATCH 390/543] net: hns3: fix a return value error in hclge_get_reset_status() hclge_get_reset_status() should return the tqp reset status. However, if the CMDQ fails, the caller will take it as tqp reset success status by mistake. Therefore, uses a parameters to get the tqp reset status instead. Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support") Signed-off-by: Yufeng Mo Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 14e9daf09f8c..47fea8985861 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -10726,7 +10726,8 @@ static int hclge_reset_tqp_cmd_send(struct hclge_dev *hdev, u16 queue_id, return 0; } -static int hclge_get_reset_status(struct hclge_dev *hdev, u16 queue_id) +static int hclge_get_reset_status(struct hclge_dev *hdev, u16 queue_id, + u8 *reset_status) { struct hclge_reset_tqp_queue_cmd *req; struct hclge_desc desc; @@ -10744,7 +10745,9 @@ static int hclge_get_reset_status(struct hclge_dev *hdev, u16 queue_id) return ret; } - return hnae3_get_bit(req->ready_to_reset, HCLGE_TQP_RESET_B); + *reset_status = hnae3_get_bit(req->ready_to_reset, HCLGE_TQP_RESET_B); + + return 0; } u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle, u16 queue_id) @@ -10763,7 +10766,7 @@ static int hclge_reset_tqp_cmd(struct hnae3_handle *handle) struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; u16 reset_try_times = 0; - int reset_status; + u8 reset_status; u16 queue_gid; int ret; u16 i; @@ -10779,7 +10782,11 @@ static int hclge_reset_tqp_cmd(struct hnae3_handle *handle) } while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) { - reset_status = hclge_get_reset_status(hdev, queue_gid); + ret = hclge_get_reset_status(hdev, queue_gid, + &reset_status); + if (ret) + return ret; + if (reset_status) break; From 4403f8062abecf24794e0fd3a3e424cc63ba6662 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 16 Sep 2021 17:05:29 +0200 Subject: [PATCH 391/543] xen/x86: drop redundant zeroing from cpu_initialize_context() Just after having obtained the pointer from kzalloc() there's no reason at all to set part of the area to all zero yet another time. Similarly there's no point explicitly clearing "ldt_ents". Signed-off-by: Jan Beulich Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/14881835-a48e-29fa-0870-e177b10fcf65@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/smp_pv.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 96afadf9878e..7ed56c6075b0 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -290,8 +290,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) gdt = get_cpu_gdt_rw(cpu); - memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); - /* * Bring up the CPU in cpu_bringup_and_idle() with the stack * pointing just below where pt_regs would be if it were a normal @@ -308,8 +306,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) xen_copy_trap_info(ctxt->trap_ctxt); - ctxt->ldt_ents = 0; - BUG_ON((unsigned long)gdt & ~PAGE_MASK); gdt_mfn = arbitrary_virt_to_mfn(gdt); From f28347cc66395e96712f5c2db0a302ee75bafce6 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 17 Sep 2021 08:13:08 +0200 Subject: [PATCH 392/543] Xen/gntdev: don't ignore kernel unmapping error While working on XSA-361 and its follow-ups, I failed to spot another place where the kernel mapping part of an operation was not treated the same as the user space part. Detect and propagate errors and add a 2nd pr_debug(). Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/c2513395-74dc-aea3-9192-fd265aa44e35@suse.com Signed-off-by: Juergen Gross --- drivers/xen/gntdev.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 1e7f6b1c0c97..fec1b6537166 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -381,6 +381,14 @@ static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset, map->unmap_ops[offset+i].handle, map->unmap_ops[offset+i].status); map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE; + if (use_ptemod) { + if (map->kunmap_ops[offset+i].status) + err = -EINVAL; + pr_debug("kunmap handle=%u st=%d\n", + map->kunmap_ops[offset+i].handle, + map->kunmap_ops[offset+i].status); + map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE; + } } return err; } From 9074c79b62b6e0d91d7f716c6e4e9968eaf9e043 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 17 Sep 2021 12:45:49 +0200 Subject: [PATCH 393/543] swiotlb-xen: ensure to issue well-formed XENMEM_exchange requests While the hypervisor hasn't been enforcing this, we would still better avoid issuing requests with GFNs not aligned to the requested order. Instead of altering the value also in the call to panic(), drop it there for being static and hence easy to determine without being part of the panic message. Signed-off-by: Jan Beulich Reviewed-by: Stefano Stabellini Link: https://lore.kernel.org/r/7b3998e3-1233-4e5a-89ec-d740e77eb166@suse.com Signed-off-by: Juergen Gross --- drivers/xen/swiotlb-xen.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 9c9ba500ef23..c0c38672fee3 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -230,10 +230,11 @@ retry: /* * Get IO TLB memory from any location. */ - start = memblock_alloc(PAGE_ALIGN(bytes), PAGE_SIZE); + start = memblock_alloc(PAGE_ALIGN(bytes), + IO_TLB_SEGSIZE << IO_TLB_SHIFT); if (!start) - panic("%s: Failed to allocate %lu bytes align=0x%lx\n", - __func__, PAGE_ALIGN(bytes), PAGE_SIZE); + panic("%s: Failed to allocate %lu bytes\n", + __func__, PAGE_ALIGN(bytes)); /* * And replace that memory with pages under 4GB. From e243ae953b5926eba1a8fbea64cbf68094f86a44 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 17 Sep 2021 12:48:03 +0200 Subject: [PATCH 394/543] PCI: only build xen-pcifront in PV-enabled environments The driver's module init function, pcifront_init(), invokes xen_pv_domain() first thing. That construct produces constant "false" when !CONFIG_XEN_PV. Hence there's no point building the driver in non-PV configurations. Drop the (now implicit and generally wrong) X86 dependency: At present, XEN_PV can only be set when X86 is also enabled. In general an architecture supporting Xen PV (and PCI) would want to have this driver built. Signed-off-by: Jan Beulich Reviewed-by: Stefano Stabellini Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/3a7f6c9b-215d-b593-8056-b5fe605dafd7@suse.com Signed-off-by: Juergen Gross --- drivers/pci/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 0c473d75e625..43e615aa12ff 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -110,7 +110,7 @@ config PCI_PF_STUB config XEN_PCIDEV_FRONTEND tristate "Xen PCI Frontend" - depends on X86 && XEN + depends on XEN_PV select PCI_XEN select XEN_XENBUS_FRONTEND default y From 8e1034a526652f265ed993fab7f659eb8ae4b6f0 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 17 Sep 2021 12:49:04 +0200 Subject: [PATCH 395/543] xen/pci-swiotlb: reduce visibility of symbols xen_swiotlb and pci_xen_swiotlb_init() are only used within the file defining them, so make them static and remove the stubs. Otoh pci_xen_swiotlb_detect() has a use (as function pointer) from the main pci-swiotlb.c file - convert its stub to a #define to NULL. Signed-off-by: Jan Beulich Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/aef5fc33-9c02-4df0-906a-5c813142e13c@suse.com Signed-off-by: Juergen Gross --- arch/x86/include/asm/xen/swiotlb-xen.h | 6 +----- arch/x86/xen/pci-swiotlb-xen.c | 4 ++-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h b/arch/x86/include/asm/xen/swiotlb-xen.h index 6b56d0d45d15..66b4ddde7743 100644 --- a/arch/x86/include/asm/xen/swiotlb-xen.h +++ b/arch/x86/include/asm/xen/swiotlb-xen.h @@ -3,14 +3,10 @@ #define _ASM_X86_SWIOTLB_XEN_H #ifdef CONFIG_SWIOTLB_XEN -extern int xen_swiotlb; extern int __init pci_xen_swiotlb_detect(void); -extern void __init pci_xen_swiotlb_init(void); extern int pci_xen_swiotlb_init_late(void); #else -#define xen_swiotlb (0) -static inline int __init pci_xen_swiotlb_detect(void) { return 0; } -static inline void __init pci_xen_swiotlb_init(void) { } +#define pci_xen_swiotlb_detect NULL static inline int pci_xen_swiotlb_init_late(void) { return -ENXIO; } #endif diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index 54f9aa7e8457..46df59aeaa06 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -18,7 +18,7 @@ #endif #include -int xen_swiotlb __read_mostly; +static int xen_swiotlb __read_mostly; /* * pci_xen_swiotlb_detect - set xen_swiotlb to 1 if necessary @@ -56,7 +56,7 @@ int __init pci_xen_swiotlb_detect(void) return xen_swiotlb; } -void __init pci_xen_swiotlb_init(void) +static void __init pci_xen_swiotlb_init(void) { if (xen_swiotlb) { xen_swiotlb_init_early(); From 794d5b8a497ff053f56856472e2fae038fa761aa Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 17 Sep 2021 12:50:38 +0200 Subject: [PATCH 396/543] swiotlb-xen: this is PV-only on x86 The code is unreachable for HVM or PVH, and it also makes little sense in auto-translated environments. On Arm, with xen_{create,destroy}_contiguous_region() both being stubs, I have a hard time seeing what good the Xen specific variant does - the generic one ought to be fine for all purposes there. Still Arm code explicitly references symbols here, so the code will continue to be included there. Instead of making PCI_XEN's "select" conditional, simply drop it - SWIOTLB_XEN will be available unconditionally in the PV case anyway, and is - as explained above - dead code in non-PV environments. This in turn allows dropping the stubs for xen_{create,destroy}_contiguous_region(), the former of which was broken anyway - it failed to set the DMA handle output. Signed-off-by: Jan Beulich Reviewed-by: Christoph Hellwig Reviewed-by: Stefano Stabellini Link: https://lore.kernel.org/r/5947b8ae-fdc7-225c-4838-84712265fc1e@suse.com Signed-off-by: Juergen Gross --- arch/x86/Kconfig | 1 - drivers/xen/Kconfig | 1 + include/xen/xen-ops.h | 12 ------------ 3 files changed, 1 insertion(+), 13 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 88fb922c23a0..a71ced4c711f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2605,7 +2605,6 @@ config PCI_OLPC config PCI_XEN def_bool y depends on PCI && XEN - select SWIOTLB_XEN config MMCONF_FAM10H def_bool y diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index a37eb52fb401..22f5aff0c136 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -177,6 +177,7 @@ config XEN_GRANT_DMA_ALLOC config SWIOTLB_XEN def_bool y + depends on XEN_PV || ARM || ARM64 select DMA_OPS select SWIOTLB diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 39a5580f8feb..db28e79b77ee 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -46,19 +46,7 @@ extern unsigned long *xen_contiguous_bitmap; int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, unsigned int address_bits, dma_addr_t *dma_handle); - void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order); -#else -static inline int xen_create_contiguous_region(phys_addr_t pstart, - unsigned int order, - unsigned int address_bits, - dma_addr_t *dma_handle) -{ - return 0; -} - -static inline void xen_destroy_contiguous_region(phys_addr_t pstart, - unsigned int order) { } #endif #if defined(CONFIG_XEN_PV) From d8b1e10a2b8efaf71d151aa756052fbf2f3b6d57 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 20 Sep 2021 10:56:32 -0700 Subject: [PATCH 397/543] sparc64: fix pci_iounmap() when CONFIG_PCI is not set Guenter reported [1] that the pci_iounmap() changes remain problematic, with sparc64 allnoconfig and tinyconfig still not building due to the header file changes and confusion with the arch-specific pci_iounmap() implementation. I'm pretty convinced that sparc should just use GENERIC_IOMAP instead of doing its own thing, since it turns out that the sparc64 version of pci_iounmap() is somewhat buggy (see [2]). But in the meantime, this just fixes the build by avoiding the trivial re-definition of the empty case. Link: https://lore.kernel.org/lkml/20210920134424.GA346531@roeck-us.net/ [1] Link: https://lore.kernel.org/lkml/CAHk-=wgheheFx9myQyy5osh79BAazvmvYURAtub2gQtMvLrhqQ@mail.gmail.com/ [2] Reported-by: Guenter Roeck Cc: David Miller Signed-off-by: Linus Torvalds --- arch/sparc/lib/iomap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/sparc/lib/iomap.c b/arch/sparc/lib/iomap.c index c9da9f139694..f3a8cd491ce0 100644 --- a/arch/sparc/lib/iomap.c +++ b/arch/sparc/lib/iomap.c @@ -19,8 +19,10 @@ void ioport_unmap(void __iomem *addr) EXPORT_SYMBOL(ioport_map); EXPORT_SYMBOL(ioport_unmap); +#ifdef CONFIG_PCI void pci_iounmap(struct pci_dev *dev, void __iomem * addr) { /* nothing to do */ } EXPORT_SYMBOL(pci_iounmap); +#endif From e8f71f89236ef82d449991bfbc237e3cb6ea584f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 8 Sep 2021 12:08:17 -0700 Subject: [PATCH 398/543] drm/nouveau/nvkm: Replace -ENOSYS with -ENODEV nvkm test builds fail with the following error. drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c: In function 'nvkm_control_mthd_pstate_info': drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c:60:35: error: overflow in conversion from 'int' to '__s8' {aka 'signed char'} changes value from '-251' to '5' The code builds on most architectures, but fails on parisc where ENOSYS is defined as 251. Replace the error code with -ENODEV (-19). The actual error code does not really matter and is not passed to userspace - it just has to be negative. Fixes: 7238eca4cf18 ("drm/nouveau: expose pstate selection per-power source in sysfs") Signed-off-by: Guenter Roeck Cc: Ben Skeggs Cc: David Airlie Cc: Daniel Vetter Signed-off-by: Linus Torvalds --- drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c index b0ece71aefde..ce774579c89d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c @@ -57,7 +57,7 @@ nvkm_control_mthd_pstate_info(struct nvkm_control *ctrl, void *data, u32 size) args->v0.count = 0; args->v0.ustate_ac = NVIF_CONTROL_PSTATE_INFO_V0_USTATE_DISABLE; args->v0.ustate_dc = NVIF_CONTROL_PSTATE_INFO_V0_USTATE_DISABLE; - args->v0.pwrsrc = -ENOSYS; + args->v0.pwrsrc = -ENODEV; args->v0.pstate = NVIF_CONTROL_PSTATE_INFO_V0_PSTATE_UNKNOWN; } From d4ffd5df9d18031b6a53f934388726775b4452d3 Mon Sep 17 00:00:00 2001 From: Jiashuo Liang Date: Fri, 30 Jul 2021 11:01:52 +0800 Subject: [PATCH 399/543] x86/fault: Fix wrong signal when vsyscall fails with pkey The function __bad_area_nosemaphore() calls kernelmode_fixup_or_oops() with the parameter @signal being actually @pkey, which will send a signal numbered with the argument in @pkey. This bug can be triggered when the kernel fails to access user-given memory pages that are protected by a pkey, so it can go down the do_user_addr_fault() path and pass the !user_mode() check in __bad_area_nosemaphore(). Most cases will simply run the kernel fixup code to make an -EFAULT. But when another condition current->thread.sig_on_uaccess_err is met, which is only used to emulate vsyscall, the kernel will generate the wrong signal. Add a new parameter @pkey to kernelmode_fixup_or_oops() to fix this. [ bp: Massage commit message, fix build error as reported by the 0day bot: https://lkml.kernel.org/r/202109202245.APvuT8BX-lkp@intel.com ] Fixes: 5042d40a264c ("x86/fault: Bypass no_context() for implicit kernel faults from usermode") Reported-by: kernel test robot Signed-off-by: Jiashuo Liang Signed-off-by: Borislav Petkov Acked-by: Dave Hansen Link: https://lkml.kernel.org/r/20210730030152.249106-1-liangjs@pku.edu.cn --- arch/x86/include/asm/pkeys.h | 2 -- arch/x86/mm/fault.c | 26 ++++++++++++++++++-------- include/linux/pkeys.h | 2 ++ 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index 5c7bcaa79623..1d5f14aff5f6 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -2,8 +2,6 @@ #ifndef _ASM_X86_PKEYS_H #define _ASM_X86_PKEYS_H -#define ARCH_DEFAULT_PKEY 0 - /* * If more than 16 keys are ever supported, a thorough audit * will be necessary to ensure that the types that store key diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index b2eefdefc108..84a2c8c4af73 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -710,7 +710,8 @@ oops: static noinline void kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code, - unsigned long address, int signal, int si_code) + unsigned long address, int signal, int si_code, + u32 pkey) { WARN_ON_ONCE(user_mode(regs)); @@ -735,8 +736,12 @@ kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code, set_signal_archinfo(address, error_code); - /* XXX: hwpoison faults will set the wrong code. */ - force_sig_fault(signal, si_code, (void __user *)address); + if (si_code == SEGV_PKUERR) { + force_sig_pkuerr((void __user *)address, pkey); + } else { + /* XXX: hwpoison faults will set the wrong code. */ + force_sig_fault(signal, si_code, (void __user *)address); + } } /* @@ -798,7 +803,8 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, struct task_struct *tsk = current; if (!user_mode(regs)) { - kernelmode_fixup_or_oops(regs, error_code, address, pkey, si_code); + kernelmode_fixup_or_oops(regs, error_code, address, + SIGSEGV, si_code, pkey); return; } @@ -930,7 +936,8 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, { /* Kernel mode? Handle exceptions or die: */ if (!user_mode(regs)) { - kernelmode_fixup_or_oops(regs, error_code, address, SIGBUS, BUS_ADRERR); + kernelmode_fixup_or_oops(regs, error_code, address, + SIGBUS, BUS_ADRERR, ARCH_DEFAULT_PKEY); return; } @@ -1396,7 +1403,8 @@ good_area: */ if (!user_mode(regs)) kernelmode_fixup_or_oops(regs, error_code, address, - SIGBUS, BUS_ADRERR); + SIGBUS, BUS_ADRERR, + ARCH_DEFAULT_PKEY); return; } @@ -1416,7 +1424,8 @@ good_area: return; if (fatal_signal_pending(current) && !user_mode(regs)) { - kernelmode_fixup_or_oops(regs, error_code, address, 0, 0); + kernelmode_fixup_or_oops(regs, error_code, address, + 0, 0, ARCH_DEFAULT_PKEY); return; } @@ -1424,7 +1433,8 @@ good_area: /* Kernel mode? Handle exceptions or die: */ if (!user_mode(regs)) { kernelmode_fixup_or_oops(regs, error_code, address, - SIGSEGV, SEGV_MAPERR); + SIGSEGV, SEGV_MAPERR, + ARCH_DEFAULT_PKEY); return; } diff --git a/include/linux/pkeys.h b/include/linux/pkeys.h index 6beb26b7151d..86be8bf27b41 100644 --- a/include/linux/pkeys.h +++ b/include/linux/pkeys.h @@ -4,6 +4,8 @@ #include +#define ARCH_DEFAULT_PKEY 0 + #ifdef CONFIG_ARCH_HAS_PKEYS #include #else /* ! CONFIG_ARCH_HAS_PKEYS */ From 555f66d0f8a38537456acc77043d0e4469fcbe8e Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Tue, 14 Sep 2021 11:20:06 +0200 Subject: [PATCH 400/543] nvme-fc: update hardware queues before using them In case the number of hardware queues changes, we need to update the tagset and the mapping of ctx to hctx first. If we try to create and connect the I/O queues first, this operation will fail (target will reject the connect call due to the wrong number of queues) and hence we bail out of the recreate function. Then we will to try the very same operation again, thus we don't make any progress. Signed-off-by: Daniel Wagner Reviewed-by: Ming Lei Reviewed-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index b08a61ca283f..b5d9a5507de5 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2951,14 +2951,6 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) if (ctrl->ctrl.queue_count == 1) return 0; - ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); - if (ret) - goto out_free_io_queues; - - ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1); - if (ret) - goto out_delete_hw_queues; - if (prior_ioq_cnt != nr_io_queues) { dev_info(ctrl->ctrl.device, "reconnect: revising io queue count from %d to %d\n", @@ -2968,6 +2960,14 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) nvme_unfreeze(&ctrl->ctrl); } + ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); + if (ret) + goto out_free_io_queues; + + ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1); + if (ret) + goto out_delete_hw_queues; + return 0; out_delete_hw_queues: From e5445dae29d25d7b03e0a10d3d4277a1d0c8119b Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 14 Sep 2021 11:20:07 +0200 Subject: [PATCH 401/543] nvme-fc: avoid race between time out and tear down To avoid race between time out and tear down, in tear down process, first we quiesce the queue, and then delete the timer and cancel the time out work for the queue. This patch merges the admin and io sync ops into the queue teardown logic as shown in the RDMA patch 3017013dcc "nvme-rdma: avoid race between time out and tear down". There is no teardown_lock in nvme-fc. Signed-off-by: James Smart Tested-by: Daniel Wagner Reviewed-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Reviewed-by: Daniel Wagner Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index b5d9a5507de5..6ebe68396712 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2487,6 +2487,7 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) */ if (ctrl->ctrl.queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); + nvme_sync_io_queues(&ctrl->ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); blk_mq_tagset_wait_completed_request(&ctrl->tag_set); @@ -2510,6 +2511,7 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) * clean up the admin queue. Same thing as above. */ blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + blk_sync_queue(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set); From bdaa1365667103e7a754e87c08b846a979ce322b Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 14 Sep 2021 11:20:08 +0200 Subject: [PATCH 402/543] nvme-fc: remove freeze/unfreeze around update_nr_hw_queues Remove the freeze/unfreeze around changes to the number of hardware queues. Study and retest has indicated there are no ios that can be active at this point so there is nothing to freeze. nvme-fc is draining the queues in the shutdown and error recovery path in __nvme_fc_abort_outstanding_ios. This patch primarily reverts 88e837ed0f1f "nvme-fc: wait for queues to freeze before calling update_hr_hw_queues". It's not an exact revert as it leaves the adjusting of hw queues only if the count changes. Signed-off-by: James Smart [dwagner: added explanation why no IO is pending] Signed-off-by: Daniel Wagner Reviewed-by: Ming Lei Reviewed-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 6ebe68396712..aa14ad963d91 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2957,9 +2957,7 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) dev_info(ctrl->ctrl.device, "reconnect: revising io queue count from %d to %d\n", prior_ioq_cnt, nr_io_queues); - nvme_wait_freeze(&ctrl->ctrl); blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues); - nvme_unfreeze(&ctrl->ctrl); } ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); From e371af033c560b9dd1e861f8f0b503142bf0a06c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 14 Sep 2021 18:38:55 +0300 Subject: [PATCH 403/543] nvme-tcp: fix incorrect h2cdata pdu offset accounting When the controller sends us multiple r2t PDUs in a single request we need to account for it correctly as our send/recv context run concurrently (i.e. we get a new r2t with r2t_offset before we updated our iterator and req->data_sent marker). This can cause wrong offsets to be sent to the controller. To fix that, we will first know that this may happen only in the send sequence of the last page, hence we will take the r2t_offset to the h2c PDU data_offset, and in nvme_tcp_try_send_data loop, we make sure to increment the request markers also when we completed a PDU but we are expecting more r2t PDUs as we still did not send the entire data of the request. Fixes: 825619b09ad3 ("nvme-tcp: fix possible use-after-completion") Reported-by: Nowak, Lukasz Tested-by: Nowak, Lukasz Signed-off-by: Sagi Grimberg Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index e4249b7dc056..3c1c29dd3020 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -620,7 +620,7 @@ static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req, cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst); data->ttag = pdu->ttag; data->command_id = nvme_cid(rq); - data->data_offset = cpu_to_le32(req->data_sent); + data->data_offset = pdu->r2t_offset; data->data_length = cpu_to_le32(req->pdu_len); return 0; } @@ -953,7 +953,15 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) nvme_tcp_ddgst_update(queue->snd_hash, page, offset, ret); - /* fully successful last write*/ + /* + * update the request iterator except for the last payload send + * in the request where we don't want to modify it as we may + * compete with the RX path completing the request. + */ + if (req->data_sent + ret < req->data_len) + nvme_tcp_advance_req(req, ret); + + /* fully successful last send in current PDU */ if (last && ret == len) { if (queue->data_digest) { nvme_tcp_ddgst_final(queue->snd_hash, @@ -965,7 +973,6 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) } return 1; } - nvme_tcp_advance_req(req, ret); } return -EAGAIN; } From 298ba0e3d4af539cc37f982d4c011a0f07fca48c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Sep 2021 08:38:20 +0200 Subject: [PATCH 404/543] nvme: keep ctrl->namespaces ordered Various places in the nvme code that rely on ctrl->namespace to be ordered. Ensure that the namespae is inserted into the list at the right position from the start instead of sorting it after the fact. Fixes: 540c801c65eb ("NVMe: Implement namespace list scanning") Reported-by: Anton Eidelman Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal --- drivers/nvme/host/core.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6600e138945e..e486845d2c7e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -3716,15 +3715,6 @@ out_unlock: return ret; } -static int ns_cmp(void *priv, const struct list_head *a, - const struct list_head *b) -{ - struct nvme_ns *nsa = container_of(a, struct nvme_ns, list); - struct nvme_ns *nsb = container_of(b, struct nvme_ns, list); - - return nsa->head->ns_id - nsb->head->ns_id; -} - struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns *ns, *ret = NULL; @@ -3745,6 +3735,22 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) } EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, NVME_TARGET_PASSTHRU); +/* + * Add the namespace to the controller list while keeping the list ordered. + */ +static void nvme_ns_add_to_ctrl_list(struct nvme_ns *ns) +{ + struct nvme_ns *tmp; + + list_for_each_entry_reverse(tmp, &ns->ctrl->namespaces, list) { + if (tmp->head->ns_id < ns->head->ns_id) { + list_add(&ns->list, &tmp->list); + return; + } + } + list_add(&ns->list, &ns->ctrl->namespaces); +} + static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, struct nvme_ns_ids *ids) { @@ -3795,9 +3801,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, goto out_unlink_ns; down_write(&ctrl->namespaces_rwsem); - list_add_tail(&ns->list, &ctrl->namespaces); + nvme_ns_add_to_ctrl_list(ns); up_write(&ctrl->namespaces_rwsem); - nvme_get_ctrl(ctrl); if (device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups)) @@ -4080,10 +4085,6 @@ static void nvme_scan_work(struct work_struct *work) if (nvme_scan_ns_list(ctrl) != 0) nvme_scan_ns_sequential(ctrl); mutex_unlock(&ctrl->scan_lock); - - down_write(&ctrl->namespaces_rwsem); - list_sort(NULL, &ctrl->namespaces, ns_cmp); - up_write(&ctrl->namespaces_rwsem); } /* From 96f5bd03e1be606987644b71899ea56a8d05f825 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 20 Sep 2021 12:03:45 +0200 Subject: [PATCH 405/543] xen/balloon: fix balloon kthread freezing Commit 8480ed9c2bbd56 ("xen/balloon: use a kernel thread instead a workqueue") switched the Xen balloon driver to use a kernel thread. Unfortunately the patch omitted to call try_to_freeze() or to use wait_event_freezable_timeout(), causing a system suspend to fail. Fixes: 8480ed9c2bbd56 ("xen/balloon: use a kernel thread instead a workqueue") Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20210920100345.21939-1-jgross@suse.com Signed-off-by: Juergen Gross --- drivers/xen/balloon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 2d2803883306..43ebfe36ac27 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -522,8 +522,8 @@ static int balloon_thread(void *unused) timeout = 3600 * HZ; credit = current_credit(); - wait_event_interruptible_timeout(balloon_thread_wq, - balloon_thread_cond(state, credit), timeout); + wait_event_freezable_timeout(balloon_thread_wq, + balloon_thread_cond(state, credit), timeout); if (kthread_should_stop()) return 0; From 0594c58161b6e0f3da8efa9c6e3d4ba52b652717 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 20 Sep 2021 18:15:11 +0200 Subject: [PATCH 406/543] xen/x86: fix PV trap handling on secondary processors The initial observation was that in PV mode under Xen 32-bit user space didn't work anymore. Attempts of system calls ended in #GP(0x402). All of the sudden the vector 0x80 handler was not in place anymore. As it turns out up to 5.13 redundant initialization did occur: Once from cpu_initialize_context() (through its VCPUOP_initialise hypercall) and a 2nd time while each CPU was brought fully up. This 2nd initialization is now gone, uncovering that the 1st one was flawed: Unlike for the set_trap_table hypercall, a full virtual IDT needs to be specified here; the "vector" fields of the individual entries are of no interest. With many (kernel) IDT entries still(?) (i.e. at that point at least) empty, the syscall vector 0x80 ended up in slot 0x20 of the virtual IDT, thus becoming the domain's handler for vector 0x20. Make xen_convert_trap_info() fit for either purpose, leveraging the fact that on the xen_copy_trap_info() path the table starts out zero-filled. This includes moving out the writing of the sentinel, which would also have lead to a buffer overrun in the xen_copy_trap_info() case if all (kernel) IDT entries were populated. Convert the writing of the sentinel to clearing of the entire table entry rather than just the address field. (I didn't bother trying to identify the commit which uncovered the issue in 5.14; the commit named below is the one which actually introduced the bad code.) Fixes: f87e4cac4f4e ("xen: SMP guest support") Cc: stable@vger.kernel.org Signed-off-by: Jan Beulich Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/7a266932-092e-b68f-f2bb-1473b61adc6e@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pv.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 349f780a1567..6e0d0754f94f 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -755,8 +755,8 @@ static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g) preempt_enable(); } -static void xen_convert_trap_info(const struct desc_ptr *desc, - struct trap_info *traps) +static unsigned xen_convert_trap_info(const struct desc_ptr *desc, + struct trap_info *traps, bool full) { unsigned in, out, count; @@ -766,17 +766,18 @@ static void xen_convert_trap_info(const struct desc_ptr *desc, for (in = out = 0; in < count; in++) { gate_desc *entry = (gate_desc *)(desc->address) + in; - if (cvt_gate_to_trap(in, entry, &traps[out])) + if (cvt_gate_to_trap(in, entry, &traps[out]) || full) out++; } - traps[out].address = 0; + + return out; } void xen_copy_trap_info(struct trap_info *traps) { const struct desc_ptr *desc = this_cpu_ptr(&idt_desc); - xen_convert_trap_info(desc, traps); + xen_convert_trap_info(desc, traps, true); } /* Load a new IDT into Xen. In principle this can be per-CPU, so we @@ -786,6 +787,7 @@ static void xen_load_idt(const struct desc_ptr *desc) { static DEFINE_SPINLOCK(lock); static struct trap_info traps[257]; + unsigned out; trace_xen_cpu_load_idt(desc); @@ -793,7 +795,8 @@ static void xen_load_idt(const struct desc_ptr *desc) memcpy(this_cpu_ptr(&idt_desc), desc, sizeof(idt_desc)); - xen_convert_trap_info(desc, traps); + out = xen_convert_trap_info(desc, traps, false); + memset(&traps[out], 0, sizeof(traps[0])); xen_mc_flush(); if (HYPERVISOR_set_trap_table(traps)) From 8aa83e6395ce047a506f0b16edca45f36c1ae7f8 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 20 Sep 2021 14:04:21 +0200 Subject: [PATCH 407/543] x86/setup: Call early_reserve_memory() earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit in Fixes introduced early_reserve_memory() to do all needed initial memblock_reserve() calls in one function. Unfortunately, the call of early_reserve_memory() is done too late for Xen dom0, as in some cases a Xen hook called by e820__memory_setup() will need those memory reservations to have happened already. Move the call of early_reserve_memory() before the call of e820__memory_setup() in order to avoid such problems. Fixes: a799c2bd29d1 ("x86/setup: Consolidate early memory reservations") Reported-by: Marek Marczykowski-Górecki Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov Tested-by: Marek Marczykowski-Górecki Tested-by: Nathan Chancellor Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20210920120421.29276-1-jgross@suse.com --- arch/x86/kernel/setup.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 79f164141116..40ed44ead063 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -830,6 +830,20 @@ void __init setup_arch(char **cmdline_p) x86_init.oem.arch_setup(); + /* + * Do some memory reservations *before* memory is added to memblock, so + * memblock allocations won't overwrite it. + * + * After this point, everything still needed from the boot loader or + * firmware or kernel text should be early reserved or marked not RAM in + * e820. All other memory is free game. + * + * This call needs to happen before e820__memory_setup() which calls the + * xen_memory_setup() on Xen dom0 which relies on the fact that those + * early reservations have happened already. + */ + early_reserve_memory(); + iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; e820__memory_setup(); parse_setup_data(); @@ -876,18 +890,6 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); - /* - * Do some memory reservations *before* memory is added to - * memblock, so memblock allocations won't overwrite it. - * Do it after early param, so we could get (unlikely) panic from - * serial. - * - * After this point everything still needed from the boot loader or - * firmware or kernel text should be early reserved or marked not - * RAM in e820. All other memory is free game. - */ - early_reserve_memory(); - #ifdef CONFIG_MEMORY_HOTPLUG /* * Memory used by the kernel cannot be hot-removed because Linux From 6c90731980655280ea07ce4b21eb97457bf86286 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 20 Sep 2021 21:18:14 +0200 Subject: [PATCH 408/543] net/smc: add missing error check in smc_clc_prfx_set() Coverity stumbled over a missing error check in smc_clc_prfx_set(): *** CID 1475954: Error handling issues (CHECKED_RETURN) /net/smc/smc_clc.c: 233 in smc_clc_prfx_set() >>> CID 1475954: Error handling issues (CHECKED_RETURN) >>> Calling "kernel_getsockname" without checking return value (as is done elsewhere 8 out of 10 times). 233 kernel_getsockname(clcsock, (struct sockaddr *)&addrs); Add the return code check in smc_clc_prfx_set(). Fixes: c246d942eabc ("net/smc: restructure netinfo for CLC proposal msgs") Reported-by: Julian Wiedmann Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_clc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index e286dafd6e88..6ec1ebe878ae 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -230,7 +230,8 @@ static int smc_clc_prfx_set(struct socket *clcsock, goto out_rel; } /* get address to which the internal TCP socket is bound */ - kernel_getsockname(clcsock, (struct sockaddr *)&addrs); + if (kernel_getsockname(clcsock, (struct sockaddr *)&addrs) < 0) + goto out_rel; /* analyze IP specific data of net_device belonging to TCP socket */ addr6 = (struct sockaddr_in6 *)&addrs; rcu_read_lock(); From a18cee4791b1123d0a6579a7c89f4b87e48abe03 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 20 Sep 2021 21:18:15 +0200 Subject: [PATCH 409/543] net/smc: fix 'workqueue leaked lock' in smc_conn_abort_work The abort_work is scheduled when a connection was detected to be out-of-sync after a link failure. The work calls smc_conn_kill(), which calls smc_close_active_abort() and that might end up calling smc_close_cancel_work(). smc_close_cancel_work() cancels any pending close_work and tx_work but needs to release the sock_lock before and acquires the sock_lock again afterwards. So when the sock_lock was NOT acquired before then it may be held after the abort_work completes. Thats why the sock_lock is acquired before the call to smc_conn_kill() in __smc_lgr_terminate(), but this is missing in smc_conn_abort_work(). Fix that by acquiring the sock_lock first and release it after the call to smc_conn_kill(). Fixes: b286a0651e44 ("net/smc: handle incoming CDC validation message") Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index af227b65669e..8280c938be80 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1474,7 +1474,9 @@ static void smc_conn_abort_work(struct work_struct *work) abort_work); struct smc_sock *smc = container_of(conn, struct smc_sock, conn); + lock_sock(&smc->sk); smc_conn_kill(conn, true); + release_sock(&smc->sk); sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */ } From e5845aa0eadda3d8a950eb8845c1396827131f30 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 21 Sep 2021 01:49:18 +0300 Subject: [PATCH 410/543] net: dsa: fix dsa_tree_setup error path Since the blamed commit, dsa_tree_teardown_switches() was split into two smaller functions, dsa_tree_teardown_switches and dsa_tree_teardown_ports. However, the error path of dsa_tree_setup stopped calling dsa_tree_teardown_ports. Fixes: a57d8c217aad ("net: dsa: flush switchdev workqueue before tearing down CPU/DSA ports") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index f14897d9b31d..f54639a3a822 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -1089,6 +1089,7 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) teardown_master: dsa_tree_teardown_master(dst); teardown_switches: + dsa_tree_teardown_ports(dst); dsa_tree_teardown_switches(dst); teardown_cpu_ports: dsa_tree_teardown_cpu_ports(dst); From 3e95cfa24e24fbd7fb7675ab972a5aa507c7a89c Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Tue, 21 Sep 2021 15:41:23 +0900 Subject: [PATCH 411/543] Doc: networking: Fox a typo in ice.rst This patch fixes a spelling typo in ice.rst Signed-off-by: Masanari Iida Signed-off-by: David S. Miller --- Documentation/networking/device_drivers/ethernet/intel/ice.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/device_drivers/ethernet/intel/ice.rst b/Documentation/networking/device_drivers/ethernet/intel/ice.rst index e7d9cbff771b..67b7a701ce9e 100644 --- a/Documentation/networking/device_drivers/ethernet/intel/ice.rst +++ b/Documentation/networking/device_drivers/ethernet/intel/ice.rst @@ -851,7 +851,7 @@ NOTES: - 0x88A8 traffic will not be received unless VLAN stripping is disabled with the following command:: - # ethool -K rxvlan off + # ethtool -K rxvlan off - 0x88A8/0x8100 double VLANs cannot be used with 0x8100 or 0x8100/0x8100 VLANS configured on the same port. 0x88a8/0x8100 traffic will not be received if From 2566fffd6011df17dfba0b216fe9a154d3eb3f75 Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Tue, 14 Sep 2021 15:07:44 -0700 Subject: [PATCH 412/543] drm/i915: Update memory bandwidth parameters Earlier while calculating derated bw we would use 90% of the calculated bw. Starting ADL-P we use a non standard derating. Updating the formulae to reflect the same. Bspec: 64631 v2: Use the new derating value only for ADL-P(MattR) Fixes: 4d32fe2f14a7 ("drm/i915/adl_p: Update memory bandwidth parameters") Cc: Matt Roper Signed-off-by: Radhakrishna Sripada Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210914220744.16042-1-radhakrishna.sripada@intel.com (cherry picked from commit f6d66fc8cf5f673ea76407be84dc17dbb3eda108) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_bw.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index e91e0e0191fb..4b94256d7319 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -222,31 +222,42 @@ static int icl_sagv_max_dclk(const struct intel_qgv_info *qi) struct intel_sa_info { u16 displayrtids; - u8 deburst, deprogbwlimit; + u8 deburst, deprogbwlimit, derating; }; static const struct intel_sa_info icl_sa_info = { .deburst = 8, .deprogbwlimit = 25, /* GB/s */ .displayrtids = 128, + .derating = 10, }; static const struct intel_sa_info tgl_sa_info = { .deburst = 16, .deprogbwlimit = 34, /* GB/s */ .displayrtids = 256, + .derating = 10, }; static const struct intel_sa_info rkl_sa_info = { .deburst = 16, .deprogbwlimit = 20, /* GB/s */ .displayrtids = 128, + .derating = 10, }; static const struct intel_sa_info adls_sa_info = { .deburst = 16, .deprogbwlimit = 38, /* GB/s */ .displayrtids = 256, + .derating = 10, +}; + +static const struct intel_sa_info adlp_sa_info = { + .deburst = 16, + .deprogbwlimit = 38, /* GB/s */ + .displayrtids = 256, + .derating = 20, }; static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa) @@ -302,7 +313,7 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * num_channels, ct); bi->deratedbw[j] = min(maxdebw, - bw * 9 / 10); /* 90% */ + bw * (100 - sa->derating) / 100); drm_dbg_kms(&dev_priv->drm, "BW%d / QGV %d: num_planes=%d deratedbw=%u\n", @@ -400,7 +411,9 @@ void intel_bw_init_hw(struct drm_i915_private *dev_priv) if (IS_DG2(dev_priv)) dg2_get_bw_info(dev_priv); - else if (IS_ALDERLAKE_S(dev_priv) || IS_ALDERLAKE_P(dev_priv)) + else if (IS_ALDERLAKE_P(dev_priv)) + icl_get_bw_info(dev_priv, &adlp_sa_info); + else if (IS_ALDERLAKE_S(dev_priv)) icl_get_bw_info(dev_priv, &adls_sa_info); else if (IS_ROCKETLAKE(dev_priv)) icl_get_bw_info(dev_priv, &rkl_sa_info); From f9b23c157a78c77545099312394d484ce4f35b8b Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 30 Aug 2021 14:09:48 +0200 Subject: [PATCH 413/543] drm/i915: Move __i915_gem_free_object to ttm_bo_destroy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we implement delayed destroy, we may have a second call to the delete_mem_notify() handler, while free_object() only should be called once. Move it to bo->destroy(), to ensure it's only called once. This fixes some weird memory corruption issues with delayed destroy when async eviction is used. Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20210830121006.2978297-2-maarten.lankhorst@linux.intel.com Fixes: 213d50927763 ("drm/i915/ttm: Introduce a TTM i915 gem object backend") Cc: Thomas Hellström Reviewed-by: Thomas Hellström (cherry picked from commit 48b0961269546716c3232748bf37e64e49fb866c) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 35eedc14f522..6ea13159bffc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -356,11 +356,8 @@ static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo) { struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - if (likely(obj)) { - /* This releases all gem object bindings to the backend. */ + if (likely(obj)) i915_ttm_free_cached_io_st(obj); - __i915_gem_free_object(obj); - } } static struct intel_memory_region * @@ -875,8 +872,12 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo) { struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + /* This releases all gem object bindings to the backend. */ + __i915_gem_free_object(obj); + i915_gem_object_release_memory_region(obj); mutex_destroy(&obj->ttm.get_io_page.lock); + if (obj->ttm.created) call_rcu(&obj->rcu, __i915_gem_free_object_rcu); } From b875fb313a10bf816b5d49d8d7642d1cc9905f2f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Aug 2021 12:48:05 -0700 Subject: [PATCH 414/543] drm/i915: Free all DMC payloads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Free all the DMC payloads, not just DMC_MAIN. unreferenced object 0xffff88ff32d4d800 (size 1024): comm "kworker/1:5", pid 701, jiffies 4294904239 (age 109.736s) hex dump (first 32 bytes): 40 40 00 0c 03 00 00 00 00 00 00 00 00 00 00 00 @@.............. 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000ba9d0d95>] dmc_load_work_fn+0x34d/0x510 [i915] [<000000001049fcab>] process_one_work+0x261/0x550 [<00000000eeb995ac>] worker_thread+0x49/0x3c0 [<0000000021031dc3>] kthread+0x10b/0x140 [<000000004a0f69ee>] ret_from_fork+0x1f/0x30 unreferenced object 0xffff88ff0bde4000 (size 1024): comm "kworker/0:3", pid 708, jiffies 4294904469 (age 108.816s) hex dump (first 32 bytes): 40 40 00 0c 01 00 00 00 00 00 00 00 00 00 00 00 @@.............. 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000ba9d0d95>] dmc_load_work_fn+0x34d/0x510 [i915] [<000000001049fcab>] process_one_work+0x261/0x550 [<00000000eeb995ac>] worker_thread+0x49/0x3c0 [<0000000021031dc3>] kthread+0x10b/0x140 [<000000004a0f69ee>] ret_from_fork+0x1f/0x30 Fixes: 3d5928a168a9 ("drm/i915/xelpd: Pipe A DMC plugging") Cc: Anusha Srivatsa Cc: José Roberto de Souza Signed-off-by: Chris Wilson Signed-off-by: Lucas De Marchi Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210809194805.3793060-1-lucas.demarchi@intel.com (cherry picked from commit 064b877dff4252ced91a1c8b1f129073f2991f6e) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dmc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 3c3c6cb5c0df..b3c8e1c450ef 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -805,11 +805,14 @@ void intel_dmc_ucode_resume(struct drm_i915_private *dev_priv) */ void intel_dmc_ucode_fini(struct drm_i915_private *dev_priv) { + int id; + if (!HAS_DMC(dev_priv)) return; intel_dmc_ucode_suspend(dev_priv); drm_WARN_ON(&dev_priv->drm, dev_priv->dmc.wakeref); - kfree(dev_priv->dmc.dmc_info[DMC_FW_MAIN].payload); + for (id = 0; id < DMC_FW_MAX; id++) + kfree(dev_priv->dmc.dmc_info[id].payload); } From 8c8a3b5bd960cd88f7655b5251dc28741e11f139 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Wed, 15 Sep 2021 12:03:35 -0700 Subject: [PATCH 415/543] arm64: add MTE supported check to thread switching and syscall entry/exit This lets us avoid doing unnecessary work on hardware that does not support MTE, and will allow us to freely use MTE instructions in the code called by mte_thread_switch(). Since this would mean that we do a redundant check in mte_check_tfsr_el1(), remove it and add two checks now required in its callers. This also avoids an unnecessary DSB+ISB sequence on the syscall exit path for hardware not supporting MTE. Fixes: 65812c6921cc ("arm64: mte: Enable async tag check fault") Cc: # 5.13.x Signed-off-by: Peter Collingbourne Link: https://linux-review.googlesource.com/id/I02fd000d1ef2c86c7d2952a7f099b254ec227a5d Link: https://lore.kernel.org/r/20210915190336.398390-1-pcc@google.com [catalin.marinas@arm.com: adjust the commit log slightly] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/mte.h | 6 ++++++ arch/arm64/kernel/mte.c | 10 ++++------ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 3f93b9e0b339..02511650cffe 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -99,11 +99,17 @@ void mte_check_tfsr_el1(void); static inline void mte_check_tfsr_entry(void) { + if (!system_supports_mte()) + return; + mte_check_tfsr_el1(); } static inline void mte_check_tfsr_exit(void) { + if (!system_supports_mte()) + return; + /* * The asynchronous faults are sync'ed automatically with * TFSR_EL1 on kernel entry but for exit an explicit dsb() diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 9d314a3bad3b..e5e801bc5312 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -142,12 +142,7 @@ void mte_enable_kernel_async(void) #ifdef CONFIG_KASAN_HW_TAGS void mte_check_tfsr_el1(void) { - u64 tfsr_el1; - - if (!system_supports_mte()) - return; - - tfsr_el1 = read_sysreg_s(SYS_TFSR_EL1); + u64 tfsr_el1 = read_sysreg_s(SYS_TFSR_EL1); if (unlikely(tfsr_el1 & SYS_TFSR_EL1_TF1)) { /* @@ -199,6 +194,9 @@ void mte_thread_init_user(void) void mte_thread_switch(struct task_struct *next) { + if (!system_supports_mte()) + return; + mte_update_sctlr_user(next); /* From 5135e96a3dd2f4555ae6981c3155a62bcf3227f6 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 21 Sep 2021 00:42:08 +0300 Subject: [PATCH 416/543] net: dsa: don't allocate the slave_mii_bus using devres The Linux device model permits both the ->shutdown and ->remove driver methods to get called during a shutdown procedure. Example: a DSA switch which sits on an SPI bus, and the SPI bus driver calls this on its ->shutdown method: spi_unregister_controller -> device_for_each_child(&ctlr->dev, NULL, __unregister); -> spi_unregister_device(to_spi_device(dev)); -> device_del(&spi->dev); So this is a simple pattern which can theoretically appear on any bus, although the only other buses on which I've been able to find it are I2C: i2c_del_adapter -> device_for_each_child(&adap->dev, NULL, __unregister_client); -> i2c_unregister_device(client); -> device_unregister(&client->dev); The implication of this pattern is that devices on these buses can be unregistered after having been shut down. The drivers for these devices might choose to return early either from ->remove or ->shutdown if the other callback has already run once, and they might choose that the ->shutdown method should only perform a subset of the teardown done by ->remove (to avoid unnecessary delays when rebooting). So in other words, the device driver may choose on ->remove to not do anything (therefore to not unregister an MDIO bus it has registered on ->probe), because this ->remove is actually triggered by the device_shutdown path, and its ->shutdown method has already run and done the minimally required cleanup. This used to be fine until the blamed commit, but now, the following BUG_ON triggers: void mdiobus_free(struct mii_bus *bus) { /* For compatibility with error handling in drivers. */ if (bus->state == MDIOBUS_ALLOCATED) { kfree(bus); return; } BUG_ON(bus->state != MDIOBUS_UNREGISTERED); bus->state = MDIOBUS_RELEASED; put_device(&bus->dev); } In other words, there is an attempt to free an MDIO bus which was not unregistered. The attempt to free it comes from the devres release callbacks of the SPI device, which are executed after the device is unregistered. I'm not saying that the fact that MDIO buses allocated using devres would automatically get unregistered wasn't strange. I'm just saying that the commit didn't care about auditing existing call paths in the kernel, and now, the following code sequences are potentially buggy: (a) devm_mdiobus_alloc followed by plain mdiobus_register, for a device located on a bus that unregisters its children on shutdown. After the blamed patch, either both the alloc and the register should use devres, or none should. (b) devm_mdiobus_alloc followed by plain mdiobus_register, and then no mdiobus_unregister at all in the remove path. After the blamed patch, nobody unregisters the MDIO bus anymore, so this is even more buggy than the previous case which needs a specific bus configuration to be seen, this one is an unconditional bug. In this case, DSA falls into category (a), it tries to be helpful and registers an MDIO bus on behalf of the switch, which might be on such a bus. I've no idea why it does it under devres. It does this on probe: if (!ds->slave_mii_bus && ds->ops->phy_read) alloc and register mdio bus and this on remove: if (ds->slave_mii_bus && ds->ops->phy_read) unregister mdio bus I _could_ imagine using devres because the condition used on remove is different than the condition used on probe. So strictly speaking, DSA cannot determine whether the ds->slave_mii_bus it sees on remove is the ds->slave_mii_bus that _it_ has allocated on probe. Using devres would have solved that problem. But nonetheless, the existing code already proceeds to unregister the MDIO bus, even though it might be unregistering an MDIO bus it has never registered. So I can only guess that no driver that implements ds->ops->phy_read also allocates and registers ds->slave_mii_bus itself. So in that case, if unregistering is fine, freeing must be fine too. Stop using devres and free the MDIO bus manually. This will make devres stop attempting to free a still registered MDIO bus on ->shutdown. Fixes: ac3a68d56651 ("net: phy: don't abuse devres in devm_mdiobus_register()") Reported-by: Lino Sanfilippo Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Tested-by: Lino Sanfilippo Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index f54639a3a822..b29262eee00b 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -880,7 +880,7 @@ static int dsa_switch_setup(struct dsa_switch *ds) devlink_params_publish(ds->devlink); if (!ds->slave_mii_bus && ds->ops->phy_read) { - ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); + ds->slave_mii_bus = mdiobus_alloc(); if (!ds->slave_mii_bus) { err = -ENOMEM; goto teardown; @@ -890,13 +890,16 @@ static int dsa_switch_setup(struct dsa_switch *ds) err = mdiobus_register(ds->slave_mii_bus); if (err < 0) - goto teardown; + goto free_slave_mii_bus; } ds->setup = true; return 0; +free_slave_mii_bus: + if (ds->slave_mii_bus && ds->ops->phy_read) + mdiobus_free(ds->slave_mii_bus); teardown: if (ds->ops->teardown) ds->ops->teardown(ds); @@ -921,8 +924,11 @@ static void dsa_switch_teardown(struct dsa_switch *ds) if (!ds->setup) return; - if (ds->slave_mii_bus && ds->ops->phy_read) + if (ds->slave_mii_bus && ds->ops->phy_read) { mdiobus_unregister(ds->slave_mii_bus); + mdiobus_free(ds->slave_mii_bus); + ds->slave_mii_bus = NULL; + } dsa_switch_unregister_notifier(ds); From 74b6d7d13307b016f4b5bba8198297824c0ee6df Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 21 Sep 2021 00:42:09 +0300 Subject: [PATCH 417/543] net: dsa: realtek: register the MDIO bus under devres MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Linux device model permits both the ->shutdown and ->remove driver methods to get called during a shutdown procedure. Example: a DSA switch which sits on an SPI bus, and the SPI bus driver calls this on its ->shutdown method: spi_unregister_controller -> device_for_each_child(&ctlr->dev, NULL, __unregister); -> spi_unregister_device(to_spi_device(dev)); -> device_del(&spi->dev); So this is a simple pattern which can theoretically appear on any bus, although the only other buses on which I've been able to find it are I2C: i2c_del_adapter -> device_for_each_child(&adap->dev, NULL, __unregister_client); -> i2c_unregister_device(client); -> device_unregister(&client->dev); The implication of this pattern is that devices on these buses can be unregistered after having been shut down. The drivers for these devices might choose to return early either from ->remove or ->shutdown if the other callback has already run once, and they might choose that the ->shutdown method should only perform a subset of the teardown done by ->remove (to avoid unnecessary delays when rebooting). So in other words, the device driver may choose on ->remove to not do anything (therefore to not unregister an MDIO bus it has registered on ->probe), because this ->remove is actually triggered by the device_shutdown path, and its ->shutdown method has already run and done the minimally required cleanup. This used to be fine until the blamed commit, but now, the following BUG_ON triggers: void mdiobus_free(struct mii_bus *bus) { /* For compatibility with error handling in drivers. */ if (bus->state == MDIOBUS_ALLOCATED) { kfree(bus); return; } BUG_ON(bus->state != MDIOBUS_UNREGISTERED); bus->state = MDIOBUS_RELEASED; put_device(&bus->dev); } In other words, there is an attempt to free an MDIO bus which was not unregistered. The attempt to free it comes from the devres release callbacks of the SPI device, which are executed after the device is unregistered. I'm not saying that the fact that MDIO buses allocated using devres would automatically get unregistered wasn't strange. I'm just saying that the commit didn't care about auditing existing call paths in the kernel, and now, the following code sequences are potentially buggy: (a) devm_mdiobus_alloc followed by plain mdiobus_register, for a device located on a bus that unregisters its children on shutdown. After the blamed patch, either both the alloc and the register should use devres, or none should. (b) devm_mdiobus_alloc followed by plain mdiobus_register, and then no mdiobus_unregister at all in the remove path. After the blamed patch, nobody unregisters the MDIO bus anymore, so this is even more buggy than the previous case which needs a specific bus configuration to be seen, this one is an unconditional bug. In this case, the Realtek drivers fall under category (b). To solve it, we can register the MDIO bus under devres too, which restores the previous behavior. Fixes: ac3a68d56651 ("net: phy: don't abuse devres in devm_mdiobus_register()") Reported-by: Lino Sanfilippo Reported-by: Alvin Šipraga Signed-off-by: Vladimir Oltean Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/realtek-smi-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/realtek-smi-core.c b/drivers/net/dsa/realtek-smi-core.c index dd2f0d6208b3..2fcfd917b876 100644 --- a/drivers/net/dsa/realtek-smi-core.c +++ b/drivers/net/dsa/realtek-smi-core.c @@ -368,7 +368,7 @@ int realtek_smi_setup_mdio(struct realtek_smi *smi) smi->slave_mii_bus->parent = smi->dev; smi->ds->slave_mii_bus = smi->slave_mii_bus; - ret = of_mdiobus_register(smi->slave_mii_bus, mdio_np); + ret = devm_of_mdiobus_register(smi->dev, smi->slave_mii_bus, mdio_np); if (ret) { dev_err(smi->dev, "unable to register MDIO bus %s\n", smi->slave_mii_bus->id); From 1bb30b20b49773369c299d4d6c65227201328663 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 16 Sep 2021 16:13:42 +0300 Subject: [PATCH 418/543] thermal/core: Potential buffer overflow in thermal_build_list_of_policies() After printing the list of thermal governors, then this function prints a newline character. The problem is that "size" has not been updated after printing the last governor. This means that it can write one character (the NUL terminator) beyond the end of the buffer. Get rid of the "size" variable and just use "PAGE_SIZE - count" directly. Fixes: 1b4f48494eb2 ("thermal: core: group functions related to governor handling") Signed-off-by: Dan Carpenter Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20210916131342.GB25094@kili --- drivers/thermal/thermal_core.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 97ef9b040b84..51374f4e1cca 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -222,15 +222,14 @@ int thermal_build_list_of_policies(char *buf) { struct thermal_governor *pos; ssize_t count = 0; - ssize_t size = PAGE_SIZE; mutex_lock(&thermal_governor_lock); list_for_each_entry(pos, &thermal_governor_list, governor_list) { - size = PAGE_SIZE - count; - count += scnprintf(buf + count, size, "%s ", pos->name); + count += scnprintf(buf + count, PAGE_SIZE - count, "%s ", + pos->name); } - count += scnprintf(buf + count, size, "\n"); + count += scnprintf(buf + count, PAGE_SIZE - count, "\n"); mutex_unlock(&thermal_governor_lock); From cf96921876dcee4d6ac07b9de470368a075ba9ad Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Tue, 7 Sep 2021 23:25:42 +0200 Subject: [PATCH 419/543] thermal/drivers/tsens: Fix wrong check for tzd in irq handlers Some devices can have some thermal sensors disabled from the factory. The current two irq handler functions check all the sensor by default and the check if the sensor was actually registered is wrong. The tzd is actually never set if the registration fails hence the IS_ERR check is wrong. Signed-off-by: Ansuel Smith Reviewed-by: Matthias Kaehlcke Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20210907212543.20220-1-ansuelsmth@gmail.com --- drivers/thermal/qcom/tsens.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c index 4c7ebd1d3f9c..b1162e566a70 100644 --- a/drivers/thermal/qcom/tsens.c +++ b/drivers/thermal/qcom/tsens.c @@ -417,7 +417,7 @@ static irqreturn_t tsens_critical_irq_thread(int irq, void *data) const struct tsens_sensor *s = &priv->sensor[i]; u32 hw_id = s->hw_id; - if (IS_ERR(s->tzd)) + if (!s->tzd) continue; if (!tsens_threshold_violated(priv, hw_id, &d)) continue; @@ -467,7 +467,7 @@ static irqreturn_t tsens_irq_thread(int irq, void *data) const struct tsens_sensor *s = &priv->sensor[i]; u32 hw_id = s->hw_id; - if (IS_ERR(s->tzd)) + if (!s->tzd) continue; if (!tsens_threshold_violated(priv, hw_id, &d)) continue; From 5b72dafaca73b33416c82457ae615e6f2022e901 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 17 Sep 2021 21:48:29 -0700 Subject: [PATCH 420/543] platform/x86: dell: fix DELL_WMI_PRIVACY dependencies & build error When DELL_WMI=y, DELL_WMI_PRIVACY=y, and LEDS_TRIGGER_AUDIO=m, there is a linker error since the LEDS trigger code is built as a loadable module. This happens because DELL_WMI_PRIVACY is a bool that depends on a tristate (LEDS_TRIGGER_AUDIO=m), which can be dangerous. ld: drivers/platform/x86/dell/dell-wmi-privacy.o: in function `dell_privacy_wmi_probe': dell-wmi-privacy.c:(.text+0x3df): undefined reference to `ledtrig_audio_get' Fixes: 8af9fa37b8a3 ("platform/x86: dell-privacy: Add support for Dell hardware privacy") Signed-off-by: Randy Dunlap Cc: Perry Yuan Cc: Dell.Client.Kernel@dell.com Cc: platform-driver-x86@vger.kernel.org Cc: Hans de Goede Cc: Mark Gross Link: https://lore.kernel.org/r/20210918044829.19222-1-rdunlap@infradead.org Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/dell/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/platform/x86/dell/Kconfig b/drivers/platform/x86/dell/Kconfig index 821aba31821c..42513eab1d06 100644 --- a/drivers/platform/x86/dell/Kconfig +++ b/drivers/platform/x86/dell/Kconfig @@ -166,8 +166,7 @@ config DELL_WMI config DELL_WMI_PRIVACY bool "Dell WMI Hardware Privacy Support" - depends on DELL_WMI - depends on LEDS_TRIGGER_AUDIO + depends on LEDS_TRIGGER_AUDIO = y || DELL_WMI = LEDS_TRIGGER_AUDIO help This option adds integration with the "Dell Hardware Privacy" feature of Dell laptops to the dell-wmi driver. From b201cb0ebe87b209e252d85668e517ac1929e250 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Mon, 20 Sep 2021 18:03:12 +0200 Subject: [PATCH 421/543] platform/x86/intel: hid: Add DMI switches allow list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some devices, even non convertible ones, can send incorrect SW_TABLET_MODE reports. Add an allow list and accept such reports only from devices in it. Bug reported for Dell XPS 17 9710 on: https://gitlab.freedesktop.org/libinput/libinput/-/issues/662 Reported-by: Tobias Gurtzick Suggested-by: Hans de Goede Tested-by: Tobias Gurtzick Signed-off-by: José Expósito Link: https://lore.kernel.org/r/20210920160312.9787-1-jose.exposito89@gmail.com [hdegoede@redhat.com: Check dmi_switches_auto_add_allow_list only once] Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/hid.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index a33a5826e81a..08598942a6d7 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c @@ -118,12 +118,30 @@ static const struct dmi_system_id dmi_vgbs_allow_list[] = { { } }; +/* + * Some devices, even non convertible ones, can send incorrect SW_TABLET_MODE + * reports. Accept such reports only from devices in this list. + */ +static const struct dmi_system_id dmi_auto_add_switch[] = { + { + .matches = { + DMI_EXACT_MATCH(DMI_CHASSIS_TYPE, "31" /* Convertible */), + }, + }, + { + .matches = { + DMI_EXACT_MATCH(DMI_CHASSIS_TYPE, "32" /* Detachable */), + }, + }, + {} /* Array terminator */ +}; + struct intel_hid_priv { struct input_dev *input_dev; struct input_dev *array; struct input_dev *switches; bool wakeup_mode; - bool dual_accel; + bool auto_add_switch; }; #define HID_EVENT_FILTER_UUID "eeec56b3-4442-408f-a792-4edd4d758054" @@ -452,10 +470,8 @@ static void notify_handler(acpi_handle handle, u32 event, void *context) * Some convertible have unreliable VGBS return which could cause incorrect * SW_TABLET_MODE report, in these cases we enable support when receiving * the first event instead of during driver setup. - * - * See dual_accel_detect.h for more info on the dual_accel check. */ - if (!priv->switches && !priv->dual_accel && (event == 0xcc || event == 0xcd)) { + if (!priv->switches && priv->auto_add_switch && (event == 0xcc || event == 0xcd)) { dev_info(&device->dev, "switch event received, enable switches supports\n"); err = intel_hid_switches_setup(device); if (err) @@ -596,7 +612,8 @@ static int intel_hid_probe(struct platform_device *device) return -ENOMEM; dev_set_drvdata(&device->dev, priv); - priv->dual_accel = dual_accel_detect(); + /* See dual_accel_detect.h for more info on the dual_accel check. */ + priv->auto_add_switch = dmi_check_system(dmi_auto_add_switch) && !dual_accel_detect(); err = intel_hid_input_setup(device); if (err) { From 6f6aab1caf6c7fef46852aaab03f4e8250779e52 Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Tue, 21 Sep 2021 12:07:02 +0200 Subject: [PATCH 422/543] platform/x86: gigabyte-wmi: add support for B550I Aorus Pro AX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tested with a AMD Ryzen 7 5800X. Signed-off-by: Tobias Jakobi Acked-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20210921100702.3838-1-tjakobi@math.uni-bielefeld.de Signed-off-by: Hans de Goede --- drivers/platform/x86/gigabyte-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c index 7f3a03f937f6..d53634c8a6e0 100644 --- a/drivers/platform/x86/gigabyte-wmi.c +++ b/drivers/platform/x86/gigabyte-wmi.c @@ -144,6 +144,7 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = { DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE V2"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 GAMING X V2"), + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550I AORUS PRO AX"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M AORUS PRO-P"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z390 I AORUS PRO WIFI-CF"), From 59a68d4138086c015ab8241c3267eec5550fbd44 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 17 Sep 2021 15:59:30 +0100 Subject: [PATCH 423/543] arm64: Mitigate MTE issues with str{n}cmp() As with strlen(), the patches importing the updated str{n}cmp() implementations were originally developed and tested before the advent of CONFIG_KASAN_HW_TAGS, and have subsequently revealed not to be MTE-safe. Since in-kernel MTE is still a rather niche case, let it temporarily fall back to the generic C versions for correctness until we can figure out the best fix. Fixes: 758602c04409 ("arm64: Import latest version of Cortex Strings' strcmp") Fixes: 020b199bc70d ("arm64: Import latest version of Cortex Strings' strncmp") Cc: # 5.14.x Reported-by: Branislav Rankov Signed-off-by: Robin Murphy Acked-by: Mark Rutland Link: https://lore.kernel.org/r/34dc4d12eec0adae49b0ac927df642ed10089d40.1631890770.git.robin.murphy@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/assembler.h | 5 +++++ arch/arm64/include/asm/string.h | 2 ++ arch/arm64/lib/strcmp.S | 2 +- arch/arm64/lib/strncmp.S | 2 +- 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 89faca0e740d..bfa58409a4d4 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -525,6 +525,11 @@ alternative_endif #define EXPORT_SYMBOL_NOKASAN(name) EXPORT_SYMBOL(name) #endif +#ifdef CONFIG_KASAN_HW_TAGS +#define EXPORT_SYMBOL_NOHWKASAN(name) +#else +#define EXPORT_SYMBOL_NOHWKASAN(name) EXPORT_SYMBOL_NOKASAN(name) +#endif /* * Emit a 64-bit absolute little endian symbol reference in a way that * ensures that it will be resolved at build time, even when building a diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h index 3a3264ff47b9..95f7686b728d 100644 --- a/arch/arm64/include/asm/string.h +++ b/arch/arm64/include/asm/string.h @@ -12,11 +12,13 @@ extern char *strrchr(const char *, int c); #define __HAVE_ARCH_STRCHR extern char *strchr(const char *, int c); +#ifndef CONFIG_KASAN_HW_TAGS #define __HAVE_ARCH_STRCMP extern int strcmp(const char *, const char *); #define __HAVE_ARCH_STRNCMP extern int strncmp(const char *, const char *, __kernel_size_t); +#endif #define __HAVE_ARCH_STRLEN extern __kernel_size_t strlen(const char *); diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S index d7bee210a798..83bcad72ec97 100644 --- a/arch/arm64/lib/strcmp.S +++ b/arch/arm64/lib/strcmp.S @@ -173,4 +173,4 @@ L(done): ret SYM_FUNC_END_PI(strcmp) -EXPORT_SYMBOL_NOKASAN(strcmp) +EXPORT_SYMBOL_NOHWKASAN(strcmp) diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S index 48d44f7fddb1..e42bcfcd37e6 100644 --- a/arch/arm64/lib/strncmp.S +++ b/arch/arm64/lib/strncmp.S @@ -258,4 +258,4 @@ L(ret0): ret SYM_FUNC_END_PI(strncmp) -EXPORT_SYMBOL_NOKASAN(strncmp) +EXPORT_SYMBOL_NOHWKASAN(strncmp) From d9d1232b48344c6c72dbdf89fae1e7638e5df757 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 17 Sep 2021 13:57:36 +0200 Subject: [PATCH 424/543] misc: bcm-vk: fix tty registration race Make sure to set the tty class-device driver data before registering the tty to avoid having a racing open() dereference a NULL pointer. Fixes: 91ca10d6fa07 ("misc: bcm-vk: add ttyVK support") Cc: stable@vger.kernel.org # 5.12 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20210917115736.5816-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/bcm-vk/bcm_vk_tty.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/misc/bcm-vk/bcm_vk_tty.c b/drivers/misc/bcm-vk/bcm_vk_tty.c index 1b6076a89ca6..6669625ba4c8 100644 --- a/drivers/misc/bcm-vk/bcm_vk_tty.c +++ b/drivers/misc/bcm-vk/bcm_vk_tty.c @@ -267,13 +267,13 @@ int bcm_vk_tty_init(struct bcm_vk *vk, char *name) struct device *tty_dev; tty_port_init(&vk->tty[i].port); - tty_dev = tty_port_register_device(&vk->tty[i].port, tty_drv, - i, dev); + tty_dev = tty_port_register_device_attr(&vk->tty[i].port, + tty_drv, i, dev, vk, + NULL); if (IS_ERR(tty_dev)) { err = PTR_ERR(tty_dev); goto unwind; } - dev_set_drvdata(tty_dev, vk); vk->tty[i].is_opened = false; } From ce1c42b4dacfe7d71c852d8bf3371067ccba865c Mon Sep 17 00:00:00 2001 From: Julian Sikorski Date: Mon, 13 Sep 2021 20:14:55 +0200 Subject: [PATCH 425/543] Re-enable UAS for LaCie Rugged USB3-FW with fk quirk Further testing has revealed that LaCie Rugged USB3-FW does work with uas as long as US_FL_NO_REPORT_OPCODES and US_FL_NO_SAME are enabled. Link: https://lore.kernel.org/linux-usb/2167ea48-e273-a336-a4e0-10a4e883e75e@redhat.com/ Cc: stable Suggested-by: Hans de Goede Reviewed-by: Hans de Goede Acked-by: Oliver Neukum Signed-off-by: Julian Sikorski Link: https://lore.kernel.org/r/20210913181454.7365-1-belegdol+github@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_uas.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index c35a6db993f1..4051c8cd0cd8 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -50,7 +50,7 @@ UNUSUAL_DEV(0x059f, 0x1061, 0x0000, 0x9999, "LaCie", "Rugged USB3-FW", USB_SC_DEVICE, USB_PR_DEVICE, NULL, - US_FL_IGNORE_UAS), + US_FL_NO_REPORT_OPCODES | US_FL_NO_SAME), /* * Apricorn USB3 dongle sometimes returns "USBSUSBSUSBS" in response to SCSI From b55d37ef6b7db3eda9b4495a8d9b0a944ee8c67d Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Mon, 13 Sep 2021 23:01:06 +0200 Subject: [PATCH 426/543] usb-storage: Add quirk for ScanLogic SL11R-IDE older than 2.6c ScanLogic SL11R-IDE with firmware older than 2.6c (the latest one) has broken tag handling, preventing the device from working at all: usb 1-1: new full-speed USB device number 2 using uhci_hcd usb 1-1: New USB device found, idVendor=04ce, idProduct=0002, bcdDevice= 2.60 usb 1-1: New USB device strings: Mfr=1, Product=1, SerialNumber=0 usb 1-1: Product: USB Device usb 1-1: Manufacturer: USB Device usb-storage 1-1:1.0: USB Mass Storage device detected scsi host2: usb-storage 1-1:1.0 usbcore: registered new interface driver usb-storage usb 1-1: reset full-speed USB device number 2 using uhci_hcd usb 1-1: reset full-speed USB device number 2 using uhci_hcd usb 1-1: reset full-speed USB device number 2 using uhci_hcd usb 1-1: reset full-speed USB device number 2 using uhci_hcd Add US_FL_BULK_IGNORE_TAG to fix it. Also update my e-mail address. 2.6c is the only firmware that claims Linux compatibility. The firmware can be upgraded using ezotgdbg utility: https://github.com/asciilifeform/ezotgdbg Acked-by: Alan Stern Signed-off-by: Ondrej Zary Cc: stable Link: https://lore.kernel.org/r/20210913210106.12717-1-linux@zary.sk Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index efa972be2ee3..c6b3fcf90180 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -416,9 +416,16 @@ UNUSUAL_DEV( 0x04cb, 0x0100, 0x0000, 0x2210, USB_SC_UFI, USB_PR_DEVICE, NULL, US_FL_FIX_INQUIRY | US_FL_SINGLE_LUN), /* - * Reported by Ondrej Zary + * Reported by Ondrej Zary * The device reports one sector more and breaks when that sector is accessed + * Firmwares older than 2.6c (the latest one and the only that claims Linux + * support) have also broken tag handling */ +UNUSUAL_DEV( 0x04ce, 0x0002, 0x0000, 0x026b, + "ScanLogic", + "SL11R-IDE", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_FIX_CAPACITY | US_FL_BULK_IGNORE_TAG), UNUSUAL_DEV( 0x04ce, 0x0002, 0x026c, 0x026c, "ScanLogic", "SL11R-IDE", From 517c7bf99bad3d6b9360558414aae634b7472d80 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 16 Sep 2021 16:57:37 +0300 Subject: [PATCH 427/543] usb: musb: tusb6010: uninitialized data in tusb_fifo_write_unaligned() This is writing to the first 1 - 3 bytes of "val" and then writing all four bytes to musb_writel(). The last byte is always going to be garbage. Zero out the last bytes instead. Fixes: 550a7375fe72 ("USB: Add MUSB and TUSB support") Signed-off-by: Dan Carpenter Cc: stable Link: https://lore.kernel.org/r/20210916135737.GI25094@kili Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/tusb6010.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/musb/tusb6010.c b/drivers/usb/musb/tusb6010.c index c42937692207..c968ecda42aa 100644 --- a/drivers/usb/musb/tusb6010.c +++ b/drivers/usb/musb/tusb6010.c @@ -190,6 +190,7 @@ tusb_fifo_write_unaligned(void __iomem *fifo, const u8 *buf, u16 len) } if (len > 0) { /* Write the rest 1 - 3 bytes to FIFO */ + val = 0; memcpy(&val, buf, len); musb_writel(fifo, 0, val); } From d5f6545934c47e97c0b48a645418e877b452a992 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 20 Sep 2021 10:26:21 -0700 Subject: [PATCH 428/543] qnx4: work around gcc false positive warning bug In commit b7213ffa0e58 ("qnx4: avoid stringop-overread errors") I tried to teach gcc about how the directory entry structure can be two different things depending on a status flag. It made the code clearer, and it seemed to make gcc happy. However, Arnd points to a gcc bug, where despite using two different members of a union, gcc then gets confused, and uses the size of one of the members to decide if a string overrun happens. And not necessarily the rigth one. End result: with some configurations, gcc-11 will still complain about the source buffer size being overread: fs/qnx4/dir.c: In function 'qnx4_readdir': fs/qnx4/dir.c:76:32: error: 'strnlen' specified bound [16, 48] exceeds source size 1 [-Werror=stringop-overread] 76 | size = strnlen(name, size); | ^~~~~~~~~~~~~~~~~~~ fs/qnx4/dir.c:26:22: note: source object declared here 26 | char de_name; | ^~~~~~~ because gcc will get confused about which union member entry is actually getting accessed, even when the source code is very clear about it. Gcc internally will have combined two "redundant" pointers (pointing to different union elements that are at the same offset), and takes the size checking from one or the other - not necessarily the right one. This is clearly a gcc bug, but we can work around it fairly easily. The biggest thing here is the big honking comment about why we do what we do. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99578#c6 Reported-and-tested-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- fs/qnx4/dir.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c index 2a66844b7ff8..66645a5a35f3 100644 --- a/fs/qnx4/dir.c +++ b/fs/qnx4/dir.c @@ -20,12 +20,33 @@ * depending on the status field in the last byte. The * first byte is where the name start either way, and a * zero means it's empty. + * + * Also, due to a bug in gcc, we don't want to use the + * real (differently sized) name arrays in the inode and + * link entries, but always the 'de_name[]' one in the + * fake struct entry. + * + * See + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99578#c6 + * + * for details, but basically gcc will take the size of the + * 'name' array from one of the used union entries randomly. + * + * This use of 'de_name[]' (48 bytes) avoids the false positive + * warnings that would happen if gcc decides to use 'inode.di_name' + * (16 bytes) even when the pointer and size were to come from + * 'link.dl_name' (48 bytes). + * + * In all cases the actual name pointer itself is the same, it's + * only the gcc internal 'what is the size of this field' logic + * that can get confused. */ union qnx4_directory_entry { struct { - char de_name; - char de_pad[62]; - char de_status; + const char de_name[48]; + u8 de_pad[15]; + u8 de_status; }; struct qnx4_inode_entry inode; struct qnx4_link_info link; @@ -53,29 +74,26 @@ static int qnx4_readdir(struct file *file, struct dir_context *ctx) ix = (ctx->pos >> QNX4_DIR_ENTRY_SIZE_BITS) % QNX4_INODES_PER_BLOCK; for (; ix < QNX4_INODES_PER_BLOCK; ix++, ctx->pos += QNX4_DIR_ENTRY_SIZE) { union qnx4_directory_entry *de; - const char *name; offset = ix * QNX4_DIR_ENTRY_SIZE; de = (union qnx4_directory_entry *) (bh->b_data + offset); - if (!de->de_name) + if (!de->de_name[0]) continue; if (!(de->de_status & (QNX4_FILE_USED|QNX4_FILE_LINK))) continue; if (!(de->de_status & QNX4_FILE_LINK)) { size = sizeof(de->inode.di_fname); - name = de->inode.di_fname; ino = blknum * QNX4_INODES_PER_BLOCK + ix - 1; } else { size = sizeof(de->link.dl_fname); - name = de->link.dl_fname; ino = ( le32_to_cpu(de->link.dl_inode_blk) - 1 ) * QNX4_INODES_PER_BLOCK + de->link.dl_inode_ndx; } - size = strnlen(name, size); + size = strnlen(de->de_name, size); QNX4DEBUG((KERN_INFO "qnx4_readdir:%.*s\n", size, name)); - if (!dir_emit(ctx, name, size, ino, DT_UNKNOWN)) { + if (!dir_emit(ctx, de->de_name, size, ino, DT_UNKNOWN)) { brelse(bh); return 0; } From 7af526c740bdbd5b4dcebba04ace5b3b0c07801f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 14 Sep 2021 11:29:49 +0200 Subject: [PATCH 429/543] nvmem: NVMEM_NINTENDO_OTP should depend on WII The Nintendo Wii and Wii U OTP is only present on Nintendo Wii and Wii U consoles. Hence add a dependency on WII, to prevent asking the user about this driver when configuring a kernel without Nintendo Wii and Wii U console support. Fixes: 3683b761fe3a10ad ("nvmem: nintendo-otp: Add new driver for the Wii and Wii U OTP") Reviewed-by: Emmanuel Gil Peyrot Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/01318920709dddc4d85fe895e2083ca0eee234d8.1631611652.git.geert+renesas@glider.be Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvmem/Kconfig b/drivers/nvmem/Kconfig index 39854d43758b..da414617a54d 100644 --- a/drivers/nvmem/Kconfig +++ b/drivers/nvmem/Kconfig @@ -109,6 +109,7 @@ config MTK_EFUSE config NVMEM_NINTENDO_OTP tristate "Nintendo Wii and Wii U OTP Support" + depends on WII || COMPILE_TEST help This is a driver exposing the OTP of a Nintendo Wii or Wii U console. From 708c87168b6121abc74b2a57d0c498baaf70cbea Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 6 Sep 2021 12:43:01 +0300 Subject: [PATCH 430/543] ceph: fix off by one bugs in unsafe_request_wait() The "> max" tests should be ">= max" to prevent an out of bounds access on the next lines. Fixes: e1a4541ec0b9 ("ceph: flush the mdlog before waiting on unsafe reqs") Signed-off-by: Dan Carpenter Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 6c0e52fd0743..3e42d0466521 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2263,7 +2263,7 @@ retry: list_for_each_entry(req, &ci->i_unsafe_dirops, r_unsafe_dir_item) { s = req->r_session; - if (unlikely(s->s_mds > max)) { + if (unlikely(s->s_mds >= max)) { spin_unlock(&ci->i_unsafe_lock); goto retry; } @@ -2277,7 +2277,7 @@ retry: list_for_each_entry(req, &ci->i_unsafe_iops, r_unsafe_target_item) { s = req->r_session; - if (unlikely(s->s_mds > max)) { + if (unlikely(s->s_mds >= max)) { spin_unlock(&ci->i_unsafe_lock); goto retry; } From bb509a6ffed2c8b0950f637ab5779aa818ed1596 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Thu, 16 Sep 2021 15:50:23 +0100 Subject: [PATCH 431/543] comedi: Fix memory leak in compat_insnlist() `compat_insnlist()` handles the 32-bit version of the `COMEDI_INSNLIST` ioctl (whenwhen `CONFIG_COMPAT` is enabled). It allocates memory to temporarily hold an array of `struct comedi_insn` converted from the 32-bit version in user space. This memory is only being freed if there is a fault while filling the array, otherwise it is leaked. Add a call to `kfree()` to fix the leak. Fixes: b8d47d881305 ("comedi: get rid of compat_alloc_user_space() mess in COMEDI_INSNLIST compat") Cc: Al Viro Cc: Greg Kroah-Hartman Cc: linux-staging@lists.linux.dev Cc: # 5.13+ Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20210916145023.157479-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/comedi/comedi_fops.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/comedi/comedi_fops.c b/drivers/comedi/comedi_fops.c index df77b6bf5c64..763cea8418f8 100644 --- a/drivers/comedi/comedi_fops.c +++ b/drivers/comedi/comedi_fops.c @@ -3090,6 +3090,7 @@ static int compat_insnlist(struct file *file, unsigned long arg) mutex_lock(&dev->mutex); rc = do_insnlist_ioctl(dev, insns, insnlist32.n_insns, file); mutex_unlock(&dev->mutex); + kfree(insns); return rc; } From 0e3dbf765fe22060acbcb8eb8c4d256e655a1247 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 20 Sep 2021 13:12:28 +0100 Subject: [PATCH 432/543] kselftest/arm64: signal: Skip tests if required features are missing During initialization of a signal testcase, features declared as required are properly checked against the running system but no action is then taken to effectively skip such a testcase. Fix core signals test logic to abort initialization and report such a testcase as skipped to the KSelfTest framework. Fixes: f96bf4340316 ("kselftest: arm64: mangle_pstate_invalid_compat_toggle and common utils") Signed-off-by: Cristian Marussi Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20210920121228.35368-1-cristian.marussi@arm.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/signal/test_signals_utils.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c index 6836510a522f..22722abc9dfa 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.c +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c @@ -266,16 +266,19 @@ int test_init(struct tdescr *td) td->feats_supported |= FEAT_SSBS; if (getauxval(AT_HWCAP) & HWCAP_SVE) td->feats_supported |= FEAT_SVE; - if (feats_ok(td)) + if (feats_ok(td)) { fprintf(stderr, "Required Features: [%s] supported\n", feats_to_string(td->feats_required & td->feats_supported)); - else + } else { fprintf(stderr, "Required Features: [%s] NOT supported\n", feats_to_string(td->feats_required & ~td->feats_supported)); + td->result = KSFT_SKIP; + return 0; + } } /* Perform test specific additional initialization */ From e44fd5081c50b0ffdb75ce6c83452e60173d791b Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 20 Sep 2021 19:01:42 -0500 Subject: [PATCH 433/543] ksmbd: log that server is experimental at module load While we are working through detailed security reviews of ksmbd server code we should remind users that it is an experimental module by adding a warning when the module loads. Currently the module shows as experimental in Kconfig and is disabled by default, but we don't want to confuse users. Although ksmbd passes a wide variety of the important functional tests (since initial focus had been largely on functional testing such as smbtorture, xfstests etc.), and ksmbd has added key security features (e.g. GCM256 encryption, Kerberos support), there are ongoing detailed reviews of the code base for path processing and network buffer decoding, and this patch reminds users that the module should be considered "experimental." Reviewed-by: Namjae Jeon Reviewed-by: Paulo Alcantara (SUSE) Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/ksmbd/server.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ksmbd/server.c b/fs/ksmbd/server.c index e6a9f6aa47eb..2a2b2135bfde 100644 --- a/fs/ksmbd/server.c +++ b/fs/ksmbd/server.c @@ -584,6 +584,9 @@ static int __init ksmbd_server_init(void) ret = ksmbd_workqueue_init(); if (ret) goto err_crypto_destroy; + + pr_warn_once("The ksmbd server is experimental, use at your own risk.\n"); + return 0; err_crypto_destroy: From 9f6323311c7064414bfd1edb28e0837baf6b3c7f Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 18 Sep 2021 21:02:39 +0900 Subject: [PATCH 434/543] ksmbd: add default data stream name in FILE_STREAM_INFORMATION Windows client expect to get default stream name(::DATA) in FILE_STREAM_INFORMATION response even if there is no stream data in file. This patch fix update failure when writing ppt or doc files. Signed-off-by: Namjae Jeon Reviewed-By: Tom Talpey Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 6304c9bda479..f59f9b8be51c 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -4428,17 +4428,15 @@ static void get_file_stream_info(struct ksmbd_work *work, file_info->NextEntryOffset = cpu_to_le32(next); } - if (nbytes) { + if (!S_ISDIR(stat.mode)) { file_info = (struct smb2_file_stream_info *) &rsp->Buffer[nbytes]; streamlen = smbConvertToUTF16((__le16 *)file_info->StreamName, "::$DATA", 7, conn->local_nls, 0); streamlen *= 2; file_info->StreamNameLength = cpu_to_le32(streamlen); - file_info->StreamSize = S_ISDIR(stat.mode) ? 0 : - cpu_to_le64(stat.size); - file_info->StreamAllocationSize = S_ISDIR(stat.mode) ? 0 : - cpu_to_le64(stat.size); + file_info->StreamSize = 0; + file_info->StreamAllocationSize = 0; nbytes += sizeof(struct smb2_file_stream_info) + streamlen; } From 96c8395e2166efa86082f3b71567ffd84936439b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 21 Sep 2021 15:44:06 +0100 Subject: [PATCH 435/543] spi: Revert modalias changes During the v5.13 cycle we updated the SPI subsystem to generate OF style modaliases for SPI devices, replacing the old Linux style modalises we used to generate based on spi_device_id which are the DT style name with the vendor removed. Unfortunately this means that we start only reporting OF style modalises and not the old ones and there is nothing that ensures that drivers list every possible OF compatible string in their OF ID table. The result is that there are systems which have been relying on loading modules based on the old style that are now broken, as found by Russell King with spi-nor on Macchiatobin. spi-nor is a particularly problematic case for this, it only lists a single generic DT compatible jedec,spi-nor in the driver but supports a huge raft of device specific compatibles, with a large set of part numbers many of which are offered by multiple vendors. Russell's searches of upstream device trees has turned up examples with vendor names written in non-standard ways too. To make matters worse up until 8ff16cf77ce3 ("Documentation: devicetree: m25p80: add "nor-jedec" binding") the generic compatible was not part of the binding so there are device trees out there written to that binding version which don't list it all. The sheer number of parts supported together with our previous approach of ignoring the vendor ID makes robustly fixing this by adding compatibles to the spi-nor driver seem problematic, the current DT binding document does not list all the parts supported by the driver at the minute (further patches will fix this). I've also investigated supporting both formats of modalias simultaneously but that doesn't seem possible, especially without breaking our userspace ABI which is obviously not viable. Instead revert the relevant changes for now: e09f2ab8eecc ("spi: update modalias_show after of_device_uevent_modalias support") 3ce6c9e2617e ("spi: add of_device_uevent_modalias support") This will unfortunately mean that any system which had started having modules autoload based on the OF compatibles for drivers that list things there but not in the spi_device_ids will now not have those modules load which is itself a regression. Since it affects a narrower time window and the particularly problematic spi-nor driver may be critical to system boot on smaller systems this seems the best of a series of bad options. I will start an audit of SPI drivers to identify and fix cases where things won't autoload using spi_device_id, this is not great but seems to be the best way forward that anyone has been able to identify. Thanks to Russell for both his report and the additional diagnostic and analysis work he has done here, the detailed research above was his work. Fixes: e09f2ab8eecc ("spi: update modalias_show after of_device_uevent_modalias support") Fixes: 3ce6c9e2617e ("spi: add of_device_uevent_modalias support") Reported-by: Russell King (Oracle) Suggested-by: Russell King (Oracle) Signed-off-by: Mark Brown Tested-by: Russell King (Oracle) Cc: Andreas Schwab Cc: Marco Felsch --- drivers/spi/spi.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 57e2499ec1ed..aea037c65985 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -58,10 +58,6 @@ modalias_show(struct device *dev, struct device_attribute *a, char *buf) const struct spi_device *spi = to_spi_device(dev); int len; - len = of_device_modalias(dev, buf, PAGE_SIZE); - if (len != -ENODEV) - return len; - len = acpi_device_modalias(dev, buf, PAGE_SIZE - 1); if (len != -ENODEV) return len; @@ -367,10 +363,6 @@ static int spi_uevent(struct device *dev, struct kobj_uevent_env *env) const struct spi_device *spi = to_spi_device(dev); int rc; - rc = of_device_uevent_modalias(dev, env); - if (rc != -ENODEV) - return rc; - rc = acpi_device_uevent_modalias(dev, env); if (rc != -ENODEV) return rc; From e946d3c887a9dc33aa82a349c6284f4a084163f4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 21 Sep 2021 23:33:35 +0300 Subject: [PATCH 436/543] cifs: fix a sign extension bug The problem is the mismatched types between "ctx->total_len" which is an unsigned int, "rc" which is an int, and "ctx->rc" which is a ssize_t. The code does: ctx->rc = (rc == 0) ? ctx->total_len : rc; We want "ctx->rc" to store the negative "rc" error code. But what happens is that "rc" is type promoted to a high unsigned int and 'ctx->rc" will store the high positive value instead of a negative value. The fix is to change "rc" from an int to a ssize_t. Fixes: c610c4b619e5 ("CIFS: Add asynchronous write support through kernel AIO") Signed-off-by: Dan Carpenter Signed-off-by: Steve French --- fs/cifs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 6796fc73b304..0ab5bb24b8ca 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3113,7 +3113,7 @@ static void collect_uncached_write_data(struct cifs_aio_ctx *ctx) struct cifs_tcon *tcon; struct cifs_sb_info *cifs_sb; struct dentry *dentry = ctx->cfile->dentry; - int rc; + ssize_t rc; tcon = tlink_tcon(ctx->cfile->tlink); cifs_sb = CIFS_SB(dentry->d_sb); From 248f064af222a1f97ee02c84a98013dfbccad386 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 21 Sep 2021 16:52:15 +0200 Subject: [PATCH 437/543] s390/qeth: fix NULL deref in qeth_clear_working_pool_list() When qeth_set_online() calls qeth_clear_working_pool_list() to roll back after an error exit from qeth_hardsetup_card(), we are at risk of accessing card->qdio.in_q before it was allocated by qeth_alloc_qdio_queues() via qeth_mpc_initialize(). qeth_clear_working_pool_list() then dereferences NULL, and by writing to queue->bufs[i].pool_entry scribbles all over the CPU's lowcore. Resulting in a crash when those lowcore areas are used next (eg. on the next machine-check interrupt). Such a scenario would typically happen when the device is first set online and its queues aren't allocated yet. An early IO error or certain misconfigs (eg. mismatched transport mode, bad portno) then cause us to error out from qeth_hardsetup_card() with card->qdio.in_q still being NULL. Fix it by checking the pointer for NULL before accessing it. Note that we also have (rare) paths inside qeth_mpc_initialize() where a configuration change can cause us to free the existing queues, expecting that subsequent code will allocate them again. If we then error out before that re-allocation happens, the same bug occurs. Fixes: eff73e16ee11 ("s390/qeth: tolerate pre-filled RX buffer") Reported-by: Stefan Raspl Root-caused-by: Heiko Carstens Signed-off-by: Julian Wiedmann Reviewed-by: Alexandra Winter Signed-off-by: Jakub Kicinski --- drivers/s390/net/qeth_core_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 41ca6273b750..3fba440a0731 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -202,6 +202,9 @@ static void qeth_clear_working_pool_list(struct qeth_card *card) &card->qdio.in_buf_pool.entry_list, list) list_del(&pool_entry->list); + if (!queue) + return; + for (i = 0; i < ARRAY_SIZE(queue->bufs); i++) queue->bufs[i].pool_entry = NULL; } From ee909d0b1dac8632eeb78cbf17661d6c7674bbd0 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Tue, 21 Sep 2021 16:52:16 +0200 Subject: [PATCH 438/543] s390/qeth: Fix deadlock in remove_discipline Problem: qeth_close_dev_handler is a worker that tries to acquire card->discipline_mutex via drv->set_offline() in ccwgroup_set_offline(). Since commit b41b554c1ee7 ("s390/qeth: fix locking for discipline setup / removal") qeth_remove_discipline() is called under card->discipline_mutex and cancels the work and waits for it to finish. STOPLAN reception with reason code IPA_RC_VEPA_TO_VEB_TRANSITION is the only situation that schedules close_dev_work. In that situation scheduling qeth recovery will also result in an offline interface, when resetting the isolation mode fails, if the external switch is still set to VEB. And since commit 0b9902c1fcc5 ("s390/qeth: fix deadlock during recovery") qeth recovery does not aquire card->discipline_mutex anymore. So we accept the longer pathlength of qeth_schedule_recovery in this error situation and re-use the existing function. As a side-benefit this changes the hwtrap to behave like during recovery instead of like during a user-triggered set_offline. Fixes: b41b554c1ee7 ("s390/qeth: fix locking for discipline setup / removal") Signed-off-by: Alexandra Winter Acked-by: Julian Wiedmann Signed-off-by: Julian Wiedmann Signed-off-by: Jakub Kicinski --- drivers/s390/net/qeth_core.h | 1 - drivers/s390/net/qeth_core_main.c | 16 ++++------------ drivers/s390/net/qeth_l2_main.c | 1 - drivers/s390/net/qeth_l3_main.c | 1 - 4 files changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 535a60b3946d..a5aa0bdc61d6 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -858,7 +858,6 @@ struct qeth_card { struct napi_struct napi; struct qeth_rx rx; struct delayed_work buffer_reclaim_work; - struct work_struct close_dev_work; }; static inline bool qeth_card_hw_is_reachable(struct qeth_card *card) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 3fba440a0731..9f26706051e5 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -70,15 +70,6 @@ static void qeth_issue_next_read_cb(struct qeth_card *card, static int qeth_qdio_establish(struct qeth_card *); static void qeth_free_qdio_queues(struct qeth_card *card); -static void qeth_close_dev_handler(struct work_struct *work) -{ - struct qeth_card *card; - - card = container_of(work, struct qeth_card, close_dev_work); - QETH_CARD_TEXT(card, 2, "cldevhdl"); - ccwgroup_set_offline(card->gdev); -} - static const char *qeth_get_cardname(struct qeth_card *card) { if (IS_VM_NIC(card)) { @@ -795,10 +786,12 @@ static struct qeth_ipa_cmd *qeth_check_ipa_data(struct qeth_card *card, case IPA_CMD_STOPLAN: if (cmd->hdr.return_code == IPA_RC_VEPA_TO_VEB_TRANSITION) { dev_err(&card->gdev->dev, - "Interface %s is down because the adjacent port is no longer in reflective relay mode\n", + "Adjacent port of interface %s is no longer in reflective relay mode, trigger recovery\n", netdev_name(card->dev)); - schedule_work(&card->close_dev_work); + /* Set offline, then probably fail to set online: */ + qeth_schedule_recovery(card); } else { + /* stay online for subsequent STARTLAN */ dev_warn(&card->gdev->dev, "The link for interface %s on CHPID 0x%X failed\n", netdev_name(card->dev), card->info.chpid); @@ -1540,7 +1533,6 @@ static void qeth_setup_card(struct qeth_card *card) INIT_LIST_HEAD(&card->ipato.entries); qeth_init_qdio_info(card); INIT_DELAYED_WORK(&card->buffer_reclaim_work, qeth_buffer_reclaim_work); - INIT_WORK(&card->close_dev_work, qeth_close_dev_handler); hash_init(card->rx_mode_addrs); hash_init(card->local_addrs4); hash_init(card->local_addrs6); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 72e84ff9fea5..dc6c00768d91 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -2307,7 +2307,6 @@ static void qeth_l2_remove_device(struct ccwgroup_device *gdev) if (gdev->state == CCWGROUP_ONLINE) qeth_set_offline(card, card->discipline, false); - cancel_work_sync(&card->close_dev_work); if (card->dev->reg_state == NETREG_REGISTERED) { priv = netdev_priv(card->dev); if (priv->brport_features & BR_LEARNING_SYNC) { diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 3a523e700a5a..6fd3e288f059 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1969,7 +1969,6 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev) if (cgdev->state == CCWGROUP_ONLINE) qeth_set_offline(card, card->discipline, false); - cancel_work_sync(&card->close_dev_work); if (card->dev->reg_state == NETREG_REGISTERED) unregister_netdev(card->dev); From d2b59bd4b06d84a4eadb520b0f71c62fe8ec0a62 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Tue, 21 Sep 2021 16:52:17 +0200 Subject: [PATCH 439/543] s390/qeth: fix deadlock during failing recovery Commit 0b9902c1fcc5 ("s390/qeth: fix deadlock during recovery") removed taking discipline_mutex inside qeth_do_reset(), fixing potential deadlocks. An error path was missed though, that still takes discipline_mutex and thus has the original deadlock potential. Intermittent deadlocks were seen when a qeth channel path is configured offline, causing a race between qeth_do_reset and ccwgroup_remove. Call qeth_set_offline() directly in the qeth_do_reset() error case and then a new variant of ccwgroup_set_offline(), without taking discipline_mutex. Fixes: b41b554c1ee7 ("s390/qeth: fix locking for discipline setup / removal") Signed-off-by: Alexandra Winter Reviewed-by: Julian Wiedmann Signed-off-by: Julian Wiedmann Signed-off-by: Jakub Kicinski --- arch/s390/include/asm/ccwgroup.h | 2 +- drivers/s390/cio/ccwgroup.c | 10 ++++++++-- drivers/s390/net/qeth_core_main.c | 3 ++- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h index 36dbf5043fc0..aa995d91cd1d 100644 --- a/arch/s390/include/asm/ccwgroup.h +++ b/arch/s390/include/asm/ccwgroup.h @@ -55,7 +55,7 @@ int ccwgroup_create_dev(struct device *root, struct ccwgroup_driver *gdrv, int num_devices, const char *buf); extern int ccwgroup_set_online(struct ccwgroup_device *gdev); -extern int ccwgroup_set_offline(struct ccwgroup_device *gdev); +int ccwgroup_set_offline(struct ccwgroup_device *gdev, bool call_gdrv); extern int ccwgroup_probe_ccwdev(struct ccw_device *cdev); extern void ccwgroup_remove_ccwdev(struct ccw_device *cdev); diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index 2ec741106cb6..f0538609dfe4 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -77,12 +77,13 @@ EXPORT_SYMBOL(ccwgroup_set_online); /** * ccwgroup_set_offline() - disable a ccwgroup device * @gdev: target ccwgroup device + * @call_gdrv: Call the registered gdrv set_offline function * * This function attempts to put the ccwgroup device into the offline state. * Returns: * %0 on success and a negative error value on failure. */ -int ccwgroup_set_offline(struct ccwgroup_device *gdev) +int ccwgroup_set_offline(struct ccwgroup_device *gdev, bool call_gdrv) { struct ccwgroup_driver *gdrv = to_ccwgroupdrv(gdev->dev.driver); int ret = -EINVAL; @@ -91,11 +92,16 @@ int ccwgroup_set_offline(struct ccwgroup_device *gdev) return -EAGAIN; if (gdev->state == CCWGROUP_OFFLINE) goto out; + if (!call_gdrv) { + ret = 0; + goto offline; + } if (gdrv->set_offline) ret = gdrv->set_offline(gdev); if (ret) goto out; +offline: gdev->state = CCWGROUP_OFFLINE; out: atomic_set(&gdev->onoff, 0); @@ -124,7 +130,7 @@ static ssize_t ccwgroup_online_store(struct device *dev, if (value == 1) ret = ccwgroup_set_online(gdev); else if (value == 0) - ret = ccwgroup_set_offline(gdev); + ret = ccwgroup_set_offline(gdev, true); else ret = -EINVAL; out: diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 9f26706051e5..e9807d2996a9 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5514,7 +5514,8 @@ static int qeth_do_reset(void *data) dev_info(&card->gdev->dev, "Device successfully recovered!\n"); } else { - ccwgroup_set_offline(card->gdev); + qeth_set_offline(card, disc, true); + ccwgroup_set_offline(card->gdev, false); dev_warn(&card->gdev->dev, "The qeth device driver failed to recover an error on the device\n"); } From 88b099006d83b0bf452379cad4ce494329084726 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 17 Sep 2021 17:43:49 +0300 Subject: [PATCH 440/543] scsi: ufs: core: Revert "scsi: ufs: Synchronize SCSI and UFS error handling" This reverts commit a113eaaf86373362b053279049907ff82b5df6c8. There are a couple of issues with the commit: 1. It causes deadlocks. 2. It causes the shost->eh_cmd_q list of failed requests not to be processed, ever. So revert it. 1. Deadlocks The SCSI error handler runs with requests blocked beginning when scsi_schedule_eh() sets SHOST_RECOVERY state, continuing through scsi_error_handler() callback ->eh_strategy_handler() until scsi_restart_operations() is called. By setting eh_strategy_handler to ufshcd_err_handler, the patch changed the UFS error handler to run with requests blocked, including PM requests, for the entire run of the error handler. That conflicts with UFS error handler existing synchronization with UFS device PM operations. The UFS error handler synchronizes with runtime PM by doing pm_runtime_get_sync() prior to blocking requests itself. It synchronizes with system PM by use of hba->host_sem, again before blocking requests itself. However, if requests are already blocked, then PM operations will block. So: the UFS error handler blocks waiting on PM + PM blocks waiting on SCSI PM requests to process or fail + PM requests are blocked waiting on error handling to finish = deadlock This happens both for runtime PM and system PM. Prior to the patch, these deadlocks could not happen even if SCSI error handling was running, because the presence of requests in shost->eh_cmd_q would mean the queues could not be suspended, which would mean that, should the UFS error handler run at the same time, it would not need to wait for PM or vice versa. Please note these scenarios are not just theoretical, they were found during testing on a Samsung Galaxy Book S. 2. ->eh_strategy_handler() must process shost->eh_cmd_q list of failed requests, as all other eh_strategy_handler's do except UFS error handler. Refer for example: scsi_unjam_host(), ata_scsi_error() and sas_scsi_recover_host(). Link: https://lore.kernel.org/r/20210917144349.14058-1-adrian.hunter@intel.com Fixes: a113eaaf8637 ("scsi: ufs: Synchronize SCSI and UFS error handling") Reviewed-by: Bart Van Assche Signed-off-by: Adrian Hunter Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 111 +++++++++++++++++++------------------- drivers/scsi/ufs/ufshcd.h | 4 ++ 2 files changed, 58 insertions(+), 57 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 67889d74761c..a3df5804b2c7 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -17,8 +17,6 @@ #include #include #include -#include -#include "../scsi_transport_api.h" #include "ufshcd.h" #include "ufs_quirks.h" #include "unipro.h" @@ -237,6 +235,7 @@ static int ufshcd_scale_clks(struct ufs_hba *hba, bool scale_up); static irqreturn_t ufshcd_intr(int irq, void *__hba); static int ufshcd_change_power_mode(struct ufs_hba *hba, struct ufs_pa_layer_attr *pwr_mode); +static void ufshcd_schedule_eh_work(struct ufs_hba *hba); static int ufshcd_setup_hba_vreg(struct ufs_hba *hba, bool on); static int ufshcd_setup_vreg(struct ufs_hba *hba, bool on); static inline int ufshcd_config_vreg_hpm(struct ufs_hba *hba, @@ -2759,8 +2758,13 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) out: up_read(&hba->clk_scaling_lock); - if (ufs_trigger_eh()) - scsi_schedule_eh(hba->host); + if (ufs_trigger_eh()) { + unsigned long flags; + + spin_lock_irqsave(hba->host->host_lock, flags); + ufshcd_schedule_eh_work(hba); + spin_unlock_irqrestore(hba->host->host_lock, flags); + } return err; } @@ -3919,35 +3923,6 @@ out: } EXPORT_SYMBOL_GPL(ufshcd_dme_get_attr); -static inline bool ufshcd_is_saved_err_fatal(struct ufs_hba *hba) -{ - lockdep_assert_held(hba->host->host_lock); - - return (hba->saved_uic_err & UFSHCD_UIC_DL_PA_INIT_ERROR) || - (hba->saved_err & (INT_FATAL_ERRORS | UFSHCD_UIC_HIBERN8_MASK)); -} - -static void ufshcd_schedule_eh(struct ufs_hba *hba) -{ - bool schedule_eh = false; - unsigned long flags; - - spin_lock_irqsave(hba->host->host_lock, flags); - /* handle fatal errors only when link is not in error state */ - if (hba->ufshcd_state != UFSHCD_STATE_ERROR) { - if (hba->force_reset || ufshcd_is_link_broken(hba) || - ufshcd_is_saved_err_fatal(hba)) - hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED_FATAL; - else - hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED_NON_FATAL; - schedule_eh = true; - } - spin_unlock_irqrestore(hba->host->host_lock, flags); - - if (schedule_eh) - scsi_schedule_eh(hba->host); -} - /** * ufshcd_uic_pwr_ctrl - executes UIC commands (which affects the link power * state) and waits for it to take effect. @@ -3968,7 +3943,6 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd) { DECLARE_COMPLETION_ONSTACK(uic_async_done); unsigned long flags; - bool schedule_eh = false; u8 status; int ret; bool reenable_intr = false; @@ -4038,14 +4012,10 @@ out: ufshcd_enable_intr(hba, UIC_COMMAND_COMPL); if (ret) { ufshcd_set_link_broken(hba); - schedule_eh = true; + ufshcd_schedule_eh_work(hba); } - out_unlock: spin_unlock_irqrestore(hba->host->host_lock, flags); - - if (schedule_eh) - ufshcd_schedule_eh(hba); mutex_unlock(&hba->uic_cmd_mutex); return ret; @@ -5911,6 +5881,27 @@ out: return err_handling; } +/* host lock must be held before calling this func */ +static inline bool ufshcd_is_saved_err_fatal(struct ufs_hba *hba) +{ + return (hba->saved_uic_err & UFSHCD_UIC_DL_PA_INIT_ERROR) || + (hba->saved_err & (INT_FATAL_ERRORS | UFSHCD_UIC_HIBERN8_MASK)); +} + +/* host lock must be held before calling this func */ +static inline void ufshcd_schedule_eh_work(struct ufs_hba *hba) +{ + /* handle fatal errors only when link is not in error state */ + if (hba->ufshcd_state != UFSHCD_STATE_ERROR) { + if (hba->force_reset || ufshcd_is_link_broken(hba) || + ufshcd_is_saved_err_fatal(hba)) + hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED_FATAL; + else + hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED_NON_FATAL; + queue_work(hba->eh_wq, &hba->eh_work); + } +} + static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow) { down_write(&hba->clk_scaling_lock); @@ -6044,11 +6035,11 @@ static bool ufshcd_is_pwr_mode_restore_needed(struct ufs_hba *hba) /** * ufshcd_err_handler - handle UFS errors that require s/w attention - * @host: SCSI host pointer + * @work: pointer to work structure */ -static void ufshcd_err_handler(struct Scsi_Host *host) +static void ufshcd_err_handler(struct work_struct *work) { - struct ufs_hba *hba = shost_priv(host); + struct ufs_hba *hba; unsigned long flags; bool err_xfer = false; bool err_tm = false; @@ -6056,9 +6047,10 @@ static void ufshcd_err_handler(struct Scsi_Host *host) int tag; bool needs_reset = false, needs_restore = false; + hba = container_of(work, struct ufs_hba, eh_work); + down(&hba->host_sem); spin_lock_irqsave(hba->host->host_lock, flags); - hba->host->host_eh_scheduled = 0; if (ufshcd_err_handling_should_stop(hba)) { if (hba->ufshcd_state != UFSHCD_STATE_ERROR) hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL; @@ -6371,6 +6363,7 @@ static irqreturn_t ufshcd_check_errors(struct ufs_hba *hba, u32 intr_status) "host_regs: "); ufshcd_print_pwr_info(hba); } + ufshcd_schedule_eh_work(hba); retval |= IRQ_HANDLED; } /* @@ -6382,10 +6375,6 @@ static irqreturn_t ufshcd_check_errors(struct ufs_hba *hba, u32 intr_status) hba->errors = 0; hba->uic_error = 0; spin_unlock(hba->host->host_lock); - - if (queue_eh_work) - ufshcd_schedule_eh(hba); - return retval; } @@ -7048,17 +7037,15 @@ static int ufshcd_abort(struct scsi_cmnd *cmd) * will be to send LU reset which, again, is a spec violation. * To avoid these unnecessary/illegal steps, first we clean up * the lrb taken by this cmd and re-set it in outstanding_reqs, - * then queue the error handler and bail. + * then queue the eh_work and bail. */ if (lrbp->lun == UFS_UPIU_UFS_DEVICE_WLUN) { ufshcd_update_evt_hist(hba, UFS_EVT_ABORT, lrbp->lun); spin_lock_irqsave(host->host_lock, flags); hba->force_reset = true; + ufshcd_schedule_eh_work(hba); spin_unlock_irqrestore(host->host_lock, flags); - - ufshcd_schedule_eh(hba); - goto release; } @@ -7191,10 +7178,11 @@ static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd) spin_lock_irqsave(hba->host->host_lock, flags); hba->force_reset = true; + ufshcd_schedule_eh_work(hba); dev_err(hba->dev, "%s: reset in progress - 1\n", __func__); spin_unlock_irqrestore(hba->host->host_lock, flags); - ufshcd_err_handler(hba->host); + flush_work(&hba->eh_work); spin_lock_irqsave(hba->host->host_lock, flags); if (hba->ufshcd_state == UFSHCD_STATE_ERROR) @@ -8604,6 +8592,8 @@ static void ufshcd_hba_exit(struct ufs_hba *hba) if (hba->is_powered) { ufshcd_exit_clk_scaling(hba); ufshcd_exit_clk_gating(hba); + if (hba->eh_wq) + destroy_workqueue(hba->eh_wq); ufs_debugfs_hba_exit(hba); ufshcd_variant_hba_exit(hba); ufshcd_setup_vreg(hba, false); @@ -9448,10 +9438,6 @@ static int ufshcd_set_dma_mask(struct ufs_hba *hba) return dma_set_mask_and_coherent(hba->dev, DMA_BIT_MASK(32)); } -static struct scsi_transport_template ufshcd_transport_template = { - .eh_strategy_handler = ufshcd_err_handler, -}; - /** * ufshcd_alloc_host - allocate Host Bus Adapter (HBA) * @dev: pointer to device handle @@ -9478,7 +9464,6 @@ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle) err = -ENOMEM; goto out_error; } - host->transportt = &ufshcd_transport_template; hba = shost_priv(host); hba->host = host; hba->dev = dev; @@ -9518,6 +9503,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) int err; struct Scsi_Host *host = hba->host; struct device *dev = hba->dev; + char eh_wq_name[sizeof("ufs_eh_wq_00")]; if (!mmio_base) { dev_err(hba->dev, @@ -9571,6 +9557,17 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) hba->max_pwr_info.is_valid = false; + /* Initialize work queues */ + snprintf(eh_wq_name, sizeof(eh_wq_name), "ufs_eh_wq_%d", + hba->host->host_no); + hba->eh_wq = create_singlethread_workqueue(eh_wq_name); + if (!hba->eh_wq) { + dev_err(hba->dev, "%s: failed to create eh workqueue\n", + __func__); + err = -ENOMEM; + goto out_disable; + } + INIT_WORK(&hba->eh_work, ufshcd_err_handler); INIT_WORK(&hba->eeh_work, ufshcd_exception_event_handler); sema_init(&hba->host_sem, 1); diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 4723f27a55d1..f0da5d3db1fa 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -741,6 +741,8 @@ struct ufs_hba_monitor { * @is_powered: flag to check if HBA is powered * @shutting_down: flag to check if shutdown has been invoked * @host_sem: semaphore used to serialize concurrent contexts + * @eh_wq: Workqueue that eh_work works on + * @eh_work: Worker to handle UFS errors that require s/w attention * @eeh_work: Worker to handle exception events * @errors: HBA errors * @uic_error: UFS interconnect layer error status @@ -843,6 +845,8 @@ struct ufs_hba { struct semaphore host_sem; /* Work Queues */ + struct workqueue_struct *eh_wq; + struct work_struct eh_work; struct work_struct eeh_work; /* HBA Errors */ From 1d479e6c9cb2b40abfb455863a4e9335db882e33 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 17 Sep 2021 14:23:14 -0700 Subject: [PATCH 441/543] scsi: sd_zbc: Support disks with more than 2**32 logical blocks This patch addresses the following Coverity report about the zno * sdkp->zone_blocks expression: CID 1475514 (#1 of 1): Unintentional integer overflow (OVERFLOW_BEFORE_WIDEN) overflow_before_widen: Potentially overflowing expression zno * sdkp->zone_blocks with type unsigned int (32 bits, unsigned) is evaluated using 32-bit arithmetic, and then used in a context that expects an expression of type sector_t (64 bits, unsigned). Link: https://lore.kernel.org/r/20210917212314.2362324-1-bvanassche@acm.org Fixes: 5795eb443060 ("scsi: sd_zbc: emulate ZONE_APPEND commands") Cc: Johannes Thumshirn Cc: Damien Le Moal Cc: Hannes Reinecke Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Reviewed-by: Himanshu Madhani Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/sd_zbc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 8197d31a81f9..ed06798983f8 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -280,7 +280,7 @@ static void sd_zbc_update_wp_offset_workfn(struct work_struct *work) { struct scsi_disk *sdkp; unsigned long flags; - unsigned int zno; + sector_t zno; int ret; sdkp = container_of(work, struct scsi_disk, zone_wp_offset_work); From d04a968c33684b15d1206e23fc1119ce0f0587fb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 16 Sep 2021 10:54:04 -0700 Subject: [PATCH 442/543] scsi: ufs: core: Unbreak the reset handler A command tag is passed as the second argument of the __ufshcd_transfer_req_compl() call in ufshcd_eh_device_reset_handler() instead of a bitmask. Fix this by passing a bitmask as argument instead of a command tag. Link: https://lore.kernel.org/r/20210916175408.2260084-1-bvanassche@acm.org Fixes: a45f937110fa ("scsi: ufs: Optimize host lock on transfer requests send/compl paths") Cc: Can Guo Reviewed-by: Avri Altman Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index a3df5804b2c7..029c9631ec2b 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -6865,7 +6865,7 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd) err = ufshcd_clear_cmd(hba, pos); if (err) break; - __ufshcd_transfer_req_compl(hba, pos, /*retry_requests=*/true); + __ufshcd_transfer_req_compl(hba, 1U << pos, false); } } From 5f8579038842d77e6ce05e1df6bf9dd493b0e3ef Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Wed, 15 Sep 2021 18:32:39 +0300 Subject: [PATCH 443/543] scsi: qla2xxx: Restore initiator in dual mode In dual mode in case of disabling the target, the whole port goes offline and initiator is turned off too. Fix restoring initiator mode after disabling target in dual mode. Link: https://lore.kernel.org/r/20210915153239.8035-1-d.bogdanov@yadro.com Fixes: 0645cb8350cd ("scsi: qla2xxx: Add mode control for each physical port") Reviewed-by: Himanshu Madhani Signed-off-by: Dmitry Bogdanov Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 1e4e3e83b5c7..5fc7697f0af4 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -7169,7 +7169,8 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) return 0; break; case QLA2XXX_INI_MODE_DUAL: - if (!qla_dual_mode_enabled(vha)) + if (!qla_dual_mode_enabled(vha) && + !qla_ini_mode_enabled(vha)) return 0; break; case QLA2XXX_INI_MODE_ENABLED: From bc41fcbffd5759c9610f7de211420eae6b379503 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 15 Sep 2021 17:07:13 +0800 Subject: [PATCH 444/543] scsi: fas216: Kill scmd->tag The driver is attempting to allocate a tag internally which is a no-go with blk-mq. Switch the driver to use the request tag and kill usage of scmd->tag and scmd->device->current_tag. [jpg: Change to use scsi_cmd_to_rq()] Link: https://lore.kernel.org/r/1631696835-136198-2-git-send-email-john.garry@huawei.com Signed-off-by: Hannes Reinecke Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/arm/fas216.c | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/drivers/scsi/arm/fas216.c b/drivers/scsi/arm/fas216.c index 9c4458a99025..cf71ef488e36 100644 --- a/drivers/scsi/arm/fas216.c +++ b/drivers/scsi/arm/fas216.c @@ -77,7 +77,6 @@ * I was thinking that this was a good chip until I found this restriction ;( */ #define SCSI2_SYNC -#undef SCSI2_TAG #undef DEBUG_CONNECT #undef DEBUG_MESSAGES @@ -990,7 +989,7 @@ fas216_reselected_intr(FAS216_Info *info) info->scsi.disconnectable = 0; if (info->SCpnt->device->id == target && info->SCpnt->device->lun == lun && - info->SCpnt->tag == tag) { + scsi_cmd_to_rq(info->SCpnt)->tag == tag) { fas216_log(info, LOG_CONNECT, "reconnected previously executing command"); } else { queue_add_cmd_tail(&info->queues.disconnected, info->SCpnt); @@ -1791,8 +1790,9 @@ static void fas216_start_command(FAS216_Info *info, struct scsi_cmnd *SCpnt) /* * add tag message if required */ - if (SCpnt->tag) - msgqueue_addmsg(&info->scsi.msgs, 2, SIMPLE_QUEUE_TAG, SCpnt->tag); + if (SCpnt->device->simple_tags) + msgqueue_addmsg(&info->scsi.msgs, 2, SIMPLE_QUEUE_TAG, + scsi_cmd_to_rq(SCpnt)->tag); do { #ifdef SCSI2_SYNC @@ -1815,20 +1815,8 @@ static void fas216_start_command(FAS216_Info *info, struct scsi_cmnd *SCpnt) static void fas216_allocate_tag(FAS216_Info *info, struct scsi_cmnd *SCpnt) { -#ifdef SCSI2_TAG - /* - * tagged queuing - allocate a new tag to this command - */ - if (SCpnt->device->simple_tags && SCpnt->cmnd[0] != REQUEST_SENSE && - SCpnt->cmnd[0] != INQUIRY) { - SCpnt->device->current_tag += 1; - if (SCpnt->device->current_tag == 0) - SCpnt->device->current_tag = 1; - SCpnt->tag = SCpnt->device->current_tag; - } else -#endif - set_bit(SCpnt->device->id * 8 + - (u8)(SCpnt->device->lun & 0x7), info->busyluns); + set_bit(SCpnt->device->id * 8 + + (u8)(SCpnt->device->lun & 0x7), info->busyluns); info->stats.removes += 1; switch (SCpnt->cmnd[0]) { @@ -2117,7 +2105,6 @@ request_sense: init_SCp(SCpnt); SCpnt->SCp.Message = 0; SCpnt->SCp.Status = 0; - SCpnt->tag = 0; SCpnt->host_scribble = (void *)fas216_rq_sns_done; /* @@ -2223,7 +2210,6 @@ static int fas216_queue_command_lck(struct scsi_cmnd *SCpnt, init_SCp(SCpnt); info->stats.queues += 1; - SCpnt->tag = 0; spin_lock(&info->host_lock); @@ -3003,9 +2989,8 @@ void fas216_print_devices(FAS216_Info *info, struct seq_file *m) dev = &info->device[scd->id]; seq_printf(m, " %d/%llu ", scd->id, scd->lun); if (scd->tagged_supported) - seq_printf(m, "%3sabled(%3d) ", - scd->simple_tags ? "en" : "dis", - scd->current_tag); + seq_printf(m, "%3sabled ", + scd->simple_tags ? "en" : "dis"); else seq_puts(m, "unsupported "); From 756fb6a895afbf1f0615d93ebdd14863a00b1198 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 15 Sep 2021 17:07:14 +0800 Subject: [PATCH 445/543] scsi: acornscsi: Remove tagged queuing vestiges The acornscsi driver has a config option to enable tagged queuing, but this option gets disabled in the driver itself with the comment 'needs to be debugged'. As this is a _really_ old driver I doubt anyone will be wanting to invest time here, so remove the tagged queue vestiges and make our lives easier. [jpg: Use scsi_cmd_to_rq()] Link: https://lore.kernel.org/r/1631696835-136198-3-git-send-email-john.garry@huawei.com Signed-off-by: Hannes Reinecke Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/arm/Kconfig | 11 ---- drivers/scsi/arm/acornscsi.c | 103 ++++++++--------------------------- drivers/scsi/arm/queue.c | 2 +- 3 files changed, 23 insertions(+), 93 deletions(-) diff --git a/drivers/scsi/arm/Kconfig b/drivers/scsi/arm/Kconfig index f34badc75196..9f64133f976a 100644 --- a/drivers/scsi/arm/Kconfig +++ b/drivers/scsi/arm/Kconfig @@ -10,17 +10,6 @@ config SCSI_ACORNSCSI_3 This enables support for the Acorn SCSI card (aka30). If you have an Acorn system with one of these, say Y. If unsure, say N. -config SCSI_ACORNSCSI_TAGGED_QUEUE - bool "Support SCSI 2 Tagged queueing" - depends on SCSI_ACORNSCSI_3 - help - Say Y here to enable tagged queuing support on the Acorn SCSI card. - - This is a feature of SCSI-2 which improves performance: the host - adapter can send several SCSI commands to a device's queue even if - previous commands haven't finished yet. Some SCSI devices don't - implement this properly, so the safe answer is N. - config SCSI_ACORNSCSI_SYNC bool "Support SCSI 2 Synchronous Transfers" depends on SCSI_ACORNSCSI_3 diff --git a/drivers/scsi/arm/acornscsi.c b/drivers/scsi/arm/acornscsi.c index 4a84599ff491..b4cb5fb19998 100644 --- a/drivers/scsi/arm/acornscsi.c +++ b/drivers/scsi/arm/acornscsi.c @@ -52,12 +52,8 @@ * You can tell if you have a device that supports tagged queueing my * cating (eg) /proc/scsi/acornscsi/0 and see if the SCSI revision is reported * as '2 TAG'. - * - * Also note that CONFIG_SCSI_ACORNSCSI_TAGGED_QUEUE is normally set in the config - * scripts, but disabled here. Once debugged, remove the #undef, otherwise to debug, - * comment out the undef. */ -#undef CONFIG_SCSI_ACORNSCSI_TAGGED_QUEUE + /* * SCSI-II Synchronous transfer support. * @@ -171,7 +167,7 @@ static void acornscsi_done(AS_Host *host, struct scsi_cmnd **SCpntp, unsigned int result); static int acornscsi_reconnect_finish(AS_Host *host); static void acornscsi_dma_cleanup(AS_Host *host); -static void acornscsi_abortcmd(AS_Host *host, unsigned char tag); +static void acornscsi_abortcmd(AS_Host *host); /* ==================================================================================== * Miscellaneous @@ -741,17 +737,6 @@ intr_ret_t acornscsi_kick(AS_Host *host) #endif if (from_queue) { -#ifdef CONFIG_SCSI_ACORNSCSI_TAGGED_QUEUE - /* - * tagged queueing - allocate a new tag to this command - */ - if (SCpnt->device->simple_tags) { - SCpnt->device->current_tag += 1; - if (SCpnt->device->current_tag == 0) - SCpnt->device->current_tag = 1; - SCpnt->tag = SCpnt->device->current_tag; - } else -#endif set_bit(SCpnt->device->id * 8 + (u8)(SCpnt->device->lun & 0x07), host->busyluns); @@ -1192,7 +1177,7 @@ void acornscsi_dma_intr(AS_Host *host) * the device recognises the attention. */ if (dmac_read(host, DMAC_STATUS) & STATUS_RQ0) { - acornscsi_abortcmd(host, host->SCpnt->tag); + acornscsi_abortcmd(host); dmac_write(host, DMAC_TXCNTLO, 0); dmac_write(host, DMAC_TXCNTHI, 0); @@ -1560,23 +1545,6 @@ void acornscsi_message(AS_Host *host) acornscsi_sbic_issuecmd(host, CMND_ASSERTATN); switch (host->scsi.last_message) { -#ifdef CONFIG_SCSI_ACORNSCSI_TAGGED_QUEUE - case HEAD_OF_QUEUE_TAG: - case ORDERED_QUEUE_TAG: - case SIMPLE_QUEUE_TAG: - /* - * ANSI standard says: (Section SCSI-2 Rev. 10c Sect 5.6.17) - * If a target does not implement tagged queuing and a queue tag - * message is received, it shall respond with a MESSAGE REJECT - * message and accept the I/O process as if it were untagged. - */ - printk(KERN_NOTICE "scsi%d.%c: disabling tagged queueing\n", - host->host->host_no, acornscsi_target(host)); - host->SCpnt->device->simple_tags = 0; - set_bit(host->SCpnt->device->id * 8 + - (u8)(host->SCpnt->device->lun & 0x7), host->busyluns); - break; -#endif case EXTENDED_MESSAGE | (EXTENDED_SDTR << 8): /* * Target can't handle synchronous transfers @@ -1687,24 +1655,11 @@ void acornscsi_buildmessages(AS_Host *host) #if 0 /* does the device need the current command aborted */ if (cmd_aborted) { - acornscsi_abortcmd(host->SCpnt->tag); + acornscsi_abortcmd(host); return; } #endif -#ifdef CONFIG_SCSI_ACORNSCSI_TAGGED_QUEUE - if (host->SCpnt->tag) { - unsigned int tag_type; - - if (host->SCpnt->cmnd[0] == REQUEST_SENSE || - host->SCpnt->cmnd[0] == TEST_UNIT_READY || - host->SCpnt->cmnd[0] == INQUIRY) - tag_type = HEAD_OF_QUEUE_TAG; - else - tag_type = SIMPLE_QUEUE_TAG; - msgqueue_addmsg(&host->scsi.msgs, 2, tag_type, host->SCpnt->tag); - } -#endif #ifdef CONFIG_SCSI_ACORNSCSI_SYNC if (host->device[host->SCpnt->device->id].sync_state == SYNC_NEGOCIATE) { @@ -1798,7 +1753,7 @@ int acornscsi_reconnect(AS_Host *host) "to reconnect with\n", host->host->host_no, '0' + target); acornscsi_dumplog(host, target); - acornscsi_abortcmd(host, 0); + acornscsi_abortcmd(host); if (host->SCpnt) { queue_add_cmd_tail(&host->queues.disconnected, host->SCpnt); host->SCpnt = NULL; @@ -1821,7 +1776,7 @@ int acornscsi_reconnect_finish(AS_Host *host) host->scsi.disconnectable = 0; if (host->SCpnt->device->id == host->scsi.reconnected.target && host->SCpnt->device->lun == host->scsi.reconnected.lun && - host->SCpnt->tag == host->scsi.reconnected.tag) { + scsi_cmd_to_tag(host->SCpnt) == host->scsi.reconnected.tag) { #if (DEBUG & (DEBUG_QUEUES|DEBUG_DISCON)) DBG(host->SCpnt, printk("scsi%d.%c: reconnected", host->host->host_no, acornscsi_target(host))); @@ -1848,7 +1803,7 @@ int acornscsi_reconnect_finish(AS_Host *host) } if (!host->SCpnt) - acornscsi_abortcmd(host, host->scsi.reconnected.tag); + acornscsi_abortcmd(host); else { /* * Restore data pointer from SAVED pointers. @@ -1889,21 +1844,15 @@ void acornscsi_disconnect_unexpected(AS_Host *host) * Function: void acornscsi_abortcmd(AS_host *host, unsigned char tag) * Purpose : abort a currently executing command * Params : host - host with connected command to abort - * tag - tag to abort */ static -void acornscsi_abortcmd(AS_Host *host, unsigned char tag) +void acornscsi_abortcmd(AS_Host *host) { host->scsi.phase = PHASE_ABORTED; sbic_arm_write(host, SBIC_CMND, CMND_ASSERTATN); msgqueue_flush(&host->scsi.msgs); -#ifdef CONFIG_SCSI_ACORNSCSI_TAGGED_QUEUE - if (tag) - msgqueue_addmsg(&host->scsi.msgs, 2, ABORT_TAG, tag); - else -#endif - msgqueue_addmsg(&host->scsi.msgs, 1, ABORT); + msgqueue_addmsg(&host->scsi.msgs, 1, ABORT); } /* ========================================================================================== @@ -1993,7 +1942,7 @@ intr_ret_t acornscsi_sbicintr(AS_Host *host, int in_irq) printk(KERN_ERR "scsi%d.%c: PHASE_CONNECTING, SSR %02X?\n", host->host->host_no, acornscsi_target(host), ssr); acornscsi_dumplog(host, host->SCpnt ? host->SCpnt->device->id : 8); - acornscsi_abortcmd(host, host->SCpnt->tag); + acornscsi_abortcmd(host); } return INTR_PROCESSING; @@ -2029,7 +1978,7 @@ intr_ret_t acornscsi_sbicintr(AS_Host *host, int in_irq) printk(KERN_ERR "scsi%d.%c: PHASE_CONNECTED, SSR %02X?\n", host->host->host_no, acornscsi_target(host), ssr); acornscsi_dumplog(host, host->SCpnt ? host->SCpnt->device->id : 8); - acornscsi_abortcmd(host, host->SCpnt->tag); + acornscsi_abortcmd(host); } return INTR_PROCESSING; @@ -2075,20 +2024,20 @@ intr_ret_t acornscsi_sbicintr(AS_Host *host, int in_irq) case 0x18: /* -> PHASE_DATAOUT */ /* COMMAND -> DATA OUT */ if (host->scsi.SCp.sent_command != host->SCpnt->cmd_len) - acornscsi_abortcmd(host, host->SCpnt->tag); + acornscsi_abortcmd(host); acornscsi_dma_setup(host, DMA_OUT); if (!acornscsi_starttransfer(host)) - acornscsi_abortcmd(host, host->SCpnt->tag); + acornscsi_abortcmd(host); host->scsi.phase = PHASE_DATAOUT; return INTR_IDLE; case 0x19: /* -> PHASE_DATAIN */ /* COMMAND -> DATA IN */ if (host->scsi.SCp.sent_command != host->SCpnt->cmd_len) - acornscsi_abortcmd(host, host->SCpnt->tag); + acornscsi_abortcmd(host); acornscsi_dma_setup(host, DMA_IN); if (!acornscsi_starttransfer(host)) - acornscsi_abortcmd(host, host->SCpnt->tag); + acornscsi_abortcmd(host); host->scsi.phase = PHASE_DATAIN; return INTR_IDLE; @@ -2156,7 +2105,7 @@ intr_ret_t acornscsi_sbicintr(AS_Host *host, int in_irq) /* MESSAGE IN -> DATA OUT */ acornscsi_dma_setup(host, DMA_OUT); if (!acornscsi_starttransfer(host)) - acornscsi_abortcmd(host, host->SCpnt->tag); + acornscsi_abortcmd(host); host->scsi.phase = PHASE_DATAOUT; return INTR_IDLE; @@ -2165,7 +2114,7 @@ intr_ret_t acornscsi_sbicintr(AS_Host *host, int in_irq) /* MESSAGE IN -> DATA IN */ acornscsi_dma_setup(host, DMA_IN); if (!acornscsi_starttransfer(host)) - acornscsi_abortcmd(host, host->SCpnt->tag); + acornscsi_abortcmd(host); host->scsi.phase = PHASE_DATAIN; return INTR_IDLE; @@ -2206,7 +2155,7 @@ intr_ret_t acornscsi_sbicintr(AS_Host *host, int in_irq) switch (ssr) { case 0x19: /* -> PHASE_DATAIN */ case 0x89: /* -> PHASE_DATAIN */ - acornscsi_abortcmd(host, host->SCpnt->tag); + acornscsi_abortcmd(host); return INTR_IDLE; case 0x1b: /* -> PHASE_STATUSIN */ @@ -2255,7 +2204,7 @@ intr_ret_t acornscsi_sbicintr(AS_Host *host, int in_irq) switch (ssr) { case 0x18: /* -> PHASE_DATAOUT */ case 0x88: /* -> PHASE_DATAOUT */ - acornscsi_abortcmd(host, host->SCpnt->tag); + acornscsi_abortcmd(host); return INTR_IDLE; case 0x1b: /* -> PHASE_STATUSIN */ @@ -2482,7 +2431,6 @@ static int acornscsi_queuecmd_lck(struct scsi_cmnd *SCpnt, SCpnt->scsi_done = done; SCpnt->host_scribble = NULL; SCpnt->result = 0; - SCpnt->tag = 0; SCpnt->SCp.phase = (int)acornscsi_datadirection(SCpnt->cmnd[0]); SCpnt->SCp.sent_command = 0; SCpnt->SCp.scsi_xferred = 0; @@ -2581,7 +2529,7 @@ static enum res_abort acornscsi_do_abort(AS_Host *host, struct scsi_cmnd *SCpnt) break; default: - acornscsi_abortcmd(host, host->SCpnt->tag); + acornscsi_abortcmd(host); res = res_snooze; } local_irq_restore(flags); @@ -2747,9 +2695,6 @@ char *acornscsi_info(struct Scsi_Host *host) #ifdef CONFIG_SCSI_ACORNSCSI_SYNC " SYNC" #endif -#ifdef CONFIG_SCSI_ACORNSCSI_TAGGED_QUEUE - " TAG" -#endif #if (DEBUG & DEBUG_NO_WRITE) " NOWRITE (" __stringify(NO_WRITE) ")" #endif @@ -2770,9 +2715,6 @@ static int acornscsi_show_info(struct seq_file *m, struct Scsi_Host *instance) #ifdef CONFIG_SCSI_ACORNSCSI_SYNC " SYNC" #endif -#ifdef CONFIG_SCSI_ACORNSCSI_TAGGED_QUEUE - " TAG" -#endif #if (DEBUG & DEBUG_NO_WRITE) " NOWRITE (" __stringify(NO_WRITE) ")" #endif @@ -2827,9 +2769,8 @@ static int acornscsi_show_info(struct seq_file *m, struct Scsi_Host *instance) seq_printf(m, "Device/Lun TaggedQ Sync\n"); seq_printf(m, " %d/%llu ", scd->id, scd->lun); if (scd->tagged_supported) - seq_printf(m, "%3sabled(%3d) ", - scd->simple_tags ? "en" : "dis", - scd->current_tag); + seq_printf(m, "%3sabled ", + scd->simple_tags ? "en" : "dis"); else seq_printf(m, "unsupported "); diff --git a/drivers/scsi/arm/queue.c b/drivers/scsi/arm/queue.c index e5559f27669d..c6f71a7d1b8e 100644 --- a/drivers/scsi/arm/queue.c +++ b/drivers/scsi/arm/queue.c @@ -214,7 +214,7 @@ struct scsi_cmnd *queue_remove_tgtluntag(Queue_t *queue, int target, int lun, list_for_each(l, &queue->head) { QE_t *q = list_entry(l, QE_t, list); if (q->SCpnt->device->id == target && q->SCpnt->device->lun == lun && - q->SCpnt->tag == tag) { + scsi_cmd_to_rq(q->SCpnt)->tag == tag) { SCpnt = __queue_remove(queue, l); break; } From a4869faf9642518145a8aa4b52e0d5ab0e7ee896 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 15 Sep 2021 17:07:15 +0800 Subject: [PATCH 446/543] scsi: core: Remove 'current_tag' The 'current_tag' field in struct scsi_device is unused now; remove it. Link: https://lore.kernel.org/r/1631696835-136198-4-git-send-email-john.garry@huawei.com Reviewed-by: Bart Van Assche Signed-off-by: Hannes Reinecke Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- include/scsi/scsi_device.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 09a17f6e93a7..b97e142a7ca9 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -146,7 +146,6 @@ struct scsi_device { struct scsi_vpd __rcu *vpd_pg83; struct scsi_vpd __rcu *vpd_pg80; struct scsi_vpd __rcu *vpd_pg89; - unsigned char current_tag; /* current tag */ struct scsi_target *sdev_target; blist_flags_t sdev_bflags; /* black/white flags as also found in From cdbc16c552f27ac211a44f9959d813b4f3188223 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 16 Sep 2021 16:22:51 +0300 Subject: [PATCH 447/543] scsi: lpfc: Fix sprintf() overflow in lpfc_display_fpin_wwpn() This scnprintf() uses the wrong limit. It should be "LPFC_FPIN_WWPN_LINE_SZ - len" instead of LPFC_FPIN_WWPN_LINE_SZ. Link: https://lore.kernel.org/r/20210916132251.GD25094@kili Fixes: 428569e66fa7 ("scsi: lpfc: Expand FPIN and RDF receive logging") Reviewed-by: James Smart Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_els.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index f3fc79b99165..052c0e5b1119 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -9387,7 +9387,7 @@ lpfc_display_fpin_wwpn(struct lpfc_hba *phba, __be64 *wwnlist, u32 cnt) /* Extract the next WWPN from the payload */ wwn = *wwnlist++; wwpn = be64_to_cpu(wwn); - len += scnprintf(buf + len, LPFC_FPIN_WWPN_LINE_SZ, + len += scnprintf(buf + len, LPFC_FPIN_WWPN_LINE_SZ - len, " %016llx", wwpn); /* Log a message if we are on the last WWPN From 6dacc371b77f473770ec646e220303a84fe96c11 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 16 Sep 2021 16:23:31 +0300 Subject: [PATCH 448/543] scsi: lpfc: Use correct scnprintf() limit The limit should be "PAGE_SIZE - len" instead of "PAGE_SIZE". We're not going to hit the limit so this fix will not affect runtime. Link: https://lore.kernel.org/r/20210916132331.GE25094@kili Fixes: 5b9e70b22cc5 ("scsi: lpfc: raise sg count for nvme to use available sg resources") Reviewed-by: James Smart Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_attr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index b35bf70a8c0d..1e5a30eb04de 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -6204,7 +6204,8 @@ lpfc_sg_seg_cnt_show(struct device *dev, struct device_attribute *attr, len = scnprintf(buf, PAGE_SIZE, "SGL sz: %d total SGEs: %d\n", phba->cfg_sg_dma_buf_size, phba->cfg_total_seg_cnt); - len += scnprintf(buf + len, PAGE_SIZE, "Cfg: %d SCSI: %d NVME: %d\n", + len += scnprintf(buf + len, PAGE_SIZE - len, + "Cfg: %d SCSI: %d NVME: %d\n", phba->cfg_sg_seg_cnt, phba->cfg_scsi_seg_cnt, phba->cfg_nvme_seg_cnt); return len; From a38923f2d088d1a5cbaa86818abe039b2f87093d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 20 Sep 2021 11:56:22 +0200 Subject: [PATCH 449/543] scsi: lpfc: Fix gcc -Wstringop-overread warning, again I fixed a stringop-overread warning earlier this year, now a second copy of the original code was added and the warning came back: drivers/scsi/lpfc/lpfc_attr.c: In function 'lpfc_cmf_info_show': drivers/scsi/lpfc/lpfc_attr.c:289:25: error: 'strnlen' specified bound 4095 exceeds source size 24 [-Werror=stringop-overread] 289 | strnlen(LPFC_INFO_MORE_STR, PAGE_SIZE - 1), | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fix it the same way as the other copy. Link: https://lore.kernel.org/r/20210920095628.1191676-1-arnd@kernel.org Fixes: ada48ba70f6b ("scsi: lpfc: Fix gcc -Wstringop-overread warning") Fixes: 74a7baa2a3ee ("scsi: lpfc: Add cmf_info sysfs entry") Reviewed-by: James Smart Signed-off-by: Arnd Bergmann Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_attr.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 1e5a30eb04de..ebe417921dac 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -285,11 +285,8 @@ buffer_done: "6312 Catching potential buffer " "overflow > PAGE_SIZE = %lu bytes\n", PAGE_SIZE); - strscpy(buf + PAGE_SIZE - 1 - - strnlen(LPFC_INFO_MORE_STR, PAGE_SIZE - 1), - LPFC_INFO_MORE_STR, - strnlen(LPFC_INFO_MORE_STR, PAGE_SIZE - 1) - + 1); + strscpy(buf + PAGE_SIZE - 1 - sizeof(LPFC_INFO_MORE_STR), + LPFC_INFO_MORE_STR, sizeof(LPFC_INFO_MORE_STR) + 1); } return len; } From 9a8ef2c73c727a3c64b70c01697c578c7b10fed2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 20 Sep 2021 19:32:06 +0100 Subject: [PATCH 450/543] scsi: target: Fix spelling mistake "CONFLIFT" -> "CONFLICT" There is a spelling mistake in a dev_err message. Fix it. Link: https://lore.kernel.org/r/20210920183206.17477-1-colin.king@canonical.com Signed-off-by: Colin Ian King Signed-off-by: Martin K. Petersen --- drivers/target/target_core_pr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 4b94b085625b..3829b61b56c1 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -269,7 +269,7 @@ target_scsi2_reservation_reserve(struct se_cmd *cmd) spin_lock(&dev->dev_reservation_lock); if (dev->reservation_holder && dev->reservation_holder->se_node_acl != sess->se_node_acl) { - pr_err("SCSI-2 RESERVATION CONFLIFT for %s fabric\n", + pr_err("SCSI-2 RESERVATION CONFLICT for %s fabric\n", tpg->se_tpg_tfo->fabric_name); pr_err("Original reserver LUN: %llu %s\n", cmd->se_lun->unpacked_lun, From fbdac19e642899455b4e64c63aafe2325df7aafa Mon Sep 17 00:00:00 2001 From: Wen Xiong Date: Thu, 16 Sep 2021 22:24:21 -0500 Subject: [PATCH 451/543] scsi: ses: Retry failed Send/Receive Diagnostic commands Setting SCSI logging level with error=3, we saw some errors from enclosues: [108017.360833] ses 0:0:9:0: tag#641 Done: NEEDS_RETRY Result: hostbyte=DID_ERROR driverbyte=DRIVER_OK cmd_age=0s [108017.360838] ses 0:0:9:0: tag#641 CDB: Receive Diagnostic 1c 01 01 00 20 00 [108017.427778] ses 0:0:9:0: Power-on or device reset occurred [108017.427784] ses 0:0:9:0: tag#641 Done: SUCCESS Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s [108017.427788] ses 0:0:9:0: tag#641 CDB: Receive Diagnostic 1c 01 01 00 20 00 [108017.427791] ses 0:0:9:0: tag#641 Sense Key : Unit Attention [current] [108017.427793] ses 0:0:9:0: tag#641 Add. Sense: Bus device reset function occurred [108017.427801] ses 0:0:9:0: Failed to get diagnostic page 0x1 [108017.427804] ses 0:0:9:0: Failed to bind enclosure -19 [108017.427895] ses 0:0:10:0: Attached Enclosure device [108017.427942] ses 0:0:10:0: Attached scsi generic sg18 type 13 Retry if the Send/Receive Diagnostic commands complete with a transient error status (NOT_READY or UNIT_ATTENTION with ASC 0x29). Link: https://lore.kernel.org/r/1631849061-10210-2-git-send-email-wenxiong@linux.ibm.com Reviewed-by: Brian King Reviewed-by: James Bottomley Signed-off-by: Wen Xiong Signed-off-by: Martin K. Petersen --- drivers/scsi/ses.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c index c2afba2a5414..43e682297fd5 100644 --- a/drivers/scsi/ses.c +++ b/drivers/scsi/ses.c @@ -87,9 +87,16 @@ static int ses_recv_diag(struct scsi_device *sdev, int page_code, 0 }; unsigned char recv_page_code; + unsigned int retries = SES_RETRIES; + struct scsi_sense_hdr sshdr; + + do { + ret = scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buf, bufflen, + &sshdr, SES_TIMEOUT, 1, NULL); + } while (ret > 0 && --retries && scsi_sense_valid(&sshdr) && + (sshdr.sense_key == NOT_READY || + (sshdr.sense_key == UNIT_ATTENTION && sshdr.asc == 0x29))); - ret = scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buf, bufflen, - NULL, SES_TIMEOUT, SES_RETRIES, NULL); if (unlikely(ret)) return ret; @@ -121,9 +128,16 @@ static int ses_send_diag(struct scsi_device *sdev, int page_code, bufflen & 0xff, 0 }; + struct scsi_sense_hdr sshdr; + unsigned int retries = SES_RETRIES; + + do { + result = scsi_execute_req(sdev, cmd, DMA_TO_DEVICE, buf, bufflen, + &sshdr, SES_TIMEOUT, 1, NULL); + } while (result > 0 && --retries && scsi_sense_valid(&sshdr) && + (sshdr.sense_key == NOT_READY || + (sshdr.sense_key == UNIT_ATTENTION && sshdr.asc == 0x29))); - result = scsi_execute_req(sdev, cmd, DMA_TO_DEVICE, buf, bufflen, - NULL, SES_TIMEOUT, SES_RETRIES, NULL); if (result) sdev_printk(KERN_ERR, sdev, "SEND DIAGNOSTIC result: %8x\n", result); From f7d848e0fdfa557f181955a769cbb163d54fd292 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 22 Sep 2021 08:30:08 +0200 Subject: [PATCH 452/543] MAINTAINERS: usb, update Peter Korsgaard's entries Peter's e-mail in MAINTAINERS is defunct: This is the qmail-send program at a.mx.sunsite.dk. : Sorry, no mailbox here by that name. (#5.1.1) Peter says: ** Ahh yes, it should be changed to peter@korsgaard.com. However he also says: ** I haven't had access to c67x00 hw for quite some years though, so maybe ** it should be marked Orphan instead? So change as he wishes. Cc: Peter Korsgaard Cc: Linus Torvalds Cc: linux-usb@vger.kernel.org Acked-by: Peter Korsgaard Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20210922063008.25758-1-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index eeb4c70b3d5b..d1bbaf06dc1d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19288,13 +19288,12 @@ S: Maintained F: drivers/usb/misc/chaoskey.c USB CYPRESS C67X00 DRIVER -M: Peter Korsgaard L: linux-usb@vger.kernel.org -S: Maintained +S: Orphan F: drivers/usb/c67x00/ USB DAVICOM DM9601 DRIVER -M: Peter Korsgaard +M: Peter Korsgaard L: netdev@vger.kernel.org S: Maintained W: http://www.linux-usb.org/usbnet From cef0d022f55364d69017daeb9443bd31510ad6a2 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 16 Aug 2021 12:41:19 +0200 Subject: [PATCH 453/543] gpiolib: acpi: Make set-debounce-timeout failures non fatal Commit 8dcb7a15a585 ("gpiolib: acpi: Take into account debounce settings") made the gpiolib-acpi code call gpio_set_debounce_timeout() when requesting GPIOs. This in itself is fine, but it also made gpio_set_debounce_timeout() errors fatal, causing the requesting of the GPIO to fail. This is causing regressions. E.g. on a HP ElitePad 1000 G2 various _AEI specified GPIO ACPI event sources specify a debouncy timeout of 20 ms, but the pinctrl-baytrail.c only supports certain fixed values, the closest ones being 12 or 24 ms and pinctrl-baytrail.c responds with -EINVAL when specified a value which is not one of the fixed values. This is causing the acpi_request_own_gpiod() call to fail for 3 ACPI event sources on the HP ElitePad 1000 G2, which in turn is causing e.g. the battery charging vs discharging status to never get updated, even though a charger has been plugged-in or unplugged. Make gpio_set_debounce_timeout() errors non fatal, warning about the failure instead, to fix this regression. Note we should probably also fix various pinctrl drivers to just pick the first bigger discrete value rather then returning -EINVAL but this will need to be done on a per driver basis, where as this fix at least gets us back to where things were before and thus restores functionality on devices where this was lost due to gpio_set_debounce_timeout() errors. Fixes: 8dcb7a15a585 ("gpiolib: acpi: Take into account debounce settings") Depends-on: 2e2b496cebef ("gpiolib: acpi: Extract acpi_request_own_gpiod() helper") Reviewed-by: Mika Westerberg Signed-off-by: Hans de Goede Acked-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-acpi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 411525ac4cc4..47712b6903b5 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -313,9 +313,11 @@ static struct gpio_desc *acpi_request_own_gpiod(struct gpio_chip *chip, ret = gpio_set_debounce_timeout(desc, agpio->debounce_timeout); if (ret) - gpiochip_free_own_desc(desc); + dev_warn(chip->parent, + "Failed to set debounce-timeout for pin 0x%04X, err %d\n", + pin, ret); - return ret ? ERR_PTR(ret) : desc; + return desc; } static bool acpi_gpio_in_ignore_list(const char *controller_in, int pin_in) From 2dd824cca3407bc9a2bd11b00f6e117b66fcfcf1 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Thu, 16 Sep 2021 20:19:35 +0900 Subject: [PATCH 454/543] gpio: uniphier: Fix void functions to remove return value The return type of irq_chip.irq_mask() and irq_chip.irq_unmask() should be void. Fixes: dbe776c2ca54 ("gpio: uniphier: add UniPhier GPIO controller driver") Signed-off-by: Kunihiko Hayashi Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-uniphier.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-uniphier.c b/drivers/gpio/gpio-uniphier.c index f99f3c10bed0..39dca147d587 100644 --- a/drivers/gpio/gpio-uniphier.c +++ b/drivers/gpio/gpio-uniphier.c @@ -184,7 +184,7 @@ static void uniphier_gpio_irq_mask(struct irq_data *data) uniphier_gpio_reg_update(priv, UNIPHIER_GPIO_IRQ_EN, mask, 0); - return irq_chip_mask_parent(data); + irq_chip_mask_parent(data); } static void uniphier_gpio_irq_unmask(struct irq_data *data) @@ -194,7 +194,7 @@ static void uniphier_gpio_irq_unmask(struct irq_data *data) uniphier_gpio_reg_update(priv, UNIPHIER_GPIO_IRQ_EN, mask, mask); - return irq_chip_unmask_parent(data); + irq_chip_unmask_parent(data); } static int uniphier_gpio_irq_set_type(struct irq_data *data, unsigned int type) From f6c35df22708438c94605b8896d2b4e4d5f342a3 Mon Sep 17 00:00:00 2001 From: Steven Lee Date: Tue, 7 Sep 2021 17:55:25 +0800 Subject: [PATCH 455/543] gpio: gpio-aspeed-sgpio: Fix wrong hwirq in irq handler. The current hwirq is calculated based on the old GPIO pin order(input GPIO range is from 0 to ngpios - 1). It should be calculated based on the current GPIO input pin order(input GPIOs are 0, 2, 4, ..., (ngpios - 1) * 2). Signed-off-by: Steven Lee Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-aspeed-sgpio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-aspeed-sgpio.c b/drivers/gpio/gpio-aspeed-sgpio.c index 10f303d15225..3d6ef37a7702 100644 --- a/drivers/gpio/gpio-aspeed-sgpio.c +++ b/drivers/gpio/gpio-aspeed-sgpio.c @@ -395,7 +395,7 @@ static void aspeed_sgpio_irq_handler(struct irq_desc *desc) reg = ioread32(bank_reg(data, bank, reg_irq_status)); for_each_set_bit(p, ®, 32) - generic_handle_domain_irq(gc->irq.domain, i * 32 + p); + generic_handle_domain_irq(gc->irq.domain, i * 32 + p * 2); } chained_irq_exit(ic, desc); From 0f562b7de99085935d76b00c41ab5caa26ff5c74 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 14 Sep 2021 00:49:23 +0200 Subject: [PATCH 456/543] gpio/rockchip: extended debounce support is only available on v2 The gpio driver runs into issues on v1 gpio blocks, as the db_clk and the whole extended debounce support is only ever defined on v2. So checking for the IS_ERR on the db_clk is not enough, as it will be NULL on v1. Fix this by adding the needed condition for v2 first before checking the existence of the db_clk. This caused my rk3288-veyron-pinky to enter a reboot loop when it tried to enable the power-key as adc-key device. Fixes: 3bcbd1a85b68 ("gpio/rockchip: support next version gpio controller") Signed-off-by: Heiko Stuebner Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-rockchip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-rockchip.c b/drivers/gpio/gpio-rockchip.c index 036b2d959503..16d9bf7188e3 100644 --- a/drivers/gpio/gpio-rockchip.c +++ b/drivers/gpio/gpio-rockchip.c @@ -195,7 +195,7 @@ static int rockchip_gpio_set_debounce(struct gpio_chip *gc, unsigned int cur_div_reg; u64 div; - if (!IS_ERR(bank->db_clk)) { + if (bank->gpio_type == GPIO_TYPE_V2 && !IS_ERR(bank->db_clk)) { div_debounce_support = true; freq = clk_get_rate(bank->db_clk); max_debounce = (GENMASK(23, 0) + 1) * 2 * 1000000 / freq; From b22a4705e2e60f342b1b851c9ebdb3ea02f21f8f Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 14 Sep 2021 00:49:24 +0200 Subject: [PATCH 457/543] gpio/rockchip: fix get_direction value handling The function uses the newly introduced rockchip_gpio_readl_bit() which directly returns the actual value of the requeste bit. So using the existing bit-wise check for the bit inside the value will always return 0. Fix this by dropping the bit manipulation on the result. Fixes: 3bcbd1a85b68 ("gpio/rockchip: support next version gpio controller") Signed-off-by: Heiko Stuebner Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-rockchip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-rockchip.c b/drivers/gpio/gpio-rockchip.c index 16d9bf7188e3..3335bd57761d 100644 --- a/drivers/gpio/gpio-rockchip.c +++ b/drivers/gpio/gpio-rockchip.c @@ -141,7 +141,7 @@ static int rockchip_gpio_get_direction(struct gpio_chip *chip, u32 data; data = rockchip_gpio_readl_bit(bank, offset, bank->gpio_regs->port_ddr); - if (data & BIT(offset)) + if (data) return GPIO_LINE_DIRECTION_OUT; return GPIO_LINE_DIRECTION_IN; From 372d1f3e1bfede719864d0d1fbf3146b1e638c88 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 21 Sep 2021 23:32:33 +0300 Subject: [PATCH 458/543] ext2: fix sleeping in atomic bugs on error The ext2_error() function syncs the filesystem so it sleeps. The caller is holding a spinlock so it's not allowed to sleep. ext2_statfs() <- disables preempt -> ext2_count_free_blocks() -> ext2_get_group_desc() Fix this by using WARN() to print an error message and a stack trace instead of using ext2_error(). Link: https://lore.kernel.org/r/20210921203233.GA16529@kili Signed-off-by: Dan Carpenter Signed-off-by: Jan Kara --- fs/ext2/balloc.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 1f3f4326bf3c..c17ccc19b938 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -48,10 +48,9 @@ struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, struct ext2_sb_info *sbi = EXT2_SB(sb); if (block_group >= sbi->s_groups_count) { - ext2_error (sb, "ext2_get_group_desc", - "block_group >= groups_count - " - "block_group = %d, groups_count = %lu", - block_group, sbi->s_groups_count); + WARN(1, "block_group >= groups_count - " + "block_group = %d, groups_count = %lu", + block_group, sbi->s_groups_count); return NULL; } @@ -59,10 +58,9 @@ struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb); offset = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1); if (!sbi->s_group_desc[group_desc]) { - ext2_error (sb, "ext2_get_group_desc", - "Group descriptor not loaded - " - "block_group = %d, group_desc = %lu, desc = %lu", - block_group, group_desc, offset); + WARN(1, "Group descriptor not loaded - " + "block_group = %d, group_desc = %lu, desc = %lu", + block_group, group_desc, offset); return NULL; } From 2a7313dc81e88adc7bb09d0f056985fa8afc2b89 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 22 Sep 2021 14:19:41 +0100 Subject: [PATCH 459/543] irqchip/armada-370-xp: Fix ack/eoi breakage When converting the driver to using handle_percpu_devid_irq, we forgot to repaint the irq_eoi() callback into irq_ack(), as handle_percpu_devid_fasteoi_ipi() was actually using EOI really early in the handling. Yes this was a stupid idea. Fix this by using the HW ack method as irq_ack(). Fixes: e52e73b7e9f7 ("irqchip/armada-370-xp: Make IPIs use handle_percpu_devid_irq()") Reported-by: Steffen Trumtrar Tested-by: Steffen Trumtrar Signed-off-by: Marc Zyngier Cc: Valentin Schneider Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/87tuiexq5f.fsf@pengutronix.de --- drivers/irqchip/irq-armada-370-xp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index 7557ab551295..53e0fb0562c1 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -359,16 +359,16 @@ static void armada_370_xp_ipi_send_mask(struct irq_data *d, ARMADA_370_XP_SW_TRIG_INT_OFFS); } -static void armada_370_xp_ipi_eoi(struct irq_data *d) +static void armada_370_xp_ipi_ack(struct irq_data *d) { writel(~BIT(d->hwirq), per_cpu_int_base + ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS); } static struct irq_chip ipi_irqchip = { .name = "IPI", + .irq_ack = armada_370_xp_ipi_ack, .irq_mask = armada_370_xp_ipi_mask, .irq_unmask = armada_370_xp_ipi_unmask, - .irq_eoi = armada_370_xp_ipi_eoi, .ipi_send_mask = armada_370_xp_ipi_send_mask, }; From 20c36ce2164f1774b487d443ece99b754bc6ad43 Mon Sep 17 00:00:00 2001 From: Bixuan Cui Date: Thu, 16 Sep 2021 10:52:03 +0800 Subject: [PATCH 460/543] irqdomain: Change the type of 'size' in __irq_domain_add() to be consistent The 'size' is used in struct_size(domain, revmap, size) and its input parameter type is 'size_t'(unsigned int). Changing the size to 'unsigned int' to make the type consistent. Signed-off-by: Bixuan Cui Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210916025203.44841-1-cuibixuan@huawei.com --- include/linux/irqdomain.h | 2 +- kernel/irq/irqdomain.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 23e4ee523576..9ee238ad29ce 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -251,7 +251,7 @@ static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa) } void irq_domain_free_fwnode(struct fwnode_handle *fwnode); -struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, +struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size, irq_hw_number_t hwirq_max, int direct_max, const struct irq_domain_ops *ops, void *host_data); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 62be16135e7c..bfa289ed57ab 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -136,7 +136,7 @@ EXPORT_SYMBOL_GPL(irq_domain_free_fwnode); * Allocates and initializes an irq_domain structure. * Returns pointer to IRQ domain, or NULL on failure. */ -struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, +struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size, irq_hw_number_t hwirq_max, int direct_max, const struct irq_domain_ops *ops, void *host_data) From b99948836162b0cfb03007d9b2c2da9babc057b5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 4 Sep 2021 20:36:44 -0700 Subject: [PATCH 461/543] irqchip/mbigen: Repair non-kernel-doc notation Fix kernel-doc warnings in irq-mbigen.c: irq-mbigen.c:29: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * In mbigen vector register irq-mbigen.c:43: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * offset of clear register in mbigen node irq-mbigen.c:50: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * offset of interrupt type register Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Jun Ma Cc: Yun Wu Cc: Thomas Gleixner Cc: Marc Zyngier Cc: Aditya Srivastava Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210905033644.15988-1-rdunlap@infradead.org --- drivers/irqchip/irq-mbigen.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c index f565317a3da3..12df2162108e 100644 --- a/drivers/irqchip/irq-mbigen.c +++ b/drivers/irqchip/irq-mbigen.c @@ -25,7 +25,7 @@ /* The maximum IRQ pin number of mbigen chip(start from 0) */ #define MAXIMUM_IRQ_PIN_NUM 1407 -/** +/* * In mbigen vector register * bit[21:12]: event id value * bit[11:0]: device id @@ -39,14 +39,14 @@ /* offset of vector register in mbigen node */ #define REG_MBIGEN_VEC_OFFSET 0x200 -/** +/* * offset of clear register in mbigen node * This register is used to clear the status * of interrupt */ #define REG_MBIGEN_CLEAR_OFFSET 0xa000 -/** +/* * offset of interrupt type register * This register is used to configure interrupt * trigger type From 969ac78db78c723a24e9410666b457cc1b0cb3c3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 5 Sep 2021 09:25:19 -0700 Subject: [PATCH 462/543] irqchip/goldfish-pic: Select GENERIC_IRQ_CHIP to fix build irq-goldfish-pic uses GENERIC_IRQ_CHIP interfaces so select that symbol to fix build errors. Fixes these build errors: mips-linux-ld: drivers/irqchip/irq-goldfish-pic.o: in function `goldfish_pic_of_init': irq-goldfish-pic.c:(.init.text+0xc0): undefined reference to `irq_alloc_generic_chip' mips-linux-ld: irq-goldfish-pic.c:(.init.text+0xf4): undefined reference to `irq_gc_unmask_enable_reg' mips-linux-ld: irq-goldfish-pic.c:(.init.text+0xf8): undefined reference to `irq_gc_unmask_enable_reg' mips-linux-ld: irq-goldfish-pic.c:(.init.text+0x100): undefined reference to `irq_gc_mask_disable_reg' mips-linux-ld: irq-goldfish-pic.c:(.init.text+0x104): undefined reference to `irq_gc_mask_disable_reg' mips-linux-ld: irq-goldfish-pic.c:(.init.text+0x11c): undefined reference to `irq_setup_generic_chip' mips-linux-ld: irq-goldfish-pic.c:(.init.text+0x168): undefined reference to `irq_remove_generic_chip' Fixes: 4235ff50cf98 ("irqchip/irq-goldfish-pic: Add Goldfish PIC driver") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Miodrag Dinic Cc: Geert Uytterhoeven Cc: Bartosz Golaszewski Cc: Thomas Gleixner Cc: Marc Zyngier Cc: Goran Ferenc Cc: Aleksandar Markovic Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210905162519.21507-1-rdunlap@infradead.org --- drivers/irqchip/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 4d5924e9f766..aca7b595c4c7 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -409,6 +409,7 @@ config MESON_IRQ_GPIO config GOLDFISH_PIC bool "Goldfish programmable interrupt controller" depends on MIPS && (GOLDFISH || COMPILE_TEST) + select GENERIC_IRQ_CHIP select IRQ_DOMAIN help Say yes here to enable Goldfish interrupt controller driver used From 280bef512933b2dda01d681d8cbe499b98fc5bdd Mon Sep 17 00:00:00 2001 From: Kaige Fu Date: Wed, 15 Sep 2021 10:20:55 +0800 Subject: [PATCH 463/543] irqchip/gic-v3-its: Fix potential VPE leak on error In its_vpe_irq_domain_alloc, when its_vpe_init() returns an error, there is an off-by-one in the number of VPEs to be freed. Fix it by simply passing the number of VPEs allocated, which is the index of the loop iterating over the VPEs. Fixes: 7d75bbb4bc1a ("irqchip/gic-v3-its: Add VPE irq domain allocation/teardown") Signed-off-by: Kaige Fu [maz: fixed commit message] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/d9e36dee512e63670287ed9eff884a5d8d6d27f2.1631672311.git.kaige.fu@linux.alibaba.com --- drivers/irqchip/irq-gic-v3-its.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 7f40dca8cda5..eb0882d15366 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -4501,7 +4501,7 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq if (err) { if (i > 0) - its_vpe_irq_domain_free(domain, virq, i - 1); + its_vpe_irq_domain_free(domain, virq, i); its_lpi_free(bitmap, base, nr_ids); its_free_prop_table(vprop_page); From 3ce8c70ecedb4e1f1d36301afb0281be40390f13 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 15 Sep 2021 11:47:30 +0200 Subject: [PATCH 464/543] irqchip/renesas-rza1: Use semicolons instead of commas This code works, but it is cleaner to use semicolons at the end of statements instead of commas. Extracted from a big anonymous patch by Julia Lawall . Signed-off-by: Geert Uytterhoeven Reviewed-by: Ulrich Hecht Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/b1710bb6ea5faa7a7fe74404adb0beb951e0bf8c.1631699160.git.geert+renesas@glider.be --- drivers/irqchip/irq-renesas-rza1.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/irqchip/irq-renesas-rza1.c b/drivers/irqchip/irq-renesas-rza1.c index b0d46ac42b89..72c06e883d1c 100644 --- a/drivers/irqchip/irq-renesas-rza1.c +++ b/drivers/irqchip/irq-renesas-rza1.c @@ -223,12 +223,12 @@ static int rza1_irqc_probe(struct platform_device *pdev) goto out_put_node; } - priv->chip.name = "rza1-irqc", - priv->chip.irq_mask = irq_chip_mask_parent, - priv->chip.irq_unmask = irq_chip_unmask_parent, - priv->chip.irq_eoi = rza1_irqc_eoi, - priv->chip.irq_retrigger = irq_chip_retrigger_hierarchy, - priv->chip.irq_set_type = rza1_irqc_set_type, + priv->chip.name = "rza1-irqc"; + priv->chip.irq_mask = irq_chip_mask_parent; + priv->chip.irq_unmask = irq_chip_unmask_parent; + priv->chip.irq_eoi = rza1_irqc_eoi; + priv->chip.irq_retrigger = irq_chip_retrigger_hierarchy; + priv->chip.irq_set_type = rza1_irqc_set_type; priv->chip.flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_SKIP_SET_WAKE; priv->irq_domain = irq_domain_add_hierarchy(parent, 0, IRQC_NUM_IRQ, From 1ea7812326004afd2803cc968a4776ae5120a597 Mon Sep 17 00:00:00 2001 From: Shai Malin Date: Wed, 22 Sep 2021 13:53:26 +0300 Subject: [PATCH 465/543] qed: rdma - don't wait for resources under hw error recovery flow If the HW device is during recovery, the HW resources will never return, hence we shouldn't wait for the CID (HW context ID) bitmaps to clear. This fix speeds up the error recovery flow. Fixes: 64515dc899df ("qed: Add infrastructure for error detection and recovery") Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: Shai Malin Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 8 ++++++++ drivers/net/ethernet/qlogic/qed/qed_roce.c | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index fc8b3e64f153..186d0048a9d1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1297,6 +1297,14 @@ qed_iwarp_wait_cid_map_cleared(struct qed_hwfn *p_hwfn, struct qed_bmap *bmap) prev_weight = weight; while (weight) { + /* If the HW device is during recovery, all resources are + * immediately reset without receiving a per-cid indication + * from HW. In this case we don't expect the cid_map to be + * cleared. + */ + if (p_hwfn->cdev->recov_in_prog) + return 0; + msleep(QED_IWARP_MAX_CID_CLEAN_TIME); weight = bitmap_weight(bmap->bitmap, bmap->max_count); diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index f16a157bb95a..cf5baa5e59bc 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -77,6 +77,14 @@ void qed_roce_stop(struct qed_hwfn *p_hwfn) * Beyond the added delay we clear the bitmap anyway. */ while (bitmap_weight(rcid_map->bitmap, rcid_map->max_count)) { + /* If the HW device is during recovery, all resources are + * immediately reset without receiving a per-cid indication + * from HW. In this case we don't expect the cid bitmap to be + * cleared. + */ + if (p_hwfn->cdev->recov_in_prog) + return; + msleep(100); if (wait_count++ > 20) { DP_NOTICE(p_hwfn, "cid bitmap wait timed out\n"); From 977d293e23b48a1129830d7968605f61c4af71a0 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 22 Sep 2021 13:12:17 +0200 Subject: [PATCH 466/543] mptcp: ensure tx skbs always have the MPTCP ext Due to signed/unsigned comparison, the expression: info->size_goal - skb->len > 0 evaluates to true when the size goal is smaller than the skb size. That results in lack of tx cache refill, so that the skb allocated by the core TCP code lacks the required MPTCP skb extensions. Due to the above, syzbot is able to trigger the following WARN_ON(): WARNING: CPU: 1 PID: 810 at net/mptcp/protocol.c:1366 mptcp_sendmsg_frag+0x1362/0x1bc0 net/mptcp/protocol.c:1366 Modules linked in: CPU: 1 PID: 810 Comm: syz-executor.4 Not tainted 5.14.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:mptcp_sendmsg_frag+0x1362/0x1bc0 net/mptcp/protocol.c:1366 Code: ff 4c 8b 74 24 50 48 8b 5c 24 58 e9 0f fb ff ff e8 13 44 8b f8 4c 89 e7 45 31 ed e8 98 57 2e fe e9 81 f4 ff ff e8 fe 43 8b f8 <0f> 0b 41 bd ea ff ff ff e9 6f f4 ff ff 4c 89 e7 e8 b9 8e d2 f8 e9 RSP: 0018:ffffc9000531f6a0 EFLAGS: 00010216 RAX: 000000000000697f RBX: 0000000000000000 RCX: ffffc90012107000 RDX: 0000000000040000 RSI: ffffffff88eac9e2 RDI: 0000000000000003 RBP: ffff888078b15780 R08: 0000000000000000 R09: 0000000000000000 R10: ffffffff88eac017 R11: 0000000000000000 R12: ffff88801de0a280 R13: 0000000000006b58 R14: ffff888066278280 R15: ffff88803c2fe9c0 FS: 00007fd9f866e700(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007faebcb2f718 CR3: 00000000267cb000 CR4: 00000000001506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __mptcp_push_pending+0x1fb/0x6b0 net/mptcp/protocol.c:1547 mptcp_release_cb+0xfe/0x210 net/mptcp/protocol.c:3003 release_sock+0xb4/0x1b0 net/core/sock.c:3206 sk_stream_wait_memory+0x604/0xed0 net/core/stream.c:145 mptcp_sendmsg+0xc39/0x1bc0 net/mptcp/protocol.c:1749 inet6_sendmsg+0x99/0xe0 net/ipv6/af_inet6.c:643 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:724 sock_write_iter+0x2a0/0x3e0 net/socket.c:1057 call_write_iter include/linux/fs.h:2163 [inline] new_sync_write+0x40b/0x640 fs/read_write.c:507 vfs_write+0x7cf/0xae0 fs/read_write.c:594 ksys_write+0x1ee/0x250 fs/read_write.c:647 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x4665f9 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fd9f866e188 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 000000000056c038 RCX: 00000000004665f9 RDX: 00000000000e7b78 RSI: 0000000020000000 RDI: 0000000000000003 RBP: 00000000004bfcc4 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 000000000056c038 R13: 0000000000a9fb1f R14: 00007fd9f866e300 R15: 0000000000022000 Fix the issue rewriting the relevant expression to avoid sign-related problems - note: size_goal is always >= 0. Additionally, ensure that the skb in the tx cache always carries the relevant extension. Reported-and-tested-by: syzbot+263a248eec3e875baa7b@syzkaller.appspotmail.com Fixes: 1094c6fe7280 ("mptcp: fix possible divide by zero") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2602f1386160..dbcebf56798f 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1316,7 +1316,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, goto alloc_skb; } - must_collapse = (info->size_goal - skb->len > 0) && + must_collapse = (info->size_goal > skb->len) && (skb_shinfo(skb)->nr_frags < sysctl_max_skb_frags); if (must_collapse) { size_bias = skb->len; @@ -1325,7 +1325,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, } alloc_skb: - if (!must_collapse && !ssk->sk_tx_skb_cache && + if (!must_collapse && !mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held)) return 0; From b78f26926b17cc289e4f16b63363abe0aa2e8efc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 10 Sep 2021 18:29:25 +0100 Subject: [PATCH 467/543] irqchip/gic: Work around broken Renesas integration Geert reported that the GIC driver locks up on a Renesas system since 005c34ae4b44f085 ("irqchip/gic: Atomically update affinity") fixed the driver to use writeb_relaxed() instead of writel_relaxed(). As it turns out, the interconnect used on this system mandates 32bit wide accesses for all MMIO transactions, even if the GIC architecture specifically mandates for some registers to be byte accessible. Gahhh... Work around the issue by crudly detecting the offending system, and falling back to an inefficient RMW+lock implementation. Reported-by: Geert Uytterhoeven Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/CAMuHMdV+Ev47K5NO8XHsanSq5YRMCHn2gWAQyV-q2LpJVy9HiQ@mail.gmail.com --- drivers/irqchip/irq-gic.c | 52 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index d329ec3d64d8..5f22c9d65e57 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -107,6 +107,8 @@ static DEFINE_RAW_SPINLOCK(cpu_map_lock); #endif +static DEFINE_STATIC_KEY_FALSE(needs_rmw_access); + /* * The GIC mapping of CPU interfaces does not necessarily match * the logical CPU numbering. Let's use a mapping as returned @@ -774,6 +776,25 @@ static int gic_pm_init(struct gic_chip_data *gic) #endif #ifdef CONFIG_SMP +static void rmw_writeb(u8 bval, void __iomem *addr) +{ + static DEFINE_RAW_SPINLOCK(rmw_lock); + unsigned long offset = (unsigned long)addr & 3UL; + unsigned long shift = offset * 8; + unsigned long flags; + u32 val; + + raw_spin_lock_irqsave(&rmw_lock, flags); + + addr -= offset; + val = readl_relaxed(addr); + val &= ~GENMASK(shift + 7, shift); + val |= bval << shift; + writel_relaxed(val, addr); + + raw_spin_unlock_irqrestore(&rmw_lock, flags); +} + static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, bool force) { @@ -788,7 +809,10 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids) return -EINVAL; - writeb_relaxed(gic_cpu_map[cpu], reg); + if (static_branch_unlikely(&needs_rmw_access)) + rmw_writeb(gic_cpu_map[cpu], reg); + else + writeb_relaxed(gic_cpu_map[cpu], reg); irq_data_update_effective_affinity(d, cpumask_of(cpu)); return IRQ_SET_MASK_OK_DONE; @@ -1375,6 +1399,30 @@ static bool gic_check_eoimode(struct device_node *node, void __iomem **base) return true; } +static bool gic_enable_rmw_access(void *data) +{ + /* + * The EMEV2 class of machines has a broken interconnect, and + * locks up on accesses that are less than 32bit. So far, only + * the affinity setting requires it. + */ + if (of_machine_is_compatible("renesas,emev2")) { + static_branch_enable(&needs_rmw_access); + return true; + } + + return false; +} + +static const struct gic_quirk gic_quirks[] = { + { + .desc = "broken byte access", + .compatible = "arm,pl390", + .init = gic_enable_rmw_access, + }, + { }, +}; + static int gic_of_setup(struct gic_chip_data *gic, struct device_node *node) { if (!gic || !node) @@ -1391,6 +1439,8 @@ static int gic_of_setup(struct gic_chip_data *gic, struct device_node *node) if (of_property_read_u32(node, "cpu-offset", &gic->percpu_offset)) gic->percpu_offset = 0; + gic_enable_of_quirks(node, gic_quirks, gic); + return 0; error: From 8646e53633f314e4d746a988240d3b951a92f94a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 1 Sep 2021 13:30:26 -0700 Subject: [PATCH 468/543] KVM: rseq: Update rseq when processing NOTIFY_RESUME on xfer to KVM guest Invoke rseq's NOTIFY_RESUME handler when processing the flag prior to transferring to a KVM guest, which is roughly equivalent to an exit to userspace and processes many of the same pending actions. While the task cannot be in an rseq critical section as the KVM path is reachable only by via ioctl(KVM_RUN), the side effects that apply to rseq outside of a critical section still apply, e.g. the current CPU needs to be updated if the task is migrated. Clearing TIF_NOTIFY_RESUME without informing rseq can lead to segfaults and other badness in userspace VMMs that use rseq in combination with KVM, e.g. due to the CPU ID being stale after task migration. Fixes: 72c3c0fe54a3 ("x86/kvm: Use generic xfer to guest work function") Reported-by: Peter Foley Bisected-by: Doug Evans Acked-by: Mathieu Desnoyers Cc: Shakeel Butt Cc: Thomas Gleixner Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20210901203030.1292304-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- kernel/entry/kvm.c | 4 +++- kernel/rseq.c | 14 +++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c index 49972ee99aff..049fd06b4c3d 100644 --- a/kernel/entry/kvm.c +++ b/kernel/entry/kvm.c @@ -19,8 +19,10 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) if (ti_work & _TIF_NEED_RESCHED) schedule(); - if (ti_work & _TIF_NOTIFY_RESUME) + if (ti_work & _TIF_NOTIFY_RESUME) { tracehook_notify_resume(NULL); + rseq_handle_notify_resume(NULL, NULL); + } ret = arch_xfer_to_guest_mode_handle_work(vcpu, ti_work); if (ret) diff --git a/kernel/rseq.c b/kernel/rseq.c index 35f7bd0fced0..6d45ac3dae7f 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -282,9 +282,17 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs) if (unlikely(t->flags & PF_EXITING)) return; - ret = rseq_ip_fixup(regs); - if (unlikely(ret < 0)) - goto error; + + /* + * regs is NULL if and only if the caller is in a syscall path. Skip + * fixup and leave rseq_cs as is so that rseq_sycall() will detect and + * kill a misbehaving userspace on debug kernels. + */ + if (regs) { + ret = rseq_ip_fixup(regs); + if (unlikely(ret < 0)) + goto error; + } if (unlikely(rseq_update_cpu_id(t))) goto error; return; From a68de80f61f6af397bc06fb391ff2e571c9c4d80 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 1 Sep 2021 13:30:27 -0700 Subject: [PATCH 469/543] entry: rseq: Call rseq_handle_notify_resume() in tracehook_notify_resume() Invoke rseq_handle_notify_resume() from tracehook_notify_resume() now that the two function are always called back-to-back by architectures that have rseq. The rseq helper is stubbed out for architectures that don't support rseq, i.e. this is a nop across the board. Note, tracehook_notify_resume() is horribly named and arguably does not belong in tracehook.h as literally every line of code in it has nothing to do with tracing. But, that's been true since commit a42c6ded827d ("move key_repace_session_keyring() into tracehook_notify_resume()") first usurped tracehook_notify_resume() back in 2012. Punt cleaning that mess up to future patches. No functional change intended. Acked-by: Mathieu Desnoyers Signed-off-by: Sean Christopherson Message-Id: <20210901203030.1292304-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/arm/kernel/signal.c | 1 - arch/arm64/kernel/signal.c | 4 +--- arch/csky/kernel/signal.c | 4 +--- arch/mips/kernel/signal.c | 4 +--- arch/powerpc/kernel/signal.c | 4 +--- include/linux/tracehook.h | 2 ++ kernel/entry/common.c | 4 +--- kernel/entry/kvm.c | 4 +--- 8 files changed, 8 insertions(+), 19 deletions(-) diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index d0a800be0486..a41e27ace391 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -628,7 +628,6 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) uprobe_notify_resume(regs); } else { tracehook_notify_resume(regs); - rseq_handle_notify_resume(NULL, regs); } } local_irq_disable(); diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 9fe70b12b34f..c287b9407f28 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -940,10 +940,8 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs); - if (thread_flags & _TIF_NOTIFY_RESUME) { + if (thread_flags & _TIF_NOTIFY_RESUME) tracehook_notify_resume(regs); - rseq_handle_notify_resume(NULL, regs); - } if (thread_flags & _TIF_FOREIGN_FPSTATE) fpsimd_restore_current_state(); diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c index 312f046d452d..bc4238b9f709 100644 --- a/arch/csky/kernel/signal.c +++ b/arch/csky/kernel/signal.c @@ -260,8 +260,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs); - if (thread_info_flags & _TIF_NOTIFY_RESUME) { + if (thread_info_flags & _TIF_NOTIFY_RESUME) tracehook_notify_resume(regs); - rseq_handle_notify_resume(NULL, regs); - } } diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index f1e985109da0..c9b2a75563e1 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -906,10 +906,8 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused, if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs); - if (thread_info_flags & _TIF_NOTIFY_RESUME) { + if (thread_info_flags & _TIF_NOTIFY_RESUME) tracehook_notify_resume(regs); - rseq_handle_notify_resume(NULL, regs); - } user_enter(); } diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index e600764a926c..b93b87df499d 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -293,10 +293,8 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) do_signal(current); } - if (thread_info_flags & _TIF_NOTIFY_RESUME) { + if (thread_info_flags & _TIF_NOTIFY_RESUME) tracehook_notify_resume(regs); - rseq_handle_notify_resume(NULL, regs); - } } static unsigned long get_tm_stackpointer(struct task_struct *tsk) diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 3e80c4bc66f7..2564b7434b4d 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -197,6 +197,8 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) mem_cgroup_handle_over_high(); blkcg_maybe_throttle_current(); + + rseq_handle_notify_resume(NULL, regs); } /* diff --git a/kernel/entry/common.c b/kernel/entry/common.c index bf16395b9e13..d5a61d565ad5 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -171,10 +171,8 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) handle_signal_work(regs, ti_work); - if (ti_work & _TIF_NOTIFY_RESUME) { + if (ti_work & _TIF_NOTIFY_RESUME) tracehook_notify_resume(regs); - rseq_handle_notify_resume(NULL, regs); - } /* Architecture specific TIF work */ arch_exit_to_user_mode_work(regs, ti_work); diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c index 049fd06b4c3d..49972ee99aff 100644 --- a/kernel/entry/kvm.c +++ b/kernel/entry/kvm.c @@ -19,10 +19,8 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) if (ti_work & _TIF_NEED_RESCHED) schedule(); - if (ti_work & _TIF_NOTIFY_RESUME) { + if (ti_work & _TIF_NOTIFY_RESUME) tracehook_notify_resume(NULL); - rseq_handle_notify_resume(NULL, NULL); - } ret = arch_xfer_to_guest_mode_handle_work(vcpu, ti_work); if (ret) From de5f4213dafa8f8b0b52cdaf06bb35ad4cab1681 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 1 Sep 2021 13:30:28 -0700 Subject: [PATCH 470/543] tools: Move x86 syscall number fallbacks to .../uapi/ Move unistd_{32,64}.h from x86/include/asm to x86/include/uapi/asm so that tools/selftests that install kernel headers, e.g. KVM selftests, can include non-uapi tools headers, e.g. to get 'struct list_head', without effectively overriding the installed non-tool uapi headers. Swapping KVM's search order, e.g. to search the kernel headers before tool headers, is not a viable option as doing results in linux/type.h and other core headers getting pulled from the kernel headers, which do not have the kernel-internal typedefs that are used through tools, including many files outside of selftests/kvm's control. Prior to commit cec07f53c398 ("perf tools: Move syscall number fallbacks from perf-sys.h to tools/arch/x86/include/asm/"), the handcoded numbers were actual fallbacks, i.e. overriding unistd_{32,64}.h from the kernel headers was unintentional. Signed-off-by: Sean Christopherson Message-Id: <20210901203030.1292304-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/arch/x86/include/{ => uapi}/asm/unistd_32.h | 0 tools/arch/x86/include/{ => uapi}/asm/unistd_64.h | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tools/arch/x86/include/{ => uapi}/asm/unistd_32.h (100%) rename tools/arch/x86/include/{ => uapi}/asm/unistd_64.h (100%) diff --git a/tools/arch/x86/include/asm/unistd_32.h b/tools/arch/x86/include/uapi/asm/unistd_32.h similarity index 100% rename from tools/arch/x86/include/asm/unistd_32.h rename to tools/arch/x86/include/uapi/asm/unistd_32.h diff --git a/tools/arch/x86/include/asm/unistd_64.h b/tools/arch/x86/include/uapi/asm/unistd_64.h similarity index 100% rename from tools/arch/x86/include/asm/unistd_64.h rename to tools/arch/x86/include/uapi/asm/unistd_64.h From 61e52f1630f54713f5dffa1ab4bb49772235aa5a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 1 Sep 2021 13:30:29 -0700 Subject: [PATCH 471/543] KVM: selftests: Add a test for KVM_RUN+rseq to detect task migration bugs Add a test to verify an rseq's CPU ID is updated correctly if the task is migrated while the kernel is handling KVM_RUN. This is a regression test for a bug introduced by commit 72c3c0fe54a3 ("x86/kvm: Use generic xfer to guest work function"), where TIF_NOTIFY_RESUME would be cleared by KVM without updating rseq, leading to a stale CPU ID and other badness. Signed-off-by: Sean Christopherson Acked-by: Mathieu Desnoyers Message-Id: <20210901203030.1292304-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 3 + tools/testing/selftests/kvm/rseq_test.c | 236 ++++++++++++++++++++++++ 3 files changed, 240 insertions(+) create mode 100644 tools/testing/selftests/kvm/rseq_test.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 98053d3afbda..618bf9bc7f3f 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -48,6 +48,7 @@ /kvm_page_table_test /memslot_modification_stress_test /memslot_perf_test +/rseq_test /set_memory_region_test /steal_time /kvm_binary_stats_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 5d05801ab816..9ac325cfc94a 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -80,6 +80,7 @@ TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus TEST_GEN_PROGS_x86_64 += kvm_page_table_test TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test TEST_GEN_PROGS_x86_64 += memslot_perf_test +TEST_GEN_PROGS_x86_64 += rseq_test TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test @@ -93,6 +94,7 @@ TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += dirty_log_perf_test TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus TEST_GEN_PROGS_aarch64 += kvm_page_table_test +TEST_GEN_PROGS_aarch64 += rseq_test TEST_GEN_PROGS_aarch64 += set_memory_region_test TEST_GEN_PROGS_aarch64 += steal_time TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test @@ -104,6 +106,7 @@ TEST_GEN_PROGS_s390x += demand_paging_test TEST_GEN_PROGS_s390x += dirty_log_test TEST_GEN_PROGS_s390x += kvm_create_max_vcpus TEST_GEN_PROGS_s390x += kvm_page_table_test +TEST_GEN_PROGS_s390x += rseq_test TEST_GEN_PROGS_s390x += set_memory_region_test TEST_GEN_PROGS_s390x += kvm_binary_stats_test diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c new file mode 100644 index 000000000000..060538bd405a --- /dev/null +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" + +#define VCPU_ID 0 + +static __thread volatile struct rseq __rseq = { + .cpu_id = RSEQ_CPU_ID_UNINITIALIZED, +}; + +/* + * Use an arbitrary, bogus signature for configuring rseq, this test does not + * actually enter an rseq critical section. + */ +#define RSEQ_SIG 0xdeadbeef + +/* + * Any bug related to task migration is likely to be timing-dependent; perform + * a large number of migrations to reduce the odds of a false negative. + */ +#define NR_TASK_MIGRATIONS 100000 + +static pthread_t migration_thread; +static cpu_set_t possible_mask; +static bool done; + +static atomic_t seq_cnt; + +static void guest_code(void) +{ + for (;;) + GUEST_SYNC(0); +} + +static void sys_rseq(int flags) +{ + int r; + + r = syscall(__NR_rseq, &__rseq, sizeof(__rseq), flags, RSEQ_SIG); + TEST_ASSERT(!r, "rseq failed, errno = %d (%s)", errno, strerror(errno)); +} + +static void *migration_worker(void *ign) +{ + cpu_set_t allowed_mask; + int r, i, nr_cpus, cpu; + + CPU_ZERO(&allowed_mask); + + nr_cpus = CPU_COUNT(&possible_mask); + + for (i = 0; i < NR_TASK_MIGRATIONS; i++) { + cpu = i % nr_cpus; + if (!CPU_ISSET(cpu, &possible_mask)) + continue; + + CPU_SET(cpu, &allowed_mask); + + /* + * Bump the sequence count twice to allow the reader to detect + * that a migration may have occurred in between rseq and sched + * CPU ID reads. An odd sequence count indicates a migration + * is in-progress, while a completely different count indicates + * a migration occurred since the count was last read. + */ + atomic_inc(&seq_cnt); + + /* + * Ensure the odd count is visible while sched_getcpu() isn't + * stable, i.e. while changing affinity is in-progress. + */ + smp_wmb(); + r = sched_setaffinity(0, sizeof(allowed_mask), &allowed_mask); + TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)", + errno, strerror(errno)); + smp_wmb(); + atomic_inc(&seq_cnt); + + CPU_CLR(cpu, &allowed_mask); + + /* + * Wait 1-10us before proceeding to the next iteration and more + * specifically, before bumping seq_cnt again. A delay is + * needed on three fronts: + * + * 1. To allow sched_setaffinity() to prompt migration before + * ioctl(KVM_RUN) enters the guest so that TIF_NOTIFY_RESUME + * (or TIF_NEED_RESCHED, which indirectly leads to handling + * NOTIFY_RESUME) is handled in KVM context. + * + * If NOTIFY_RESUME/NEED_RESCHED is set after KVM enters + * the guest, the guest will trigger a IO/MMIO exit all the + * way to userspace and the TIF flags will be handled by + * the generic "exit to userspace" logic, not by KVM. The + * exit to userspace is necessary to give the test a chance + * to check the rseq CPU ID (see #2). + * + * Alternatively, guest_code() could include an instruction + * to trigger an exit that is handled by KVM, but any such + * exit requires architecture specific code. + * + * 2. To let ioctl(KVM_RUN) make its way back to the test + * before the next round of migration. The test's check on + * the rseq CPU ID must wait for migration to complete in + * order to avoid false positive, thus any kernel rseq bug + * will be missed if the next migration starts before the + * check completes. + * + * 3. To ensure the read-side makes efficient forward progress, + * e.g. if sched_getcpu() involves a syscall. Stalling the + * read-side means the test will spend more time waiting for + * sched_getcpu() to stabilize and less time trying to hit + * the timing-dependent bug. + * + * Because any bug in this area is likely to be timing-dependent, + * run with a range of delays at 1us intervals from 1us to 10us + * as a best effort to avoid tuning the test to the point where + * it can hit _only_ the original bug and not detect future + * regressions. + * + * The original bug can reproduce with a delay up to ~500us on + * x86-64, but starts to require more iterations to reproduce + * as the delay creeps above ~10us, and the average runtime of + * each iteration obviously increases as well. Cap the delay + * at 10us to keep test runtime reasonable while minimizing + * potential coverage loss. + * + * The lower bound for reproducing the bug is likely below 1us, + * e.g. failures occur on x86-64 with nanosleep(0), but at that + * point the overhead of the syscall likely dominates the delay. + * Use usleep() for simplicity and to avoid unnecessary kernel + * dependencies. + */ + usleep((i % 10) + 1); + } + done = true; + return NULL; +} + +int main(int argc, char *argv[]) +{ + int r, i, snapshot; + struct kvm_vm *vm; + u32 cpu, rseq_cpu; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask); + TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno, + strerror(errno)); + + if (CPU_COUNT(&possible_mask) < 2) { + print_skip("Only one CPU, task migration not possible\n"); + exit(KSFT_SKIP); + } + + sys_rseq(0); + + /* + * Create and run a dummy VM that immediately exits to userspace via + * GUEST_SYNC, while concurrently migrating the process by setting its + * CPU affinity. + */ + vm = vm_create_default(VCPU_ID, 0, guest_code); + + pthread_create(&migration_thread, NULL, migration_worker, 0); + + for (i = 0; !done; i++) { + vcpu_run(vm, VCPU_ID); + TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC, + "Guest failed?"); + + /* + * Verify rseq's CPU matches sched's CPU. Ensure migration + * doesn't occur between sched_getcpu() and reading the rseq + * cpu_id by rereading both if the sequence count changes, or + * if the count is odd (migration in-progress). + */ + do { + /* + * Drop bit 0 to force a mismatch if the count is odd, + * i.e. if a migration is in-progress. + */ + snapshot = atomic_read(&seq_cnt) & ~1; + + /* + * Ensure reading sched_getcpu() and rseq.cpu_id + * complete in a single "no migration" window, i.e. are + * not reordered across the seq_cnt reads. + */ + smp_rmb(); + cpu = sched_getcpu(); + rseq_cpu = READ_ONCE(__rseq.cpu_id); + smp_rmb(); + } while (snapshot != atomic_read(&seq_cnt)); + + TEST_ASSERT(rseq_cpu == cpu, + "rseq CPU = %d, sched CPU = %d\n", rseq_cpu, cpu); + } + + /* + * Sanity check that the test was able to enter the guest a reasonable + * number of times, e.g. didn't get stalled too often/long waiting for + * sched_getcpu() to stabilize. A 2:1 migration:KVM_RUN ratio is a + * fairly conservative ratio on x86-64, which can do _more_ KVM_RUNs + * than migrations given the 1us+ delay in the migration task. + */ + TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2), + "Only performed %d KVM_RUNs, task stalled too much?\n", i); + + pthread_join(migration_thread, NULL); + + kvm_vm_free(vm); + + sys_rseq(RSEQ_FLAG_UNREGISTER); + + return 0; +} From 2da4a23599c263bd4a7658c2fe561cb3a73ea6ae Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 1 Sep 2021 13:30:30 -0700 Subject: [PATCH 472/543] KVM: selftests: Remove __NR_userfaultfd syscall fallback Revert the __NR_userfaultfd syscall fallback added for KVM selftests now that x86's unistd_{32,63}.h overrides are under uapi/ and thus not in KVM selftests' search path, i.e. now that KVM gets x86 syscall numbers from the installed kernel headers. No functional change intended. Reviewed-by: Ben Gardon Signed-off-by: Sean Christopherson Message-Id: <20210901203030.1292304-6-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/arch/x86/include/uapi/asm/unistd_64.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/arch/x86/include/uapi/asm/unistd_64.h b/tools/arch/x86/include/uapi/asm/unistd_64.h index 4205ed4158bf..cb52a3a8b8fc 100644 --- a/tools/arch/x86/include/uapi/asm/unistd_64.h +++ b/tools/arch/x86/include/uapi/asm/unistd_64.h @@ -1,7 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __NR_userfaultfd -#define __NR_userfaultfd 282 -#endif #ifndef __NR_perf_event_open # define __NR_perf_event_open 298 #endif From 7df835a32a8bedf7ce88efcfa7c9b245b52ff139 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 1 Sep 2021 13:38:29 +0200 Subject: [PATCH 473/543] md: fix a lock order reversal in md_alloc Commit b0140891a8cea3 ("md: Fix race when creating a new md device.") not only moved assigning mddev->gendisk before calling add_disk, which fixes the races described in the commit log, but also added a mddev->open_mutex critical section over add_disk and creation of the md kobj. Adding a kobject after add_disk is racy vs deleting the gendisk right after adding it, but md already prevents against that by holding a mddev->active reference. On the other hand taking this lock added a lock order reversal with what is not disk->open_mutex (used to be bdev->bd_mutex when the commit was added) for partition devices, which need that lock for the internal open for the partition scan, and a recent commit also takes it for non-partitioned devices, leading to further lockdep splatter. Fixes: b0140891a8ce ("md: Fix race when creating a new md device.") Fixes: d62633873590 ("block: support delayed holder registration") Reported-by: syzbot+fadc0aaf497e6a493b9f@syzkaller.appspotmail.com Signed-off-by: Christoph Hellwig Tested-by: syzbot+fadc0aaf497e6a493b9f@syzkaller.appspotmail.com Reviewed-by: NeilBrown Signed-off-by: Song Liu --- drivers/md/md.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index ae8fe54ea358..6c0c3d0d905a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5700,10 +5700,6 @@ static int md_alloc(dev_t dev, char *name) disk->flags |= GENHD_FL_EXT_DEVT; disk->events |= DISK_EVENT_MEDIA_CHANGE; mddev->gendisk = disk; - /* As soon as we call add_disk(), another thread could get - * through to md_open, so make sure it doesn't get too far - */ - mutex_lock(&mddev->open_mutex); add_disk(disk); error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md"); @@ -5718,7 +5714,6 @@ static int md_alloc(dev_t dev, char *name) if (mddev->kobj.sd && sysfs_create_group(&mddev->kobj, &md_bitmap_group)) pr_debug("pointless warning\n"); - mutex_unlock(&mddev->open_mutex); abort: mutex_unlock(&disks_mutex); if (!error && mddev->kobj.sd) { From 8f1b7ba55c61d2c9f3a47a4759db43abfb59fc16 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 22 Sep 2021 08:35:58 +0200 Subject: [PATCH 474/543] MAINTAINERS: ARM/VT8500, remove defunct e-mail linux@prisktech.co.nz is defunct: 4.1.2 : Recipient address rejected: Domain not found Remove it from MAINTAINERS and mark the ARM/VT8500 entry orphan. Signed-off-by: Jiri Slaby Signed-off-by: Linus Torvalds --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ca6d6fde85cf..0227e15e2484 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2804,9 +2804,8 @@ F: arch/arm/mach-pxa/include/mach/vpac270.h F: arch/arm/mach-pxa/vpac270.c ARM/VT8500 ARM ARCHITECTURE -M: Tony Prisk L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -S: Maintained +S: Orphan F: Documentation/devicetree/bindings/i2c/i2c-wmt.txt F: arch/arm/mach-vt8500/ F: drivers/clocksource/timer-vt8500.c From d81ff5fe14a950f53e2833cfa196e7bb3fd5d4e3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 10 Sep 2021 15:33:32 -0700 Subject: [PATCH 475/543] x86/asm: Fix SETZ size enqcmds() build failure When building under GCC 4.9 and 5.5: arch/x86/include/asm/special_insns.h: Assembler messages: arch/x86/include/asm/special_insns.h:286: Error: operand size mismatch for `setz' Change the type to "bool" for condition code arguments, as documented. Fixes: 7f5933f81bd8 ("x86/asm: Add an enqcmds() wrapper for the ENQCMDS instruction") Co-developed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann Signed-off-by: Kees Cook Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210910223332.3224851-1-keescook@chromium.org --- arch/x86/include/asm/special_insns.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index f3fbb84ff8a7..68c257a3de0d 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -275,7 +275,7 @@ static inline int enqcmds(void __iomem *dst, const void *src) { const struct { char _[64]; } *__src = src; struct { char _[64]; } __iomem *__dst = dst; - int zf; + bool zf; /* * ENQCMDS %(rdx), rax From c4aa1eeb093b09fbae9329a080172c58ace8da8c Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Wed, 22 Sep 2021 14:30:01 -0500 Subject: [PATCH 476/543] MAINTAINERS: update entry for NIOS2 Ley Foon has left Intel and will no longer be able to maintain NIOS2. Update the MAINTAINER's entry to Dinh Nguyen. Acked-by: Ley Foon Tan Signed-off-by: Dinh Nguyen Signed-off-by: Linus Torvalds --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0227e15e2484..4806d2156a80 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13254,9 +13254,9 @@ F: Documentation/scsi/NinjaSCSI.rst F: drivers/scsi/nsp32* NIOS2 ARCHITECTURE -M: Ley Foon Tan +M: Dinh Nguyen S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/lftan/nios2.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/dinguyen/linux.git F: arch/nios2/ NITRO ENCLAVES (NE) From 2e36a964ada4f7bda24f3caa971a33500e23af36 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 17 Sep 2021 12:16:51 -0400 Subject: [PATCH 477/543] MAINTAINERS: Update SWIOTLB maintainership Konrad's new job role is putting a serious cramp on him being a responsive maintainer and as such he is handing off the reins to Christoph Hellwig. Thank you! Acked-by: Christoph Hellwig Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Linus Torvalds --- MAINTAINERS | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 4806d2156a80..a8a1b1c283e2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17968,10 +17968,11 @@ F: Documentation/admin-guide/svga.rst F: arch/x86/boot/video* SWIOTLB SUBSYSTEM -M: Konrad Rzeszutek Wilk +M: Christoph Hellwig L: iommu@lists.linux-foundation.org S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/konrad/swiotlb.git +W: http://git.infradead.org/users/hch/dma-mapping.git +T: git git://git.infradead.org/users/hch/dma-mapping.git F: arch/*/kernel/pci-swiotlb.c F: include/linux/swiotlb.h F: kernel/dma/swiotlb.c From 4057525736b159bd456732d11270af2cc49ec21f Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 17 Sep 2021 12:16:52 -0400 Subject: [PATCH 478/543] MAINTAINERS: Update Xen-[PCI,SWIOTLB,Block] maintainership MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Konrad's new job role is putting a serious cramp on him being a responsive maintainer and as such he is handing off the reins to Juergen, Roger, and Stefano. Thank you! Acked-by: Juergen Gross Acked-by: Roger Pau Monné Acked-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Linus Torvalds --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index a8a1b1c283e2..2e75360c7b8b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20475,7 +20475,6 @@ F: samples/bpf/xdpsock* F: tools/lib/bpf/xsk* XEN BLOCK SUBSYSTEM -M: Konrad Rzeszutek Wilk M: Roger Pau Monné L: xen-devel@lists.xenproject.org (moderated for non-subscribers) S: Supported @@ -20523,7 +20522,7 @@ S: Supported F: drivers/net/xen-netback/* XEN PCI SUBSYSTEM -M: Konrad Rzeszutek Wilk +M: Juergen Gross L: xen-devel@lists.xenproject.org (moderated for non-subscribers) S: Supported F: arch/x86/pci/*xen* @@ -20546,7 +20545,8 @@ S: Supported F: sound/xen/* XEN SWIOTLB SUBSYSTEM -M: Konrad Rzeszutek Wilk +M: Juergen Gross +M: Stefano Stabellini L: xen-devel@lists.xenproject.org (moderated for non-subscribers) L: iommu@lists.linux-foundation.org S: Supported From 58e2cf5d794616b84f591d4d1276c8953278ce24 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 1 Sep 2021 09:09:28 +0200 Subject: [PATCH 479/543] init: Revert accidental changes to print irqs_disabled() Commit f8ade8dddb16 ("xsurf100: drop include of lib8390.c") accidentally changed init/main.c. Revert that part. Fixes: f8ade8dddb16 ("xsurf100: drop include of lib8390.c") Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- init/main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/init/main.c b/init/main.c index 3f7216934441..81a79a77db46 100644 --- a/init/main.c +++ b/init/main.c @@ -1242,7 +1242,7 @@ trace_initcall_start_cb(void *data, initcall_t fn) { ktime_t *calltime = (ktime_t *)data; - printk(KERN_DEBUG "calling %pS @ %i irqs_disabled() %d\n", fn, task_pid_nr(current), irqs_disabled()); + printk(KERN_DEBUG "calling %pS @ %i\n", fn, task_pid_nr(current)); *calltime = ktime_get(); } @@ -1256,8 +1256,8 @@ trace_initcall_finish_cb(void *data, initcall_t fn, int ret) rettime = ktime_get(); delta = ktime_sub(rettime, *calltime); duration = (unsigned long long) ktime_to_ns(delta) >> 10; - printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs, irqs_disabled() %d\n", - fn, ret, duration, irqs_disabled()); + printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n", + fn, ret, duration); } static ktime_t initcall_calltime; From 18a015bccf9e8927008d0a255c9f14b8ec15a648 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 22 Sep 2021 21:00:57 +0900 Subject: [PATCH 480/543] ksmbd: check protocol id in ksmbd_verify_smb_message() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When second smb2 pdu has invalid protocol id, ksmbd doesn't detect it and allow to process smb2 request. This patch add the check it in ksmbd_verify_smb_message() and don't use protocol id of smb2 request as protocol id of response. Reviewed-by: Ronnie Sahlberg Reviewed-by: Ralph Böhme Reported-by: Ronnie Sahlberg Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 2 +- fs/ksmbd/smb_common.c | 13 +++++++++---- fs/ksmbd/smb_common.h | 1 + 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index f59f9b8be51c..fd9d5595a5ca 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -433,7 +433,7 @@ static void init_chained_smb2_rsp(struct ksmbd_work *work) work->compound_pfid = KSMBD_NO_FID; } memset((char *)rsp_hdr + 4, 0, sizeof(struct smb2_hdr) + 2); - rsp_hdr->ProtocolId = rcv_hdr->ProtocolId; + rsp_hdr->ProtocolId = SMB2_PROTO_NUMBER; rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE; rsp_hdr->Command = rcv_hdr->Command; diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c index 43d3123d8b62..40f4fafa2e11 100644 --- a/fs/ksmbd/smb_common.c +++ b/fs/ksmbd/smb_common.c @@ -129,16 +129,22 @@ int ksmbd_lookup_protocol_idx(char *str) * * check for valid smb signature and packet direction(request/response) * - * Return: 0 on success, otherwise 1 + * Return: 0 on success, otherwise -EINVAL */ int ksmbd_verify_smb_message(struct ksmbd_work *work) { - struct smb2_hdr *smb2_hdr = work->request_buf; + struct smb2_hdr *smb2_hdr = work->request_buf + work->next_smb2_rcv_hdr_off; + struct smb_hdr *hdr; if (smb2_hdr->ProtocolId == SMB2_PROTO_NUMBER) return ksmbd_smb2_check_message(work); - return 0; + hdr = work->request_buf; + if (*(__le32 *)hdr->Protocol == SMB1_PROTO_NUMBER && + hdr->Command == SMB_COM_NEGOTIATE) + return 0; + + return -EINVAL; } /** @@ -265,7 +271,6 @@ static int ksmbd_negotiate_smb_dialect(void *buf) return BAD_PROT_ID; } -#define SMB_COM_NEGOTIATE 0x72 int ksmbd_init_smb_server(struct ksmbd_work *work) { struct ksmbd_conn *conn = work->conn; diff --git a/fs/ksmbd/smb_common.h b/fs/ksmbd/smb_common.h index 57c667c1be06..0a6af447cc45 100644 --- a/fs/ksmbd/smb_common.h +++ b/fs/ksmbd/smb_common.h @@ -210,6 +210,7 @@ FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES) #define SMB1_PROTO_NUMBER cpu_to_le32(0x424d53ff) +#define SMB_COM_NEGOTIATE 0x72 #define SMB1_CLIENT_GUID_SIZE (16) struct smb_hdr { From 4ea477988c423a57241ea4840b12832de6fabdfd Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 21 Sep 2021 14:19:33 +0900 Subject: [PATCH 481/543] ksmbd: remove follow symlinks support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use LOOKUP_NO_SYMLINKS flags for default lookup to prohibit the middle of symlink component lookup and remove follow symlinks parameter support. We re-implement it as reparse point later. Test result: smbclient -Ulinkinjeon%1234 //172.30.1.42/share -c "get hacked/passwd passwd" NT_STATUS_OBJECT_NAME_NOT_FOUND opening remote file \hacked\passwd Cc: Ralph Böhme Cc: Steve French Acked-by: Ronnie Sahlberg Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 43 ++++++++++--------------------------------- fs/ksmbd/vfs.c | 32 +++++++++----------------------- 2 files changed, 19 insertions(+), 56 deletions(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index fd9d5595a5ca..0c49a0e887d3 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -2632,13 +2632,9 @@ int smb2_open(struct ksmbd_work *work) goto err_out1; } - if (req->CreateOptions & FILE_DELETE_ON_CLOSE_LE) { - /* - * On delete request, instead of following up, need to - * look the current entity - */ - rc = ksmbd_vfs_kern_path(name, 0, &path, 1); - if (!rc) { + rc = ksmbd_vfs_kern_path(name, LOOKUP_NO_SYMLINKS, &path, 1); + if (!rc) { + if (req->CreateOptions & FILE_DELETE_ON_CLOSE_LE) { /* * If file exists with under flags, return access * denied error. @@ -2657,25 +2653,10 @@ int smb2_open(struct ksmbd_work *work) path_put(&path); goto err_out; } - } - } else { - if (test_share_config_flag(work->tcon->share_conf, - KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS)) { - /* - * Use LOOKUP_FOLLOW to follow the path of - * symlink in path buildup - */ - rc = ksmbd_vfs_kern_path(name, LOOKUP_FOLLOW, &path, 1); - if (rc) { /* Case for broken link ?*/ - rc = ksmbd_vfs_kern_path(name, 0, &path, 1); - } - } else { - rc = ksmbd_vfs_kern_path(name, 0, &path, 1); - if (!rc && d_is_symlink(path.dentry)) { - rc = -EACCES; - path_put(&path); - goto err_out; - } + } else if (d_is_symlink(path.dentry)) { + rc = -EACCES; + path_put(&path); + goto err_out; } } @@ -4751,12 +4732,8 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, struct path path; int rc = 0, len; int fs_infoclass_size = 0; - int lookup_flags = 0; - if (test_share_config_flag(share, KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS)) - lookup_flags = LOOKUP_FOLLOW; - - rc = ksmbd_vfs_kern_path(share->path, lookup_flags, &path, 0); + rc = ksmbd_vfs_kern_path(share->path, LOOKUP_NO_SYMLINKS, &path, 0); if (rc) { pr_err("cannot create vfs path\n"); return -EIO; @@ -5333,7 +5310,7 @@ static int smb2_rename(struct ksmbd_work *work, } ksmbd_debug(SMB, "new name %s\n", new_name); - rc = ksmbd_vfs_kern_path(new_name, 0, &path, 1); + rc = ksmbd_vfs_kern_path(new_name, LOOKUP_NO_SYMLINKS, &path, 1); if (rc) file_present = false; else @@ -5407,7 +5384,7 @@ static int smb2_create_link(struct ksmbd_work *work, } ksmbd_debug(SMB, "target name is %s\n", target_name); - rc = ksmbd_vfs_kern_path(link_name, 0, &path, 0); + rc = ksmbd_vfs_kern_path(link_name, LOOKUP_NO_SYMLINKS, &path, 0); if (rc) file_present = false; else diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c index b047f2980d96..3733e4944c1d 100644 --- a/fs/ksmbd/vfs.c +++ b/fs/ksmbd/vfs.c @@ -166,7 +166,7 @@ int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode) struct dentry *dentry; int err; - dentry = kern_path_create(AT_FDCWD, name, &path, 0); + dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_NO_SYMLINKS); if (IS_ERR(dentry)) { err = PTR_ERR(dentry); if (err != -ENOENT) @@ -203,7 +203,8 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) struct dentry *dentry; int err; - dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY); + dentry = kern_path_create(AT_FDCWD, name, &path, + LOOKUP_NO_SYMLINKS | LOOKUP_DIRECTORY); if (IS_ERR(dentry)) { err = PTR_ERR(dentry); if (err != -EEXIST) @@ -588,16 +589,11 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, char *name) struct path path; struct dentry *parent; int err; - int flags = 0; if (ksmbd_override_fsids(work)) return -ENOMEM; - if (test_share_config_flag(work->tcon->share_conf, - KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS)) - flags = LOOKUP_FOLLOW; - - err = kern_path(name, flags, &path); + err = kern_path(name, LOOKUP_NO_SYMLINKS, &path); if (err) { ksmbd_debug(VFS, "can't get %s, err %d\n", name, err); ksmbd_revert_fsids(work); @@ -652,16 +648,11 @@ int ksmbd_vfs_link(struct ksmbd_work *work, const char *oldname, struct path oldpath, newpath; struct dentry *dentry; int err; - int flags = 0; if (ksmbd_override_fsids(work)) return -ENOMEM; - if (test_share_config_flag(work->tcon->share_conf, - KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS)) - flags = LOOKUP_FOLLOW; - - err = kern_path(oldname, flags, &oldpath); + err = kern_path(oldname, LOOKUP_NO_SYMLINKS, &oldpath); if (err) { pr_err("cannot get linux path for %s, err = %d\n", oldname, err); @@ -669,7 +660,7 @@ int ksmbd_vfs_link(struct ksmbd_work *work, const char *oldname, } dentry = kern_path_create(AT_FDCWD, newname, &newpath, - flags | LOOKUP_REVAL); + LOOKUP_NO_SYMLINKS | LOOKUP_REVAL); if (IS_ERR(dentry)) { err = PTR_ERR(dentry); pr_err("path create err for %s, err %d\n", newname, err); @@ -788,7 +779,6 @@ int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp, struct dentry *src_dent, *trap_dent, *src_child; char *dst_name; int err; - int flags; dst_name = extract_last_component(newname); if (!dst_name) @@ -797,12 +787,8 @@ int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp, src_dent_parent = dget_parent(fp->filp->f_path.dentry); src_dent = fp->filp->f_path.dentry; - flags = LOOKUP_DIRECTORY; - if (test_share_config_flag(work->tcon->share_conf, - KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS)) - flags |= LOOKUP_FOLLOW; - - err = kern_path(newname, flags, &dst_path); + err = kern_path(newname, LOOKUP_NO_SYMLINKS | LOOKUP_DIRECTORY, + &dst_path); if (err) { ksmbd_debug(VFS, "Cannot get path for %s [%d]\n", newname, err); goto out; @@ -861,7 +847,7 @@ int ksmbd_vfs_truncate(struct ksmbd_work *work, const char *name, int err = 0; if (name) { - err = kern_path(name, 0, &path); + err = kern_path(name, LOOKUP_NO_SYMLINKS, &path); if (err) { pr_err("cannot get linux path for %s, err %d\n", name, err); From c32dfec6c1c36bbbcd5d33e949d99aeb215877ec Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 22 Sep 2021 13:30:59 +0200 Subject: [PATCH 482/543] USB: serial: cp210x: fix dropped characters with CP2102 Some CP2102 do not support event-insertion mode but return no error when attempting to enable it. This means that any event escape characters in the input stream will not be escaped by the device and consequently regular data may be interpreted as escape sequences and be removed from the stream by the driver. The reporter's device has batch number DCL00X etched into it and as discovered by the SHA2017 Badge team, counterfeit devices with that marking can be detected by sending malformed vendor requests. [1][2] Tests confirm that the possibly counterfeit CP2102 returns a single byte in response to a malformed two-byte part-number request, while an original CP2102 returns two bytes. Assume that every CP2102 that behaves this way also does not support event-insertion mode (e.g. cannot report parity errors). [1] https://mobile.twitter.com/sha2017badge/status/1167902087289532418 [2] https://hackaday.com/2017/08/14/hands-on-with-the-shacamp-2017-badge/#comment-3903376 Reported-by: Malte Di Donato Tested-by: Malte Di Donato Fixes: a7207e9835a4 ("USB: serial: cp210x: add support for line-status events") Cc: stable@vger.kernel.org # 5.9 Link: https://lore.kernel.org/r/20210922113100.20888-1-johan@kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 66a6ac50a4cd..b98454fe08ea 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -258,6 +258,7 @@ struct cp210x_serial_private { speed_t max_speed; bool use_actual_rate; bool no_flow_control; + bool no_event_mode; }; enum cp210x_event_state { @@ -1113,12 +1114,16 @@ static void cp210x_change_speed(struct tty_struct *tty, static void cp210x_enable_event_mode(struct usb_serial_port *port) { + struct cp210x_serial_private *priv = usb_get_serial_data(port->serial); struct cp210x_port_private *port_priv = usb_get_serial_port_data(port); int ret; if (port_priv->event_mode) return; + if (priv->no_event_mode) + return; + port_priv->event_state = ES_DATA; port_priv->event_mode = true; @@ -2074,6 +2079,33 @@ static void cp210x_init_max_speed(struct usb_serial *serial) priv->use_actual_rate = use_actual_rate; } +static void cp2102_determine_quirks(struct usb_serial *serial) +{ + struct cp210x_serial_private *priv = usb_get_serial_data(serial); + u8 *buf; + int ret; + + buf = kmalloc(2, GFP_KERNEL); + if (!buf) + return; + /* + * Some (possibly counterfeit) CP2102 do not support event-insertion + * mode and respond differently to malformed vendor requests. + * Specifically, they return one instead of two bytes when sent a + * two-byte part-number request. + */ + ret = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0), + CP210X_VENDOR_SPECIFIC, REQTYPE_DEVICE_TO_HOST, + CP210X_GET_PARTNUM, 0, buf, 2, USB_CTRL_GET_TIMEOUT); + if (ret == 1) { + dev_dbg(&serial->interface->dev, + "device does not support event-insertion mode\n"); + priv->no_event_mode = true; + } + + kfree(buf); +} + static int cp210x_get_fw_version(struct usb_serial *serial, u16 value) { struct cp210x_serial_private *priv = usb_get_serial_data(serial); @@ -2109,6 +2141,9 @@ static void cp210x_determine_type(struct usb_serial *serial) } switch (priv->partnum) { + case CP210X_PARTNUM_CP2102: + cp2102_determine_quirks(serial); + break; case CP210X_PARTNUM_CP2105: case CP210X_PARTNUM_CP2108: cp210x_get_fw_version(serial, CP210X_GET_FW_VER); From 90ca6e7db83a06e9173bee5bb34ded3b37f4948d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 22 Sep 2021 13:31:00 +0200 Subject: [PATCH 483/543] USB: serial: cp210x: add part-number debug printk Add a part-number debug printk to facilitate debugging. Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index b98454fe08ea..fd51498ab108 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -2140,6 +2140,8 @@ static void cp210x_determine_type(struct usb_serial *serial) return; } + dev_dbg(&serial->interface->dev, "partnum = 0x%02x\n", priv->partnum); + switch (priv->partnum) { case CP210X_PARTNUM_CP2102: cp2102_determine_quirks(serial); From 8cd9da85d2bd87ce889043e7b1735723dd10eb89 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 13 Sep 2021 16:53:32 +0200 Subject: [PATCH 484/543] posix-cpu-timers: Prevent spuriously armed 0-value itimer Resetting/stopping an itimer eventually leads to it being reprogrammed with an actual "0" value. As a result the itimer expires on the next tick, triggering an unexpected signal. To fix this, make sure that struct signal_struct::it[CPUCLOCK_PROF/VIRT]::expires is set to 0 when setitimer() passes a 0 it_value, indicating that the timer must stop. Fixes: 406dd42bd1ba ("posix-cpu-timers: Force next expiration recalc after itimer reset") Reported-by: Victor Stinner Reported-by: Chris Hixon Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210913145332.232023-1-frederic@kernel.org --- kernel/time/posix-cpu-timers.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index ee736861b18f..643d412ac623 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -1404,7 +1404,8 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid, } } - *newval += now; + if (*newval) + *newval += now; } /* From 3106a0847525befe3e22fc723909d1b21eb0d520 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 22 Sep 2021 13:25:40 +0300 Subject: [PATCH 485/543] nexthop: Fix memory leaks in nexthop notification chain listeners syzkaller discovered memory leaks [1] that can be reduced to the following commands: # ip nexthop add id 1 blackhole # devlink dev reload pci/0000:06:00.0 As part of the reload flow, mlxsw will unregister its netdevs and then unregister from the nexthop notification chain. Before unregistering from the notification chain, mlxsw will receive delete notifications for nexthop objects using netdevs registered by mlxsw or their uppers. mlxsw will not receive notifications for nexthops using netdevs that are not dismantled as part of the reload flow. For example, the blackhole nexthop above that internally uses the loopback netdev as its nexthop device. One way to fix this problem is to have listeners flush their nexthop tables after unregistering from the notification chain. This is error-prone as evident by this patch and also not symmetric with the registration path where a listener receives a dump of all the existing nexthops. Therefore, fix this problem by replaying delete notifications for the listener being unregistered. This is symmetric to the registration path and also consistent with the netdev notification chain. The above means that unregister_nexthop_notifier(), like register_nexthop_notifier(), will have to take RTNL in order to iterate over the existing nexthops and that any callers of the function cannot hold RTNL. This is true for mlxsw and netdevsim, but not for the VXLAN driver. To avoid a deadlock, change the latter to unregister its nexthop listener without holding RTNL, making it symmetric to the registration path. [1] unreferenced object 0xffff88806173d600 (size 512): comm "syz-executor.0", pid 1290, jiffies 4295583142 (age 143.507s) hex dump (first 32 bytes): 41 9d 1e 60 80 88 ff ff 08 d6 73 61 80 88 ff ff A..`......sa.... 08 d6 73 61 80 88 ff ff 01 00 00 00 00 00 00 00 ..sa............ backtrace: [] kmemleak_alloc_recursive include/linux/kmemleak.h:43 [inline] [] slab_post_alloc_hook+0x96/0x490 mm/slab.h:522 [] slab_alloc_node mm/slub.c:3206 [inline] [] slab_alloc mm/slub.c:3214 [inline] [] kmem_cache_alloc_trace+0x163/0x370 mm/slub.c:3231 [] kmalloc include/linux/slab.h:591 [inline] [] kzalloc include/linux/slab.h:721 [inline] [] mlxsw_sp_nexthop_obj_group_create drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c:4918 [inline] [] mlxsw_sp_nexthop_obj_new drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c:5054 [inline] [] mlxsw_sp_nexthop_obj_event+0x59a/0x2910 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c:5239 [] notifier_call_chain+0xbd/0x210 kernel/notifier.c:83 [] blocking_notifier_call_chain kernel/notifier.c:318 [inline] [] blocking_notifier_call_chain+0x72/0xa0 kernel/notifier.c:306 [] call_nexthop_notifiers+0x156/0x310 net/ipv4/nexthop.c:244 [] insert_nexthop net/ipv4/nexthop.c:2336 [inline] [] nexthop_add net/ipv4/nexthop.c:2644 [inline] [] rtm_new_nexthop+0x14e8/0x4d10 net/ipv4/nexthop.c:2913 [] rtnetlink_rcv_msg+0x448/0xbf0 net/core/rtnetlink.c:5572 [] netlink_rcv_skb+0x173/0x480 net/netlink/af_netlink.c:2504 [] rtnetlink_rcv+0x22/0x30 net/core/rtnetlink.c:5590 [] netlink_unicast_kernel net/netlink/af_netlink.c:1314 [inline] [] netlink_unicast+0x5ae/0x7f0 net/netlink/af_netlink.c:1340 [] netlink_sendmsg+0x8e1/0xe30 net/netlink/af_netlink.c:1929 [] sock_sendmsg_nosec net/socket.c:704 [inline] [] sock_sendmsg net/socket.c:724 [inline] [] ____sys_sendmsg+0x874/0x9f0 net/socket.c:2409 [] ___sys_sendmsg+0x104/0x170 net/socket.c:2463 [] __sys_sendmsg+0x111/0x1f0 net/socket.c:2492 [] __do_sys_sendmsg net/socket.c:2501 [inline] [] __se_sys_sendmsg net/socket.c:2499 [inline] [] __x64_sys_sendmsg+0x7d/0xc0 net/socket.c:2499 Fixes: 2a014b200bbd ("mlxsw: spectrum_router: Add support for nexthop objects") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 2 +- net/ipv4/nexthop.c | 19 ++++++++++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 5a8df5a195cb..141635a35c28 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -4756,12 +4756,12 @@ static void __net_exit vxlan_exit_batch_net(struct list_head *net_list) LIST_HEAD(list); unsigned int h; - rtnl_lock(); list_for_each_entry(net, net_list, exit_list) { struct vxlan_net *vn = net_generic(net, vxlan_net_id); unregister_nexthop_notifier(net, &vn->nexthop_notifier_block); } + rtnl_lock(); list_for_each_entry(net, net_list, exit_list) vxlan_destroy_tunnels(net, &list); diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 0e75fd3e57b4..9e8100728d46 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -3567,6 +3567,7 @@ static struct notifier_block nh_netdev_notifier = { }; static int nexthops_dump(struct net *net, struct notifier_block *nb, + enum nexthop_event_type event_type, struct netlink_ext_ack *extack) { struct rb_root *root = &net->nexthop.rb_root; @@ -3577,8 +3578,7 @@ static int nexthops_dump(struct net *net, struct notifier_block *nb, struct nexthop *nh; nh = rb_entry(node, struct nexthop, rb_node); - err = call_nexthop_notifier(nb, net, NEXTHOP_EVENT_REPLACE, nh, - extack); + err = call_nexthop_notifier(nb, net, event_type, nh, extack); if (err) break; } @@ -3592,7 +3592,7 @@ int register_nexthop_notifier(struct net *net, struct notifier_block *nb, int err; rtnl_lock(); - err = nexthops_dump(net, nb, extack); + err = nexthops_dump(net, nb, NEXTHOP_EVENT_REPLACE, extack); if (err) goto unlock; err = blocking_notifier_chain_register(&net->nexthop.notifier_chain, @@ -3605,8 +3605,17 @@ EXPORT_SYMBOL(register_nexthop_notifier); int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) { - return blocking_notifier_chain_unregister(&net->nexthop.notifier_chain, - nb); + int err; + + rtnl_lock(); + err = blocking_notifier_chain_unregister(&net->nexthop.notifier_chain, + nb); + if (err) + goto unlock; + nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL); +unlock: + rtnl_unlock(); + return err; } EXPORT_SYMBOL(unregister_nexthop_notifier); From 5b099870c8e0eb026a1560894d94f827832491cf Mon Sep 17 00:00:00 2001 From: Guvenc Gulce Date: Wed, 22 Sep 2021 19:21:29 +0200 Subject: [PATCH 486/543] MAINTAINERS: remove Guvenc Gulce as net/smc maintainer Remove myself as net/smc maintainer, as I am leaving IBM soon and can not maintain net/smc anymore. Cc: Julian Wiedmann Acked-by: Karsten Graul Signed-off-by: Guvenc Gulce Signed-off-by: David S. Miller --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index eeb4c70b3d5b..3c814976443e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16955,7 +16955,6 @@ F: drivers/misc/sgi-xp/ SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS M: Karsten Graul -M: Guvenc Gulce L: linux-s390@vger.kernel.org S: Supported W: http://www.ibm.com/developerworks/linux/linux390/ From 31339440b2d0a4987030aac026adbaba44e22490 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 22 Sep 2021 19:30:37 +0100 Subject: [PATCH 487/543] nfc: st-nci: Add SPI ID matching DT compatible Currently autoloading for SPI devices does not use the DT ID table, it uses SPI modalises. Supporting OF modalises is going to be difficult if not impractical, an attempt was made but has been reverted, so ensure that module autoloading works for this driver by adding the part name used in the compatible to the list of SPI IDs. Fixes: 96c8395e2166 ("spi: Revert modalias changes") Signed-off-by: Mark Brown Signed-off-by: David S. Miller --- drivers/nfc/st-nci/spi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nfc/st-nci/spi.c b/drivers/nfc/st-nci/spi.c index a620c34790e6..0875b773fb41 100644 --- a/drivers/nfc/st-nci/spi.c +++ b/drivers/nfc/st-nci/spi.c @@ -278,6 +278,7 @@ static int st_nci_spi_remove(struct spi_device *dev) static struct spi_device_id st_nci_spi_id_table[] = { {ST_NCI_SPI_DRIVER_NAME, 0}, + {"st21nfcb-spi", 0}, {} }; MODULE_DEVICE_TABLE(spi, st_nci_spi_id_table); From e68daf61ed13832aef8892200a874139700ca754 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 22 Sep 2021 16:55:48 -0700 Subject: [PATCH 488/543] net: ethernet: mtk_eth_soc: avoid creating duplicate offload entries Sometimes multiple CLS_REPLACE calls are issued for the same connection. rhashtable_insert_fast does not check for these duplicates, so multiple hardware flow entries can be created. Fix this by checking for an existing entry early Fixes: 502e84e2382d ("net: ethernet: mtk_eth_soc: add flow offloading support") Signed-off-by: Felix Fietkau Signed-off-by: Ilya Lipnitskiy Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_ppe_offload.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c index b5f68f66d42a..7bb1f20002b5 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c @@ -186,6 +186,9 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f) int hash; int i; + if (rhashtable_lookup(ð->flow_table, &f->cookie, mtk_flow_ht_params)) + return -EEXIST; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) { struct flow_match_meta match; From acc64f52afac15e9e44d9b5253271346841786e0 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 22 Sep 2021 19:03:38 -0700 Subject: [PATCH 489/543] net: mscc: ocelot: fix forwarding from BLOCKING ports remaining enabled The blamed commit made the fatally incorrect assumption that ports which aren't in the FORWARDING STP state should not have packets forwarded towards them, and that is all that needs to be done. However, that logic alone permits BLOCKING ports to forward to FORWARDING ports, which of course allows packet storms to occur when there is an L2 loop. The ocelot_get_bridge_fwd_mask should not only ask "what can the bridge do for you", but "what can you do for the bridge". This way, only FORWARDING ports forward to the other FORWARDING ports from the same bridging domain, and we are still compatible with the idea of multiple bridges. Fixes: df291e54ccca ("net: ocelot: support multiple bridges") Suggested-by: Colin Foster Reported-by: Colin Foster Signed-off-by: Vladimir Oltean Signed-off-by: Colin Foster Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 729ba826ba17..559177e6ded4 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1293,14 +1293,19 @@ static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond, return mask; } -static u32 ocelot_get_bridge_fwd_mask(struct ocelot *ocelot, +static u32 ocelot_get_bridge_fwd_mask(struct ocelot *ocelot, int src_port, struct net_device *bridge) { + struct ocelot_port *ocelot_port = ocelot->ports[src_port]; u32 mask = 0; int port; + if (!ocelot_port || ocelot_port->bridge != bridge || + ocelot_port->stp_state != BR_STATE_FORWARDING) + return 0; + for (port = 0; port < ocelot->num_phys_ports; port++) { - struct ocelot_port *ocelot_port = ocelot->ports[port]; + ocelot_port = ocelot->ports[port]; if (!ocelot_port) continue; @@ -1366,7 +1371,7 @@ void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot) struct net_device *bridge = ocelot_port->bridge; struct net_device *bond = ocelot_port->bond; - mask = ocelot_get_bridge_fwd_mask(ocelot, bridge); + mask = ocelot_get_bridge_fwd_mask(ocelot, port, bridge); mask |= cpu_fwd_mask; mask &= ~BIT(port); if (bond) { From fdbccea419dc782079ce5881d2705cc9e3881480 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 23 Sep 2021 09:51:45 +0300 Subject: [PATCH 490/543] net/mlx4_en: Don't allow aRFS for encapsulated packets Driver doesn't support aRFS for encapsulated packets, return early error in such a case. Fixes: 1eb8c695bda9 ("net/mlx4_en: Add accelerated RFS support") Signed-off-by: Aya Levin Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 35154635ec3a..8af7f2827322 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -372,6 +372,9 @@ mlx4_en_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, int nhoff = skb_network_offset(skb); int ret = 0; + if (skb->encapsulation) + return -EPROTONOSUPPORT; + if (skb->protocol != htons(ETH_P_IP)) return -EPROTONOSUPPORT; From 4d88c339c423eefe2fd48215016cb0c75fcb4c4d Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Thu, 23 Sep 2021 03:16:05 -0700 Subject: [PATCH 491/543] atlantic: Fix issue in the pm resume flow. After fixing hibernation resume flow, another usecase was found which should be explicitly handled - resume when device is in "down" state. Invoke aq_nic_init jointly with aq_nic_start only if ndev was already up during suspend/hibernate. We still need to perform nic_deinit() if caller requests for it, to handle the freeze/resume scenarios. Fixes: 57f780f1c433 ("atlantic: Fix driver resume flow.") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index dee9ff74d6d6..d4b1976ee69b 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -413,13 +413,13 @@ static int atl_resume_common(struct device *dev, bool deep) if (deep) { /* Reinitialize Nic/Vecs objects */ aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); - - ret = aq_nic_init(nic); - if (ret) - goto err_exit; } if (netif_running(nic->ndev)) { + ret = aq_nic_init(nic); + if (ret) + goto err_exit; + ret = aq_nic_start(nic); if (ret) goto err_exit; From 22b70e6f2da0a4c8b1421b00cfc3016bc9d4d9d4 Mon Sep 17 00:00:00 2001 From: dann frazier Date: Thu, 23 Sep 2021 08:50:02 -0600 Subject: [PATCH 492/543] arm64: Restore forced disabling of KPTI on ThunderX A noted side-effect of commit 0c6c2d3615ef ("arm64: Generate cpucaps.h") is that cpucaps are now sorted, changing the enumeration order. This assumed no dependencies between cpucaps, which turned out not to be true in one case. UNMAP_KERNEL_AT_EL0 currently needs to be processed after WORKAROUND_CAVIUM_27456. ThunderX systems are incompatible with KPTI, so unmap_kernel_at_el0() bails if WORKAROUND_CAVIUM_27456 is set. But because of the sorting, WORKAROUND_CAVIUM_27456 will not yet have been considered when unmap_kernel_at_el0() checks for it, so the kernel tries to run w/ KPTI - and quickly falls over. Because all ThunderX implementations have homogeneous CPUs, we can remove this dependency by just checking the current CPU for the erratum. Fixes: 0c6c2d3615ef ("arm64: Generate cpucaps.h") Cc: # 5.13.x Signed-off-by: dann frazier Suggested-by: Suzuki K Poulose Reviewed-by: Suzuki K Poulose Reviewed-by: Mark Brown Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20210923145002.3394558-1-dann.frazier@canonical.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f8a3067d10c6..6ec7036ef7e1 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1526,9 +1526,13 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, /* * For reasons that aren't entirely clear, enabling KPTI on Cavium * ThunderX leads to apparent I-cache corruption of kernel text, which - * ends as well as you might imagine. Don't even try. + * ends as well as you might imagine. Don't even try. We cannot rely + * on the cpus_have_*cap() helpers here to detect the CPU erratum + * because cpucap detection order may change. However, since we know + * affected CPUs are always in a homogeneous configuration, it is + * safe to rely on this_cpu_has_cap() here. */ - if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_27456)) { + if (this_cpu_has_cap(ARM64_WORKAROUND_CAVIUM_27456)) { str = "ARM64_WORKAROUND_CAVIUM_27456"; __kpti_forced = -1; } From 93368aab0efc87288cac65e99c9ed2e0ffc9e7d0 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Tue, 21 Sep 2021 22:35:30 +0800 Subject: [PATCH 493/543] erofs: fix up erofs_lookup tracepoint Fix up a misuse that the filename pointer isn't always valid in the ring buffer, and we should copy the content instead. Link: https://lore.kernel.org/r/20210921143531.81356-1-hsiangkao@linux.alibaba.com Fixes: 13f06f48f7bf ("staging: erofs: support tracepoint") Cc: stable@vger.kernel.org # 4.19+ Reviewed-by: Chao Yu Signed-off-by: Gao Xiang --- include/trace/events/erofs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h index bf9806fd1306..db4f2cec8360 100644 --- a/include/trace/events/erofs.h +++ b/include/trace/events/erofs.h @@ -35,20 +35,20 @@ TRACE_EVENT(erofs_lookup, TP_STRUCT__entry( __field(dev_t, dev ) __field(erofs_nid_t, nid ) - __field(const char *, name ) + __string(name, dentry->d_name.name ) __field(unsigned int, flags ) ), TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->nid = EROFS_I(dir)->nid; - __entry->name = dentry->d_name.name; + __assign_str(name, dentry->d_name.name); __entry->flags = flags; ), TP_printk("dev = (%d,%d), pnid = %llu, name:%s, flags:%x", show_dev_nid(__entry), - __entry->name, + __get_str(name), __entry->flags) ); From d705117ddd724a9d4877e338e4587010ab6a1c62 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Wed, 22 Sep 2021 17:51:41 +0800 Subject: [PATCH 494/543] erofs: fix misbehavior of unsupported chunk format check Unsupported chunk format should be checked with "if (vi->chunkformat & ~EROFS_CHUNK_FORMAT_ALL)" Found when checking with 4k-byte blockmap (although currently mkfs uses inode chunk indexes format by default.) Link: https://lore.kernel.org/r/20210922095141.233938-1-hsiangkao@linux.alibaba.com Fixes: c5aa903a59db ("erofs: support reading chunk-based uncompressed files") Reviewed-by: Liu Bo Reviewed-by: Chao Yu Signed-off-by: Gao Xiang --- fs/erofs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 31ac3a73b390..a552399e211d 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -176,7 +176,7 @@ static struct page *erofs_read_inode(struct inode *inode, } if (vi->datalayout == EROFS_INODE_CHUNK_BASED) { - if (!(vi->chunkformat & EROFS_CHUNK_FORMAT_ALL)) { + if (vi->chunkformat & ~EROFS_CHUNK_FORMAT_ALL) { erofs_err(inode->i_sb, "unsupported chunk format %x of nid %llu", vi->chunkformat, vi->nid); From c40dd3ca2a45d5bd6e8b3f4ace5cb81493096263 Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Tue, 14 Sep 2021 11:59:15 +0800 Subject: [PATCH 495/543] erofs: clear compacted_2b if compacted_4b_initial > totalidx Currently, the whole indexes will only be compacted 4B if compacted_4b_initial > totalidx. So, the calculated compacted_2b is worthless for that case. It may waste CPU resources. No need to update compacted_4b_initial as mkfs since it's used to fulfill the alignment of the 1st compacted_2b pack and would handle the case above. We also need to clarify compacted_4b_end here. It's used for the last lclusters which aren't fitted in the previous compacted_2b packs. Some messages are from Xiang. Link: https://lore.kernel.org/r/20210914035915.1190-1-zbestahu@gmail.com Signed-off-by: Yue Hu Reviewed-by: Gao Xiang Reviewed-by: Chao Yu [ Gao Xiang: it's enough to use "compacted_4b_initial < totalidx". ] Signed-off-by: Gao Xiang --- fs/erofs/zmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 9fb98d85a3ce..7a6df35fdc91 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -369,7 +369,8 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m, if (compacted_4b_initial == 32 / 4) compacted_4b_initial = 0; - if (vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) + if ((vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) && + compacted_4b_initial < totalidx) compacted_2b = rounddown(totalidx - compacted_4b_initial, 16); else compacted_2b = 0; From a3727a8bac0a9e77c70820655fd8715523ba3db7 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 23 Sep 2021 09:50:11 -0400 Subject: [PATCH 496/543] selinux,smack: fix subjective/objective credential use mixups Jann Horn reported a problem with commit eb1231f73c4d ("selinux: clarify task subjective and objective credentials") where some LSM hooks were attempting to access the subjective credentials of a task other than the current task. Generally speaking, it is not safe to access another task's subjective credentials and doing so can cause a number of problems. Further, while looking into the problem, I realized that Smack was suffering from a similar problem brought about by a similar commit 1fb057dcde11 ("smack: differentiate between subjective and objective task credentials"). This patch addresses this problem by restoring the use of the task's objective credentials in those cases where the task is other than the current executing task. Not only does this resolve the problem reported by Jann, it is arguably the correct thing to do in these cases. Cc: stable@vger.kernel.org Fixes: eb1231f73c4d ("selinux: clarify task subjective and objective credentials") Fixes: 1fb057dcde11 ("smack: differentiate between subjective and objective task credentials") Reported-by: Jann Horn Acked-by: Eric W. Biederman Acked-by: Casey Schaufler Signed-off-by: Paul Moore --- security/selinux/hooks.c | 4 ++-- security/smack/smack_lsm.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 6517f221d52c..e7ebd45ca345 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2157,7 +2157,7 @@ static int selinux_ptrace_access_check(struct task_struct *child, static int selinux_ptrace_traceme(struct task_struct *parent) { return avc_has_perm(&selinux_state, - task_sid_subj(parent), task_sid_obj(current), + task_sid_obj(parent), task_sid_obj(current), SECCLASS_PROCESS, PROCESS__PTRACE, NULL); } @@ -6222,7 +6222,7 @@ static int selinux_msg_queue_msgrcv(struct kern_ipc_perm *msq, struct msg_msg *m struct ipc_security_struct *isec; struct msg_security_struct *msec; struct common_audit_data ad; - u32 sid = task_sid_subj(target); + u32 sid = task_sid_obj(target); int rc; isec = selinux_ipc(msq); diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index cacbe7518519..21a0e7c3b8de 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -2016,7 +2016,7 @@ static int smk_curacc_on_task(struct task_struct *p, int access, const char *caller) { struct smk_audit_info ad; - struct smack_known *skp = smk_of_task_struct_subj(p); + struct smack_known *skp = smk_of_task_struct_obj(p); int rc; smk_ad_init(&ad, caller, LSM_AUDIT_DATA_TASK); @@ -3480,7 +3480,7 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode) */ static int smack_getprocattr(struct task_struct *p, char *name, char **value) { - struct smack_known *skp = smk_of_task_struct_subj(p); + struct smack_known *skp = smk_of_task_struct_obj(p); char *cp; int slen; From 1f828223b7991a228bc2aef837b78737946d44b2 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 22 Sep 2021 15:49:06 -0700 Subject: [PATCH 497/543] memcg: flush lruvec stats in the refault MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prior to the commit 7e1c0d6f5820 ("memcg: switch lruvec stats to rstat") and the commit aa48e47e3906 ("memcg: infrastructure to flush memcg stats"), each lruvec memcg stats can be off by (nr_cgroups * nr_cpus * 32) at worst and for unbounded amount of time. The commit aa48e47e3906 moved the lruvec stats to rstat infrastructure and the commit 7e1c0d6f5820 bounded the error for all the lruvec stats to (nr_cpus * 32) at worst for at most 2 seconds. More specifically it decoupled the number of stats and the number of cgroups from the error rate. However this reduction in error comes with the cost of triggering the slowpath of stats update more frequently. Previously in the slowpath the kernel adds the stats up the memcg tree. After aa48e47e3906, the kernel triggers the asyn lruvec stats flush through queue_work(). This causes regression reports from 0day kernel bot [1] as well as from phoronix test suite [2]. We tried two options to fix the regression: 1) Increase the threshold to trigger the slowpath in lruvec stats update codepath from 32 to 512. 2) Remove the slowpath from lruvec stats update codepath and instead flush the stats in the page refault codepath. The assumption is that the kernel timely flush the stats, so, the update tree would be small in the refault codepath to not cause the preformance impact. Following are the results of will-it-scale/page_fault[1|2|3] benchmark on four settings i.e. (1) 5.15-rc1 as baseline (2) 5.15-rc1 with aa48e47e3906 and 7e1c0d6f5820 reverted (3) 5.15-rc1 with option-1 (4) 5.15-rc1 with option-2. test (1) (2) (3) (4) pg_f1 368563 406277 (10.23%) 399693 (8.44%) 416398 (12.97%) pg_f2 338399 372133 (9.96%) 369180 (9.09%) 381024 (12.59%) pg_f3 500853 575399 (14.88%) 570388 (13.88%) 576083 (15.02%) From the above result, it seems like the option-2 not only solves the regression but also improves the performance for at least these benchmarks. Feng Tang (intel) ran the aim7 benchmark with these two options and confirms that option-1 reduces the regression but option-2 removes the regression. Michael Larabel (phoronix) ran multiple benchmarks with these options and reported the results at [3] and it shows for most benchmarks option-2 removes the regression introduced by the commit aa48e47e3906 ("memcg: infrastructure to flush memcg stats"). Based on the experiment results, this patch proposed the option-2 as the solution to resolve the regression. Link: https://lore.kernel.org/all/20210726022421.GB21872@xsang-OptiPlex-9020 [1] Link: https://www.phoronix.com/scan.php?page=article&item=linux515-compile-regress [2] Link: https://openbenchmarking.org/result/2109226-DEBU-LINUX5104 [3] Fixes: aa48e47e3906 ("memcg: infrastructure to flush memcg stats") Signed-off-by: Shakeel Butt Tested-by: Michael Larabel Cc: Johannes Weiner Cc: Roman Gushchin Cc: Feng Tang Cc: Michal Hocko Cc: Hillf Danton , Cc: Michal Koutný Cc: Andrew Morton , Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 10 ---------- mm/workingset.c | 1 + 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b762215d73eb..6da5020a8656 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -106,9 +106,6 @@ static bool do_memsw_account(void) /* memcg and lruvec stats flushing */ static void flush_memcg_stats_dwork(struct work_struct *w); static DECLARE_DEFERRABLE_WORK(stats_flush_dwork, flush_memcg_stats_dwork); -static void flush_memcg_stats_work(struct work_struct *w); -static DECLARE_WORK(stats_flush_work, flush_memcg_stats_work); -static DEFINE_PER_CPU(unsigned int, stats_flush_threshold); static DEFINE_SPINLOCK(stats_flush_lock); #define THRESHOLDS_EVENTS_TARGET 128 @@ -682,8 +679,6 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, /* Update lruvec */ __this_cpu_add(pn->lruvec_stats_percpu->state[idx], val); - if (!(__this_cpu_inc_return(stats_flush_threshold) % MEMCG_CHARGE_BATCH)) - queue_work(system_unbound_wq, &stats_flush_work); } /** @@ -5361,11 +5356,6 @@ static void flush_memcg_stats_dwork(struct work_struct *w) queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ); } -static void flush_memcg_stats_work(struct work_struct *w) -{ - mem_cgroup_flush_stats(); -} - static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); diff --git a/mm/workingset.c b/mm/workingset.c index d4268d8e9a82..d5b81e4f4cbe 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -352,6 +352,7 @@ void workingset_refault(struct page *page, void *shadow) inc_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file); + mem_cgroup_flush_stats(); /* * Compare the distance to the existing workingset size. We * don't activate pages that couldn't stay resident even if From 12064c1768439fa0882547010afae6b52aafa7af Mon Sep 17 00:00:00 2001 From: Jia He Date: Thu, 23 Sep 2021 11:35:57 +0800 Subject: [PATCH 498/543] Revert "ACPI: Add memory semantics to acpi_os_map_memory()" This reverts commit 437b38c51162f8b87beb28a833c4d5dc85fa864e. The memory semantics added in commit 437b38c51162 causes SystemMemory Operation region, whose address range is not described in the EFI memory map to be mapped as NormalNC memory on arm64 platforms (through acpi_os_map_memory() in acpi_ex_system_memory_space_handler()). This triggers the following abort on an ARM64 Ampere eMAG machine, because presumably the physical address range area backing the Opregion does not support NormalNC memory attributes driven on the bus. Internal error: synchronous external abort: 96000410 [#1] SMP Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0+ #462 Hardware name: MiTAC RAPTOR EV-883832-X3-0001/RAPTOR, BIOS 0.14 02/22/2019 pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [...snip...] Call trace: acpi_ex_system_memory_space_handler+0x26c/0x2c8 acpi_ev_address_space_dispatch+0x228/0x2c4 acpi_ex_access_region+0x114/0x268 acpi_ex_field_datum_io+0x128/0x1b8 acpi_ex_extract_from_field+0x14c/0x2ac acpi_ex_read_data_from_field+0x190/0x1b8 acpi_ex_resolve_node_to_value+0x1ec/0x288 acpi_ex_resolve_to_value+0x250/0x274 acpi_ds_evaluate_name_path+0xac/0x124 acpi_ds_exec_end_op+0x90/0x410 acpi_ps_parse_loop+0x4ac/0x5d8 acpi_ps_parse_aml+0xe0/0x2c8 acpi_ps_execute_method+0x19c/0x1ac acpi_ns_evaluate+0x1f8/0x26c acpi_ns_init_one_device+0x104/0x140 acpi_ns_walk_namespace+0x158/0x1d0 acpi_ns_initialize_devices+0x194/0x218 acpi_initialize_objects+0x48/0x50 acpi_init+0xe0/0x498 If the Opregion address range is not present in the EFI memory map there is no way for us to determine the memory attributes to use to map it - defaulting to NormalNC does not work (and it is not correct on a memory region that may have read side-effects) and therefore commit 437b38c51162 should be reverted, which means reverting back to the original behavior whereby address ranges that are mapped using acpi_os_map_memory() default to the safe devicenGnRnE attributes on ARM64 if the mapped address range is not defined in the EFI memory map. Fixes: 437b38c51162 ("ACPI: Add memory semantics to acpi_os_map_memory()") Signed-off-by: Jia He Acked-by: Lorenzo Pieralisi Acked-by: Catalin Marinas Signed-off-by: Rafael J. Wysocki --- arch/arm64/include/asm/acpi.h | 3 --- arch/arm64/kernel/acpi.c | 19 +++---------------- drivers/acpi/osl.c | 23 +++++++---------------- include/acpi/acpi_io.h | 8 -------- 4 files changed, 10 insertions(+), 43 deletions(-) diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 7535dc7cc5aa..bd68e1b7f29f 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -50,9 +50,6 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr); void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); #define acpi_os_ioremap acpi_os_ioremap -void __iomem *acpi_os_memmap(acpi_physical_address phys, acpi_size size); -#define acpi_os_memmap acpi_os_memmap - typedef u64 phys_cpuid_t; #define PHYS_CPUID_INVALID INVALID_HWID diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 1c9c2f7a1c04..f3851724fe35 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -273,8 +273,7 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr) return __pgprot(PROT_DEVICE_nGnRnE); } -static void __iomem *__acpi_os_ioremap(acpi_physical_address phys, - acpi_size size, bool memory) +void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) { efi_memory_desc_t *md, *region = NULL; pgprot_t prot; @@ -300,11 +299,9 @@ static void __iomem *__acpi_os_ioremap(acpi_physical_address phys, * It is fine for AML to remap regions that are not represented in the * EFI memory map at all, as it only describes normal memory, and MMIO * regions that require a virtual mapping to make them accessible to - * the EFI runtime services. Determine the region default - * attributes by checking the requested memory semantics. + * the EFI runtime services. */ - prot = memory ? __pgprot(PROT_NORMAL_NC) : - __pgprot(PROT_DEVICE_nGnRnE); + prot = __pgprot(PROT_DEVICE_nGnRnE); if (region) { switch (region->type) { case EFI_LOADER_CODE: @@ -364,16 +361,6 @@ static void __iomem *__acpi_os_ioremap(acpi_physical_address phys, return __ioremap(phys, size, prot); } -void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) -{ - return __acpi_os_ioremap(phys, size, false); -} - -void __iomem *acpi_os_memmap(acpi_physical_address phys, acpi_size size) -{ - return __acpi_os_ioremap(phys, size, true); -} - /* * Claim Synchronous External Aborts as a firmware first notification. * diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index a43f1521efe6..45c5c0e45e33 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -284,8 +284,7 @@ acpi_map_lookup_virt(void __iomem *virt, acpi_size size) #define should_use_kmap(pfn) page_is_ram(pfn) #endif -static void __iomem *acpi_map(acpi_physical_address pg_off, unsigned long pg_sz, - bool memory) +static void __iomem *acpi_map(acpi_physical_address pg_off, unsigned long pg_sz) { unsigned long pfn; @@ -295,8 +294,7 @@ static void __iomem *acpi_map(acpi_physical_address pg_off, unsigned long pg_sz, return NULL; return (void __iomem __force *)kmap(pfn_to_page(pfn)); } else - return memory ? acpi_os_memmap(pg_off, pg_sz) : - acpi_os_ioremap(pg_off, pg_sz); + return acpi_os_ioremap(pg_off, pg_sz); } static void acpi_unmap(acpi_physical_address pg_off, void __iomem *vaddr) @@ -311,10 +309,9 @@ static void acpi_unmap(acpi_physical_address pg_off, void __iomem *vaddr) } /** - * __acpi_os_map_iomem - Get a virtual address for a given physical address range. + * acpi_os_map_iomem - Get a virtual address for a given physical address range. * @phys: Start of the physical address range to map. * @size: Size of the physical address range to map. - * @memory: true if remapping memory, false if IO * * Look up the given physical address range in the list of existing ACPI memory * mappings. If found, get a reference to it and return a pointer to it (its @@ -324,8 +321,8 @@ static void acpi_unmap(acpi_physical_address pg_off, void __iomem *vaddr) * During early init (when acpi_permanent_mmap has not been set yet) this * routine simply calls __acpi_map_table() to get the job done. */ -static void __iomem __ref -*__acpi_os_map_iomem(acpi_physical_address phys, acpi_size size, bool memory) +void __iomem __ref +*acpi_os_map_iomem(acpi_physical_address phys, acpi_size size) { struct acpi_ioremap *map; void __iomem *virt; @@ -356,7 +353,7 @@ static void __iomem __ref pg_off = round_down(phys, PAGE_SIZE); pg_sz = round_up(phys + size, PAGE_SIZE) - pg_off; - virt = acpi_map(phys, size, memory); + virt = acpi_map(phys, size); if (!virt) { mutex_unlock(&acpi_ioremap_lock); kfree(map); @@ -375,17 +372,11 @@ out: mutex_unlock(&acpi_ioremap_lock); return map->virt + (phys - map->phys); } - -void __iomem *__ref -acpi_os_map_iomem(acpi_physical_address phys, acpi_size size) -{ - return __acpi_os_map_iomem(phys, size, false); -} EXPORT_SYMBOL_GPL(acpi_os_map_iomem); void *__ref acpi_os_map_memory(acpi_physical_address phys, acpi_size size) { - return (void *)__acpi_os_map_iomem(phys, size, true); + return (void *)acpi_os_map_iomem(phys, size); } EXPORT_SYMBOL_GPL(acpi_os_map_memory); diff --git a/include/acpi/acpi_io.h b/include/acpi/acpi_io.h index a0212e67d6f4..027faa8883aa 100644 --- a/include/acpi/acpi_io.h +++ b/include/acpi/acpi_io.h @@ -14,14 +14,6 @@ static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys, } #endif -#ifndef acpi_os_memmap -static inline void __iomem *acpi_os_memmap(acpi_physical_address phys, - acpi_size size) -{ - return ioremap_cache(phys, size); -} -#endif - extern bool acpi_permanent_mmap; void __iomem __ref From 03ab9cb982b622239cc2542ce7617b98a9ea159e Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 20 Sep 2021 13:14:15 +0100 Subject: [PATCH 499/543] cifs: Deal with some warnings from W=1 Deal with some warnings generated from make W=1: (1) Add/remove/fix kerneldoc parameters descriptions. (2) Turn cifs' rqst_page_get_length()'s banner comment into a kerneldoc comment. It should probably be prefixed with "cifs_" though. Signed-off-by: David Howells Signed-off-by: Steve French --- fs/cifs/misc.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 03da00eb7c04..f2916b51652a 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -590,6 +590,7 @@ void cifs_put_writer(struct cifsInodeInfo *cinode) /** * cifs_queue_oplock_break - queue the oplock break handler for cfile + * @cfile: The file to break the oplock on * * This function is called from the demultiplex thread when it * receives an oplock break for @cfile. @@ -1065,6 +1066,9 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw) /** * cifs_alloc_hash - allocate hash and hash context together + * @name: The name of the crypto hash algo + * @shash: Where to put the pointer to the hash algo + * @sdesc: Where to put the pointer to the hash descriptor * * The caller has to make sure @sdesc is initialized to either NULL or * a valid context. Both can be freed via cifs_free_hash(). @@ -1103,6 +1107,8 @@ cifs_alloc_hash(const char *name, /** * cifs_free_hash - free hash and hash context together + * @shash: Where to find the pointer to the hash algo + * @sdesc: Where to find the pointer to the hash descriptor * * Freeing a NULL hash or context is safe. */ @@ -1118,8 +1124,10 @@ cifs_free_hash(struct crypto_shash **shash, struct sdesc **sdesc) /** * rqst_page_get_length - obtain the length and offset for a page in smb_rqst - * Input: rqst - a smb_rqst, page - a page index for rqst - * Output: *len - the length for this page, *offset - the offset for this page + * @rqst: The request descriptor + * @page: The index of the page to query + * @len: Where to store the length for this page: + * @offset: Where to store the offset for this page */ void rqst_page_get_length(struct smb_rqst *rqst, unsigned int page, unsigned int *len, unsigned int *offset) @@ -1152,6 +1160,8 @@ void extract_unc_hostname(const char *unc, const char **h, size_t *len) /** * copy_path_name - copy src path to dst, possibly truncating + * @dst: The destination buffer + * @src: The source name * * returns number of bytes written (including trailing nul) */ From c48977f020d5846215e2ff7e8172e7b46b3d64b4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 20 Sep 2021 14:16:00 +0200 Subject: [PATCH 500/543] drm/amd/display: fix empty debug macros Using an empty macro expansion as a conditional expression produces a W=1 warning: drivers/gpu/drm/amd/amdgpu/../display/dc/dce/dce_aux.c: In function 'dce_aux_transfer_with_retries': drivers/gpu/drm/amd/amdgpu/../display/dc/dce/dce_aux.c:775:156: error: suggest braces around empty body in an 'if' statement [-Werror=empty-body] 775 | "dce_aux_transfer_with_retries: AUX_RET_SUCCESS: AUX_TRANSACTION_REPLY_I2C_OVER_AUX_DEFER"); | ^ drivers/gpu/drm/amd/amdgpu/../display/dc/dce/dce_aux.c:783:155: error: suggest braces around empty body in an 'if' statement [-Werror=empty-body] 783 | "dce_aux_transfer_with_retries: AUX_RET_SUCCESS: AUX_TRANSACTION_REPLY_I2C_OVER_AUX_NACK"); | ^ Expand it to "do { } while (0)" instead to make the expression more robust and avoid the warning. Fixes: 56aca2309301 ("drm/amd/display: Add AUX I2C tracing.") Signed-off-by: Arnd Bergmann Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_aux.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index e14f99b4b0c3..3c3347341103 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -42,7 +42,7 @@ #define DC_LOGGER \ engine->ctx->logger -#define DC_TRACE_LEVEL_MESSAGE(...) /* do nothing */ +#define DC_TRACE_LEVEL_MESSAGE(...) do { } while (0) #define IS_DC_I2CAUX_LOGGING_ENABLED() (false) #define LOG_FLAG_Error_I2cAux LOG_ERROR #define LOG_FLAG_I2cAux_DceAux LOG_I2C_AUX @@ -76,7 +76,7 @@ enum { #define DEFAULT_AUX_ENGINE_MULT 0 #define DEFAULT_AUX_ENGINE_LENGTH 69 -#define DC_TRACE_LEVEL_MESSAGE(...) /* do nothing */ +#define DC_TRACE_LEVEL_MESSAGE(...) do { } while (0) static void release_engine( struct dce_aux *engine) From 6de0653f7719bd0aa61f683fba16ae0e2c4ead64 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 17 Sep 2021 12:05:30 -0400 Subject: [PATCH 501/543] MAINTAINERS: fix up entry for AMD Powerplay Fix the path to cover both the older powerplay infrastructure and the newer SwSMU infrastructure. Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ca6d6fde85cf..556957c5aa5c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -977,12 +977,12 @@ L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/amd-pmc.* -AMD POWERPLAY +AMD POWERPLAY AND SWSMU M: Evan Quan L: amd-gfx@lists.freedesktop.org S: Supported T: git https://gitlab.freedesktop.org/agd5f/linux.git -F: drivers/gpu/drm/amd/pm/powerplay/ +F: drivers/gpu/drm/amd/pm/ AMD PTDMA DRIVER M: Sanjay R Mehta From 7beb26dcedaa977ece5be7c712a66b7b6c66fc2b Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 13 Sep 2021 10:03:36 -0400 Subject: [PATCH 502/543] drm/amdkfd: SVM map to gpus check vma boundary SVM range may includes multiple VMAs with different vm_flags, if prange page index is the last page of the VMA offset + npages, update GPU mapping to create GPU page table with same VMA access permission. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 9fc8021bb0ab..432d5aae0962 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1178,7 +1178,11 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, for (i = offset; i < offset + npages; i++) { last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN; dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN; - if ((prange->start + i) < prange->last && + + /* Collect all pages in the same address range and memory domain + * that can be mapped with a single call to update mapping. + */ + if (i < offset + npages - 1 && last_domain == (dma_addr[i + 1] & SVM_RANGE_VRAM_DOMAIN)) continue; From f63251184a81039ebc805306505838c2a073e51a Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Tue, 14 Sep 2021 16:33:40 -0400 Subject: [PATCH 503/543] drm/amdkfd: fix dma mapping leaking warning For xnack off, restore work dma unmap previous system memory page, and dma map the updated system memory page to update GPU mapping, this is not dma mapping leaking, remove the WARN_ONCE for dma mapping leaking. prange->dma_addr store the VRAM page pfn after the range migrated to VRAM, should not dma unmap VRAM page when updating GPU mapping or remove prange. Add helper svm_is_valid_dma_mapping_addr to check VRAM page and error cases. Mask out SVM_RANGE_VRAM_DOMAIN flag in dma_addr before calling amdgpu vm update to avoid BUG_ON(*addr & 0xFFFF00000000003FULL), and set it again immediately after. This flag is used to know the type of page later to dma unmapping system memory page. Fixes: 1d5dbfe6c06a ("drm/amdkfd: classify and map mixed svm range pages in GPU") Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 432d5aae0962..9d0f65a90002 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -118,6 +118,13 @@ static void svm_range_remove_notifier(struct svm_range *prange) mmu_interval_notifier_remove(&prange->notifier); } +static bool +svm_is_valid_dma_mapping_addr(struct device *dev, dma_addr_t dma_addr) +{ + return dma_addr && !dma_mapping_error(dev, dma_addr) && + !(dma_addr & SVM_RANGE_VRAM_DOMAIN); +} + static int svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, unsigned long offset, unsigned long npages, @@ -139,8 +146,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, addr += offset; for (i = 0; i < npages; i++) { - if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]), - "leaking dma mapping\n")) + if (svm_is_valid_dma_mapping_addr(dev, addr[i])) dma_unmap_page(dev, addr[i], PAGE_SIZE, dir); page = hmm_pfn_to_page(hmm_pfns[i]); @@ -209,7 +215,7 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, return; for (i = offset; i < offset + npages; i++) { - if (!dma_addr[i] || dma_mapping_error(dev, dma_addr[i])) + if (!svm_is_valid_dma_mapping_addr(dev, dma_addr[i])) continue; pr_debug("dma unmapping 0x%llx\n", dma_addr[i] >> PAGE_SHIFT); dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir); @@ -1165,7 +1171,7 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned long last_start; int last_domain; int r = 0; - int64_t i; + int64_t i, j; last_start = prange->start + offset; @@ -1205,6 +1211,10 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, NULL, dma_addr, &vm->last_update, &table_freed); + + for (j = last_start - prange->start; j <= i; j++) + dma_addr[j] |= last_domain; + if (r) { pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start); goto out; From ab39d3cef526ba09c4c6923b4cd7e6ec1c5d4faa Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 23 Sep 2021 11:58:43 +0800 Subject: [PATCH 504/543] drm/amd/pm: Update intermediate power state for SI Update the current state as boot state during dpm initialization. During the subsequent initialization, set_power_state gets called to transition to the final power state. set_power_state refers to values from the current state and without current state populated, it could result in NULL pointer dereference. For ex: on platforms where PCI speed change is supported through ACPI ATCS method, the link speed of current state needs to be queried before deciding on changing to final power state's link speed. The logic to query ATCS-support was broken on certain platforms. The issue became visible when broken ATCS-support logic got fixed with commit f9b7f3703ff9 ("drm/amdgpu/acpi: make ATPX/ATCS structures global (v2)"). Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1698 Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/powerplay/si_dpm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c b/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c index bdbbeb959c68..81f82aa05ec2 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c @@ -6867,6 +6867,8 @@ static int si_dpm_enable(struct amdgpu_device *adev) si_enable_auto_throttle_source(adev, AMDGPU_DPM_AUTO_THROTTLE_SRC_THERMAL, true); si_thermal_start_thermal_controller(adev); + ni_update_current_ps(adev, boot_ps); + return 0; } From 7d6687200a939176847090bbde5cb79a82792a2f Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Fri, 17 Sep 2021 14:32:14 -0400 Subject: [PATCH 505/543] drm/amdkfd: handle svm migrate init error If svm migration init failed to create pgmap for device memory, set pgmap type to 0 to disable device SVM support capability. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index dab290a4d19d..165e0ebb619d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -894,6 +894,9 @@ int svm_migrate_init(struct amdgpu_device *adev) r = devm_memremap_pages(adev->dev, pgmap); if (IS_ERR(r)) { pr_err("failed to register HMM device memory\n"); + + /* Disable SVM support capability */ + pgmap->type = 0; devm_release_mem_region(adev->dev, res->start, res->end - res->start + 1); return PTR_ERR(r); From 197ae17722e989942b36e33e044787877f158574 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 20 Sep 2021 17:25:52 -0400 Subject: [PATCH 506/543] drm/amdkfd: fix svm_migrate_fini warning Device manager releases device-specific resources when a driver disconnects from a device, devm_memunmap_pages and devm_release_mem_region calls in svm_migrate_fini are redundant. It causes below warning trace after patch "drm/amdgpu: Split amdgpu_device_fini into early and late", so remove function svm_migrate_fini. BUG: https://gitlab.freedesktop.org/drm/amd/-/issues/1718 WARNING: CPU: 1 PID: 3646 at drivers/base/devres.c:795 devm_release_action+0x51/0x60 Call Trace: ? memunmap_pages+0x360/0x360 svm_migrate_fini+0x2d/0x60 [amdgpu] kgd2kfd_device_exit+0x23/0xa0 [amdgpu] amdgpu_amdkfd_device_fini_sw+0x1d/0x30 [amdgpu] amdgpu_device_fini_sw+0x45/0x290 [amdgpu] amdgpu_driver_release_kms+0x12/0x30 [amdgpu] drm_dev_release+0x20/0x40 [drm] release_nodes+0x196/0x1e0 device_release_driver_internal+0x104/0x1d0 driver_detach+0x47/0x90 bus_remove_driver+0x7a/0xd0 pci_unregister_driver+0x3d/0x90 amdgpu_exit+0x11/0x20 [amdgpu] Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 1 - drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 13 ++++--------- drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 5 ----- 3 files changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 98d1b3ab3a46..c2a4d920da40 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -971,7 +971,6 @@ out: void kgd2kfd_device_exit(struct kfd_dev *kfd) { if (kfd->init_complete) { - svm_migrate_fini((struct amdgpu_device *)kfd->kgd); device_queue_manager_uninit(kfd->dqm); kfd_interrupt_exit(kfd); kfd_topology_remove_device(kfd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 165e0ebb619d..4a16e3c257b9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -891,6 +891,10 @@ int svm_migrate_init(struct amdgpu_device *adev) pgmap->ops = &svm_migrate_pgmap_ops; pgmap->owner = SVM_ADEV_PGMAP_OWNER(adev); pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; + + /* Device manager releases device-specific resources, memory region and + * pgmap when driver disconnects from device. + */ r = devm_memremap_pages(adev->dev, pgmap); if (IS_ERR(r)) { pr_err("failed to register HMM device memory\n"); @@ -911,12 +915,3 @@ int svm_migrate_init(struct amdgpu_device *adev) return 0; } - -void svm_migrate_fini(struct amdgpu_device *adev) -{ - struct dev_pagemap *pgmap = &adev->kfd.dev->pgmap; - - devm_memunmap_pages(adev->dev, pgmap); - devm_release_mem_region(adev->dev, pgmap->range.start, - pgmap->range.end - pgmap->range.start + 1); -} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h index 0de76b5d4973..2f5b3394c9ed 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h @@ -47,7 +47,6 @@ unsigned long svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr); int svm_migrate_init(struct amdgpu_device *adev); -void svm_migrate_fini(struct amdgpu_device *adev); #else @@ -55,10 +54,6 @@ static inline int svm_migrate_init(struct amdgpu_device *adev) { return 0; } -static inline void svm_migrate_fini(struct amdgpu_device *adev) -{ - /* empty */ -} #endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */ From 4f22262280ccb5c0a18a42029313938aabfaff12 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 23 Sep 2021 12:42:35 -0500 Subject: [PATCH 507/543] cifs: Clear modified attribute bit from inode flags Clear CIFS_INO_MODIFIED_ATTR bit from inode flags after updating mtime and ctime Signed-off-by: Rohith Surabattula Reviewed-by: Paulo Alcantara (SUSE) Acked-by: Ronnie Sahlberg Cc: stable@vger.kernel.org # 5.13+ Signed-off-by: Steve French --- fs/cifs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 0ab5bb24b8ca..13f3182cf796 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -884,7 +884,7 @@ int cifs_close(struct inode *inode, struct file *file) cinode->lease_granted && !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) && dclose) { - if (test_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) { + if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) { inode->i_ctime = inode->i_mtime = current_time(inode); cifs_fscache_update_inode_cookie(inode); } From b06d893ef2492245d0319b4136edb4c346b687a3 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 23 Sep 2021 16:00:31 -0500 Subject: [PATCH 508/543] smb3: correct smb3 ACL security descriptor Address warning: fs/smbfs_client/smb2pdu.c:2425 create_sd_buf() warn: struct type mismatch 'smb3_acl vs cifs_acl' Pointed out by Dan Carpenter via smatch code analysis tool Reported-by: Dan Carpenter Acked-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 672ae78e866a..7829c590eeac 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2397,7 +2397,7 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len) buf->sd.OffsetDacl = cpu_to_le32(ptr - (__u8 *)&buf->sd); /* Ship the ACL for now. we will copy it into buf later. */ aclptr = ptr; - ptr += sizeof(struct cifs_acl); + ptr += sizeof(struct smb3_acl); /* create one ACE to hold the mode embedded in reserved special SID */ acelen = setup_special_mode_ACE((struct cifs_ace *)ptr, (__u64)mode); @@ -2422,7 +2422,7 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len) acl.AclRevision = ACL_REVISION; /* See 2.4.4.1 of MS-DTYP */ acl.AclSize = cpu_to_le16(acl_size); acl.AceCount = cpu_to_le16(ace_count); - memcpy(aclptr, &acl, sizeof(struct cifs_acl)); + memcpy(aclptr, &acl, sizeof(struct smb3_acl)); buf->ccontext.DataLength = cpu_to_le32(ptr - (__u8 *)&buf->sd); *len = roundup(ptr - (__u8 *)buf, 8); From 1db1aa98871defd9316d13f59708f2277c4f9232 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 23 Sep 2021 18:52:40 -0500 Subject: [PATCH 509/543] smb3: correct server pointer dereferencing check to be more consistent Address warning: fs/smbfs_client/misc.c:273 header_assemble() warn: variable dereferenced before check 'treeCon->ses->server' Pointed out by Dan Carpenter via smatch code analysis tool Although the check is likely unneeded, adding it makes the code more consistent and easier to read, as the same check is done elsewhere in the function. Reported-by: Dan Carpenter Acked-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/misc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index f2916b51652a..bb1185fff8cc 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -264,7 +264,8 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , /* Uid is not converted */ buffer->Uid = treeCon->ses->Suid; - buffer->Mid = get_next_mid(treeCon->ses->server); + if (treeCon->ses->server) + buffer->Mid = get_next_mid(treeCon->ses->server); } if (treeCon->Flags & SMB_SHARE_IS_IN_DFS) buffer->Flags2 |= SMBFLG2_DFS; From 9ed38fd4a15417cac83967360cf20b853bfab9b6 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 23 Sep 2021 19:18:37 -0500 Subject: [PATCH 510/543] cifs: fix incorrect check for null pointer in header_assemble Although very unlikely that the tlink pointer would be null in this case, get_next_mid function can in theory return null (but not an error) so need to check for null (not for IS_ERR, which can not be returned here). Address warning: fs/smbfs_client/connect.c:2392 cifs_match_super() warn: 'tlink' isn't an ERR_PTR Pointed out by Dan Carpenter via smatch code analysis tool CC: stable@vger.kernel.org Reported-by: Dan Carpenter Acked-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/connect.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 7881115cfbee..c3b94c1e4591 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2389,9 +2389,10 @@ cifs_match_super(struct super_block *sb, void *data) spin_lock(&cifs_tcp_ses_lock); cifs_sb = CIFS_SB(sb); tlink = cifs_get_tlink(cifs_sb_master_tlink(cifs_sb)); - if (IS_ERR(tlink)) { + if (tlink == NULL) { + /* can not match superblock if tlink were ever null */ spin_unlock(&cifs_tcp_ses_lock); - return rc; + return 0; } tcon = tlink_tcon(tlink); ses = tcon->ses; From 3bd18ba7d859eb1fbef3beb1e80c24f6f7d7596c Mon Sep 17 00:00:00 2001 From: Uwe Brandt Date: Tue, 21 Sep 2021 19:54:46 +0200 Subject: [PATCH 511/543] USB: serial: cp210x: add ID for GW Instek GDM-834x Digital Multimeter Add the USB serial device ID for the GW Instek GDM-834x Digital Multimeter. Signed-off-by: Uwe Brandt Link: https://lore.kernel.org/r/YUxFl3YUCPGJZd8Y@hovoldconsulting.com Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index fd51498ab108..189279869a8b 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -233,6 +233,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1FB9, 0x0602) }, /* Lake Shore Model 648 Magnet Power Supply */ { USB_DEVICE(0x1FB9, 0x0700) }, /* Lake Shore Model 737 VSM Controller */ { USB_DEVICE(0x1FB9, 0x0701) }, /* Lake Shore Model 776 Hall Matrix */ + { USB_DEVICE(0x2184, 0x0030) }, /* GW Instek GDM-834x Digital Multimeter */ { USB_DEVICE(0x2626, 0xEA60) }, /* Aruba Networks 7xxx USB Serial Console */ { USB_DEVICE(0x3195, 0xF190) }, /* Link Instruments MSO-19 */ { USB_DEVICE(0x3195, 0xF280) }, /* Link Instruments MSO-28 */ From 9e3eed534f8235a4a596a9dae5b8a6425d81ea1a Mon Sep 17 00:00:00 2001 From: Slark Xiao Date: Fri, 17 Sep 2021 19:01:06 +0800 Subject: [PATCH 512/543] USB: serial: option: add device id for Foxconn T99W265 Adding support for Foxconn device T99W265 for enumeration with PID 0xe0db. usb-devices output for 0xe0db T: Bus=04 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 19 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=ef(misc ) Sub=02 Prot=01 MxPS= 9 #Cfgs= 1 P: Vendor=0489 ProdID=e0db Rev=05.04 S: Manufacturer=Microsoft S: Product=Generic Mobile Broadband Adapter S: SerialNumber=6c50f452 C: #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=896mA I: If#=0x0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim I: If#=0x1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I: If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option I: If#=0x3 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) I: If#=0x4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option if0/1: MBIM, if2:Diag, if3:GNSS, if4: Modem Signed-off-by: Slark Xiao Link: https://lore.kernel.org/r/20210917110106.9852-1-slark_xiao@163.com [ johan: use USB_DEVICE_INTERFACE_CLASS(), amend comment ] Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 02a35f26ee82..6cfb5d33609f 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2075,6 +2075,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, { USB_DEVICE(0x0489, 0xe0b5), /* Foxconn T77W968 ESIM */ .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, + { USB_DEVICE_INTERFACE_CLASS(0x0489, 0xe0db, 0xff), /* Foxconn T99W265 MBIM */ + .driver_info = RSVD(3) }, { USB_DEVICE(0x1508, 0x1001), /* Fibocom NL668 (IOT version) */ .driver_info = RSVD(4) | RSVD(5) | RSVD(6) }, { USB_DEVICE(0x2cb7, 0x0104), /* Fibocom NL678 series */ From 5ba1071f7554c4027bdbd712a146111de57918de Mon Sep 17 00:00:00 2001 From: Numfor Mbiziwo-Tiapo Date: Thu, 23 Sep 2021 09:18:43 -0700 Subject: [PATCH 513/543] x86/insn, tools/x86: Fix undefined behavior due to potential unaligned accesses Don't perform unaligned loads in __get_next() and __peek_nbyte_next() as these are forms of undefined behavior: "A pointer to an object or incomplete type may be converted to a pointer to a different object or incomplete type. If the resulting pointer is not correctly aligned for the pointed-to type, the behavior is undefined." (from http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1256.pdf) These problems were identified using the undefined behavior sanitizer (ubsan) with the tools version of the code and perf test. [ bp: Massage commit message. ] Signed-off-by: Numfor Mbiziwo-Tiapo Signed-off-by: Ian Rogers Signed-off-by: Borislav Petkov Acked-by: Masami Hiramatsu Link: https://lkml.kernel.org/r/20210923161843.751834-1-irogers@google.com --- arch/x86/lib/insn.c | 4 ++-- tools/arch/x86/lib/insn.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 058f19b20465..c565def611e2 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -37,10 +37,10 @@ ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) #define __get_next(t, insn) \ - ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); leXX_to_cpu(t, r); }) + ({ t r; memcpy(&r, insn->next_byte, sizeof(t)); insn->next_byte += sizeof(t); leXX_to_cpu(t, r); }) #define __peek_nbyte_next(t, insn, n) \ - ({ t r = *(t*)((insn)->next_byte + n); leXX_to_cpu(t, r); }) + ({ t r; memcpy(&r, (insn)->next_byte + n, sizeof(t)); leXX_to_cpu(t, r); }) #define get_next(t, insn) \ ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c index c41f95815480..797699462cd8 100644 --- a/tools/arch/x86/lib/insn.c +++ b/tools/arch/x86/lib/insn.c @@ -37,10 +37,10 @@ ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) #define __get_next(t, insn) \ - ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); leXX_to_cpu(t, r); }) + ({ t r; memcpy(&r, insn->next_byte, sizeof(t)); insn->next_byte += sizeof(t); leXX_to_cpu(t, r); }) #define __peek_nbyte_next(t, insn, n) \ - ({ t r = *(t*)((insn)->next_byte + n); leXX_to_cpu(t, r); }) + ({ t r; memcpy(&r, (insn)->next_byte + n, sizeof(t)); leXX_to_cpu(t, r); }) #define get_next(t, insn) \ ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) From 87c1696655787895689618c8b63c5efe66b8f2ab Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 21 Sep 2021 08:24:57 -0600 Subject: [PATCH 514/543] io-wq: ensure we exit if thread group is exiting Dave reports that a coredumping workload gets stuck in 5.15-rc2, and identified the culprit in the Fixes line below. The problem is that relying solely on fatal_signal_pending() to gate whether to exit or not fails miserably if a process gets eg SIGILL sent. Don't exclusively rely on fatal signals, also check if the thread group is exiting. Fixes: 15e20db2e0ce ("io-wq: only exit on fatal signals") Reported-by: Dave Chinner Signed-off-by: Jens Axboe --- fs/io-wq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index c2e0e8e80949..c2360cdc403d 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -584,7 +584,8 @@ loop: if (!get_signal(&ksig)) continue; - if (fatal_signal_pending(current)) + if (fatal_signal_pending(current) || + signal_group_exit(current->signal)) break; continue; } From bd99c71bd14072ce2920f6d0c2fe43df072c653c Mon Sep 17 00:00:00 2001 From: Hao Xu Date: Wed, 22 Sep 2021 18:12:36 +0800 Subject: [PATCH 515/543] io_uring: fix race between poll completion and cancel_hash insertion If poll arming and poll completion runs in parallel, there maybe races. For instance, run io_poll_add in iowq and io_poll_task_func in original context, then: iowq original context io_poll_add vfs_poll (interruption happens tw queued to original context) io_poll_task_func generate cqe del from cancel_hash[] if !poll.done insert to cancel_hash[] The entry left in cancel_hash[], similar case for fast poll. Fix it by set poll.done = true when del from cancel_hash[]. Fixes: 5082620fb2ca ("io_uring: terminate multishot poll for CQ ring overflow") Signed-off-by: Hao Xu Link: https://lore.kernel.org/r/20210922101238.7177-2-haoxu@linux.alibaba.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index e372d5b9f6dc..43530aae6180 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5337,10 +5337,8 @@ static bool __io_poll_complete(struct io_kiocb *req, __poll_t mask) } if (req->poll.events & EPOLLONESHOT) flags = 0; - if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) { - req->poll.done = true; + if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) flags = 0; - } if (flags & IORING_CQE_F_MORE) ctx->cq_extra++; @@ -5371,6 +5369,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked) if (done) { io_poll_remove_double(req); hash_del(&req->hash_node); + req->poll.done = true; } else { req->result = 0; add_wait_queue(req->poll.head, &req->poll.wait); @@ -5508,6 +5507,7 @@ static void io_async_task_func(struct io_kiocb *req, bool *locked) hash_del(&req->hash_node); io_poll_remove_double(req); + apoll->poll.done = true; spin_unlock(&ctx->completion_lock); if (!READ_ONCE(apoll->poll.canceled)) From a62682f92eedb41c1cd8290fa875a4b85624fb9a Mon Sep 17 00:00:00 2001 From: Hao Xu Date: Wed, 22 Sep 2021 18:12:37 +0800 Subject: [PATCH 516/543] io_uring: fix missing set of EPOLLONESHOT for CQ ring overflow We should set EPOLLONESHOT if cqring_fill_event() returns false since io_poll_add() decides to put req or not by it. Fixes: 5082620fb2ca ("io_uring: terminate multishot poll for CQ ring overflow") Signed-off-by: Hao Xu Link: https://lore.kernel.org/r/20210922101238.7177-3-haoxu@linux.alibaba.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 43530aae6180..ac0c06d5c629 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5337,8 +5337,10 @@ static bool __io_poll_complete(struct io_kiocb *req, __poll_t mask) } if (req->poll.events & EPOLLONESHOT) flags = 0; - if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) + if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) { + req->poll.events |= EPOLLONESHOT; flags = 0; + } if (flags & IORING_CQE_F_MORE) ctx->cq_extra++; From 5b7aa38d86f348847a48f71e9ac7715406de900e Mon Sep 17 00:00:00 2001 From: Hao Xu Date: Wed, 22 Sep 2021 18:12:38 +0800 Subject: [PATCH 517/543] io_uring: fix potential req refcount underflow For multishot mode, there may be cases like: iowq original context io_poll_add _arm_poll() mask = vfs_poll() is not 0 if mask (2) io_poll_complete() compl_unlock (interruption happens tw queued to original context) io_poll_task_func() compl_lock (3) done = io_poll_complete() is true compl_unlock put req ref (1) if (poll->flags & EPOLLONESHOT) put req ref EPOLLONESHOT flag in (1) may be from (2) or (3), so there are multiple combinations that can cause ref underfow. Let's address it by: - check the return value in (2) as done - change (1) to if (done) in this way, we only do ref put in (1) if 'oneshot flag' is from (2) - do poll.done check in io_poll_task_func(), so that we won't put ref for the second time. Signed-off-by: Hao Xu Link: https://lore.kernel.org/r/20210922101238.7177-4-haoxu@linux.alibaba.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index ac0c06d5c629..7707cdb7b372 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5367,6 +5367,10 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked) } else { bool done; + if (req->poll.done) { + spin_unlock(&ctx->completion_lock); + return; + } done = __io_poll_complete(req, req->result); if (done) { io_poll_remove_double(req); @@ -5830,6 +5834,7 @@ static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags) struct io_ring_ctx *ctx = req->ctx; struct io_poll_table ipt; __poll_t mask; + bool done; ipt.pt._qproc = io_poll_queue_proc; @@ -5838,13 +5843,13 @@ static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags) if (mask) { /* no async, we'd stolen it */ ipt.error = 0; - io_poll_complete(req, mask); + done = io_poll_complete(req, mask); } spin_unlock(&ctx->completion_lock); if (mask) { io_cqring_ev_posted(ctx); - if (poll->events & EPOLLONESHOT) + if (done) io_put_req(req); } return ipt.error; From 8bab4c09f24ec8d4a7a78ab343620f89d3a24804 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 24 Sep 2021 07:12:27 -0600 Subject: [PATCH 518/543] io_uring: allow conditional reschedule for intensive iterators If we have a lot of threads and rings, the tctx list can get quite big. This is especially true if we keep creating new threads and rings. Likewise for the provided buffers list. Be nice and insert a conditional reschedule point while iterating the nodes for deletion. Link: https://lore.kernel.org/io-uring/00000000000064b6b405ccb41113@google.com/ Reported-by: syzbot+111d2a03f51f5ae73775@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7707cdb7b372..ef3c94a55fbd 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9173,8 +9173,10 @@ static void io_destroy_buffers(struct io_ring_ctx *ctx) struct io_buffer *buf; unsigned long index; - xa_for_each(&ctx->io_buffers, index, buf) + xa_for_each(&ctx->io_buffers, index, buf) { __io_remove_buffers(ctx, buf, index, -1U); + cond_resched(); + } } static void io_req_cache_free(struct list_head *list) @@ -9672,8 +9674,10 @@ static void io_uring_clean_tctx(struct io_uring_task *tctx) struct io_tctx_node *node; unsigned long index; - xa_for_each(&tctx->xa, index, node) + xa_for_each(&tctx->xa, index, node) { io_uring_del_tctx_node(index); + cond_resched(); + } if (wq) { /* * Must be after io_uring_del_task_file() (removes nodes under From 9990da93d2bf9892c2c14c958bef050d4e461a1a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 24 Sep 2021 07:39:08 -0600 Subject: [PATCH 519/543] io_uring: put provided buffer meta data under memcg accounting For each provided buffer, we allocate a struct io_buffer to hold the data associated with it. As a large number of buffers can be provided, account that data with memcg. Fixes: ddf0322db79c ("io_uring: add IORING_OP_PROVIDE_BUFFERS") Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index ef3c94a55fbd..01e49d01fe74 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4387,7 +4387,7 @@ static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head) int i, bid = pbuf->bid; for (i = 0; i < pbuf->nbufs; i++) { - buf = kmalloc(sizeof(*buf), GFP_KERNEL); + buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT); if (!buf) break; From cdb31c29d397a8076d81fd1458d091c647ef94ba Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 24 Sep 2021 08:43:54 -0600 Subject: [PATCH 520/543] io_uring: don't punt files update to io-wq unconditionally There's no reason to punt it unconditionally, we just need to ensure that the submit lock grabbing is conditional. Fixes: 05f3fb3c5397 ("io_uring: avoid ring quiesce for fixed file set unregister and update") Signed-off-by: Jens Axboe --- fs/io_uring.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 01e49d01fe74..c6139ace11fa 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6340,19 +6340,16 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags) struct io_uring_rsrc_update2 up; int ret; - if (issue_flags & IO_URING_F_NONBLOCK) - return -EAGAIN; - up.offset = req->rsrc_update.offset; up.data = req->rsrc_update.arg; up.nr = 0; up.tags = 0; up.resv = 0; - mutex_lock(&ctx->uring_lock); + io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE, &up, req->rsrc_update.nr_args); - mutex_unlock(&ctx->uring_lock); + io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); if (ret < 0) req_set_fail(req); From 9f3a2cb228c28606895d15f13b30d1f7402dc745 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 24 Sep 2021 17:14:48 +0100 Subject: [PATCH 521/543] io_uring: kill extra checks in io_write() We don't retry short writes and so we would never get to async setup in io_write() in that case. Thus ret2 > 0 is always false and iov_iter_advance() is never used. Apparently, the same is found by Coverity, which complains on the code. Fixes: cd65869512ab ("io_uring: use iov_iter state save/restore helpers") Reported-by: Dave Jones Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/5b33e61034748ef1022766efc0fb8854cfcf749c.1632500058.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index c6139ace11fa..2b3232d53e79 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3605,7 +3605,6 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) iov_iter_save_state(iter, state); } req->result = iov_iter_count(iter); - ret2 = 0; /* Ensure we clear previously set non-block flag */ if (!force_nonblock) @@ -3670,8 +3669,6 @@ done: } else { copy_iov: iov_iter_restore(iter, state); - if (ret2 > 0) - iov_iter_advance(iter, ret2); ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false); return ret ?: -EAGAIN; } From a647a524a46736786c95cdb553a070322ca096e3 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 24 Sep 2021 19:07:04 +0800 Subject: [PATCH 522/543] block: don't call rq_qos_ops->done_bio if the bio isn't tracked rq_qos framework is only applied on request based driver, so: 1) rq_qos_done_bio() needn't to be called for bio based driver 2) rq_qos_done_bio() needn't to be called for bio which isn't tracked, such as bios ended from error handling code. Especially in bio_endio(): 1) request queue is referred via bio->bi_bdev->bd_disk->queue, which may be gone since request queue refcount may not be held in above two cases 2) q->rq_qos may be freed in blk_cleanup_queue() when calling into __rq_qos_done_bio() Fix the potential kernel panic by not calling rq_qos_ops->done_bio if the bio isn't tracked. This way is safe because both ioc_rqos_done_bio() and blkcg_iolatency_done_bio() are nop if the bio isn't tracked. Reported-by: Yu Kuai Cc: tj@kernel.org Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20210924110704.1541818-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/bio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bio.c b/block/bio.c index 5df3dd282e40..a6fb6a0b4295 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1466,7 +1466,7 @@ again: if (!bio_integrity_endio(bio)) return; - if (bio->bi_bdev) + if (bio->bi_bdev && bio_flagged(bio, BIO_TRACKED)) rq_qos_done_bio(bio->bi_bdev->bd_disk->queue, bio); if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) { From 5afedf670caf30a2b5a52da96eb7eac7dee6a9c9 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Thu, 23 Sep 2021 21:49:21 +0800 Subject: [PATCH 523/543] blktrace: Fix uaf in blk_trace access after removing by sysfs There is an use-after-free problem triggered by following process: P1(sda) P2(sdb) echo 0 > /sys/block/sdb/trace/enable blk_trace_remove_queue synchronize_rcu blk_trace_free relay_close rcu_read_lock __blk_add_trace trace_note_tsk (Iterate running_trace_list) relay_close_buf relay_destroy_buf kfree(buf) trace_note(sdb's bt) relay_reserve buf->offset <- nullptr deference (use-after-free) !!! rcu_read_unlock [ 502.714379] BUG: kernel NULL pointer dereference, address: 0000000000000010 [ 502.715260] #PF: supervisor read access in kernel mode [ 502.715903] #PF: error_code(0x0000) - not-present page [ 502.716546] PGD 103984067 P4D 103984067 PUD 17592b067 PMD 0 [ 502.717252] Oops: 0000 [#1] SMP [ 502.720308] RIP: 0010:trace_note.isra.0+0x86/0x360 [ 502.732872] Call Trace: [ 502.733193] __blk_add_trace.cold+0x137/0x1a3 [ 502.733734] blk_add_trace_rq+0x7b/0xd0 [ 502.734207] blk_add_trace_rq_issue+0x54/0xa0 [ 502.734755] blk_mq_start_request+0xde/0x1b0 [ 502.735287] scsi_queue_rq+0x528/0x1140 ... [ 502.742704] sg_new_write.isra.0+0x16e/0x3e0 [ 502.747501] sg_ioctl+0x466/0x1100 Reproduce method: ioctl(/dev/sda, BLKTRACESETUP, blk_user_trace_setup[buf_size=127]) ioctl(/dev/sda, BLKTRACESTART) ioctl(/dev/sdb, BLKTRACESETUP, blk_user_trace_setup[buf_size=127]) ioctl(/dev/sdb, BLKTRACESTART) echo 0 > /sys/block/sdb/trace/enable & // Add delay(mdelay/msleep) before kernel enters blk_trace_free() ioctl$SG_IO(/dev/sda, SG_IO, ...) // Enters trace_note_tsk() after blk_trace_free() returned // Use mdelay in rcu region rather than msleep(which may schedule out) Remove blk_trace from running_list before calling blk_trace_free() by sysfs if blk_trace is at Blktrace_running state. Fixes: c71a896154119f ("blktrace: add ftrace plugin") Signed-off-by: Zhihao Cheng Link: https://lore.kernel.org/r/20210923134921.109194-1-chengzhihao1@huawei.com Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index c221e4c3f625..fa91f398f28b 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1605,6 +1605,14 @@ static int blk_trace_remove_queue(struct request_queue *q) if (bt == NULL) return -EINVAL; + if (bt->trace_state == Blktrace_running) { + bt->trace_state = Blktrace_stopped; + spin_lock_irq(&running_trace_lock); + list_del_init(&bt->running_list); + spin_unlock_irq(&running_trace_lock); + relay_flush(bt->rchan); + } + put_probe_ref(); synchronize_rcu(); blk_trace_free(bt); From f278eb3d8178f9c31f8dfad7e91440e603dd7f1a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 23 Sep 2021 10:37:51 +0800 Subject: [PATCH 524/543] block: hold ->invalidate_lock in blkdev_fallocate When running ->fallocate(), blkdev_fallocate() should hold mapping->invalidate_lock to prevent page cache from being accessed, otherwise stale data may be read in page cache. Without this patch, blktests block/009 fails sometimes. With this patch, block/009 can pass always. Also as Jan pointed out, no pages can be created in the discarded area while you are holding the invalidate_lock, so remove the 2nd truncate_bdev_range(). Cc: Jan Kara Signed-off-by: Ming Lei Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20210923023751.1441091-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/fops.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/block/fops.c b/block/fops.c index ffce6f6c68dd..1e970c247e0e 100644 --- a/block/fops.c +++ b/block/fops.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "blk.h" static struct inode *bdev_file_inode(struct file *file) @@ -553,7 +554,8 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) static long blkdev_fallocate(struct file *file, int mode, loff_t start, loff_t len) { - struct block_device *bdev = I_BDEV(bdev_file_inode(file)); + struct inode *inode = bdev_file_inode(file); + struct block_device *bdev = I_BDEV(inode); loff_t end = start + len - 1; loff_t isize; int error; @@ -580,10 +582,12 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, if ((start | len) & (bdev_logical_block_size(bdev) - 1)) return -EINVAL; + filemap_invalidate_lock(inode->i_mapping); + /* Invalidate the page cache, including dirty pages. */ error = truncate_bdev_range(bdev, file->f_mode, start, end); if (error) - return error; + goto fail; switch (mode) { case FALLOC_FL_ZERO_RANGE: @@ -600,17 +604,12 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, GFP_KERNEL, 0); break; default: - return -EOPNOTSUPP; + error = -EOPNOTSUPP; } - if (error) - return error; - /* - * Invalidate the page cache again; if someone wandered in and dirtied - * a page, we just discard it - userspace has no way of knowing whether - * the write happened before or after discard completing... - */ - return truncate_bdev_range(bdev, file->f_mode, start, end); + fail: + filemap_invalidate_unlock(inode->i_mapping); + return error; } const struct file_operations def_blk_fops = { From 7df778be2f61e1a23002d1f2f5d6aaf702771eb8 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 24 Sep 2021 20:04:29 +0100 Subject: [PATCH 525/543] io_uring: make OP_CLOSE consistent with direct open From recently open/accept are now able to manipulate fixed file table, but it's inconsistent that close can't. Close the gap, keep API same as with open/accept, i.e. via sqe->file_slot. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 2b3232d53e79..82f867983bb3 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -502,6 +502,7 @@ struct io_poll_update { struct io_close { struct file *file; int fd; + u32 file_slot; }; struct io_timeout_data { @@ -1098,6 +1099,8 @@ static int io_req_prep_async(struct io_kiocb *req); static int io_install_fixed_file(struct io_kiocb *req, struct file *file, unsigned int issue_flags, u32 slot_index); +static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags); + static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer); static struct kmem_cache *req_cachep; @@ -4591,12 +4594,16 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (sqe->ioprio || sqe->off || sqe->addr || sqe->len || - sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in) + sqe->rw_flags || sqe->buf_index) return -EINVAL; if (req->flags & REQ_F_FIXED_FILE) return -EBADF; req->close.fd = READ_ONCE(sqe->fd); + req->close.file_slot = READ_ONCE(sqe->file_index); + if (req->close.file_slot && req->close.fd) + return -EINVAL; + return 0; } @@ -4608,6 +4615,11 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags) struct file *file = NULL; int ret = -EBADF; + if (req->close.file_slot) { + ret = io_close_fixed(req, issue_flags); + goto err; + } + spin_lock(&files->file_lock); fdt = files_fdtable(files); if (close->fd >= fdt->max_fds) { @@ -8401,6 +8413,44 @@ err: return ret; } +static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags) +{ + unsigned int offset = req->close.file_slot - 1; + struct io_ring_ctx *ctx = req->ctx; + struct io_fixed_file *file_slot; + struct file *file; + int ret, i; + + io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); + ret = -ENXIO; + if (unlikely(!ctx->file_data)) + goto out; + ret = -EINVAL; + if (offset >= ctx->nr_user_files) + goto out; + ret = io_rsrc_node_switch_start(ctx); + if (ret) + goto out; + + i = array_index_nospec(offset, ctx->nr_user_files); + file_slot = io_fixed_file_slot(&ctx->file_table, i); + ret = -EBADF; + if (!file_slot->file_ptr) + goto out; + + file = (struct file *)(file_slot->file_ptr & FFS_MASK); + ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file); + if (ret) + goto out; + + file_slot->file_ptr = 0; + io_rsrc_node_switch(ctx, ctx->file_data); + ret = 0; +out: + io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); + return ret; +} + static int __io_sqe_files_update(struct io_ring_ctx *ctx, struct io_uring_rsrc_update2 *up, unsigned nr_args) From acfa299a4a63a58e5e81a87cb16798f20d35f7d7 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 24 Sep 2021 15:43:20 -0700 Subject: [PATCH 526/543] mm, hwpoison: add is_free_buddy_page() in HWPoisonHandlable() Commit fcc00621d88b ("mm/hwpoison: retry with shake_page() for unhandlable pages") changed the return value of __get_hwpoison_page() to retry for transiently unhandlable cases. However, __get_hwpoison_page() currently fails to properly judge buddy pages as handlable, so hard/soft offline for buddy pages always fail as "unhandlable page". This is totally regrettable. So let's add is_free_buddy_page() in HWPoisonHandlable(), so that __get_hwpoison_page() returns different return values between buddy pages and unhandlable pages as intended. Link: https://lkml.kernel.org/r/20210909004131.163221-1-naoya.horiguchi@linux.dev Fixes: fcc00621d88b ("mm/hwpoison: retry with shake_page() for unhandlable pages") Signed-off-by: Naoya Horiguchi Acked-by: David Hildenbrand Reviewed-by: Yang Shi Cc: Tony Luck Cc: Oscar Salvador Cc: Mike Kravetz Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 54879c339024..41901c7bb58f 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1126,7 +1126,7 @@ static int page_action(struct page_state *ps, struct page *p, */ static inline bool HWPoisonHandlable(struct page *page) { - return PageLRU(page) || __PageMovable(page); + return PageLRU(page) || __PageMovable(page) || is_free_buddy_page(page); } static int __get_hwpoison_page(struct page *page) From fa360beac4b62d54879a88b182afef4b369c9700 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 24 Sep 2021 15:43:23 -0700 Subject: [PATCH 527/543] kasan: fix Kconfig check of CC_HAS_WORKING_NOSANITIZE_ADDRESS In the main KASAN config option CC_HAS_WORKING_NOSANITIZE_ADDRESS is checked for instrumentation-based modes. However, if HAVE_ARCH_KASAN_HW_TAGS is true all modes may still be selected. To fix, also make the software modes depend on CC_HAS_WORKING_NOSANITIZE_ADDRESS. Link: https://lkml.kernel.org/r/20210910084240.1215803-1-elver@google.com Fixes: 6a63a63ff1ac ("kasan: introduce CONFIG_KASAN_HW_TAGS") Signed-off-by: Marco Elver Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Aleksandr Nogikh Cc: Taras Madan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.kasan | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 1e2d10f86011..cdc842d090db 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -66,6 +66,7 @@ choice config KASAN_GENERIC bool "Generic mode" depends on HAVE_ARCH_KASAN && CC_HAS_KASAN_GENERIC + depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS select SLUB_DEBUG if SLUB select CONSTRUCTORS help @@ -86,6 +87,7 @@ config KASAN_GENERIC config KASAN_SW_TAGS bool "Software tag-based mode" depends on HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS + depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS select SLUB_DEBUG if SLUB select CONSTRUCTORS help From 892ab4bbd063cfe7f6bbb183e6be69d9907a61de Mon Sep 17 00:00:00 2001 From: Adam Borowski Date: Fri, 24 Sep 2021 15:43:26 -0700 Subject: [PATCH 528/543] mm/damon: don't use strnlen() with known-bogus source length gcc knows the true length too, and rightfully complains. Link: https://lkml.kernel.org/r/20210912204447.10427-1-kilobyte@angband.pl Signed-off-by: Adam Borowski Cc: SeongJae Park Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/damon/dbgfs-test.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mm/damon/dbgfs-test.h b/mm/damon/dbgfs-test.h index 930e83bceef0..4eddcfa73996 100644 --- a/mm/damon/dbgfs-test.h +++ b/mm/damon/dbgfs-test.h @@ -20,27 +20,27 @@ static void damon_dbgfs_test_str_to_target_ids(struct kunit *test) ssize_t nr_integers = 0, i; question = "123"; - answers = str_to_target_ids(question, strnlen(question, 128), + answers = str_to_target_ids(question, strlen(question), &nr_integers); KUNIT_EXPECT_EQ(test, (ssize_t)1, nr_integers); KUNIT_EXPECT_EQ(test, 123ul, answers[0]); kfree(answers); question = "123abc"; - answers = str_to_target_ids(question, strnlen(question, 128), + answers = str_to_target_ids(question, strlen(question), &nr_integers); KUNIT_EXPECT_EQ(test, (ssize_t)1, nr_integers); KUNIT_EXPECT_EQ(test, 123ul, answers[0]); kfree(answers); question = "a123"; - answers = str_to_target_ids(question, strnlen(question, 128), + answers = str_to_target_ids(question, strlen(question), &nr_integers); KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers); kfree(answers); question = "12 35"; - answers = str_to_target_ids(question, strnlen(question, 128), + answers = str_to_target_ids(question, strlen(question), &nr_integers); KUNIT_EXPECT_EQ(test, (ssize_t)2, nr_integers); for (i = 0; i < nr_integers; i++) @@ -48,7 +48,7 @@ static void damon_dbgfs_test_str_to_target_ids(struct kunit *test) kfree(answers); question = "12 35 46"; - answers = str_to_target_ids(question, strnlen(question, 128), + answers = str_to_target_ids(question, strlen(question), &nr_integers); KUNIT_EXPECT_EQ(test, (ssize_t)3, nr_integers); for (i = 0; i < nr_integers; i++) @@ -56,7 +56,7 @@ static void damon_dbgfs_test_str_to_target_ids(struct kunit *test) kfree(answers); question = "12 35 abc 46"; - answers = str_to_target_ids(question, strnlen(question, 128), + answers = str_to_target_ids(question, strlen(question), &nr_integers); KUNIT_EXPECT_EQ(test, (ssize_t)2, nr_integers); for (i = 0; i < 2; i++) @@ -64,13 +64,13 @@ static void damon_dbgfs_test_str_to_target_ids(struct kunit *test) kfree(answers); question = ""; - answers = str_to_target_ids(question, strnlen(question, 128), + answers = str_to_target_ids(question, strlen(question), &nr_integers); KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers); kfree(answers); question = "\n"; - answers = str_to_target_ids(question, strnlen(question, 128), + answers = str_to_target_ids(question, strlen(question), &nr_integers); KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers); kfree(answers); From 867050247e295cf20fce046a92a7e6491fcfe066 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 24 Sep 2021 15:43:29 -0700 Subject: [PATCH 529/543] xtensa: increase size of gcc stack frame check xtensa frame size is larger than the frame size for almost all other architectures. This results in more than 50 "the frame size of is larger than 1024 bytes" errors when trying to build xtensa:allmodconfig. Increase frame size for xtensa to 1536 bytes to avoid compile errors due to frame size limits. Link: https://lkml.kernel.org/r/20210912025235.3514761-1-linux@roeck-us.net Signed-off-by: Guenter Roeck Reviewed-by: Max Filippov Cc: Chris Zankel Cc: David Laight Cc: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d566f601780f..2a9b6dcdac4f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -346,7 +346,7 @@ config FRAME_WARN int "Warn for stack frames larger than" range 0 8192 default 2048 if GCC_PLUGIN_LATENT_ENTROPY - default 1536 if (!64BIT && PARISC) + default 1536 if (!64BIT && (PARISC || XTENSA)) default 1024 if (!64BIT && !PARISC) default 2048 if 64BIT help From de6ee659684b1a2b149e0780d3c5e8032f3647d6 Mon Sep 17 00:00:00 2001 From: Liu Yuntao Date: Fri, 24 Sep 2021 15:43:32 -0700 Subject: [PATCH 530/543] mm/shmem.c: fix judgment error in shmem_is_huge() In the case of SHMEM_HUGE_WITHIN_SIZE, the page index is not rounded up correctly. When the page index points to the first page in a huge page, round_up() cannot bring it to the end of the huge page, but to the end of the previous one. An example: HPAGE_PMD_NR on my machine is 512(2 MB huge page size). After allcoating a 3000 KB buffer, I access it at location 2050 KB. In shmem_is_huge(), the corresponding index happens to be 512. After rounded up by HPAGE_PMD_NR, it will still be 512 which is smaller than i_size, and shmem_is_huge() will return true. As a result, my buffer takes an additional huge page, and that shouldn't happen when shmem_enabled is set to within_size. Link: https://lkml.kernel.org/r/20210909032007.18353-1-liuyuntao10@huawei.com Fixes: f3f0e1d2150b2b ("khugepaged: add support of collapse for tmpfs/shmem pages") Signed-off-by: Liu Yuntao Acked-by: Kirill A. Shutemov Acked-by: Hugh Dickins Cc: wuxu.wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 88742953532c..b5860f4a2738 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -490,9 +490,9 @@ bool shmem_is_huge(struct vm_area_struct *vma, case SHMEM_HUGE_ALWAYS: return true; case SHMEM_HUGE_WITHIN_SIZE: - index = round_up(index, HPAGE_PMD_NR); + index = round_up(index + 1, HPAGE_PMD_NR); i_size = round_up(i_size_read(inode), PAGE_SIZE); - if (i_size >= HPAGE_PMD_SIZE && (i_size >> PAGE_SHIFT) >= index) + if (i_size >> PAGE_SHIFT >= index) return true; fallthrough; case SHMEM_HUGE_ADVISE: From 9c0f0a03e386f4e1df33db676401547e1b7800c6 Mon Sep 17 00:00:00 2001 From: Wengang Wang Date: Fri, 24 Sep 2021 15:43:35 -0700 Subject: [PATCH 531/543] ocfs2: drop acl cache for directories too ocfs2_data_convert_worker() is currently dropping any cached acl info for FILE before down-converting meta lock. It should also drop for DIRECTORY. Otherwise the second acl lookup returns the cached one (from VFS layer) which could be already stale. The problem we are seeing is that the acl changes on one node doesn't get refreshed on other nodes in the following case: Node 1 Node 2 -------------- ---------------- getfacl dir1 getfacl dir1 <-- this is OK setfacl -m u:user1:rwX dir1 getfacl dir1 <-- see the change for user1 getfacl dir1 <-- can't see change for user1 Link: https://lkml.kernel.org/r/20210903012631.6099-1-wen.gang.wang@oracle.com Signed-off-by: Wengang Wang Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/dlmglue.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 359524b7341f..801e60bab955 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3951,7 +3951,7 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, oi = OCFS2_I(inode); oi->ip_dir_lock_gen++; mlog(0, "generation: %u\n", oi->ip_dir_lock_gen); - goto out; + goto out_forget; } if (!S_ISREG(inode->i_mode)) @@ -3982,6 +3982,7 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, filemap_fdatawait(mapping); } +out_forget: forget_all_cached_acls(inode); out: From d09c38726c78effaf910a1e3f31e247b5b031d56 Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Fri, 24 Sep 2021 15:43:38 -0700 Subject: [PATCH 532/543] scripts/sorttable: riscv: fix undeclared identifier 'EM_RISCV' error Fix the following build failure reported in [1] by adding a conditional definition of EM_RISCV in order to allow cross-compilation on machines which do not have EM_RISCV definition in their host. scripts/sorttable.c:352:7: error: use of undeclared identifier 'EM_RISCV' EM_RISCV was added to in glibc 2.24 so builds on systems with glibc headers < 2.24 should show this error. [mkubecek@suse.cz: changelog addition] Link: https://lore.kernel.org/lkml/e8965b25-f15b-c7b4-748c-d207dda9c8e8@i2se.com/ [1] Link: https://lkml.kernel.org/r/20210913030625.4525-1-miles.chen@mediatek.com Fixes: 54fed35fd393 ("riscv: Enable BUILDTIME_TABLE_SORT") Signed-off-by: Miles Chen Reported-by: Stefan Wahren Tested-by: Stefan Wahren Reviewed-by: Jisheng Zhang Cc: Michal Kubecek Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Markus Mayer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/sorttable.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/sorttable.c b/scripts/sorttable.c index f355869c65cd..6ee4fa882919 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -54,6 +54,10 @@ #define EM_ARCV2 195 #endif +#ifndef EM_RISCV +#define EM_RISCV 243 +#endif + static uint32_t (*r)(const uint32_t *); static uint16_t (*r2)(const uint16_t *); static uint64_t (*r8)(const uint64_t *); From ebaeab2fe87987cef28eb5ab174c42cd28594387 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Fri, 24 Sep 2021 15:43:41 -0700 Subject: [PATCH 533/543] tools/vm/page-types: remove dependency on opt_file for idle page tracking Idle page tracking can also be used for process address space, not only file mappings. Without this change, using with '-i' option for process address space encounters below errors reported. $ sudo ./page-types -p $(pidof bash) -i mark page idle: Bad file descriptor mark page idle: Bad file descriptor mark page idle: Bad file descriptor mark page idle: Bad file descriptor ... Link: https://lkml.kernel.org/r/20210917032826.10669-1-changbin.du@gmail.com Signed-off-by: Changbin Du Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/vm/page-types.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index 0517c744b04e..f62f10c988db 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -1331,7 +1331,7 @@ int main(int argc, char *argv[]) if (opt_list && opt_list_mapcnt) kpagecount_fd = checked_open(PROC_KPAGECOUNT, O_RDONLY); - if (opt_mark_idle && opt_file) + if (opt_mark_idle) page_idle_fd = checked_open(SYS_KERNEL_MM_PAGE_IDLE, O_RDWR); if (opt_list && opt_pid) From b7cd9fa5ccc392d9f2269edc4cb82508632c28da Mon Sep 17 00:00:00 2001 From: Paul Menzel Date: Fri, 24 Sep 2021 15:43:44 -0700 Subject: [PATCH 534/543] lib/zlib_inflate/inffast: check config in C to avoid unused function warning Building Linux for ppc64le with Ubuntu clang version 12.0.0-3ubuntu1~21.04.1 shows the warning below. arch/powerpc/boot/inffast.c:20:1: warning: unused function 'get_unaligned16' [-Wunused-function] get_unaligned16(const unsigned short *p) ^ 1 warning generated. Fix it by moving the check from the preprocessor to C, so the compiler sees the use. Link: https://lkml.kernel.org/r/20210920084332.5752-1-pmenzel@molgen.mpg.de Signed-off-by: Paul Menzel Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Cc: Nick Desaulniers Cc: Christophe Leroy Cc: Zhen Lei Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/zlib_inflate/inffast.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/lib/zlib_inflate/inffast.c b/lib/zlib_inflate/inffast.c index f19c4fbe1be7..2843f9bb42ac 100644 --- a/lib/zlib_inflate/inffast.c +++ b/lib/zlib_inflate/inffast.c @@ -253,13 +253,12 @@ void inflate_fast(z_streamp strm, unsigned start) sfrom = (unsigned short *)(from); loops = len >> 1; - do -#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - *sout++ = *sfrom++; -#else - *sout++ = get_unaligned16(sfrom++); -#endif - while (--loops); + do { + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) + *sout++ = *sfrom++; + else + *sout++ = get_unaligned16(sfrom++); + } while (--loops); out = (unsigned char *)sout; from = (unsigned char *)sfrom; } else { /* dist == 1 or dist == 2 */ From 243418e3925d5b5b0657ae54c322d43035e97eed Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Fri, 24 Sep 2021 15:43:47 -0700 Subject: [PATCH 535/543] mm: fs: invalidate bh_lrus for only cold path The kernel test robot reported the regression of fio.write_iops[1] with commit 8cc621d2f45d ("mm: fs: invalidate BH LRU during page migration"). Since lru_add_drain is called frequently, invalidate bh_lrus there could increase bh_lrus cache miss ratio, which needs more IO in the end. This patch moves the bh_lrus invalidation from the hot path( e.g., zap_page_range, pagevec_release) to cold path(i.e., lru_add_drain_all, lru_cache_disable). Zhengjun Xing confirmed "I test the patch, the regression reduced to -2.9%" [1] https://lore.kernel.org/lkml/20210520083144.GD14190@xsang-OptiPlex-9020/ [2] 8cc621d2f45d, mm: fs: invalidate BH LRU during page migration Link: https://lkml.kernel.org/r/20210907212347.1977686-1-minchan@kernel.org Signed-off-by: Minchan Kim Reported-by: kernel test robot Reviewed-by: Chris Goldsworthy Tested-by: "Xing, Zhengjun" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 8 ++++++-- include/linux/buffer_head.h | 4 ++-- mm/swap.c | 19 ++++++++++++++++--- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index ab7573d72dd7..c615387aedca 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1425,12 +1425,16 @@ void invalidate_bh_lrus(void) } EXPORT_SYMBOL_GPL(invalidate_bh_lrus); -void invalidate_bh_lrus_cpu(int cpu) +/* + * It's called from workqueue context so we need a bh_lru_lock to close + * the race with preemption/irq. + */ +void invalidate_bh_lrus_cpu(void) { struct bh_lru *b; bh_lru_lock(); - b = per_cpu_ptr(&bh_lrus, cpu); + b = this_cpu_ptr(&bh_lrus); __invalidate_bh_lrus(b); bh_lru_unlock(); } diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 6486d3c19463..36f33685c8c0 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -194,7 +194,7 @@ void __breadahead_gfp(struct block_device *, sector_t block, unsigned int size, struct buffer_head *__bread_gfp(struct block_device *, sector_t block, unsigned size, gfp_t gfp); void invalidate_bh_lrus(void); -void invalidate_bh_lrus_cpu(int cpu); +void invalidate_bh_lrus_cpu(void); bool has_bh_in_lru(int cpu, void *dummy); struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); @@ -408,7 +408,7 @@ static inline int inode_has_buffers(struct inode *inode) { return 0; } static inline void invalidate_inode_buffers(struct inode *inode) {} static inline int remove_inode_buffers(struct inode *inode) { return 1; } static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; } -static inline void invalidate_bh_lrus_cpu(int cpu) {} +static inline void invalidate_bh_lrus_cpu(void) {} static inline bool has_bh_in_lru(int cpu, void *dummy) { return false; } #define buffer_heads_over_limit 0 diff --git a/mm/swap.c b/mm/swap.c index 897200d27dd0..af3cad4e5378 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -620,7 +620,6 @@ void lru_add_drain_cpu(int cpu) pagevec_lru_move_fn(pvec, lru_lazyfree_fn); activate_page_drain(cpu); - invalidate_bh_lrus_cpu(cpu); } /** @@ -703,6 +702,20 @@ void lru_add_drain(void) local_unlock(&lru_pvecs.lock); } +/* + * It's called from per-cpu workqueue context in SMP case so + * lru_add_drain_cpu and invalidate_bh_lrus_cpu should run on + * the same cpu. It shouldn't be a problem in !SMP case since + * the core is only one and the locks will disable preemption. + */ +static void lru_add_and_bh_lrus_drain(void) +{ + local_lock(&lru_pvecs.lock); + lru_add_drain_cpu(smp_processor_id()); + local_unlock(&lru_pvecs.lock); + invalidate_bh_lrus_cpu(); +} + void lru_add_drain_cpu_zone(struct zone *zone) { local_lock(&lru_pvecs.lock); @@ -717,7 +730,7 @@ static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); static void lru_add_drain_per_cpu(struct work_struct *dummy) { - lru_add_drain(); + lru_add_and_bh_lrus_drain(); } /* @@ -858,7 +871,7 @@ void lru_cache_disable(void) */ __lru_add_drain_all(true); #else - lru_add_drain(); + lru_add_and_bh_lrus_drain(); #endif } From a4ce73910427e960b2c7f4d83229153c327d0ee7 Mon Sep 17 00:00:00 2001 From: Weizhao Ouyang Date: Fri, 24 Sep 2021 15:43:50 -0700 Subject: [PATCH 536/543] mm/debug: sync up MR_CONTIG_RANGE and MR_LONGTERM_PIN Sync up MR_CONTIG_RANGE and MR_LONGTERM_PIN to migrate_reason_names. Link: https://lkml.kernel.org/r/20210921064553.293905-2-o451686892@gmail.com Fixes: 310253514bbf ("mm/migrate: rename migration reason MR_CMA to MR_CONTIG_RANGE") Fixes: d1e153fea2a8 ("mm/gup: migrate pinned pages out of movable zone") Signed-off-by: Weizhao Ouyang Reviewed-by: "Huang, Ying" Reviewed-by: John Hubbard Cc: Anshuman Khandual Cc: Michal Hocko Cc: Pavel Tatashin Cc: Yang Shi Cc: Zi Yan Cc: Dave Hansen Cc: Minchan Kim Cc: Mina Almasry Cc: "Matthew Wilcox (Oracle)" Cc: Oscar Salvador Cc: Wei Xu Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/debug.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/debug.c b/mm/debug.c index e73fe0a8ec3d..e61037cded98 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -24,7 +24,8 @@ const char *migrate_reason_names[MR_TYPES] = { "syscall_or_cpuset", "mempolicy_mbind", "numa_misplaced", - "cma", + "contig_range", + "longterm_pin", }; const struct trace_print_flags pageflag_names[] = { From 57ed7b4303a1c4d1885019fef03e6a5af2e8468a Mon Sep 17 00:00:00 2001 From: Weizhao Ouyang Date: Fri, 24 Sep 2021 15:43:53 -0700 Subject: [PATCH 537/543] mm/debug: sync up latest migrate_reason to migrate_reason_names Sync up MR_DEMOTION to migrate_reason_names and add a synch prompt. Link: https://lkml.kernel.org/r/20210921064553.293905-3-o451686892@gmail.com Fixes: 26aa2d199d6f ("mm/migrate: demote pages during reclaim") Signed-off-by: Weizhao Ouyang Reviewed-by: "Huang, Ying" Reviewed-by: John Hubbard Cc: Anshuman Khandual Cc: Michal Hocko Cc: Pavel Tatashin Cc: Yang Shi Cc: Zi Yan Cc: Dave Hansen Cc: Minchan Kim Cc: Mina Almasry Cc: "Matthew Wilcox (Oracle)" Cc: Oscar Salvador Cc: Wei Xu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 6 +++++- mm/debug.c | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 326250996b4e..c8077e936691 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -19,6 +19,11 @@ struct migration_target_control; */ #define MIGRATEPAGE_SUCCESS 0 +/* + * Keep sync with: + * - macro MIGRATE_REASON in include/trace/events/migrate.h + * - migrate_reason_names[MR_TYPES] in mm/debug.c + */ enum migrate_reason { MR_COMPACTION, MR_MEMORY_FAILURE, @@ -32,7 +37,6 @@ enum migrate_reason { MR_TYPES }; -/* In mm/debug.c; also keep sync with include/trace/events/migrate.h */ extern const char *migrate_reason_names[MR_TYPES]; #ifdef CONFIG_MIGRATION diff --git a/mm/debug.c b/mm/debug.c index e61037cded98..fae0f81ad831 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -26,6 +26,7 @@ const char *migrate_reason_names[MR_TYPES] = { "numa_misplaced", "contig_range", "longterm_pin", + "demotion", }; const struct trace_print_flags pageflag_names[] = { From e8e9f1e6327005be9656aa135aeb9dfdaf6b3032 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 24 Sep 2021 15:43:57 -0700 Subject: [PATCH 538/543] sh: pgtable-3level: fix cast to pointer from integer of different size If X2TLB=y (CPU_SHX2=y or CPU_SHX3=y, e.g. migor_defconfig), pgd_t.pgd is "unsigned long long", causing: In file included from arch/sh/include/asm/pgtable.h:13, from include/linux/pgtable.h:6, from include/linux/mm.h:33, from arch/sh/kernel/asm-offsets.c:14: arch/sh/include/asm/pgtable-3level.h: In function `pud_pgtable': arch/sh/include/asm/pgtable-3level.h:37:9: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] 37 | return (pmd_t *)pud_val(pud); | ^ Fix this by adding an intermediate cast to "unsigned long", which is basically what the old code did before. Link: https://lkml.kernel.org/r/2c2eef3c9a2f57e5609100a4864715ccf253d30f.1631713483.git.geert+renesas@glider.be Fixes: 9cf6fa2458443118 ("mm: rename pud_page_vaddr to pud_pgtable and make it return pmd_t *") Signed-off-by: Geert Uytterhoeven Tested-by: Daniel Palmer Acked-by: Rob Landley Cc: Yoshinori Sato Cc: Rich Felker Cc: "Aneesh Kumar K . V" Cc: Jacopo Mondi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/include/asm/pgtable-3level.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h index 56bf35c2f29c..cdced80a7ffa 100644 --- a/arch/sh/include/asm/pgtable-3level.h +++ b/arch/sh/include/asm/pgtable-3level.h @@ -34,7 +34,7 @@ typedef struct { unsigned long long pmd; } pmd_t; static inline pmd_t *pud_pgtable(pud_t pud) { - return (pmd_t *)pud_val(pud); + return (pmd_t *)(unsigned long)pud_val(pud); } /* only used by the stubbed out hugetlb gup code, should never be called */ From 19532869feb9b0a97d17ddc14609d1e53a5b60db Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 24 Sep 2021 15:44:00 -0700 Subject: [PATCH 539/543] kasan: always respect CONFIG_KASAN_STACK Currently, the asan-stack parameter is only passed along if CFLAGS_KASAN_SHADOW is not empty, which requires KASAN_SHADOW_OFFSET to be defined in Kconfig so that the value can be checked. In RISC-V's case, KASAN_SHADOW_OFFSET is not defined in Kconfig, which means that asan-stack does not get disabled with clang even when CONFIG_KASAN_STACK is disabled, resulting in large stack warnings with allmodconfig: drivers/video/fbdev/omap2/omapfb/displays/panel-lgphilips-lb035q02.c:117:12: error: stack frame size (14400) exceeds limit (2048) in function 'lb035q02_connect' [-Werror,-Wframe-larger-than] static int lb035q02_connect(struct omap_dss_device *dssdev) ^ 1 error generated. Ensure that the value of CONFIG_KASAN_STACK is always passed along to the compiler so that these warnings do not happen when CONFIG_KASAN_STACK is disabled. Link: https://github.com/ClangBuiltLinux/linux/issues/1453 References: 6baec880d7a5 ("kasan: turn off asan-stack for clang-8 and earlier") Link: https://lkml.kernel.org/r/20210922205525.570068-1-nathan@kernel.org Signed-off-by: Nathan Chancellor Reviewed-by: Marco Elver Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Nick Desaulniers Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/Makefile.kasan | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index 801c415bac59..b9e94c5e7097 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -33,10 +33,11 @@ else CFLAGS_KASAN := $(CFLAGS_KASAN_SHADOW) \ $(call cc-param,asan-globals=1) \ $(call cc-param,asan-instrumentation-with-call-threshold=$(call_threshold)) \ - $(call cc-param,asan-stack=$(stack_enable)) \ $(call cc-param,asan-instrument-allocas=1) endif +CFLAGS_KASAN += $(call cc-param,asan-stack=$(stack_enable)) + endif # CONFIG_KASAN_GENERIC ifdef CONFIG_KASAN_SW_TAGS From 5c91c0e77b8f2681e2b269c8abb4c5acef434d5b Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Fri, 24 Sep 2021 15:44:03 -0700 Subject: [PATCH 540/543] mm/memory_failure: fix the missing pte_unmap() call The paired pte_unmap() call is missing before the dev_pagemap_mapping_shift() returns. So fix it. David says: "I guess this code never runs on 32bit / highmem, that's why we didn't notice so far". [akpm@linux-foundation.org: cleanup] Link: https://lkml.kernel.org/r/20210923122642.4999-1-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: David Hildenbrand Cc: Naoya Horiguchi Cc: Muchun Song Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 41901c7bb58f..3e6449f2102a 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -306,6 +306,7 @@ static unsigned long dev_pagemap_mapping_shift(struct page *page, struct vm_area_struct *vma) { unsigned long address = vma_address(page, vma); + unsigned long ret = 0; pgd_t *pgd; p4d_t *p4d; pud_t *pud; @@ -329,11 +330,10 @@ static unsigned long dev_pagemap_mapping_shift(struct page *page, if (pmd_devmap(*pmd)) return PMD_SHIFT; pte = pte_offset_map(pmd, address); - if (!pte_present(*pte)) - return 0; - if (pte_devmap(*pte)) - return PAGE_SHIFT; - return 0; + if (pte_present(*pte) && pte_devmap(*pte)) + ret = PAGE_SHIFT; + pte_unmap(pte); + return ret; } /* From bcbda81020c3ee77e2c098cadf3e84f99ca3de17 Mon Sep 17 00:00:00 2001 From: Chen Jun Date: Fri, 24 Sep 2021 15:44:06 -0700 Subject: [PATCH 541/543] mm: fix uninitialized use in overcommit_policy_handler We get an unexpected value of /proc/sys/vm/overcommit_memory after running the following program: int main() { int fd = open("/proc/sys/vm/overcommit_memory", O_RDWR); write(fd, "1", 1); write(fd, "2", 1); close(fd); } write(fd, "2", 1) will pass *ppos = 1 to proc_dointvec_minmax. proc_dointvec_minmax will return 0 without setting new_policy. t.data = &new_policy; ret = proc_dointvec_minmax(&t, write, buffer, lenp, ppos) -->do_proc_dointvec -->__do_proc_dointvec if (write) { if (proc_first_pos_non_zero_ignore(ppos, table)) goto out; sysctl_overcommit_memory = new_policy; so sysctl_overcommit_memory will be set to an uninitialized value. Check whether new_policy has been changed by proc_dointvec_minmax. Link: https://lkml.kernel.org/r/20210923020524.13289-1-chenjun102@huawei.com Fixes: 56f3547bfa4d ("mm: adjust vm_committed_as_batch according to vm overcommit policy") Signed-off-by: Chen Jun Acked-by: Michal Hocko Reviewed-by: Feng Tang Reviewed-by: Kefeng Wang Cc: Rui Xiang Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/util.c b/mm/util.c index 499b6b5767ed..bacabe446906 100644 --- a/mm/util.c +++ b/mm/util.c @@ -787,7 +787,7 @@ int overcommit_policy_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; - int new_policy; + int new_policy = -1; int ret; /* @@ -805,7 +805,7 @@ int overcommit_policy_handler(struct ctl_table *table, int write, void *buffer, t = *table; t.data = &new_policy; ret = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); - if (ret) + if (ret || new_policy == -1) return ret; mm_compute_batch(new_policy); From 265fd1991c1db85fbabaad4946ca0e63e2ae688d Mon Sep 17 00:00:00 2001 From: Hyunchul Lee Date: Sat, 25 Sep 2021 00:06:16 +0900 Subject: [PATCH 542/543] ksmbd: use LOOKUP_BENEATH to prevent the out of share access instead of removing '..' in a given path, call kern_path with LOOKUP_BENEATH flag to prevent the out of share access. ran various test on this: smb2-cat-async smb://127.0.0.1/homes/../out_of_share smb2-cat-async smb://127.0.0.1/homes/foo/../../out_of_share smbclient //127.0.0.1/homes -c "mkdir ../foo2" smbclient //127.0.0.1/homes -c "rename bar ../bar" Cc: Ronnie Sahlberg Cc: Ralph Boehme Tested-by: Steve French Tested-by: Namjae Jeon Acked-by: Namjae Jeon Signed-off-by: Hyunchul Lee Signed-off-by: Steve French --- fs/ksmbd/misc.c | 100 ++++++--------------------- fs/ksmbd/misc.h | 7 +- fs/ksmbd/smb2pdu.c | 74 ++++++++------------ fs/ksmbd/vfs.c | 166 ++++++++++++++++++++++++--------------------- fs/ksmbd/vfs.h | 9 ++- 5 files changed, 145 insertions(+), 211 deletions(-) diff --git a/fs/ksmbd/misc.c b/fs/ksmbd/misc.c index 3eac3c01749f..6a19f4bc692d 100644 --- a/fs/ksmbd/misc.c +++ b/fs/ksmbd/misc.c @@ -158,25 +158,21 @@ out: * Return : windows path string or error */ -char *convert_to_nt_pathname(char *filename, char *sharepath) +char *convert_to_nt_pathname(char *filename) { char *ab_pathname; - int len, name_len; - name_len = strlen(filename); - ab_pathname = kmalloc(name_len, GFP_KERNEL); - if (!ab_pathname) - return NULL; + if (strlen(filename) == 0) { + ab_pathname = kmalloc(2, GFP_KERNEL); + ab_pathname[0] = '\\'; + ab_pathname[1] = '\0'; + } else { + ab_pathname = kstrdup(filename, GFP_KERNEL); + if (!ab_pathname) + return NULL; - ab_pathname[0] = '\\'; - ab_pathname[1] = '\0'; - - len = strlen(sharepath); - if (!strncmp(filename, sharepath, len) && name_len != len) { - strscpy(ab_pathname, &filename[len], name_len); ksmbd_conv_path_to_windows(ab_pathname); } - return ab_pathname; } @@ -191,77 +187,19 @@ int get_nlink(struct kstat *st) return nlink; } -char *ksmbd_conv_path_to_unix(char *path) +void ksmbd_conv_path_to_unix(char *path) { - size_t path_len, remain_path_len, out_path_len; - char *out_path, *out_next; - int i, pre_dotdot_cnt = 0, slash_cnt = 0; - bool is_last; - strreplace(path, '\\', '/'); - path_len = strlen(path); - remain_path_len = path_len; - if (path_len == 0) - return ERR_PTR(-EINVAL); +} - out_path = kzalloc(path_len + 2, GFP_KERNEL); - if (!out_path) - return ERR_PTR(-ENOMEM); - out_path_len = 0; - out_next = out_path; +void ksmbd_strip_last_slash(char *path) +{ + int len = strlen(path); - do { - char *name = path + path_len - remain_path_len; - char *next = strchrnul(name, '/'); - size_t name_len = next - name; - - is_last = !next[0]; - if (name_len == 2 && name[0] == '.' && name[1] == '.') { - pre_dotdot_cnt++; - /* handle the case that path ends with "/.." */ - if (is_last) - goto follow_dotdot; - } else { - if (pre_dotdot_cnt) { -follow_dotdot: - slash_cnt = 0; - for (i = out_path_len - 1; i >= 0; i--) { - if (out_path[i] == '/' && - ++slash_cnt == pre_dotdot_cnt + 1) - break; - } - - if (i < 0 && - slash_cnt != pre_dotdot_cnt) { - kfree(out_path); - return ERR_PTR(-EINVAL); - } - - out_next = &out_path[i+1]; - *out_next = '\0'; - out_path_len = i + 1; - - } - - if (name_len != 0 && - !(name_len == 1 && name[0] == '.') && - !(name_len == 2 && name[0] == '.' && name[1] == '.')) { - next[0] = '\0'; - sprintf(out_next, "%s/", name); - out_next += name_len + 1; - out_path_len += name_len + 1; - next[0] = '/'; - } - pre_dotdot_cnt = 0; - } - - remain_path_len -= name_len + 1; - } while (!is_last); - - if (out_path_len > 0) - out_path[out_path_len-1] = '\0'; - path[path_len] = '\0'; - return out_path; + while (len && path[len - 1] == '/') { + path[len - 1] = '\0'; + len--; + } } void ksmbd_conv_path_to_windows(char *path) @@ -298,7 +236,7 @@ char *ksmbd_extract_sharename(char *treename) * * Return: converted name on success, otherwise NULL */ -char *convert_to_unix_name(struct ksmbd_share_config *share, char *name) +char *convert_to_unix_name(struct ksmbd_share_config *share, const char *name) { int no_slash = 0, name_len, path_len; char *new_name; diff --git a/fs/ksmbd/misc.h b/fs/ksmbd/misc.h index b7b10139ada2..253366bd0951 100644 --- a/fs/ksmbd/misc.h +++ b/fs/ksmbd/misc.h @@ -14,12 +14,13 @@ struct ksmbd_file; int match_pattern(const char *str, size_t len, const char *pattern); int ksmbd_validate_filename(char *filename); int parse_stream_name(char *filename, char **stream_name, int *s_type); -char *convert_to_nt_pathname(char *filename, char *sharepath); +char *convert_to_nt_pathname(char *filename); int get_nlink(struct kstat *st); -char *ksmbd_conv_path_to_unix(char *path); +void ksmbd_conv_path_to_unix(char *path); +void ksmbd_strip_last_slash(char *path); void ksmbd_conv_path_to_windows(char *path); char *ksmbd_extract_sharename(char *treename); -char *convert_to_unix_name(struct ksmbd_share_config *share, char *name); +char *convert_to_unix_name(struct ksmbd_share_config *share, const char *name); #define KSMBD_DIR_INFO_ALIGNMENT 8 struct ksmbd_dir_info; diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 0c49a0e887d3..761e12171dc4 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -634,7 +634,7 @@ static char * smb2_get_name(struct ksmbd_share_config *share, const char *src, const int maxlen, struct nls_table *local_nls) { - char *name, *norm_name, *unixname; + char *name; name = smb_strndup_from_utf16(src, maxlen, 1, local_nls); if (IS_ERR(name)) { @@ -642,23 +642,9 @@ smb2_get_name(struct ksmbd_share_config *share, const char *src, return name; } - /* change it to absolute unix name */ - norm_name = ksmbd_conv_path_to_unix(name); - if (IS_ERR(norm_name)) { - kfree(name); - return norm_name; - } - kfree(name); - - unixname = convert_to_unix_name(share, norm_name); - kfree(norm_name); - if (!unixname) { - pr_err("can not convert absolute name\n"); - return ERR_PTR(-ENOMEM); - } - - ksmbd_debug(SMB, "absolute name = %s\n", unixname); - return unixname; + ksmbd_conv_path_to_unix(name); + ksmbd_strip_last_slash(name); + return name; } int setup_async_work(struct ksmbd_work *work, void (*fn)(void **), void **arg) @@ -2352,7 +2338,7 @@ static int smb2_creat(struct ksmbd_work *work, struct path *path, char *name, return rc; } - rc = ksmbd_vfs_kern_path(name, 0, path, 0); + rc = ksmbd_vfs_kern_path(work, name, 0, path, 0); if (rc) { pr_err("cannot get linux path (%s), err = %d\n", name, rc); @@ -2427,7 +2413,7 @@ int smb2_open(struct ksmbd_work *work) struct oplock_info *opinfo; __le32 *next_ptr = NULL; int req_op_level = 0, open_flags = 0, may_flags = 0, file_info = 0; - int rc = 0, len = 0; + int rc = 0; int contxt_cnt = 0, query_disk_id = 0; int maximal_access_ctxt = 0, posix_ctxt = 0; int s_type = 0; @@ -2499,17 +2485,11 @@ int smb2_open(struct ksmbd_work *work) goto err_out1; } } else { - len = strlen(share->path); - ksmbd_debug(SMB, "share path len %d\n", len); - name = kmalloc(len + 1, GFP_KERNEL); + name = kstrdup("", GFP_KERNEL); if (!name) { - rsp->hdr.Status = STATUS_NO_MEMORY; rc = -ENOMEM; goto err_out1; } - - memcpy(name, share->path, len); - *(name + len) = '\0'; } req_op_level = req->RequestedOplockLevel; @@ -2632,7 +2612,7 @@ int smb2_open(struct ksmbd_work *work) goto err_out1; } - rc = ksmbd_vfs_kern_path(name, LOOKUP_NO_SYMLINKS, &path, 1); + rc = ksmbd_vfs_kern_path(work, name, LOOKUP_NO_SYMLINKS, &path, 1); if (!rc) { if (req->CreateOptions & FILE_DELETE_ON_CLOSE_LE) { /* @@ -2661,11 +2641,8 @@ int smb2_open(struct ksmbd_work *work) } if (rc) { - if (rc == -EACCES) { - ksmbd_debug(SMB, - "User does not have right permission\n"); + if (rc != -ENOENT) goto err_out; - } ksmbd_debug(SMB, "can not get linux path for %s, rc = %d\n", name, rc); rc = 0; @@ -3161,7 +3138,7 @@ err_out1: rsp->hdr.Status = STATUS_INVALID_PARAMETER; else if (rc == -EOPNOTSUPP) rsp->hdr.Status = STATUS_NOT_SUPPORTED; - else if (rc == -EACCES || rc == -ESTALE) + else if (rc == -EACCES || rc == -ESTALE || rc == -EXDEV) rsp->hdr.Status = STATUS_ACCESS_DENIED; else if (rc == -ENOENT) rsp->hdr.Status = STATUS_OBJECT_NAME_INVALID; @@ -4277,8 +4254,7 @@ static int get_file_all_info(struct ksmbd_work *work, return -EACCES; } - filename = convert_to_nt_pathname(fp->filename, - work->tcon->share_conf->path); + filename = convert_to_nt_pathname(fp->filename); if (!filename) return -ENOMEM; @@ -4733,7 +4709,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, int rc = 0, len; int fs_infoclass_size = 0; - rc = ksmbd_vfs_kern_path(share->path, LOOKUP_NO_SYMLINKS, &path, 0); + rc = kern_path(share->path, LOOKUP_NO_SYMLINKS, &path); if (rc) { pr_err("cannot create vfs path\n"); return -EIO; @@ -5282,7 +5258,7 @@ static int smb2_rename(struct ksmbd_work *work, goto out; len = strlen(new_name); - if (new_name[len - 1] != '/') { + if (len > 0 && new_name[len - 1] != '/') { pr_err("not allow base filename in rename\n"); rc = -ESHARE; goto out; @@ -5310,11 +5286,14 @@ static int smb2_rename(struct ksmbd_work *work, } ksmbd_debug(SMB, "new name %s\n", new_name); - rc = ksmbd_vfs_kern_path(new_name, LOOKUP_NO_SYMLINKS, &path, 1); - if (rc) + rc = ksmbd_vfs_kern_path(work, new_name, LOOKUP_NO_SYMLINKS, &path, 1); + if (rc) { + if (rc != -ENOENT) + goto out; file_present = false; - else + } else { path_put(&path); + } if (ksmbd_share_veto_filename(share, new_name)) { rc = -ENOENT; @@ -5384,11 +5363,14 @@ static int smb2_create_link(struct ksmbd_work *work, } ksmbd_debug(SMB, "target name is %s\n", target_name); - rc = ksmbd_vfs_kern_path(link_name, LOOKUP_NO_SYMLINKS, &path, 0); - if (rc) + rc = ksmbd_vfs_kern_path(work, link_name, LOOKUP_NO_SYMLINKS, &path, 0); + if (rc) { + if (rc != -ENOENT) + goto out; file_present = false; - else + } else { path_put(&path); + } if (file_info->ReplaceIfExists) { if (file_present) { @@ -5548,7 +5530,7 @@ static int set_file_allocation_info(struct ksmbd_work *work, * inode size is retained by backup inode size. */ size = i_size_read(inode); - rc = ksmbd_vfs_truncate(work, NULL, fp, alloc_blks * 512); + rc = ksmbd_vfs_truncate(work, fp, alloc_blks * 512); if (rc) { pr_err("truncate failed! filename : %s, err %d\n", fp->filename, rc); @@ -5585,7 +5567,7 @@ static int set_end_of_file_info(struct ksmbd_work *work, struct ksmbd_file *fp, if (inode->i_sb->s_magic != MSDOS_SUPER_MAGIC) { ksmbd_debug(SMB, "filename : %s truncated to newsize %lld\n", fp->filename, newsize); - rc = ksmbd_vfs_truncate(work, NULL, fp, newsize); + rc = ksmbd_vfs_truncate(work, fp, newsize); if (rc) { ksmbd_debug(SMB, "truncate failed! filename : %s err %d\n", fp->filename, rc); @@ -5862,7 +5844,7 @@ int smb2_set_info(struct ksmbd_work *work) return 0; err_out: - if (rc == -EACCES || rc == -EPERM) + if (rc == -EACCES || rc == -EPERM || rc == -EXDEV) rsp->hdr.Status = STATUS_ACCESS_DENIED; else if (rc == -EINVAL) rsp->hdr.Status = STATUS_INVALID_PARAMETER; diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c index 3733e4944c1d..b41954294d38 100644 --- a/fs/ksmbd/vfs.c +++ b/fs/ksmbd/vfs.c @@ -19,6 +19,8 @@ #include #include +#include "../internal.h" /* for vfs_path_lookup */ + #include "glob.h" #include "oplock.h" #include "connection.h" @@ -44,7 +46,6 @@ static char *extract_last_component(char *path) p++; } else { p = NULL; - pr_err("Invalid path %s\n", path); } return p; } @@ -155,7 +156,7 @@ int ksmbd_vfs_query_maximal_access(struct user_namespace *user_ns, /** * ksmbd_vfs_create() - vfs helper for smb create file * @work: work - * @name: file name + * @name: file name that is relative to share * @mode: file create mode * * Return: 0 on success, otherwise error @@ -166,7 +167,8 @@ int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode) struct dentry *dentry; int err; - dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_NO_SYMLINKS); + dentry = ksmbd_vfs_kern_path_create(work, name, + LOOKUP_NO_SYMLINKS, &path); if (IS_ERR(dentry)) { err = PTR_ERR(dentry); if (err != -ENOENT) @@ -191,7 +193,7 @@ int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode) /** * ksmbd_vfs_mkdir() - vfs helper for smb create directory * @work: work - * @name: directory name + * @name: directory name that is relative to share * @mode: directory create mode * * Return: 0 on success, otherwise error @@ -203,8 +205,9 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) struct dentry *dentry; int err; - dentry = kern_path_create(AT_FDCWD, name, &path, - LOOKUP_NO_SYMLINKS | LOOKUP_DIRECTORY); + dentry = ksmbd_vfs_kern_path_create(work, name, + LOOKUP_NO_SYMLINKS | LOOKUP_DIRECTORY, + &path); if (IS_ERR(dentry)) { err = PTR_ERR(dentry); if (err != -EEXIST) @@ -579,7 +582,7 @@ int ksmbd_vfs_fsync(struct ksmbd_work *work, u64 fid, u64 p_id) /** * ksmbd_vfs_remove_file() - vfs helper for smb rmdir or unlink - * @name: absolute directory or file name + * @name: directory or file name that is relative to share * * Return: 0 on success, otherwise error */ @@ -593,7 +596,7 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, char *name) if (ksmbd_override_fsids(work)) return -ENOMEM; - err = kern_path(name, LOOKUP_NO_SYMLINKS, &path); + err = ksmbd_vfs_kern_path(work, name, LOOKUP_NO_SYMLINKS, &path, false); if (err) { ksmbd_debug(VFS, "can't get %s, err %d\n", name, err); ksmbd_revert_fsids(work); @@ -638,7 +641,7 @@ out_err: /** * ksmbd_vfs_link() - vfs helper for creating smb hardlink * @oldname: source file name - * @newname: hardlink name + * @newname: hardlink name that is relative to share * * Return: 0 on success, otherwise error */ @@ -659,8 +662,9 @@ int ksmbd_vfs_link(struct ksmbd_work *work, const char *oldname, goto out1; } - dentry = kern_path_create(AT_FDCWD, newname, &newpath, - LOOKUP_NO_SYMLINKS | LOOKUP_REVAL); + dentry = ksmbd_vfs_kern_path_create(work, newname, + LOOKUP_NO_SYMLINKS | LOOKUP_REVAL, + &newpath); if (IS_ERR(dentry)) { err = PTR_ERR(dentry); pr_err("path create err for %s, err %d\n", newname, err); @@ -781,14 +785,17 @@ int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp, int err; dst_name = extract_last_component(newname); - if (!dst_name) - return -EINVAL; + if (!dst_name) { + dst_name = newname; + newname = ""; + } src_dent_parent = dget_parent(fp->filp->f_path.dentry); src_dent = fp->filp->f_path.dentry; - err = kern_path(newname, LOOKUP_NO_SYMLINKS | LOOKUP_DIRECTORY, - &dst_path); + err = ksmbd_vfs_kern_path(work, newname, + LOOKUP_NO_SYMLINKS | LOOKUP_DIRECTORY, + &dst_path, false); if (err) { ksmbd_debug(VFS, "Cannot get path for %s [%d]\n", newname, err); goto out; @@ -834,61 +841,43 @@ out: /** * ksmbd_vfs_truncate() - vfs helper for smb file truncate * @work: work - * @name: old filename * @fid: file id of old file * @size: truncate to given size * * Return: 0 on success, otherwise error */ -int ksmbd_vfs_truncate(struct ksmbd_work *work, const char *name, +int ksmbd_vfs_truncate(struct ksmbd_work *work, struct ksmbd_file *fp, loff_t size) { - struct path path; int err = 0; + struct file *filp; + + filp = fp->filp; + + /* Do we need to break any of a levelII oplock? */ + smb_break_all_levII_oplock(work, fp, 1); + + if (!work->tcon->posix_extensions) { + struct inode *inode = file_inode(filp); + + if (size < inode->i_size) { + err = check_lock_range(filp, size, + inode->i_size - 1, WRITE); + } else { + err = check_lock_range(filp, inode->i_size, + size - 1, WRITE); + } - if (name) { - err = kern_path(name, LOOKUP_NO_SYMLINKS, &path); if (err) { - pr_err("cannot get linux path for %s, err %d\n", - name, err); - return err; + pr_err("failed due to lock\n"); + return -EAGAIN; } - err = vfs_truncate(&path, size); - if (err) - pr_err("truncate failed for %s err %d\n", - name, err); - path_put(&path); - } else { - struct file *filp; - - filp = fp->filp; - - /* Do we need to break any of a levelII oplock? */ - smb_break_all_levII_oplock(work, fp, 1); - - if (!work->tcon->posix_extensions) { - struct inode *inode = file_inode(filp); - - if (size < inode->i_size) { - err = check_lock_range(filp, size, - inode->i_size - 1, WRITE); - } else { - err = check_lock_range(filp, inode->i_size, - size - 1, WRITE); - } - - if (err) { - pr_err("failed due to lock\n"); - return -EAGAIN; - } - } - - err = vfs_truncate(&filp->f_path, size); - if (err) - pr_err("truncate failed for filename : %s err %d\n", - fp->filename, err); } + err = vfs_truncate(&filp->f_path, size); + if (err) + pr_err("truncate failed for filename : %s err %d\n", + fp->filename, err); return err; } @@ -1206,22 +1195,25 @@ static int ksmbd_vfs_lookup_in_dir(struct path *dir, char *name, size_t namelen) /** * ksmbd_vfs_kern_path() - lookup a file and get path info - * @name: name of file for lookup + * @name: file path that is relative to share * @flags: lookup flags * @path: if lookup succeed, return path info * @caseless: caseless filename lookup * * Return: 0 on success, otherwise error */ -int ksmbd_vfs_kern_path(char *name, unsigned int flags, struct path *path, - bool caseless) +int ksmbd_vfs_kern_path(struct ksmbd_work *work, char *name, + unsigned int flags, struct path *path, bool caseless) { + struct ksmbd_share_config *share_conf = work->tcon->share_conf; int err; - if (name[0] != '/') - return -EINVAL; - - err = kern_path(name, flags, path); + flags |= LOOKUP_BENEATH; + err = vfs_path_lookup(share_conf->vfs_path.dentry, + share_conf->vfs_path.mnt, + name, + flags, + path); if (!err) return 0; @@ -1235,11 +1227,10 @@ int ksmbd_vfs_kern_path(char *name, unsigned int flags, struct path *path, return -ENOMEM; path_len = strlen(filepath); - remain_len = path_len - 1; + remain_len = path_len; - err = kern_path("/", flags, &parent); - if (err) - goto out; + parent = share_conf->vfs_path; + path_get(&parent); while (d_can_lookup(parent.dentry)) { char *filename = filepath + path_len - remain_len; @@ -1252,21 +1243,21 @@ int ksmbd_vfs_kern_path(char *name, unsigned int flags, struct path *path, err = ksmbd_vfs_lookup_in_dir(&parent, filename, filename_len); - if (err) { - path_put(&parent); - goto out; - } - path_put(&parent); - next[0] = '\0'; - - err = kern_path(filepath, flags, &parent); if (err) goto out; - if (is_last) { - path->mnt = parent.mnt; - path->dentry = parent.dentry; + next[0] = '\0'; + + err = vfs_path_lookup(share_conf->vfs_path.dentry, + share_conf->vfs_path.mnt, + filepath, + flags, + &parent); + if (err) + goto out; + else if (is_last) { + *path = parent; goto out; } @@ -1282,6 +1273,23 @@ out: return err; } +struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work, + const char *name, + unsigned int flags, + struct path *path) +{ + char *abs_name; + struct dentry *dent; + + abs_name = convert_to_unix_name(work->tcon->share_conf, name); + if (!abs_name) + return ERR_PTR(-ENOMEM); + + dent = kern_path_create(AT_FDCWD, abs_name, path, flags); + kfree(abs_name); + return dent; +} + int ksmbd_vfs_remove_acl_xattrs(struct user_namespace *user_ns, struct dentry *dentry) { diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h index 85db50abdb24..7b1dcaa3fbdc 100644 --- a/fs/ksmbd/vfs.h +++ b/fs/ksmbd/vfs.h @@ -126,7 +126,7 @@ int ksmbd_vfs_link(struct ksmbd_work *work, int ksmbd_vfs_getattr(struct path *path, struct kstat *stat); int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp, char *newname); -int ksmbd_vfs_truncate(struct ksmbd_work *work, const char *name, +int ksmbd_vfs_truncate(struct ksmbd_work *work, struct ksmbd_file *fp, loff_t size); struct srv_copychunk; int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work, @@ -152,8 +152,13 @@ int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name, size_t *xattr_stream_name_size, int s_type); int ksmbd_vfs_remove_xattr(struct user_namespace *user_ns, struct dentry *dentry, char *attr_name); -int ksmbd_vfs_kern_path(char *name, unsigned int flags, struct path *path, +int ksmbd_vfs_kern_path(struct ksmbd_work *work, + char *name, unsigned int flags, struct path *path, bool caseless); +struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work, + const char *name, + unsigned int flags, + struct path *path); int ksmbd_vfs_empty_dir(struct ksmbd_file *fp); void ksmbd_vfs_set_fadvise(struct file *filp, __le32 option); int ksmbd_vfs_zero_data(struct ksmbd_work *work, struct ksmbd_file *fp, From 5816b3e6577eaa676ceb00a848f0fd65fe2adc29 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 26 Sep 2021 14:08:19 -0700 Subject: [PATCH 543/543] Linux 5.15-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5e7c1d854441..437ccc66a1c2 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Opossums on Parade # *DOCUMENTATION*