From 209c805835b29495cf66cc705b206da8f4a68e6e Mon Sep 17 00:00:00 2001 From: Al Cooper Date: Fri, 2 Oct 2020 15:01:15 -0400 Subject: [PATCH 001/167] phy: usb: Fix incorrect clearing of tca_drv_sel bit in SETUP reg for 7211 The 7211a0 has a tca_drv_sel bit in the USB SETUP register that should never be enabled. This feature is only used if there is a USB Type-C PHY, and the 7211 does not have one. If the bit is enabled, the VBUS signal will never be asserted. In the 7211a0, the bit was incorrectly defaulted to on so the driver had to clear the bit. In the 7211c0 the state was inverted so the driver should no longer clear the bit. This hasn't been a problem because all current 7211 boards don't use the VBUS signal, but there are some future customer boards that may use it. Signed-off-by: Al Cooper Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/20201002190115.48017-1-alcooperx@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c index 456dc4a100c2..e63457e145c7 100644 --- a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c +++ b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c @@ -270,11 +270,6 @@ static void usb_init_common_7211b0(struct brcm_usb_init_params *params) reg |= params->mode << USB_PHY_UTMI_CTL_1_PHY_MODE_SHIFT; brcm_usb_writel(reg, usb_phy + USB_PHY_UTMI_CTL_1); - /* Fix the incorrect default */ - reg = brcm_usb_readl(ctrl + USB_CTRL_SETUP); - reg &= ~USB_CTRL_SETUP_tca_drv_sel_MASK; - brcm_usb_writel(reg, ctrl + USB_CTRL_SETUP); - usb_init_common(params); /* From eb9c4dd9bdfdebaa13846c16a8c79b5b336066b6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2020 10:58:20 +0100 Subject: [PATCH 002/167] phy: tegra: xusb: Fix dangling pointer on probe failure If, for some reason, the xusb PHY fails to probe, it leaves a dangling pointer attached to the platform device structure. This would normally be harmless, but the Tegra XHCI driver then goes and extract that pointer from the PHY device. Things go downhill from there: 8.752082] [004d554e5145533c] address between user and kernel address ranges [ 8.752085] Internal error: Oops: 96000004 [#1] PREEMPT SMP [ 8.752088] Modules linked in: max77620_regulator(E+) xhci_tegra(E+) sdhci_tegra(E+) xhci_hcd(E) sdhci_pltfm(E) cqhci(E) fixed(E) usbcore(E) scsi_mod(E) sdhci(E) host1x(E+) [ 8.752103] CPU: 4 PID: 158 Comm: systemd-udevd Tainted: G S W E 5.9.0-rc7-00298-gf6337624c4fe #1980 [ 8.752105] Hardware name: NVIDIA Jetson TX2 Developer Kit (DT) [ 8.752108] pstate: 20000005 (nzCv daif -PAN -UAO BTYPE=--) [ 8.752115] pc : kobject_put+0x1c/0x21c [ 8.752120] lr : put_device+0x20/0x30 [ 8.752121] sp : ffffffc012eb3840 [ 8.752122] x29: ffffffc012eb3840 x28: ffffffc010e82638 [ 8.752125] x27: ffffffc008d56440 x26: 0000000000000000 [ 8.752128] x25: ffffff81eb508200 x24: 0000000000000000 [ 8.752130] x23: ffffff81eb538800 x22: 0000000000000000 [ 8.752132] x21: 00000000fffffdfb x20: ffffff81eb538810 [ 8.752134] x19: 3d4d554e51455300 x18: 0000000000000020 [ 8.752136] x17: ffffffc008d00270 x16: ffffffc008d00c94 [ 8.752138] x15: 0000000000000004 x14: ffffff81ebd4ae90 [ 8.752140] x13: 0000000000000000 x12: ffffff81eb86a4e8 [ 8.752142] x11: ffffff81eb86a480 x10: ffffff81eb862fea [ 8.752144] x9 : ffffffc01055fb28 x8 : ffffff81eb86a4a8 [ 8.752146] x7 : 0000000000000001 x6 : 0000000000000001 [ 8.752148] x5 : ffffff81dff8bc38 x4 : 0000000000000000 [ 8.752150] x3 : 0000000000000001 x2 : 0000000000000001 [ 8.752152] x1 : 0000000000000002 x0 : 3d4d554e51455300 [ 8.752155] Call trace: [ 8.752157] kobject_put+0x1c/0x21c [ 8.752160] put_device+0x20/0x30 [ 8.752164] tegra_xusb_padctl_put+0x24/0x3c [ 8.752170] tegra_xusb_probe+0x8b0/0xd10 [xhci_tegra] [ 8.752174] platform_drv_probe+0x60/0xb4 [ 8.752176] really_probe+0xf0/0x504 [ 8.752179] driver_probe_device+0x100/0x170 [ 8.752181] device_driver_attach+0xcc/0xd4 [ 8.752183] __driver_attach+0xb0/0x17c [ 8.752185] bus_for_each_dev+0x7c/0xd4 [ 8.752187] driver_attach+0x30/0x3c [ 8.752189] bus_add_driver+0x154/0x250 [ 8.752191] driver_register+0x84/0x140 [ 8.752193] __platform_driver_register+0x54/0x60 [ 8.752197] tegra_xusb_init+0x40/0x1000 [xhci_tegra] [ 8.752201] do_one_initcall+0x54/0x2d0 [ 8.752205] do_init_module+0x68/0x29c [ 8.752207] load_module+0x2178/0x26c0 [ 8.752209] __do_sys_finit_module+0xb0/0x120 [ 8.752211] __arm64_sys_finit_module+0x2c/0x40 [ 8.752215] el0_svc_common.constprop.0+0x80/0x240 [ 8.752218] do_el0_svc+0x30/0xa0 [ 8.752220] el0_svc+0x18/0x50 [ 8.752223] el0_sync_handler+0x90/0x318 [ 8.752225] el0_sync+0x158/0x180 [ 8.752230] Code: a9bd7bfd 910003fd a90153f3 aa0003f3 (3940f000) [ 8.752232] ---[ end trace 90f6c89d62d85ff5 ]--- Reset the pointer on probe failure fixes the issue. Fixes: 53d2a715c2403 ("phy: Add Tegra XUSB pad controller support") Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201013095820.311376-1-maz@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/tegra/xusb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c index de4a46fe1763..ad88d74c1884 100644 --- a/drivers/phy/tegra/xusb.c +++ b/drivers/phy/tegra/xusb.c @@ -1242,6 +1242,7 @@ power_down: reset: reset_control_assert(padctl->rst); remove: + platform_set_drvdata(pdev, NULL); soc->ops->remove(padctl); return err; } From fcea94ac6154545dd13b17c947c07f5e0a54c121 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 26 Oct 2020 13:59:42 -0700 Subject: [PATCH 003/167] phy: qcom-qmp: Initialize another pointer to NULL This probe function is too complicated and should be refactored. For now let's just set this variable to NULL and keep the static analysis tools happy. Fixes: 52e013d0bffa ("phy: qcom-qmp: Add support for DP in USB3+DP combo phy") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20201026205942.2861828-1-swboyd@chromium.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.c b/drivers/phy/qualcomm/phy-qcom-qmp.c index 5d33ad4d06f2..0cda16846962 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp.c @@ -3926,7 +3926,7 @@ static int qcom_qmp_phy_probe(struct platform_device *pdev) struct phy_provider *phy_provider; void __iomem *serdes; void __iomem *usb_serdes; - void __iomem *dp_serdes; + void __iomem *dp_serdes = NULL; const struct qmp_phy_combo_cfg *combo_cfg = NULL; const struct qmp_phy_cfg *cfg = NULL; const struct qmp_phy_cfg *usb_cfg = NULL; From 25d76fed7ffecca47be0249a5d5ec0a5dd92af67 Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Wed, 28 Oct 2020 16:59:09 +0800 Subject: [PATCH 004/167] phy: cpcap-usb: Use IRQF_ONESHOT Fixes coccicheck error: ./drivers/phy/motorola/phy-cpcap-usb.c:365:9-34: ERROR: Threaded IRQ with no primary handler requested without IRQF_ONESHOT Reported-by: Hulk Robot Signed-off-by: Zou Wei Link: https://lore.kernel.org/r/1603875549-107500-1-git-send-email-zou_wei@huawei.com Signed-off-by: Vinod Koul --- drivers/phy/motorola/phy-cpcap-usb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/phy/motorola/phy-cpcap-usb.c b/drivers/phy/motorola/phy-cpcap-usb.c index 089db0dea703..442522ba487f 100644 --- a/drivers/phy/motorola/phy-cpcap-usb.c +++ b/drivers/phy/motorola/phy-cpcap-usb.c @@ -364,7 +364,8 @@ static int cpcap_usb_init_irq(struct platform_device *pdev, error = devm_request_threaded_irq(ddata->dev, irq, NULL, cpcap_phy_irq_thread, - IRQF_SHARED, + IRQF_SHARED | + IRQF_ONESHOT, name, ddata); if (error) { dev_err(ddata->dev, "could not get irq %s: %i\n", From d9b5665fb3c822730857ba9119ead8b5e5ff967d Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 23 Oct 2020 13:57:32 +0200 Subject: [PATCH 005/167] kbuild: remove unused OBJSIZE The "size" tool has been solely used by s390 to enforce .bss section usage restrictions in early startup code. Since commit 980d5f9ab36b ("s390/boot: enable .bss section for compressed kernel") and commit 2e83e0eb85ca ("s390: clean .bss before running uncompressed kernel") these restrictions have been lifted for the decompressor and uncompressed kernel and the size tool is now unused. Signed-off-by: Vasily Gorbik Reviewed-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- Documentation/kbuild/llvm.rst | 5 ++--- Makefile | 4 +--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/Documentation/kbuild/llvm.rst b/Documentation/kbuild/llvm.rst index cf3ca236d2cc..21c847890d03 100644 --- a/Documentation/kbuild/llvm.rst +++ b/Documentation/kbuild/llvm.rst @@ -57,9 +57,8 @@ to enable them. :: They can be enabled individually. The full list of the parameters: :: make CC=clang LD=ld.lld AR=llvm-ar NM=llvm-nm STRIP=llvm-strip \ - OBJCOPY=llvm-objcopy OBJDUMP=llvm-objdump OBJSIZE=llvm-size \ - READELF=llvm-readelf HOSTCC=clang HOSTCXX=clang++ HOSTAR=llvm-ar \ - HOSTLD=ld.lld + OBJCOPY=llvm-objcopy OBJDUMP=llvm-objdump READELF=llvm-readelf \ + HOSTCC=clang HOSTCXX=clang++ HOSTAR=llvm-ar HOSTLD=ld.lld Currently, the integrated assembler is disabled by default. You can pass ``LLVM_IAS=1`` to enable it. diff --git a/Makefile b/Makefile index 9e7fd6a065a7..f353886dbf44 100644 --- a/Makefile +++ b/Makefile @@ -433,7 +433,6 @@ NM = llvm-nm OBJCOPY = llvm-objcopy OBJDUMP = llvm-objdump READELF = llvm-readelf -OBJSIZE = llvm-size STRIP = llvm-strip else CC = $(CROSS_COMPILE)gcc @@ -443,7 +442,6 @@ NM = $(CROSS_COMPILE)nm OBJCOPY = $(CROSS_COMPILE)objcopy OBJDUMP = $(CROSS_COMPILE)objdump READELF = $(CROSS_COMPILE)readelf -OBJSIZE = $(CROSS_COMPILE)size STRIP = $(CROSS_COMPILE)strip endif PAHOLE = pahole @@ -509,7 +507,7 @@ KBUILD_LDFLAGS := CLANG_FLAGS := export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC -export CPP AR NM STRIP OBJCOPY OBJDUMP OBJSIZE READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL +export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL export PERL PYTHON PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS LDFLAGS_MODULE From d1889589a4f54b2d1d7075d608b596d6fcfd3d96 Mon Sep 17 00:00:00 2001 From: Sven Joachim Date: Mon, 26 Oct 2020 20:32:16 +0100 Subject: [PATCH 006/167] builddeb: Fix rootless build in setuid/setgid directory Building 5.10-rc1 in a setgid directory failed with the following error: dpkg-deb: error: control directory has bad permissions 2755 (must be >=0755 and <=0775) When building with fakeroot, the earlier chown call would have removed the setgid bits, but in a rootless build they remain. Fixes: 3e8541803624 ("builddeb: Enable rootless builds") Cc: Guillem Jover Signed-off-by: Sven Joachim Signed-off-by: Masahiro Yamada --- scripts/package/builddeb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/package/builddeb b/scripts/package/builddeb index 1b11f8993629..91a502bb97e8 100755 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -45,6 +45,8 @@ create_package() { chmod -R go-w "$pdir" # in case we are in a restrictive umask environment like 0077 chmod -R a+rX "$pdir" + # in case we build in a setuid/setgid directory + chmod -R ug-s "$pdir" # Create the package dpkg-gencontrol -p$pname -P"$pdir" From 78e91588510919a0dc9bd48916e760c1ba5797d2 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 3 Nov 2020 19:23:27 +0800 Subject: [PATCH 007/167] usb: cdns3: gadget: initialize link_trb as NULL There is an uninitialized variable "link_trb" usage at function cdns3_ep_run_transfer. Fixed it by initialize "link_trb" as NULL. Fixes: 4e218882eb5a ("usb: cdns3: gadget: improve the dump TRB operation at cdns3_ep_run_transfer") Reported-by: Dan Carpenter Signed-off-by: Peter Chen --- drivers/usb/cdns3/gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/gadget.c b/drivers/usb/cdns3/gadget.c index 66c1e6723eb1..1ee6d3941dd4 100644 --- a/drivers/usb/cdns3/gadget.c +++ b/drivers/usb/cdns3/gadget.c @@ -1114,7 +1114,7 @@ static int cdns3_ep_run_transfer(struct cdns3_endpoint *priv_ep, struct cdns3_device *priv_dev = priv_ep->cdns3_dev; struct cdns3_request *priv_req; struct cdns3_trb *trb; - struct cdns3_trb *link_trb; + struct cdns3_trb *link_trb = NULL; dma_addr_t trb_dma; u32 togle_pcs = 1; int sg_iter = 0; From 40252dd7cf7cad81c784c695c36bc475b518f0ea Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 3 Nov 2020 22:16:00 +0800 Subject: [PATCH 008/167] usb: cdns3: gadget: calculate TD_SIZE based on TD The TRB entry TD_SIZE is the packet number for the TD (request) but not the each TRB, so it only needs to be assigned for the first TRB during the TD, and the value of it is for TD too. Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") Signed-off-by: Peter Chen --- drivers/usb/cdns3/gadget.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/usb/cdns3/gadget.c b/drivers/usb/cdns3/gadget.c index 1ee6d3941dd4..365f30fb1159 100644 --- a/drivers/usb/cdns3/gadget.c +++ b/drivers/usb/cdns3/gadget.c @@ -1193,10 +1193,20 @@ static int cdns3_ep_run_transfer(struct cdns3_endpoint *priv_ep, /* set incorrect Cycle Bit for first trb*/ control = priv_ep->pcs ? 0 : TRB_CYCLE; + trb->length = 0; + if (priv_dev->dev_ver >= DEV_VER_V2) { + u16 td_size; + + td_size = DIV_ROUND_UP(request->length, + priv_ep->endpoint.maxpacket); + if (priv_dev->gadget.speed == USB_SPEED_SUPER) + trb->length = TRB_TDL_SS_SIZE(td_size); + else + control |= TRB_TDL_HS_SIZE(td_size); + } do { u32 length; - u16 td_size = 0; /* fill TRB */ control |= TRB_TYPE(TRB_NORMAL); @@ -1208,20 +1218,12 @@ static int cdns3_ep_run_transfer(struct cdns3_endpoint *priv_ep, length = request->length; } - if (likely(priv_dev->dev_ver >= DEV_VER_V2)) - td_size = DIV_ROUND_UP(length, - priv_ep->endpoint.maxpacket); - else if (priv_ep->flags & EP_TDLCHK_EN) + if (priv_ep->flags & EP_TDLCHK_EN) total_tdl += DIV_ROUND_UP(length, priv_ep->endpoint.maxpacket); - trb->length = cpu_to_le32(TRB_BURST_LEN(priv_ep->trb_burst_size) | + trb->length |= cpu_to_le32(TRB_BURST_LEN(priv_ep->trb_burst_size) | TRB_LEN(length)); - if (priv_dev->gadget.speed == USB_SPEED_SUPER) - trb->length |= cpu_to_le32(TRB_TDL_SS_SIZE(td_size)); - else - control |= TRB_TDL_HS_SIZE(td_size); - pcs = priv_ep->pcs ? TRB_CYCLE : 0; /* From 231655eb55b0f9899054dec9432482dbf986a9c5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 10 Nov 2020 15:46:00 +0100 Subject: [PATCH 009/167] phy: intel: PHY_INTEL_KEEMBAY_EMMC should depend on ARCH_KEEMBAY The Intel Keem Bay eMMC PHY is only present on Intel Keem Bay SoCs. Hence add a dependency on ARCH_KEEMBAY, to prevent asking the user about this driver when configuring a kernel without Intel Keem Bay platform support. Fixes: 885c4f4d6cf448f6 ("phy: intel: Add Keem Bay eMMC PHY support") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20201110144600.3279752-1-geert+renesas@glider.be Signed-off-by: Vinod Koul --- drivers/phy/intel/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/intel/Kconfig b/drivers/phy/intel/Kconfig index 58ec695c92ec..62c24764654b 100644 --- a/drivers/phy/intel/Kconfig +++ b/drivers/phy/intel/Kconfig @@ -4,7 +4,7 @@ # config PHY_INTEL_KEEMBAY_EMMC tristate "Intel Keem Bay EMMC PHY driver" - depends on (OF && ARM64) || COMPILE_TEST + depends on ARCH_KEEMBAY || COMPILE_TEST depends on HAS_IOMEM select GENERIC_PHY select REGMAP_MMIO From 44786a26a7485e12a1d2aaad2adfb3c82f6ad171 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Fri, 13 Nov 2020 15:12:24 +0000 Subject: [PATCH 010/167] phy: qualcomm: usb: Fix SuperSpeed PHY OF dependency This Kconfig entry should declare a dependency on OF Fixes: 6076967a500c ("phy: qualcomm: usb: Add SuperSpeed PHY driver") Reported-by: kernel test robot Link: https://lkml.org/lkml/2020/11/13/414 Signed-off-by: Bryan O'Donoghue Link: https://lore.kernel.org/r/20201113151225.1657600-2-bryan.odonoghue@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/Kconfig b/drivers/phy/qualcomm/Kconfig index 928db510b86c..9129c4b8bb9b 100644 --- a/drivers/phy/qualcomm/Kconfig +++ b/drivers/phy/qualcomm/Kconfig @@ -98,7 +98,7 @@ config PHY_QCOM_USB_HS_28NM config PHY_QCOM_USB_SS tristate "Qualcomm USB Super-Speed PHY driver" - depends on ARCH_QCOM || COMPILE_TEST + depends on OF && (ARCH_QCOM || COMPILE_TEST) depends on EXTCON || !EXTCON # if EXTCON=m, this cannot be built-in select GENERIC_PHY help From 14839107b51cc0db19579039b1f72cba7a0c8049 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Fri, 13 Nov 2020 15:12:25 +0000 Subject: [PATCH 011/167] phy: qualcomm: Fix 28 nm Hi-Speed USB PHY OF dependency This Kconfig entry should declare a dependency on OF Fixes: 67b27dbeac4d ("phy: qualcomm: Add Synopsys 28nm Hi-Speed USB PHY driver") Signed-off-by: Bryan O'Donoghue Link: https://lore.kernel.org/r/20201113151225.1657600-3-bryan.odonoghue@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/Kconfig b/drivers/phy/qualcomm/Kconfig index 9129c4b8bb9b..7f6fcb8ec5ba 100644 --- a/drivers/phy/qualcomm/Kconfig +++ b/drivers/phy/qualcomm/Kconfig @@ -87,7 +87,7 @@ config PHY_QCOM_USB_HSIC config PHY_QCOM_USB_HS_28NM tristate "Qualcomm 28nm High-Speed PHY" - depends on ARCH_QCOM || COMPILE_TEST + depends on OF && (ARCH_QCOM || COMPILE_TEST) depends on EXTCON || !EXTCON # if EXTCON=m, this cannot be built-in select GENERIC_PHY help From fb89b2544b645527b3a359176999a416e65f5ada Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 14 Nov 2020 12:05:18 +0000 Subject: [PATCH 012/167] phy: mediatek: fix spelling mistake in Kconfig "veriosn" -> "version" There is a spelling mistake in the Kconfig. Fix it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20201114120518.416120-1-colin.king@canonical.com Signed-off-by: Vinod Koul --- drivers/phy/mediatek/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/mediatek/Kconfig b/drivers/phy/mediatek/Kconfig index 50c5e9306e19..c8126bde9d7c 100644 --- a/drivers/phy/mediatek/Kconfig +++ b/drivers/phy/mediatek/Kconfig @@ -12,7 +12,7 @@ config PHY_MTK_TPHY it supports multiple usb2.0, usb3.0 ports, PCIe and SATA, and meanwhile supports two version T-PHY which have different banks layout, the T-PHY with shared banks between - multi-ports is first version, otherwise is second veriosn, + multi-ports is first version, otherwise is second version, so you can easily distinguish them by banks layout. config PHY_MTK_UFS From 24880a87042b3032a6ac04d79cb51892c5a7901d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 15 Nov 2020 20:06:53 -0800 Subject: [PATCH 013/167] usb: typec: qcom-pmic-typec: fix builtin build errors Fix build errors when CONFIG_TYPEC_QCOM_PMIC=y and CONFIG_USB_ROLE_SWITCH=m by limiting the former to =m when USB_ROLE_SWITCH also =m. powerpc64-linux-ld: drivers/usb/typec/qcom-pmic-typec.o: in function `.qcom_pmic_typec_remove': qcom-pmic-typec.c:(.text+0x28): undefined reference to `.usb_role_switch_set_role' powerpc64-linux-ld: qcom-pmic-typec.c:(.text+0x64): undefined reference to `.usb_role_switch_put' powerpc64-linux-ld: drivers/usb/typec/qcom-pmic-typec.o: in function `.qcom_pmic_typec_check_connection': qcom-pmic-typec.c:(.text+0x120): undefined reference to `.usb_role_switch_set_role' powerpc64-linux-ld: drivers/usb/typec/qcom-pmic-typec.o: in function `.qcom_pmic_typec_probe': qcom-pmic-typec.c:(.text+0x360): undefined reference to `.fwnode_usb_role_switch_get' powerpc64-linux-ld: qcom-pmic-typec.c:(.text+0x4e4): undefined reference to `.usb_role_switch_put' Fixes: 6c8cf3695176 ("usb: typec: Add QCOM PMIC typec detection driver") Cc: linux-usb@vger.kernel.org Cc: Wesley Cheng Reported-by: kernel test robot Signed-off-by: Randy Dunlap Link: https://lore.kernel.org/r/20201116040653.7943-1-rdunlap@infradead.org Reviewed-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/typec/Kconfig b/drivers/usb/typec/Kconfig index 6c5908a37ee8..e7f120874c48 100644 --- a/drivers/usb/typec/Kconfig +++ b/drivers/usb/typec/Kconfig @@ -88,6 +88,7 @@ config TYPEC_STUSB160X config TYPEC_QCOM_PMIC tristate "Qualcomm PMIC USB Type-C driver" depends on ARCH_QCOM || COMPILE_TEST + depends on USB_ROLE_SWITCH || !USB_ROLE_SWITCH help Driver for supporting role switch over the Qualcomm PMIC. This will handle the USB Type-C role and orientation detection reported by the From 7381e27b1e563aa8a1c6bcf74a8cadb6901c283a Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Fri, 6 Nov 2020 16:48:47 +0200 Subject: [PATCH 014/167] interconnect: qcom: msm8974: Prevent integer overflow in rate When sync_state support got introduced recently, by default we try to set the NoCs to run initially at maximum rate. But as these values are aggregated, we may end with a really big clock rate value, which is then converted from "u64" to "long" during the clock rate rounding. But on 32bit platforms this may result an overflow. Fix it by making sure that the rate is within range. Reported-by: Luca Weiss Reviewed-by: Brian Masney Link: https://lore.kernel.org/r/20201106144847.7726-1-georgi.djakov@linaro.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/msm8974.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/interconnect/qcom/msm8974.c b/drivers/interconnect/qcom/msm8974.c index 3a313e11e73d..b6b639dad691 100644 --- a/drivers/interconnect/qcom/msm8974.c +++ b/drivers/interconnect/qcom/msm8974.c @@ -618,6 +618,8 @@ static int msm8974_icc_set(struct icc_node *src, struct icc_node *dst) do_div(rate, src_qn->buswidth); + rate = min_t(u32, rate, INT_MAX); + if (src_qn->rate == rate) return 0; @@ -758,6 +760,7 @@ static struct platform_driver msm8974_noc_driver = { .driver = { .name = "qnoc-msm8974", .of_match_table = msm8974_noc_of_match, + .sync_state = icc_sync_state, }, }; module_platform_driver(msm8974_noc_driver); From 9caf2d956cfa254c6d89c5f4d7b3f8235d75b28f Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Mon, 9 Nov 2020 14:45:12 +0200 Subject: [PATCH 015/167] interconnect: qcom: msm8974: Don't boost the NoC rate during boot It has been reported that on Fairphone 2 (msm8974-based), increasing the clock rate for some of the NoCs during boot may lead to hangs. Let's restore the original behavior and not touch the clock rate of any of the NoCs to fix the regression. Reported-by: Luca Weiss Tested-by: Luca Weiss Fixes: b1d681d8d324 ("interconnect: Add sync state support") Link: https://lore.kernel.org/r/20201109124512.10776-1-georgi.djakov@linaro.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/msm8974.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/interconnect/qcom/msm8974.c b/drivers/interconnect/qcom/msm8974.c index b6b639dad691..da68ce375a89 100644 --- a/drivers/interconnect/qcom/msm8974.c +++ b/drivers/interconnect/qcom/msm8974.c @@ -637,6 +637,14 @@ static int msm8974_icc_set(struct icc_node *src, struct icc_node *dst) return 0; } +static int msm8974_get_bw(struct icc_node *node, u32 *avg, u32 *peak) +{ + *avg = 0; + *peak = 0; + + return 0; +} + static int msm8974_icc_probe(struct platform_device *pdev) { const struct msm8974_icc_desc *desc; @@ -690,6 +698,7 @@ static int msm8974_icc_probe(struct platform_device *pdev) provider->aggregate = icc_std_aggregate; provider->xlate = of_icc_xlate_onecell; provider->data = data; + provider->get_bw = msm8974_get_bw; ret = icc_provider_add(provider); if (ret) { From fbdae7d6d04d2db36c687723920f612e93b2cbda Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 18 Nov 2020 13:15:15 +0100 Subject: [PATCH 016/167] ASoC: Intel: bytcr_rt5640: Fix HP Pavilion x2 Detachable quirks The HP Pavilion x2 Detachable line comes in many variants: 1. Bay Trail SoC + AXP288 PMIC, Micro-USB charging (10-k010nz, ...) DMI_SYS_VENDOR: "Hewlett-Packard" DMI_PRODUCT_NAME: "HP Pavilion x2 Detachable PC 10" DMI_BOARD_NAME: "8021" 2. Bay Trail SoC + AXP288 PMIC, Type-C charging (10-n000nd, 10-n010nl, ...) DMI_SYS_VENDOR: "Hewlett-Packard" DMI_PRODUCT_NAME: "HP Pavilion x2 Detachable" DMI_BOARD_NAME: "815D" 3. Cherry Trail SoC + AXP288 PMIC, Type-C charging (10-n101ng, ...) DMI_SYS_VENDOR: "HP" DMI_PRODUCT_NAME: "HP Pavilion x2 Detachable" DMI_BOARD_NAME: "813E" 4. Cherry Trail SoC + TI PMIC, Type-C charging (10-p002nd, 10-p018wm, ...) DMI_SYS_VENDOR: "HP" DMI_PRODUCT_NAME: "HP x2 Detachable 10-p0XX" DMI_BOARD_NAME: "827C" 5. Cherry Trail SoC + TI PMIC, Type-C charging (x2-210-g2, ...) DMI_SYS_VENDOR: "HP" DMI_PRODUCT_NAME: "HP x2 210 G2" DMI_BOARD_NAME: "82F4" Variant 1 needs the exact same quirk as variant 2, so relax the DMI check for the existing quirk a bit so that it matches both variant 1 and 2 (note the other variants will still not match). Variant 2 already has an existing quirk (which now also matches variant 1) Variant 3 uses a cx2072x codec, so is not applicable here. Variant 4 almost works with the defaults, but it also needs a quirk to fix jack-detection, add a new quirk for this. Variant 5 does use a RT5640 codec (based on old dmesg output), but was otherwise not tested, keep using the defaults for this variant. Fixes: ec8e8418ff7d ("ASoC: Intel: bytcr_rt5640: Add quirks for various devices") Signed-off-by: Hans de Goede Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20201118121515.11441-1-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_rt5640.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 9dadf6561444..f790514a147d 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -520,10 +520,10 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { .driver_data = (void *)(BYT_RT5640_IN1_MAP | BYT_RT5640_MCLK_EN), }, - { /* HP Pavilion x2 10-n000nd */ + { /* HP Pavilion x2 10-k0XX, 10-n0XX */ .matches = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"), + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"), }, .driver_data = (void *)(BYT_RT5640_DMIC1_MAP | BYT_RT5640_JD_SRC_JD2_IN4N | @@ -532,6 +532,17 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_SSP0_AIF1 | BYT_RT5640_MCLK_EN), }, + { /* HP Pavilion x2 10-p0XX */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP x2 Detachable 10-p0XX"), + }, + .driver_data = (void *)(BYT_RT5640_DMIC1_MAP | + BYT_RT5640_JD_SRC_JD1_IN4P | + BYT_RT5640_OVCD_TH_1500UA | + BYT_RT5640_OVCD_SF_0P75 | + BYT_RT5640_MCLK_EN), + }, { /* HP Stream 7 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), From a61ea6379ae9dbb63fbf022d1456733520db6be7 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 19 Nov 2020 14:53:22 +0900 Subject: [PATCH 017/167] tools/bootconfig: Fix errno reference after printf() Fix not to refer the errno variable as the result of previous libc functions after printf() because printf() can change the errno. Link: https://lkml.kernel.org/r/160576520243.320071.51093664672431249.stgit@devnote2 Fixes: 85c46b78da58 ("bootconfig: Add bootconfig magic word for indicating bootconfig explicitly") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/main.c | 52 +++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index eb92027817a7..52eb2bbe8966 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -147,6 +147,12 @@ static int load_xbc_file(const char *path, char **buf) return ret; } +static int pr_errno(const char *msg, int err) +{ + pr_err("%s: %d\n", msg, err); + return err; +} + static int load_xbc_from_initrd(int fd, char **buf) { struct stat stat; @@ -162,26 +168,24 @@ static int load_xbc_from_initrd(int fd, char **buf) if (stat.st_size < 8 + BOOTCONFIG_MAGIC_LEN) return 0; - if (lseek(fd, -BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0) { - pr_err("Failed to lseek: %d\n", -errno); - return -errno; - } + if (lseek(fd, -BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0) + return pr_errno("Failed to lseek for magic", -errno); + if (read(fd, magic, BOOTCONFIG_MAGIC_LEN) < 0) - return -errno; + return pr_errno("Failed to read", -errno); + /* Check the bootconfig magic bytes */ if (memcmp(magic, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN) != 0) return 0; - if (lseek(fd, -(8 + BOOTCONFIG_MAGIC_LEN), SEEK_END) < 0) { - pr_err("Failed to lseek: %d\n", -errno); - return -errno; - } + if (lseek(fd, -(8 + BOOTCONFIG_MAGIC_LEN), SEEK_END) < 0) + return pr_errno("Failed to lseek for size", -errno); if (read(fd, &size, sizeof(u32)) < 0) - return -errno; + return pr_errno("Failed to read size", -errno); if (read(fd, &csum, sizeof(u32)) < 0) - return -errno; + return pr_errno("Failed to read checksum", -errno); /* Wrong size error */ if (stat.st_size < size + 8 + BOOTCONFIG_MAGIC_LEN) { @@ -190,10 +194,8 @@ static int load_xbc_from_initrd(int fd, char **buf) } if (lseek(fd, stat.st_size - (size + 8 + BOOTCONFIG_MAGIC_LEN), - SEEK_SET) < 0) { - pr_err("Failed to lseek: %d\n", -errno); - return -errno; - } + SEEK_SET) < 0) + return pr_errno("Failed to lseek", -errno); ret = load_xbc_fd(fd, buf, size); if (ret < 0) @@ -262,14 +264,16 @@ static int show_xbc(const char *path, bool list) ret = stat(path, &st); if (ret < 0) { - pr_err("Failed to stat %s: %d\n", path, -errno); - return -errno; + ret = -errno; + pr_err("Failed to stat %s: %d\n", path, ret); + return ret; } fd = open(path, O_RDONLY); if (fd < 0) { - pr_err("Failed to open initrd %s: %d\n", path, fd); - return -errno; + ret = -errno; + pr_err("Failed to open initrd %s: %d\n", path, ret); + return ret; } ret = load_xbc_from_initrd(fd, &buf); @@ -307,8 +311,9 @@ static int delete_xbc(const char *path) fd = open(path, O_RDWR); if (fd < 0) { - pr_err("Failed to open initrd %s: %d\n", path, fd); - return -errno; + ret = -errno; + pr_err("Failed to open initrd %s: %d\n", path, ret); + return ret; } size = load_xbc_from_initrd(fd, &buf); @@ -383,9 +388,10 @@ static int apply_xbc(const char *path, const char *xbc_path) /* Apply new one */ fd = open(path, O_RDWR | O_APPEND); if (fd < 0) { - pr_err("Failed to open %s: %d\n", path, fd); + ret = -errno; + pr_err("Failed to open %s: %d\n", path, ret); free(data); - return fd; + return ret; } /* TODO: Ensure the @path is initramfs/initrd image */ ret = write(fd, data, size + 8); From a995e6bc0524450adfd6181dfdcd9d0520cfaba5 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 19 Nov 2020 14:53:31 +0900 Subject: [PATCH 018/167] tools/bootconfig: Fix to check the write failure correctly Fix to check the write(2) failure including partial write correctly and try to rollback the partial write, because if there is no BOOTCONFIG_MAGIC string, we can not remove it. Link: https://lkml.kernel.org/r/160576521135.320071.3883101436675969998.stgit@devnote2 Fixes: 85c46b78da58 ("bootconfig: Add bootconfig magic word for indicating bootconfig explicitly") Suggested-by: Linus Torvalds Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/main.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 52eb2bbe8966..a0733cbb3c49 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -337,6 +337,7 @@ static int delete_xbc(const char *path) static int apply_xbc(const char *path, const char *xbc_path) { + struct stat stat; u32 size, csum; char *buf, *data; int ret, fd; @@ -394,16 +395,26 @@ static int apply_xbc(const char *path, const char *xbc_path) return ret; } /* TODO: Ensure the @path is initramfs/initrd image */ - ret = write(fd, data, size + 8); - if (ret < 0) { - pr_err("Failed to apply a boot config: %d\n", ret); + if (fstat(fd, &stat) < 0) { + pr_err("Failed to get the size of %s\n", path); goto out; } + ret = write(fd, data, size + 8); + if (ret < size + 8) { + if (ret < 0) + ret = -errno; + pr_err("Failed to apply a boot config: %d\n", ret); + if (ret < 0) + goto out; + goto out_rollback; + } /* Write a magic word of the bootconfig */ ret = write(fd, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN); - if (ret < 0) { + if (ret < BOOTCONFIG_MAGIC_LEN) { + if (ret < 0) + ret = -errno; pr_err("Failed to apply a boot config magic: %d\n", ret); - goto out; + goto out_rollback; } ret = 0; out: @@ -411,6 +422,17 @@ out: free(data); return ret; + +out_rollback: + /* Map the partial write to -ENOSPC */ + if (ret >= 0) + ret = -ENOSPC; + if (ftruncate(fd, stat.st_size) < 0) { + ret = -errno; + pr_err("Failed to rollback the write error: %d\n", ret); + pr_err("The initrd %s may be corrupted. Recommend to rebuild.\n", path); + } + goto out; } static int usage(void) From e1cef2d4c379b2aab43a7dc9601f645048209090 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 19 Nov 2020 14:53:40 +0900 Subject: [PATCH 019/167] tools/bootconfig: Align the bootconfig applied initrd image size to 4 Align the bootconfig applied initrd image size to 4. To fill the gap, the bootconfig command uses null characters in between the bootconfig data and the footer. This will expands the footer size but don't change the checksum. Thus the block image of the initrd file with bootconfig is as follows. [initrd][bootconfig][(pad)][size][csum]["#BOOTCONFIG\n"] Link: https://lkml.kernel.org/r/160576522046.320071.8550680670010950634.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/bootconfig.h | 3 ++ tools/bootconfig/main.c | 59 +++++++++++++++++------------ tools/bootconfig/test-bootconfig.sh | 6 ++- 3 files changed, 43 insertions(+), 25 deletions(-) diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index 9903088891fa..2696eb0fc149 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -12,6 +12,9 @@ #define BOOTCONFIG_MAGIC "#BOOTCONFIG\n" #define BOOTCONFIG_MAGIC_LEN 12 +#define BOOTCONFIG_ALIGN_SHIFT 2 +#define BOOTCONFIG_ALIGN (1 << BOOTCONFIG_ALIGN_SHIFT) +#define BOOTCONFIG_ALIGN_MASK (BOOTCONFIG_ALIGN - 1) /* XBC tree node */ struct xbc_node { diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index a0733cbb3c49..4a445b6304bb 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -337,12 +337,13 @@ static int delete_xbc(const char *path) static int apply_xbc(const char *path, const char *xbc_path) { + char *buf, *data, *p; + size_t total_size; struct stat stat; - u32 size, csum; - char *buf, *data; - int ret, fd; const char *msg; - int pos; + u32 size, csum; + int pos, pad; + int ret, fd; ret = load_xbc_file(xbc_path, &buf); if (ret < 0) { @@ -352,13 +353,12 @@ static int apply_xbc(const char *path, const char *xbc_path) size = strlen(buf) + 1; csum = checksum((unsigned char *)buf, size); - /* Prepare xbc_path data */ - data = malloc(size + 8); + /* Backup the bootconfig data */ + data = calloc(size + BOOTCONFIG_ALIGN + + sizeof(u32) + sizeof(u32) + BOOTCONFIG_MAGIC_LEN, 1); if (!data) return -ENOMEM; - strcpy(data, buf); - *(u32 *)(data + size) = size; - *(u32 *)(data + size + 4) = csum; + memcpy(data, buf, size); /* Check the data format */ ret = xbc_init(buf, &msg, &pos); @@ -399,24 +399,35 @@ static int apply_xbc(const char *path, const char *xbc_path) pr_err("Failed to get the size of %s\n", path); goto out; } - ret = write(fd, data, size + 8); - if (ret < size + 8) { + + /* To align up the total size to BOOTCONFIG_ALIGN, get padding size */ + total_size = stat.st_size + size + sizeof(u32) * 2 + BOOTCONFIG_MAGIC_LEN; + pad = ((total_size + BOOTCONFIG_ALIGN - 1) & (~BOOTCONFIG_ALIGN_MASK)) - total_size; + size += pad; + + /* Add a footer */ + p = data + size; + *(u32 *)p = size; + p += sizeof(u32); + + *(u32 *)p = csum; + p += sizeof(u32); + + memcpy(p, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN); + p += BOOTCONFIG_MAGIC_LEN; + + total_size = p - data; + + ret = write(fd, data, total_size); + if (ret < total_size) { if (ret < 0) ret = -errno; pr_err("Failed to apply a boot config: %d\n", ret); - if (ret < 0) - goto out; - goto out_rollback; - } - /* Write a magic word of the bootconfig */ - ret = write(fd, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN); - if (ret < BOOTCONFIG_MAGIC_LEN) { - if (ret < 0) - ret = -errno; - pr_err("Failed to apply a boot config magic: %d\n", ret); - goto out_rollback; - } - ret = 0; + if (ret >= 0) + goto out_rollback; + } else + ret = 0; + out: close(fd); free(data); diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh index d295e406a756..baed891d0ba4 100755 --- a/tools/bootconfig/test-bootconfig.sh +++ b/tools/bootconfig/test-bootconfig.sh @@ -9,6 +9,7 @@ else TESTDIR=. fi BOOTCONF=${TESTDIR}/bootconfig +ALIGN=4 INITRD=`mktemp ${TESTDIR}/initrd-XXXX` TEMPCONF=`mktemp ${TESTDIR}/temp-XXXX.bconf` @@ -59,7 +60,10 @@ echo "Show command test" xpass $BOOTCONF $INITRD echo "File size check" -xpass test $new_size -eq $(expr $bconf_size + $initrd_size + 9 + 12) +total_size=$(expr $bconf_size + $initrd_size + 9 + 12 + $ALIGN - 1 ) +total_size=$(expr $total_size / $ALIGN) +total_size=$(expr $total_size \* $ALIGN) +xpass test $new_size -eq $total_size echo "Apply command repeat test" xpass $BOOTCONF -a $TEMPCONF $INITRD From fbc6e1c6e0a4b5ef402f9eb8d00880a5e1d98df3 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 19 Nov 2020 14:53:49 +0900 Subject: [PATCH 020/167] docs: bootconfig: Update file format on initrd image To align the total file size, add padding null character when appending the bootconfig to initrd image. Link: https://lkml.kernel.org/r/160576522916.320071.4145530996151028855.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- Documentation/admin-guide/bootconfig.rst | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/bootconfig.rst b/Documentation/admin-guide/bootconfig.rst index a22024f9175e..363599683784 100644 --- a/Documentation/admin-guide/bootconfig.rst +++ b/Documentation/admin-guide/bootconfig.rst @@ -137,15 +137,22 @@ Boot Kernel With a Boot Config ============================== Since the boot configuration file is loaded with initrd, it will be added -to the end of the initrd (initramfs) image file with size, checksum and -12-byte magic word as below. +to the end of the initrd (initramfs) image file with padding, size, +checksum and 12-byte magic word as below. -[initrd][bootconfig][size(u32)][checksum(u32)][#BOOTCONFIG\n] +[initrd][bootconfig][padding][size(u32)][checksum(u32)][#BOOTCONFIG\n] + +When the boot configuration is added to the initrd image, the total +file size is aligned to 4 bytes. To fill the gap, null characters +(``\0``) will be added. Thus the ``size`` is the length of the bootconfig +file + padding bytes. The Linux kernel decodes the last part of the initrd image in memory to get the boot configuration data. Because of this "piggyback" method, there is no need to change or -update the boot loader and the kernel image itself. +update the boot loader and the kernel image itself as long as the boot +loader passes the correct initrd file size. If by any chance, the boot +loader passes a longer size, the kernel feils to find the bootconfig data. To do this operation, Linux kernel provides "bootconfig" command under tools/bootconfig, which allows admin to apply or delete the config file @@ -176,7 +183,8 @@ up to 512 key-value pairs. If keys contains 3 words in average, it can contain 256 key-value pairs. In most cases, the number of config items will be under 100 entries and smaller than 8KB, so it would be enough. If the node number exceeds 1024, parser returns an error even if the file -size is smaller than 32KB. +size is smaller than 32KB. (Note that this maximum size is not including +the padding null characters.) Anyway, since bootconfig command verifies it when appending a boot config to initrd image, user can notice it before boot. From c497f9322af947204c28292be6f20dd2d97483dd Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Thu, 12 Nov 2020 12:51:40 +0200 Subject: [PATCH 021/167] interconnect: qcom: msm8916: Remove rpm-ids from non-RPM nodes Some nodes are incorrectly marked as RPM-controlled (they have RPM master and slave ids assigned), but are actually controlled by the application CPU instead. The RPM complains when we send requests for resources that it can't control. Let's fix this by replacing the IDs, with the default "-1" in which case no requests are sent. Reviewed-by: Mike Tipton Link: https://lore.kernel.org/r/20201112105140.10092-1-georgi.djakov@linaro.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/msm8916.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/interconnect/qcom/msm8916.c b/drivers/interconnect/qcom/msm8916.c index 42c6c5581662..e8371d40ab8d 100644 --- a/drivers/interconnect/qcom/msm8916.c +++ b/drivers/interconnect/qcom/msm8916.c @@ -182,7 +182,7 @@ DEFINE_QNODE(mas_pcnoc_sdcc_1, MSM8916_MASTER_SDCC_1, 8, -1, -1, MSM8916_PNOC_IN DEFINE_QNODE(mas_pcnoc_sdcc_2, MSM8916_MASTER_SDCC_2, 8, -1, -1, MSM8916_PNOC_INT_1); DEFINE_QNODE(mas_qdss_bam, MSM8916_MASTER_QDSS_BAM, 8, -1, -1, MSM8916_SNOC_QDSS_INT); DEFINE_QNODE(mas_qdss_etr, MSM8916_MASTER_QDSS_ETR, 8, -1, -1, MSM8916_SNOC_QDSS_INT); -DEFINE_QNODE(mas_snoc_cfg, MSM8916_MASTER_SNOC_CFG, 4, 20, -1, MSM8916_SNOC_QDSS_INT); +DEFINE_QNODE(mas_snoc_cfg, MSM8916_MASTER_SNOC_CFG, 4, -1, -1, MSM8916_SNOC_QDSS_INT); DEFINE_QNODE(mas_spdm, MSM8916_MASTER_SPDM, 4, -1, -1, MSM8916_PNOC_MAS_0); DEFINE_QNODE(mas_tcu0, MSM8916_MASTER_TCU0, 8, -1, -1, MSM8916_SLAVE_EBI_CH0, MSM8916_BIMC_SNOC_MAS, MSM8916_SLAVE_AMPSS_L2); DEFINE_QNODE(mas_tcu1, MSM8916_MASTER_TCU1, 8, -1, -1, MSM8916_SLAVE_EBI_CH0, MSM8916_BIMC_SNOC_MAS, MSM8916_SLAVE_AMPSS_L2); @@ -208,14 +208,14 @@ DEFINE_QNODE(pcnoc_snoc_mas, MSM8916_PNOC_SNOC_MAS, 8, 29, -1, MSM8916_PNOC_SNOC DEFINE_QNODE(pcnoc_snoc_slv, MSM8916_PNOC_SNOC_SLV, 8, -1, 45, MSM8916_SNOC_INT_0, MSM8916_SNOC_INT_BIMC, MSM8916_SNOC_INT_1); DEFINE_QNODE(qdss_int, MSM8916_SNOC_QDSS_INT, 8, -1, -1, MSM8916_SNOC_INT_0, MSM8916_SNOC_INT_BIMC); DEFINE_QNODE(slv_apps_l2, MSM8916_SLAVE_AMPSS_L2, 8, -1, -1, 0); -DEFINE_QNODE(slv_apss, MSM8916_SLAVE_APSS, 4, -1, 20, 0); +DEFINE_QNODE(slv_apss, MSM8916_SLAVE_APSS, 4, -1, -1, 0); DEFINE_QNODE(slv_audio, MSM8916_SLAVE_LPASS, 4, -1, -1, 0); DEFINE_QNODE(slv_bimc_cfg, MSM8916_SLAVE_BIMC_CFG, 4, -1, -1, 0); DEFINE_QNODE(slv_blsp_1, MSM8916_SLAVE_BLSP_1, 4, -1, -1, 0); DEFINE_QNODE(slv_boot_rom, MSM8916_SLAVE_BOOT_ROM, 4, -1, -1, 0); DEFINE_QNODE(slv_camera_cfg, MSM8916_SLAVE_CAMERA_CFG, 4, -1, -1, 0); -DEFINE_QNODE(slv_cats_0, MSM8916_SLAVE_CATS_128, 16, -1, 106, 0); -DEFINE_QNODE(slv_cats_1, MSM8916_SLAVE_OCMEM_64, 8, -1, 107, 0); +DEFINE_QNODE(slv_cats_0, MSM8916_SLAVE_CATS_128, 16, -1, -1, 0); +DEFINE_QNODE(slv_cats_1, MSM8916_SLAVE_OCMEM_64, 8, -1, -1, 0); DEFINE_QNODE(slv_clk_ctl, MSM8916_SLAVE_CLK_CTL, 4, -1, -1, 0); DEFINE_QNODE(slv_crypto_0_cfg, MSM8916_SLAVE_CRYPTO_0_CFG, 4, -1, -1, 0); DEFINE_QNODE(slv_dehr_cfg, MSM8916_SLAVE_DEHR_CFG, 4, -1, -1, 0); @@ -239,7 +239,7 @@ DEFINE_QNODE(slv_sdcc_2, MSM8916_SLAVE_SDCC_2, 4, -1, -1, 0); DEFINE_QNODE(slv_security, MSM8916_SLAVE_SECURITY, 4, -1, -1, 0); DEFINE_QNODE(slv_snoc_cfg, MSM8916_SLAVE_SNOC_CFG, 4, -1, -1, 0); DEFINE_QNODE(slv_spdm, MSM8916_SLAVE_SPDM, 4, -1, -1, 0); -DEFINE_QNODE(slv_srvc_snoc, MSM8916_SLAVE_SRVC_SNOC, 8, -1, 29, 0); +DEFINE_QNODE(slv_srvc_snoc, MSM8916_SLAVE_SRVC_SNOC, 8, -1, -1, 0); DEFINE_QNODE(slv_tcsr, MSM8916_SLAVE_TCSR, 4, -1, -1, 0); DEFINE_QNODE(slv_tlmm, MSM8916_SLAVE_TLMM, 4, -1, -1, 0); DEFINE_QNODE(slv_usb_hs, MSM8916_SLAVE_USB_HS, 4, -1, -1, 0); @@ -249,7 +249,7 @@ DEFINE_QNODE(snoc_bimc_0_slv, MSM8916_SNOC_BIMC_0_SLV, 8, -1, 24, MSM8916_SLAVE_ DEFINE_QNODE(snoc_bimc_1_mas, MSM8916_SNOC_BIMC_1_MAS, 16, -1, -1, MSM8916_SNOC_BIMC_1_SLV); DEFINE_QNODE(snoc_bimc_1_slv, MSM8916_SNOC_BIMC_1_SLV, 8, -1, -1, MSM8916_SLAVE_EBI_CH0); DEFINE_QNODE(snoc_int_0, MSM8916_SNOC_INT_0, 8, 99, 130, MSM8916_SLAVE_QDSS_STM, MSM8916_SLAVE_IMEM, MSM8916_SNOC_PNOC_MAS); -DEFINE_QNODE(snoc_int_1, MSM8916_SNOC_INT_1, 8, 100, 131, MSM8916_SLAVE_APSS, MSM8916_SLAVE_CATS_128, MSM8916_SLAVE_OCMEM_64); +DEFINE_QNODE(snoc_int_1, MSM8916_SNOC_INT_1, 8, -1, -1, MSM8916_SLAVE_APSS, MSM8916_SLAVE_CATS_128, MSM8916_SLAVE_OCMEM_64); DEFINE_QNODE(snoc_int_bimc, MSM8916_SNOC_INT_BIMC, 8, 101, 132, MSM8916_SNOC_BIMC_0_MAS); DEFINE_QNODE(snoc_pcnoc_mas, MSM8916_SNOC_PNOC_MAS, 8, -1, -1, MSM8916_SNOC_PNOC_SLV); DEFINE_QNODE(snoc_pcnoc_slv, MSM8916_SNOC_PNOC_SLV, 8, -1, -1, MSM8916_PNOC_INT_0); From 7ab1e9117607485df977bb6e271be5c5ad649a4c Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Wed, 18 Nov 2020 13:10:44 +0200 Subject: [PATCH 022/167] interconnect: qcom: qcs404: Remove GPU and display RPM IDs The following errors are noticed during boot on a QCS404 board: [ 2.926647] qcom_icc_rpm_smd_send mas 6 error -6 [ 2.934573] qcom_icc_rpm_smd_send mas 8 error -6 These errors show when we try to configure the GPU and display nodes. Since these particular nodes aren't supported on RPM and are purely local, we should just change their mas_rpm_id to -1 to avoid any requests being sent for these master IDs. Reviewed-by: Mike Tipton Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20201118111044.26056-1-georgi.djakov@linaro.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/qcs404.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/interconnect/qcom/qcs404.c b/drivers/interconnect/qcom/qcs404.c index d4769a5ea182..9820709b43db 100644 --- a/drivers/interconnect/qcom/qcs404.c +++ b/drivers/interconnect/qcom/qcs404.c @@ -157,8 +157,8 @@ struct qcom_icc_desc { } DEFINE_QNODE(mas_apps_proc, QCS404_MASTER_AMPSS_M0, 8, 0, -1, QCS404_SLAVE_EBI_CH0, QCS404_BIMC_SNOC_SLV); -DEFINE_QNODE(mas_oxili, QCS404_MASTER_GRAPHICS_3D, 8, 6, -1, QCS404_SLAVE_EBI_CH0, QCS404_BIMC_SNOC_SLV); -DEFINE_QNODE(mas_mdp, QCS404_MASTER_MDP_PORT0, 8, 8, -1, QCS404_SLAVE_EBI_CH0, QCS404_BIMC_SNOC_SLV); +DEFINE_QNODE(mas_oxili, QCS404_MASTER_GRAPHICS_3D, 8, -1, -1, QCS404_SLAVE_EBI_CH0, QCS404_BIMC_SNOC_SLV); +DEFINE_QNODE(mas_mdp, QCS404_MASTER_MDP_PORT0, 8, -1, -1, QCS404_SLAVE_EBI_CH0, QCS404_BIMC_SNOC_SLV); DEFINE_QNODE(mas_snoc_bimc_1, QCS404_SNOC_BIMC_1_MAS, 8, 76, -1, QCS404_SLAVE_EBI_CH0); DEFINE_QNODE(mas_tcu_0, QCS404_MASTER_TCU_0, 8, -1, -1, QCS404_SLAVE_EBI_CH0, QCS404_BIMC_SNOC_SLV); DEFINE_QNODE(mas_spdm, QCS404_MASTER_SPDM, 4, -1, -1, QCS404_PNOC_INT_3); From 92666d45adcfd4a4a70580ff9f732309e16131f9 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Thu, 19 Nov 2020 17:04:21 +0800 Subject: [PATCH 023/167] ALSA: hda/realtek - Fixed Dell AIO wrong sound tone This platform only had one audio jack. If it plugged speaker then replug with speaker or headset, the sound tone will change to abnormal. Headset Mic also can't record when this issue was happen. [ Added a short comment about the COEF by tiwai ] Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/593c777dcfef4546aa050e105b8e53b5@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 40 +++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 739dbaf54517..b90cd4c65b58 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -119,6 +119,7 @@ struct alc_spec { unsigned int no_shutup_pins:1; unsigned int ultra_low_power:1; unsigned int has_hs_key:1; + unsigned int no_internal_mic_pin:1; /* for PLL fix */ hda_nid_t pll_nid; @@ -4523,6 +4524,7 @@ static const struct coef_fw alc225_pre_hsmode[] = { static void alc_headset_mode_unplugged(struct hda_codec *codec) { + struct alc_spec *spec = codec->spec; static const struct coef_fw coef0255[] = { WRITE_COEF(0x1b, 0x0c0b), /* LDO and MISC control */ WRITE_COEF(0x45, 0xd089), /* UAJ function set to menual mode */ @@ -4597,6 +4599,11 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec) {} }; + if (spec->no_internal_mic_pin) { + alc_update_coef_idx(codec, 0x45, 0xf<<12 | 1<<10, 5<<12); + return; + } + switch (codec->core.vendor_id) { case 0x10ec0255: alc_process_coef_fw(codec, coef0255); @@ -5163,6 +5170,11 @@ static void alc_determine_headset_type(struct hda_codec *codec) {} }; + if (spec->no_internal_mic_pin) { + alc_update_coef_idx(codec, 0x45, 0xf<<12 | 1<<10, 5<<12); + return; + } + switch (codec->core.vendor_id) { case 0x10ec0255: alc_process_coef_fw(codec, coef0255); @@ -6121,6 +6133,23 @@ static void alc274_fixup_hp_headset_mic(struct hda_codec *codec, } } +static void alc_fixup_no_int_mic(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + /* Mic RING SLEEVE swap for combo jack */ + alc_update_coef_idx(codec, 0x45, 0xf<<12 | 1<<10, 5<<12); + spec->no_internal_mic_pin = true; + break; + case HDA_FIXUP_ACT_INIT: + alc_combo_jack_hp_jd_restart(codec); + break; + } +} + /* for hda_fixup_thinkpad_acpi() */ #include "thinkpad_helper.c" @@ -6320,6 +6349,7 @@ enum { ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK, ALC287_FIXUP_HP_GPIO_LED, ALC256_FIXUP_HP_HEADSET_MIC, + ALC236_FIXUP_DELL_AIO_HEADSET_MIC, }; static const struct hda_fixup alc269_fixups[] = { @@ -7738,6 +7768,12 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc274_fixup_hp_headset_mic, }, + [ALC236_FIXUP_DELL_AIO_HEADSET_MIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_no_int_mic, + .chained = true, + .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -7815,6 +7851,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x097d, "Dell Precision", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x098d, "Dell Precision", ALC233_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x09bf, "Dell Precision", ALC233_FIXUP_ASUS_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0a2e, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC), + SND_PCI_QUIRK(0x1028, 0x0a30, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), @@ -8353,6 +8391,8 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x19, 0x02a11020}, {0x1a, 0x02a11030}, {0x21, 0x0221101f}), + SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC, + {0x21, 0x02211010}), SND_HDA_PIN_QUIRK(0x10ec0236, 0x103c, "HP", ALC256_FIXUP_HP_HEADSET_MIC, {0x14, 0x90170110}, {0x19, 0x02a11020}, From 017496af28e2589c2c2cb396baba0507179d2748 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 19 Nov 2020 11:37:46 +0100 Subject: [PATCH 024/167] interconnect: fix memory trashing in of_count_icc_providers() of_count_icc_providers() function uses for_each_available_child_of_node() helper to recursively check all the available nodes. This helper already properly handles child nodes' reference count, so there is no need to do it explicitly. Remove the excessive call to of_node_put(). This fixes memory trashing when CONFIG_OF_DYNAMIC is enabled (for example arm/multi_v7_defconfig). Fixes: b1d681d8d324 ("interconnect: Add sync state support") Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20201119103746.32564-1-m.szyprowski@samsung.com Signed-off-by: Georgi Djakov --- drivers/interconnect/core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 974a66725d09..5ad519c9f239 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -1083,7 +1083,6 @@ static int of_count_icc_providers(struct device_node *np) count++; count += of_count_icc_providers(child); } - of_node_put(np); return count; } From 6200d5c3831370cd0ab4b6455933d12e82ea9956 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 20 Nov 2020 09:53:09 +0100 Subject: [PATCH 025/167] MAINTAINERS: Update XDP and AF_XDP entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Getting too many false positive matches with current use of the content regex K: and file regex N: patterns. This patch drops file match N: and makes K: more restricted. Some more normal F: file wildcards are added. Notice that AF_XDP forgot to some F: files that is also updated in this patch. Suggested-by: Jakub Kicinski Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/160586238944.2808432.4401269290440394008.stgit@firesoul --- MAINTAINERS | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index c7f6924d34c7..15355c055a1a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19109,12 +19109,17 @@ L: netdev@vger.kernel.org L: bpf@vger.kernel.org S: Supported F: include/net/xdp.h +F: include/net/xdp_priv.h F: include/trace/events/xdp.h F: kernel/bpf/cpumap.c F: kernel/bpf/devmap.c F: net/core/xdp.c -N: xdp -K: xdp +F: samples/bpf/xdp* +F: tools/testing/selftests/bpf/*xdp* +F: tools/testing/selftests/bpf/*/*xdp* +F: drivers/net/ethernet/*/*/*/*/*xdp* +F: drivers/net/ethernet/*/*/*xdp* +K: (?:\b|_)xdp(?:\b|_) XDP SOCKETS (AF_XDP) M: Björn Töpel @@ -19123,9 +19128,12 @@ R: Jonathan Lemon L: netdev@vger.kernel.org L: bpf@vger.kernel.org S: Maintained +F: Documentation/networking/af_xdp.rst F: include/net/xdp_sock* F: include/net/xsk_buff_pool.h F: include/uapi/linux/if_xdp.h +F: include/uapi/linux/xdp_diag.h +F: include/net/netns/xdp.h F: net/xdp/ F: samples/bpf/xdpsock* F: tools/lib/bpf/xsk* From 537cf4e3cc2f6cc9088dcd6162de573f603adc29 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Fri, 20 Nov 2020 12:53:39 +0100 Subject: [PATCH 026/167] xsk: Fix umem cleanup bug at socket destruct Fix a bug that is triggered when a partially setup socket is destroyed. For a fully setup socket, a socket that has been bound to a device, the cleanup of the umem is performed at the end of the buffer pool's cleanup work queue item. This has to be performed in a work queue, and not in RCU cleanup, as it is doing a vunmap that cannot execute in interrupt context. However, when a socket has only been partially set up so that a umem has been created but the buffer pool has not, the code erroneously directly calls the umem cleanup function instead of using a work queue, and this leads to a BUG_ON() in vunmap(). As there in this case is no buffer pool, we cannot use its work queue, so we need to introduce a work queue for the umem and schedule this for the cleanup. So in the case there is no pool, we are going to use the umem's own work queue to schedule the cleanup. But if there is a pool, the cleanup of the umem is still being performed by the pool's work queue, as it is important that the umem is cleaned up after the pool. Fixes: e5e1a4bc916d ("xsk: Fix possible memory leak at socket close") Reported-by: Marek Majtyka Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Tested-by: Marek Majtyka Link: https://lore.kernel.org/bpf/1605873219-21629-1-git-send-email-magnus.karlsson@gmail.com --- include/net/xdp_sock.h | 1 + net/xdp/xdp_umem.c | 19 ++++++++++++++++--- net/xdp/xdp_umem.h | 2 +- net/xdp/xsk.c | 2 +- net/xdp/xsk_buff_pool.c | 2 +- 5 files changed, 20 insertions(+), 6 deletions(-) diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 1a9559c0cbdd..4f4e93bf814c 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -31,6 +31,7 @@ struct xdp_umem { struct page **pgs; int id; struct list_head xsk_dma_list; + struct work_struct work; }; struct xsk_map { diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 56d052bc65cb..56a28a686988 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -66,18 +66,31 @@ static void xdp_umem_release(struct xdp_umem *umem) kfree(umem); } +static void xdp_umem_release_deferred(struct work_struct *work) +{ + struct xdp_umem *umem = container_of(work, struct xdp_umem, work); + + xdp_umem_release(umem); +} + void xdp_get_umem(struct xdp_umem *umem) { refcount_inc(&umem->users); } -void xdp_put_umem(struct xdp_umem *umem) +void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup) { if (!umem) return; - if (refcount_dec_and_test(&umem->users)) - xdp_umem_release(umem); + if (refcount_dec_and_test(&umem->users)) { + if (defer_cleanup) { + INIT_WORK(&umem->work, xdp_umem_release_deferred); + schedule_work(&umem->work); + } else { + xdp_umem_release(umem); + } + } } static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address) diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h index 181fdda2f2a8..aa9fe2780410 100644 --- a/net/xdp/xdp_umem.h +++ b/net/xdp/xdp_umem.h @@ -9,7 +9,7 @@ #include void xdp_get_umem(struct xdp_umem *umem); -void xdp_put_umem(struct xdp_umem *umem); +void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup); struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr); #endif /* XDP_UMEM_H_ */ diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index cfbec3989a76..5a6cdf7b320d 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -1147,7 +1147,7 @@ static void xsk_destruct(struct sock *sk) return; if (!xp_put_pool(xs->pool)) - xdp_put_umem(xs->umem); + xdp_put_umem(xs->umem, !xs->pool); sk_refcnt_debug_dec(sk); } diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 8a3bf4e1318e..3c5a1423d922 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -242,7 +242,7 @@ static void xp_release_deferred(struct work_struct *work) pool->cq = NULL; } - xdp_put_umem(pool->umem); + xdp_put_umem(pool->umem, false); xp_destroy(pool); } From 9ca57518361418ad5ae7dc38a2128fbf4855e1a2 Mon Sep 17 00:00:00 2001 From: penghao Date: Wed, 18 Nov 2020 20:30:39 +0800 Subject: [PATCH 027/167] USB: quirks: Add USB_QUIRK_DISCONNECT_SUSPEND quirk for Lenovo A630Z TIO built-in usb-audio card Add a USB_QUIRK_DISCONNECT_SUSPEND quirk for the Lenovo TIO built-in usb-audio. when A630Z going into S3,the system immediately wakeup 7-8 seconds later by usb-audio disconnect interrupt to avoids the issue. eg dmesg: .... [ 626.974091 ] usb 7-1.1: USB disconnect, device number 3 .... .... [ 1774.486691] usb 7-1.1: new full-speed USB device number 5 using xhci_hcd [ 1774.947742] usb 7-1.1: New USB device found, idVendor=17ef, idProduct=a012, bcdDevice= 0.55 [ 1774.956588] usb 7-1.1: New USB device strings: Mfr=1, Product=2, SerialNumber=3 [ 1774.964339] usb 7-1.1: Product: Thinkcentre TIO24Gen3 for USB-audio [ 1774.970999] usb 7-1.1: Manufacturer: Lenovo [ 1774.975447] usb 7-1.1: SerialNumber: 000000000000 [ 1775.048590] usb 7-1.1: 2:1: cannot get freq at ep 0x1 ....... Seeking a better fix, we've tried a lot of things, including: - Check that the device's power/wakeup is disabled - Check that remote wakeup is off at the USB level - All the quirks in drivers/usb/core/quirks.c e.g. USB_QUIRK_RESET_RESUME, USB_QUIRK_RESET, USB_QUIRK_IGNORE_REMOTE_WAKEUP, USB_QUIRK_NO_LPM. but none of that makes any difference. There are no errors in the logs showing any suspend/resume-related issues. When the system wakes up due to the modem, log-wise it appears to be a normal resume. Introduce a quirk to disable the port during suspend when the modem is detected. Signed-off-by: penghao Link: https://lore.kernel.org/r/20201118123039.11696-1-penghao@uniontech.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index a1e3a037a289..f536ea9fe945 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -421,6 +421,10 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1532, 0x0116), .driver_info = USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, + /* Lenovo ThinkCenter A630Z TI024Gen3 usb-audio */ + { USB_DEVICE(0x17ef, 0xa012), .driver_info = + USB_QUIRK_DISCONNECT_SUSPEND }, + /* BUILDWIN Photo Frame */ { USB_DEVICE(0x1908, 0x1315), .driver_info = USB_QUIRK_HONOR_BNUMINTERFACES }, From e7694cb6998379341fd9bf3bd62b48c4e6a79385 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Tue, 17 Nov 2020 10:16:28 +0800 Subject: [PATCH 028/167] usb: gadget: f_midi: Fix memleak in f_midi_alloc In the error path, if midi is not null, we should free the midi->id if necessary to prevent memleak. Fixes: b85e9de9e818d ("usb: gadget: f_midi: convert to new function interface with backward compatibility") Reported-by: Hulk Robot Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20201117021629.1470544-2-zhangqilong3@huawei.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_midi.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c index 85cb15734aa8..19d97940eeb9 100644 --- a/drivers/usb/gadget/function/f_midi.c +++ b/drivers/usb/gadget/function/f_midi.c @@ -1315,7 +1315,7 @@ static struct usb_function *f_midi_alloc(struct usb_function_instance *fi) midi->id = kstrdup(opts->id, GFP_KERNEL); if (opts->id && !midi->id) { status = -ENOMEM; - goto setup_fail; + goto midi_free; } midi->in_ports = opts->in_ports; midi->out_ports = opts->out_ports; @@ -1327,7 +1327,7 @@ static struct usb_function *f_midi_alloc(struct usb_function_instance *fi) status = kfifo_alloc(&midi->in_req_fifo, midi->qlen, GFP_KERNEL); if (status) - goto setup_fail; + goto midi_free; spin_lock_init(&midi->transmit_lock); @@ -1343,9 +1343,13 @@ static struct usb_function *f_midi_alloc(struct usb_function_instance *fi) return &midi->func; +midi_free: + if (midi) + kfree(midi->id); + kfree(midi); setup_fail: mutex_unlock(&opts->lock); - kfree(midi); + return ERR_PTR(status); } From 87bed3d7d26c974948a3d6e7176f304b2d41272b Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Tue, 17 Nov 2020 10:16:29 +0800 Subject: [PATCH 029/167] usb: gadget: Fix memleak in gadgetfs_fill_super usb_get_gadget_udc_name will alloc memory for CHIP in "Enomem" branch. we should free it before error returns to prevent memleak. Fixes: 175f712119c57 ("usb: gadget: provide interface for legacy gadgets to get UDC name") Reported-by: Hulk Robot Acked-by: Alan Stern Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20201117021629.1470544-3-zhangqilong3@huawei.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/inode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 1b430b36d0a6..71e7d10dd76b 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -2039,6 +2039,9 @@ gadgetfs_fill_super (struct super_block *sb, struct fs_context *fc) return 0; Enomem: + kfree(CHIP); + CHIP = NULL; + return -ENOMEM; } From 184eead057cc7e803558269babc1f2cfb9113ad1 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 19 Nov 2020 12:00:40 -0500 Subject: [PATCH 030/167] USB: core: Fix regression in Hercules audio card Commit 3e4f8e21c4f2 ("USB: core: fix check for duplicate endpoints") aimed to make the USB stack more reliable by detecting and skipping over endpoints that are duplicated between interfaces. This caused a regression for a Hercules audio card (reported as Bugzilla #208357), which contains such non-compliant duplications. Although the duplications are harmless, skipping the valid endpoints prevented the device from working. This patch fixes the regression by adding ENDPOINT_IGNORE quirks for the Hercules card, telling the kernel to ignore the invalid duplicate endpoints and thereby allowing the valid endpoints to be used as intended. Fixes: 3e4f8e21c4f2 ("USB: core: fix check for duplicate endpoints") CC: Reported-by: Alexander Chalikiopoulos Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/20201119170040.GA576844@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index f536ea9fe945..fad31ccd1fa8 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -348,6 +348,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* Guillemot Webcam Hercules Dualpix Exchange*/ { USB_DEVICE(0x06f8, 0x3005), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Guillemot Hercules DJ Console audio card (BZ 208357) */ + { USB_DEVICE(0x06f8, 0xb000), .driver_info = + USB_QUIRK_ENDPOINT_IGNORE }, + /* Midiman M-Audio Keystation 88es */ { USB_DEVICE(0x0763, 0x0192), .driver_info = USB_QUIRK_RESET_RESUME }, @@ -525,6 +529,8 @@ static const struct usb_device_id usb_amd_resume_quirk_list[] = { * Matched for devices with USB_QUIRK_ENDPOINT_IGNORE. */ static const struct usb_device_id usb_endpoint_ignore[] = { + { USB_DEVICE_INTERFACE_NUMBER(0x06f8, 0xb000, 5), .driver_info = 0x01 }, + { USB_DEVICE_INTERFACE_NUMBER(0x06f8, 0xb000, 5), .driver_info = 0x81 }, { USB_DEVICE_INTERFACE_NUMBER(0x0926, 0x0202, 1), .driver_info = 0x85 }, { USB_DEVICE_INTERFACE_NUMBER(0x0926, 0x0208, 1), .driver_info = 0x85 }, { } From f3bc432aa8a7a2bfe9ebb432502be5c5d979d7fe Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 19 Nov 2020 12:02:28 -0500 Subject: [PATCH 031/167] USB: core: Change %pK for __user pointers to %px Commit 2f964780c03b ("USB: core: replace %p with %pK") used the %pK format specifier for a bunch of __user pointers. But as the 'K' in the specifier indicates, it is meant for kernel pointers. The reason for the %pK specifier is to avoid leaks of kernel addresses, but when the pointer is to an address in userspace the security implications are minimal. In particular, no kernel information is leaked. This patch changes the __user %pK specifiers (used in a bunch of debugging output lines) to %px, which will always print the actual address with no mangling. (Notably, there is no printk format specifier particularly intended for __user pointers.) Fixes: 2f964780c03b ("USB: core: replace %p with %pK") CC: Vamsi Krishna Samavedam CC: Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/20201119170228.GB576844@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index e96a858a1218..533236366a03 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -482,11 +482,11 @@ static void snoop_urb(struct usb_device *udev, if (userurb) { /* Async */ if (when == SUBMIT) - dev_info(&udev->dev, "userurb %pK, ep%d %s-%s, " + dev_info(&udev->dev, "userurb %px, ep%d %s-%s, " "length %u\n", userurb, ep, t, d, length); else - dev_info(&udev->dev, "userurb %pK, ep%d %s-%s, " + dev_info(&udev->dev, "userurb %px, ep%d %s-%s, " "actual_length %u status %d\n", userurb, ep, t, d, length, timeout_or_status); @@ -1997,7 +1997,7 @@ static int proc_reapurb(struct usb_dev_state *ps, void __user *arg) if (as) { int retval; - snoop(&ps->dev->dev, "reap %pK\n", as->userurb); + snoop(&ps->dev->dev, "reap %px\n", as->userurb); retval = processcompl(as, (void __user * __user *)arg); free_async(as); return retval; @@ -2014,7 +2014,7 @@ static int proc_reapurbnonblock(struct usb_dev_state *ps, void __user *arg) as = async_getcompleted(ps); if (as) { - snoop(&ps->dev->dev, "reap %pK\n", as->userurb); + snoop(&ps->dev->dev, "reap %px\n", as->userurb); retval = processcompl(as, (void __user * __user *)arg); free_async(as); } else { @@ -2142,7 +2142,7 @@ static int proc_reapurb_compat(struct usb_dev_state *ps, void __user *arg) if (as) { int retval; - snoop(&ps->dev->dev, "reap %pK\n", as->userurb); + snoop(&ps->dev->dev, "reap %px\n", as->userurb); retval = processcompl_compat(as, (void __user * __user *)arg); free_async(as); return retval; @@ -2159,7 +2159,7 @@ static int proc_reapurbnonblock_compat(struct usb_dev_state *ps, void __user *ar as = async_getcompleted(ps); if (as) { - snoop(&ps->dev->dev, "reap %pK\n", as->userurb); + snoop(&ps->dev->dev, "reap %px\n", as->userurb); retval = processcompl_compat(as, (void __user * __user *)arg); free_async(as); } else { @@ -2624,7 +2624,7 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd, #endif case USBDEVFS_DISCARDURB: - snoop(&dev->dev, "%s: DISCARDURB %pK\n", __func__, p); + snoop(&dev->dev, "%s: DISCARDURB %px\n", __func__, p); ret = proc_unlinkurb(ps, p); break; From d001e41e1b15716e9b759df5ef00510699f85282 Mon Sep 17 00:00:00 2001 From: Chen Baozi Date: Tue, 17 Nov 2020 11:20:15 +0800 Subject: [PATCH 032/167] irqchip/exiu: Fix the index of fwspec for IRQ type Since fwspec->param_count of ACPI node is two, the index of IRQ type in fwspec->param[] should be 1 rather than 2. Fixes: 3d090a36c8c8 ("irqchip/exiu: Implement ACPI support") Signed-off-by: Chen Baozi Signed-off-by: Marc Zyngier Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20201117032015.11805-1-cbz@baozis.org Cc: stable@vger.kernel.org --- drivers/irqchip/irq-sni-exiu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-sni-exiu.c b/drivers/irqchip/irq-sni-exiu.c index 1d027623c776..abd011fcecf4 100644 --- a/drivers/irqchip/irq-sni-exiu.c +++ b/drivers/irqchip/irq-sni-exiu.c @@ -136,7 +136,7 @@ static int exiu_domain_translate(struct irq_domain *domain, if (fwspec->param_count != 2) return -EINVAL; *hwirq = fwspec->param[0]; - *type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK; + *type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK; } return 0; } From 74cde1a53368aed4f2b4b54bf7030437f64a534b Mon Sep 17 00:00:00 2001 From: Xu Qiang Date: Sat, 7 Nov 2020 10:42:26 +0000 Subject: [PATCH 033/167] irqchip/gic-v3-its: Unconditionally save/restore the ITS state on suspend On systems without HW-based collections (i.e. anything except GIC-500), we rely on firmware to perform the ITS save/restore. This doesn't really work, as although FW can properly save everything, it cannot fully restore the state of the command queue (the read-side is reset to the head of the queue). This results in the ITS consuming previously processed commands, potentially corrupting the state. Instead, let's always save the ITS state on suspend, disabling it in the process, and restore the full state on resume. This saves us from broken FW as long as it doesn't enable the ITS by itself (for which we can't do anything). This amounts to simply dropping the ITS_FLAGS_SAVE_SUSPEND_STATE. Signed-off-by: Xu Qiang [maz: added warning on resume, rewrote commit message] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201107104226.14282-1-xuqiang36@huawei.com --- drivers/irqchip/irq-gic-v3-its.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 0418071a3724..0598c5caff5c 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -42,7 +42,6 @@ #define ITS_FLAGS_CMDQ_NEEDS_FLUSHING (1ULL << 0) #define ITS_FLAGS_WORKAROUND_CAVIUM_22375 (1ULL << 1) #define ITS_FLAGS_WORKAROUND_CAVIUM_23144 (1ULL << 2) -#define ITS_FLAGS_SAVE_SUSPEND_STATE (1ULL << 3) #define RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING (1 << 0) #define RDIST_FLAGS_RD_TABLES_PREALLOCATED (1 << 1) @@ -4741,9 +4740,6 @@ static int its_save_disable(void) list_for_each_entry(its, &its_nodes, entry) { void __iomem *base; - if (!(its->flags & ITS_FLAGS_SAVE_SUSPEND_STATE)) - continue; - base = its->base; its->ctlr_save = readl_relaxed(base + GITS_CTLR); err = its_force_quiescent(base); @@ -4762,9 +4758,6 @@ err: list_for_each_entry_continue_reverse(its, &its_nodes, entry) { void __iomem *base; - if (!(its->flags & ITS_FLAGS_SAVE_SUSPEND_STATE)) - continue; - base = its->base; writel_relaxed(its->ctlr_save, base + GITS_CTLR); } @@ -4784,9 +4777,6 @@ static void its_restore_enable(void) void __iomem *base; int i; - if (!(its->flags & ITS_FLAGS_SAVE_SUSPEND_STATE)) - continue; - base = its->base; /* @@ -4794,7 +4784,10 @@ static void its_restore_enable(void) * don't restore it since writing to CBASER or BASER * registers is undefined according to the GIC v3 ITS * Specification. + * + * Firmware resuming with the ITS enabled is terminally broken. */ + WARN_ON(readl_relaxed(base + GITS_CTLR) & GITS_CTLR_ENABLE); ret = its_force_quiescent(base); if (ret) { pr_err("ITS@%pa: failed to quiesce on resume: %d\n", @@ -5074,9 +5067,6 @@ static int __init its_probe_one(struct resource *res, ctlr |= GITS_CTLR_ImDe; writel_relaxed(ctlr, its->base + GITS_CTLR); - if (GITS_TYPER_HCC(typer)) - its->flags |= ITS_FLAGS_SAVE_SUSPEND_STATE; - err = its_init_domain(handle, its); if (err) goto out_free_tables; From 178648916e73e00de83150eb0c90c0d3a977a46a Mon Sep 17 00:00:00 2001 From: Marek Majtyka Date: Fri, 20 Nov 2020 16:14:43 +0100 Subject: [PATCH 034/167] xsk: Fix incorrect netdev reference count Fix incorrect netdev reference count in xsk_bind operation. Incorrect reference count of the device appears when a user calls bind with the XDP_ZEROCOPY flag on an interface which does not support zero-copy. In such a case, an error is returned but the reference count is not decreased. This change fixes the fault, by decreasing the reference count in case of such an error. The problem being corrected appeared in '162c820ed896' for the first time, and the code was moved to new file location over the time with commit 'c2d3d6a47462'. This specific patch applies to all version starting from 'c2d3d6a47462'. The same solution should be applied but on different file (net/xdp/xdp_umem.c) and function (xdp_umem_assign_dev) for versions from '162c820ed896' to 'c2d3d6a47462' excluded. Fixes: 162c820ed896 ("xdp: hold device for umem regardless of zero-copy mode") Signed-off-by: Marek Majtyka Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20201120151443.105903-1-marekx.majtyka@intel.com --- net/xdp/xsk_buff_pool.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 3c5a1423d922..9287eddec52c 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -185,8 +185,10 @@ err_unreg_xsk: err_unreg_pool: if (!force_zc) err = 0; /* fallback to copy mode */ - if (err) + if (err) { xsk_clear_pool_at_qid(netdev, queue_id); + dev_put(netdev); + } return err; } From 652b44453ea953d3157f02a7f17e18e329952649 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sat, 21 Nov 2020 14:35:35 +0200 Subject: [PATCH 035/167] habanalabs/gaudi: fix missing code in ECC handling There is missing statement and missing "break;" in the ECC handling code in gaudi.c This will cause a wrong behavior upon certain ECC interrupts. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 2519a34e25b7..7ea6b4368a91 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -5436,6 +5436,8 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, params.num_memories = 33; params.derr = true; params.disable_clock_gating = true; + extract_info_from_fw = false; + break; default: return; } From b1824968221ccc498625750d8c49cf0d7d39a4de Mon Sep 17 00:00:00 2001 From: Srinivasa Rao Mandadapu Date: Mon, 23 Nov 2020 21:47:53 +0530 Subject: [PATCH 036/167] ASoC: qcom: Fix enabling BCLK and LRCLK in LPAIF invalid state Fix enabling BCLK and LRCLK only when LPAIF is invalid state and bit clock in enable state. In device suspend/resume scenario LPAIF is going to reset state. which is causing LRCLK disable and BCLK enable. Avoid such inconsitency by removing unnecessary cpu dai prepare API, which is doing LRCLK enable, and by maintaining BLCK state information. Fixes: 7e6799d8f87d ("ASoC: qcom: lpass-cpu: Enable MI2S BCLK and LRCLK together") Signed-off-by: V Sujith Kumar Reddy Signed-off-by: Srinivasa Rao Mandadapu Link: https://lore.kernel.org/r/1606148273-17325-1-git-send-email-srivasam@codeaurora.org Signed-off-by: Mark Brown --- sound/soc/qcom/lpass-cpu.c | 62 ++++++++++++++++---------------- sound/soc/qcom/lpass-lpaif-reg.h | 7 ++++ sound/soc/qcom/lpass-platform.c | 20 +++++++++-- sound/soc/qcom/lpass.h | 1 + 4 files changed, 57 insertions(+), 33 deletions(-) diff --git a/sound/soc/qcom/lpass-cpu.c b/sound/soc/qcom/lpass-cpu.c index 9d17c87445a9..426235a217ec 100644 --- a/sound/soc/qcom/lpass-cpu.c +++ b/sound/soc/qcom/lpass-cpu.c @@ -263,28 +263,6 @@ static int lpass_cpu_daiops_hw_params(struct snd_pcm_substream *substream, return 0; } -static int lpass_cpu_daiops_prepare(struct snd_pcm_substream *substream, - struct snd_soc_dai *dai) -{ - struct lpass_data *drvdata = snd_soc_dai_get_drvdata(dai); - struct lpaif_i2sctl *i2sctl = drvdata->i2sctl; - unsigned int id = dai->driver->id; - int ret; - - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { - ret = regmap_fields_write(i2sctl->spken, id, - LPAIF_I2SCTL_SPKEN_ENABLE); - } else { - ret = regmap_fields_write(i2sctl->micen, id, - LPAIF_I2SCTL_MICEN_ENABLE); - } - - if (ret) - dev_err(dai->dev, "error writing to i2sctl enable: %d\n", ret); - - return ret; -} - static int lpass_cpu_daiops_trigger(struct snd_pcm_substream *substream, int cmd, struct snd_soc_dai *dai) { @@ -292,6 +270,18 @@ static int lpass_cpu_daiops_trigger(struct snd_pcm_substream *substream, struct lpaif_i2sctl *i2sctl = drvdata->i2sctl; unsigned int id = dai->driver->id; int ret = -EINVAL; + unsigned int val = 0; + + ret = regmap_read(drvdata->lpaif_map, + LPAIF_I2SCTL_REG(drvdata->variant, dai->driver->id), &val); + if (ret) { + dev_err(dai->dev, "error reading from i2sctl reg: %d\n", ret); + return ret; + } + if (val == LPAIF_I2SCTL_RESET_STATE) { + dev_err(dai->dev, "error in i2sctl register state\n"); + return -ENOTRECOVERABLE; + } switch (cmd) { case SNDRV_PCM_TRIGGER_START: @@ -308,11 +298,14 @@ static int lpass_cpu_daiops_trigger(struct snd_pcm_substream *substream, dev_err(dai->dev, "error writing to i2sctl reg: %d\n", ret); - ret = clk_enable(drvdata->mi2s_bit_clk[id]); - if (ret) { - dev_err(dai->dev, "error in enabling mi2s bit clk: %d\n", ret); - clk_disable(drvdata->mi2s_osr_clk[id]); - return ret; + if (drvdata->bit_clk_state[id] == LPAIF_BIT_CLK_DISABLE) { + ret = clk_enable(drvdata->mi2s_bit_clk[id]); + if (ret) { + dev_err(dai->dev, "error in enabling mi2s bit clk: %d\n", ret); + clk_disable(drvdata->mi2s_osr_clk[id]); + return ret; + } + drvdata->bit_clk_state[id] = LPAIF_BIT_CLK_ENABLE; } break; @@ -329,7 +322,10 @@ static int lpass_cpu_daiops_trigger(struct snd_pcm_substream *substream, if (ret) dev_err(dai->dev, "error writing to i2sctl reg: %d\n", ret); - clk_disable(drvdata->mi2s_bit_clk[dai->driver->id]); + if (drvdata->bit_clk_state[id] == LPAIF_BIT_CLK_ENABLE) { + clk_disable(drvdata->mi2s_bit_clk[dai->driver->id]); + drvdata->bit_clk_state[id] = LPAIF_BIT_CLK_DISABLE; + } break; } @@ -341,7 +337,6 @@ const struct snd_soc_dai_ops asoc_qcom_lpass_cpu_dai_ops = { .startup = lpass_cpu_daiops_startup, .shutdown = lpass_cpu_daiops_shutdown, .hw_params = lpass_cpu_daiops_hw_params, - .prepare = lpass_cpu_daiops_prepare, .trigger = lpass_cpu_daiops_trigger, }; EXPORT_SYMBOL_GPL(asoc_qcom_lpass_cpu_dai_ops); @@ -459,16 +454,20 @@ static bool lpass_cpu_regmap_volatile(struct device *dev, unsigned int reg) struct lpass_variant *v = drvdata->variant; int i; + for (i = 0; i < v->i2s_ports; ++i) + if (reg == LPAIF_I2SCTL_REG(v, i)) + return true; for (i = 0; i < v->irq_ports; ++i) if (reg == LPAIF_IRQSTAT_REG(v, i)) return true; for (i = 0; i < v->rdma_channels; ++i) - if (reg == LPAIF_RDMACURR_REG(v, i)) + if (reg == LPAIF_RDMACURR_REG(v, i) || reg == LPAIF_RDMACTL_REG(v, i)) return true; for (i = 0; i < v->wrdma_channels; ++i) - if (reg == LPAIF_WRDMACURR_REG(v, i + v->wrdma_channel_start)) + if (reg == LPAIF_WRDMACURR_REG(v, i + v->wrdma_channel_start) || + reg == LPAIF_WRDMACTL_REG(v, i + v->wrdma_channel_start)) return true; return false; @@ -861,6 +860,7 @@ int asoc_qcom_lpass_cpu_platform_probe(struct platform_device *pdev) PTR_ERR(drvdata->mi2s_bit_clk[dai_id])); return PTR_ERR(drvdata->mi2s_bit_clk[dai_id]); } + drvdata->bit_clk_state[dai_id] = LPAIF_BIT_CLK_DISABLE; } /* Allocation for i2sctl regmap fields */ diff --git a/sound/soc/qcom/lpass-lpaif-reg.h b/sound/soc/qcom/lpass-lpaif-reg.h index 08f3fe508b85..405542832e99 100644 --- a/sound/soc/qcom/lpass-lpaif-reg.h +++ b/sound/soc/qcom/lpass-lpaif-reg.h @@ -60,6 +60,13 @@ #define LPAIF_I2SCTL_BITWIDTH_24 1 #define LPAIF_I2SCTL_BITWIDTH_32 2 +#define LPAIF_BIT_CLK_DISABLE 0 +#define LPAIF_BIT_CLK_ENABLE 1 + +#define LPAIF_I2SCTL_RESET_STATE 0x003C0004 +#define LPAIF_DMACTL_RESET_STATE 0x00200000 + + /* LPAIF IRQ */ #define LPAIF_IRQ_REG_ADDR(v, addr, port) \ (v->irq_reg_base + (addr) + v->irq_reg_stride * (port)) diff --git a/sound/soc/qcom/lpass-platform.c b/sound/soc/qcom/lpass-platform.c index 7a3fdf89968a..80b09dede5f9 100644 --- a/sound/soc/qcom/lpass-platform.c +++ b/sound/soc/qcom/lpass-platform.c @@ -110,6 +110,7 @@ static int lpass_platform_pcmops_open(struct snd_soc_component *component, struct regmap *map; unsigned int dai_id = cpu_dai->driver->id; + component->id = dai_id; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; @@ -451,19 +452,34 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component, unsigned int reg_irqclr = 0, val_irqclr = 0; unsigned int reg_irqen = 0, val_irqen = 0, val_mask = 0; unsigned int dai_id = cpu_dai->driver->id; + unsigned int dma_ctrl_reg = 0; ch = pcm_data->dma_ch; if (dir == SNDRV_PCM_STREAM_PLAYBACK) { id = pcm_data->dma_ch; - if (dai_id == LPASS_DP_RX) + if (dai_id == LPASS_DP_RX) { dmactl = drvdata->hdmi_rd_dmactl; - else + map = drvdata->hdmiif_map; + } else { dmactl = drvdata->rd_dmactl; + map = drvdata->lpaif_map; + } } else { dmactl = drvdata->wr_dmactl; id = pcm_data->dma_ch - v->wrdma_channel_start; + map = drvdata->lpaif_map; + } + ret = regmap_read(map, LPAIF_DMACTL_REG(v, ch, dir, dai_id), &dma_ctrl_reg); + if (ret) { + dev_err(soc_runtime->dev, "error reading from rdmactl reg: %d\n", ret); + return ret; } + if (dma_ctrl_reg == LPAIF_DMACTL_RESET_STATE || + dma_ctrl_reg == LPAIF_DMACTL_RESET_STATE + 1) { + dev_err(soc_runtime->dev, "error in rdmactl register state\n"); + return -ENOTRECOVERABLE; + } switch (cmd) { case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: diff --git a/sound/soc/qcom/lpass.h b/sound/soc/qcom/lpass.h index b4830f353796..bccd1a05d771 100644 --- a/sound/soc/qcom/lpass.h +++ b/sound/soc/qcom/lpass.h @@ -68,6 +68,7 @@ struct lpass_data { unsigned int mi2s_playback_sd_mode[LPASS_MAX_MI2S_PORTS]; unsigned int mi2s_capture_sd_mode[LPASS_MAX_MI2S_PORTS]; int hdmi_port_enable; + int bit_clk_state[LPASS_MAX_MI2S_PORTS]; /* low-power audio interface (LPAIF) registers */ void __iomem *lpaif; From fd8d9db3559a29fd737bcdb7c4fcbe1940caae34 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Sat, 31 Oct 2020 03:10:53 +0800 Subject: [PATCH 037/167] x86/resctrl: Remove superfluous kernfs_get() calls to prevent refcount leak Willem reported growing of kernfs_node_cache entries in slabtop when repeatedly creating and removing resctrl subdirectories as well as when repeatedly mounting and unmounting the resctrl filesystem. On resource group (control as well as monitoring) creation via a mkdir an extra kernfs_node reference is obtained to ensure that the rdtgroup structure remains accessible for the rdtgroup_kn_unlock() calls where it is removed on deletion. The kernfs_node reference count is dropped by kernfs_put() in rdtgroup_kn_unlock(). With the above explaining the need for one kernfs_get()/kernfs_put() pair in resctrl there are more places where a kernfs_node reference is obtained without a corresponding release. The excessive amount of reference count on kernfs nodes will never be dropped to 0 and the kernfs nodes will never be freed in the call paths of rmdir and umount. It leads to reference count leak and kernfs_node_cache memory leak. Remove the superfluous kernfs_get() calls and expand the existing comments surrounding the remaining kernfs_get()/kernfs_put() pair that remains in use. Superfluous kernfs_get() calls are removed from two areas: (1) In call paths of mount and mkdir, when kernfs nodes for "info", "mon_groups" and "mon_data" directories and sub-directories are created, the reference count of newly created kernfs node is set to 1. But after kernfs_create_dir() returns, superfluous kernfs_get() are called to take an additional reference. (2) kernfs_get() calls in rmdir call paths. Fixes: 17eafd076291 ("x86/intel_rdt: Split resource group removal in two") Fixes: 4af4a88e0c92 ("x86/intel_rdt/cqm: Add mount,umount support") Fixes: f3cbeacaa06e ("x86/intel_rdt/cqm: Add rmdir support") Fixes: d89b7379015f ("x86/intel_rdt/cqm: Add mon_data") Fixes: c7d9aac61311 ("x86/intel_rdt/cqm: Add mkdir support for RDT monitoring") Fixes: 5dc1d5c6bac2 ("x86/intel_rdt: Simplify info and base file lists") Fixes: 60cf5e101fd4 ("x86/intel_rdt: Add mkdir to resctrl file system") Fixes: 4e978d06dedb ("x86/intel_rdt: Add "info" files to resctrl file system") Reported-by: Willem de Bruijn Signed-off-by: Xiaochen Shen Signed-off-by: Borislav Petkov Reviewed-by: Reinette Chatre Tested-by: Willem de Bruijn Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1604085053-31639-1-git-send-email-xiaochen.shen@intel.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 35 ++------------------------ 1 file changed, 2 insertions(+), 33 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index af323e2e3100..2ab1266a5f14 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1769,7 +1769,6 @@ static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name, if (IS_ERR(kn_subdir)) return PTR_ERR(kn_subdir); - kernfs_get(kn_subdir); ret = rdtgroup_kn_set_ugid(kn_subdir); if (ret) return ret; @@ -1792,7 +1791,6 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL); if (IS_ERR(kn_info)) return PTR_ERR(kn_info); - kernfs_get(kn_info); ret = rdtgroup_add_files(kn_info, RF_TOP_INFO); if (ret) @@ -1813,12 +1811,6 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) goto out_destroy; } - /* - * This extra ref will be put in kernfs_remove() and guarantees - * that @rdtgrp->kn is always accessible. - */ - kernfs_get(kn_info); - ret = rdtgroup_kn_set_ugid(kn_info); if (ret) goto out_destroy; @@ -1847,12 +1839,6 @@ mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, if (dest_kn) *dest_kn = kn; - /* - * This extra ref will be put in kernfs_remove() and guarantees - * that @rdtgrp->kn is always accessible. - */ - kernfs_get(kn); - ret = rdtgroup_kn_set_ugid(kn); if (ret) goto out_destroy; @@ -2139,13 +2125,11 @@ static int rdt_get_tree(struct fs_context *fc) &kn_mongrp); if (ret < 0) goto out_info; - kernfs_get(kn_mongrp); ret = mkdir_mondata_all(rdtgroup_default.kn, &rdtgroup_default, &kn_mondata); if (ret < 0) goto out_mongrp; - kernfs_get(kn_mondata); rdtgroup_default.mon.mon_data_kn = kn_mondata; } @@ -2499,11 +2483,6 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, if (IS_ERR(kn)) return PTR_ERR(kn); - /* - * This extra ref will be put in kernfs_remove() and guarantees - * that kn is always accessible. - */ - kernfs_get(kn); ret = rdtgroup_kn_set_ugid(kn); if (ret) goto out_destroy; @@ -2838,8 +2817,8 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, /* * kernfs_remove() will drop the reference count on "kn" which * will free it. But we still need it to stick around for the - * rdtgroup_kn_unlock(kn} call below. Take one extra reference - * here, which will be dropped inside rdtgroup_kn_unlock(). + * rdtgroup_kn_unlock(kn) call. Take one extra reference here, + * which will be dropped inside rdtgroup_kn_unlock(). */ kernfs_get(kn); @@ -3049,11 +3028,6 @@ static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp, WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list)); list_del(&rdtgrp->mon.crdtgrp_list); - /* - * one extra hold on this, will drop when we kfree(rdtgrp) - * in rdtgroup_kn_unlock() - */ - kernfs_get(kn); kernfs_remove(rdtgrp->kn); return 0; @@ -3065,11 +3039,6 @@ static int rdtgroup_ctrl_remove(struct kernfs_node *kn, rdtgrp->flags = RDT_DELETED; list_del(&rdtgrp->rdtgroup_list); - /* - * one extra hold on this, will drop when we kfree(rdtgrp) - * in rdtgroup_kn_unlock() - */ - kernfs_get(kn); kernfs_remove(rdtgrp->kn); return 0; } From 758999246965eeb8b253d47e72f7bfe508804b16 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Sat, 31 Oct 2020 03:11:28 +0800 Subject: [PATCH 038/167] x86/resctrl: Add necessary kernfs_put() calls to prevent refcount leak On resource group creation via a mkdir an extra kernfs_node reference is obtained by kernfs_get() to ensure that the rdtgroup structure remains accessible for the rdtgroup_kn_unlock() calls where it is removed on deletion. Currently the extra kernfs_node reference count is only dropped by kernfs_put() in rdtgroup_kn_unlock() while the rdtgroup structure is removed in a few other locations that lack the matching reference drop. In call paths of rmdir and umount, when a control group is removed, kernfs_remove() is called to remove the whole kernfs nodes tree of the control group (including the kernfs nodes trees of all child monitoring groups), and then rdtgroup structure is freed by kfree(). The rdtgroup structures of all child monitoring groups under the control group are freed by kfree() in free_all_child_rdtgrp(). Before calling kfree() to free the rdtgroup structures, the kernfs node of the control group itself as well as the kernfs nodes of all child monitoring groups still take the extra references which will never be dropped to 0 and the kernfs nodes will never be freed. It leads to reference count leak and kernfs_node_cache memory leak. For example, reference count leak is observed in these two cases: (1) mount -t resctrl resctrl /sys/fs/resctrl mkdir /sys/fs/resctrl/c1 mkdir /sys/fs/resctrl/c1/mon_groups/m1 umount /sys/fs/resctrl (2) mkdir /sys/fs/resctrl/c1 mkdir /sys/fs/resctrl/c1/mon_groups/m1 rmdir /sys/fs/resctrl/c1 The same reference count leak issue also exists in the error exit paths of mkdir in mkdir_rdt_prepare() and rdtgroup_mkdir_ctrl_mon(). Fix this issue by following changes to make sure the extra kernfs_node reference on rdtgroup is dropped before freeing the rdtgroup structure. (1) Introduce rdtgroup removal helper rdtgroup_remove() to wrap up kernfs_put() and kfree(). (2) Call rdtgroup_remove() in rdtgroup removal path where the rdtgroup structure is about to be freed by kfree(). (3) Call rdtgroup_remove() or kernfs_put() as appropriate in the error exit paths of mkdir where an extra reference is taken by kernfs_get(). Fixes: f3cbeacaa06e ("x86/intel_rdt/cqm: Add rmdir support") Fixes: e02737d5b826 ("x86/intel_rdt: Add tasks files") Fixes: 60cf5e101fd4 ("x86/intel_rdt: Add mkdir to resctrl file system") Reported-by: Willem de Bruijn Signed-off-by: Xiaochen Shen Signed-off-by: Borislav Petkov Reviewed-by: Reinette Chatre Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1604085088-31707-1-git-send-email-xiaochen.shen@intel.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 32 ++++++++++++++++++++------ 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 2ab1266a5f14..6f4ca4bea625 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -507,6 +507,24 @@ unlock: return ret ?: nbytes; } +/** + * rdtgroup_remove - the helper to remove resource group safely + * @rdtgrp: resource group to remove + * + * On resource group creation via a mkdir, an extra kernfs_node reference is + * taken to ensure that the rdtgroup structure remains accessible for the + * rdtgroup_kn_unlock() calls where it is removed. + * + * Drop the extra reference here, then free the rdtgroup structure. + * + * Return: void + */ +static void rdtgroup_remove(struct rdtgroup *rdtgrp) +{ + kernfs_put(rdtgrp->kn); + kfree(rdtgrp); +} + struct task_move_callback { struct callback_head work; struct rdtgroup *rdtgrp; @@ -529,7 +547,7 @@ static void move_myself(struct callback_head *head) (rdtgrp->flags & RDT_DELETED)) { current->closid = 0; current->rmid = 0; - kfree(rdtgrp); + rdtgroup_remove(rdtgrp); } if (unlikely(current->flags & PF_EXITING)) @@ -2065,8 +2083,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn) rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) rdtgroup_pseudo_lock_remove(rdtgrp); kernfs_unbreak_active_protection(kn); - kernfs_put(rdtgrp->kn); - kfree(rdtgrp); + rdtgroup_remove(rdtgrp); } else { kernfs_unbreak_active_protection(kn); } @@ -2341,7 +2358,7 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp) if (atomic_read(&sentry->waitcount) != 0) sentry->flags = RDT_DELETED; else - kfree(sentry); + rdtgroup_remove(sentry); } } @@ -2383,7 +2400,7 @@ static void rmdir_all_sub(void) if (atomic_read(&rdtgrp->waitcount) != 0) rdtgrp->flags = RDT_DELETED; else - kfree(rdtgrp); + rdtgroup_remove(rdtgrp); } /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ update_closid_rmid(cpu_online_mask, &rdtgroup_default); @@ -2818,7 +2835,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, * kernfs_remove() will drop the reference count on "kn" which * will free it. But we still need it to stick around for the * rdtgroup_kn_unlock(kn) call. Take one extra reference here, - * which will be dropped inside rdtgroup_kn_unlock(). + * which will be dropped by kernfs_put() in rdtgroup_remove(). */ kernfs_get(kn); @@ -2859,6 +2876,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, out_idfree: free_rmid(rdtgrp->mon.rmid); out_destroy: + kernfs_put(rdtgrp->kn); kernfs_remove(rdtgrp->kn); out_free_rgrp: kfree(rdtgrp); @@ -2871,7 +2889,7 @@ static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp) { kernfs_remove(rgrp->kn); free_rmid(rgrp->mon.rmid); - kfree(rgrp); + rdtgroup_remove(rgrp); } /* From eeacd80fcb29b769ea915cd06b7dd35e0bf0bc25 Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Tue, 24 Nov 2020 17:20:25 +0800 Subject: [PATCH 039/167] ALSA: hda/realtek: Enable headset of ASUS UX482EG & B9400CEA with ALC294 Some laptops like ASUS UX482EG & B9400CEA's headset audio does not work until the quirk ALC294_FIXUP_ASUS_HPE is applied. Signed-off-by: Jian-Hong Pan Cc: Link: https://lore.kernel.org/r/20201124092024.179540-1-jhp@endlessos.org Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b90cd4c65b58..34ad24d2c970 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8625,6 +8625,9 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { SND_HDA_PIN_QUIRK(0x10ec0293, 0x1028, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE, ALC292_STANDARD_PINS, {0x13, 0x90a60140}), + SND_HDA_PIN_QUIRK(0x10ec0294, 0x1043, "ASUS", ALC294_FIXUP_ASUS_HPE, + {0x17, 0x90170110}, + {0x21, 0x04211020}), SND_HDA_PIN_QUIRK(0x10ec0294, 0x1043, "ASUS", ALC294_FIXUP_ASUS_MIC, {0x14, 0x90170110}, {0x1b, 0x90a70130}, From 58c644ba512cfbc2e39b758dd979edd1d6d00e27 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Nov 2020 11:50:35 +0100 Subject: [PATCH 040/167] sched/idle: Fix arch_cpu_idle() vs tracing We call arch_cpu_idle() with RCU disabled, but then use local_irq_{en,dis}able(), which invokes tracing, which relies on RCU. Switch all arch_cpu_idle() implementations to use raw_local_irq_{en,dis}able() and carefully manage the lockdep,rcu,tracing state like we do in entry. (XXX: we really should change arch_cpu_idle() to not return with interrupts enabled) Reported-by: Sven Schnelle Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Mark Rutland Tested-by: Mark Rutland Link: https://lkml.kernel.org/r/20201120114925.594122626@infradead.org --- arch/alpha/kernel/process.c | 2 +- arch/arm/kernel/process.c | 2 +- arch/arm64/kernel/process.c | 2 +- arch/csky/kernel/process.c | 2 +- arch/h8300/kernel/process.c | 2 +- arch/hexagon/kernel/process.c | 2 +- arch/ia64/kernel/process.c | 2 +- arch/microblaze/kernel/process.c | 2 +- arch/mips/kernel/idle.c | 12 ++++++------ arch/nios2/kernel/process.c | 2 +- arch/openrisc/kernel/process.c | 2 +- arch/parisc/kernel/process.c | 2 +- arch/powerpc/kernel/idle.c | 4 ++-- arch/riscv/kernel/process.c | 2 +- arch/s390/kernel/idle.c | 6 +++--- arch/sh/kernel/idle.c | 2 +- arch/sparc/kernel/leon_pmc.c | 4 ++-- arch/sparc/kernel/process_32.c | 2 +- arch/sparc/kernel/process_64.c | 4 ++-- arch/um/kernel/process.c | 2 +- arch/x86/include/asm/mwait.h | 2 -- arch/x86/kernel/process.c | 12 +++++++----- kernel/sched/idle.c | 28 +++++++++++++++++++++++++++- 23 files changed, 64 insertions(+), 38 deletions(-) diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 7462a7911002..4c7b0414a3ff 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -57,7 +57,7 @@ EXPORT_SYMBOL(pm_power_off); void arch_cpu_idle(void) { wtint(0); - local_irq_enable(); + raw_local_irq_enable(); } void arch_cpu_idle_dead(void) diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 8e6ace03e960..9f199b1e8383 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -71,7 +71,7 @@ void arch_cpu_idle(void) arm_pm_idle(); else cpu_do_idle(); - local_irq_enable(); + raw_local_irq_enable(); } void arch_cpu_idle_prepare(void) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 4784011cecac..9ebe02574127 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -126,7 +126,7 @@ void arch_cpu_idle(void) * tricks */ cpu_do_idle(); - local_irq_enable(); + raw_local_irq_enable(); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c index f730869e21ee..69af6bc87e64 100644 --- a/arch/csky/kernel/process.c +++ b/arch/csky/kernel/process.c @@ -102,6 +102,6 @@ void arch_cpu_idle(void) #ifdef CONFIG_CPU_PM_STOP asm volatile("stop\n"); #endif - local_irq_enable(); + raw_local_irq_enable(); } #endif diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index aea0a40b77a9..bc1364db58fe 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -57,7 +57,7 @@ asmlinkage void ret_from_kernel_thread(void); */ void arch_cpu_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); __asm__("sleep"); } diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index 5a0a95d93ddb..67767c5ed98c 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -44,7 +44,7 @@ void arch_cpu_idle(void) { __vmwait(); /* interrupts wake us up, but irqs are still disabled */ - local_irq_enable(); + raw_local_irq_enable(); } /* diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 6b61a703bcf5..c9ff8796b509 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -239,7 +239,7 @@ void arch_cpu_idle(void) if (mark_idle) (*mark_idle)(1); - safe_halt(); + raw_safe_halt(); if (mark_idle) (*mark_idle)(0); diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index a9e46e525cd0..f99860771ff4 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -149,5 +149,5 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpregs) void arch_cpu_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); } diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c index 5bc3b04693c7..18e69ebf5691 100644 --- a/arch/mips/kernel/idle.c +++ b/arch/mips/kernel/idle.c @@ -33,19 +33,19 @@ static void __cpuidle r3081_wait(void) { unsigned long cfg = read_c0_conf(); write_c0_conf(cfg | R30XX_CONF_HALT); - local_irq_enable(); + raw_local_irq_enable(); } static void __cpuidle r39xx_wait(void) { if (!need_resched()) write_c0_conf(read_c0_conf() | TX39_CONF_HALT); - local_irq_enable(); + raw_local_irq_enable(); } void __cpuidle r4k_wait(void) { - local_irq_enable(); + raw_local_irq_enable(); __r4k_wait(); } @@ -64,7 +64,7 @@ void __cpuidle r4k_wait_irqoff(void) " .set arch=r4000 \n" " wait \n" " .set pop \n"); - local_irq_enable(); + raw_local_irq_enable(); } /* @@ -84,7 +84,7 @@ static void __cpuidle rm7k_wait_irqoff(void) " wait \n" " mtc0 $1, $12 # stalls until W stage \n" " .set pop \n"); - local_irq_enable(); + raw_local_irq_enable(); } /* @@ -257,7 +257,7 @@ void arch_cpu_idle(void) if (cpu_wait) cpu_wait(); else - local_irq_enable(); + raw_local_irq_enable(); } #ifdef CONFIG_CPU_IDLE diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c index 4ffe857e6ada..50b4eb19a6cc 100644 --- a/arch/nios2/kernel/process.c +++ b/arch/nios2/kernel/process.c @@ -33,7 +33,7 @@ EXPORT_SYMBOL(pm_power_off); void arch_cpu_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); } /* diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 0ff391f00334..3c98728cce24 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -79,7 +79,7 @@ void machine_power_off(void) */ void arch_cpu_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); if (mfspr(SPR_UPR) & SPR_UPR_PMP) mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME); } diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index f196d96e2f9f..a92a23d6acd9 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -169,7 +169,7 @@ void __cpuidle arch_cpu_idle_dead(void) void __cpuidle arch_cpu_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); /* nop on real hardware, qemu will idle sleep. */ asm volatile("or %%r10,%%r10,%%r10\n":::); diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index ae0e2632393d..1f835539fda4 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -52,9 +52,9 @@ void arch_cpu_idle(void) * interrupts enabled, some don't. */ if (irqs_disabled()) - local_irq_enable(); + raw_local_irq_enable(); } else { - local_irq_enable(); + raw_local_irq_enable(); /* * Go into low thread priority and possibly * low power mode. diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 19225ec65db6..dd5f985b1f40 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -36,7 +36,7 @@ extern asmlinkage void ret_from_kernel_thread(void); void arch_cpu_idle(void) { wait_for_interrupt(); - local_irq_enable(); + raw_local_irq_enable(); } void show_regs(struct pt_regs *regs) diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index f7f1e64e0d98..2b85096964f8 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -33,10 +33,10 @@ void enabled_wait(void) PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; clear_cpu_flag(CIF_NOHZ_DELAY); - local_irq_save(flags); + raw_local_irq_save(flags); /* Call the assembler magic in entry.S */ psw_idle(idle, psw_mask); - local_irq_restore(flags); + raw_local_irq_restore(flags); /* Account time spent with enabled wait psw loaded as idle time. */ raw_write_seqcount_begin(&idle->seqcount); @@ -123,7 +123,7 @@ void arch_cpu_idle_enter(void) void arch_cpu_idle(void) { enabled_wait(); - local_irq_enable(); + raw_local_irq_enable(); } void arch_cpu_idle_exit(void) diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index 0dc0f52f9bb8..f59814983bd5 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -22,7 +22,7 @@ static void (*sh_idle)(void); void default_idle(void) { set_bl_bit(); - local_irq_enable(); + raw_local_irq_enable(); /* Isn't this racy ? */ cpu_sleep(); clear_bl_bit(); diff --git a/arch/sparc/kernel/leon_pmc.c b/arch/sparc/kernel/leon_pmc.c index 065e2d4b7290..396f46bca52e 100644 --- a/arch/sparc/kernel/leon_pmc.c +++ b/arch/sparc/kernel/leon_pmc.c @@ -50,7 +50,7 @@ static void pmc_leon_idle_fixup(void) register unsigned int address = (unsigned int)leon3_irqctrl_regs; /* Interrupts need to be enabled to not hang the CPU */ - local_irq_enable(); + raw_local_irq_enable(); __asm__ __volatile__ ( "wr %%g0, %%asr19\n" @@ -66,7 +66,7 @@ static void pmc_leon_idle_fixup(void) static void pmc_leon_idle(void) { /* Interrupts need to be enabled to not hang the CPU */ - local_irq_enable(); + raw_local_irq_enable(); /* For systems without power-down, this will be no-op */ __asm__ __volatile__ ("wr %g0, %asr19\n\t"); diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index adfcaeab3ddc..a02363735915 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -74,7 +74,7 @@ void arch_cpu_idle(void) { if (sparc_idle) (*sparc_idle)(); - local_irq_enable(); + raw_local_irq_enable(); } /* XXX cli/sti -> local_irq_xxx here, check this works once SMP is fixed. */ diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index a75093b993f9..6f8c7822fc06 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -62,11 +62,11 @@ void arch_cpu_idle(void) { if (tlb_type != hypervisor) { touch_nmi_watchdog(); - local_irq_enable(); + raw_local_irq_enable(); } else { unsigned long pstate; - local_irq_enable(); + raw_local_irq_enable(); /* The sun4v sleeping code requires that we have PSTATE.IE cleared over * the cpu sleep hypervisor call. diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 3bed09538dd9..9505a7e87396 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -217,7 +217,7 @@ void arch_cpu_idle(void) { cpu_tasks[current_thread_info()->cpu].pid = os_getpid(); um_idle_sleep(); - local_irq_enable(); + raw_local_irq_enable(); } int __cant_sleep(void) { diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index e039a933aca3..29dd27b5a339 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -88,8 +88,6 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx, static inline void __sti_mwait(unsigned long eax, unsigned long ecx) { - trace_hardirqs_on(); - mds_idle_clear_cpu_buffers(); /* "mwait %eax, %ecx;" */ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ba4593a913fa..145a7ac0c19a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -685,7 +685,7 @@ void arch_cpu_idle(void) */ void __cpuidle default_idle(void) { - safe_halt(); + raw_safe_halt(); } #if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE) EXPORT_SYMBOL(default_idle); @@ -736,6 +736,8 @@ void stop_this_cpu(void *dummy) /* * AMD Erratum 400 aware idle routine. We handle it the same way as C3 power * states (local apic timer and TSC stop). + * + * XXX this function is completely buggered vs RCU and tracing. */ static void amd_e400_idle(void) { @@ -757,9 +759,9 @@ static void amd_e400_idle(void) * The switch back from broadcast mode needs to be called with * interrupts disabled. */ - local_irq_disable(); + raw_local_irq_disable(); tick_broadcast_exit(); - local_irq_enable(); + raw_local_irq_enable(); } /* @@ -801,9 +803,9 @@ static __cpuidle void mwait_idle(void) if (!need_resched()) __sti_mwait(0, 0); else - local_irq_enable(); + raw_local_irq_enable(); } else { - local_irq_enable(); + raw_local_irq_enable(); } __current_clr_polling(); } diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 24d0ee26377d..c6932b8f4467 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -78,7 +78,7 @@ void __weak arch_cpu_idle_dead(void) { } void __weak arch_cpu_idle(void) { cpu_idle_force_poll = 1; - local_irq_enable(); + raw_local_irq_enable(); } /** @@ -94,9 +94,35 @@ void __cpuidle default_idle_call(void) trace_cpu_idle(1, smp_processor_id()); stop_critical_timings(); + + /* + * arch_cpu_idle() is supposed to enable IRQs, however + * we can't do that because of RCU and tracing. + * + * Trace IRQs enable here, then switch off RCU, and have + * arch_cpu_idle() use raw_local_irq_enable(). Note that + * rcu_idle_enter() relies on lockdep IRQ state, so switch that + * last -- this is very similar to the entry code. + */ + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(_THIS_IP_); rcu_idle_enter(); + lockdep_hardirqs_on(_THIS_IP_); + arch_cpu_idle(); + + /* + * OK, so IRQs are enabled here, but RCU needs them disabled to + * turn itself back on.. funny thing is that disabling IRQs + * will cause tracing, which needs RCU. Jump through hoops to + * make it 'work'. + */ + raw_local_irq_disable(); + lockdep_hardirqs_off(_THIS_IP_); rcu_idle_exit(); + lockdep_hardirqs_on(_THIS_IP_); + raw_local_irq_enable(); + start_critical_timings(); trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); } From 6e1d2bc675bd57640f5658a4a657ae488db4c204 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Nov 2020 11:28:35 +0100 Subject: [PATCH 041/167] intel_idle: Fix intel_idle() vs tracing cpuidle->enter() callbacks should not call into tracing because RCU has already been disabled. Instead of doing the broadcast thing itself, simply advertise to the cpuidle core that those states stop the timer. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Rafael J. Wysocki Link: https://lkml.kernel.org/r/20201123143510.GR3021@hirez.programming.kicks-ass.net --- drivers/idle/intel_idle.c | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 01bace49a962..7ee7ffe22ae3 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -126,26 +126,9 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state = &drv->states[index]; unsigned long eax = flg2MWAIT(state->flags); unsigned long ecx = 1; /* break on interrupt flag */ - bool tick; - - if (!static_cpu_has(X86_FEATURE_ARAT)) { - /* - * Switch over to one-shot tick broadcast if the target C-state - * is deeper than C1. - */ - if ((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) { - tick = true; - tick_broadcast_enter(); - } else { - tick = false; - } - } mwait_idle_with_hints(eax, ecx); - if (!static_cpu_has(X86_FEATURE_ARAT) && tick) - tick_broadcast_exit(); - return index; } @@ -1227,6 +1210,20 @@ static bool __init intel_idle_acpi_cst_extract(void) return false; } +static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state) +{ + unsigned long eax = flg2MWAIT(state->flags); + + if (boot_cpu_has(X86_FEATURE_ARAT)) + return false; + + /* + * Switch over to one-shot tick broadcast if the target C-state + * is deeper than C1. + */ + return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK); +} + static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); @@ -1269,6 +1266,9 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) if (disabled_states_mask & BIT(cstate)) state->flags |= CPUIDLE_FLAG_OFF; + if (intel_idle_state_needs_timer_stop(state)) + state->flags |= CPUIDLE_FLAG_TIMER_STOP; + state->enter = intel_idle; state->enter_s2idle = intel_idle_s2idle; } @@ -1507,6 +1507,9 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE))) drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF; + if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count])) + drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP; + drv->state_count++; } From 36ccdf85829a7dd6936dba5d02fa50138471f0d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Mon, 23 Nov 2020 18:56:00 +0100 Subject: [PATCH 042/167] net, xsk: Avoid taking multiple skbuff references MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 642e450b6b59 ("xsk: Do not discard packet when NETDEV_TX_BUSY") addressed the problem that packets were discarded from the Tx AF_XDP ring, when the driver returned NETDEV_TX_BUSY. Part of the fix was bumping the skbuff reference count, so that the buffer would not be freed by dev_direct_xmit(). A reference count larger than one means that the skbuff is "shared", which is not the case. If the "shared" skbuff is sent to the generic XDP receive path, netif_receive_generic_xdp(), and pskb_expand_head() is entered the BUG_ON(skb_shared(skb)) will trigger. This patch adds a variant to dev_direct_xmit(), __dev_direct_xmit(), where a user can select the skbuff free policy. This allows AF_XDP to avoid bumping the reference count, but still keep the NETDEV_TX_BUSY behavior. Fixes: 642e450b6b59 ("xsk: Do not discard packet when NETDEV_TX_BUSY") Reported-by: Yonghong Song Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201123175600.146255-1-bjorn.topel@gmail.com --- include/linux/netdevice.h | 14 +++++++++++++- net/core/dev.c | 8 ++------ net/xdp/xsk.c | 8 +------- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 964b494b0e8d..76775abf259d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2813,9 +2813,21 @@ u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); + int dev_queue_xmit(struct sk_buff *skb); int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev); -int dev_direct_xmit(struct sk_buff *skb, u16 queue_id); +int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id); + +static inline int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) +{ + int ret; + + ret = __dev_direct_xmit(skb, queue_id); + if (!dev_xmit_complete(ret)) + kfree_skb(skb); + return ret; +} + int register_netdevice(struct net_device *dev); void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); void unregister_netdevice_many(struct list_head *head); diff --git a/net/core/dev.c b/net/core/dev.c index 82dc6b48e45f..8588ade790cb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4180,7 +4180,7 @@ int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev) } EXPORT_SYMBOL(dev_queue_xmit_accel); -int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) +int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id) { struct net_device *dev = skb->dev; struct sk_buff *orig_skb = skb; @@ -4210,17 +4210,13 @@ int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) dev_xmit_recursion_dec(); local_bh_enable(); - - if (!dev_xmit_complete(ret)) - kfree_skb(skb); - return ret; drop: atomic_long_inc(&dev->tx_dropped); kfree_skb_list(skb); return NET_XMIT_DROP; } -EXPORT_SYMBOL(dev_direct_xmit); +EXPORT_SYMBOL(__dev_direct_xmit); /************************************************************************* * Receiver routines diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 5a6cdf7b320d..b7b039bd9d03 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -411,11 +411,7 @@ static int xsk_generic_xmit(struct sock *sk) skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr; skb->destructor = xsk_destruct_skb; - /* Hinder dev_direct_xmit from freeing the packet and - * therefore completing it in the destructor - */ - refcount_inc(&skb->users); - err = dev_direct_xmit(skb, xs->queue_id); + err = __dev_direct_xmit(skb, xs->queue_id); if (err == NETDEV_TX_BUSY) { /* Tell user-space to retry the send */ skb->destructor = sock_wfree; @@ -429,12 +425,10 @@ static int xsk_generic_xmit(struct sock *sk) /* Ignore NET_XMIT_CN as packet might have been sent */ if (err == NET_XMIT_DROP) { /* SKB completed but not sent */ - kfree_skb(skb); err = -EBUSY; goto out; } - consume_skb(skb); sent_frame = true; } From 68878a5c5b852d17f5827ce8a0f6fbd8b4cdfada Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Tue, 24 Nov 2020 18:41:00 +0800 Subject: [PATCH 043/167] bpftool: Fix error return value in build_btf_type_table An appropriate return value should be set on the failed path. Fixes: 4d374ba0bf30 ("tools: bpftool: implement "bpftool btf show|list"") Reported-by: Hulk Robot Signed-off-by: Zhen Lei Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201124104100.491-1-thunder.leizhen@huawei.com --- tools/bpf/bpftool/btf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 8ab142ff5eac..2afb7d5b1aca 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -693,6 +693,7 @@ build_btf_type_table(struct btf_attach_table *tab, enum bpf_obj_type type, obj_node = calloc(1, sizeof(*obj_node)); if (!obj_node) { p_err("failed to allocate memory: %s", strerror(errno)); + err = -ENOMEM; goto err_free; } From 16e6281b6b22b0178eab95c6a82502d7b10f67b8 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 22 Nov 2020 18:10:24 -0500 Subject: [PATCH 044/167] gfs2: Fix deadlock dumping resource group glocks Commit 0e539ca1bbbe ("gfs2: Fix NULL pointer dereference in gfs2_rgrp_dump") introduced additional locking in gfs2_rgrp_go_dump, which is also used for dumping resource group glocks via debugfs. However, on that code path, the glock spin lock is already taken in dump_glock, and taking it again in gfs2_glock2rgrp leads to deadlock. This can be reproduced with: $ mkfs.gfs2 -O -p lock_nolock /dev/FOO $ mount /dev/FOO /mnt/foo $ touch /mnt/foo/bar $ cat /sys/kernel/debug/gfs2/FOO/glocks Fix that by not taking the glock spin lock inside the go_dump callback. Fixes: 0e539ca1bbbe ("gfs2: Fix NULL pointer dereference in gfs2_rgrp_dump") Signed-off-by: Alexander Aring Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 67f2921ae8d4..6cedeefb7b3f 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -245,7 +245,7 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags) static void gfs2_rgrp_go_dump(struct seq_file *seq, struct gfs2_glock *gl, const char *fs_id_buf) { - struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl); + struct gfs2_rgrpd *rgd = gl->gl_object; if (rgd) gfs2_rgrp_dump(seq, rgd, fs_id_buf); From 515b269d5bd29a986d5e1c0a0cba87fa865a48b4 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 23 Nov 2020 10:53:35 -0500 Subject: [PATCH 045/167] gfs2: set lockdep subclass for iopen glocks This patch introduce a new globs attribute to define the subclass of the glock lockref spinlock. This avoid the following lockdep warning, which occurs when we lock an inode lock while an iopen lock is held: ============================================ WARNING: possible recursive locking detected 5.10.0-rc3+ #4990 Not tainted -------------------------------------------- kworker/0:1/12 is trying to acquire lock: ffff9067d45672d8 (&gl->gl_lockref.lock){+.+.}-{3:3}, at: lockref_get+0x9/0x20 but task is already holding lock: ffff9067da308588 (&gl->gl_lockref.lock){+.+.}-{3:3}, at: delete_work_func+0x164/0x260 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&gl->gl_lockref.lock); lock(&gl->gl_lockref.lock); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by kworker/0:1/12: #0: ffff9067c1bfdd38 ((wq_completion)delete_workqueue){+.+.}-{0:0}, at: process_one_work+0x1b7/0x540 #1: ffffac594006be70 ((work_completion)(&(&gl->gl_delete)->work)){+.+.}-{0:0}, at: process_one_work+0x1b7/0x540 #2: ffff9067da308588 (&gl->gl_lockref.lock){+.+.}-{3:3}, at: delete_work_func+0x164/0x260 stack backtrace: CPU: 0 PID: 12 Comm: kworker/0:1 Not tainted 5.10.0-rc3+ #4990 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 Workqueue: delete_workqueue delete_work_func Call Trace: dump_stack+0x8b/0xb0 __lock_acquire.cold+0x19e/0x2e3 lock_acquire+0x150/0x410 ? lockref_get+0x9/0x20 _raw_spin_lock+0x27/0x40 ? lockref_get+0x9/0x20 lockref_get+0x9/0x20 delete_work_func+0x188/0x260 process_one_work+0x237/0x540 worker_thread+0x4d/0x3b0 ? process_one_work+0x540/0x540 kthread+0x127/0x140 ? __kthread_bind_mask+0x60/0x60 ret_from_fork+0x22/0x30 Suggested-by: Andreas Gruenbacher Signed-off-by: Alexander Aring Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 1 + fs/gfs2/glops.c | 1 + fs/gfs2/incore.h | 1 + 3 files changed, 3 insertions(+) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index d98a2e5dab9f..35a6fd103761 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1035,6 +1035,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, gl->gl_node.next = NULL; gl->gl_flags = 0; gl->gl_name = name; + lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass); gl->gl_lockref.count = 1; gl->gl_state = LM_ST_UNLOCKED; gl->gl_target = LM_ST_UNLOCKED; diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 6cedeefb7b3f..e2cfc00ab936 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -781,6 +781,7 @@ const struct gfs2_glock_operations gfs2_iopen_glops = { .go_callback = iopen_go_callback, .go_demote_ok = iopen_go_demote_ok, .go_flags = GLOF_LRU | GLOF_NONDISK, + .go_subclass = 1, }; const struct gfs2_glock_operations gfs2_flock_glops = { diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index d7707307f4b1..f8858d995b24 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -247,6 +247,7 @@ struct gfs2_glock_operations { const char *fs_id_buf); void (*go_callback)(struct gfs2_glock *gl, bool remote); void (*go_free)(struct gfs2_glock *gl); + const int go_subclass; const int go_type; const unsigned long go_flags; #define GLOF_ASPACE 1 /* address space attached */ From b4fffc177fad3c99ee049611a508ca9561bb6871 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 20 Nov 2020 09:50:59 -0600 Subject: [PATCH 046/167] vhost scsi: fix lun reset completion handling vhost scsi owns the scsi se_cmd but lio frees the se_cmd->se_tmr before calling release_cmd, so while with normal cmd completion we can access the se_cmd from the vhost work, we can't do the same with se_cmd->se_tmr. This has us copy the tmf response in vhost_scsi_queue_tm_rsp to our internal vhost-scsi tmf struct for when it gets sent to the guest from our worker thread. Fixes: efd838fec17b ("vhost scsi: Add support for LUN resets.") Signed-off-by: Mike Christie Acked-by: Stefan Hajnoczi Link: https://lore.kernel.org/r/1605887459-3864-1-git-send-email-michael.christie@oracle.com Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index f22fce549862..6ff8a5096691 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -220,6 +220,7 @@ struct vhost_scsi_tmf { struct list_head queue_entry; struct se_cmd se_cmd; + u8 scsi_resp; struct vhost_scsi_inflight *inflight; struct iovec resp_iov; int in_iovs; @@ -426,6 +427,7 @@ static void vhost_scsi_queue_tm_rsp(struct se_cmd *se_cmd) struct vhost_scsi_tmf *tmf = container_of(se_cmd, struct vhost_scsi_tmf, se_cmd); + tmf->scsi_resp = se_cmd->se_tmr_req->response; transport_generic_free_cmd(&tmf->se_cmd, 0); } @@ -1183,7 +1185,7 @@ static void vhost_scsi_tmf_resp_work(struct vhost_work *work) vwork); int resp_code; - if (tmf->se_cmd.se_tmr_req->response == TMR_FUNCTION_COMPLETE) + if (tmf->scsi_resp == TMR_FUNCTION_COMPLETE) resp_code = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED; else resp_code = VIRTIO_SCSI_S_FUNCTION_REJECTED; From 8009b0f4ab3151f3b8c1675ceb0f9151f09dddaa Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Mon, 16 Nov 2020 17:16:53 +0100 Subject: [PATCH 047/167] vringh: fix vringh_iov_push_*() documentation vringh_iov_push_*() functions don't have 'dst' parameter, but have the 'src' parameter. Replace 'dst' description with 'src' description. Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201116161653.102904-1-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vringh.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index 8bd8b403f087..b7403ba8e7f7 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -730,7 +730,7 @@ EXPORT_SYMBOL(vringh_iov_pull_user); /** * vringh_iov_push_user - copy bytes into vring_iov. * @wiov: the wiov as passed to vringh_getdesc_user() (updated as we consume) - * @dst: the place to copy. + * @src: the place to copy from. * @len: the maximum length to copy. * * Returns the bytes copied <= len or a negative errno. @@ -976,7 +976,7 @@ EXPORT_SYMBOL(vringh_iov_pull_kern); /** * vringh_iov_push_kern - copy bytes into vring_iov. * @wiov: the wiov as passed to vringh_getdesc_kern() (updated as we consume) - * @dst: the place to copy. + * @src: the place to copy from. * @len: the maximum length to copy. * * Returns the bytes copied <= len or a negative errno. @@ -1333,7 +1333,7 @@ EXPORT_SYMBOL(vringh_iov_pull_iotlb); * vringh_iov_push_iotlb - copy bytes into vring_iov. * @vrh: the vring. * @wiov: the wiov as passed to vringh_getdesc_iotlb() (updated as we consume) - * @dst: the place to copy. + * @src: the place to copy from. * @len: the maximum length to copy. * * Returns the bytes copied <= len or a negative errno. From ad89653f79f1882d55d9df76c9b2b94f008c4e27 Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Thu, 5 Nov 2020 18:26:33 -0500 Subject: [PATCH 048/167] vhost-vdpa: fix page pinning leakage in error path (rework) Pinned pages are not properly accounted particularly when mapping error occurs on IOTLB update. Clean up dangling pinned pages for the error path. The memory usage for bookkeeping pinned pages is reverted to what it was before: only one single free page is needed. This helps reduce the host memory demand for VM with a large amount of memory, or in the situation where host is running short of free memory. Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") Signed-off-by: Si-Wei Liu Link: https://lore.kernel.org/r/1604618793-4681-1-git-send-email-si-wei.liu@oracle.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vhost/vdpa.c | 80 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 62 insertions(+), 18 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 2754f3069738..f2db99031e2f 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -577,6 +577,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, if (r) vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); + else + atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm); return r; } @@ -608,8 +610,9 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, unsigned long list_size = PAGE_SIZE / sizeof(struct page *); unsigned int gup_flags = FOLL_LONGTERM; unsigned long npages, cur_base, map_pfn, last_pfn = 0; - unsigned long locked, lock_limit, pinned, i; + unsigned long lock_limit, sz2pin, nchunks, i; u64 iova = msg->iova; + long pinned; int ret = 0; if (msg->iova < v->range.first || @@ -620,6 +623,7 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, msg->iova + msg->size - 1)) return -EEXIST; + /* Limit the use of memory for bookkeeping */ page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) return -ENOMEM; @@ -628,52 +632,75 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, gup_flags |= FOLL_WRITE; npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT; - if (!npages) - return -EINVAL; + if (!npages) { + ret = -EINVAL; + goto free; + } mmap_read_lock(dev->mm); - locked = atomic64_add_return(npages, &dev->mm->pinned_vm); lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - - if (locked > lock_limit) { + if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) { ret = -ENOMEM; - goto out; + goto unlock; } cur_base = msg->uaddr & PAGE_MASK; iova &= PAGE_MASK; + nchunks = 0; while (npages) { - pinned = min_t(unsigned long, npages, list_size); - ret = pin_user_pages(cur_base, pinned, - gup_flags, page_list, NULL); - if (ret != pinned) + sz2pin = min_t(unsigned long, npages, list_size); + pinned = pin_user_pages(cur_base, sz2pin, + gup_flags, page_list, NULL); + if (sz2pin != pinned) { + if (pinned < 0) { + ret = pinned; + } else { + unpin_user_pages(page_list, pinned); + ret = -ENOMEM; + } goto out; + } + nchunks++; if (!last_pfn) map_pfn = page_to_pfn(page_list[0]); - for (i = 0; i < ret; i++) { + for (i = 0; i < pinned; i++) { unsigned long this_pfn = page_to_pfn(page_list[i]); u64 csize; if (last_pfn && (this_pfn != last_pfn + 1)) { /* Pin a contiguous chunk of memory */ csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; - if (vhost_vdpa_map(v, iova, csize, - map_pfn << PAGE_SHIFT, - msg->perm)) + ret = vhost_vdpa_map(v, iova, csize, + map_pfn << PAGE_SHIFT, + msg->perm); + if (ret) { + /* + * Unpin the pages that are left unmapped + * from this point on in the current + * page_list. The remaining outstanding + * ones which may stride across several + * chunks will be covered in the common + * error path subsequently. + */ + unpin_user_pages(&page_list[i], + pinned - i); goto out; + } + map_pfn = this_pfn; iova += csize; + nchunks = 0; } last_pfn = this_pfn; } - cur_base += ret << PAGE_SHIFT; - npages -= ret; + cur_base += pinned << PAGE_SHIFT; + npages -= pinned; } /* Pin the rest chunk */ @@ -681,10 +708,27 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, map_pfn << PAGE_SHIFT, msg->perm); out: if (ret) { + if (nchunks) { + unsigned long pfn; + + /* + * Unpin the outstanding pages which are yet to be + * mapped but haven't due to vdpa_map() or + * pin_user_pages() failure. + * + * Mapped pages are accounted in vdpa_map(), hence + * the corresponding unpinning will be handled by + * vdpa_unmap(). + */ + WARN_ON(!last_pfn); + for (pfn = map_pfn; pfn <= last_pfn; pfn++) + unpin_user_page(pfn_to_page(pfn)); + } vhost_vdpa_unmap(v, msg->iova, msg->size); - atomic64_sub(npages, &dev->mm->pinned_vm); } +unlock: mmap_read_unlock(dev->mm); +free: free_page((unsigned long)page_list); return ret; } From 3fba05a2832f93b4d0cd4204f771fdae0d823114 Mon Sep 17 00:00:00 2001 From: Luo Meng Date: Mon, 23 Nov 2020 21:38:39 +0800 Subject: [PATCH 049/167] ASoC: wm_adsp: fix error return code in wm_adsp_load() Fix to return a negative error code from the error handling case instead of 0 in function wm_adsp_load(), as done elsewhere in this function. Fixes: 170b1e123f38 ("ASoC: wm_adsp: Add support for new Halo core DSPs") Reported-by: Hulk Robot Signed-off-by: Luo Meng Acked-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20201123133839.4073787-1-luomeng12@huawei.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index bcf18bf15a02..e61d00486c65 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1937,6 +1937,7 @@ static int wm_adsp_load(struct wm_adsp *dsp) mem = wm_adsp_find_region(dsp, type); if (!mem) { adsp_err(dsp, "No region of type: %x\n", type); + ret = -EINVAL; goto out_fw; } From 50bdcf047503e30126327d0be4f0ad7337106d68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?= Date: Mon, 23 Nov 2020 12:28:17 -0500 Subject: [PATCH 050/167] efi/efivars: Set generic ops before loading SSDT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Efivars allows for overriding of SSDT tables, however starting with commit bf67fad19e493b ("efi: Use more granular check for availability for variable services") this use case is broken. When loading SSDT generic ops should be set first, however mentioned commit reversed order of operations. Fix this by restoring original order of operations. Fixes: bf67fad19e493b ("efi: Use more granular check for availability for variable services") Signed-off-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20201123172817.124146-1-amadeuszx.slawinski@linux.intel.com Tested-by: Cezary Rojewski Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 5e5480a0a32d..6c6eec044a97 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -390,10 +390,10 @@ static int __init efisubsys_init(void) if (efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE | EFI_RT_SUPPORTED_GET_NEXT_VARIABLE_NAME)) { - efivar_ssdt_load(); error = generic_ops_register(); if (error) goto err_put; + efivar_ssdt_load(); platform_device_register_simple("efivars", 0, NULL, 0); } From ff04f3b6f2e27f8ae28a498416af2a8dd5072b43 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 25 Nov 2020 08:45:55 +0100 Subject: [PATCH 051/167] efivarfs: revert "fix memory leak in efivarfs_create()" The memory leak addressed by commit fe5186cf12e3 is a false positive: all allocations are recorded in a linked list, and freed when the filesystem is unmounted. This leads to double frees, and as reported by David, leads to crashes if SLUB is configured to self destruct when double frees occur. So drop the redundant kfree() again, and instead, mark the offending pointer variable so the allocation is ignored by kmemleak. Cc: Vamshi K Sthambamkadi Fixes: fe5186cf12e3 ("efivarfs: fix memory leak in efivarfs_create()") Reported-by: David Laight Signed-off-by: Ard Biesheuvel --- fs/efivarfs/inode.c | 2 ++ fs/efivarfs/super.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c index 96c0c86f3fff..0297ad95eb5c 100644 --- a/fs/efivarfs/inode.c +++ b/fs/efivarfs/inode.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -103,6 +104,7 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry, var->var.VariableName[i] = '\0'; inode->i_private = var; + kmemleak_ignore(var); err = efivar_entry_add(var, &efivarfs_list); if (err) diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index f943fd0b0699..15880a68faad 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -21,7 +21,6 @@ LIST_HEAD(efivarfs_list); static void efivarfs_evict_inode(struct inode *inode) { clear_inode(inode); - kfree(inode->i_private); } static const struct super_operations efivarfs_ops = { From 36a237526cd81ff4b6829e6ebd60921c6f976e3b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 24 Nov 2020 20:16:46 +0100 Subject: [PATCH 052/167] efi: EFI_EARLYCON should depend on EFI CONFIG_EFI_EARLYCON defaults to yes, and thus is enabled on systems that do not support EFI, or do not have EFI support enabled, but do satisfy the symbol's other dependencies. While drivers/firmware/efi/ won't be entered during the build phase if CONFIG_EFI=n, and drivers/firmware/efi/earlycon.c itself thus won't be built, enabling EFI_EARLYCON does force-enable CONFIG_FONT_SUPPORT and CONFIG_ARCH_USE_MEMREMAP_PROT, and CONFIG_FONT_8x16, which is undesirable. Fix this by making CONFIG_EFI_EARLYCON depend on CONFIG_EFI. This reduces kernel size on headless systems by more than 4 KiB. Fixes: 69c1f396f25b805a ("efi/x86: Convert x86 EFI earlyprintk into generic earlycon implementation") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20201124191646.3559757-1-geert@linux-m68k.org Reviewed-by: Damien Le Moal Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index 36ec1f718893..d9895491ff34 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -270,7 +270,7 @@ config EFI_DEV_PATH_PARSER config EFI_EARLYCON def_bool y - depends on SERIAL_EARLYCON && !ARM && !IA64 + depends on EFI && SERIAL_EARLYCON && !ARM && !IA64 select FONT_SUPPORT select ARCH_USE_MEMREMAP_PROT From 778721510e84209f78e31e2ccb296ae36d623f5e Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 24 Nov 2020 10:44:36 -0500 Subject: [PATCH 053/167] gfs2: check for empty rgrp tree in gfs2_ri_update If gfs2 tries to mount a (corrupt) file system that has no resource groups it still tries to set preferences on the first one, which causes a kernel null pointer dereference. This patch adds a check to function gfs2_ri_update so this condition is detected and reported back as an error. Reported-by: syzbot+e3f23ce40269a4c9053a@syzkaller.appspotmail.com Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index f7addc6197ed..5e8eef9990e3 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -985,6 +985,10 @@ static int gfs2_ri_update(struct gfs2_inode *ip) if (error < 0) return error; + if (RB_EMPTY_ROOT(&sdp->sd_rindex_tree)) { + fs_err(sdp, "no resource groups found in the file system.\n"); + return -ENOENT; + } set_rgrp_preferences(sdp); sdp->sd_rindex_uptodate = 1; From f39e7d3aae2934b1cfdd209b54c508e2552e9531 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 24 Nov 2020 10:41:40 -0600 Subject: [PATCH 054/167] gfs2: Don't freeze the file system during unmount GFS2's freeze/thaw mechanism uses a special freeze glock to control its operation. It does this with a sync glock operation (glops.c) called freeze_go_sync. When the freeze glock is demoted (glock's do_xmote) the glops function causes the file system to be frozen. This is intended. However, GFS2's mount and unmount processes also hold the freeze glock to prevent other processes, perhaps on different cluster nodes, from mounting the frozen file system in read-write mode. Before this patch, there was no check in freeze_go_sync for whether a freeze in intended or whether the glock demote was caused by a normal unmount. So it was trying to freeze the file system it's trying to unmount, which ends up in a deadlock. This patch adds an additional check to freeze_go_sync so that demotes of the freeze glock are ignored if they come from the unmount process. Fixes: 20b329129009 ("gfs2: Fix regression in freeze_go_sync") Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index e2cfc00ab936..3faa421568b0 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -582,7 +582,8 @@ static int freeze_go_sync(struct gfs2_glock *gl) * Once thawed, the work func acquires the freeze glock in * SH and everybody goes back to thawed. */ - if (gl->gl_state == LM_ST_SHARED && !gfs2_withdrawn(sdp)) { + if (gl->gl_state == LM_ST_SHARED && !gfs2_withdrawn(sdp) && + !test_bit(SDF_NORECOVERY, &sdp->sd_flags)) { atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE); error = freeze_super(sdp->sd_vfs); if (error) { From e553fdc8105ac2ef3f321739da3908bb6673f7de Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sun, 8 Nov 2020 13:37:37 -0700 Subject: [PATCH 055/167] riscv: Explicitly specify the build id style in vDSO Makefile again Commit a96843372331 ("kbuild: explicitly specify the build id style") explicitly set the build ID style to SHA1. Commit c2c81bb2f691 ("RISC-V: Fix the VDSO symbol generaton for binutils-2.35+") undid this change, likely unintentionally. Restore it so that the build ID style stays consistent across the tree regardless of linker. Fixes: c2c81bb2f691 ("RISC-V: Fix the VDSO symbol generaton for binutils-2.35+") Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Reviewed-by: Bill Wendling Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index cb8f9e4cfcbf..0cfd6da784f8 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -44,7 +44,7 @@ SYSCFLAGS_vdso.so.dbg = $(c_flags) $(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) FORCE $(call if_changed,vdsold) SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \ - -Wl,--build-id -Wl,--hash-style=both + -Wl,--build-id=sha1 -Wl,--hash-style=both # We also create a special relocatable object that should mirror the symbol # table and layout of the linked DSO. With ld --just-symbols we can then From 6134b110f97178d6919441a82dc91a7f3664b4e0 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Fri, 6 Nov 2020 13:23:59 +0530 Subject: [PATCH 056/167] RISC-V: Add missing jump label initialization The jump_label_init() should be called from setup_arch() very early for proper functioning of jump label support. Fixes: ebc00dde8a97 ("riscv: Add jump-label implementation") Signed-off-by: Anup Patel Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/setup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index c424cc6dd833..117f3212a8e4 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -75,6 +75,7 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; early_ioremap_setup(); + jump_label_init(); parse_early_param(); efi_init(); From 30aca1bacb398dec6c1ed5eeca33f355bd7b6203 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 16 Nov 2020 17:39:51 -0800 Subject: [PATCH 057/167] RISC-V: fix barrier() use in riscv's uses barrier() so it should include Fixes this build error: CC [M] drivers/net/ethernet/emulex/benet/be_main.o In file included from ./include/vdso/processor.h:10, from ./arch/riscv/include/asm/processor.h:11, from ./include/linux/prefetch.h:15, from drivers/net/ethernet/emulex/benet/be_main.c:14: ./arch/riscv/include/asm/vdso/processor.h: In function 'cpu_relax': ./arch/riscv/include/asm/vdso/processor.h:14:2: error: implicit declaration of function 'barrier' [-Werror=implicit-function-declaration] 14 | barrier(); This happens with a total of 5 networking drivers -- they all use . rv64 allmodconfig now builds cleanly after this patch. Fixes fallout from: 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive") Fixes: ad5d1122b82f ("riscv: use vDSO common flow to reduce the latency of the time-related functions") Reported-by: Andreas Schwab Signed-off-by: Randy Dunlap Acked-by: Arvind Sankar Signed-off-by: Palmer Dabbelt Reviewed-by: Nick Desaulniers Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/vdso/processor.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/include/asm/vdso/processor.h b/arch/riscv/include/asm/vdso/processor.h index 82a5693b1861..134388cbaaa1 100644 --- a/arch/riscv/include/asm/vdso/processor.h +++ b/arch/riscv/include/asm/vdso/processor.h @@ -4,6 +4,8 @@ #ifndef __ASSEMBLY__ +#include + static inline void cpu_relax(void) { #ifdef __riscv_muldiv From 33fc379df76b4991e5ae312f07bcd6820811971e Mon Sep 17 00:00:00 2001 From: Anand K Mistry Date: Tue, 10 Nov 2020 12:33:53 +1100 Subject: [PATCH 058/167] x86/speculation: Fix prctl() when spectre_v2_user={seccomp,prctl},ibpb When spectre_v2_user={seccomp,prctl},ibpb is specified on the command line, IBPB is force-enabled and STIPB is conditionally-enabled (or not available). However, since 21998a351512 ("x86/speculation: Avoid force-disabling IBPB based on STIBP and enhanced IBRS.") the spectre_v2_user_ibpb variable is set to SPECTRE_V2_USER_{PRCTL,SECCOMP} instead of SPECTRE_V2_USER_STRICT, which is the actual behaviour. Because the issuing of IBPB relies on the switch_mm_*_ibpb static branches, the mitigations behave as expected. Since 1978b3a53a74 ("x86/speculation: Allow IBPB to be conditionally enabled on CPUs with always-on STIBP") this discrepency caused the misreporting of IB speculation via prctl(). On CPUs with STIBP always-on and spectre_v2_user=seccomp,ibpb, prctl(PR_GET_SPECULATION_CTRL) would return PR_SPEC_PRCTL | PR_SPEC_ENABLE instead of PR_SPEC_DISABLE since both IBPB and STIPB are always on. It also allowed prctl(PR_SET_SPECULATION_CTRL) to set the IB speculation mode, even though the flag is ignored. Similarly, for CPUs without SMT, prctl(PR_GET_SPECULATION_CTRL) should also return PR_SPEC_DISABLE since IBPB is always on and STIBP is not available. [ bp: Massage commit message. ] Fixes: 21998a351512 ("x86/speculation: Avoid force-disabling IBPB based on STIBP and enhanced IBRS.") Fixes: 1978b3a53a74 ("x86/speculation: Allow IBPB to be conditionally enabled on CPUs with always-on STIBP") Signed-off-by: Anand K Mistry Signed-off-by: Borislav Petkov Cc: Link: https://lkml.kernel.org/r/20201110123349.1.Id0cbf996d2151f4c143c90f9028651a5b49a5908@changeid --- arch/x86/kernel/cpu/bugs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 581fb7223ad0..d41b70fe4918 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -739,11 +739,13 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); + spectre_v2_user_ibpb = mode; switch (cmd) { case SPECTRE_V2_USER_CMD_FORCE: case SPECTRE_V2_USER_CMD_PRCTL_IBPB: case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: static_branch_enable(&switch_mm_always_ibpb); + spectre_v2_user_ibpb = SPECTRE_V2_USER_STRICT; break; case SPECTRE_V2_USER_CMD_PRCTL: case SPECTRE_V2_USER_CMD_AUTO: @@ -757,8 +759,6 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n", static_key_enabled(&switch_mm_always_ibpb) ? "always-on" : "conditional"); - - spectre_v2_user_ibpb = mode; } /* From 9a44bc9449cfe7e39dbadf537ff669fb007a9e63 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Wed, 25 Nov 2020 20:24:04 +0000 Subject: [PATCH 059/167] bpf: Add MAINTAINERS entry for BPF LSM Similar to XDP and some JITs, also add Brendan and Florent who have been reviewing all my patches internally as reviewers. The patches are expected as usual to go via the BPF tree(s) / list / merge workflows. Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201125202404.1419509-1-kpsingh@chromium.org --- MAINTAINERS | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 15355c055a1a..3004f0abfe7d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3356,6 +3356,17 @@ S: Supported F: arch/x86/net/ X: arch/x86/net/bpf_jit_comp32.c +BPF LSM (Security Audit and Enforcement using BPF) +M: KP Singh +R: Florent Revest +R: Brendan Jackman +L: bpf@vger.kernel.org +S: Maintained +F: Documentation/bpf/bpf_lsm.rst +F: include/linux/bpf_lsm.h +F: kernel/bpf/bpf_lsm.c +F: security/bpf/ + BROADCOM B44 10/100 ETHERNET DRIVER M: Michael Chan L: netdev@vger.kernel.org From 68ad89de918e1c5a79c9c56127e5e31741fd517e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Nov 2020 01:59:32 -0800 Subject: [PATCH 060/167] netfilter: ipset: prevent uninit-value in hash_ip6_add syzbot found that we are not validating user input properly before copying 16 bytes [1]. Using NLA_BINARY in ipaddr_policy[] for IPv6 address is not correct, since it ensures at most 16 bytes were provided. We should instead make sure user provided exactly 16 bytes. In old kernels (before v4.20), fix would be to remove the NLA_BINARY, since NLA_POLICY_EXACT_LEN() was not yet available. [1] BUG: KMSAN: uninit-value in hash_ip6_add+0x1cba/0x3a50 net/netfilter/ipset/ip_set_hash_gen.h:892 CPU: 1 PID: 11611 Comm: syz-executor.0 Not tainted 5.10.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x21c/0x280 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:118 __msan_warning+0x5f/0xa0 mm/kmsan/kmsan_instr.c:197 hash_ip6_add+0x1cba/0x3a50 net/netfilter/ipset/ip_set_hash_gen.h:892 hash_ip6_uadt+0x976/0xbd0 net/netfilter/ipset/ip_set_hash_ip.c:267 call_ad+0x329/0xd00 net/netfilter/ipset/ip_set_core.c:1720 ip_set_ad+0x111f/0x1440 net/netfilter/ipset/ip_set_core.c:1808 ip_set_uadd+0xf6/0x110 net/netfilter/ipset/ip_set_core.c:1833 nfnetlink_rcv_msg+0xc7d/0xdf0 net/netfilter/nfnetlink.c:252 netlink_rcv_skb+0x70a/0x820 net/netlink/af_netlink.c:2494 nfnetlink_rcv+0x4f0/0x4380 net/netfilter/nfnetlink.c:600 netlink_unicast_kernel net/netlink/af_netlink.c:1304 [inline] netlink_unicast+0x11da/0x14b0 net/netlink/af_netlink.c:1330 netlink_sendmsg+0x173c/0x1840 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg net/socket.c:671 [inline] ____sys_sendmsg+0xc7a/0x1240 net/socket.c:2353 ___sys_sendmsg net/socket.c:2407 [inline] __sys_sendmsg+0x6d5/0x830 net/socket.c:2440 __do_sys_sendmsg net/socket.c:2449 [inline] __se_sys_sendmsg+0x97/0xb0 net/socket.c:2447 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2447 do_syscall_64+0x9f/0x140 arch/x86/entry/common.c:48 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x45deb9 Code: 0d b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 db b3 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fe2e503fc78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000029ec0 RCX: 000000000045deb9 RDX: 0000000000000000 RSI: 0000000020000140 RDI: 0000000000000003 RBP: 000000000118bf60 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 000000000118bf2c R13: 000000000169fb7f R14: 00007fe2e50409c0 R15: 000000000118bf2c Uninit was stored to memory at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:121 [inline] kmsan_internal_chain_origin+0xad/0x130 mm/kmsan/kmsan.c:289 __msan_chain_origin+0x57/0xa0 mm/kmsan/kmsan_instr.c:147 ip6_netmask include/linux/netfilter/ipset/pfxlen.h:49 [inline] hash_ip6_netmask net/netfilter/ipset/ip_set_hash_ip.c:185 [inline] hash_ip6_uadt+0xb1c/0xbd0 net/netfilter/ipset/ip_set_hash_ip.c:263 call_ad+0x329/0xd00 net/netfilter/ipset/ip_set_core.c:1720 ip_set_ad+0x111f/0x1440 net/netfilter/ipset/ip_set_core.c:1808 ip_set_uadd+0xf6/0x110 net/netfilter/ipset/ip_set_core.c:1833 nfnetlink_rcv_msg+0xc7d/0xdf0 net/netfilter/nfnetlink.c:252 netlink_rcv_skb+0x70a/0x820 net/netlink/af_netlink.c:2494 nfnetlink_rcv+0x4f0/0x4380 net/netfilter/nfnetlink.c:600 netlink_unicast_kernel net/netlink/af_netlink.c:1304 [inline] netlink_unicast+0x11da/0x14b0 net/netlink/af_netlink.c:1330 netlink_sendmsg+0x173c/0x1840 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg net/socket.c:671 [inline] ____sys_sendmsg+0xc7a/0x1240 net/socket.c:2353 ___sys_sendmsg net/socket.c:2407 [inline] __sys_sendmsg+0x6d5/0x830 net/socket.c:2440 __do_sys_sendmsg net/socket.c:2449 [inline] __se_sys_sendmsg+0x97/0xb0 net/socket.c:2447 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2447 do_syscall_64+0x9f/0x140 arch/x86/entry/common.c:48 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Uninit was stored to memory at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:121 [inline] kmsan_internal_chain_origin+0xad/0x130 mm/kmsan/kmsan.c:289 kmsan_memcpy_memmove_metadata+0x25e/0x2d0 mm/kmsan/kmsan.c:226 kmsan_memcpy_metadata+0xb/0x10 mm/kmsan/kmsan.c:246 __msan_memcpy+0x46/0x60 mm/kmsan/kmsan_instr.c:110 ip_set_get_ipaddr6+0x2cb/0x370 net/netfilter/ipset/ip_set_core.c:310 hash_ip6_uadt+0x439/0xbd0 net/netfilter/ipset/ip_set_hash_ip.c:255 call_ad+0x329/0xd00 net/netfilter/ipset/ip_set_core.c:1720 ip_set_ad+0x111f/0x1440 net/netfilter/ipset/ip_set_core.c:1808 ip_set_uadd+0xf6/0x110 net/netfilter/ipset/ip_set_core.c:1833 nfnetlink_rcv_msg+0xc7d/0xdf0 net/netfilter/nfnetlink.c:252 netlink_rcv_skb+0x70a/0x820 net/netlink/af_netlink.c:2494 nfnetlink_rcv+0x4f0/0x4380 net/netfilter/nfnetlink.c:600 netlink_unicast_kernel net/netlink/af_netlink.c:1304 [inline] netlink_unicast+0x11da/0x14b0 net/netlink/af_netlink.c:1330 netlink_sendmsg+0x173c/0x1840 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg net/socket.c:671 [inline] ____sys_sendmsg+0xc7a/0x1240 net/socket.c:2353 ___sys_sendmsg net/socket.c:2407 [inline] __sys_sendmsg+0x6d5/0x830 net/socket.c:2440 __do_sys_sendmsg net/socket.c:2449 [inline] __se_sys_sendmsg+0x97/0xb0 net/socket.c:2447 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2447 do_syscall_64+0x9f/0x140 arch/x86/entry/common.c:48 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:121 [inline] kmsan_internal_poison_shadow+0x5c/0xf0 mm/kmsan/kmsan.c:104 kmsan_slab_alloc+0x8d/0xe0 mm/kmsan/kmsan_hooks.c:76 slab_alloc_node mm/slub.c:2906 [inline] __kmalloc_node_track_caller+0xc61/0x15f0 mm/slub.c:4512 __kmalloc_reserve net/core/skbuff.c:142 [inline] __alloc_skb+0x309/0xae0 net/core/skbuff.c:210 alloc_skb include/linux/skbuff.h:1094 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1176 [inline] netlink_sendmsg+0xdb8/0x1840 net/netlink/af_netlink.c:1894 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg net/socket.c:671 [inline] ____sys_sendmsg+0xc7a/0x1240 net/socket.c:2353 ___sys_sendmsg net/socket.c:2407 [inline] __sys_sendmsg+0x6d5/0x830 net/socket.c:2440 __do_sys_sendmsg net/socket.c:2449 [inline] __se_sys_sendmsg+0x97/0xb0 net/socket.c:2447 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2447 do_syscall_64+0x9f/0x140 arch/x86/entry/common.c:48 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: a7b4f989a629 ("netfilter: ipset: IP set core support") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 7cff6e5e7445..2b19189a930f 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -271,8 +271,7 @@ flag_nested(const struct nlattr *nla) static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = { [IPSET_ATTR_IPADDR_IPV4] = { .type = NLA_U32 }, - [IPSET_ATTR_IPADDR_IPV6] = { .type = NLA_BINARY, - .len = sizeof(struct in6_addr) }, + [IPSET_ATTR_IPADDR_IPV6] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), }; int From c0700dfa2cae44c033ed97dade8a2679c7d22a9d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 19 Nov 2020 16:34:54 +0100 Subject: [PATCH 061/167] netfilter: nf_tables: avoid false-postive lockdep splat There are reports wrt lockdep splat in nftables, e.g.: ------------[ cut here ]------------ WARNING: CPU: 2 PID: 31416 at net/netfilter/nf_tables_api.c:622 lockdep_nfnl_nft_mutex_not_held+0x28/0x38 [nf_tables] ... These are caused by an earlier, unrelated bug such as a n ABBA deadlock in a different subsystem. In such an event, lockdep is disabled and lockdep_is_held returns true unconditionally. This then causes the WARN() in nf_tables. Make the WARN conditional on lockdep still active to avoid this. Fixes: f102d66b335a417 ("netfilter: nf_tables: use dedicated mutex to guard transactions") Reported-by: Naresh Kamboju Link: https://lore.kernel.org/linux-kselftest/CA+G9fYvFUpODs+NkSYcnwKnXm62tmP=ksLeBPmB+KFrB2rvCtQ@mail.gmail.com/ Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0f58e98542be..23abf1578594 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -619,7 +619,8 @@ static int nft_request_module(struct net *net, const char *fmt, ...) static void lockdep_nfnl_nft_mutex_not_held(void) { #ifdef CONFIG_PROVE_LOCKING - WARN_ON_ONCE(lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES)); + if (debug_locks) + WARN_ON_ONCE(lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES)); #endif } From c7acb6b9c07b4b75dffadc3b6466b1b43b3fda21 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 25 Nov 2020 15:35:19 -0700 Subject: [PATCH 062/167] MAINTAINERS: Adding help for coresight subsystem With the steady stream of new features coming into the subsystem it has been clear for some time now that help is needed. Suzuki and Leo have worked extensively on various parts of the project and have agreed to help. While at it add the new location for the coresight git tree. Acked-by: Leo Yan Acked-by : Suzuki K Poulose Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20201125223519.734388-1-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e451dcce054f..b9c823c08b57 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1723,11 +1723,13 @@ F: arch/arm/mach-ep93xx/micro9.c ARM/CORESIGHT FRAMEWORK AND DRIVERS M: Mathieu Poirier -R: Suzuki K Poulose +M: Suzuki K Poulose R: Mike Leach +R: Leo Yan L: coresight@lists.linaro.org (moderated for non-subscribers) L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/coresight/linux.git F: Documentation/ABI/testing/sysfs-bus-coresight-devices-* F: Documentation/devicetree/bindings/arm/coresight-cpu-debug.txt F: Documentation/devicetree/bindings/arm/coresight-cti.yaml From aa4cb898b80a28a610e26d1513e6dd42d995c225 Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Thu, 26 Nov 2020 17:27:59 +0800 Subject: [PATCH 063/167] ASoC: rt5682: change SAR voltage threshold To fix errors in some 4 poles headset detection cases, this patch adjusts the voltage threshold for mic detection. Signed-off-by: Shuming Fan Link: https://lore.kernel.org/r/20201126092759.9427-1-shumingf@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5682.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index a9acce7b6cca..d9878173ff89 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -43,6 +43,7 @@ static const struct reg_sequence patch_list[] = { {RT5682_DAC_ADC_DIG_VOL1, 0xa020}, {RT5682_I2C_CTRL, 0x000f}, {RT5682_PLL2_INTERNAL, 0x8266}, + {RT5682_SAR_IL_CMD_3, 0x8365}, }; void rt5682_apply_patch_list(struct rt5682_priv *rt5682, struct device *dev) From 82e938bd5382b322ce81e6cb8fd030987f2da022 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 25 Nov 2020 23:37:18 +0100 Subject: [PATCH 064/167] gfs2: Upgrade shared glocks for atime updates Commit 20f829999c38 ("gfs2: Rework read and page fault locking") lifted the glock lock taking from the low-level ->readpage and ->readahead address space operations to the higher-level ->read_iter file and ->fault vm operations. The glocks are still taken in LM_ST_SHARED mode only. On filesystems mounted without the noatime option, ->read_iter sometimes needs to update the atime as well, though. Right now, this leads to a failed locking mode assertion in gfs2_dirty_inode. Fix that by introducing a new update_time inode operation. There, if the glock is held non-exclusively, upgrade it to an exclusive lock. Reported-by: Alexander Aring Fixes: 20f829999c38 ("gfs2: Rework read and page fault locking") Cc: stable@vger.kernel.org # v5.8+ Signed-off-by: Andreas Gruenbacher --- fs/gfs2/inode.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 077ccb1b3ccc..eed1a1bac6f6 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -2116,6 +2116,25 @@ loff_t gfs2_seek_hole(struct file *file, loff_t offset) return vfs_setpos(file, ret, inode->i_sb->s_maxbytes); } +static int gfs2_update_time(struct inode *inode, struct timespec64 *time, + int flags) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_glock *gl = ip->i_gl; + struct gfs2_holder *gh; + int error; + + gh = gfs2_glock_is_locked_by_me(gl); + if (gh && !gfs2_glock_is_held_excl(gl)) { + gfs2_glock_dq(gh); + gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, gh); + error = gfs2_glock_nq(gh); + if (error) + return error; + } + return generic_update_time(inode, time, flags); +} + const struct inode_operations gfs2_file_iops = { .permission = gfs2_permission, .setattr = gfs2_setattr, @@ -2124,6 +2143,7 @@ const struct inode_operations gfs2_file_iops = { .fiemap = gfs2_fiemap, .get_acl = gfs2_get_acl, .set_acl = gfs2_set_acl, + .update_time = gfs2_update_time, }; const struct inode_operations gfs2_dir_iops = { @@ -2143,6 +2163,7 @@ const struct inode_operations gfs2_dir_iops = { .fiemap = gfs2_fiemap, .get_acl = gfs2_get_acl, .set_acl = gfs2_set_acl, + .update_time = gfs2_update_time, .atomic_open = gfs2_atomic_open, }; From 4ca23e2c2074465bff55ea14221175fecdf63c5f Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 26 Nov 2020 18:15:06 +0100 Subject: [PATCH 065/167] batman-adv: Consider fragmentation for needed_headroom MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a batman-adv packets has to be fragmented, then the original batman-adv packet header is not stripped away. Instead, only a new header is added in front of the packet after it was split. This size must be considered to avoid cost intensive reallocations during the transmission through the various device layers. Fixes: 7bca68c7844b ("batman-adv: Add lower layer needed_(head|tail)room to own ones") Reported-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/hard-interface.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index dad99641df2a..33904595fc56 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -554,6 +554,9 @@ static void batadv_hardif_recalc_extra_skbroom(struct net_device *soft_iface) needed_headroom = lower_headroom + (lower_header_len - ETH_HLEN); needed_headroom += batadv_max_header_len(); + /* fragmentation headers don't strip the unicast/... header */ + needed_headroom += sizeof(struct batadv_frag_packet); + soft_iface->needed_headroom = needed_headroom; soft_iface->needed_tailroom = lower_tailroom; } From c5cbfc87558168ef4c3c27ce36eba6b83391db19 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 25 Nov 2020 13:16:43 +0100 Subject: [PATCH 066/167] batman-adv: Reserve needed_*room for fragments The batadv net_device is trying to propagate the needed_headroom and needed_tailroom from the lower devices. This is needed to avoid cost intensive reallocations using pskb_expand_head during the transmission. But the fragmentation code split the skb's without adding extra room at the end/beginning of the various fragments. This reduced the performance of transmissions over complex scenarios (batadv on vxlan on wireguard) because the lower devices had to perform the reallocations at least once. Fixes: ee75ed88879a ("batman-adv: Fragment and send skbs larger than mtu") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/fragmentation.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 9a47ef8b95c4..8de1fb567fd7 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -391,6 +391,7 @@ out: /** * batadv_frag_create() - create a fragment from skb + * @net_dev: outgoing device for fragment * @skb: skb to create fragment from * @frag_head: header to use in new fragment * @fragment_size: size of new fragment @@ -401,22 +402,25 @@ out: * * Return: the new fragment, NULL on error. */ -static struct sk_buff *batadv_frag_create(struct sk_buff *skb, +static struct sk_buff *batadv_frag_create(struct net_device *net_dev, + struct sk_buff *skb, struct batadv_frag_packet *frag_head, unsigned int fragment_size) { + unsigned int ll_reserved = LL_RESERVED_SPACE(net_dev); + unsigned int tailroom = net_dev->needed_tailroom; struct sk_buff *skb_fragment; unsigned int header_size = sizeof(*frag_head); unsigned int mtu = fragment_size + header_size; - skb_fragment = netdev_alloc_skb(NULL, mtu + ETH_HLEN); + skb_fragment = dev_alloc_skb(ll_reserved + mtu + tailroom); if (!skb_fragment) goto err; skb_fragment->priority = skb->priority; /* Eat the last mtu-bytes of the skb */ - skb_reserve(skb_fragment, header_size + ETH_HLEN); + skb_reserve(skb_fragment, ll_reserved + header_size); skb_split(skb, skb_fragment, skb->len - fragment_size); /* Add the header */ @@ -439,11 +443,12 @@ int batadv_frag_send_packet(struct sk_buff *skb, struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node) { + struct net_device *net_dev = neigh_node->if_incoming->net_dev; struct batadv_priv *bat_priv; struct batadv_hard_iface *primary_if = NULL; struct batadv_frag_packet frag_header; struct sk_buff *skb_fragment; - unsigned int mtu = neigh_node->if_incoming->net_dev->mtu; + unsigned int mtu = net_dev->mtu; unsigned int header_size = sizeof(frag_header); unsigned int max_fragment_size, num_fragments; int ret; @@ -503,7 +508,7 @@ int batadv_frag_send_packet(struct sk_buff *skb, goto put_primary_if; } - skb_fragment = batadv_frag_create(skb, &frag_header, + skb_fragment = batadv_frag_create(net_dev, skb, &frag_header, max_fragment_size); if (!skb_fragment) { ret = -ENOMEM; From 992b03b88e36254e26e9a4977ab948683e21bd9f Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 26 Nov 2020 18:24:49 +0100 Subject: [PATCH 067/167] batman-adv: Don't always reallocate the fragmentation skb head When a packet is fragmented by batman-adv, the original batman-adv header is not modified. Only a new fragmentation is inserted between the original one and the ethernet header. The code must therefore make sure that it has a writable region of this size in the skbuff head. But it is not useful to always reallocate the skbuff by this size even when there would be more than enough headroom still in the skb. The reallocation is just to costly during in this codepath. Fixes: ee75ed88879a ("batman-adv: Fragment and send skbs larger than mtu") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/fragmentation.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 8de1fb567fd7..1f1f5b0873b2 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -527,13 +527,14 @@ int batadv_frag_send_packet(struct sk_buff *skb, frag_header.no++; } - /* Make room for the fragment header. */ - if (batadv_skb_head_push(skb, header_size) < 0 || - pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0) { - ret = -ENOMEM; + /* make sure that there is at least enough head for the fragmentation + * and ethernet headers + */ + ret = skb_cow_head(skb, ETH_HLEN + header_size); + if (ret < 0) goto put_primary_if; - } + skb_push(skb, header_size); memcpy(skb->data, &frag_header, header_size); /* Send the last fragment */ From e5782a5d5054bf1e03cb7fbd87035037c2a22698 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 27 Nov 2020 14:39:23 +0800 Subject: [PATCH 068/167] ALSA: hda/realtek - Add new codec supported for ALC897 Enable new codec supported for ALC897. Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/3b00520f304842aab8291eb8d9191bd8@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 34ad24d2c970..fcd1fb18ece9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -446,6 +446,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) alc_update_coef_idx(codec, 0x7, 1<<5, 0); break; case 0x10ec0892: + case 0x10ec0897: alc_update_coef_idx(codec, 0x7, 1<<5, 0); break; case 0x10ec0899: @@ -10214,6 +10215,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = { HDA_CODEC_ENTRY(0x10ec0888, "ALC888", patch_alc882), HDA_CODEC_ENTRY(0x10ec0889, "ALC889", patch_alc882), HDA_CODEC_ENTRY(0x10ec0892, "ALC892", patch_alc662), + HDA_CODEC_ENTRY(0x10ec0897, "ALC897", patch_alc662), HDA_CODEC_ENTRY(0x10ec0899, "ALC898", patch_alc882), HDA_CODEC_ENTRY(0x10ec0900, "ALC1150", patch_alc882), HDA_CODEC_ENTRY(0x10ec0b00, "ALCS1200A", patch_alc882), From 4bc3c8dc9f5f1eff0d3bfa59491383ac11308b6b Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Tue, 24 Nov 2020 16:07:49 +0800 Subject: [PATCH 069/167] ipvs: fix possible memory leak in ip_vs_control_net_init kmemleak report a memory leak as follows: BUG: memory leak unreferenced object 0xffff8880759ea000 (size 256): backtrace: [<00000000c0bf2deb>] kmem_cache_zalloc include/linux/slab.h:656 [inline] [<00000000c0bf2deb>] __proc_create+0x23d/0x7d0 fs/proc/generic.c:421 [<000000009d718d02>] proc_create_reg+0x8e/0x140 fs/proc/generic.c:535 [<0000000097bbfc4f>] proc_create_net_data+0x8c/0x1b0 fs/proc/proc_net.c:126 [<00000000652480fc>] ip_vs_control_net_init+0x308/0x13a0 net/netfilter/ipvs/ip_vs_ctl.c:4169 [<000000004c927ebe>] __ip_vs_init+0x211/0x400 net/netfilter/ipvs/ip_vs_core.c:2429 [<00000000aa6b72d9>] ops_init+0xa8/0x3c0 net/core/net_namespace.c:151 [<00000000153fd114>] setup_net+0x2de/0x7e0 net/core/net_namespace.c:341 [<00000000be4e4f07>] copy_net_ns+0x27d/0x530 net/core/net_namespace.c:482 [<00000000f1c23ec9>] create_new_namespaces+0x382/0xa30 kernel/nsproxy.c:110 [<00000000098a5757>] copy_namespaces+0x2e6/0x3b0 kernel/nsproxy.c:179 [<0000000026ce39e9>] copy_process+0x220a/0x5f00 kernel/fork.c:2072 [<00000000b71f4efe>] _do_fork+0xc7/0xda0 kernel/fork.c:2428 [<000000002974ee96>] __do_sys_clone3+0x18a/0x280 kernel/fork.c:2703 [<0000000062ac0a4d>] do_syscall_64+0x33/0x40 arch/x86/entry/common.c:46 [<0000000093f1ce2c>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 In the error path of ip_vs_control_net_init(), remove_proc_entry() needs to be called to remove the added proc entry, otherwise a memory leak will occur. Also, add some '#ifdef CONFIG_PROC_FS' because proc_create_net* return NULL when PROC is not used. Fixes: b17fc9963f83 ("IPVS: netns, ip_vs_stats and its procfs") Fixes: 61b1ab4583e2 ("IPVS: netns, add basic init per netns.") Reported-by: Hulk Robot Signed-off-by: Wang Hai Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index e279ded4e306..d45dbcba8b49 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -4167,12 +4167,18 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) spin_lock_init(&ipvs->tot_stats.lock); - proc_create_net("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_seq_ops, - sizeof(struct ip_vs_iter)); - proc_create_net_single("ip_vs_stats", 0, ipvs->net->proc_net, - ip_vs_stats_show, NULL); - proc_create_net_single("ip_vs_stats_percpu", 0, ipvs->net->proc_net, - ip_vs_stats_percpu_show, NULL); +#ifdef CONFIG_PROC_FS + if (!proc_create_net("ip_vs", 0, ipvs->net->proc_net, + &ip_vs_info_seq_ops, sizeof(struct ip_vs_iter))) + goto err_vs; + if (!proc_create_net_single("ip_vs_stats", 0, ipvs->net->proc_net, + ip_vs_stats_show, NULL)) + goto err_stats; + if (!proc_create_net_single("ip_vs_stats_percpu", 0, + ipvs->net->proc_net, + ip_vs_stats_percpu_show, NULL)) + goto err_percpu; +#endif if (ip_vs_control_net_init_sysctl(ipvs)) goto err; @@ -4180,6 +4186,17 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) return 0; err: +#ifdef CONFIG_PROC_FS + remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net); + +err_percpu: + remove_proc_entry("ip_vs_stats", ipvs->net->proc_net); + +err_stats: + remove_proc_entry("ip_vs", ipvs->net->proc_net); + +err_vs: +#endif free_percpu(ipvs->tot_stats.cpustats); return -ENOMEM; } @@ -4188,9 +4205,11 @@ void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs) { ip_vs_trash_cleanup(ipvs); ip_vs_control_net_cleanup_sysctl(ipvs); +#ifdef CONFIG_PROC_FS remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net); remove_proc_entry("ip_vs_stats", ipvs->net->proc_net); remove_proc_entry("ip_vs", ipvs->net->proc_net); +#endif free_percpu(ipvs->tot_stats.cpustats); } From 3c78e9e0d33a27ab8050e4492c03c6a1f8d0ed6b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 25 Nov 2020 23:50:07 +0100 Subject: [PATCH 070/167] netfilter: nftables_offload: set address type in control dissector This patch adds nft_flow_rule_set_addr_type() to set the address type from the nft_payload expression accordingly. If the address type is not set in the control dissector then a rule that matches either on source or destination IP address does not work. After this patch, nft hardware offload generates the flow dissector configuration as tc-flower does to match on an IP address. This patch has been also tested functionally to make sure packets are filtered out by the NIC. This is also getting the code aligned with the existing netfilter flow offload infrastructure which is also setting the control dissector. Fixes: c9626a2cbdb2 ("netfilter: nf_tables: add hardware offload support") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_offload.h | 4 ++++ net/netfilter/nf_tables_offload.c | 17 +++++++++++++++++ net/netfilter/nft_payload.c | 4 ++++ 3 files changed, 25 insertions(+) diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h index ea7d1d78b92d..bddd34c5bd79 100644 --- a/include/net/netfilter/nf_tables_offload.h +++ b/include/net/netfilter/nf_tables_offload.h @@ -37,6 +37,7 @@ void nft_offload_update_dependency(struct nft_offload_ctx *ctx, struct nft_flow_key { struct flow_dissector_key_basic basic; + struct flow_dissector_key_control control; union { struct flow_dissector_key_ipv4_addrs ipv4; struct flow_dissector_key_ipv6_addrs ipv6; @@ -62,6 +63,9 @@ struct nft_flow_rule { #define NFT_OFFLOAD_F_ACTION (1 << 0) +void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow, + enum flow_dissector_key_id addr_type); + struct nft_rule; struct nft_flow_rule *nft_flow_rule_create(struct net *net, const struct nft_rule *rule); void nft_flow_rule_destroy(struct nft_flow_rule *flow); diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 9f625724a20f..9ae14270c543 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -28,6 +28,23 @@ static struct nft_flow_rule *nft_flow_rule_alloc(int num_actions) return flow; } +void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow, + enum flow_dissector_key_id addr_type) +{ + struct nft_flow_match *match = &flow->match; + struct nft_flow_key *mask = &match->mask; + struct nft_flow_key *key = &match->key; + + if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL)) + return; + + key->control.addr_type = addr_type; + mask->control.addr_type = 0xffff; + match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CONTROL); + match->dissector.offset[FLOW_DISSECTOR_KEY_CONTROL] = + offsetof(struct nft_flow_key, control); +} + struct nft_flow_rule *nft_flow_rule_create(struct net *net, const struct nft_rule *rule) { diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index dcd3c7b8a367..bbf811d030d5 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -244,6 +244,7 @@ static int nft_payload_offload_ip(struct nft_offload_ctx *ctx, NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, src, sizeof(struct in_addr), reg); + nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV4_ADDRS); break; case offsetof(struct iphdr, daddr): if (priv->len != sizeof(struct in_addr)) @@ -251,6 +252,7 @@ static int nft_payload_offload_ip(struct nft_offload_ctx *ctx, NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, dst, sizeof(struct in_addr), reg); + nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV4_ADDRS); break; case offsetof(struct iphdr, protocol): if (priv->len != sizeof(__u8)) @@ -280,6 +282,7 @@ static int nft_payload_offload_ip6(struct nft_offload_ctx *ctx, NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, src, sizeof(struct in6_addr), reg); + nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV6_ADDRS); break; case offsetof(struct ipv6hdr, daddr): if (priv->len != sizeof(struct in6_addr)) @@ -287,6 +290,7 @@ static int nft_payload_offload_ip6(struct nft_offload_ctx *ctx, NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, dst, sizeof(struct in6_addr), reg); + nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV6_ADDRS); break; case offsetof(struct ipv6hdr, nexthdr): if (priv->len != sizeof(__u8)) From a5d45bc0dc50f9dd83703510e9804d813a9cac32 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 25 Nov 2020 23:50:17 +0100 Subject: [PATCH 071/167] netfilter: nftables_offload: build mask based from the matching bytes Userspace might match on prefix bytes of header fields if they are on the byte boundary, this requires that the mask is adjusted accordingly. Use NFT_OFFLOAD_MATCH_EXACT() for meta since prefix byte matching is not allowed for this type of selector. The bitwise expression might be optimized out by userspace, hence the kernel needs to infer the prefix from the number of payload bytes to match on. This patch adds nft_payload_offload_mask() to calculate the bitmask to match on the prefix. Fixes: c9626a2cbdb2 ("netfilter: nf_tables: add hardware offload support") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_offload.h | 3 ++ net/netfilter/nft_cmp.c | 8 +-- net/netfilter/nft_meta.c | 16 +++--- net/netfilter/nft_payload.c | 66 +++++++++++++++++------ 4 files changed, 64 insertions(+), 29 deletions(-) diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h index bddd34c5bd79..1d34fe154fe0 100644 --- a/include/net/netfilter/nf_tables_offload.h +++ b/include/net/netfilter/nf_tables_offload.h @@ -78,6 +78,9 @@ int nft_flow_rule_offload_commit(struct net *net); offsetof(struct nft_flow_key, __base.__field); \ (__reg)->len = __len; \ (__reg)->key = __key; \ + +#define NFT_OFFLOAD_MATCH_EXACT(__key, __base, __field, __len, __reg) \ + NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \ memset(&(__reg)->mask, 0xff, (__reg)->len); int nft_chain_offload_priority(struct nft_base_chain *basechain); diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index bc079d68a536..00e563a72d3d 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -123,11 +123,11 @@ static int __nft_cmp_offload(struct nft_offload_ctx *ctx, u8 *mask = (u8 *)&flow->match.mask; u8 *key = (u8 *)&flow->match.key; - if (priv->op != NFT_CMP_EQ || reg->len != priv->len) + if (priv->op != NFT_CMP_EQ || priv->len > reg->len) return -EOPNOTSUPP; - memcpy(key + reg->offset, &priv->data, priv->len); - memcpy(mask + reg->offset, ®->mask, priv->len); + memcpy(key + reg->offset, &priv->data, reg->len); + memcpy(mask + reg->offset, ®->mask, reg->len); flow->match.dissector.used_keys |= BIT(reg->key); flow->match.dissector.offset[reg->key] = reg->base_offset; @@ -137,7 +137,7 @@ static int __nft_cmp_offload(struct nft_offload_ctx *ctx, nft_reg_load16(priv->data.data) != ARPHRD_ETHER) return -EOPNOTSUPP; - nft_offload_update_dependency(ctx, &priv->data, priv->len); + nft_offload_update_dependency(ctx, &priv->data, reg->len); return 0; } diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index b37bd02448d8..bf4b3ad5314c 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -724,22 +724,22 @@ static int nft_meta_get_offload(struct nft_offload_ctx *ctx, switch (priv->key) { case NFT_META_PROTOCOL: - NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, n_proto, - sizeof(__u16), reg); + NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_BASIC, basic, n_proto, + sizeof(__u16), reg); nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK); break; case NFT_META_L4PROTO: - NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto, - sizeof(__u8), reg); + NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto, + sizeof(__u8), reg); nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT); break; case NFT_META_IIF: - NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_META, meta, - ingress_ifindex, sizeof(__u32), reg); + NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_META, meta, + ingress_ifindex, sizeof(__u32), reg); break; case NFT_META_IIFTYPE: - NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_META, meta, - ingress_iftype, sizeof(__u16), reg); + NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_META, meta, + ingress_iftype, sizeof(__u16), reg); break; default: return -EOPNOTSUPP; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index bbf811d030d5..47d4e0e21651 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -165,6 +165,34 @@ nla_put_failure: return -1; } +static bool nft_payload_offload_mask(struct nft_offload_reg *reg, + u32 priv_len, u32 field_len) +{ + unsigned int remainder, delta, k; + struct nft_data mask = {}; + __be32 remainder_mask; + + if (priv_len == field_len) { + memset(®->mask, 0xff, priv_len); + return true; + } else if (priv_len > field_len) { + return false; + } + + memset(&mask, 0xff, field_len); + remainder = priv_len % sizeof(u32); + if (remainder) { + k = priv_len / sizeof(u32); + delta = field_len - priv_len; + remainder_mask = htonl(~((1 << (delta * BITS_PER_BYTE)) - 1)); + mask.data[k] = (__force u32)remainder_mask; + } + + memcpy(®->mask, &mask, field_len); + + return true; +} + static int nft_payload_offload_ll(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow, const struct nft_payload *priv) @@ -173,21 +201,21 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct ethhdr, h_source): - if (priv->len != ETH_ALEN) + if (!nft_payload_offload_mask(reg, priv->len, ETH_ALEN)) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs, src, ETH_ALEN, reg); break; case offsetof(struct ethhdr, h_dest): - if (priv->len != ETH_ALEN) + if (!nft_payload_offload_mask(reg, priv->len, ETH_ALEN)) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs, dst, ETH_ALEN, reg); break; case offsetof(struct ethhdr, h_proto): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, @@ -195,14 +223,14 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx, nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK); break; case offsetof(struct vlan_ethhdr, h_vlan_TCI): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_VLAN, vlan, vlan_tci, sizeof(__be16), reg); break; case offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_VLAN, vlan, @@ -210,7 +238,7 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx, nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK); break; case offsetof(struct vlan_ethhdr, h_vlan_TCI) + sizeof(struct vlan_hdr): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, vlan, @@ -218,7 +246,7 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx, break; case offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto) + sizeof(struct vlan_hdr): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, vlan, @@ -239,7 +267,8 @@ static int nft_payload_offload_ip(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct iphdr, saddr): - if (priv->len != sizeof(struct in_addr)) + if (!nft_payload_offload_mask(reg, priv->len, + sizeof(struct in_addr))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, src, @@ -247,7 +276,8 @@ static int nft_payload_offload_ip(struct nft_offload_ctx *ctx, nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV4_ADDRS); break; case offsetof(struct iphdr, daddr): - if (priv->len != sizeof(struct in_addr)) + if (!nft_payload_offload_mask(reg, priv->len, + sizeof(struct in_addr))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, dst, @@ -255,7 +285,7 @@ static int nft_payload_offload_ip(struct nft_offload_ctx *ctx, nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV4_ADDRS); break; case offsetof(struct iphdr, protocol): - if (priv->len != sizeof(__u8)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__u8))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto, @@ -277,7 +307,8 @@ static int nft_payload_offload_ip6(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct ipv6hdr, saddr): - if (priv->len != sizeof(struct in6_addr)) + if (!nft_payload_offload_mask(reg, priv->len, + sizeof(struct in6_addr))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, src, @@ -285,7 +316,8 @@ static int nft_payload_offload_ip6(struct nft_offload_ctx *ctx, nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV6_ADDRS); break; case offsetof(struct ipv6hdr, daddr): - if (priv->len != sizeof(struct in6_addr)) + if (!nft_payload_offload_mask(reg, priv->len, + sizeof(struct in6_addr))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, dst, @@ -293,7 +325,7 @@ static int nft_payload_offload_ip6(struct nft_offload_ctx *ctx, nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV6_ADDRS); break; case offsetof(struct ipv6hdr, nexthdr): - if (priv->len != sizeof(__u8)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__u8))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto, @@ -335,14 +367,14 @@ static int nft_payload_offload_tcp(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct tcphdr, source): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, src, sizeof(__be16), reg); break; case offsetof(struct tcphdr, dest): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, dst, @@ -363,14 +395,14 @@ static int nft_payload_offload_udp(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct udphdr, source): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, src, sizeof(__be16), reg); break; case offsetof(struct udphdr, dest): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, dst, From 484cfbe5fb61469a5f5a276258a8b3973164b56f Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Fri, 27 Nov 2020 14:17:35 +0100 Subject: [PATCH 072/167] usb: typec: stusb160x: fix power-opmode property with typec-power-opmode Device tree property is named typec-power-opmode, not power-opmode. Fixes: da0cb6310094 ("usb: typec: add support for STUSB160x Type-C controller family") Signed-off-by: Amelie Delaunay Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20201127131735.28280-1-amelie.delaunay@st.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/stusb160x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/stusb160x.c b/drivers/usb/typec/stusb160x.c index 2a618f02f4f1..d21750bbbb44 100644 --- a/drivers/usb/typec/stusb160x.c +++ b/drivers/usb/typec/stusb160x.c @@ -562,7 +562,7 @@ static int stusb160x_get_fw_caps(struct stusb160x *chip, * Supported power operation mode can be configured through device tree * else it is read from chip registers in stusb160x_get_caps. */ - ret = fwnode_property_read_string(fwnode, "power-opmode", &cap_str); + ret = fwnode_property_read_string(fwnode, "typec-power-opmode", &cap_str); if (!ret) { ret = typec_find_pwr_opmode(cap_str); /* Power delivery not yet supported */ From 402d5840b0d40a2a26c8651165d29b534abb6d36 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Fri, 27 Nov 2020 22:26:35 +0900 Subject: [PATCH 073/167] ALSA: usb-audio: US16x08: fix value count for level meters The level meter control returns 34 integers of info. This fixes: snd-usb-audio 3-1:1.0: control 2:0:0:Level Meter:0: access overflow Fixes: d2bb390a2081 ("ALSA: usb-audio: Tascam US-16x08 DSP mixer quirk") Cc: stable@vger.kernel.org Signed-off-by: Hector Martin Link: https://lore.kernel.org/r/20201127132635.18947-1-marcan@marcan.st Signed-off-by: Takashi Iwai --- sound/usb/mixer_us16x08.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/mixer_us16x08.c b/sound/usb/mixer_us16x08.c index 92b1a6d9c931..bd63a9ce6a70 100644 --- a/sound/usb/mixer_us16x08.c +++ b/sound/usb/mixer_us16x08.c @@ -607,7 +607,7 @@ static int snd_us16x08_eq_put(struct snd_kcontrol *kcontrol, static int snd_us16x08_meter_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { - uinfo->count = 1; + uinfo->count = 34; uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; uinfo->value.integer.max = 0x7FFF; uinfo->value.integer.min = 0; From 25bc65d8ddfc17cc1d7a45bd48e9bdc0e729ced3 Mon Sep 17 00:00:00 2001 From: Gabriele Paoloni Date: Fri, 27 Nov 2020 16:18:15 +0000 Subject: [PATCH 074/167] x86/mce: Do not overwrite no_way_out if mce_end() fails Currently, if mce_end() fails, no_way_out - the variable denoting whether the machine can recover from this MCE - is determined by whether the worst severity that was found across the MCA banks associated with the current CPU, is of panic severity. However, at this point no_way_out could have been already set by mca_start() after looking at all severities of all CPUs that entered the MCE handler. If mce_end() fails, check first if no_way_out is already set and, if so, stick to it, otherwise use the local worst value. [ bp: Massage. ] Signed-off-by: Gabriele Paoloni Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Cc: Link: https://lkml.kernel.org/r/20201127161819.3106432-2-gabriele.paoloni@intel.com --- arch/x86/kernel/cpu/mce/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 4102b866e7c0..32b7099e3511 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1384,8 +1384,10 @@ noinstr void do_machine_check(struct pt_regs *regs) * When there's any problem use only local no_way_out state. */ if (!lmce) { - if (mce_end(order) < 0) - no_way_out = worst >= MCE_PANIC_SEVERITY; + if (mce_end(order) < 0) { + if (!no_way_out) + no_way_out = worst >= MCE_PANIC_SEVERITY; + } } else { /* * If there was a fatal machine check we should have From 3b13eaf0ba1d5ab59368e23ff5e5350f51c1a352 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 18 Nov 2020 08:32:33 -0300 Subject: [PATCH 075/167] perf tools: Update copy of libbpf's hashmap.c To pick the changes in: 7a078d2d18801bba ("libbpf, hashmap: Fix undefined behavior in hash_bits") That don't entail any changes in tools/perf. This addresses this perf build warning: Warning: Kernel ABI header at 'tools/perf/util/hashmap.h' differs from latest version at 'tools/lib/bpf/hashmap.h' diff -u tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h Not a kernel ABI, its just that this uses the mechanism in place for checking kernel ABI files drift. Cc: Adrian Hunter Cc: Daniel Borkmann Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hashmap.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h index d9b385fe808c..10a4c4cd13cf 100644 --- a/tools/perf/util/hashmap.h +++ b/tools/perf/util/hashmap.h @@ -15,6 +15,9 @@ static inline size_t hash_bits(size_t h, int bits) { /* shuffle bits and return requested number of upper bits */ + if (bits == 0) + return 0; + #if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__) /* LP64 case */ return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits); @@ -174,17 +177,17 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value); * @key: key to iterate entries for */ #define hashmap__for_each_key_entry(map, cur, _key) \ - for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\ - map->cap_bits); \ - map->buckets ? map->buckets[bkt] : NULL; }); \ + for (cur = map->buckets \ + ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + : NULL; \ cur; \ cur = cur->next) \ if (map->equal_fn(cur->key, (_key), map->ctx)) #define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \ - for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\ - map->cap_bits); \ - cur = map->buckets ? map->buckets[bkt] : NULL; }); \ + for (cur = map->buckets \ + ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + : NULL; \ cur && ({ tmp = cur->next; true; }); \ cur = tmp) \ if (map->equal_fn(cur->key, (_key), map->ctx)) From 9713070028b9ab317f395ee130fa2c4ea741bab4 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Tue, 24 Nov 2020 18:36:52 +0800 Subject: [PATCH 076/167] perf diff: Fix error return value in __cmd_diff() An appropriate return value should be set on the failed path. Fixes: 2a09a84c720b436a ("perf diff: Support hot streams comparison") Reported-by: Hulk Robot Signed-off-by: Zhen Lei Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jin Yao Cc: Mark Rutland Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20201124103652.438-1-thunder.leizhen@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 584e2e1a3793..cefc71506409 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1222,8 +1222,10 @@ static int __cmd_diff(void) if (compute == COMPUTE_STREAM) { d->evlist_streams = evlist__create_streams( d->session->evlist, 5); - if (!d->evlist_streams) + if (!d->evlist_streams) { + ret = -ENOMEM; goto out_delete; + } } } From aa50d953c169e876413bf237319e728dd41d9fdd Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 27 Nov 2020 14:43:56 +0900 Subject: [PATCH 077/167] perf record: Synthesize cgroup events only if needed It didn't check the tool->cgroup_events bit which is set when the --all-cgroups option is given. Without it, samples will not have cgroup info so no reason to synthesize. We can check the PERF_RECORD_CGROUP records after running perf record *WITHOUT* the --all-cgroups option: Before: $ perf report -D | grep CGROUP 0 0 0x8430 [0x38]: PERF_RECORD_CGROUP cgroup: 1 / CGROUP events: 1 CGROUP events: 0 CGROUP events: 0 After: $ perf report -D | grep CGROUP CGROUP events: 0 CGROUP events: 0 CGROUP events: 0 Committer testing: Before: # perf record -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 2.208 MB perf.data (10003 samples) ] # perf report -D | grep "CGROUP events" CGROUP events: 146 CGROUP events: 0 CGROUP events: 0 # After: # perf record -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 2.208 MB perf.data (10448 samples) ] # perf report -D | grep "CGROUP events" CGROUP events: 0 CGROUP events: 0 CGROUP events: 0 # With all-cgroups: # perf record --all-cgroups -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 2.374 MB perf.data (11526 samples) ] # perf report -D | grep "CGROUP events" CGROUP events: 146 CGROUP events: 0 CGROUP events: 0 # Fixes: 8fb4b67939e16 ("perf record: Add --all-cgroups option") Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201127054356.405481-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/synthetic-events.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 8a23391558cf..d9c624377da7 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -563,6 +563,9 @@ int perf_event__synthesize_cgroups(struct perf_tool *tool, char cgrp_root[PATH_MAX]; size_t mount_len; /* length of mount point in the path */ + if (!tool || !tool->cgroup_events) + return 0; + if (cgroupfs_find_mountpoint(cgrp_root, PATH_MAX, "perf_event") < 0) { pr_debug("cannot find cgroup mount point\n"); return -1; From c0ee1d5ae8c8650031badcfca6483a28c0f94f38 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 27 Nov 2020 13:14:03 +0900 Subject: [PATCH 078/167] perf stat: Use proper cpu for shadow stats Currently perf stat shows some metrics (like IPC) for defined events. But when no aggregation mode is used (-A option), it shows incorrect values since it used a value from a different cpu. Before: $ perf stat -aA -e cycles,instructions sleep 1 Performance counter stats for 'system wide': CPU0 116,057,380 cycles CPU1 86,084,722 cycles CPU2 99,423,125 cycles CPU3 98,272,994 cycles CPU0 53,369,217 instructions # 0.46 insn per cycle CPU1 33,378,058 instructions # 0.29 insn per cycle CPU2 58,150,086 instructions # 0.50 insn per cycle CPU3 40,029,703 instructions # 0.34 insn per cycle 1.001816971 seconds time elapsed So the IPC for CPU1 should be 0.38 (= 33,378,058 / 86,084,722) but it was 0.29 (= 33,378,058 / 116,057,380) and so on. After: $ perf stat -aA -e cycles,instructions sleep 1 Performance counter stats for 'system wide': CPU0 109,621,384 cycles CPU1 159,026,454 cycles CPU2 99,460,366 cycles CPU3 124,144,142 cycles CPU0 44,396,706 instructions # 0.41 insn per cycle CPU1 120,195,425 instructions # 0.76 insn per cycle CPU2 44,763,978 instructions # 0.45 insn per cycle CPU3 69,049,079 instructions # 0.56 insn per cycle 1.001910444 seconds time elapsed Fixes: 44d49a600259 ("perf stat: Support metrics in --per-core/socket mode") Reported-by: Sam Xi Signed-off-by: Namhyung Kim Reviewed-by: Andi Kleen Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201127041404.390276-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 4b57c0c07632..a963b5b8eb72 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -324,13 +324,10 @@ static int first_shadow_cpu(struct perf_stat_config *config, struct evlist *evlist = evsel->evlist; int i; - if (!config->aggr_get_id) - return 0; - if (config->aggr_mode == AGGR_NONE) return id; - if (config->aggr_mode == AGGR_GLOBAL) + if (!config->aggr_get_id) return 0; for (i = 0; i < evsel__nr_cpus(evsel); i++) { From ab4200c17ba6fe71d2da64317aae8a8aa684624c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 27 Nov 2020 14:48:46 +0900 Subject: [PATCH 079/167] perf probe: Fix to die_entrypc() returns error correctly Fix die_entrypc() to return error correctly if the DIE has no DW_AT_ranges attribute. Since dwarf_ranges() will treat the case as an empty ranges and return 0, we have to check it by ourselves. Fixes: 91e2f539eeda ("perf probe: Fix to show function entry line as probe-able") Signed-off-by: Masami Hiramatsu Cc: Sumanth Korikkar Cc: Thomas Richter Link: http://lore.kernel.org/lkml/160645612634.2824037.5284932731175079426.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index aa898014ad12..03c1a39c312a 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -373,6 +373,7 @@ bool die_is_func_def(Dwarf_Die *dw_die) int die_entrypc(Dwarf_Die *dw_die, Dwarf_Addr *addr) { Dwarf_Addr base, end; + Dwarf_Attribute attr; if (!addr) return -EINVAL; @@ -380,6 +381,13 @@ int die_entrypc(Dwarf_Die *dw_die, Dwarf_Addr *addr) if (dwarf_entrypc(dw_die, addr) == 0) return 0; + /* + * Since the dwarf_ranges() will return 0 if there is no + * DW_AT_ranges attribute, we should check it first. + */ + if (!dwarf_attr(dw_die, DW_AT_ranges, &attr)) + return -ENOENT; + return dwarf_ranges(dw_die, 0, &base, addr, &end) < 0 ? -ENOENT : 0; } From a9ffd0484eb4426e6befd07e7be6c01108716302 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 27 Nov 2020 14:48:55 +0900 Subject: [PATCH 080/167] perf probe: Change function definition check due to broken DWARF Since some gcc generates a broken DWARF which lacks DW_AT_declaration attribute from the subprogram DIE of function prototype. (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97060) So, in addition to the DW_AT_declaration check, we also check the subprogram DIE has DW_AT_inline or actual entry pc. Committer testing: # cat /etc/fedora-release Fedora release 33 (Thirty Three) # Before: # perf test vfs_getname 78: Use vfs_getname probe to get syscall args filenames : FAILED! 79: Check open filename arg using perf trace + vfs_getname : FAILED! 81: Add vfs_getname probe to get syscall args filenames : FAILED! # After: # perf test vfs_getname 78: Use vfs_getname probe to get syscall args filenames : Ok 79: Check open filename arg using perf trace + vfs_getname : Ok 81: Add vfs_getname probe to get syscall args filenames : Ok # Reported-by: Thomas Richter Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: Sumanth Korikkar Link: http://lore.kernel.org/lkml/160645613571.2824037.7441351537890235895.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 20 ++++++++++++++++++-- tools/perf/util/probe-finder.c | 3 +-- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 03c1a39c312a..7b2d471a6419 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -356,9 +356,25 @@ bool die_is_signed_type(Dwarf_Die *tp_die) bool die_is_func_def(Dwarf_Die *dw_die) { Dwarf_Attribute attr; + Dwarf_Addr addr = 0; - return (dwarf_tag(dw_die) == DW_TAG_subprogram && - dwarf_attr(dw_die, DW_AT_declaration, &attr) == NULL); + if (dwarf_tag(dw_die) != DW_TAG_subprogram) + return false; + + if (dwarf_attr(dw_die, DW_AT_declaration, &attr)) + return false; + + /* + * DW_AT_declaration can be lost from function declaration + * by gcc's bug #97060. + * So we need to check this subprogram DIE has DW_AT_inline + * or an entry address. + */ + if (!dwarf_attr(dw_die, DW_AT_inline, &attr) && + die_entrypc(dw_die, &addr) < 0) + return false; + + return true; } /** diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 2c4061035f77..76dd349aa48d 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1885,8 +1885,7 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data) if (lr->file && strtailcmp(lr->file, dwarf_decl_file(sp_die))) return DWARF_CB_OK; - if (die_is_func_def(sp_die) && - die_match_name(sp_die, lr->function)) { + if (die_match_name(sp_die, lr->function) && die_is_func_def(sp_die)) { lf->fname = dwarf_decl_file(sp_die); dwarf_decl_line(sp_die, &lr->offset); pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset); From 242d990c158d5b1dabd166516e21992baef5f26a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 27 Nov 2020 15:11:03 +0100 Subject: [PATCH 081/167] ALSA: hda/generic: Add option to enforce preferred_dacs pairs The generic parser accepts the preferred_dacs[] pairs as a hint for assigning a DAC to each pin, but this hint doesn't work always effectively. Currently it's merely a secondary choice after the trial with the path index failed. This made sometimes it difficult to assign DACs without mimicking the connection list and/or the badness table. This patch adds a new flag, obey_preferred_dacs, that changes the behavior of the parser. As its name stands, the parser obeys the given preferred_dacs[] pairs by skipping the path index matching and giving a high penalty if no DAC is assigned by the pairs. This mode will help for assigning the fixed DACs forcibly from the codec driver. Cc: Link: https://lore.kernel.org/r/20201127141104.11041-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_generic.c | 12 ++++++++---- sound/pci/hda/hda_generic.h | 1 + 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index bbb17481159e..8060cc86dfea 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -1364,16 +1364,20 @@ static int try_assign_dacs(struct hda_codec *codec, int num_outs, struct nid_path *path; hda_nid_t pin = pins[i]; - path = snd_hda_get_path_from_idx(codec, path_idx[i]); - if (path) { - badness += assign_out_path_ctls(codec, path); - continue; + if (!spec->obey_preferred_dacs) { + path = snd_hda_get_path_from_idx(codec, path_idx[i]); + if (path) { + badness += assign_out_path_ctls(codec, path); + continue; + } } dacs[i] = get_preferred_dac(codec, pin); if (dacs[i]) { if (is_dac_already_used(codec, dacs[i])) badness += bad->shared_primary; + } else if (spec->obey_preferred_dacs) { + badness += BAD_NO_PRIMARY_DAC; } if (!dacs[i]) diff --git a/sound/pci/hda/hda_generic.h b/sound/pci/hda/hda_generic.h index a43f0bb77dae..0886bc81f40b 100644 --- a/sound/pci/hda/hda_generic.h +++ b/sound/pci/hda/hda_generic.h @@ -237,6 +237,7 @@ struct hda_gen_spec { unsigned int power_down_unused:1; /* power down unused widgets */ unsigned int dac_min_mute:1; /* minimal = mute for DACs */ unsigned int suppress_vmaster:1; /* don't create vmaster kctls */ + unsigned int obey_preferred_dacs:1; /* obey preferred_dacs assignment */ /* other internal flags */ unsigned int no_analog:1; /* digital I/O only */ From c84bfedce60192c08455ee2d25dd13d19274a266 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 27 Nov 2020 15:11:04 +0100 Subject: [PATCH 082/167] ALSA: hda/realtek: Fix bass speaker DAC assignment on Asus Zephyrus G14 ASUS Zephyrus G14 has two speaker pins, and the auto-parser tries to assign an individual DAC to each pin as much as possible. Unfortunately the third DAC has no volume control unlike the two DACs, and this resulted in the inconsistent speaker volumes. As a workaround, wire both speaker pins to the same DAC by modifying the existing quirk (ALC289_FIXUP_ASUS_GA401) applied to this device. Since this quirk entry is chained by another, we need to avoid applying the DAC assignment change for it. Luckily, there is another quirk entry (ALC289_FIXUP_ASUS_GA502) doing the very same thing, so we can chain to the GA502 quirk instead. Note that this patch uses a new flag of the generic parser, obey_preferred_dacs, for enforcing the DACs. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=210359 Cc: Link: https://lore.kernel.org/r/20201127141104.11041-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index fcd1fb18ece9..c7e8680a7190 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6027,6 +6027,21 @@ static void alc274_fixup_bind_dacs(struct hda_codec *codec, codec->power_save_node = 0; } +/* avoid DAC 0x06 for bass speaker 0x17; it has no volume control */ +static void alc289_fixup_asus_ga401(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + static const hda_nid_t preferred_pairs[] = { + 0x14, 0x02, 0x17, 0x02, 0x21, 0x03, 0 + }; + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->gen.preferred_dacs = preferred_pairs; + spec->gen.obey_preferred_dacs = 1; + } +} + /* The DAC of NID 0x3 will introduce click/pop noise on headphones, so invalidate it */ static void alc285_fixup_invalidate_dacs(struct hda_codec *codec, const struct hda_fixup *fix, int action) @@ -7600,11 +7615,10 @@ static const struct hda_fixup alc269_fixups[] = { .chain_id = ALC269_FIXUP_HEADSET_MIC }, [ALC289_FIXUP_ASUS_GA401] = { - .type = HDA_FIXUP_PINS, - .v.pins = (const struct hda_pintbl[]) { - { 0x19, 0x03a11020 }, /* headset mic with jack detect */ - { } - }, + .type = HDA_FIXUP_FUNC, + .v.func = alc289_fixup_asus_ga401, + .chained = true, + .chain_id = ALC289_FIXUP_ASUS_GA502, }, [ALC289_FIXUP_ASUS_GA502] = { .type = HDA_FIXUP_PINS, @@ -7728,7 +7742,7 @@ static const struct hda_fixup alc269_fixups[] = { { } }, .chained = true, - .chain_id = ALC289_FIXUP_ASUS_GA401 + .chain_id = ALC289_FIXUP_ASUS_GA502 }, [ALC274_FIXUP_HP_MIC] = { .type = HDA_FIXUP_VERBS, From aeedad2504997be262c98f6e3228173225a8d868 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 28 Nov 2020 10:00:15 +0100 Subject: [PATCH 083/167] ALSA: hda/realtek: Add mute LED quirk to yet another HP x360 model HP Spectre x360 Convertible 15" version (SSID 103c:827f) needs the same quirk to make the mute LED working like other models. System Information Manufacturer: HP Product Name: HP Spectre x360 Convertible 15-bl1XX Sound Codec: Codec: Realtek ALC295 Vendor Id: 0x10ec0295 Subsystem Id: 0x103c827f Revision Id: 0x100002 Reported-by: Cc: Link: https://lore.kernel.org/r/20201128090015.7743-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c7e8680a7190..8616c5624870 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7934,6 +7934,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x820d, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8256, "HP", ALC221_FIXUP_HP_FRONT_MIC), SND_PCI_QUIRK(0x103c, 0x827e, "HP x360", ALC295_FIXUP_HP_X360), + SND_PCI_QUIRK(0x103c, 0x827f, "HP x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x82bf, "HP G3 mini", ALC221_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x82c0, "HP G3 mini premium", ALC221_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x83b9, "HP Spectre x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), From 44f64f23bae2f0fad25503bc7ab86cd08d04cd47 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 23 Nov 2020 18:49:02 +0100 Subject: [PATCH 084/167] netfilter: bridge: reset skb->pkt_type after NF_INET_POST_ROUTING traversal Netfilter changes PACKET_OTHERHOST to PACKET_HOST before invoking the hooks as, while it's an expected value for a bridge, routing expects PACKET_HOST. The change is undone later on after hook traversal. This can be seen with pairs of functions updating skb>pkt_type and then reverting it to its original value: For hook NF_INET_PRE_ROUTING: setup_pre_routing / br_nf_pre_routing_finish For hook NF_INET_FORWARD: br_nf_forward_ip / br_nf_forward_finish But the third case where netfilter does this, for hook NF_INET_POST_ROUTING, the packet type is changed in br_nf_post_routing but never reverted. A comment says: /* We assume any code from br_dev_queue_push_xmit onwards doesn't care * about the value of skb->pkt_type. */ But when having a tunnel (say vxlan) attached to a bridge we have the following call trace: br_nf_pre_routing br_nf_pre_routing_ipv6 br_nf_pre_routing_finish br_nf_forward_ip br_nf_forward_finish br_nf_post_routing <- pkt_type is updated to PACKET_HOST br_nf_dev_queue_xmit <- but not reverted to its original value vxlan_xmit vxlan_xmit_one skb_tunnel_check_pmtu <- a check on pkt_type is performed In this specific case, this creates issues such as when an ICMPv6 PTB should be sent back. When CONFIG_BRIDGE_NETFILTER is enabled, the PTB isn't sent (as skb_tunnel_check_pmtu checks if pkt_type is PACKET_HOST and returns early). If the comment is right and no one cares about the value of skb->pkt_type after br_dev_queue_push_xmit (which isn't true), resetting it to its original value should be safe. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Antoine Tenart Reviewed-by: Florian Westphal Link: https://lore.kernel.org/r/20201123174902.622102-1-atenart@kernel.org Signed-off-by: Jakub Kicinski --- net/bridge/br_netfilter_hooks.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 04c3f9a82650..8edfb98ae1d5 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -735,6 +735,11 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff mtu_reserved = nf_bridge_mtu_reduction(skb); mtu = skb->dev->mtu; + if (nf_bridge->pkt_otherhost) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->pkt_otherhost = false; + } + if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu) mtu = nf_bridge->frag_max_size; @@ -835,8 +840,6 @@ static unsigned int br_nf_post_routing(void *priv, else return NF_ACCEPT; - /* We assume any code from br_dev_queue_push_xmit onwards doesn't care - * about the value of skb->pkt_type. */ if (skb->pkt_type == PACKET_OTHERHOST) { skb->pkt_type = PACKET_HOST; nf_bridge->pkt_otherhost = true; From 1ebf179037cb46c19da3a9c1e2ca16e7a754b75e Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 26 Nov 2020 19:09:22 +0100 Subject: [PATCH 085/167] ipv4: Fix tos mask in inet_rtm_getroute() When inet_rtm_getroute() was converted to use the RCU variants of ip_route_input() and ip_route_output_key(), the TOS parameters stopped being masked with IPTOS_RT_MASK before doing the route lookup. As a result, "ip route get" can return a different route than what would be used when sending real packets. For example: $ ip route add 192.0.2.11/32 dev eth0 $ ip route add unreachable 192.0.2.11/32 tos 2 $ ip route get 192.0.2.11 tos 2 RTNETLINK answers: No route to host But, packets with TOS 2 (ECT(0) if interpreted as an ECN bit) would actually be routed using the first route: $ ping -c 1 -Q 2 192.0.2.11 PING 192.0.2.11 (192.0.2.11) 56(84) bytes of data. 64 bytes from 192.0.2.11: icmp_seq=1 ttl=64 time=0.173 ms --- 192.0.2.11 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.173/0.173/0.173/0.000 ms This patch re-applies IPTOS_RT_MASK in inet_rtm_getroute(), to return results consistent with real route lookups. Fixes: 3765d35ed8b9 ("net: ipv4: Convert inet_rtm_getroute to rcu versions of route lookup") Signed-off-by: Guillaume Nault Reviewed-by: David Ahern Link: https://lore.kernel.org/r/b2d237d08317ca55926add9654a48409ac1b8f5b.1606412894.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- net/ipv4/route.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index dc2a399cd9f4..9f43abeac3a8 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3222,7 +3222,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, fl4.daddr = dst; fl4.saddr = src; - fl4.flowi4_tos = rtm->rtm_tos; + fl4.flowi4_tos = rtm->rtm_tos & IPTOS_RT_MASK; fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; fl4.flowi4_mark = mark; fl4.flowi4_uid = uid; @@ -3246,8 +3246,9 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, fl4.flowi4_iif = iif; /* for rt_fill_info */ skb->dev = dev; skb->mark = mark; - err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos, - dev, &res); + err = ip_route_input_rcu(skb, dst, src, + rtm->rtm_tos & IPTOS_RT_MASK, dev, + &res); rt = skb_rtable(skb); if (err == 0 && rt->dst.error) From e14038a7ead09faa180eb072adc4a2157a0b475f Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 26 Nov 2020 19:47:47 +0100 Subject: [PATCH 086/167] selftests: tc-testing: enable CONFIG_NET_SCH_RED as a module a proper kernel configuration for running kselftest can be obtained with: $ yes | make kselftest-merge enable compile support for the 'red' qdisc: otherwise, tdc kselftest fail when trying to run tdc test items contained in red.json. Signed-off-by: Davide Caratti Link: https://lore.kernel.org/r/cfa23f2d4f672401e6cebca3a321dd1901a9ff07.1606416464.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index c33a7aac27ff..b71828df5a6d 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -59,6 +59,7 @@ CONFIG_NET_IFE_SKBPRIO=m CONFIG_NET_IFE_SKBTCINDEX=m CONFIG_NET_SCH_FIFO=y CONFIG_NET_SCH_ETS=m +CONFIG_NET_SCH_RED=m # ## Network testing From 31d6b4036098f6b59bcfa20375626b500c7d7417 Mon Sep 17 00:00:00 2001 From: Dany Madden Date: Wed, 25 Nov 2020 18:04:24 -0600 Subject: [PATCH 087/167] ibmvnic: handle inconsistent login with reset Inconsistent login with the vnicserver is causing the device to be removed. This does not give the device a chance to recover from error state. This patch schedules a FATAL reset instead to bring the adapter up. Fixes: 032c5e82847a2 ("Driver for IBM System i/p VNIC protocol") Signed-off-by: Dany Madden Signed-off-by: Lijun Pan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 2491ebc97871..b28a979061dc 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -4414,7 +4414,7 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, adapter->req_rx_add_queues != be32_to_cpu(login_rsp->num_rxadd_subcrqs))) { dev_err(dev, "FATAL: Inconsistent login and login rsp\n"); - ibmvnic_remove(adapter->vdev); + ibmvnic_reset(adapter, VNIC_RESET_FATAL); return -EIO; } size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + From 18f141bf97d42f65abfdf17fd93fb3a0dac100e7 Mon Sep 17 00:00:00 2001 From: Dany Madden Date: Wed, 25 Nov 2020 18:04:25 -0600 Subject: [PATCH 088/167] ibmvnic: stop free_all_rwi on failed reset When ibmvnic fails to reset, it breaks out of the reset loop and frees all of the remaining resets from the workqueue. Doing so prevents the adapter from recovering if no reset is scheduled after that. Instead, have the driver continue to process resets on the workqueue. Remove the no longer need free_all_rwi(). Fixes: ed651a10875f1 ("ibmvnic: Updated reset handling") Signed-off-by: Dany Madden Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index b28a979061dc..e9243d8c5abc 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2173,17 +2173,6 @@ static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter) return rwi; } -static void free_all_rwi(struct ibmvnic_adapter *adapter) -{ - struct ibmvnic_rwi *rwi; - - rwi = get_next_rwi(adapter); - while (rwi) { - kfree(rwi); - rwi = get_next_rwi(adapter); - } -} - static void __ibmvnic_reset(struct work_struct *work) { struct ibmvnic_rwi *rwi; @@ -2252,9 +2241,9 @@ static void __ibmvnic_reset(struct work_struct *work) else adapter->state = reset_state; rc = 0; - } else if (rc && rc != IBMVNIC_INIT_FAILED && - !adapter->force_reset_recovery) - break; + } + if (rc) + netdev_dbg(adapter->netdev, "Reset failed, rc=%d\n", rc); rwi = get_next_rwi(adapter); @@ -2268,11 +2257,6 @@ static void __ibmvnic_reset(struct work_struct *work) complete(&adapter->reset_done); } - if (rc) { - netdev_dbg(adapter->netdev, "Reset failed\n"); - free_all_rwi(adapter); - } - clear_bit_unlock(0, &adapter->resetting); } From 9281cf2d584083a450fd65fd27cc5f0e692f6e30 Mon Sep 17 00:00:00 2001 From: Dany Madden Date: Wed, 25 Nov 2020 18:04:26 -0600 Subject: [PATCH 089/167] ibmvnic: avoid memset null scrq msgs scrq->msgs could be NULL during device reset, causing Linux to crash. So, check before memset scrq->msgs. Fixes: c8b2ad0a4a901 ("ibmvnic: Sanitize entire SCRQ buffer on reset") Signed-off-by: Dany Madden Signed-off-by: Lijun Pan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index e9243d8c5abc..939a276dae7d 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2844,15 +2844,26 @@ static int reset_one_sub_crq_queue(struct ibmvnic_adapter *adapter, { int rc; + if (!scrq) { + netdev_dbg(adapter->netdev, + "Invalid scrq reset. irq (%d) or msgs (%p).\n", + scrq->irq, scrq->msgs); + return -EINVAL; + } + if (scrq->irq) { free_irq(scrq->irq, scrq); irq_dispose_mapping(scrq->irq); scrq->irq = 0; } - - memset(scrq->msgs, 0, 4 * PAGE_SIZE); - atomic_set(&scrq->used, 0); - scrq->cur = 0; + if (scrq->msgs) { + memset(scrq->msgs, 0, 4 * PAGE_SIZE); + atomic_set(&scrq->used, 0); + scrq->cur = 0; + } else { + netdev_dbg(adapter->netdev, "Invalid scrq reset\n"); + return -EINVAL; + } rc = h_reg_sub_crq(adapter->vdev->unit_address, scrq->msg_token, 4 * PAGE_SIZE, &scrq->crq_num, &scrq->hw_irq); From 0cb4bc66ba5ea2d3b94ec2a00775888130db628a Mon Sep 17 00:00:00 2001 From: Dany Madden Date: Wed, 25 Nov 2020 18:04:27 -0600 Subject: [PATCH 090/167] ibmvnic: restore adapter state on failed reset In a failed reset, driver could end up in VNIC_PROBED or VNIC_CLOSED state and cannot recover in subsequent resets, leaving it offline. This patch restores the adapter state to reset_state, the original state when reset was called. Fixes: b27507bb59ed5 ("net/ibmvnic: unlock rtnl_lock in reset so linkwatch_event can run") Fixes: 2770a7984db58 ("ibmvnic: Introduce hard reset recovery") Signed-off-by: Dany Madden Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 67 ++++++++++++++++-------------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 939a276dae7d..5ce9f70984ee 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1857,7 +1857,7 @@ static int do_change_param_reset(struct ibmvnic_adapter *adapter, if (reset_state == VNIC_OPEN) { rc = __ibmvnic_close(netdev); if (rc) - return rc; + goto out; } release_resources(adapter); @@ -1875,24 +1875,25 @@ static int do_change_param_reset(struct ibmvnic_adapter *adapter, } rc = ibmvnic_reset_init(adapter, true); - if (rc) - return IBMVNIC_INIT_FAILED; + if (rc) { + rc = IBMVNIC_INIT_FAILED; + goto out; + } /* If the adapter was in PROBE state prior to the reset, * exit here. */ if (reset_state == VNIC_PROBED) - return 0; + goto out; rc = ibmvnic_login(netdev); if (rc) { - adapter->state = reset_state; - return rc; + goto out; } rc = init_resources(adapter); if (rc) - return rc; + goto out; ibmvnic_disable_irqs(adapter); @@ -1902,8 +1903,10 @@ static int do_change_param_reset(struct ibmvnic_adapter *adapter, return 0; rc = __ibmvnic_open(netdev); - if (rc) - return IBMVNIC_OPEN_FAILED; + if (rc) { + rc = IBMVNIC_OPEN_FAILED; + goto out; + } /* refresh device's multicast list */ ibmvnic_set_multi(netdev); @@ -1912,7 +1915,10 @@ static int do_change_param_reset(struct ibmvnic_adapter *adapter, for (i = 0; i < adapter->req_rx_queues; i++) napi_schedule(&adapter->napi[i]); - return 0; +out: + if (rc) + adapter->state = reset_state; + return rc; } /** @@ -2015,7 +2021,6 @@ static int do_reset(struct ibmvnic_adapter *adapter, rc = ibmvnic_login(netdev); if (rc) { - adapter->state = reset_state; goto out; } @@ -2083,6 +2088,9 @@ static int do_reset(struct ibmvnic_adapter *adapter, rc = 0; out: + /* restore the adapter state if reset failed */ + if (rc) + adapter->state = reset_state; rtnl_unlock(); return rc; @@ -2115,43 +2123,46 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter, if (rc) { netdev_err(adapter->netdev, "Couldn't initialize crq. rc=%d\n", rc); - return rc; + goto out; } rc = ibmvnic_reset_init(adapter, false); if (rc) - return rc; + goto out; /* If the adapter was in PROBE state prior to the reset, * exit here. */ if (reset_state == VNIC_PROBED) - return 0; + goto out; rc = ibmvnic_login(netdev); - if (rc) { - adapter->state = VNIC_PROBED; - return 0; - } + if (rc) + goto out; rc = init_resources(adapter); if (rc) - return rc; + goto out; ibmvnic_disable_irqs(adapter); adapter->state = VNIC_CLOSED; if (reset_state == VNIC_CLOSED) - return 0; + goto out; rc = __ibmvnic_open(netdev); - if (rc) - return IBMVNIC_OPEN_FAILED; + if (rc) { + rc = IBMVNIC_OPEN_FAILED; + goto out; + } call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, netdev); call_netdevice_notifiers(NETDEV_RESEND_IGMP, netdev); - - return 0; +out: + /* restore adapter state if reset failed */ + if (rc) + adapter->state = reset_state; + return rc; } static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter) @@ -2235,13 +2246,7 @@ static void __ibmvnic_reset(struct work_struct *work) rc = do_reset(adapter, rwi, reset_state); } kfree(rwi); - if (rc == IBMVNIC_OPEN_FAILED) { - if (list_empty(&adapter->rwi_list)) - adapter->state = VNIC_CLOSED; - else - adapter->state = reset_state; - rc = 0; - } + if (rc) netdev_dbg(adapter->netdev, "Reset failed, rc=%d\n", rc); From f15fde9d47b887b406f5e76490d601cfc26643c9 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 25 Nov 2020 18:04:28 -0600 Subject: [PATCH 091/167] ibmvnic: delay next reset if hard reset fails If auto-priority failover is enabled, the backing device needs time to settle if hard resetting fails for any reason. Add a delay of 60 seconds before retrying the hard-reset. Fixes: 2770a7984db5 ("ibmvnic: Introduce hard reset recovery") Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 5ce9f70984ee..d4d40b349788 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2241,6 +2241,14 @@ static void __ibmvnic_reset(struct work_struct *work) rc = do_hard_reset(adapter, rwi, reset_state); rtnl_unlock(); } + if (rc) { + /* give backing device time to settle down */ + netdev_dbg(adapter->netdev, + "[S:%d] Hard reset failed, waiting 60 secs\n", + adapter->state); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(60 * HZ); + } } else if (!(rwi->reset_reason == VNIC_RESET_FATAL && adapter->from_passive_init)) { rc = do_reset(adapter, rwi, reset_state); From 76cdc5c5d99ce4856ad0ac38facc33b52fa64f77 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 25 Nov 2020 18:04:29 -0600 Subject: [PATCH 092/167] ibmvnic: track pending login If after ibmvnic sends a LOGIN it gets a FAILOVER, it is possible that the worker thread will start reset process and free the login response buffer before it gets a (now stale) LOGIN_RSP. The ibmvnic tasklet will then try to access the login response buffer and crash. Have ibmvnic track pending logins and discard any stale login responses. Fixes: 032c5e82847a ("Driver for IBM System i/p VNIC protocol") Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 17 +++++++++++++++++ drivers/net/ethernet/ibm/ibmvnic.h | 1 + 2 files changed, 18 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index d4d40b349788..13eae1666477 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3841,6 +3841,8 @@ static int send_login(struct ibmvnic_adapter *adapter) crq.login.cmd = LOGIN; crq.login.ioba = cpu_to_be32(buffer_token); crq.login.len = cpu_to_be32(buffer_size); + + adapter->login_pending = true; ibmvnic_send_crq(adapter, &crq); return 0; @@ -4393,6 +4395,15 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, u64 *size_array; int i; + /* CHECK: Test/set of login_pending does not need to be atomic + * because only ibmvnic_tasklet tests/clears this. + */ + if (!adapter->login_pending) { + netdev_warn(netdev, "Ignoring unexpected login response\n"); + return 0; + } + adapter->login_pending = false; + dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz, DMA_TO_DEVICE); dma_unmap_single(dev, adapter->login_rsp_buf_token, @@ -4764,6 +4775,11 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, case IBMVNIC_CRQ_INIT: dev_info(dev, "Partner initialized\n"); adapter->from_passive_init = true; + /* Discard any stale login responses from prev reset. + * CHECK: should we clear even on INIT_COMPLETE? + */ + adapter->login_pending = false; + if (!completion_done(&adapter->init_done)) { complete(&adapter->init_done); adapter->init_done_rc = -EIO; @@ -5196,6 +5212,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) dev_set_drvdata(&dev->dev, netdev); adapter->vdev = dev; adapter->netdev = netdev; + adapter->login_pending = false; ether_addr_copy(adapter->mac_addr, mac_addr_p); ether_addr_copy(netdev->dev_addr, adapter->mac_addr); diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 47a3fd71c96f..bffa7f939ee1 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -1086,6 +1086,7 @@ struct ibmvnic_adapter { struct delayed_work ibmvnic_delayed_reset; unsigned long resetting; bool napi_enabled, from_passive_init; + bool login_pending; bool failover_pending; bool force_reset_recovery; From c98d9cc4170da7e16a1012563d0f9fbe1c7cfe27 Mon Sep 17 00:00:00 2001 From: Dany Madden Date: Wed, 25 Nov 2020 18:04:30 -0600 Subject: [PATCH 093/167] ibmvnic: send_login should check for crq errors send_login() does not check for the result of ibmvnic_send_crq() of the login request. This results in the driver needlessly retrying the login 10 times even when CRQ is no longer active. Check the return code and give up in case of errors in sending the CRQ. The only time we want to retry is if we get a PARITALSUCCESS response from the partner. Fixes: 032c5e82847a2 ("Driver for IBM System i/p VNIC protocol") Signed-off-by: Dany Madden Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 13eae1666477..5154e0421175 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -850,10 +850,8 @@ static int ibmvnic_login(struct net_device *netdev) adapter->init_done_rc = 0; reinit_completion(&adapter->init_done); rc = send_login(adapter); - if (rc) { - netdev_warn(netdev, "Unable to login\n"); + if (rc) return rc; - } if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { @@ -3729,15 +3727,16 @@ static int send_login(struct ibmvnic_adapter *adapter) struct ibmvnic_login_rsp_buffer *login_rsp_buffer; struct ibmvnic_login_buffer *login_buffer; struct device *dev = &adapter->vdev->dev; + struct vnic_login_client_data *vlcd; dma_addr_t rsp_buffer_token; dma_addr_t buffer_token; size_t rsp_buffer_size; union ibmvnic_crq crq; + int client_data_len; size_t buffer_size; __be64 *tx_list_p; __be64 *rx_list_p; - int client_data_len; - struct vnic_login_client_data *vlcd; + int rc; int i; if (!adapter->tx_scrq || !adapter->rx_scrq) { @@ -3843,16 +3842,23 @@ static int send_login(struct ibmvnic_adapter *adapter) crq.login.len = cpu_to_be32(buffer_size); adapter->login_pending = true; - ibmvnic_send_crq(adapter, &crq); + rc = ibmvnic_send_crq(adapter, &crq); + if (rc) { + adapter->login_pending = false; + netdev_err(adapter->netdev, "Failed to send login, rc=%d\n", rc); + goto buf_rsp_map_failed; + } return 0; buf_rsp_map_failed: kfree(login_rsp_buffer); + adapter->login_rsp_buf = NULL; buf_rsp_alloc_failed: dma_unmap_single(dev, buffer_token, buffer_size, DMA_TO_DEVICE); buf_map_failed: kfree(login_buffer); + adapter->login_buf = NULL; buf_alloc_failed: return -1; } From a86d5c682b798b2dadaa4171c1d124cf3c45a17c Mon Sep 17 00:00:00 2001 From: Dany Madden Date: Wed, 25 Nov 2020 18:04:31 -0600 Subject: [PATCH 094/167] ibmvnic: no reset timeout for 5 seconds after reset Reset timeout is going off right after adapter reset. This patch ensures that timeout is scheduled if it has been 5 seconds since the last reset. 5 seconds is the default watchdog timeout. Fixes: ed651a10875f1 ("ibmvnic: Updated reset handling") Signed-off-by: Dany Madden Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 11 +++++++++-- drivers/net/ethernet/ibm/ibmvnic.h | 2 ++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 5154e0421175..d48a694b92c9 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2252,6 +2252,7 @@ static void __ibmvnic_reset(struct work_struct *work) rc = do_reset(adapter, rwi, reset_state); } kfree(rwi); + adapter->last_reset_time = jiffies; if (rc) netdev_dbg(adapter->netdev, "Reset failed, rc=%d\n", rc); @@ -2355,7 +2356,13 @@ static void ibmvnic_tx_timeout(struct net_device *dev, unsigned int txqueue) "Adapter is resetting, skip timeout reset\n"); return; } - + /* No queuing up reset until at least 5 seconds (default watchdog val) + * after last reset + */ + if (time_before(jiffies, (adapter->last_reset_time + dev->watchdog_timeo))) { + netdev_dbg(dev, "Not yet time to tx timeout.\n"); + return; + } ibmvnic_reset(adapter, VNIC_RESET_TIMEOUT); } @@ -5282,7 +5289,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) adapter->state = VNIC_PROBED; adapter->wait_for_reset = false; - + adapter->last_reset_time = jiffies; return 0; ibmvnic_register_fail: diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index bffa7f939ee1..21e7ea858cda 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -1087,6 +1087,8 @@ struct ibmvnic_adapter { unsigned long resetting; bool napi_enabled, from_passive_init; bool login_pending; + /* last device reset time */ + unsigned long last_reset_time; bool failover_pending; bool force_reset_recovery; From 98c41f04a67abf5e7f7191d55d286e905d1430ef Mon Sep 17 00:00:00 2001 From: Dany Madden Date: Wed, 25 Nov 2020 18:04:32 -0600 Subject: [PATCH 095/167] ibmvnic: reduce wait for completion time Reduce the wait time for Command Response Queue response from 30 seconds to 20 seconds, as recommended by VIOS and Power Hypervisor teams. Fixes: bd0b672313941 ("ibmvnic: Move login and queue negotiation into ibmvnic_open") Fixes: 53da09e92910f ("ibmvnic: Add set_link_state routine for setting adapter link state") Signed-off-by: Dany Madden Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index d48a694b92c9..bca1becd33f0 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -834,7 +834,7 @@ static void release_napi(struct ibmvnic_adapter *adapter) static int ibmvnic_login(struct net_device *netdev) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - unsigned long timeout = msecs_to_jiffies(30000); + unsigned long timeout = msecs_to_jiffies(20000); int retry_count = 0; int retries = 10; bool retry; @@ -938,7 +938,7 @@ static void release_resources(struct ibmvnic_adapter *adapter) static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state) { struct net_device *netdev = adapter->netdev; - unsigned long timeout = msecs_to_jiffies(30000); + unsigned long timeout = msecs_to_jiffies(20000); union ibmvnic_crq crq; bool resend; int rc; @@ -5130,7 +5130,7 @@ map_failed: static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset) { struct device *dev = &adapter->vdev->dev; - unsigned long timeout = msecs_to_jiffies(30000); + unsigned long timeout = msecs_to_jiffies(20000); u64 old_num_rx_queues, old_num_tx_queues; int rc; From 4d521943f76bd0d1e68ea5e02df7aadd30b2838a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 26 Oct 2020 16:36:20 +0100 Subject: [PATCH 096/167] dt-bindings: net: correct interrupt flags in examples GPIO_ACTIVE_x flags are not correct in the context of interrupt flags. These are simple defines so they could be used in DTS but they will not have the same meaning: 1. GPIO_ACTIVE_HIGH = 0 = IRQ_TYPE_NONE 2. GPIO_ACTIVE_LOW = 1 = IRQ_TYPE_EDGE_RISING Correct the interrupt flags, assuming the author of the code wanted same logical behavior behind the name "ACTIVE_xxx", this is: ACTIVE_LOW => IRQ_TYPE_LEVEL_LOW ACTIVE_HIGH => IRQ_TYPE_LEVEL_HIGH Fixes: a1a8b4594f8d ("NFC: pn544: i2c: Add DTS Documentation") Fixes: 6be88670fc59 ("NFC: nxp-nci_i2c: Add I2C support to NXP NCI driver") Fixes: e3b329221567 ("dt-bindings: can: tcan4x5x: Update binding to use interrupt property") Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Acked-by: Marc Kleine-Budde # for tcan4x5x.txt Link: https://lore.kernel.org/r/20201026153620.89268-1-krzk@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/can/tcan4x5x.txt | 2 +- Documentation/devicetree/bindings/net/nfc/nxp-nci.txt | 2 +- Documentation/devicetree/bindings/net/nfc/pn544.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt index 3613c2c8f75d..0968b40aef1e 100644 --- a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt +++ b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt @@ -33,7 +33,7 @@ tcan4x5x: tcan4x5x@0 { spi-max-frequency = <10000000>; bosch,mram-cfg = <0x0 0 0 32 0 0 1 1>; interrupt-parent = <&gpio1>; - interrupts = <14 GPIO_ACTIVE_LOW>; + interrupts = <14 IRQ_TYPE_LEVEL_LOW>; device-state-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; device-wake-gpios = <&gpio1 15 GPIO_ACTIVE_HIGH>; reset-gpios = <&gpio1 27 GPIO_ACTIVE_HIGH>; diff --git a/Documentation/devicetree/bindings/net/nfc/nxp-nci.txt b/Documentation/devicetree/bindings/net/nfc/nxp-nci.txt index cfaf88998918..9e4dc510a40a 100644 --- a/Documentation/devicetree/bindings/net/nfc/nxp-nci.txt +++ b/Documentation/devicetree/bindings/net/nfc/nxp-nci.txt @@ -25,7 +25,7 @@ Example (for ARM-based BeagleBone with NPC100 NFC controller on I2C2): clock-frequency = <100000>; interrupt-parent = <&gpio1>; - interrupts = <29 GPIO_ACTIVE_HIGH>; + interrupts = <29 IRQ_TYPE_LEVEL_HIGH>; enable-gpios = <&gpio0 30 GPIO_ACTIVE_HIGH>; firmware-gpios = <&gpio0 31 GPIO_ACTIVE_HIGH>; diff --git a/Documentation/devicetree/bindings/net/nfc/pn544.txt b/Documentation/devicetree/bindings/net/nfc/pn544.txt index 92f399ec22b8..2bd82562ce8e 100644 --- a/Documentation/devicetree/bindings/net/nfc/pn544.txt +++ b/Documentation/devicetree/bindings/net/nfc/pn544.txt @@ -25,7 +25,7 @@ Example (for ARM-based BeagleBone with PN544 on I2C2): clock-frequency = <400000>; interrupt-parent = <&gpio1>; - interrupts = <17 GPIO_ACTIVE_HIGH>; + interrupts = <17 IRQ_TYPE_LEVEL_HIGH>; enable-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; firmware-gpios = <&gpio3 19 GPIO_ACTIVE_HIGH>; From b65054597872ce3aefbc6a666385eabdf9e288da Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 29 Nov 2020 15:50:50 -0800 Subject: [PATCH 097/167] Linux 5.10-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 56c0782bde56..43ecedeb3f02 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 10 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Kleptomaniac Octopus # *DOCUMENTATION* From ad1f5e826d91d6c27ecd36a607ad7c7f4d0b0733 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 27 Nov 2020 16:17:11 +0100 Subject: [PATCH 098/167] can: m_can: tcan4x5x_can_probe(): fix error path: remove erroneous clk_disable_unprepare() The clocks mcan_class->cclk and mcan_class->hclk are not prepared by any call during tcan4x5x_can_probe(), so remove erroneous clk_disable_unprepare() on them. Fixes: 5443c226ba91 ("can: tcan4x5x: Add tcan4x5x driver to the kernel") Link: http://lore.kernel.org/r/20201130114252.215334-1-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/tcan4x5x.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/can/m_can/tcan4x5x.c b/drivers/net/can/m_can/tcan4x5x.c index e5d7d85e0b6d..7347ab39c5b6 100644 --- a/drivers/net/can/m_can/tcan4x5x.c +++ b/drivers/net/can/m_can/tcan4x5x.c @@ -489,18 +489,18 @@ static int tcan4x5x_can_probe(struct spi_device *spi) spi->bits_per_word = 32; ret = spi_setup(spi); if (ret) - goto out_clk; + goto out_m_can_class_free_dev; priv->regmap = devm_regmap_init(&spi->dev, &tcan4x5x_bus, &spi->dev, &tcan4x5x_regmap); if (IS_ERR(priv->regmap)) { ret = PTR_ERR(priv->regmap); - goto out_clk; + goto out_m_can_class_free_dev; } ret = tcan4x5x_power_enable(priv->power, 1); if (ret) - goto out_clk; + goto out_m_can_class_free_dev; ret = tcan4x5x_parse_config(mcan_class); if (ret) @@ -519,11 +519,6 @@ static int tcan4x5x_can_probe(struct spi_device *spi) out_power: tcan4x5x_power_enable(priv->power, 0); -out_clk: - if (!IS_ERR(mcan_class->cclk)) { - clk_disable_unprepare(mcan_class->cclk); - clk_disable_unprepare(mcan_class->hclk); - } out_m_can_class_free_dev: m_can_class_free_dev(mcan_class->net); dev_err(&spi->dev, "Probe failed, err=%d\n", ret); From bd0ccb92efb09c7da5b55162b283b42a93539ed7 Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Fri, 27 Nov 2020 10:59:38 +0100 Subject: [PATCH 099/167] can: sja1000: sja1000_err(): don't count arbitration lose as an error Losing arbitration is normal in a CAN-bus network, it means that a higher priority frame is being send and the pending message will be retried later. Hence most driver only increment arbitration_lost, but the sja1000 driver also incremeants tx_error, causing errors to be reported on a normal functioning CAN-bus. So stop counting them as errors. Fixes: 8935f57e68c4 ("can: sja1000: fix network statistics update") Signed-off-by: Jeroen Hofstee Link: https://lore.kernel.org/r/20201127095941.21609-1-jhofstee@victronenergy.com [mkl: split into two seperate patches] Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sja1000/sja1000.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index 9f107798f904..25a4d7d0b349 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -474,7 +474,6 @@ static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status) netdev_dbg(dev, "arbitration lost interrupt\n"); alc = priv->read_reg(priv, SJA1000_ALC); priv->can.can_stats.arbitration_lost++; - stats->tx_errors++; cf->can_id |= CAN_ERR_LOSTARB; cf->data[0] = alc & 0x1f; } From c2d095eff797813461a426b97242e3ffc50e4134 Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Fri, 27 Nov 2020 10:59:38 +0100 Subject: [PATCH 100/167] can: sun4i_can: sun4i_can_err(): don't count arbitration lose as an error Losing arbitration is normal in a CAN-bus network, it means that a higher priority frame is being send and the pending message will be retried later. Hence most driver only increment arbitration_lost, but the sun4i driver also incremeants tx_error, causing errors to be reported on a normal functioning CAN-bus. So stop counting them as errors. Fixes: 0738eff14d81 ("can: Allwinner A10/A20 CAN Controller support - Kernel module") Signed-off-by: Jeroen Hofstee Link: https://lore.kernel.org/r/20201127095941.21609-1-jhofstee@victronenergy.com [mkl: split into two seperate patches] Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sun4i_can.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/can/sun4i_can.c b/drivers/net/can/sun4i_can.c index e2c6cf4b2228..b3f2f4fe5ee0 100644 --- a/drivers/net/can/sun4i_can.c +++ b/drivers/net/can/sun4i_can.c @@ -604,7 +604,6 @@ static int sun4i_can_err(struct net_device *dev, u8 isrc, u8 status) netdev_dbg(dev, "arbitration lost interrupt\n"); alc = readl(priv->base + SUN4I_REG_STA_ADDR); priv->can.can_stats.arbitration_lost++; - stats->tx_errors++; if (likely(skb)) { cf->can_id |= CAN_ERR_LOSTARB; cf->data[0] = (alc >> 8) & 0x1f; From 44cef0c0ffbd8d61143712ce874be68a273b7884 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Sat, 28 Nov 2020 21:39:21 +0800 Subject: [PATCH 101/167] can: c_can: c_can_power_up(): fix error handling In the error handling in c_can_power_up(), there are two bugs: 1) c_can_pm_runtime_get_sync() will increase usage counter if device is not empty. Forgetting to call c_can_pm_runtime_put_sync() will result in a reference leak here. 2) c_can_reset_ram() operation will set start bit when enable is true. We should clear it in the error handling. We fix it by adding c_can_pm_runtime_put_sync() for 1), and c_can_reset_ram(enable is false) for 2) in the error handling. Fixes: 8212003260c60 ("can: c_can: Add d_can suspend resume support") Fixes: 52cde85acc23f ("can: c_can: Add d_can raminit support") Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20201128133922.3276973-2-zhangqilong3@huawei.com [mkl: return "0" instead of "ret"] Signed-off-by: Marc Kleine-Budde --- drivers/net/can/c_can/c_can.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 1ccdbe89585b..1a9e9b9a4bf6 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -1295,12 +1295,22 @@ int c_can_power_up(struct net_device *dev) time_after(time_out, jiffies)) cpu_relax(); - if (time_after(jiffies, time_out)) - return -ETIMEDOUT; + if (time_after(jiffies, time_out)) { + ret = -ETIMEDOUT; + goto err_out; + } ret = c_can_start(dev); - if (!ret) - c_can_irq_control(priv, true); + if (ret) + goto err_out; + + c_can_irq_control(priv, true); + + return 0; + +err_out: + c_can_reset_ram(priv, false); + c_can_pm_runtime_put_sync(priv); return ret; } From 13a84cf37a4cf1155a41684236c2314eb40cd65c Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Sat, 28 Nov 2020 21:39:22 +0800 Subject: [PATCH 102/167] can: kvaser_pciefd: kvaser_pciefd_open(): fix error handling If kvaser_pciefd_bus_on() failed, we should call close_candev() to avoid reference leak. Fixes: 26ad340e582d3 ("can: kvaser_pciefd: Add driver for Kvaser PCIEcan devices") Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20201128133922.3276973-3-zhangqilong3@huawei.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/kvaser_pciefd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index 72acd1ba162d..43151dd6cb1c 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -692,8 +692,10 @@ static int kvaser_pciefd_open(struct net_device *netdev) return err; err = kvaser_pciefd_bus_on(can); - if (err) + if (err) { + close_candev(netdev); return err; + } return 0; } From ca1314d73eed493c49bb1932c60a8605530db2e4 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Nov 2020 11:59:40 +0000 Subject: [PATCH 103/167] arm64: syscall: exit userspace before unmasking exceptions In el0_svc_common() we unmask exceptions before we call user_exit(), and so there's a window where an IRQ or debug exception can be taken while RCU is not watching. In do_debug_exception() we account for this in via debug_exception_{enter,exit}(), but in the el1_irq asm we do not and we call trace functions which rely on RCU before we have a guarantee that RCU is watching. Let's avoid this by having el0_svc_common() exit userspace before unmasking exceptions, matching what we do for all other EL0 entry paths. We can use user_exit_irqoff() to avoid the pointless save/restore of IRQ flags while we're sure exceptions are masked in DAIF. The workaround for Cortex-A76 erratum 1463225 may trigger a debug exception before this point, but the debug code invoked in this case is safe even when RCU is not watching. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20201130115950.22492-2-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/syscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index e4c0dadf0d92..13fe79f8e2db 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -120,8 +120,8 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, */ cortex_a76_erratum_1463225_svc_handler(); + user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); - user_exit(); if (system_supports_mte() && (flags & _TIF_MTE_ASYNC_FAULT)) { /* From 114e0a684753516ef4b71ccb55a8ebcfa8735edb Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Nov 2020 11:59:41 +0000 Subject: [PATCH 104/167] arm64: mark idle code as noinstr Core code disables RCU when calling arch_cpu_idle(), so it's not safe for arch_cpu_idle() or its calees to be instrumented, as the instrumentation callbacks may attempt to use RCU or other features which are unsafe to use in this context. Mark them noinstr to prevent issues. The use of local_irq_enable() in arch_cpu_idle() is similarly problematic, and the "sched/idle: Fix arch_cpu_idle() vs tracing" patch queued in the tip tree addresses that case. Reported-by: Marco Elver Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20201130115950.22492-3-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/process.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index a47a40ec6ad9..0c65a91f8f53 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -72,13 +72,13 @@ EXPORT_SYMBOL_GPL(pm_power_off); void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); -static void __cpu_do_idle(void) +static void noinstr __cpu_do_idle(void) { dsb(sy); wfi(); } -static void __cpu_do_idle_irqprio(void) +static void noinstr __cpu_do_idle_irqprio(void) { unsigned long pmr; unsigned long daif_bits; @@ -108,7 +108,7 @@ static void __cpu_do_idle_irqprio(void) * ensure that interrupts are not masked at the PMR (because the core will * not wake up if we block the wake up signal in the interrupt controller). */ -void cpu_do_idle(void) +void noinstr cpu_do_idle(void) { if (system_uses_irq_prio_masking()) __cpu_do_idle_irqprio(); @@ -119,7 +119,7 @@ void cpu_do_idle(void) /* * This is our default idle handler. */ -void arch_cpu_idle(void) +void noinstr arch_cpu_idle(void) { /* * This should do all the clock switching and wait for interrupt From da192676483232a0a9478c89cdddd412e5167470 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Nov 2020 11:59:42 +0000 Subject: [PATCH 105/167] arm64: entry: mark entry code as noinstr Functions in entry-common.c are marked as notrace and NOKPROBE_SYMBOL(), but they're still subject to other instrumentation which may rely on lockdep/rcu/context-tracking being up-to-date, and may cause nested exceptions (e.g. for WARN/BUG or KASAN's use of BRK) which will corrupt exceptions registers which have not yet been read. Prevent this by marking all functions in entry-common.c as noinstr to prevent compiler instrumentation. This also blacklists the functions for tracing and kprobes, so we don't need to handle that separately. Functions elsewhere will be dealt with in subsequent patches. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20201130115950.22492-4-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/entry-common.c | 75 +++++++++++--------------------- 1 file changed, 25 insertions(+), 50 deletions(-) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 43d4c329775f..75e99161f79e 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -17,7 +17,7 @@ #include #include -static void notrace el1_abort(struct pt_regs *regs, unsigned long esr) +static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -25,32 +25,28 @@ static void notrace el1_abort(struct pt_regs *regs, unsigned long esr) far = untagged_addr(far); do_mem_abort(far, esr, regs); } -NOKPROBE_SYMBOL(el1_abort); -static void notrace el1_pc(struct pt_regs *regs, unsigned long esr) +static void noinstr el1_pc(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); local_daif_inherit(regs); do_sp_pc_abort(far, esr, regs); } -NOKPROBE_SYMBOL(el1_pc); -static void notrace el1_undef(struct pt_regs *regs) +static void noinstr el1_undef(struct pt_regs *regs) { local_daif_inherit(regs); do_undefinstr(regs); } -NOKPROBE_SYMBOL(el1_undef); -static void notrace el1_inv(struct pt_regs *regs, unsigned long esr) +static void noinstr el1_inv(struct pt_regs *regs, unsigned long esr) { local_daif_inherit(regs); bad_mode(regs, 0, esr); } -NOKPROBE_SYMBOL(el1_inv); -static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr) +static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -64,16 +60,14 @@ static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr) do_debug_exception(far, esr, regs); } -NOKPROBE_SYMBOL(el1_dbg); -static void notrace el1_fpac(struct pt_regs *regs, unsigned long esr) +static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr) { local_daif_inherit(regs); do_ptrauth_fault(regs, esr); } -NOKPROBE_SYMBOL(el1_fpac); -asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) +asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -106,9 +100,8 @@ asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) el1_inv(regs, esr); } } -NOKPROBE_SYMBOL(el1_sync_handler); -static void notrace el0_da(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_da(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -117,9 +110,8 @@ static void notrace el0_da(struct pt_regs *regs, unsigned long esr) far = untagged_addr(far); do_mem_abort(far, esr, regs); } -NOKPROBE_SYMBOL(el0_da); -static void notrace el0_ia(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -135,41 +127,36 @@ static void notrace el0_ia(struct pt_regs *regs, unsigned long esr) local_daif_restore(DAIF_PROCCTX); do_mem_abort(far, esr, regs); } -NOKPROBE_SYMBOL(el0_ia); -static void notrace el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_fpsimd_acc(esr, regs); } -NOKPROBE_SYMBOL(el0_fpsimd_acc); -static void notrace el0_sve_acc(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_sve_acc(esr, regs); } -NOKPROBE_SYMBOL(el0_sve_acc); -static void notrace el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_fpsimd_exc(esr, regs); } -NOKPROBE_SYMBOL(el0_fpsimd_exc); -static void notrace el0_sys(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_sysinstr(esr, regs); } -NOKPROBE_SYMBOL(el0_sys); -static void notrace el0_pc(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -180,41 +167,36 @@ static void notrace el0_pc(struct pt_regs *regs, unsigned long esr) local_daif_restore(DAIF_PROCCTX); do_sp_pc_abort(far, esr, regs); } -NOKPROBE_SYMBOL(el0_pc); -static void notrace el0_sp(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_sp_pc_abort(regs->sp, esr, regs); } -NOKPROBE_SYMBOL(el0_sp); -static void notrace el0_undef(struct pt_regs *regs) +static void noinstr el0_undef(struct pt_regs *regs) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_undefinstr(regs); } -NOKPROBE_SYMBOL(el0_undef); -static void notrace el0_bti(struct pt_regs *regs) +static void noinstr el0_bti(struct pt_regs *regs) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_bti(regs); } -NOKPROBE_SYMBOL(el0_bti); -static void notrace el0_inv(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); bad_el0_sync(regs, 0, esr); } -NOKPROBE_SYMBOL(el0_inv); -static void notrace el0_dbg(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr) { /* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */ unsigned long far = read_sysreg(far_el1); @@ -226,26 +208,23 @@ static void notrace el0_dbg(struct pt_regs *regs, unsigned long esr) do_debug_exception(far, esr, regs); local_daif_restore(DAIF_PROCCTX_NOIRQ); } -NOKPROBE_SYMBOL(el0_dbg); -static void notrace el0_svc(struct pt_regs *regs) +static void noinstr el0_svc(struct pt_regs *regs) { if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); do_el0_svc(regs); } -NOKPROBE_SYMBOL(el0_svc); -static void notrace el0_fpac(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_ptrauth_fault(regs, esr); } -NOKPROBE_SYMBOL(el0_fpac); -asmlinkage void notrace el0_sync_handler(struct pt_regs *regs) +asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -297,27 +276,24 @@ asmlinkage void notrace el0_sync_handler(struct pt_regs *regs) el0_inv(regs, esr); } } -NOKPROBE_SYMBOL(el0_sync_handler); #ifdef CONFIG_COMPAT -static void notrace el0_cp15(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_cp15instr(esr, regs); } -NOKPROBE_SYMBOL(el0_cp15); -static void notrace el0_svc_compat(struct pt_regs *regs) +static void noinstr el0_svc_compat(struct pt_regs *regs) { if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); do_el0_svc_compat(regs); } -NOKPROBE_SYMBOL(el0_svc_compat); -asmlinkage void notrace el0_sync_compat_handler(struct pt_regs *regs) +asmlinkage void noinstr el0_sync_compat_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -360,5 +336,4 @@ asmlinkage void notrace el0_sync_compat_handler(struct pt_regs *regs) el0_inv(regs, esr); } } -NOKPROBE_SYMBOL(el0_sync_compat_handler); #endif /* CONFIG_COMPAT */ From 2f911d494f3f028bbe6346e383a354225682cf1b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Nov 2020 11:59:43 +0000 Subject: [PATCH 106/167] arm64: entry: move enter_from_user_mode to entry-common.c In later patches we'll want to extend enter_from_user_mode() and add a corresponding exit_to_user_mode(). As these will be common for all entries/exits from userspace, it'd be better for these to live in entry-common.c with the rest of the entry logic. This patch moves enter_from_user_mode() into entry-common.c. As with other functions in entry-common.c it is marked as noinstr (which prevents all instrumentation, tracing, and kprobes) but there are no other functional changes. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20201130115950.22492-5-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/entry-common.c | 6 ++++++ arch/arm64/kernel/traps.c | 7 ------- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 75e99161f79e..9a685e7686fe 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -101,6 +101,12 @@ asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs) } } +asmlinkage void noinstr enter_from_user_mode(void) +{ + CT_WARN_ON(ct_state() != CONTEXT_USER); + user_exit_irqoff(); +} + static void noinstr el0_da(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 8af4e0e85736..580c60afc39a 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -876,13 +876,6 @@ asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr) nmi_exit(); } -asmlinkage void enter_from_user_mode(void) -{ - CT_WARN_ON(ct_state() != CONTEXT_USER); - user_exit_irqoff(); -} -NOKPROBE_SYMBOL(enter_from_user_mode); - /* GENERIC_BUG traps */ int is_valid_bugaddr(unsigned long addr) From 3cb5ed4d76c15fb97c10e5e9f5268d92c68222ca Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Nov 2020 11:59:44 +0000 Subject: [PATCH 107/167] arm64: entry: prepare ret_to_user for function call In a subsequent patch ret_to_user will need to make a C function call (in some configurations) which may clobber x0-x18 at the start of the finish_ret_to_user block, before enable_step_tsk consumes the flags loaded into x1. In preparation for this, let's load the flags into x19, which is preserved across C function calls. This avoids a redundant reload of the flags and ensures we operate on a consistent shapshot regardless. There should be no functional change as a result of this patch. At this point of the entry/exit paths we only need to preserve x28 (tsk) and the sp, and x19 is free for this use. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20201130115950.22492-6-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b295fb912b12..84aec600eeed 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -774,13 +774,13 @@ SYM_CODE_END(el0_error) SYM_CODE_START_LOCAL(ret_to_user) disable_daif gic_prio_kentry_setup tmp=x3 - ldr x1, [tsk, #TSK_TI_FLAGS] - and x2, x1, #_TIF_WORK_MASK + ldr x19, [tsk, #TSK_TI_FLAGS] + and x2, x19, #_TIF_WORK_MASK cbnz x2, work_pending finish_ret_to_user: /* Ignore asynchronous tag check faults in the uaccess routines */ clear_mte_async_tcf - enable_step_tsk x1, x2 + enable_step_tsk x19, x2 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK bl stackleak_erase #endif @@ -791,11 +791,12 @@ finish_ret_to_user: */ work_pending: mov x0, sp // 'regs' + mov x1, x19 bl do_notify_resume #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_on // enabled while in userspace #endif - ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for single-step + ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step b finish_ret_to_user SYM_CODE_END(ret_to_user) From 105fc3352077bba5faaf12cf39f7e3aad26fb70b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Nov 2020 11:59:45 +0000 Subject: [PATCH 108/167] arm64: entry: move el1 irq/nmi logic to C In preparation for reworking the EL1 irq/nmi entry code, move the existing logic to C. We no longer need the asm_nmi_enter() and asm_nmi_exit() wrappers, so these are removed. The new C functions are marked noinstr, which prevents compiler instrumentation and runtime probing. In subsequent patches we'll want the new C helpers to be called in all cases, so we don't bother wrapping the calls with ifdeferry. Even when the new C functions are stubs the trivial calls are unlikely to have a measurable impact on the IRQ or NMI paths anyway. Prototypes are added to as otherwise (in some configurations) GCC will complain about the lack of a forward declaration. We already do this for existing function, e.g. enter_from_user_mode(). The new helpers are marked as noinstr (which prevents all instrumentation, tracing, and kprobes). Otherwise, there should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20201130115950.22492-7-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/exception.h | 2 ++ arch/arm64/kernel/entry-common.c | 16 ++++++++++++++ arch/arm64/kernel/entry.S | 34 ++++-------------------------- arch/arm64/kernel/irq.c | 15 ------------- 4 files changed, 22 insertions(+), 45 deletions(-) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 99b9383cd036..d69d53dd7be7 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -31,6 +31,8 @@ static inline u32 disr_to_esr(u64 disr) return esr; } +asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs); +asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs); asmlinkage void enter_from_user_mode(void); void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); void do_undefinstr(struct pt_regs *regs); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 9a685e7686fe..920da254be1d 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -17,6 +17,22 @@ #include #include +asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs) +{ + if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) + nmi_enter(); + + trace_hardirqs_off(); +} + +asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs) +{ + if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) + nmi_exit(); + else + trace_hardirqs_on(); +} + static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 84aec600eeed..53e30750fc28 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -637,16 +637,8 @@ SYM_CODE_START_LOCAL_NOALIGN(el1_irq) gic_prio_irq_setup pmr=x20, tmp=x1 enable_da_f -#ifdef CONFIG_ARM64_PSEUDO_NMI - test_irqs_unmasked res=x0, pmr=x20 - cbz x0, 1f - bl asm_nmi_enter -1: -#endif - -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif + mov x0, sp + bl enter_el1_irq_or_nmi irq_handler @@ -665,26 +657,8 @@ alternative_else_nop_endif 1: #endif -#ifdef CONFIG_ARM64_PSEUDO_NMI - /* - * When using IRQ priority masking, we can get spurious interrupts while - * PMR is set to GIC_PRIO_IRQOFF. An NMI might also have occurred in a - * section with interrupts disabled. Skip tracing in those cases. - */ - test_irqs_unmasked res=x0, pmr=x20 - cbz x0, 1f - bl asm_nmi_exit -1: -#endif - -#ifdef CONFIG_TRACE_IRQFLAGS -#ifdef CONFIG_ARM64_PSEUDO_NMI - test_irqs_unmasked res=x0, pmr=x20 - cbnz x0, 1f -#endif - bl trace_hardirqs_on -1: -#endif + mov x0, sp + bl exit_el1_irq_or_nmi kernel_exit 1 SYM_CODE_END(el1_irq) diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 9cf2fb87584a..60456a62da11 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -67,18 +67,3 @@ void __init init_IRQ(void) local_daif_restore(DAIF_PROCCTX_NOIRQ); } } - -/* - * Stubs to make nmi_enter/exit() code callable from ASM - */ -asmlinkage void notrace asm_nmi_enter(void) -{ - nmi_enter(); -} -NOKPROBE_SYMBOL(asm_nmi_enter); - -asmlinkage void notrace asm_nmi_exit(void) -{ - nmi_exit(); -} -NOKPROBE_SYMBOL(asm_nmi_exit); From 23529049c68423820487304f244144e0d576e85a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Nov 2020 11:59:46 +0000 Subject: [PATCH 109/167] arm64: entry: fix non-NMI user<->kernel transitions When built with PROVE_LOCKING, NO_HZ_FULL, and CONTEXT_TRACKING_FORCE will WARN() at boot time that interrupts are enabled when we call context_tracking_user_enter(), despite the DAIF flags indicating that IRQs are masked. The problem is that we're not tracking IRQ flag changes accurately, and so lockdep believes interrupts are enabled when they are not (and vice-versa). We can shuffle things so to make this more accurate. For kernel->user transitions there are a number of constraints we need to consider: 1) When we call __context_tracking_user_enter() HW IRQs must be disabled and lockdep must be up-to-date with this. 2) Userspace should be treated as having IRQs enabled from the PoV of both lockdep and tracing. 3) As context_tracking_user_enter() stops RCU from watching, we cannot use RCU after calling it. 4) IRQ flag tracing and lockdep have state that must be manipulated before RCU is disabled. ... with similar constraints applying for user->kernel transitions, with the ordering reversed. The generic entry code has enter_from_user_mode() and exit_to_user_mode() helpers to handle this. We can't use those directly, so we add arm64 copies for now (without the instrumentation markers which aren't used on arm64). These replace the existing user_exit() and user_exit_irqoff() calls spread throughout handlers, and the exception unmasking is left as-is. Note that: * The accounting for debug exceptions from userspace now happens in el0_dbg() and ret_to_user(), so this is removed from debug_exception_enter() and debug_exception_exit(). As user_exit_irqoff() wakes RCU, the userspace-specific check is removed. * The accounting for syscalls now happens in el0_svc(), el0_svc_compat(), and ret_to_user(), so this is removed from el0_svc_common(). This does not adversely affect the workaround for erratum 1463225, as this does not depend on any of the state tracking. * In ret_to_user() we mask interrupts with local_daif_mask(), and so we need to inform lockdep and tracing. Here a trace_hardirqs_off() is sufficient and safe as we have not yet exited kernel context and RCU is usable. * As PROVE_LOCKING selects TRACE_IRQFLAGS, the ifdeferry in entry.S only needs to check for the latter. * EL0 SError handling will be dealt with in a subsequent patch, as this needs to be treated as an NMI. Prior to this patch, booting an appropriately-configured kernel would result in spats as below: | DEBUG_LOCKS_WARN_ON(lockdep_hardirqs_enabled()) | WARNING: CPU: 2 PID: 1 at kernel/locking/lockdep.c:5280 check_flags.part.54+0x1dc/0x1f0 | Modules linked in: | CPU: 2 PID: 1 Comm: init Not tainted 5.10.0-rc3 #3 | Hardware name: linux,dummy-virt (DT) | pstate: 804003c5 (Nzcv DAIF +PAN -UAO -TCO BTYPE=--) | pc : check_flags.part.54+0x1dc/0x1f0 | lr : check_flags.part.54+0x1dc/0x1f0 | sp : ffff80001003bd80 | x29: ffff80001003bd80 x28: ffff66ce801e0000 | x27: 00000000ffffffff x26: 00000000000003c0 | x25: 0000000000000000 x24: ffffc31842527258 | x23: ffffc31842491368 x22: ffffc3184282d000 | x21: 0000000000000000 x20: 0000000000000001 | x19: ffffc318432ce000 x18: 0080000000000000 | x17: 0000000000000000 x16: ffffc31840f18a78 | x15: 0000000000000001 x14: ffffc3184285c810 | x13: 0000000000000001 x12: 0000000000000000 | x11: ffffc318415857a0 x10: ffffc318406614c0 | x9 : ffffc318415857a0 x8 : ffffc31841f1d000 | x7 : 647261685f706564 x6 : ffffc3183ff7c66c | x5 : ffff66ce801e0000 x4 : 0000000000000000 | x3 : ffffc3183fe00000 x2 : ffffc31841500000 | x1 : e956dc24146b3500 x0 : 0000000000000000 | Call trace: | check_flags.part.54+0x1dc/0x1f0 | lock_is_held_type+0x10c/0x188 | rcu_read_lock_sched_held+0x70/0x98 | __context_tracking_enter+0x310/0x350 | context_tracking_enter.part.3+0x5c/0xc8 | context_tracking_user_enter+0x6c/0x80 | finish_ret_to_user+0x2c/0x13cr Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20201130115950.22492-8-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/exception.h | 1 + arch/arm64/kernel/entry-common.c | 40 +++++++++++++++++++----------- arch/arm64/kernel/entry.S | 35 ++++++++++---------------- arch/arm64/kernel/syscall.c | 1 - arch/arm64/mm/fault.c | 22 ++++++++-------- 5 files changed, 51 insertions(+), 48 deletions(-) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index d69d53dd7be7..d579b2e6db7a 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -34,6 +34,7 @@ static inline u32 disr_to_esr(u64 disr) asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs); asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs); asmlinkage void enter_from_user_mode(void); +asmlinkage void exit_to_user_mode(void); void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); void do_undefinstr(struct pt_regs *regs); void do_bti(struct pt_regs *regs); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 920da254be1d..49d1c1dd9baf 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -119,15 +119,25 @@ asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs) asmlinkage void noinstr enter_from_user_mode(void) { + lockdep_hardirqs_off(CALLER_ADDR0); CT_WARN_ON(ct_state() != CONTEXT_USER); user_exit_irqoff(); + trace_hardirqs_off_finish(); +} + +asmlinkage void noinstr exit_to_user_mode(void) +{ + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + user_enter_irqoff(); + lockdep_hardirqs_on(CALLER_ADDR0); } static void noinstr el0_da(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); far = untagged_addr(far); do_mem_abort(far, esr, regs); @@ -145,35 +155,35 @@ static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr) if (!is_ttbr0_addr(far)) arm64_apply_bp_hardening(); - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_mem_abort(far, esr, regs); } static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_fpsimd_acc(esr, regs); } static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_sve_acc(esr, regs); } static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_fpsimd_exc(esr, regs); } static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_sysinstr(esr, regs); } @@ -185,35 +195,35 @@ static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr) if (!is_ttbr0_addr(instruction_pointer(regs))) arm64_apply_bp_hardening(); - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_sp_pc_abort(far, esr, regs); } static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_sp_pc_abort(regs->sp, esr, regs); } static void noinstr el0_undef(struct pt_regs *regs) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_undefinstr(regs); } static void noinstr el0_bti(struct pt_regs *regs) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_bti(regs); } static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); bad_el0_sync(regs, 0, esr); } @@ -226,7 +236,7 @@ static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); - user_exit_irqoff(); + enter_from_user_mode(); do_debug_exception(far, esr, regs); local_daif_restore(DAIF_PROCCTX_NOIRQ); } @@ -236,12 +246,13 @@ static void noinstr el0_svc(struct pt_regs *regs) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + enter_from_user_mode(); do_el0_svc(regs); } static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_ptrauth_fault(regs, esr); } @@ -302,7 +313,7 @@ asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs) #ifdef CONFIG_COMPAT static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_cp15instr(esr, regs); } @@ -312,6 +323,7 @@ static void noinstr el0_svc_compat(struct pt_regs *regs) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + enter_from_user_mode(); do_el0_svc_compat(regs); } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 53e30750fc28..d72c818b019c 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -30,18 +30,18 @@ #include /* - * Context tracking subsystem. Used to instrument transitions - * between user and kernel mode. + * Context tracking and irqflag tracing need to instrument transitions between + * user and kernel mode. */ - .macro ct_user_exit_irqoff -#ifdef CONFIG_CONTEXT_TRACKING + .macro user_exit_irqoff +#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS) bl enter_from_user_mode #endif .endm - .macro ct_user_enter -#ifdef CONFIG_CONTEXT_TRACKING - bl context_tracking_user_enter + .macro user_enter_irqoff +#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS) + bl exit_to_user_mode #endif .endm @@ -298,9 +298,6 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING alternative_else_nop_endif ldp x21, x22, [sp, #S_PC] // load ELR, SPSR - .if \el == 0 - ct_user_enter - .endif #ifdef CONFIG_ARM64_SW_TTBR0_PAN alternative_if_not ARM64_HAS_PAN @@ -700,21 +697,14 @@ SYM_CODE_START_LOCAL_NOALIGN(el0_irq) kernel_entry 0 el0_irq_naked: gic_prio_irq_setup pmr=x20, tmp=x0 - ct_user_exit_irqoff + user_exit_irqoff enable_da_f -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif - tbz x22, #55, 1f bl do_el0_irq_bp_hardening 1: irq_handler -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_on -#endif b ret_to_user SYM_CODE_END(el0_irq) @@ -733,7 +723,7 @@ SYM_CODE_START_LOCAL(el0_error) el0_error_naked: mrs x25, esr_el1 gic_prio_kentry_setup tmp=x2 - ct_user_exit_irqoff + user_exit_irqoff enable_dbg mov x0, sp mov x1, x25 @@ -748,10 +738,14 @@ SYM_CODE_END(el0_error) SYM_CODE_START_LOCAL(ret_to_user) disable_daif gic_prio_kentry_setup tmp=x3 +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_off +#endif ldr x19, [tsk, #TSK_TI_FLAGS] and x2, x19, #_TIF_WORK_MASK cbnz x2, work_pending finish_ret_to_user: + user_enter_irqoff /* Ignore asynchronous tag check faults in the uaccess routines */ clear_mte_async_tcf enable_step_tsk x19, x2 @@ -767,9 +761,6 @@ work_pending: mov x0, sp // 'regs' mov x1, x19 bl do_notify_resume -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_on // enabled while in userspace -#endif ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step b finish_ret_to_user SYM_CODE_END(ret_to_user) diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 13fe79f8e2db..f8f758e4a306 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -120,7 +120,6 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, */ cortex_a76_erratum_1463225_svc_handler(); - user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); if (system_supports_mte() && (flags & _TIF_MTE_ASYNC_FAULT)) { diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1ee94002801f..1f450b784d2c 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -789,16 +789,14 @@ void __init hook_debug_fault_code(int nr, */ static void debug_exception_enter(struct pt_regs *regs) { - /* - * Tell lockdep we disabled irqs in entry.S. Do nothing if they were - * already disabled to preserve the last enabled/disabled addresses. - */ - if (interrupts_enabled(regs)) - trace_hardirqs_off(); + if (!user_mode(regs)) { + /* + * Tell lockdep we disabled irqs in entry.S. Do nothing if they were + * already disabled to preserve the last enabled/disabled addresses. + */ + if (interrupts_enabled(regs)) + trace_hardirqs_off(); - if (user_mode(regs)) { - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - } else { /* * We might have interrupted pretty much anything. In * fact, if we're a debug exception, we can even interrupt @@ -819,8 +817,10 @@ static void debug_exception_exit(struct pt_regs *regs) { preempt_enable_no_resched(); - if (!user_mode(regs)) - rcu_nmi_exit(); + if (user_mode(regs)) + return; + + rcu_nmi_exit(); if (interrupts_enabled(regs)) trace_hardirqs_on(); From 1ec2f2c05b2ab845d068bff29bd32dbfc6a6ad4c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Nov 2020 11:59:47 +0000 Subject: [PATCH 110/167] arm64: ptrace: prepare for EL1 irq/rcu tracking Exceptions from EL1 may be taken when RCU isn't watching (e.g. in idle sequences), or when the lockdep hardirqs transiently out-of-sync with the hardware state (e.g. in the middle of local_irq_enable()). To correctly handle these cases, we'll need to save/restore this state across some exceptions taken from EL1. A series of subsequent patches will update EL1 exception handlers to handle this. In preparation for this, and to avoid dependencies between those patches, this patch adds two new fields to struct pt_regs so that exception handlers can track this state. Note that this is placed in pt_regs as some entry/exit sequences such as el1_irq are invoked from assembly, which makes it very difficult to add a separate structure as with the irqentry_state used by x86. We can separate this once more of the exception logic is moved to C. While the fields only need to be bool, they are both made u64 to keep pt_regs 16-byte aligned. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20201130115950.22492-9-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/ptrace.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 997cf8c8cd52..28c85b87b8cd 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -193,6 +193,10 @@ struct pt_regs { /* Only valid when ARM64_HAS_IRQ_PRIO_MASKING is enabled. */ u64 pmr_save; u64 stackframe[2]; + + /* Only valid for some EL1 exceptions. */ + u64 lockdep_hardirqs; + u64 exit_rcu; }; static inline bool in_syscall(struct pt_regs const *regs) From 7cd1ea1010acbede7eb87b6abb6198921fb36957 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Nov 2020 11:59:48 +0000 Subject: [PATCH 111/167] arm64: entry: fix non-NMI kernel<->kernel transitions There are periods in kernel mode when RCU is not watching and/or the scheduler tick is disabled, but we can still take exceptions such as interrupts. The arm64 exception handlers do not account for this, and it's possible that RCU is not watching while an exception handler runs. The x86/generic entry code handles this by ensuring that all (non-NMI) kernel exception handlers call irqentry_enter() and irqentry_exit(), which handle RCU, lockdep, and IRQ flag tracing. We can't yet move to the generic entry code, and already hadnle the user<->kernel transitions elsewhere, so we add new kernel<->kernel transition helpers alog the lines of the generic entry code. Since we now track interrupts becoming masked when an exception is taken, local_daif_inherit() is modified to track interrupts becoming re-enabled when the original context is inherited. To balance the entry/exit paths, each handler masks all DAIF exceptions before exit_to_kernel_mode(). Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20201130115950.22492-10-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/daifflags.h | 3 ++ arch/arm64/kernel/entry-common.c | 67 ++++++++++++++++++++++++++++-- 2 files changed, 67 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index ec213b4a1650..1c26d7baa67f 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -128,6 +128,9 @@ static inline void local_daif_inherit(struct pt_regs *regs) { unsigned long flags = regs->pstate & DAIF_MASK; + if (interrupts_enabled(regs)) + trace_hardirqs_on(); + /* * We can't use local_daif_restore(regs->pstate) here as * system_has_prio_mask_debugging() won't restore the I bit if it can diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 49d1c1dd9baf..86622d8dd0d8 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -17,12 +17,58 @@ #include #include +/* + * This is intended to match the logic in irqentry_enter(), handling the kernel + * mode transitions only. + */ +static void noinstr enter_from_kernel_mode(struct pt_regs *regs) +{ + regs->exit_rcu = false; + + if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) { + lockdep_hardirqs_off(CALLER_ADDR0); + rcu_irq_enter(); + trace_hardirqs_off_finish(); + + regs->exit_rcu = true; + return; + } + + lockdep_hardirqs_off(CALLER_ADDR0); + rcu_irq_enter_check_tick(); + trace_hardirqs_off_finish(); +} + +/* + * This is intended to match the logic in irqentry_exit(), handling the kernel + * mode transitions only, and with preemption handled elsewhere. + */ +static void noinstr exit_to_kernel_mode(struct pt_regs *regs) +{ + lockdep_assert_irqs_disabled(); + + if (interrupts_enabled(regs)) { + if (regs->exit_rcu) { + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + rcu_irq_exit(); + lockdep_hardirqs_on(CALLER_ADDR0); + return; + } + + trace_hardirqs_on(); + } else { + if (regs->exit_rcu) + rcu_irq_exit(); + } +} + asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs) { if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) nmi_enter(); - - trace_hardirqs_off(); + else + enter_from_kernel_mode(regs); } asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs) @@ -30,36 +76,48 @@ asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs) if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) nmi_exit(); else - trace_hardirqs_on(); + exit_to_kernel_mode(regs); } static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); + enter_from_kernel_mode(regs); local_daif_inherit(regs); far = untagged_addr(far); do_mem_abort(far, esr, regs); + local_daif_mask(); + exit_to_kernel_mode(regs); } static void noinstr el1_pc(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); + enter_from_kernel_mode(regs); local_daif_inherit(regs); do_sp_pc_abort(far, esr, regs); + local_daif_mask(); + exit_to_kernel_mode(regs); } static void noinstr el1_undef(struct pt_regs *regs) { + enter_from_kernel_mode(regs); local_daif_inherit(regs); do_undefinstr(regs); + local_daif_mask(); + exit_to_kernel_mode(regs); } static void noinstr el1_inv(struct pt_regs *regs, unsigned long esr) { + enter_from_kernel_mode(regs); local_daif_inherit(regs); bad_mode(regs, 0, esr); + local_daif_mask(); + exit_to_kernel_mode(regs); } static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) @@ -79,8 +137,11 @@ static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr) { + enter_from_kernel_mode(regs); local_daif_inherit(regs); do_ptrauth_fault(regs, esr); + local_daif_mask(); + exit_to_kernel_mode(regs); } asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs) From f0cd5ac1e4c53cb691b3ed3cda1031e1c42153e2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Nov 2020 11:59:49 +0000 Subject: [PATCH 112/167] arm64: entry: fix NMI {user, kernel}->kernel transitions Exceptions which can be taken at (almost) any time are consdiered to be NMIs. On arm64 that includes: * SDEI events * GICv3 Pseudo-NMIs * Kernel stack overflows * Unexpected/unhandled exceptions ... but currently debug exceptions (BRKs, breakpoints, watchpoints, single-step) are not considered NMIs. As these can be taken at any time, kernel features (lockdep, RCU, ftrace) may not be in a consistent kernel state. For example, we may take an NMI from the idle code or partway through an entry/exit path. While nmi_enter() and nmi_exit() handle most of this state, notably they don't save/restore the lockdep state across an NMI being taken and handled. When interrupts are enabled and an NMI is taken, lockdep may see interrupts become disabled within the NMI code, but not see interrupts become enabled when returning from the NMI, leaving lockdep believing interrupts are disabled when they are actually disabled. The x86 code handles this in idtentry_{enter,exit}_nmi(), which will shortly be moved to the generic entry code. As we can't use either yet, we copy the x86 approach in arm64-specific helpers. All the NMI entrypoints are marked as noinstr to prevent any instrumentation handling code being invoked before the state has been corrected. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20201130115950.22492-11-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/exception.h | 2 ++ arch/arm64/kernel/entry-common.c | 34 ++++++++++++++++++++++++++++-- arch/arm64/kernel/sdei.c | 7 +++--- arch/arm64/kernel/traps.c | 15 ++++++++----- 4 files changed, 48 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index d579b2e6db7a..0756191f44f6 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -35,6 +35,8 @@ asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs); asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs); asmlinkage void enter_from_user_mode(void); asmlinkage void exit_to_user_mode(void); +void arm64_enter_nmi(struct pt_regs *regs); +void arm64_exit_nmi(struct pt_regs *regs); void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); void do_undefinstr(struct pt_regs *regs); void do_bti(struct pt_regs *regs); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 86622d8dd0d8..6d70be0d3e8f 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -63,10 +63,40 @@ static void noinstr exit_to_kernel_mode(struct pt_regs *regs) } } +void noinstr arm64_enter_nmi(struct pt_regs *regs) +{ + regs->lockdep_hardirqs = lockdep_hardirqs_enabled(); + + __nmi_enter(); + lockdep_hardirqs_off(CALLER_ADDR0); + lockdep_hardirq_enter(); + rcu_nmi_enter(); + + trace_hardirqs_off_finish(); + ftrace_nmi_enter(); +} + +void noinstr arm64_exit_nmi(struct pt_regs *regs) +{ + bool restore = regs->lockdep_hardirqs; + + ftrace_nmi_exit(); + if (restore) { + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + } + + rcu_nmi_exit(); + lockdep_hardirq_exit(); + if (restore) + lockdep_hardirqs_on(CALLER_ADDR0); + __nmi_exit(); +} + asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs) { if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) - nmi_enter(); + arm64_enter_nmi(regs); else enter_from_kernel_mode(regs); } @@ -74,7 +104,7 @@ asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs) asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs) { if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) - nmi_exit(); + arm64_exit_nmi(regs); else exit_to_kernel_mode(regs); } diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 7689f2031c0c..793c46d6a447 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -223,16 +224,16 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs, } -asmlinkage __kprobes notrace unsigned long +asmlinkage noinstr unsigned long __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg) { unsigned long ret; - nmi_enter(); + arm64_enter_nmi(regs); ret = _sdei_handler(regs, arg); - nmi_exit(); + arm64_exit_nmi(regs); return ret; } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 580c60afc39a..2059d8f43f55 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -753,8 +754,10 @@ const char *esr_get_class_string(u32 esr) * bad_mode handles the impossible case in the exception vector. This is always * fatal. */ -asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) +asmlinkage void notrace bad_mode(struct pt_regs *regs, int reason, unsigned int esr) { + arm64_enter_nmi(regs); + console_verbose(); pr_crit("Bad mode in %s handler detected on CPU%d, code 0x%08x -- %s\n", @@ -786,7 +789,7 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack) __aligned(16); -asmlinkage void handle_bad_stack(struct pt_regs *regs) +asmlinkage void noinstr handle_bad_stack(struct pt_regs *regs) { unsigned long tsk_stk = (unsigned long)current->stack; unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr); @@ -794,6 +797,8 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs) unsigned int esr = read_sysreg(esr_el1); unsigned long far = read_sysreg(far_el1); + arm64_enter_nmi(regs); + console_verbose(); pr_emerg("Insufficient stack space to handle exception!"); @@ -865,15 +870,15 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr) } } -asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr) +asmlinkage void noinstr do_serror(struct pt_regs *regs, unsigned int esr) { - nmi_enter(); + arm64_enter_nmi(regs); /* non-RAS errors are not containable */ if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr)) arm64_serror_panic(regs, esr); - nmi_exit(); + arm64_exit_nmi(regs); } /* GENERIC_BUG traps */ From 2a9b3e6ac69a8bf177d8496a11e749e2dc72fa22 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Nov 2020 11:59:50 +0000 Subject: [PATCH 113/167] arm64: entry: fix EL1 debug transitions In debug_exception_enter() and debug_exception_exit() we trace hardirqs on/off while RCU isn't guaranteed to be watching, and we don't save and restore the hardirq state, and so may return with this having changed. Handle this appropriately with new entry/exit helpers which do the bare minimum to ensure this is appropriately maintained, without marking debug exceptions as NMIs. These are placed in entry-common.c with the other entry/exit helpers. In future we'll want to reconsider whether some debug exceptions should be NMIs, but this will require a significant refactoring, and for now this should prevent issues with lockdep and RCU. Signed-off-by: Mark Rutland Cc: Catalin Marins Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20201130115950.22492-12-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/entry-common.c | 26 ++++++++++++++++++++++++++ arch/arm64/mm/fault.c | 25 ------------------------- 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 6d70be0d3e8f..70e0a7591245 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -150,6 +150,30 @@ static void noinstr el1_inv(struct pt_regs *regs, unsigned long esr) exit_to_kernel_mode(regs); } +static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs) +{ + regs->lockdep_hardirqs = lockdep_hardirqs_enabled(); + + lockdep_hardirqs_off(CALLER_ADDR0); + rcu_nmi_enter(); + + trace_hardirqs_off_finish(); +} + +static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs) +{ + bool restore = regs->lockdep_hardirqs; + + if (restore) { + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + } + + rcu_nmi_exit(); + if (restore) + lockdep_hardirqs_on(CALLER_ADDR0); +} + static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -162,7 +186,9 @@ static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + arm64_enter_el1_dbg(regs); do_debug_exception(far, esr, regs); + arm64_exit_el1_dbg(regs); } static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1f450b784d2c..795d224f184f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -789,23 +789,6 @@ void __init hook_debug_fault_code(int nr, */ static void debug_exception_enter(struct pt_regs *regs) { - if (!user_mode(regs)) { - /* - * Tell lockdep we disabled irqs in entry.S. Do nothing if they were - * already disabled to preserve the last enabled/disabled addresses. - */ - if (interrupts_enabled(regs)) - trace_hardirqs_off(); - - /* - * We might have interrupted pretty much anything. In - * fact, if we're a debug exception, we can even interrupt - * NMI processing. We don't want this code makes in_nmi() - * to return true, but we need to notify RCU. - */ - rcu_nmi_enter(); - } - preempt_disable(); /* This code is a bit fragile. Test it. */ @@ -816,14 +799,6 @@ NOKPROBE_SYMBOL(debug_exception_enter); static void debug_exception_exit(struct pt_regs *regs) { preempt_enable_no_resched(); - - if (user_mode(regs)) - return; - - rcu_nmi_exit(); - - if (interrupts_enabled(regs)) - trace_hardirqs_on(); } NOKPROBE_SYMBOL(debug_exception_exit); From 9e5344e0ffc33f4fee899f98b6939a0682b1d9c3 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 30 Nov 2020 17:07:09 +0000 Subject: [PATCH 114/167] arm64: mte: Fix typo in macro definition UL in the definition of SYS_TFSR_EL1_TF1 was misspelled causing compilation issues when trying to implement in kernel MTE async mode. Fix the macro correcting the typo. Note: MTE async mode will be introduced with a future series. Fixes: c058b1c4a5ea ("arm64: mte: system register definitions") Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Vincenzo Frascino Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20201130170709.22309-1-vincenzo.frascino@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 174817ba119c..c8ab9900293f 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -983,7 +983,7 @@ #define SYS_TFSR_EL1_TF0_SHIFT 0 #define SYS_TFSR_EL1_TF1_SHIFT 1 #define SYS_TFSR_EL1_TF0 (UL(1) << SYS_TFSR_EL1_TF0_SHIFT) -#define SYS_TFSR_EL1_TF1 (UK(2) << SYS_TFSR_EL1_TF1_SHIFT) +#define SYS_TFSR_EL1_TF1 (UL(1) << SYS_TFSR_EL1_TF1_SHIFT) /* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */ #define SYS_MPIDR_SAFE_VAL (BIT(31)) From 55ea4cf403800af2ce6b125bc3d853117e0c0456 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 27 Nov 2020 11:20:58 -0500 Subject: [PATCH 115/167] ring-buffer: Update write stamp with the correct ts The write stamp, used to calculate deltas between events, was updated with the stale "ts" value in the "info" structure, and not with the updated "ts" variable. This caused the deltas between events to be inaccurate, and when crossing into a new sub buffer, had time go backwards. Link: https://lkml.kernel.org/r/20201124223917.795844-1-elavila@google.com Cc: stable@vger.kernel.org Fixes: a389d86f7fd09 ("ring-buffer: Have nested events still record running time stamp") Reported-by: "J. Avila" Tested-by: Daniel Mentz Tested-by: Will McVicker Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index dc83b3fa9fe7..bccaf88d3706 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3291,7 +3291,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, /* Nothing came after this event between C and E */ info->delta = ts - info->after; (void)rb_time_cmpxchg(&cpu_buffer->write_stamp, - info->after, info->ts); + info->after, ts); info->ts = ts; } else { /* From 8785f51a17083eee7c37606079c6447afc6ba102 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Sat, 28 Nov 2020 10:15:17 +0100 Subject: [PATCH 116/167] ring-buffer: Set the right timestamp in the slow path of __rb_reserve_next() In the slow path of __rb_reserve_next() a nested event(s) can happen between evaluating the timestamp delta of the current event and updating write_stamp via local_cmpxchg(); in this case the delta is not valid anymore and it should be set to 0 (same timestamp as the interrupting event), since the event that we are currently processing is not the last event in the buffer. Link: https://lkml.kernel.org/r/X8IVJcp1gRE+FJCJ@xps-13-7390 Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: stable@vger.kernel.org Link: https://lwn.net/Articles/831207 Fixes: a389d86f7fd0 ("ring-buffer: Have nested events still record running time stamp") Signed-off-by: Andrea Righi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index bccaf88d3706..35d91b20d47a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3287,11 +3287,11 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, ts = rb_time_stamp(cpu_buffer->buffer); barrier(); /*E*/ if (write == (local_read(&tail_page->write) & RB_WRITE_MASK) && - info->after < ts) { + info->after < ts && + rb_time_cmpxchg(&cpu_buffer->write_stamp, + info->after, ts)) { /* Nothing came after this event between C and E */ info->delta = ts - info->after; - (void)rb_time_cmpxchg(&cpu_buffer->write_stamp, - info->after, ts); info->ts = ts; } else { /* From 6988a619f5b79e4efadea6e19dcfe75fbcd350b5 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Sat, 28 Nov 2020 15:57:06 -0300 Subject: [PATCH 117/167] cifs: allow syscalls to be restarted in __smb_send_rqst() A customer has reported that several files in their multi-threaded app were left with size of 0 because most of the read(2) calls returned -EINTR and they assumed no bytes were read. Obviously, they could have fixed it by simply retrying on -EINTR. We noticed that most of the -EINTR on read(2) were due to real-time signals sent by glibc to process wide credential changes (SIGRT_1), and its signal handler had been established with SA_RESTART, in which case those calls could have been automatically restarted by the kernel. Let the kernel decide to whether or not restart the syscalls when there is a signal pending in __smb_send_rqst() by returning -ERESTARTSYS. If it can't, it will return -EINTR anyway. Signed-off-by: Paulo Alcantara (SUSE) CC: Stable Reviewed-by: Ronnie Sahlberg Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/transport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index e27e255d40dd..36b2ece43403 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -339,8 +339,8 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, return -EAGAIN; if (signal_pending(current)) { - cifs_dbg(FYI, "signal is pending before sending any data\n"); - return -EINTR; + cifs_dbg(FYI, "signal pending before send request\n"); + return -ERESTARTSYS; } /* cork the socket */ From 212253367dc7b49ed3fc194ce71b0992eacaecf2 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Sat, 28 Nov 2020 16:54:02 -0300 Subject: [PATCH 118/167] cifs: fix potential use-after-free in cifs_echo_request() This patch fixes a potential use-after-free bug in cifs_echo_request(). For instance, thread 1 -------- cifs_demultiplex_thread() clean_demultiplex_info() kfree(server) thread 2 (workqueue) -------- apic_timer_interrupt() smp_apic_timer_interrupt() irq_exit() __do_softirq() run_timer_softirq() call_timer_fn() cifs_echo_request() <- use-after-free in server ptr Signed-off-by: Paulo Alcantara (SUSE) CC: Stable Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/connect.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index c38156f324dd..28c1459fb0fc 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -876,6 +876,8 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) list_del_init(&server->tcp_ses_list); spin_unlock(&cifs_tcp_ses_lock); + cancel_delayed_work_sync(&server->echo); + spin_lock(&GlobalMid_Lock); server->tcpStatus = CifsExiting; spin_unlock(&GlobalMid_Lock); From dd0ecf544125639e54056d851e4887dbb94b6d2f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 30 Nov 2020 16:07:25 +0100 Subject: [PATCH 119/167] gfs2: Fix deadlock between gfs2_{create_inode,inode_lookup} and delete_work_func In gfs2_create_inode and gfs2_inode_lookup, make sure to cancel any pending delete work before taking the inode glock. Otherwise, gfs2_cancel_delete_work may block waiting for delete_work_func to complete, and delete_work_func may block trying to acquire the inode glock in gfs2_inode_lookup. Reported-by: Alexander Aring Fixes: a0e3cc65fa29 ("gfs2: Turn gl_delete into a delayed work") Cc: stable@vger.kernel.org # v5.8+ Signed-off-by: Andreas Gruenbacher --- fs/gfs2/inode.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index eed1a1bac6f6..65ae4fc28ede 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -150,6 +150,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); if (unlikely(error)) goto fail; + if (blktype != GFS2_BLKST_UNLINKED) + gfs2_cancel_delete_work(io_gl); if (type == DT_UNKNOWN || blktype != GFS2_BLKST_FREE) { /* @@ -180,8 +182,6 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); if (unlikely(error)) goto fail; - if (blktype != GFS2_BLKST_UNLINKED) - gfs2_cancel_delete_work(ip->i_iopen_gh.gh_gl); glock_set_object(ip->i_iopen_gh.gh_gl, ip); gfs2_glock_put(io_gl); io_gl = NULL; @@ -725,13 +725,19 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, flush_delayed_work(&ip->i_gl->gl_work); glock_set_object(ip->i_gl, ip); - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); + error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl); if (error) goto fail_free_inode; + gfs2_cancel_delete_work(io_gl); + glock_set_object(io_gl, ip); + + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); + if (error) + goto fail_gunlock2; error = gfs2_trans_begin(sdp, blocks, 0); if (error) - goto fail_free_inode; + goto fail_gunlock2; if (blocks > 1) { ip->i_eattr = ip->i_no_addr + 1; @@ -740,18 +746,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, init_dinode(dip, ip, symname); gfs2_trans_end(sdp); - error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl); - if (error) - goto fail_free_inode; - BUG_ON(test_and_set_bit(GLF_INODE_CREATING, &io_gl->gl_flags)); error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); if (error) goto fail_gunlock2; - gfs2_cancel_delete_work(ip->i_iopen_gh.gh_gl); - glock_set_object(ip->i_iopen_gh.gh_gl, ip); gfs2_set_iop(inode); insert_inode_hash(inode); @@ -803,6 +803,7 @@ fail_gunlock3: gfs2_glock_dq_uninit(&ip->i_iopen_gh); fail_gunlock2: clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags); + glock_clear_object(io_gl, ip); gfs2_glock_put(io_gl); fail_free_inode: if (ip->i_gl) { From e3d5e971d2f83d8ddd4b91a50cea4517fb488383 Mon Sep 17 00:00:00 2001 From: Vinay Kumar Yadav Date: Thu, 26 Nov 2020 03:19:14 +0530 Subject: [PATCH 120/167] chelsio/chtls: fix panic during unload reload chtls there is kernel panic in inet_twsk_free() while chtls module unload when socket is in TIME_WAIT state because sk_prot_creator was not preserved on connection socket. Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Udai Sharma Signed-off-by: Vinay Kumar Yadav Link: https://lore.kernel.org/r/20201125214913.16938-1-vinay.yadav@chelsio.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 96d561653496..50e3a70e5a29 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -1206,6 +1206,7 @@ static struct sock *chtls_recv_sock(struct sock *lsk, sk_setup_caps(newsk, dst); ctx = tls_get_ctx(lsk); newsk->sk_destruct = ctx->sk_destruct; + newsk->sk_prot_creator = lsk->sk_prot_creator; csk->sk = newsk; csk->passive_reap_next = oreq; csk->tx_chan = cxgb4_port_chan(ndev); From 0a35dc41fea67ac4495ce7584406bf9557a6e7d0 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 26 Nov 2020 13:52:46 +0100 Subject: [PATCH 121/167] vxlan: Add needed_headroom for lower device It was observed that sending data via batadv over vxlan (on top of wireguard) reduced the performance massively compared to raw ethernet or batadv on raw ethernet. A check of perf data showed that the vxlan_build_skb was calling all the time pskb_expand_head to allocate enough headroom for: min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len + VXLAN_HLEN + iphdr_len; But the vxlan_config_apply only requested needed headroom for: lowerdev->hard_header_len + VXLAN6_HEADROOM or VXLAN_HEADROOM So it completely ignored the needed_headroom of the lower device. The first caller of net_dev_xmit could therefore never make sure that enough headroom was allocated for the rest of the transmit path. Cc: Annika Wickert Signed-off-by: Sven Eckelmann Tested-by: Annika Wickert Link: https://lore.kernel.org/r/20201126125247.1047977-1-sven@narfation.org Signed-off-by: Jakub Kicinski --- drivers/net/vxlan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 1a557aeba32b..55fa3fbb80f1 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3798,6 +3798,7 @@ static void vxlan_config_apply(struct net_device *dev, dev->gso_max_segs = lowerdev->gso_max_segs; needed_headroom = lowerdev->hard_header_len; + needed_headroom += lowerdev->needed_headroom; max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); From a5e74021e84bb5eadf760aaf2c583304f02269be Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 26 Nov 2020 13:52:47 +0100 Subject: [PATCH 122/167] vxlan: Copy needed_tailroom from lowerdev While vxlan doesn't need any extra tailroom, the lowerdev might need it. In that case, copy it over to reduce the chance for additional (re)allocations in the transmit path. Signed-off-by: Sven Eckelmann Link: https://lore.kernel.org/r/20201126125247.1047977-2-sven@narfation.org Signed-off-by: Jakub Kicinski --- drivers/net/vxlan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 55fa3fbb80f1..032f78261913 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3800,6 +3800,8 @@ static void vxlan_config_apply(struct net_device *dev, needed_headroom = lowerdev->hard_header_len; needed_headroom += lowerdev->needed_headroom; + dev->needed_tailroom = lowerdev->needed_tailroom; + max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); if (max_mtu < ETH_MIN_MTU) From 983df5f2699f83f78643b19d3399b160d1e64f5b Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 13 Nov 2020 10:34:14 -0800 Subject: [PATCH 123/167] samples/ftrace: Mark my_tramp[12]? global my_tramp[12]? are declared as global functions in C, but they are not marked global in the inline assembly definition. This mismatch confuses Clang's Control-Flow Integrity checking. Fix the definitions by adding .globl. Link: https://lkml.kernel.org/r/20201113183414.1446671-1-samitolvanen@google.com Fixes: 9d907f1ae80b8 ("ftrace/samples: Add a sample module that implements modify_ftrace_direct()") Reviewed-by: Kees Cook Signed-off-by: Sami Tolvanen Signed-off-by: Steven Rostedt (VMware) --- samples/ftrace/ftrace-direct-modify.c | 2 ++ samples/ftrace/ftrace-direct-too.c | 1 + samples/ftrace/ftrace-direct.c | 1 + 3 files changed, 4 insertions(+) diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c index c13a5bc5095b..5b9a09957c6e 100644 --- a/samples/ftrace/ftrace-direct-modify.c +++ b/samples/ftrace/ftrace-direct-modify.c @@ -21,6 +21,7 @@ static unsigned long my_ip = (unsigned long)schedule; asm ( " .pushsection .text, \"ax\", @progbits\n" " .type my_tramp1, @function\n" +" .globl my_tramp1\n" " my_tramp1:" " pushq %rbp\n" " movq %rsp, %rbp\n" @@ -29,6 +30,7 @@ asm ( " .size my_tramp1, .-my_tramp1\n" " ret\n" " .type my_tramp2, @function\n" +" .globl my_tramp2\n" " my_tramp2:" " pushq %rbp\n" " movq %rsp, %rbp\n" diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c index d5c5022be664..3f0079c9bd6f 100644 --- a/samples/ftrace/ftrace-direct-too.c +++ b/samples/ftrace/ftrace-direct-too.c @@ -16,6 +16,7 @@ extern void my_tramp(void *); asm ( " .pushsection .text, \"ax\", @progbits\n" " .type my_tramp, @function\n" +" .globl my_tramp\n" " my_tramp:" " pushq %rbp\n" " movq %rsp, %rbp\n" diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c index 63ca06d42c80..a2729d1ef17f 100644 --- a/samples/ftrace/ftrace-direct.c +++ b/samples/ftrace/ftrace-direct.c @@ -14,6 +14,7 @@ extern void my_tramp(void *); asm ( " .pushsection .text, \"ax\", @progbits\n" " .type my_tramp, @function\n" +" .globl my_tramp\n" " my_tramp:" " pushq %rbp\n" " movq %rsp, %rbp\n" From 310e3a4b5a4fc718a72201c1e4cf5c64ac6f5442 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 18 Nov 2020 15:05:20 +0300 Subject: [PATCH 124/167] tracing: Remove WARN_ON in start_thread() This patch reverts commit 978defee11a5 ("tracing: Do a WARN_ON() if start_thread() in hwlat is called when thread exists") .start hook can be legally called several times if according tracer is stopped screen window 1 [root@localhost ~]# echo 1 > /sys/kernel/tracing/events/kmem/kfree/enable [root@localhost ~]# echo 1 > /sys/kernel/tracing/options/pause-on-trace [root@localhost ~]# less -F /sys/kernel/tracing/trace screen window 2 [root@localhost ~]# cat /sys/kernel/debug/tracing/tracing_on 0 [root@localhost ~]# echo hwlat > /sys/kernel/debug/tracing/current_tracer [root@localhost ~]# echo 1 > /sys/kernel/debug/tracing/tracing_on [root@localhost ~]# cat /sys/kernel/debug/tracing/tracing_on 0 [root@localhost ~]# echo 2 > /sys/kernel/debug/tracing/tracing_on triggers warning in dmesg: WARNING: CPU: 3 PID: 1403 at kernel/trace/trace_hwlat.c:371 hwlat_tracer_start+0xc9/0xd0 Link: https://lkml.kernel.org/r/bd4d3e70-400d-9c82-7b73-a2d695e86b58@virtuozzo.com Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: 978defee11a5 ("tracing: Do a WARN_ON() if start_thread() in hwlat is called when thread exists") Signed-off-by: Vasily Averin Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_hwlat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index c9ad5c6fbaad..d071fc271eef 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -368,7 +368,7 @@ static int start_kthread(struct trace_array *tr) struct task_struct *kthread; int next_cpu; - if (WARN_ON(hwlat_kthread)) + if (hwlat_kthread) return 0; /* Just pick the first CPU on first iteration */ From 8fa655a3a0013a0c2a2aada6f39a93ee6fc25549 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 25 Nov 2020 14:56:54 -0800 Subject: [PATCH 125/167] tracing: Fix alignment of static buffer With 5.9 kernel on ARM64, I found ftrace_dump output was broken but it had no problem with normal output "cat /sys/kernel/debug/tracing/trace". With investigation, it seems coping the data into temporal buffer seems to break the align binary printf expects if the static buffer is not aligned with 4-byte. IIUC, get_arg in bstr_printf expects that args has already right align to be decoded and seq_buf_bprintf says ``the arguments are saved in a 32bit word array that is defined by the format string constraints``. So if we don't keep the align under copy to temporal buffer, the output will be broken by shifting some bytes. This patch fixes it. Link: https://lkml.kernel.org/r/20201125225654.1618966-1-minchan@kernel.org Cc: Fixes: 8e99cf91b99bb ("tracing: Do not allocate buffer in trace_find_next_entry() in atomic") Signed-off-by: Namhyung Kim Signed-off-by: Minchan Kim Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 410cfeb16db5..7d53c5bdea3e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3534,7 +3534,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, } #define STATIC_TEMP_BUF_SIZE 128 -static char static_temp_buf[STATIC_TEMP_BUF_SIZE]; +static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4); /* Find the next real entry, without updating the iterator itself */ struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, From 4c75b0ff4e4bf7a45b5aef9639799719c28d0073 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 26 Nov 2020 23:38:38 +0530 Subject: [PATCH 126/167] ftrace: Fix updating FTRACE_FL_TRAMP On powerpc, kprobe-direct.tc triggered FTRACE_WARN_ON() in ftrace_get_addr_new() followed by the below message: Bad trampoline accounting at: 000000004222522f (wake_up_process+0xc/0x20) (f0000001) The set of steps leading to this involved: - modprobe ftrace-direct-too - enable_probe - modprobe ftrace-direct - rmmod ftrace-direct <-- trigger The problem turned out to be that we were not updating flags in the ftrace record properly. From the above message about the trampoline accounting being bad, it can be seen that the ftrace record still has FTRACE_FL_TRAMP set though ftrace-direct module is going away. This happens because we are checking if any ftrace_ops has the FTRACE_FL_TRAMP flag set _before_ updating the filter hash. The fix for this is to look for any _other_ ftrace_ops that also needs FTRACE_FL_TRAMP. Link: https://lkml.kernel.org/r/56c113aa9c3e10c19144a36d9684c7882bf09af5.1606412433.git.naveen.n.rao@linux.vnet.ibm.com Cc: stable@vger.kernel.org Fixes: a124692b698b0 ("ftrace: Enable trampoline when rec count returns back to one") Signed-off-by: Naveen N. Rao Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8185f7240095..9c1bba8cc51b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1629,6 +1629,8 @@ static bool test_rec_ops_needs_regs(struct dyn_ftrace *rec) static struct ftrace_ops * ftrace_find_tramp_ops_any(struct dyn_ftrace *rec); static struct ftrace_ops * +ftrace_find_tramp_ops_any_other(struct dyn_ftrace *rec, struct ftrace_ops *op_exclude); +static struct ftrace_ops * ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops); static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, @@ -1778,7 +1780,7 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, * to it. */ if (ftrace_rec_count(rec) == 1 && - ftrace_find_tramp_ops_any(rec)) + ftrace_find_tramp_ops_any_other(rec, ops)) rec->flags |= FTRACE_FL_TRAMP; else rec->flags &= ~FTRACE_FL_TRAMP; @@ -2244,6 +2246,24 @@ ftrace_find_tramp_ops_any(struct dyn_ftrace *rec) return NULL; } +static struct ftrace_ops * +ftrace_find_tramp_ops_any_other(struct dyn_ftrace *rec, struct ftrace_ops *op_exclude) +{ + struct ftrace_ops *op; + unsigned long ip = rec->ip; + + do_for_each_ftrace_op(op, ftrace_ops_list) { + + if (op == op_exclude || !op->trampoline) + continue; + + if (hash_contains_ip(ip, op->func_hash)) + return op; + } while_for_each_ftrace_op(op); + + return NULL; +} + static struct ftrace_ops * ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *op) From 49a962c075dfa41c78e34784772329bc8784d217 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 26 Nov 2020 23:38:39 +0530 Subject: [PATCH 127/167] ftrace: Fix DYNAMIC_FTRACE_WITH_DIRECT_CALLS dependency DYNAMIC_FTRACE_WITH_DIRECT_CALLS should depend on DYNAMIC_FTRACE_WITH_REGS since we need ftrace_regs_caller(). Link: https://lkml.kernel.org/r/fc4b257ea8689a36f086d2389a9ed989496ca63a.1606412433.git.naveen.n.rao@linux.vnet.ibm.com Cc: stable@vger.kernel.org Fixes: 763e34e74bb7d5c ("ftrace: Add register_ftrace_direct()") Signed-off-by: Naveen N. Rao Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index a4020c0b4508..e1bf5228fb69 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -202,7 +202,7 @@ config DYNAMIC_FTRACE_WITH_REGS config DYNAMIC_FTRACE_WITH_DIRECT_CALLS def_bool y - depends on DYNAMIC_FTRACE + depends on DYNAMIC_FTRACE_WITH_REGS depends on HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS config FUNCTION_PROFILER From 68e10d5ff512b503dcba1246ad5620f32035e135 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 30 Nov 2020 23:16:03 -0500 Subject: [PATCH 128/167] ring-buffer: Always check to put back before stamp when crossing pages The current ring buffer logic checks to see if the updating of the event buffer was interrupted, and if it is, it will try to fix up the before stamp with the write stamp to make them equal again. This logic is flawed, because if it is not interrupted, the two are guaranteed to be different, as the current event just updated the before stamp before allocation. This guarantees that the next event (this one or another interrupting one) will think it interrupted the time updates of a previous event and inject an absolute time stamp to compensate. The correct logic is to always update the timestamps when traversing to a new sub buffer. Cc: stable@vger.kernel.org Fixes: a389d86f7fd09 ("ring-buffer: Have nested events still record running time stamp") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 35d91b20d47a..a6268e09160a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3234,14 +3234,12 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, /* See if we shot pass the end of this buffer page */ if (unlikely(write > BUF_PAGE_SIZE)) { - if (tail != w) { - /* before and after may now different, fix it up*/ - b_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before); - a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after); - if (a_ok && b_ok && info->before != info->after) - (void)rb_time_cmpxchg(&cpu_buffer->before_stamp, - info->before, info->after); - } + /* before and after may now different, fix it up*/ + b_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before); + a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after); + if (a_ok && b_ok && info->before != info->after) + (void)rb_time_cmpxchg(&cpu_buffer->before_stamp, + info->before, info->after); return rb_move_tail(cpu_buffer, tail, info); } From 24aed09451270b6a2a78adf8a34918d12ffb7dcf Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 20 Nov 2020 11:29:04 +0900 Subject: [PATCH 129/167] bootconfig: Load size and checksum in the footer as le32 Load the size and the checksum fields in the footer as le32 instead of u32. This will allow us to apply bootconfig to the cross build initrd without caring the endianness. Link: https://lkml.kernel.org/r/160583934457.547349.10504070298990791074.stgit@devnote2 Reported-by: Steven Rostedt Suggested-by: Linus Torvalds Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- init/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/init/main.c b/init/main.c index 20baced721ad..32b2a8affafd 100644 --- a/init/main.c +++ b/init/main.c @@ -288,8 +288,8 @@ static void * __init get_boot_config_from_initrd(u32 *_size, u32 *_csum) found: hdr = (u32 *)(data - 8); - size = hdr[0]; - csum = hdr[1]; + size = le32_to_cpu(hdr[0]); + csum = le32_to_cpu(hdr[1]); data = ((void *)hdr) - size; if ((unsigned long)data < initrd_start) { From e86843580d1bb1ce12544bca3115cf11d51603ff Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 20 Nov 2020 11:29:13 +0900 Subject: [PATCH 130/167] tools/bootconfig: Store size and checksum in footer as le32 Store the size and the checksum fields in the footer as le32 instead of u32. This will allow us to apply bootconfig to the cross build initrd without caring the endianness. Link: https://lkml.kernel.org/r/160583935332.547349.5897811300636587426.stgit@devnote2 Reported-by: Steven Rostedt Suggested-by: Linus Torvalds Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 4a445b6304bb..7362bef1a368 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -183,9 +184,11 @@ static int load_xbc_from_initrd(int fd, char **buf) if (read(fd, &size, sizeof(u32)) < 0) return pr_errno("Failed to read size", -errno); + size = le32toh(size); if (read(fd, &csum, sizeof(u32)) < 0) return pr_errno("Failed to read checksum", -errno); + csum = le32toh(csum); /* Wrong size error */ if (stat.st_size < size + 8 + BOOTCONFIG_MAGIC_LEN) { @@ -407,10 +410,10 @@ static int apply_xbc(const char *path, const char *xbc_path) /* Add a footer */ p = data + size; - *(u32 *)p = size; + *(u32 *)p = htole32(size); p += sizeof(u32); - *(u32 *)p = csum; + *(u32 *)p = htole32(csum); p += sizeof(u32); memcpy(p, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN); From 05227490c5f0f1bbd3693a7a70b3fb5b09d2a996 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 20 Nov 2020 11:29:22 +0900 Subject: [PATCH 131/167] docs: bootconfig: Add the endianness of fields Add a description about the endianness of the size and the checksum fields. Those must be stored as le32 instead of u32. This will allow us to apply bootconfig to the cross build initrd without caring the endianness. Link: https://lkml.kernel.org/r/160583936246.547349.10964204130590955409.stgit@devnote2 Reported-by: Steven Rostedt Suggested-by: Linus Torvalds Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- Documentation/admin-guide/bootconfig.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/bootconfig.rst b/Documentation/admin-guide/bootconfig.rst index 363599683784..9b90efcc3a35 100644 --- a/Documentation/admin-guide/bootconfig.rst +++ b/Documentation/admin-guide/bootconfig.rst @@ -140,7 +140,9 @@ Since the boot configuration file is loaded with initrd, it will be added to the end of the initrd (initramfs) image file with padding, size, checksum and 12-byte magic word as below. -[initrd][bootconfig][padding][size(u32)][checksum(u32)][#BOOTCONFIG\n] +[initrd][bootconfig][padding][size(le32)][checksum(le32)][#BOOTCONFIG\n] + +The size and checksum fields are unsigned 32bit little endian value. When the boot configuration is added to the initrd image, the total file size is aligned to 4 bytes. To fill the gap, null characters From cf03f316ad20dac16b5adae3f6dedd7d188c7f65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 1 Dec 2020 14:54:09 +0100 Subject: [PATCH 132/167] fs: 9p: add generic splice_read file operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The v9fs file operations were missing the splice_read operations, which breaks sendfile() of files on such a filesystem. I discovered this while trying to load an eBPF program using iproute2 inside a 'virtme' environment which uses 9pfs for the virtual file system. iproute2 relies on sendfile() with an AF_ALG socket to hash files, which was erroring out in the virtual environment. Since generic_file_splice_read() seems to just implement splice_read in terms of the read_iter operation, I simply added the generic implementation to the file operations, which fixed the error I was seeing. A quick grep indicates that this is what most other file systems do as well. Link: http://lkml.kernel.org/r/20201201135409.55510-1-toke@redhat.com Fixes: 36e2c7421f02 ("fs: don't allow splice read/write without explicit ops") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Dominique Martinet --- fs/9p/vfs_file.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index b177fd3b1eb3..01026b47018c 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -655,6 +655,7 @@ const struct file_operations v9fs_cached_file_operations = { .release = v9fs_dir_release, .lock = v9fs_file_lock, .mmap = v9fs_file_mmap, + .splice_read = generic_file_splice_read, .fsync = v9fs_file_fsync, }; @@ -667,6 +668,7 @@ const struct file_operations v9fs_cached_file_operations_dotl = { .lock = v9fs_file_lock_dotl, .flock = v9fs_file_flock_dotl, .mmap = v9fs_file_mmap, + .splice_read = generic_file_splice_read, .fsync = v9fs_file_fsync_dotl, }; @@ -678,6 +680,7 @@ const struct file_operations v9fs_file_operations = { .release = v9fs_dir_release, .lock = v9fs_file_lock, .mmap = generic_file_readonly_mmap, + .splice_read = generic_file_splice_read, .fsync = v9fs_file_fsync, }; @@ -690,6 +693,7 @@ const struct file_operations v9fs_file_operations_dotl = { .lock = v9fs_file_lock_dotl, .flock = v9fs_file_flock_dotl, .mmap = generic_file_readonly_mmap, + .splice_read = generic_file_splice_read, .fsync = v9fs_file_fsync_dotl, }; @@ -701,6 +705,7 @@ const struct file_operations v9fs_mmap_file_operations = { .release = v9fs_dir_release, .lock = v9fs_file_lock, .mmap = v9fs_mmap_file_mmap, + .splice_read = generic_file_splice_read, .fsync = v9fs_file_fsync, }; @@ -713,5 +718,6 @@ const struct file_operations v9fs_mmap_file_operations_dotl = { .lock = v9fs_file_lock_dotl, .flock = v9fs_file_flock_dotl, .mmap = v9fs_mmap_file_mmap, + .splice_read = generic_file_splice_read, .fsync = v9fs_file_fsync_dotl, }; From b71ec952234610b4f90ef17a2fdcb124d5320070 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Tue, 1 Dec 2020 09:52:10 -0600 Subject: [PATCH 133/167] ibmvnic: Ensure that SCRQ entry reads are correctly ordered Ensure that received Subordinate Command-Response Queue (SCRQ) entries are properly read in order by the driver. These queues are used in the ibmvnic device to process RX buffer and TX completion descriptors. dma_rmb barriers have been added after checking for a pending descriptor to ensure the correct descriptor entry is checked and after reading the SCRQ descriptor to ensure the entire descriptor is read before processing. Fixes: 032c5e82847a ("Driver for IBM System i/p VNIC protocol") Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index bca1becd33f0..0e34c36c3e86 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2404,6 +2404,12 @@ restart_poll: if (!pending_scrq(adapter, adapter->rx_scrq[scrq_num])) break; + /* The queue entry at the current index is peeked at above + * to determine that there is a valid descriptor awaiting + * processing. We want to be sure that the current slot + * holds a valid descriptor before reading its contents. + */ + dma_rmb(); next = ibmvnic_next_scrq(adapter, adapter->rx_scrq[scrq_num]); rx_buff = (struct ibmvnic_rx_buff *)be64_to_cpu(next-> @@ -3113,6 +3119,13 @@ restart_loop: unsigned int pool = scrq->pool_index; int num_entries = 0; + /* The queue entry at the current index is peeked at above + * to determine that there is a valid descriptor awaiting + * processing. We want to be sure that the current slot + * holds a valid descriptor before reading its contents. + */ + dma_rmb(); + next = ibmvnic_next_scrq(adapter, scrq); for (i = 0; i < next->tx_comp.num_comps; i++) { if (next->tx_comp.rcs[i]) { @@ -3513,6 +3526,11 @@ static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *adapter, } spin_unlock_irqrestore(&scrq->lock, flags); + /* Ensure that the entire buffer descriptor has been + * loaded before reading its contents + */ + dma_rmb(); + return entry; } From ba246c175116e2e8fa4fdfa5f8e958e086a9a818 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Tue, 1 Dec 2020 09:52:11 -0600 Subject: [PATCH 134/167] ibmvnic: Fix TX completion error handling TX completions received with an error return code are not being processed properly. When an error code is seen, do not proceed to the next completion before cleaning up the existing entry's data structures. Fixes: 032c5e82847a ("Driver for IBM System i/p VNIC protocol") Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 0e34c36c3e86..da9450f18717 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3128,11 +3128,9 @@ restart_loop: next = ibmvnic_next_scrq(adapter, scrq); for (i = 0; i < next->tx_comp.num_comps; i++) { - if (next->tx_comp.rcs[i]) { + if (next->tx_comp.rcs[i]) dev_err(dev, "tx error %x\n", next->tx_comp.rcs[i]); - continue; - } index = be32_to_cpu(next->tx_comp.correlators[i]); if (index & IBMVNIC_TSO_POOL_MASK) { tx_pool = &adapter->tso_pool[pool]; From 14483cbf040fcb38113497161088a1ce8ce5d713 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 28 Nov 2020 23:08:43 -0800 Subject: [PATCH 135/167] net: broadcom CNIC: requires MMU The CNIC kconfig symbol selects UIO and UIO depends on MMU. Since 'select' does not follow dependency chains, add the same MMU dependency to CNIC. Quietens this kconfig warning: WARNING: unmet direct dependencies detected for UIO Depends on [n]: MMU [=n] Selected by [m]: - CNIC [=m] && NETDEVICES [=y] && ETHERNET [=y] && NET_VENDOR_BROADCOM [=y] && PCI [=y] && (IPV6 [=m] || IPV6 [=m]=n) Fixes: adfc5217e9db ("broadcom: Move the Broadcom drivers") Signed-off-by: Randy Dunlap Cc: Jeff Kirsher Cc: Rasesh Mody Cc: GR-Linux-NIC-Dev@marvell.com Cc: "David S. Miller" Cc: Jakub Kicinski Cc: netdev@vger.kernel.org Link: https://lore.kernel.org/r/20201129070843.3859-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index 7fb42f388d59..7b79528d6eed 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -88,6 +88,7 @@ config BNX2 config CNIC tristate "QLogic CNIC support" depends on PCI && (IPV6 || IPV6=n) + depends on MMU select BNX2 select UIO help From 960f4f8a4e60da610af73c1264673f71f5a36efd Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Tue, 1 Dec 2020 16:09:37 +0100 Subject: [PATCH 136/167] fs: 9p: add generic splice_write file operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The default splice operations got removed recently, add it back to 9p with iter_file_splice_write like many other filesystems do. Link: http://lkml.kernel.org/r/1606837496-21717-1-git-send-email-asmadeus@codewreck.org Fixes: 36e2c7421f02 ("fs: don't allow splice read/write without explicit ops") Signed-off-by: Dominique Martinet Acked-by: Toke Høiland-Jørgensen Reviewed-by: Christoph Hellwig --- fs/9p/vfs_file.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 01026b47018c..be5768949cb1 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -656,6 +656,7 @@ const struct file_operations v9fs_cached_file_operations = { .lock = v9fs_file_lock, .mmap = v9fs_file_mmap, .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, .fsync = v9fs_file_fsync, }; @@ -669,6 +670,7 @@ const struct file_operations v9fs_cached_file_operations_dotl = { .flock = v9fs_file_flock_dotl, .mmap = v9fs_file_mmap, .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, .fsync = v9fs_file_fsync_dotl, }; @@ -681,6 +683,7 @@ const struct file_operations v9fs_file_operations = { .lock = v9fs_file_lock, .mmap = generic_file_readonly_mmap, .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, .fsync = v9fs_file_fsync, }; @@ -694,6 +697,7 @@ const struct file_operations v9fs_file_operations_dotl = { .flock = v9fs_file_flock_dotl, .mmap = generic_file_readonly_mmap, .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, .fsync = v9fs_file_fsync_dotl, }; @@ -706,6 +710,7 @@ const struct file_operations v9fs_mmap_file_operations = { .lock = v9fs_file_lock, .mmap = v9fs_mmap_file_mmap, .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, .fsync = v9fs_file_fsync, }; @@ -719,5 +724,6 @@ const struct file_operations v9fs_mmap_file_operations_dotl = { .flock = v9fs_file_flock_dotl, .mmap = v9fs_mmap_file_mmap, .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, .fsync = v9fs_file_fsync_dotl, }; From 0643334902fcdc770e2d9555811200213339a3f6 Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Mon, 30 Nov 2020 09:55:44 +0700 Subject: [PATCH 137/167] tipc: fix incompatible mtu of transmission In commit 682cd3cf946b6 ("tipc: confgiure and apply UDP bearer MTU on running links"), we introduced a function to change UDP bearer MTU and applied this new value across existing per-link. However, we did not apply this new MTU value at node level. This lead to packet dropped at link level if its size is greater than new MTU value. To fix this issue, we also apply this new MTU value for node level. Fixes: 682cd3cf946b6 ("tipc: confgiure and apply UDP bearer MTU on running links") Acked-by: Jon Maloy Signed-off-by: Hoang Le Link: https://lore.kernel.org/r/20201130025544.3602-1-hoang.h.le@dektech.com.au Signed-off-by: Jakub Kicinski --- net/tipc/node.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tipc/node.c b/net/tipc/node.c index d269ebe382e1..c95d037fde51 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2182,6 +2182,8 @@ void tipc_node_apply_property(struct net *net, struct tipc_bearer *b, else if (prop == TIPC_NLA_PROP_MTU) tipc_link_set_mtu(e->link, b->mtu); } + /* Update MTU for node link entry */ + e->mtu = tipc_link_mss(e->link); tipc_node_write_unlock(n); tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr, NULL); } From 2867e1eac61016f59b3d730e3f7aa488e186e917 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Mon, 30 Nov 2020 19:37:05 +0100 Subject: [PATCH 138/167] inet_ecn: Fix endianness of checksum update when setting ECT(1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When adding support for propagating ECT(1) marking in IP headers it seems I suffered from endianness-confusion in the checksum update calculation: In fact the ECN field is in the *lower* bits of the first 16-bit word of the IP header when calculating in network byte order. This means that the addition performed to update the checksum field was wrong; let's fix that. Fixes: b723748750ec ("tunnel: Propagate ECT(1) when decapsulating as recommended by RFC6040") Reported-by: Jonathan Morton Tested-by: Pete Heist Signed-off-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20201130183705.17540-1-toke@redhat.com Signed-off-by: Jakub Kicinski --- include/net/inet_ecn.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index e1eaf1780288..563457fec557 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -107,7 +107,7 @@ static inline int IP_ECN_set_ect1(struct iphdr *iph) if ((iph->tos & INET_ECN_MASK) != INET_ECN_ECT_0) return 0; - check += (__force u16)htons(0x100); + check += (__force u16)htons(0x1); iph->check = (__force __sum16)(check + (check>=0xFFFF)); iph->tos ^= INET_ECN_MASK; From 4179b00c04d18ea7013f68d578d80f3c9d13150a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Dec 2020 01:05:07 -0800 Subject: [PATCH 139/167] geneve: pull IP header before ECN decapsulation IP_ECN_decapsulate() and IP6_ECN_decapsulate() assume IP header is already pulled. geneve does not ensure this yet. Fixing this generically in IP_ECN_decapsulate() and IP6_ECN_decapsulate() is not possible, since callers pass a pointer that might be freed by pskb_may_pull() syzbot reported : BUG: KMSAN: uninit-value in __INET_ECN_decapsulate include/net/inet_ecn.h:238 [inline] BUG: KMSAN: uninit-value in INET_ECN_decapsulate+0x345/0x1db0 include/net/inet_ecn.h:260 CPU: 1 PID: 8941 Comm: syz-executor.0 Not tainted 5.10.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x21c/0x280 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:118 __msan_warning+0x5f/0xa0 mm/kmsan/kmsan_instr.c:197 __INET_ECN_decapsulate include/net/inet_ecn.h:238 [inline] INET_ECN_decapsulate+0x345/0x1db0 include/net/inet_ecn.h:260 geneve_rx+0x2103/0x2980 include/net/inet_ecn.h:306 geneve_udp_encap_recv+0x105c/0x1340 drivers/net/geneve.c:377 udp_queue_rcv_one_skb+0x193a/0x1af0 net/ipv4/udp.c:2093 udp_queue_rcv_skb+0x282/0x1050 net/ipv4/udp.c:2167 udp_unicast_rcv_skb net/ipv4/udp.c:2325 [inline] __udp4_lib_rcv+0x399d/0x5880 net/ipv4/udp.c:2394 udp_rcv+0x5c/0x70 net/ipv4/udp.c:2564 ip_protocol_deliver_rcu+0x572/0xc50 net/ipv4/ip_input.c:204 ip_local_deliver_finish net/ipv4/ip_input.c:231 [inline] NF_HOOK include/linux/netfilter.h:301 [inline] ip_local_deliver+0x583/0x8d0 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:449 [inline] ip_rcv_finish net/ipv4/ip_input.c:428 [inline] NF_HOOK include/linux/netfilter.h:301 [inline] ip_rcv+0x5c3/0x840 net/ipv4/ip_input.c:539 __netif_receive_skb_one_core net/core/dev.c:5315 [inline] __netif_receive_skb+0x1ec/0x640 net/core/dev.c:5429 process_backlog+0x523/0xc10 net/core/dev.c:6319 napi_poll+0x420/0x1010 net/core/dev.c:6763 net_rx_action+0x35c/0xd40 net/core/dev.c:6833 __do_softirq+0x1a9/0x6fa kernel/softirq.c:298 asm_call_irq_on_stack+0xf/0x20 __run_on_irqstack arch/x86/include/asm/irq_stack.h:26 [inline] run_on_irqstack_cond arch/x86/include/asm/irq_stack.h:77 [inline] do_softirq_own_stack+0x6e/0x90 arch/x86/kernel/irq_64.c:77 do_softirq kernel/softirq.c:343 [inline] __local_bh_enable_ip+0x184/0x1d0 kernel/softirq.c:195 local_bh_enable+0x36/0x40 include/linux/bottom_half.h:32 rcu_read_unlock_bh include/linux/rcupdate.h:730 [inline] __dev_queue_xmit+0x3a9b/0x4520 net/core/dev.c:4167 dev_queue_xmit+0x4b/0x60 net/core/dev.c:4173 packet_snd net/packet/af_packet.c:2992 [inline] packet_sendmsg+0x86f9/0x99d0 net/packet/af_packet.c:3017 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg net/socket.c:671 [inline] __sys_sendto+0x9dc/0xc80 net/socket.c:1992 __do_sys_sendto net/socket.c:2004 [inline] __se_sys_sendto+0x107/0x130 net/socket.c:2000 __x64_sys_sendto+0x6e/0x90 net/socket.c:2000 do_syscall_64+0x9f/0x140 arch/x86/entry/common.c:48 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 2d07dc79fe04 ("geneve: add initial netdev driver for GENEVE tunnels") Signed-off-by: Eric Dumazet Reported-by: syzbot Link: https://lore.kernel.org/r/20201201090507.4137906-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/geneve.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 1426bfc009bc..8ae9ce2014a4 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -257,11 +257,21 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, skb_dst_set(skb, &tun_dst->dst); /* Ignore packet loops (and multicast echo) */ - if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) { - geneve->dev->stats.rx_errors++; - goto drop; - } + if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) + goto rx_error; + switch (skb_protocol(skb, true)) { + case htons(ETH_P_IP): + if (pskb_may_pull(skb, sizeof(struct iphdr))) + goto rx_error; + break; + case htons(ETH_P_IPV6): + if (pskb_may_pull(skb, sizeof(struct ipv6hdr))) + goto rx_error; + break; + default: + goto rx_error; + } oiph = skb_network_header(skb); skb_reset_network_header(skb); @@ -298,6 +308,8 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, dev_sw_netstats_rx_add(geneve->dev, len); return; +rx_error: + geneve->dev->stats.rx_errors++; drop: /* Consume bad packet */ kfree_skb(skb); From 98701a2a861fa87a5055cf2809758e8725e8b146 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 28 Nov 2020 13:39:05 -0800 Subject: [PATCH 140/167] vdpa: mlx5: fix vdpa/vhost dependencies drivers/vdpa/mlx5/ uses vhost_iotlb*() interfaces, so select VHOST_IOTLB to make them be built. However, if VHOST_IOTLB is the only VHOST symbol that is set/enabled, the object file still won't be built because drivers/Makefile won't descend into drivers/vhost/ to build it, so make drivers/Makefile build the needed binary whenever VHOST_IOTLB is set, like it does for VHOST_RING. Fixes these build errors: ERROR: modpost: "vhost_iotlb_itree_next" [drivers/vdpa/mlx5/mlx5_vdpa.ko] undefined! ERROR: modpost: "vhost_iotlb_itree_first" [drivers/vdpa/mlx5/mlx5_vdpa.ko] undefined! Fixes: 29064bfdabd5 ("vdpa/mlx5: Add support library for mlx5 VDPA implementation") Fixes: aff90770e54c ("vdpa/mlx5: Fix dependency on MLX5_CORE") Reported-by: kernel test robot Signed-off-by: Randy Dunlap Cc: Eli Cohen Cc: Parav Pandit Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: virtualization@lists.linux-foundation.org Cc: Saeed Mahameed Cc: Leon Romanovsky Cc: netdev@vger.kernel.org Link: https://lore.kernel.org/r/20201128213905.27409-1-rdunlap@infradead.org Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/Makefile | 1 + drivers/vdpa/Kconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/Makefile b/drivers/Makefile index c0cd1b9075e3..576228037718 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -145,6 +145,7 @@ obj-$(CONFIG_OF) += of/ obj-$(CONFIG_SSB) += ssb/ obj-$(CONFIG_BCMA) += bcma/ obj-$(CONFIG_VHOST_RING) += vhost/ +obj-$(CONFIG_VHOST_IOTLB) += vhost/ obj-$(CONFIG_VHOST) += vhost/ obj-$(CONFIG_VLYNQ) += vlynq/ obj-$(CONFIG_GREYBUS) += greybus/ diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index 358f6048dd3c..6caf539091e5 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -32,6 +32,7 @@ config IFCVF config MLX5_VDPA bool + select VHOST_IOTLB help Support library for Mellanox VDPA drivers. Provides code that is common for all types of VDPA drivers. The following drivers are planned: From 2c602741b51daa12f8457f222ce9ce9c4825d067 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 2 Dec 2020 09:44:43 +0300 Subject: [PATCH 141/167] vhost_vdpa: return -EFAULT if copy_to_user() fails The copy_to_user() function returns the number of bytes remaining to be copied but this should return -EFAULT to the user. Fixes: 1b48dc03e575 ("vhost: vdpa: report iova range") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/X8c32z5EtDsMyyIL@mwanda Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Stefano Garzarella --- drivers/vhost/vdpa.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index f2db99031e2f..29ed4173f04e 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -348,7 +348,9 @@ static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp) .last = v->range.last, }; - return copy_to_user(argp, &range, sizeof(range)); + if (copy_to_user(argp, &range, sizeof(range))) + return -EFAULT; + return 0; } static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, From a2bd4097b3ec242f4de4924db463a9c94530e03a Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 26 Nov 2020 18:00:37 +0100 Subject: [PATCH 142/167] s390/pci: fix CPU address in MSI for directed IRQ The directed MSIs are delivered to CPUs whose address is written to the MSI message address. The current code assumes that a CPU logical number (as it is seen by the kernel) is also the CPU address. The above assumption is not correct, as the CPU address is rather the value returned by STAP instruction. That value does not necessarily match the kernel logical CPU number. Fixes: e979ce7bced2 ("s390/pci: provide support for CPU directed interrupts") Cc: # v5.2+ Signed-off-by: Alexander Gordeev Reviewed-by: Halil Pasic Reviewed-by: Niklas Schnelle Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/pci/pci_irq.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 743f257cf2cb..75217fb63d7b 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -103,9 +103,10 @@ static int zpci_set_irq_affinity(struct irq_data *data, const struct cpumask *de { struct msi_desc *entry = irq_get_msi_desc(data->irq); struct msi_msg msg = entry->msg; + int cpu_addr = smp_cpu_get_cpu_address(cpumask_first(dest)); msg.address_lo &= 0xff0000ff; - msg.address_lo |= (cpumask_first(dest) << 8); + msg.address_lo |= (cpu_addr << 8); pci_write_msi_msg(data->irq, &msg); return IRQ_SET_MASK_OK; @@ -238,6 +239,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) unsigned long bit; struct msi_desc *msi; struct msi_msg msg; + int cpu_addr; int rc, irq; zdev->aisb = -1UL; @@ -287,9 +289,15 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) handle_percpu_irq); msg.data = hwirq - bit; if (irq_delivery == DIRECTED) { + if (msi->affinity) + cpu = cpumask_first(&msi->affinity->mask); + else + cpu = 0; + cpu_addr = smp_cpu_get_cpu_address(cpu); + msg.address_lo = zdev->msi_addr & 0xff0000ff; - msg.address_lo |= msi->affinity ? - (cpumask_first(&msi->affinity->mask) << 8) : 0; + msg.address_lo |= (cpu_addr << 8); + for_each_possible_cpu(cpu) { airq_iv_set_data(zpci_ibv[cpu], hwirq, irq); } From b1cae1f84a0f609a34ebcaa087fbecef32f69882 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 2 Dec 2020 11:46:01 +0100 Subject: [PATCH 143/167] s390: fix irq state tracing With commit 58c644ba512c ("sched/idle: Fix arch_cpu_idle() vs tracing") common code calls arch_cpu_idle() with a lockdep state that tells irqs are on. This doesn't work very well for s390: psw_idle() will enable interrupts to wait for an interrupt. As soon as an interrupt occurs the interrupt handler will verify if the old context was psw_idle(). If that is the case the interrupt enablement bits in the old program status word will be cleared. A subsequent test in both the external as well as the io interrupt handler checks if in the old context interrupts were enabled. Due to the above patching of the old program status word it is assumed the old context had interrupts disabled, and therefore a call to TRACE_IRQS_OFF (aka trace_hardirqs_off_caller) is skipped. Which in turn makes lockdep incorrectly "think" that interrupts are enabled within the interrupt handler. Fix this by unconditionally calling TRACE_IRQS_OFF when entering interrupt handlers. Also call unconditionally TRACE_IRQS_ON when leaving interrupts handlers. This leaves the special psw_idle() case, which now returns with interrupts disabled, but has an "irqs on" lockdep state. So callers of psw_idle() must adjust the state on their own, if required. This is currently only __udelay_disabled(). Fixes: 58c644ba512c ("sched/idle: Fix arch_cpu_idle() vs tracing") Acked-by: Peter Zijlstra (Intel) Signed-off-by: Heiko Carstens --- arch/s390/kernel/entry.S | 15 --------------- arch/s390/lib/delay.c | 5 ++--- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 26bb0603c5a1..92beb1444644 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -763,12 +763,7 @@ ENTRY(io_int_handler) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ jo .Lio_restore -#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS) - tmhh %r8,0x300 - jz 1f TRACE_IRQS_OFF -1: -#endif xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) .Lio_loop: lgr %r2,%r11 # pass pointer to pt_regs @@ -791,12 +786,7 @@ ENTRY(io_int_handler) TSTMSK __LC_CPU_FLAGS,_CIF_WORK jnz .Lio_work .Lio_restore: -#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS) - tm __PT_PSW(%r11),3 - jno 0f TRACE_IRQS_ON -0: -#endif mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) tm __PT_PSW+1(%r11),0x01 # returning to user ? jno .Lio_exit_kernel @@ -976,12 +966,7 @@ ENTRY(ext_int_handler) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ jo .Lio_restore -#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS) - tmhh %r8,0x300 - jz 1f TRACE_IRQS_OFF -1: -#endif xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs lghi %r3,EXT_INTERRUPT diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index daca7bad66de..8c0c68e7770e 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -33,7 +33,7 @@ EXPORT_SYMBOL(__delay); static void __udelay_disabled(unsigned long long usecs) { - unsigned long cr0, cr0_new, psw_mask, flags; + unsigned long cr0, cr0_new, psw_mask; struct s390_idle_data idle; u64 end; @@ -45,9 +45,8 @@ static void __udelay_disabled(unsigned long long usecs) psw_mask = __extract_psw() | PSW_MASK_EXT | PSW_MASK_WAIT; set_clock_comparator(end); set_cpu_flag(CIF_IGNORE_IRQ); - local_irq_save(flags); psw_idle(&idle, psw_mask); - local_irq_restore(flags); + trace_hardirqs_off(); clear_cpu_flag(CIF_IGNORE_IRQ); set_clock_comparator(S390_lowcore.clock_comparator); __ctl_load(cr0, 0, 0); From abfccc3af786bb33210e39638268ea3a7bf80e63 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 29 Nov 2020 15:14:38 +0200 Subject: [PATCH 144/167] iwlwifi: update MAINTAINERS entry Reflect the fact that the linuxwifi@intel.com address will disappear, and that neither Emmanuel nor myself are really much involved with the maintenance these days. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20201129151117.a25afe6d2c7f.I8f13a5689dd353825fb2b9bd5b6f0fbce92cb12b@changeid --- MAINTAINERS | 3 --- 1 file changed, 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 900ff33789eb..60dd9fe60fe0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9097,10 +9097,7 @@ S: Supported F: drivers/net/wireless/intel/iwlegacy/ INTEL WIRELESS WIFI LINK (iwlwifi) -M: Johannes Berg -M: Emmanuel Grumbach M: Luca Coelho -M: Intel Linux Wireless L: linux-wireless@vger.kernel.org S: Supported W: https://wireless.wiki.kernel.org/en/users/drivers/iwlwifi From 5febcdef30902fa870128b9789b873199f13aff1 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Wed, 2 Dec 2020 14:41:49 +0200 Subject: [PATCH 145/167] iwlwifi: pcie: add one missing entry for AX210 The 0x0024 subsytem device ID was missing from the list, so some AX210 devices were not recognized. Add it. Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20201202143859.308eab4db42c.I3763196cd3f7bb36f3dcabf02ec4e7c4fe859c0f@changeid --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 129021f26791..466a967da39c 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -536,6 +536,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x2725, 0x0090, iwlax211_2ax_cfg_so_gf_a0)}, {IWL_PCI_DEVICE(0x2725, 0x0020, iwlax210_2ax_cfg_ty_gf_a0)}, + {IWL_PCI_DEVICE(0x2725, 0x0024, iwlax210_2ax_cfg_ty_gf_a0)}, {IWL_PCI_DEVICE(0x2725, 0x0310, iwlax210_2ax_cfg_ty_gf_a0)}, {IWL_PCI_DEVICE(0x2725, 0x0510, iwlax210_2ax_cfg_ty_gf_a0)}, {IWL_PCI_DEVICE(0x2725, 0x0A10, iwlax210_2ax_cfg_ty_gf_a0)}, From 568d3434178b00274615190a19d29c3d235b4e6d Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Wed, 2 Dec 2020 14:41:50 +0200 Subject: [PATCH 146/167] iwlwifi: pcie: invert values of NO_160 device config entries The NO_160 flag specifies if the device doesn't have 160 MHz support, but we errorneously assumed the opposite. If the flag was set, we were considering that 160 MHz was supported, but it's actually the opposite. Fix it by inverting the bits, i.e. NO_160 is 0x1 and 160 is 0x0. Fixes: d6f2134a3831 ("iwlwifi: add mac/rf types and 160MHz to the device tables") Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20201202143859.375bec857ccb.I83884286b688965293e9810381808039bd7eedae@changeid --- drivers/net/wireless/intel/iwlwifi/iwl-config.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index ca4967b81d01..580b07a43856 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -491,8 +491,8 @@ struct iwl_cfg { #define IWL_CFG_RF_ID_HR 0x7 #define IWL_CFG_RF_ID_HR1 0x4 -#define IWL_CFG_NO_160 0x0 -#define IWL_CFG_160 0x1 +#define IWL_CFG_NO_160 0x1 +#define IWL_CFG_160 0x0 #define IWL_CFG_CORES_BT 0x0 #define IWL_CFG_CORES_BT_GNSS 0x5 From 9b15596c5006d82b2f82810e8cbf80d8c6e7e7b4 Mon Sep 17 00:00:00 2001 From: Golan Ben Ami Date: Wed, 2 Dec 2020 14:41:51 +0200 Subject: [PATCH 147/167] iwlwifi: pcie: add some missing entries for AX210 Some subsytem device IDs were missing from the list, so some AX210 devices were not recognized. Add them. Signed-off-by: Golan Ben Ami Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20201202143859.a06ba7540449.I7390305d088a49c1043c9b489154fe057989c18f@changeid Link: https://lore.kernel.org/r/20201121003411.9450-1-ikegami.t@gmail.com --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 466a967da39c..7b5ece380fbf 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -540,6 +540,11 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x2725, 0x0310, iwlax210_2ax_cfg_ty_gf_a0)}, {IWL_PCI_DEVICE(0x2725, 0x0510, iwlax210_2ax_cfg_ty_gf_a0)}, {IWL_PCI_DEVICE(0x2725, 0x0A10, iwlax210_2ax_cfg_ty_gf_a0)}, + {IWL_PCI_DEVICE(0x2725, 0xE020, iwlax210_2ax_cfg_ty_gf_a0)}, + {IWL_PCI_DEVICE(0x2725, 0xE024, iwlax210_2ax_cfg_ty_gf_a0)}, + {IWL_PCI_DEVICE(0x2725, 0x4020, iwlax210_2ax_cfg_ty_gf_a0)}, + {IWL_PCI_DEVICE(0x2725, 0x6020, iwlax210_2ax_cfg_ty_gf_a0)}, + {IWL_PCI_DEVICE(0x2725, 0x6024, iwlax210_2ax_cfg_ty_gf_a0)}, {IWL_PCI_DEVICE(0x2725, 0x00B0, iwlax411_2ax_cfg_sosnj_gf4_a0)}, {IWL_PCI_DEVICE(0x2726, 0x0070, iwlax201_cfg_snj_hr_b0)}, {IWL_PCI_DEVICE(0x2726, 0x0074, iwlax201_cfg_snj_hr_b0)}, From 807982017730cfe853fce49ba26d453e31c84898 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 26 Nov 2020 13:55:20 +0100 Subject: [PATCH 148/167] mt76: usb: fix crash on device removal Currently 'while (q->queued > 0)' loop was removed from mt76u_stop_tx() code. This causes crash on device removal as we try to cleanup empty queue: [ 96.495571] kernel BUG at include/linux/skbuff.h:2297! [ 96.498983] invalid opcode: 0000 [#1] SMP PTI [ 96.501162] CPU: 3 PID: 27 Comm: kworker/3:0 Not tainted 5.10.0-rc5+ #11 [ 96.502754] Hardware name: LENOVO 20DGS08H00/20DGS08H00, BIOS J5ET48WW (1.19 ) 08/27/2015 [ 96.504378] Workqueue: usb_hub_wq hub_event [ 96.505983] RIP: 0010:skb_pull+0x2d/0x30 [ 96.507576] Code: 00 00 8b 47 70 39 c6 77 1e 29 f0 89 47 70 3b 47 74 72 17 48 8b 87 c8 00 00 00 89 f6 48 01 f0 48 89 87 c8 00 00 00 c3 31 c0 c3 <0f> 0b 90 0f 1f 44 00 00 53 48 89 fb 48 8b bf c8 00 00 00 8b 43 70 [ 96.509296] RSP: 0018:ffffb11b801639b8 EFLAGS: 00010287 [ 96.511038] RAX: 000000001c6939ed RBX: ffffb11b801639f8 RCX: 0000000000000000 [ 96.512964] RDX: ffffb11b801639f8 RSI: 0000000000000018 RDI: ffff90c64e4fb800 [ 96.514710] RBP: ffff90c654551ee0 R08: ffff90c652bce7a8 R09: ffffb11b80163728 [ 96.516450] R10: 0000000000000001 R11: 0000000000000001 R12: ffff90c64e4fb800 [ 96.519749] R13: 0000000000000010 R14: 0000000000000020 R15: ffff90c64e352ce8 [ 96.523455] FS: 0000000000000000(0000) GS:ffff90c96eec0000(0000) knlGS:0000000000000000 [ 96.527171] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 96.530900] CR2: 0000242556f18288 CR3: 0000000146a10002 CR4: 00000000003706e0 [ 96.534678] Call Trace: [ 96.538418] mt76x02u_tx_complete_skb+0x1f/0x50 [mt76x02_usb] [ 96.542231] mt76_queue_tx_complete+0x23/0x50 [mt76] [ 96.546028] mt76u_stop_tx.cold+0x71/0xa2 [mt76_usb] [ 96.549797] mt76x0u_stop+0x2f/0x90 [mt76x0u] [ 96.553638] drv_stop+0x33/0xd0 [mac80211] [ 96.557449] ieee80211_do_stop+0x558/0x860 [mac80211] [ 96.561262] ? dev_deactivate_many+0x298/0x2d0 [ 96.565101] ieee80211_stop+0x16/0x20 [mac80211] Fix that by adding while loop again. We need loop, not just single check, to clean all pending entries. Additionally move mt76_worker_disable/enable after !mt76_has_tx_pending() as we want to tx_worker to run to process tx queues, while we wait for exactly that. I was a bit worried about accessing q->queued without lock, but mt76_worker_disable() -> kthread_park() should assure this value will be seen updated on other cpus. Fixes: fe5b5ab52e9d ("mt76: unify queue tx cleanup code") Signed-off-by: Stanislaw Gruszka Acked-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201126125520.72912-1-stf_xl@wp.pl --- drivers/net/wireless/mediatek/mt76/usb.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c index 7d3f0a2e5fa0..f1ae9ff835b2 100644 --- a/drivers/net/wireless/mediatek/mt76/usb.c +++ b/drivers/net/wireless/mediatek/mt76/usb.c @@ -1020,8 +1020,6 @@ void mt76u_stop_tx(struct mt76_dev *dev) { int ret; - mt76_worker_disable(&dev->tx_worker); - ret = wait_event_timeout(dev->tx_wait, !mt76_has_tx_pending(&dev->phy), HZ / 5); if (!ret) { @@ -1040,6 +1038,8 @@ void mt76u_stop_tx(struct mt76_dev *dev) usb_kill_urb(q->entry[j].urb); } + mt76_worker_disable(&dev->tx_worker); + /* On device removal we maight queue skb's, but mt76u_tx_kick() * will fail to submit urb, cleanup those skb's manually. */ @@ -1048,18 +1048,19 @@ void mt76u_stop_tx(struct mt76_dev *dev) if (!q) continue; - entry = q->entry[q->tail]; - q->entry[q->tail].done = false; - - mt76_queue_tx_complete(dev, q, &entry); + while (q->queued > 0) { + entry = q->entry[q->tail]; + q->entry[q->tail].done = false; + mt76_queue_tx_complete(dev, q, &entry); + } } + + mt76_worker_enable(&dev->tx_worker); } cancel_work_sync(&dev->usb.stat_work); clear_bit(MT76_READING_STATS, &dev->phy.state); - mt76_worker_enable(&dev->tx_worker); - mt76_tx_status_check(dev, NULL, true); } EXPORT_SYMBOL_GPL(mt76u_stop_tx); From 832ba596494b2c9eac7760259eff2d8b7dcad0ee Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 30 Nov 2020 17:19:11 +0100 Subject: [PATCH 149/167] net: ip6_gre: set dev->hard_header_len when using header_ops syzkaller managed to crash the kernel using an NBMA ip6gre interface. I could reproduce it creating an NBMA ip6gre interface and forwarding traffic to it: skbuff: skb_under_panic: text:ffffffff8250e927 len:148 put:44 head:ffff8c03c7a33 ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:109! Call Trace: skb_push+0x10/0x10 ip6gre_header+0x47/0x1b0 neigh_connected_output+0xae/0xf0 ip6gre tunnel provides its own header_ops->create, and sets it conditionally when initializing the tunnel in NBMA mode. When header_ops->create is used, dev->hard_header_len should reflect the length of the header created. Otherwise, when not used, dev->needed_headroom should be used. Fixes: eb95f52fc72d ("net: ipv6_gre: Fix GRO to work on IPv6 over GRE tap") Cc: Maria Pasechnik Signed-off-by: Antoine Tenart Link: https://lore.kernel.org/r/20201130161911.464106-1-atenart@kernel.org Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_gre.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 931b186d2e48..cf6e1380b527 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1133,8 +1133,13 @@ static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu, return; if (rt->dst.dev) { - dev->needed_headroom = rt->dst.dev->hard_header_len + - t_hlen; + unsigned short dst_len = rt->dst.dev->hard_header_len + + t_hlen; + + if (t->dev->header_ops) + dev->hard_header_len = dst_len; + else + dev->needed_headroom = dst_len; if (set_mtu) { dev->mtu = rt->dst.dev->mtu - t_hlen; @@ -1159,7 +1164,12 @@ static int ip6gre_calc_hlen(struct ip6_tnl *tunnel) tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); - tunnel->dev->needed_headroom = LL_MAX_HEADER + t_hlen; + + if (tunnel->dev->header_ops) + tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen; + else + tunnel->dev->needed_headroom = LL_MAX_HEADER + t_hlen; + return t_hlen; } From 07500a6085806d97039ebcba8d9b8b29129f0106 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Tue, 1 Dec 2020 15:52:58 +0800 Subject: [PATCH 150/167] dpaa_eth: copy timestamp fields to new skb in A-050385 workaround The timestamp fields should be copied to new skb too in A-050385 workaround for later TX timestamping handling. Fixes: 3c68b8fffb48 ("dpaa_eth: FMan erratum A050385 workaround") Signed-off-by: Yangbo Lu Acked-by: Camelia Groza Link: https://lore.kernel.org/r/20201201075258.1875-1-yangbo.lu@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index d9c285948fc2..cb7c028b1bf5 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2120,6 +2120,15 @@ workaround: skb_copy_header(new_skb, skb); new_skb->dev = skb->dev; + /* Copy relevant timestamp info from the old skb to the new */ + if (priv->tx_tstamp) { + skb_shinfo(new_skb)->tx_flags = skb_shinfo(skb)->tx_flags; + skb_shinfo(new_skb)->hwtstamps = skb_shinfo(skb)->hwtstamps; + skb_shinfo(new_skb)->tskey = skb_shinfo(skb)->tskey; + if (skb->sk) + skb_set_owner_w(new_skb, skb->sk); + } + /* We move the headroom when we align it so we have to reset the * network and transport header offsets relative to the new data * pointer. The checksum offload relies on these offsets. @@ -2127,7 +2136,6 @@ workaround: skb_set_network_header(new_skb, skb_network_offset(skb)); skb_set_transport_header(new_skb, skb_transport_offset(skb)); - /* TODO: does timestamping need the result in the old skb? */ dev_kfree_skb(skb); *s = new_skb; From 6ee50c8e262a0f0693dad264c3c99e30e6442a56 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 1 Dec 2020 18:15:12 +0300 Subject: [PATCH 151/167] net/x25: prevent a couple of overflows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .x25_addr[] address comes from the user and is not necessarily NUL terminated. This leads to a couple problems. The first problem is that the strlen() in x25_bind() can read beyond the end of the buffer. The second problem is more subtle and could result in memory corruption. The call tree is: x25_connect() --> x25_write_internal() --> x25_addr_aton() The .x25_addr[] buffers are copied to the "addresses" buffer from x25_write_internal() so it will lead to stack corruption. Verify that the strings are NUL terminated and return -EINVAL if they are not. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Fixes: a9288525d2ae ("X25: Dont let x25_bind use addresses containing characters") Reported-by: "kiyin(尹亮)" Signed-off-by: Dan Carpenter Acked-by: Martin Schiller Link: https://lore.kernel.org/r/X8ZeAKm8FnFpN//B@mwanda Signed-off-by: Jakub Kicinski --- net/x25/af_x25.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index a10487e7574c..e65a50192432 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -681,7 +681,8 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) int len, i, rc = 0; if (addr_len != sizeof(struct sockaddr_x25) || - addr->sx25_family != AF_X25) { + addr->sx25_family != AF_X25 || + strnlen(addr->sx25_addr.x25_addr, X25_ADDR_LEN) == X25_ADDR_LEN) { rc = -EINVAL; goto out; } @@ -775,7 +776,8 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, rc = -EINVAL; if (addr_len != sizeof(struct sockaddr_x25) || - addr->sx25_family != AF_X25) + addr->sx25_family != AF_X25 || + strnlen(addr->sx25_addr.x25_addr, X25_ADDR_LEN) == X25_ADDR_LEN) goto out; rc = -ENETUNREACH; From ff9924897f8bfed82e61894b373ab9d2dfea5b10 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Wed, 2 Dec 2020 17:56:05 +0800 Subject: [PATCH 152/167] cxgb3: fix error return code in t3_sge_alloc_qset() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: b1fb1f280d09 ("cxgb3 - Fix dma mapping error path") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Acked-by: Raju Rangoju Link: https://lore.kernel.org/r/1606902965-1646-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb3/sge.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c index e18e9ce27f94..1cc3c51eff71 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c @@ -3175,6 +3175,7 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports, GFP_KERNEL | __GFP_COMP); if (!avail) { CH_ALERT(adapter, "free list queue 0 initialization failed\n"); + ret = -ENOMEM; goto err; } if (avail < q->fl[0].size) From aba84871bd4f52c4dfcf3ad5d4501a6c9d2de90e Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Wed, 2 Dec 2020 17:57:15 +0800 Subject: [PATCH 153/167] net: pasemi: fix error return code in pasemi_mac_open() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 72b05b9940f0 ("pasemi_mac: RX/TX ring management cleanup") Fixes: 8d636d8bc5ff ("pasemi_mac: jumbo frame support") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1606903035-1838-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pasemi/pasemi_mac.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c index be6660128b55..040a15a828b4 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac.c +++ b/drivers/net/ethernet/pasemi/pasemi_mac.c @@ -1078,16 +1078,20 @@ static int pasemi_mac_open(struct net_device *dev) mac->tx = pasemi_mac_setup_tx_resources(dev); - if (!mac->tx) + if (!mac->tx) { + ret = -ENOMEM; goto out_tx_ring; + } /* We might already have allocated rings in case mtu was changed * before interface was brought up. */ if (dev->mtu > 1500 && !mac->num_cs) { pasemi_mac_setup_csrings(mac); - if (!mac->num_cs) + if (!mac->num_cs) { + ret = -ENOMEM; goto out_tx_ring; + } } /* Zero out rmon counters */ From 832e09798c261cf58de3a68cfcc6556408c16a5a Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Wed, 2 Dec 2020 17:58:42 +0800 Subject: [PATCH 154/167] vxlan: fix error return code in __vxlan_dev_create() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 0ce1822c2a08 ("vxlan: add adjacent link to limit depth level") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1606903122-2098-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 032f78261913..977f77e2c2ce 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3880,8 +3880,10 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, if (dst->remote_ifindex) { remote_dev = __dev_get_by_index(net, dst->remote_ifindex); - if (!remote_dev) + if (!remote_dev) { + err = -ENODEV; goto errout; + } err = netdev_upper_dev_link(remote_dev, dev, extack); if (err) From 74a8c816fa8fa7862df870660e9821abb56649fe Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 3 Dec 2020 11:43:37 +0300 Subject: [PATCH 155/167] rtw88: debug: Fix uninitialized memory in debugfs code This code does not ensure that the whole buffer is initialized and none of the callers check for errors so potentially none of the buffer is initialized. Add a memset to eliminate this bug. Fixes: e3037485c68e ("rtw88: new Realtek 802.11ac driver") Signed-off-by: Dan Carpenter Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/X8ilOfVz3pf0T5ec@mwanda --- drivers/net/wireless/realtek/rtw88/debug.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw88/debug.c b/drivers/net/wireless/realtek/rtw88/debug.c index 3852c4f0ac0b..efbba9caef3b 100644 --- a/drivers/net/wireless/realtek/rtw88/debug.c +++ b/drivers/net/wireless/realtek/rtw88/debug.c @@ -147,6 +147,8 @@ static int rtw_debugfs_copy_from_user(char tmp[], int size, { int tmp_len; + memset(tmp, 0, size); + if (count < num) return -EFAULT; From 4f134b89a24b965991e7c345b9a4591821f7c2a6 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 30 Nov 2020 08:36:48 +0100 Subject: [PATCH 156/167] lib/syscall: fix syscall registers retrieval on 32-bit platforms Lilith >_> and Claudio Bozzato of Cisco Talos security team reported that collect_syscall() improperly casts the syscall registers to 64-bit values leaking the uninitialized last 24 bytes on 32-bit platforms, that are visible in /proc/self/syscall. The cause is that info->data.args are u64 while syscall_get_arguments() uses longs, as hinted by the bogus pointer cast in the function. Let's just proceed like the other call places, by retrieving the registers into an array of longs before assigning them to the caller's array. This was successfully tested on x86_64, i386 and ppc32. Reference: CVE-2020-28588, TALOS-2020-1211 Fixes: 631b7abacd02 ("ptrace: Remove maxargs from task_current_syscall()") Cc: Greg KH Reviewed-by: Kees Cook Tested-by: Michael Ellerman (ppc32) Signed-off-by: Willy Tarreau Reviewed-by: Thomas Gleixner Signed-off-by: Linus Torvalds --- lib/syscall.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/lib/syscall.c b/lib/syscall.c index 8533d2fea2d7..ba13e924c430 100644 --- a/lib/syscall.c +++ b/lib/syscall.c @@ -7,6 +7,7 @@ static int collect_syscall(struct task_struct *target, struct syscall_info *info) { + unsigned long args[6] = { }; struct pt_regs *regs; if (!try_get_task_stack(target)) { @@ -27,8 +28,14 @@ static int collect_syscall(struct task_struct *target, struct syscall_info *info info->data.nr = syscall_get_nr(target, regs); if (info->data.nr != -1L) - syscall_get_arguments(target, regs, - (unsigned long *)&info->data.args[0]); + syscall_get_arguments(target, regs, args); + + info->data.args[0] = args[0]; + info->data.args[1] = args[1]; + info->data.args[2] = args[2]; + info->data.args[3] = args[3]; + info->data.args[4] = args[4]; + info->data.args[5] = args[5]; put_task_stack(target); return 0; From 062c9cdf60a1e581b1002d372f1cf8e745fe3c16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 3 Dec 2020 09:41:42 +0100 Subject: [PATCH 157/167] pwm: sl28cpld: fix getting driver data in pwm callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently .get_state() and .apply() use dev_get_drvdata() on the struct device related to the pwm chip. This only works after .probe() called platform_set_drvdata() which in this driver happens only after pwmchip_add() and so comes possibly too late. Instead of setting the driver data earlier use the traditional container_of approach as this way the driver data is conceptually and computational nearer. Fixes: 9db33d221efc ("pwm: Add support for sl28cpld PWM controller") Tested-by: Michael Walle Signed-off-by: Uwe Kleine-König Signed-off-by: Linus Torvalds --- drivers/pwm/pwm-sl28cpld.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pwm/pwm-sl28cpld.c b/drivers/pwm/pwm-sl28cpld.c index 5046b6b7fd35..b4c651fc749c 100644 --- a/drivers/pwm/pwm-sl28cpld.c +++ b/drivers/pwm/pwm-sl28cpld.c @@ -84,12 +84,14 @@ struct sl28cpld_pwm { struct regmap *regmap; u32 offset; }; +#define sl28cpld_pwm_from_chip(_chip) \ + container_of(_chip, struct sl28cpld_pwm, pwm_chip) static void sl28cpld_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state) { - struct sl28cpld_pwm *priv = dev_get_drvdata(chip->dev); + struct sl28cpld_pwm *priv = sl28cpld_pwm_from_chip(chip); unsigned int reg; int prescaler; @@ -118,7 +120,7 @@ static void sl28cpld_pwm_get_state(struct pwm_chip *chip, static int sl28cpld_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, const struct pwm_state *state) { - struct sl28cpld_pwm *priv = dev_get_drvdata(chip->dev); + struct sl28cpld_pwm *priv = sl28cpld_pwm_from_chip(chip); unsigned int cycle, prescaler; bool write_duty_cycle_first; int ret; From 72d1249e2ffdbc344e465031ec5335fa3489d62e Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Tue, 1 Dec 2020 17:21:40 -0600 Subject: [PATCH 158/167] uapi: fix statx attribute value overlap for DAX & MOUNT_ROOT STATX_ATTR_MOUNT_ROOT and STATX_ATTR_DAX got merged with the same value, so one of them needs fixing. Move STATX_ATTR_DAX. While we're in here, clarify the value-matching scheme for some of the attributes, and explain why the value for DAX does not match. Fixes: 80340fe3605c ("statx: add mount_root") Fixes: 712b2698e4c0 ("fs/stat: Define DAX statx attribute") Link: https://lore.kernel.org/linux-fsdevel/7027520f-7c79-087e-1d00-743bdefa1a1e@redhat.com/ Link: https://lore.kernel.org/lkml/20201202214629.1563760-1-ira.weiny@intel.com/ Reported-by: David Howells Signed-off-by: Eric Sandeen Reviewed-by: David Howells Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Ira Weiny Cc: # 5.8 Signed-off-by: Linus Torvalds --- include/uapi/linux/stat.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 82cc58fe9368..1500a0f58041 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -171,9 +171,12 @@ struct statx { * be of use to ordinary userspace programs such as GUIs or ls rather than * specialised tools. * - * Note that the flags marked [I] correspond to generic FS_IOC_FLAGS + * Note that the flags marked [I] correspond to the FS_IOC_SETFLAGS flags * semantically. Where possible, the numerical value is picked to correspond - * also. + * also. Note that the DAX attribute indicates that the file is in the CPU + * direct access state. It does not correspond to the per-inode flag that + * some filesystems support. + * */ #define STATX_ATTR_COMPRESSED 0x00000004 /* [I] File is compressed by the fs */ #define STATX_ATTR_IMMUTABLE 0x00000010 /* [I] File is marked immutable */ @@ -183,7 +186,7 @@ struct statx { #define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */ #define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ -#define STATX_ATTR_DAX 0x00002000 /* [I] File is DAX */ +#define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */ #endif /* _UAPI_LINUX_STAT_H */ From 391119fb5c5c4bdb4d57c7ffeb5e8d18560783d1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 3 Dec 2020 11:44:31 +0300 Subject: [PATCH 159/167] chelsio/chtls: fix a double free in chtls_setkey() The "skb" is freed by the transmit code in cxgb4_ofld_send() and we shouldn't use it again. But in the current code, if we hit an error later on in the function then the clean up code will call kfree_skb(skb) and so it causes a double free. Set the "skb" to NULL and that makes the kfree_skb() a no-op. Fixes: d25f2f71f653 ("crypto: chtls - Program the TLS session Key") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/X8ilb6PtBRLWiSHp@mwanda Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c index 62c829023da5..a4fb463af22a 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c @@ -391,6 +391,7 @@ int chtls_setkey(struct chtls_sock *csk, u32 keylen, csk->wr_unacked += DIV_ROUND_UP(len, 16); enqueue_wr(csk, skb); cxgb4_ofld_send(csk->egress_dev, skb); + skb = NULL; chtls_set_scmd(csk); /* Clear quiesce for Rx key */ From 82a10dc7f0960735f40e8d7d3bee56934291600f Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Thu, 3 Dec 2020 22:18:06 +0800 Subject: [PATCH 160/167] net: mvpp2: Fix error return code in mvpp2_open() Fix to return negative error code -ENOENT from invalid configuration error handling case instead of 0, as done elsewhere in this function. Fixes: 4bb043262878 ("net: mvpp2: phylink support") Reported-by: Hulk Robot Signed-off-by: Wang Hai Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20201203141806.37966-1-wanghai38@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index f6616c8933ca..cea886c5bcb5 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -4426,6 +4426,7 @@ static int mvpp2_open(struct net_device *dev) if (!valid) { netdev_err(port->dev, "invalid configuration: no dt or link IRQ"); + err = -ENOENT; goto err_free_irq; } From 13de4ed9e3a9ccbe54d05f7d5c773f69ecaf6c64 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 3 Dec 2020 10:58:21 +0100 Subject: [PATCH 161/167] net: skbuff: ensure LSE is pullable before decrementing the MPLS ttl skb_mpls_dec_ttl() reads the LSE without ensuring that it is contained in the skb "linear" area. Fix this calling pskb_may_pull() before reading the current ttl. Found by code inspection. Fixes: 2a2ea50870ba ("net: sched: add mpls manipulation actions to TC") Reported-by: Marcelo Ricardo Leitner Signed-off-by: Davide Caratti Link: https://lore.kernel.org/r/53659f28be8bc336c113b5254dc637cc76bbae91.1606987074.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 06c526e0d810..e578544b2cc7 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5786,6 +5786,9 @@ int skb_mpls_dec_ttl(struct sk_buff *skb) if (unlikely(!eth_p_mpls(skb->protocol))) return -EINVAL; + if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN)) + return -ENOMEM; + lse = be32_to_cpu(mpls_hdr(skb)->label_stack_entry); ttl = (lse & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT; if (!--ttl) From 43c13605bad44b8abbc9776d6e63f62ccb7a47d6 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 3 Dec 2020 10:46:06 +0100 Subject: [PATCH 162/167] net: openvswitch: ensure LSE is pullable before reading it when openvswitch is configured to mangle the LSE, the current value is read from the packet dereferencing 4 bytes at mpls_hdr(): ensure that the label is contained in the skb "linear" area. Found by code inspection. Fixes: d27cf5c59a12 ("net: core: add MPLS update core helper and use in OvS") Signed-off-by: Davide Caratti Link: https://lore.kernel.org/r/aa099f245d93218b84b5c056b67b6058ccf81a66.1606987185.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- net/openvswitch/actions.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 5829a020b81c..c3a664871cb5 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -199,6 +199,9 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, __be32 lse; int err; + if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN)) + return -ENOMEM; + stack = mpls_hdr(skb); lse = OVS_MASKED(stack->label_stack_entry, *mpls_lse, *mask); err = skb_mpls_update_lse(skb, lse); From 9608fa653059c3f72faab0c148ac8773c46e7314 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 3 Dec 2020 10:37:52 +0100 Subject: [PATCH 163/167] net/sched: act_mpls: ensure LSE is pullable before reading it when 'act_mpls' is used to mangle the LSE, the current value is read from the packet dereferencing 4 bytes at mpls_hdr(): ensure that the label is contained in the skb "linear" area. Found by code inspection. v2: - use MPLS_HLEN instead of sizeof(new_lse), thanks to Jakub Kicinski Fixes: 2a2ea50870ba ("net: sched: add mpls manipulation actions to TC") Signed-off-by: Davide Caratti Acked-by: Guillaume Nault Link: https://lore.kernel.org/r/3243506cba43d14858f3bd21ee0994160e44d64a.1606987058.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- net/sched/act_mpls.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index 5c7456e5b5cf..d1486ea496a2 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -105,6 +105,9 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a, goto drop; break; case TCA_MPLS_ACT_MODIFY: + if (!pskb_may_pull(skb, + skb_network_offset(skb) + MPLS_HLEN)) + goto drop; new_lse = tcf_mpls_get_lse(mpls_hdr(skb), p, false); if (skb_mpls_update_lse(skb, new_lse)) goto drop; From 1d2bb5ad89f47d8ce8aedc70ef85059ab3870292 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Wed, 2 Dec 2020 20:39:43 -0800 Subject: [PATCH 164/167] net/mlx5: Fix wrong address reclaim when command interface is down When command interface is down, driver to reclaim all 4K page chucks that were hold by the Firmeware. Fix a bug for 64K page size systems, where driver repeatedly released only the first chunk of the page. Define helper function to fill 4K chunks for a given Firmware pages. Iterate over all unreleased Firmware pages and call the hepler per each. Fixes: 5adff6a08862 ("net/mlx5: Fix incorrect page count when in internal error") Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlx5/core/pagealloc.c | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 150638814517..4d7f8a357df7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -422,6 +422,24 @@ static void release_all_pages(struct mlx5_core_dev *dev, u32 func_id, npages, ec_function, func_id); } +static u32 fwp_fill_manage_pages_out(struct fw_page *fwp, u32 *out, u32 index, + u32 npages) +{ + u32 pages_set = 0; + unsigned int n; + + for_each_clear_bit(n, &fwp->bitmask, MLX5_NUM_4K_IN_PAGE) { + MLX5_ARRAY_SET64(manage_pages_out, out, pas, index + pages_set, + fwp->addr + (n * MLX5_ADAPTER_PAGE_SIZE)); + pages_set++; + + if (!--npages) + break; + } + + return pages_set; +} + static int reclaim_pages_cmd(struct mlx5_core_dev *dev, u32 *in, int in_size, u32 *out, int out_size) { @@ -448,8 +466,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, fwp = rb_entry(p, struct fw_page, rb_node); p = rb_next(p); - MLX5_ARRAY_SET64(manage_pages_out, out, pas, i, fwp->addr); - i++; + i += fwp_fill_manage_pages_out(fwp, out, i, npages - i); } MLX5_SET(manage_pages_out, out, output_num_entries, i); From 8a78a440108e55ddd845b0ef46df575248667520 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 2 Dec 2020 20:39:44 -0800 Subject: [PATCH 165/167] net: mlx5e: fix fs_tcp.c build when IPV6 is not enabled Fix build when CONFIG_IPV6 is not enabled by making a function be built conditionally. Fixes these build errors and warnings: ../drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c: In function 'accel_fs_tcp_set_ipv6_flow': ../include/net/sock.h:380:34: error: 'struct sock_common' has no member named 'skc_v6_daddr'; did you mean 'skc_daddr'? 380 | #define sk_v6_daddr __sk_common.skc_v6_daddr | ^~~~~~~~~~~~ ../drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c:55:14: note: in expansion of macro 'sk_v6_daddr' 55 | &sk->sk_v6_daddr, 16); | ^~~~~~~~~~~ At top level: ../drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c:47:13: warning: 'accel_fs_tcp_set_ipv6_flow' defined but not used [-Wunused-function] 47 | static void accel_fs_tcp_set_ipv6_flow(struct mlx5_flow_spec *spec, struct sock *sk) Fixes: 5229a96e59ec ("net/mlx5e: Accel, Expose flow steering API for rules add/del") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c index 97f1594cee11..e51f60b55daa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c @@ -44,6 +44,7 @@ static void accel_fs_tcp_set_ipv4_flow(struct mlx5_flow_spec *spec, struct sock outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); } +#if IS_ENABLED(CONFIG_IPV6) static void accel_fs_tcp_set_ipv6_flow(struct mlx5_flow_spec *spec, struct sock *sk) { MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); @@ -63,6 +64,7 @@ static void accel_fs_tcp_set_ipv6_flow(struct mlx5_flow_spec *spec, struct sock outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 0xff, 16); } +#endif void mlx5e_accel_fs_del_sk(struct mlx5_flow_handle *rule) { From b336e6b25e2d053c482ee4339787e6428f390864 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 2 Dec 2020 20:39:45 -0800 Subject: [PATCH 166/167] net/mlx5e: kTLS, Enforce HW TX csum offload with kTLS Checksum calculation cannot be done in SW for TX kTLS HW offloaded packets. Offload it to the device, disregard the declared state of the TX csum offload feature. Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") Signed-off-by: Tariq Toukan Reviewed-by: Maxim Mikityanskiy Reviewed-by: Boris Pismenny Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/en_tx.c | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 6dd3ea3cbbed..d97203cf6a00 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -161,7 +161,9 @@ ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, } static inline void -mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) +mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5e_accel_tx_state *accel, + struct mlx5_wqe_eth_seg *eseg) { if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; @@ -173,6 +175,11 @@ mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; sq->stats->csum_partial++; } +#ifdef CONFIG_MLX5_EN_TLS + } else if (unlikely(accel && accel->tls.tls_tisn)) { + eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; + sq->stats->csum_partial++; +#endif } else if (unlikely(eseg->flow_table_metadata & cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC))) { ipsec_txwqe_build_eseg_csum(sq, skb, eseg); @@ -607,12 +614,13 @@ void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq) } static bool mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq, - struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) + struct sk_buff *skb, struct mlx5e_accel_tx_state *accel, + struct mlx5_wqe_eth_seg *eseg) { if (unlikely(!mlx5e_accel_tx_eseg(priv, skb, eseg))) return false; - mlx5e_txwqe_build_eseg_csum(sq, skb, eseg); + mlx5e_txwqe_build_eseg_csum(sq, skb, accel, eseg); return true; } @@ -639,7 +647,7 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) if (mlx5e_tx_skb_supports_mpwqe(skb, &attr)) { struct mlx5_wqe_eth_seg eseg = {}; - if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &eseg))) + if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &eseg))) return NETDEV_TX_OK; mlx5e_sq_xmit_mpwqe(sq, skb, &eseg, netdev_xmit_more()); @@ -656,7 +664,7 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) /* May update the WQE, but may not post other WQEs. */ mlx5e_accel_tx_finish(sq, wqe, &accel, (struct mlx5_wqe_inline_seg *)(wqe->data + wqe_attr.ds_cnt_inl)); - if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &wqe->eth))) + if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &wqe->eth))) return NETDEV_TX_OK; mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, netdev_xmit_more()); @@ -675,7 +683,7 @@ void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr); pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs); wqe = MLX5E_TX_FETCH_WQE(sq, pi); - mlx5e_txwqe_build_eseg_csum(sq, skb, &wqe->eth); + mlx5e_txwqe_build_eseg_csum(sq, skb, NULL, &wqe->eth); mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, xmit_more); } @@ -944,7 +952,7 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, mlx5i_txwqe_build_datagram(av, dqpn, dqkey, datagram); - mlx5e_txwqe_build_eseg_csum(sq, skb, eseg); + mlx5e_txwqe_build_eseg_csum(sq, skb, NULL, eseg); eseg->mss = attr.mss; From d421e466c2373095f165ddd25cbabd6c5b077928 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Wed, 2 Dec 2020 20:39:46 -0800 Subject: [PATCH 167/167] net/mlx5: DR, Proper handling of unsupported Connect-X6DX SW steering STEs format for Connect-X5 and Connect-X6DX different. Currently, on Connext-X6DX the SW steering would break at some point when building STEs w/o giving a proper error message. Fix this by checking the STE format of the current device when initializing domain: add mlx5_ifc definitions for Connect-X6DX SW steering, read FW capability to get the current format version, and check this version when domain is being created. Fixes: 26d688e33f88 ("net/mlx5: DR, Add Steering entry (STE) utilities") Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c | 1 + .../net/ethernet/mellanox/mlx5/core/steering/dr_domain.c | 5 +++++ .../net/ethernet/mellanox/mlx5/core/steering/dr_types.h | 1 + include/linux/mlx5/mlx5_ifc.h | 9 ++++++++- 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c index 6bd34b293007..51bbd88ff021 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -92,6 +92,7 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, caps->eswitch_manager = MLX5_CAP_GEN(mdev, eswitch_manager); caps->gvmi = MLX5_CAP_GEN(mdev, vhca_id); caps->flex_protocols = MLX5_CAP_GEN(mdev, flex_parser_protocols); + caps->sw_format_ver = MLX5_CAP_GEN(mdev, steering_format_version); if (mlx5dr_matcher_supp_flex_parser_icmp_v4(caps)) { caps->flex_parser_id_icmp_dw0 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index 890767a2a7cb..aa2c2d6c44e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -223,6 +223,11 @@ static int dr_domain_caps_init(struct mlx5_core_dev *mdev, if (ret) return ret; + if (dmn->info.caps.sw_format_ver != MLX5_STEERING_FORMAT_CONNECTX_5) { + mlx5dr_err(dmn, "SW steering is not supported on this device\n"); + return -EOPNOTSUPP; + } + ret = dr_domain_query_fdb_caps(mdev, dmn); if (ret) return ret; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index f50f3b107aa3..cf62ea4f882e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -625,6 +625,7 @@ struct mlx5dr_cmd_caps { u8 max_ft_level; u16 roce_min_src_udp; u8 num_esw_ports; + u8 sw_format_ver; bool eswitch_manager; bool rx_sw_owner; bool tx_sw_owner; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index a092346c7b2d..233352447b1a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1223,6 +1223,11 @@ enum mlx5_fc_bulk_alloc_bitmask { #define MLX5_FC_BULK_NUM_FCS(fc_enum) (MLX5_FC_BULK_SIZE_FACTOR * (fc_enum)) +enum { + MLX5_STEERING_FORMAT_CONNECTX_5 = 0, + MLX5_STEERING_FORMAT_CONNECTX_6DX = 1, +}; + struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_0[0x30]; u8 vhca_id[0x10]; @@ -1521,7 +1526,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 general_obj_types[0x40]; - u8 reserved_at_440[0x20]; + u8 reserved_at_440[0x4]; + u8 steering_format_version[0x4]; + u8 create_qp_start_hint[0x18]; u8 reserved_at_460[0x3]; u8 log_max_uctx[0x5];