From 08d2061ff9c5319a07bf9ca6bbf11fdec68f704a Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Wed, 17 Nov 2021 15:02:22 +0100 Subject: [PATCH 001/251] arm64: dts: allwinner: orangepi-zero-plus: fix PHY mode Orange Pi Zero Plus uses a Realtek RTL8211E RGMII Gigabit PHY, but its currently set to plain RGMII mode meaning that it doesn't introduce delays. With this setup, TX packets are completely lost and changing the mode to RGMII-ID so the PHY will add delays internally fixes the issue. Fixes: a7affb13b271 ("arm64: allwinner: H5: Add Xunlong Orange Pi Zero Plus") Acked-by: Chen-Yu Tsai Tested-by: Ron Goossens Tested-by: Samuel Holland Signed-off-by: Robert Marko Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20211117140222.43692-1-robert.marko@sartura.hr --- arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts index d13980ed7a79..7ec5ac850a0d 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts @@ -69,7 +69,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; From 885633075847f475f26a29249d772cc0da85d8cd Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Mon, 25 Oct 2021 12:16:56 -0600 Subject: [PATCH 002/251] dmaengine: dw-axi-dmac: Fix uninitialized variable in axi_chan_block_xfer_start() Coverity complains of an uninitialized variable: 5. uninit_use_in_call: Using uninitialized value config.dst_per when calling axi_chan_config_write. [show details] 6. uninit_use_in_call: Using uninitialized value config.hs_sel_src when calling axi_chan_config_write. [show details] CID 121164 (#1-3 of 3): Uninitialized scalar variable (UNINIT) 7. uninit_use_in_call: Using uninitialized value config.src_per when calling axi_chan_config_write. [show details] 418 axi_chan_config_write(chan, &config); Fix this by initializing the structure to 0 which should at least be benign in axi_chan_config_write(). Also fix what looks like a cut-n-paste error when initializing config.hs_sel_dst. Fixes: 824351668a413 ("dmaengine: dw-axi-dmac: support DMAX_NUM_CHANNELS > 8") Cc: Eugeniy Paltsev Cc: Vinod Koul Cc: dmaengine@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Tim Gardner Link: https://lore.kernel.org/r/20211025181656.31658-1-tim.gardner@canonical.com Signed-off-by: Vinod Koul --- drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c index cd0d745eb071..33baf1591a49 100644 --- a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c +++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c @@ -373,7 +373,7 @@ static void axi_chan_block_xfer_start(struct axi_dma_chan *chan, struct axi_dma_desc *first) { u32 priority = chan->chip->dw->hdata->priority[chan->id]; - struct axi_dma_chan_config config; + struct axi_dma_chan_config config = {}; u32 irq_mask; u8 lms = 0; /* Select AXI0 master for LLI fetching */ @@ -391,7 +391,7 @@ static void axi_chan_block_xfer_start(struct axi_dma_chan *chan, config.tt_fc = DWAXIDMAC_TT_FC_MEM_TO_MEM_DMAC; config.prior = priority; config.hs_sel_dst = DWAXIDMAC_HS_SEL_HW; - config.hs_sel_dst = DWAXIDMAC_HS_SEL_HW; + config.hs_sel_src = DWAXIDMAC_HS_SEL_HW; switch (chan->direction) { case DMA_MEM_TO_DEV: dw_axi_dma_set_byte_halfword(chan, true); From 1ffc6f359f7ab114ad0d2bbe6a85cbd848709ab2 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 9 Nov 2021 22:09:56 +0100 Subject: [PATCH 003/251] dmaengine: dw-edma: Fix return value check for dma_set_mask_and_coherent() The commit in the Fixes: tag has changed the logic of the code and now it is likely that the probe will return an early success (0), even if not completely executed. This should lead to a crash or similar issue later on when the code accesses to some never allocated resources. Change the '!err' into a 'err' when checking if 'dma_set_mask_and_coherent()' has failed or not. While at it, simplify the code and remove the "can't success code" related to 32 DMA mask. As stated in [1], 'dma_set_mask_and_coherent(DMA_BIT_MASK(64))' can't fail if 'dev->dma_mask' is non-NULL. And if it is NULL, it would fail for the same reason when tried with DMA_BIT_MASK(32). [1]: https://lkml.org/lkml/2021/6/7/398 Fixes: ecb8c88bd31c ("dmaengine: dw-edma-pcie: switch from 'pci_' to 'dma_' API") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/935fbb40ae930c5fe87482a41dcb73abf2257973.1636492127.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-edma-pcie.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/dma/dw-edma/dw-edma-pcie.c b/drivers/dma/dw-edma/dw-edma-pcie.c index 198f6cd8ac1b..cee7aa231d7b 100644 --- a/drivers/dma/dw-edma/dw-edma-pcie.c +++ b/drivers/dma/dw-edma/dw-edma-pcie.c @@ -187,17 +187,9 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev, /* DMA configuration */ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (!err) { + if (err) { pci_err(pdev, "DMA mask 64 set failed\n"); return err; - } else { - pci_err(pdev, "DMA mask 64 set failed\n"); - - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - pci_err(pdev, "DMA mask 32 set failed\n"); - return err; - } } /* Data structure allocation */ From fa51b16d05583c7aebbc06330afb50276243d198 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 17 Nov 2021 10:03:51 -0700 Subject: [PATCH 004/251] dmaengine: idxd: fix calling wq quiesce inside spinlock Dan reports that smatch has found idxd_wq_quiesce() is being called inside the idxd->dev_lock. idxd_wq_quiesce() calls wait_for_completion() and therefore it can sleep. Move the call outside of the spinlock as it does not need device lock. Fixes: 5b0c68c473a1 ("dmaengine: idxd: support reporting of halt interrupt") Reported-by: Dan Carpenter Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163716858508.1721911.15051495873516709923.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 17f2f8a31b63..cf2c8bc4f147 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -137,10 +137,10 @@ halt: INIT_WORK(&idxd->work, idxd_device_reinit); queue_work(idxd->wq, &idxd->work); } else { - spin_lock(&idxd->dev_lock); idxd->state = IDXD_DEV_HALTED; idxd_wqs_quiesce(idxd); idxd_wqs_unmap_portal(idxd); + spin_lock(&idxd->dev_lock); idxd_device_clear_state(idxd); dev_err(&idxd->pdev->dev, "idxd halted, need %s.\n", From 017a716e7b0e9d4ac06a4d7779bd04fca009bbc9 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Sun, 21 Nov 2021 09:35:37 +0100 Subject: [PATCH 005/251] bus: sunxi-rsb: Fix shutdown Function sunxi_rsb_hw_exit() is sometimes called with pm runtime disabled, so in such cases pm_runtime_resume() will fail with -EACCES. Instead of doing whole dance of enabling pm runtime and thus clock just to disable it again immediately, just check if disabling clock is needed. That way calling pm_runtime_resume() is not needed at all. Fixes: 4a0dbc12e618 ("bus: sunxi-rsb: Implement runtime power management") Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20211121083537.612473-1-jernej.skrabec@gmail.com --- drivers/bus/sunxi-rsb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c index 6f225dddc74f..4566e730ef2b 100644 --- a/drivers/bus/sunxi-rsb.c +++ b/drivers/bus/sunxi-rsb.c @@ -687,11 +687,11 @@ err_clk_disable: static void sunxi_rsb_hw_exit(struct sunxi_rsb *rsb) { - /* Keep the clock and PM reference counts consistent. */ - if (pm_runtime_status_suspended(rsb->dev)) - pm_runtime_resume(rsb->dev); reset_control_assert(rsb->rstc); - clk_disable_unprepare(rsb->clk); + + /* Keep the clock and PM reference counts consistent. */ + if (!pm_runtime_status_suspended(rsb->dev)) + clk_disable_unprepare(rsb->clk); } static int __maybe_unused sunxi_rsb_runtime_suspend(struct device *dev) From 2d5446da5acecf9c67db1c9d55ae2c3e5de01f8d Mon Sep 17 00:00:00 2001 From: Guodong Liu Date: Wed, 10 Nov 2021 15:19:00 +0800 Subject: [PATCH 006/251] pinctrl: mediatek: fix global-out-of-bounds issue When eint virtual eint number is greater than gpio number, it maybe produce 'desc[eint_n]' size globle-out-of-bounds issue. Signed-off-by: Guodong Liu Signed-off-by: Zhiyong Tao Reviewed-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20211110071900.4490-2-zhiyong.tao@mediatek.com Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c index 91553b2fc160..53779822348d 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c @@ -285,8 +285,12 @@ static int mtk_xt_get_gpio_n(void *data, unsigned long eint_n, desc = (const struct mtk_pin_desc *)hw->soc->pins; *gpio_chip = &hw->chip; - /* Be greedy to guess first gpio_n is equal to eint_n */ - if (desc[eint_n].eint.eint_n == eint_n) + /* + * Be greedy to guess first gpio_n is equal to eint_n. + * Only eint virtual eint number is greater than gpio number. + */ + if (hw->soc->npins > eint_n && + desc[eint_n].eint.eint_n == eint_n) *gpio_n = eint_n; else *gpio_n = mtk_xt_find_eint_num(hw, eint_n); From 94047df12fec0e51e860b5317223f67a3ea4eb07 Mon Sep 17 00:00:00 2001 From: Luiz Sampaio Date: Tue, 9 Nov 2021 19:07:31 -0300 Subject: [PATCH 007/251] auxdisplay: charlcd: fixing coding style issue Removing 'int' from 'unsigned long int' declaration, which is unnecessary. Signed-off-by: Luiz Sampaio Signed-off-by: Miguel Ojeda --- drivers/auxdisplay/charlcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c index 304accde365c..cca3b600c0ba 100644 --- a/drivers/auxdisplay/charlcd.c +++ b/drivers/auxdisplay/charlcd.c @@ -37,7 +37,7 @@ struct charlcd_priv { bool must_clear; /* contains the LCD config state */ - unsigned long int flags; + unsigned long flags; /* Current escape sequence and it's length or -1 if outside */ struct { From 4daa9ff89ef27be43c15995412d6aee393a78200 Mon Sep 17 00:00:00 2001 From: Luiz Sampaio Date: Tue, 9 Nov 2021 19:07:32 -0300 Subject: [PATCH 008/251] auxdisplay: charlcd: checking for pointer reference before dereferencing Check if the pointer lcd->ops->init_display exists before dereferencing it. If a driver called charlcd_init() without defining the ops, this would return segmentation fault, as happened to me when implementing a charlcd driver. Checking the pointer before dereferencing protects from segmentation fault. Signed-off-by: Luiz Sampaio Signed-off-by: Miguel Ojeda --- drivers/auxdisplay/charlcd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c index cca3b600c0ba..6d309e4971b6 100644 --- a/drivers/auxdisplay/charlcd.c +++ b/drivers/auxdisplay/charlcd.c @@ -578,6 +578,9 @@ static int charlcd_init(struct charlcd *lcd) * Since charlcd_init_display() needs to write data, we have to * enable mark the LCD initialized just before. */ + if (WARN_ON(!lcd->ops->init_display)) + return -EINVAL; + ret = lcd->ops->init_display(lcd); if (ret) return ret; From 6331b8765cd0634a4e4cdcc1a6f1a74196616b94 Mon Sep 17 00:00:00 2001 From: Bin Meng Date: Wed, 16 Jun 2021 15:46:44 +0800 Subject: [PATCH 009/251] riscv: dts: unleashed: Add gpio card detect to mmc-spi-slot Per HiFive Unleashed schematics, the card detect signal of the micro SD card is connected to gpio pin #11, which should be reflected in the DT via the property, as described in Documentation/devicetree/bindings/mmc/mmc-spi-slot.txt. [1] https://sifive.cdn.prismic.io/sifive/c52a8e32-05ce-4aaf-95c8-7bf8453f8698_hifive-unleashed-a00-schematics-1.pdf Signed-off-by: Bin Meng Fixes: d573b5558abb ("riscv: dts: add initial board data for the SiFive HiFive Unmatched") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index ba304d4c455c..ced0d4e47938 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -76,6 +76,7 @@ spi-max-frequency = <20000000>; voltage-ranges = <3300 3300>; disable-wp; + gpios = <&gpio 11 GPIO_ACTIVE_LOW>; }; }; From 298d03c2d7f1b5daacb6d4f4053fd3d677d67087 Mon Sep 17 00:00:00 2001 From: Bin Meng Date: Wed, 16 Jun 2021 15:46:45 +0800 Subject: [PATCH 010/251] riscv: dts: unmatched: Add gpio card detect to mmc-spi-slot Per HiFive Unmatched schematics, the card detect signal of the micro SD card is connected to gpio pin #15, which should be reflected in the DT via the property, as described in Documentation/devicetree/bindings/mmc/mmc-spi-slot.txt. [1] https://sifive.cdn.prismic.io/sifive/6a06d6c0-6e66-49b5-8e9e-e68ce76f4192_hifive-unmatched-schematics-v3.pdf Signed-off-by: Bin Meng Fixes: d573b5558abb ("riscv: dts: add initial board data for the SiFive HiFive Unmatched") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index 4f66919215f6..3c796d64cf51 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -2,6 +2,7 @@ /* Copyright (c) 2020 SiFive, Inc */ #include "fu740-c000.dtsi" +#include #include /* Clock frequency (in Hz) of the PCB crystal for rtcclk */ @@ -223,6 +224,7 @@ spi-max-frequency = <20000000>; voltage-ranges = <3300 3300>; disable-wp; + gpios = <&gpio 15 GPIO_ACTIVE_LOW>; }; }; From 3dc709e518b47386e6af937eaec37bb36539edfd Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 26 Nov 2021 12:11:53 +0800 Subject: [PATCH 011/251] powerpc/85xx: Fix oops when CONFIG_FSL_PMC=n When CONFIG_FSL_PMC is set to n, no value is assigned to cpu_up_prepare in the mpc85xx_pm_ops structure. As a result, oops is triggered in smp_85xx_start_cpu(). smp: Bringing up secondary CPUs ... kernel tried to execute user page (0) - exploit attempt? (uid: 0) BUG: Unable to handle kernel instruction fetch (NULL pointer?) Faulting instruction address: 0x00000000 Oops: Kernel access of bad area, sig: 11 [#1] ... NIP [00000000] 0x0 LR [c0021d2c] smp_85xx_kick_cpu+0xe8/0x568 Call Trace: [c1051da8] [c0021cb8] smp_85xx_kick_cpu+0x74/0x568 (unreliable) [c1051de8] [c0011460] __cpu_up+0xc0/0x228 [c1051e18] [c0031bbc] bringup_cpu+0x30/0x224 [c1051e48] [c0031f3c] cpu_up.constprop.0+0x180/0x33c [c1051e88] [c00322e8] bringup_nonboot_cpus+0x88/0xc8 [c1051eb8] [c07e67bc] smp_init+0x30/0x78 [c1051ed8] [c07d9e28] kernel_init_freeable+0x118/0x2a8 [c1051f18] [c00032d8] kernel_init+0x14/0x124 [c1051f38] [c0010278] ret_from_kernel_thread+0x14/0x1c Fixes: c45361abb918 ("powerpc/85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n") Reported-by: Martin Kennedy Signed-off-by: Xiaoming Ni Tested-by: Martin Kennedy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211126041153.16926-1-nixiaoming@huawei.com --- arch/powerpc/platforms/85xx/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index 83f4a6389a28..d7081e9af65c 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -220,7 +220,7 @@ static int smp_85xx_start_cpu(int cpu) local_irq_save(flags); hard_irq_disable(); - if (qoriq_pm_ops) + if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare) qoriq_pm_ops->cpu_up_prepare(cpu); /* if cpu is not spinning, reset it */ @@ -292,7 +292,7 @@ static int smp_85xx_kick_cpu(int nr) booting_thread_hwid = cpu_thread_in_core(nr); primary = cpu_first_thread_sibling(nr); - if (qoriq_pm_ops) + if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare) qoriq_pm_ops->cpu_up_prepare(nr); /* From 9222ba68c3f4065f6364b99cc641b6b019ef2d42 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 28 Nov 2021 23:21:41 -0800 Subject: [PATCH 012/251] Input: i8042 - add deferred probe support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We've got a bug report about the non-working keyboard on ASUS ZenBook UX425UA. It seems that the PS/2 device isn't ready immediately at boot but takes some seconds to get ready. Until now, the only workaround is to defer the probe, but it's available only when the driver is a module. However, many distros, including openSUSE as in the original report, build the PS/2 input drivers into kernel, hence it won't work easily. This patch adds the support for the deferred probe for i8042 stuff as a workaround of the problem above. When the deferred probe mode is enabled and the device couldn't be probed, it'll be repeated with the standard deferred probe mechanism. The deferred probe mode is enabled either via the new option i8042.probe_defer or via the quirk table entry. As of this patch, the quirk table contains only ASUS ZenBook UX425UA. The deferred probe part is based on Fabio's initial work. BugLink: https://bugzilla.suse.com/show_bug.cgi?id=1190256 Signed-off-by: Takashi Iwai Tested-by: Samuel Čavoj Link: https://lore.kernel.org/r/20211117063757.11380-1-tiwai@suse.de Signed-off-by: Dmitry Torokhov --- .../admin-guide/kernel-parameters.txt | 2 + drivers/input/serio/i8042-x86ia64io.h | 14 +++++ drivers/input/serio/i8042.c | 54 ++++++++++++------- 3 files changed, 51 insertions(+), 19 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index cb89dbdedc46..ddfa50578a3e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1627,6 +1627,8 @@ architectures force reset to be always executed i8042.unlock [HW] Unlock (ignore) the keylock i8042.kbdreset [HW] Reset device connected to KBD port + i8042.probe_defer + [HW] Allow deferred probing upon i8042 probe errors i810= [HW,DRM] diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index aedd05541044..1acc7c844929 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -995,6 +995,17 @@ static const struct dmi_system_id __initconst i8042_dmi_kbdreset_table[] = { { } }; +static const struct dmi_system_id i8042_dmi_probe_defer_table[] __initconst = { + { + /* ASUS ZenBook UX425UA */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX425UA"), + }, + }, + { } +}; + #endif /* CONFIG_X86 */ #ifdef CONFIG_PNP @@ -1315,6 +1326,9 @@ static int __init i8042_platform_init(void) if (dmi_check_system(i8042_dmi_kbdreset_table)) i8042_kbdreset = true; + if (dmi_check_system(i8042_dmi_probe_defer_table)) + i8042_probe_defer = true; + /* * A20 was already enabled during early kernel init. But some buggy * BIOSes (in MSI Laptops) require A20 to be enabled using 8042 to diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index 0b9f1d0a8f8b..3fc0a89cc785 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -45,6 +45,10 @@ static bool i8042_unlock; module_param_named(unlock, i8042_unlock, bool, 0); MODULE_PARM_DESC(unlock, "Ignore keyboard lock."); +static bool i8042_probe_defer; +module_param_named(probe_defer, i8042_probe_defer, bool, 0); +MODULE_PARM_DESC(probe_defer, "Allow deferred probing."); + enum i8042_controller_reset_mode { I8042_RESET_NEVER, I8042_RESET_ALWAYS, @@ -711,7 +715,7 @@ static int i8042_set_mux_mode(bool multiplex, unsigned char *mux_version) * LCS/Telegraphics. */ -static int __init i8042_check_mux(void) +static int i8042_check_mux(void) { unsigned char mux_version; @@ -740,10 +744,10 @@ static int __init i8042_check_mux(void) /* * The following is used to test AUX IRQ delivery. */ -static struct completion i8042_aux_irq_delivered __initdata; -static bool i8042_irq_being_tested __initdata; +static struct completion i8042_aux_irq_delivered; +static bool i8042_irq_being_tested; -static irqreturn_t __init i8042_aux_test_irq(int irq, void *dev_id) +static irqreturn_t i8042_aux_test_irq(int irq, void *dev_id) { unsigned long flags; unsigned char str, data; @@ -770,7 +774,7 @@ static irqreturn_t __init i8042_aux_test_irq(int irq, void *dev_id) * verifies success by readinng CTR. Used when testing for presence of AUX * port. */ -static int __init i8042_toggle_aux(bool on) +static int i8042_toggle_aux(bool on) { unsigned char param; int i; @@ -798,7 +802,7 @@ static int __init i8042_toggle_aux(bool on) * the presence of an AUX interface. */ -static int __init i8042_check_aux(void) +static int i8042_check_aux(void) { int retval = -1; bool irq_registered = false; @@ -1005,7 +1009,7 @@ static int i8042_controller_init(void) if (i8042_command(&ctr[n++ % 2], I8042_CMD_CTL_RCTR)) { pr_err("Can't read CTR while initializing i8042\n"); - return -EIO; + return i8042_probe_defer ? -EPROBE_DEFER : -EIO; } } while (n < 2 || ctr[0] != ctr[1]); @@ -1320,7 +1324,7 @@ static void i8042_shutdown(struct platform_device *dev) i8042_controller_reset(false); } -static int __init i8042_create_kbd_port(void) +static int i8042_create_kbd_port(void) { struct serio *serio; struct i8042_port *port = &i8042_ports[I8042_KBD_PORT_NO]; @@ -1349,7 +1353,7 @@ static int __init i8042_create_kbd_port(void) return 0; } -static int __init i8042_create_aux_port(int idx) +static int i8042_create_aux_port(int idx) { struct serio *serio; int port_no = idx < 0 ? I8042_AUX_PORT_NO : I8042_MUX_PORT_NO + idx; @@ -1386,13 +1390,13 @@ static int __init i8042_create_aux_port(int idx) return 0; } -static void __init i8042_free_kbd_port(void) +static void i8042_free_kbd_port(void) { kfree(i8042_ports[I8042_KBD_PORT_NO].serio); i8042_ports[I8042_KBD_PORT_NO].serio = NULL; } -static void __init i8042_free_aux_ports(void) +static void i8042_free_aux_ports(void) { int i; @@ -1402,7 +1406,7 @@ static void __init i8042_free_aux_ports(void) } } -static void __init i8042_register_ports(void) +static void i8042_register_ports(void) { int i; @@ -1443,7 +1447,7 @@ static void i8042_free_irqs(void) i8042_aux_irq_registered = i8042_kbd_irq_registered = false; } -static int __init i8042_setup_aux(void) +static int i8042_setup_aux(void) { int (*aux_enable)(void); int error; @@ -1485,7 +1489,7 @@ static int __init i8042_setup_aux(void) return error; } -static int __init i8042_setup_kbd(void) +static int i8042_setup_kbd(void) { int error; @@ -1535,7 +1539,7 @@ static int i8042_kbd_bind_notifier(struct notifier_block *nb, return 0; } -static int __init i8042_probe(struct platform_device *dev) +static int i8042_probe(struct platform_device *dev) { int error; @@ -1600,6 +1604,7 @@ static struct platform_driver i8042_driver = { .pm = &i8042_pm_ops, #endif }, + .probe = i8042_probe, .remove = i8042_remove, .shutdown = i8042_shutdown, }; @@ -1610,7 +1615,6 @@ static struct notifier_block i8042_kbd_bind_notifier_block = { static int __init i8042_init(void) { - struct platform_device *pdev; int err; dbg_init(); @@ -1626,17 +1630,29 @@ static int __init i8042_init(void) /* Set this before creating the dev to allow i8042_command to work right away */ i8042_present = true; - pdev = platform_create_bundle(&i8042_driver, i8042_probe, NULL, 0, NULL, 0); - if (IS_ERR(pdev)) { - err = PTR_ERR(pdev); + err = platform_driver_register(&i8042_driver); + if (err) goto err_platform_exit; + + i8042_platform_device = platform_device_alloc("i8042", -1); + if (!i8042_platform_device) { + err = -ENOMEM; + goto err_unregister_driver; } + err = platform_device_add(i8042_platform_device); + if (err) + goto err_free_device; + bus_register_notifier(&serio_bus, &i8042_kbd_bind_notifier_block); panic_blink = i8042_panic_blink; return 0; +err_free_device: + platform_device_put(i8042_platform_device); +err_unregister_driver: + platform_driver_unregister(&i8042_driver); err_platform_exit: i8042_platform_exit(); return err; From e1f5e848209a1b51ccae50721b27684c6f9d978f Mon Sep 17 00:00:00 2001 From: Jeff LaBundy Date: Sun, 28 Nov 2021 23:41:42 -0800 Subject: [PATCH 013/251] Input: iqs626a - prohibit inlining of channel parsing functions Some automated builds report a stack frame size in excess of 2 kB for iqs626_probe(); the culprit appears to be the call to iqs626_parse_prop(). To solve this problem, specify noinline_for_stack for all of the iqs626_parse_*() helper functions which are called inside a for loop within iqs626_parse_prop(). As a result, a build with '-Wframe-larger-than' as low as 512 is free of any such warnings. Reported-by: kernel test robot Signed-off-by: Jeff LaBundy Link: https://lore.kernel.org/r/20211129004104.453930-1-jeff@labundy.com Signed-off-by: Dmitry Torokhov --- drivers/input/misc/iqs626a.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/input/misc/iqs626a.c b/drivers/input/misc/iqs626a.c index d57e996732cf..23b5dd9552dc 100644 --- a/drivers/input/misc/iqs626a.c +++ b/drivers/input/misc/iqs626a.c @@ -456,9 +456,10 @@ struct iqs626_private { unsigned int suspend_mode; }; -static int iqs626_parse_events(struct iqs626_private *iqs626, - const struct fwnode_handle *ch_node, - enum iqs626_ch_id ch_id) +static noinline_for_stack int +iqs626_parse_events(struct iqs626_private *iqs626, + const struct fwnode_handle *ch_node, + enum iqs626_ch_id ch_id) { struct iqs626_sys_reg *sys_reg = &iqs626->sys_reg; struct i2c_client *client = iqs626->client; @@ -604,9 +605,10 @@ static int iqs626_parse_events(struct iqs626_private *iqs626, return 0; } -static int iqs626_parse_ati_target(struct iqs626_private *iqs626, - const struct fwnode_handle *ch_node, - enum iqs626_ch_id ch_id) +static noinline_for_stack int +iqs626_parse_ati_target(struct iqs626_private *iqs626, + const struct fwnode_handle *ch_node, + enum iqs626_ch_id ch_id) { struct iqs626_sys_reg *sys_reg = &iqs626->sys_reg; struct i2c_client *client = iqs626->client; @@ -885,9 +887,10 @@ static int iqs626_parse_trackpad(struct iqs626_private *iqs626, return 0; } -static int iqs626_parse_channel(struct iqs626_private *iqs626, - const struct fwnode_handle *ch_node, - enum iqs626_ch_id ch_id) +static noinline_for_stack int +iqs626_parse_channel(struct iqs626_private *iqs626, + const struct fwnode_handle *ch_node, + enum iqs626_ch_id ch_id) { struct iqs626_sys_reg *sys_reg = &iqs626->sys_reg; struct i2c_client *client = iqs626->client; From 1d72d9f960ccf1052a0630a68c3d358791dbdaaa Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Mon, 29 Nov 2021 00:08:13 -0800 Subject: [PATCH 014/251] Input: elantech - fix stack out of bound access in elantech_change_report_id() The array param[] in elantech_change_report_id() must be at least 3 bytes, because elantech_read_reg_params() is calling ps2_command() with PSMOUSE_CMD_GETINFO, that is going to access 3 bytes from param[], but it's defined in the stack as an array of 2 bytes, therefore we have a potential stack out-of-bounds access here, also confirmed by KASAN: [ 6.512374] BUG: KASAN: stack-out-of-bounds in __ps2_command+0x372/0x7e0 [ 6.512397] Read of size 1 at addr ffff8881024d77c2 by task kworker/2:1/118 [ 6.512416] CPU: 2 PID: 118 Comm: kworker/2:1 Not tainted 5.13.0-22-generic #22+arighi20211110 [ 6.512428] Hardware name: LENOVO 20T8000QGE/20T8000QGE, BIOS R1AET32W (1.08 ) 08/14/2020 [ 6.512436] Workqueue: events_long serio_handle_event [ 6.512453] Call Trace: [ 6.512462] show_stack+0x52/0x58 [ 6.512474] dump_stack+0xa1/0xd3 [ 6.512487] print_address_description.constprop.0+0x1d/0x140 [ 6.512502] ? __ps2_command+0x372/0x7e0 [ 6.512516] __kasan_report.cold+0x7d/0x112 [ 6.512527] ? _raw_write_lock_irq+0x20/0xd0 [ 6.512539] ? __ps2_command+0x372/0x7e0 [ 6.512552] kasan_report+0x3c/0x50 [ 6.512564] __asan_load1+0x6a/0x70 [ 6.512575] __ps2_command+0x372/0x7e0 [ 6.512589] ? ps2_drain+0x240/0x240 [ 6.512601] ? dev_printk_emit+0xa2/0xd3 [ 6.512612] ? dev_vprintk_emit+0xc5/0xc5 [ 6.512621] ? __kasan_check_write+0x14/0x20 [ 6.512634] ? mutex_lock+0x8f/0xe0 [ 6.512643] ? __mutex_lock_slowpath+0x20/0x20 [ 6.512655] ps2_command+0x52/0x90 [ 6.512670] elantech_ps2_command+0x4f/0xc0 [psmouse] [ 6.512734] elantech_change_report_id+0x1e6/0x256 [psmouse] [ 6.512799] ? elantech_report_trackpoint.constprop.0.cold+0xd/0xd [psmouse] [ 6.512863] ? ps2_command+0x7f/0x90 [ 6.512877] elantech_query_info.cold+0x6bd/0x9ed [psmouse] [ 6.512943] ? elantech_setup_ps2+0x460/0x460 [psmouse] [ 6.513005] ? psmouse_reset+0x69/0xb0 [psmouse] [ 6.513064] ? psmouse_attr_set_helper+0x2a0/0x2a0 [psmouse] [ 6.513122] ? phys_pmd_init+0x30e/0x521 [ 6.513137] elantech_init+0x8a/0x200 [psmouse] [ 6.513200] ? elantech_init_ps2+0xf0/0xf0 [psmouse] [ 6.513249] ? elantech_query_info+0x440/0x440 [psmouse] [ 6.513296] ? synaptics_send_cmd+0x60/0x60 [psmouse] [ 6.513342] ? elantech_query_info+0x440/0x440 [psmouse] [ 6.513388] ? psmouse_try_protocol+0x11e/0x170 [psmouse] [ 6.513432] psmouse_extensions+0x65d/0x6e0 [psmouse] [ 6.513476] ? psmouse_try_protocol+0x170/0x170 [psmouse] [ 6.513519] ? mutex_unlock+0x22/0x40 [ 6.513526] ? ps2_command+0x7f/0x90 [ 6.513536] ? psmouse_probe+0xa3/0xf0 [psmouse] [ 6.513580] psmouse_switch_protocol+0x27d/0x2e0 [psmouse] [ 6.513624] psmouse_connect+0x272/0x530 [psmouse] [ 6.513669] serio_driver_probe+0x55/0x70 [ 6.513679] really_probe+0x190/0x720 [ 6.513689] driver_probe_device+0x160/0x1f0 [ 6.513697] device_driver_attach+0x119/0x130 [ 6.513705] ? device_driver_attach+0x130/0x130 [ 6.513713] __driver_attach+0xe7/0x1a0 [ 6.513720] ? device_driver_attach+0x130/0x130 [ 6.513728] bus_for_each_dev+0xfb/0x150 [ 6.513738] ? subsys_dev_iter_exit+0x10/0x10 [ 6.513748] ? _raw_write_unlock_bh+0x30/0x30 [ 6.513757] driver_attach+0x2d/0x40 [ 6.513764] serio_handle_event+0x199/0x3d0 [ 6.513775] process_one_work+0x471/0x740 [ 6.513785] worker_thread+0x2d2/0x790 [ 6.513794] ? process_one_work+0x740/0x740 [ 6.513802] kthread+0x1b4/0x1e0 [ 6.513809] ? set_kthread_struct+0x80/0x80 [ 6.513816] ret_from_fork+0x22/0x30 [ 6.513832] The buggy address belongs to the page: [ 6.513838] page:00000000bc35e189 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1024d7 [ 6.513847] flags: 0x17ffffc0000000(node=0|zone=2|lastcpupid=0x1fffff) [ 6.513860] raw: 0017ffffc0000000 dead000000000100 dead000000000122 0000000000000000 [ 6.513867] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 [ 6.513872] page dumped because: kasan: bad access detected [ 6.513879] addr ffff8881024d77c2 is located in stack of task kworker/2:1/118 at offset 34 in frame: [ 6.513887] elantech_change_report_id+0x0/0x256 [psmouse] [ 6.513941] this frame has 1 object: [ 6.513947] [32, 34) 'param' [ 6.513956] Memory state around the buggy address: [ 6.513962] ffff8881024d7680: f2 f2 f2 f2 f2 00 00 f3 f3 00 00 00 00 00 00 00 [ 6.513969] ffff8881024d7700: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 6.513976] >ffff8881024d7780: 00 00 00 00 f1 f1 f1 f1 02 f3 f3 f3 00 00 00 00 [ 6.513982] ^ [ 6.513988] ffff8881024d7800: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 6.513995] ffff8881024d7880: 00 f1 f1 f1 f1 03 f2 03 f2 03 f3 f3 f3 00 00 00 [ 6.514000] ================================================================== Define param[] in elantech_change_report_id() as an array of 3 bytes to prevent the out-of-bounds access in the stack. Fixes: e4c9062717fe ("Input: elantech - fix protocol errors for some trackpoints in SMBus mode") BugLink: https://bugs.launchpad.net/bugs/1945590 Signed-off-by: Andrea Righi Reviewed-by: Wolfram Sang Link: https://lore.kernel.org/r/20211116095559.24395-1-andrea.righi@canonical.com Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elantech.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 956d9cd34796..ece97f8c6a3e 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1588,7 +1588,13 @@ static const struct dmi_system_id no_hw_res_dmi_table[] = { */ static int elantech_change_report_id(struct psmouse *psmouse) { - unsigned char param[2] = { 0x10, 0x03 }; + /* + * NOTE: the code is expecting to receive param[] as an array of 3 + * items (see __ps2_command()), even if in this case only 2 are + * actually needed. Make sure the array size is 3 to avoid potential + * stack out-of-bound accesses. + */ + unsigned char param[3] = { 0x10, 0x03 }; if (elantech_write_reg_params(psmouse, 0x7, param) || elantech_read_reg_params(psmouse, 0x7, param) || From cd57eb3c403cb864e5558874ecd57dd954a5a7f7 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Fri, 3 Dec 2021 19:15:41 +0200 Subject: [PATCH 015/251] ASoC: SOF: Intel: pci-tgl: add ADL-N support Add PCI DID for Intel AlderLake-N. Signed-off-by: Kai Vehmanen Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20211203171542.1021399-1-kai.vehmanen@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/pci-tgl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/sof/intel/pci-tgl.c b/sound/soc/sof/intel/pci-tgl.c index f2ea34df9741..302068fd0b81 100644 --- a/sound/soc/sof/intel/pci-tgl.c +++ b/sound/soc/sof/intel/pci-tgl.c @@ -114,6 +114,8 @@ static const struct pci_device_id sof_pci_ids[] = { .driver_data = (unsigned long)&adl_desc}, { PCI_DEVICE(0x8086, 0x51cc), /* ADL-M */ .driver_data = (unsigned long)&adl_desc}, + { PCI_DEVICE(0x8086, 0x54c8), /* ADL-N */ + .driver_data = (unsigned long)&adl_desc}, { 0, } }; MODULE_DEVICE_TABLE(pci, sof_pci_ids); From de7dd9092cd38384f774d345cccafe81b4b866b0 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Fri, 3 Dec 2021 19:15:42 +0200 Subject: [PATCH 016/251] ASoC: SOF: Intel: pci-tgl: add new ADL-P variant Add a PCI DID for a variant of Intel AlderLake-P. Signed-off-by: Kai Vehmanen Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20211203171542.1021399-2-kai.vehmanen@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/pci-tgl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/sof/intel/pci-tgl.c b/sound/soc/sof/intel/pci-tgl.c index 302068fd0b81..fd46210f1730 100644 --- a/sound/soc/sof/intel/pci-tgl.c +++ b/sound/soc/sof/intel/pci-tgl.c @@ -112,6 +112,8 @@ static const struct pci_device_id sof_pci_ids[] = { .driver_data = (unsigned long)&adls_desc}, { PCI_DEVICE(0x8086, 0x51c8), /* ADL-P */ .driver_data = (unsigned long)&adl_desc}, + { PCI_DEVICE(0x8086, 0x51cd), /* ADL-P */ + .driver_data = (unsigned long)&adl_desc}, { PCI_DEVICE(0x8086, 0x51cc), /* ADL-M */ .driver_data = (unsigned long)&adl_desc}, { PCI_DEVICE(0x8086, 0x54c8), /* ADL-N */ From 85223d609c99eaa07cc598632b426cb33753526f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 6 Dec 2021 13:43:06 +0100 Subject: [PATCH 017/251] regulator: dt-bindings: samsung,s5m8767: add missing op_mode to bucks While converting bindings to dtschema, the buck regulators lost "op_mode" property. The "op_mode" is a valid property for all regulators (both LDOs and bucks), so add it. Reported-by: Rob Herring Fixes: fab58debc137 ("regulator: dt-bindings: samsung,s5m8767: convert to dtschema") Cc: Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20211206124306.14006-1-krzysztof.kozlowski@canonical.com Signed-off-by: Mark Brown --- .../bindings/regulator/samsung,s5m8767.yaml | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/Documentation/devicetree/bindings/regulator/samsung,s5m8767.yaml b/Documentation/devicetree/bindings/regulator/samsung,s5m8767.yaml index 80a63d47790a..c98929a213e9 100644 --- a/Documentation/devicetree/bindings/regulator/samsung,s5m8767.yaml +++ b/Documentation/devicetree/bindings/regulator/samsung,s5m8767.yaml @@ -51,6 +51,19 @@ patternProperties: description: Properties for single BUCK regulator. + properties: + op_mode: + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1, 2, 3] + default: 1 + description: | + Describes the different operating modes of the regulator with power + mode change in SOC. The different possible values are: + 0 - always off mode + 1 - on in normal mode + 2 - low power mode + 3 - suspend mode + required: - regulator-name @@ -63,6 +76,18 @@ patternProperties: Properties for single BUCK regulator. properties: + op_mode: + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1, 2, 3] + default: 1 + description: | + Describes the different operating modes of the regulator with power + mode change in SOC. The different possible values are: + 0 - always off mode + 1 - on in normal mode + 2 - low power mode + 3 - suspend mode + s5m8767,pmic-ext-control-gpios: maxItems: 1 description: | From db6689b643d8653092f5853751ea2cdbc299f8d3 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Mon, 6 Dec 2021 18:19:31 +0800 Subject: [PATCH 018/251] spi: change clk_disable_unprepare to clk_unprepare The corresponding API for clk_prepare is clk_unprepare, other than clk_disable_unprepare. Fix this by changing clk_disable_unprepare to clk_unprepare. Fixes: 5762ab71eb24 ("spi: Add support for Armada 3700 SPI Controller") Signed-off-by: Dongliang Mu Link: https://lore.kernel.org/r/20211206101931.2816597-1-mudongliangabcd@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-armada-3700.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c index 46feafe4e201..d8cc4b270644 100644 --- a/drivers/spi/spi-armada-3700.c +++ b/drivers/spi/spi-armada-3700.c @@ -901,7 +901,7 @@ static int a3700_spi_probe(struct platform_device *pdev) return 0; error_clk: - clk_disable_unprepare(spi->clk); + clk_unprepare(spi->clk); error: spi_master_put(master); out: From 44ee250aeeabb28b52a10397ac17ffb8bfe94839 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20=C4=8Cavoj?= Date: Sat, 4 Dec 2021 13:17:36 -0800 Subject: [PATCH 019/251] Input: i8042 - enable deferred probe quirk for ASUS UM325UA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ASUS UM325UA suffers from the same issue as the ASUS UX425UA, which is a very similar laptop. The i8042 device is not usable immediately after boot and fails to initialize, requiring a deferred retry. Enable the deferred probe quirk for the UM325UA. BugLink: https://bugzilla.suse.com/show_bug.cgi?id=1190256 Signed-off-by: Samuel Čavoj Link: https://lore.kernel.org/r/20211204015615.232948-1-samuel@cavoj.net Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-x86ia64io.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 1acc7c844929..148a7c5fd0e2 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -1003,6 +1003,13 @@ static const struct dmi_system_id i8042_dmi_probe_defer_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX425UA"), }, }, + { + /* ASUS ZenBook UM325UA */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325UA_UM325UA"), + }, + }, { } }; From a2fd46cd3dbb83b373ba74f4043f8dae869c65f1 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 6 Dec 2021 23:15:09 -0800 Subject: [PATCH 020/251] Input: goodix - try not to touch the reset-pin on x86/ACPI devices Unless the controller is not responding at boot or after suspend/resume, the driver never resets the controller on x86/ACPI platforms. The driver still requesting the reset pin at probe() though in case it needs it. Until now the driver has always requested the reset pin with GPIOD_IN as type. The idea being to put the pin in high-impedance mode to save power until the driver actually wants to issue a reset. But this means that just requesting the pin can cause issues, since requesting it in another mode then GPIOD_ASIS may cause the pinctrl driver to touch the pin settings. We have already had issues before due to a bug in the pinctrl-cherryview.c driver which has been fixed in commit 921daeeca91b ("pinctrl: cherryview: Preserve CHV_PADCTRL1_INVRXTX_TXDATA flag on GPIOs"). And now it turns out that requesting the reset-pin as GPIOD_IN also stops the touchscreen from working on the GPD P2 max mini-laptop. The behavior of putting the pin in high-impedance mode relies on there being some external pull-up to keep it high and there seems to be no pull-up on the GPD P2 max, causing things to break. This commit fixes this by requesting the reset pin as is when using the x86/ACPI code paths to lookup the GPIOs; and by not dropping it back into input-mode in case the driver does end up issuing a reset for error-recovery. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=209061 Fixes: a7d4b171660c ("Input: goodix - add support for getting IRQ + reset GPIOs on Cherry Trail devices") Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211206091116.44466-2-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix.c | 30 +++++++++++++++++++++++++----- drivers/input/touchscreen/goodix.h | 1 + 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index 906b5a6b52d1..e7efc32043e7 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -695,10 +695,16 @@ int goodix_reset_no_int_sync(struct goodix_ts_data *ts) usleep_range(6000, 10000); /* T4: > 5ms */ - /* end select I2C slave addr */ - error = gpiod_direction_input(ts->gpiod_rst); - if (error) - goto error; + /* + * Put the reset pin back in to input / high-impedance mode to save + * power. Only do this in the non ACPI case since some ACPI boards + * don't have a pull-up, so there the reset pin must stay active-high. + */ + if (ts->irq_pin_access_method == IRQ_PIN_ACCESS_GPIO) { + error = gpiod_direction_input(ts->gpiod_rst); + if (error) + goto error; + } return 0; @@ -832,6 +838,14 @@ static int goodix_add_acpi_gpio_mappings(struct goodix_ts_data *ts) return -EINVAL; } + /* + * Normally we put the reset pin in input / high-impedance mode to save + * power. But some x86/ACPI boards don't have a pull-up, so for the ACPI + * case, leave the pin as is. This results in the pin not being touched + * at all on x86/ACPI boards, except when needed for error-recover. + */ + ts->gpiod_rst_flags = GPIOD_ASIS; + return devm_acpi_dev_add_driver_gpios(dev, gpio_mapping); } #else @@ -857,6 +871,12 @@ static int goodix_get_gpio_config(struct goodix_ts_data *ts) return -EINVAL; dev = &ts->client->dev; + /* + * By default we request the reset pin as input, leaving it in + * high-impedance when not resetting the controller to save power. + */ + ts->gpiod_rst_flags = GPIOD_IN; + ts->avdd28 = devm_regulator_get(dev, "AVDD28"); if (IS_ERR(ts->avdd28)) { error = PTR_ERR(ts->avdd28); @@ -894,7 +914,7 @@ retry_get_irq_gpio: ts->gpiod_int = gpiod; /* Get the reset line GPIO pin number */ - gpiod = devm_gpiod_get_optional(dev, GOODIX_GPIO_RST_NAME, GPIOD_IN); + gpiod = devm_gpiod_get_optional(dev, GOODIX_GPIO_RST_NAME, ts->gpiod_rst_flags); if (IS_ERR(gpiod)) { error = PTR_ERR(gpiod); if (error != -EPROBE_DEFER) diff --git a/drivers/input/touchscreen/goodix.h b/drivers/input/touchscreen/goodix.h index 62138f930d1a..02065d1c3263 100644 --- a/drivers/input/touchscreen/goodix.h +++ b/drivers/input/touchscreen/goodix.h @@ -87,6 +87,7 @@ struct goodix_ts_data { struct gpio_desc *gpiod_rst; int gpio_count; int gpio_int_idx; + enum gpiod_flags gpiod_rst_flags; char id[GOODIX_ID_MAX_LEN + 1]; char cfg_name[64]; u16 version; From 81e818869be522bc8fa6f7df1b92d7e76537926c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 6 Dec 2021 23:29:27 -0800 Subject: [PATCH 021/251] Input: goodix - add id->model mapping for the "9111" model Add d->model mapping for the "9111" model, this fixes uses using a wrong config_len of 240 bytes while the "9111" model uses only 186 bytes of config. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211206164747.197309-2-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index e7efc32043e7..87263eb9e5a4 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -102,6 +102,7 @@ static const struct goodix_chip_id goodix_chip_ids[] = { { .id = "911", .data = >911_chip_data }, { .id = "9271", .data = >911_chip_data }, { .id = "9110", .data = >911_chip_data }, + { .id = "9111", .data = >911_chip_data }, { .id = "927", .data = >911_chip_data }, { .id = "928", .data = >911_chip_data }, From 34f35f8f14bc406efc06ee4ff73202c6fd245d15 Mon Sep 17 00:00:00 2001 From: Mian Yousaf Kaukab Date: Wed, 8 Dec 2021 10:32:39 +0100 Subject: [PATCH 022/251] ipmi: ssif: initialize ssif_info->client early During probe ssif_info->client is dereferenced in error path. However, it is set when some of the error checking has already been done. This causes following kernel crash if an error path is taken: [ 30.645593][ T674] ipmi_ssif 0-000e: ipmi_ssif: Not probing, Interface already present [ 30.657616][ T674] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000088 ... [ 30.657723][ T674] pc : __dev_printk+0x28/0xa0 [ 30.657732][ T674] lr : _dev_err+0x7c/0xa0 ... [ 30.657772][ T674] Call trace: [ 30.657775][ T674] __dev_printk+0x28/0xa0 [ 30.657778][ T674] _dev_err+0x7c/0xa0 [ 30.657781][ T674] ssif_probe+0x548/0x900 [ipmi_ssif 62ce4b08badc1458fd896206d9ef69a3c31f3d3e] [ 30.657791][ T674] i2c_device_probe+0x37c/0x3c0 ... Initialize ssif_info->client before any error path can be taken. Clear i2c_client data in the error path to prevent the dangling pointer from leaking. Fixes: c4436c9149c5 ("ipmi_ssif: avoid registering duplicate ssif interface") Cc: stable@vger.kernel.org # 5.4.x Suggested-by: Takashi Iwai Signed-off-by: Mian Yousaf Kaukab Message-Id: <20211208093239.4432-1-ykaukab@suse.de> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_ssif.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 0c62e578749e..48aab77abebf 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -1659,6 +1659,9 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id) } } + ssif_info->client = client; + i2c_set_clientdata(client, ssif_info); + rv = ssif_check_and_remove(client, ssif_info); /* If rv is 0 and addr source is not SI_ACPI, continue probing */ if (!rv && ssif_info->addr_source == SI_ACPI) { @@ -1679,9 +1682,6 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id) ipmi_addr_src_to_str(ssif_info->addr_source), client->addr, client->adapter->name, slave_addr); - ssif_info->client = client; - i2c_set_clientdata(client, ssif_info); - /* Now check for system interface capabilities */ msg[0] = IPMI_NETFN_APP_REQUEST << 2; msg[1] = IPMI_GET_SYSTEM_INTERFACE_CAPABILITIES_CMD; @@ -1881,6 +1881,7 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id) dev_err(&ssif_info->client->dev, "Unable to start IPMI SSIF: %d\n", rv); + i2c_set_clientdata(client, NULL); kfree(ssif_info); } kfree(resp); From 09d97da660ff77df20984496aa0abcd6b88819f2 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 8 Dec 2021 17:27:19 +0800 Subject: [PATCH 023/251] MIPS: Only define pci_remap_iospace() for Ralink After commit 9f76779f2418 ("MIPS: implement architecture-specific 'pci_remap_iospace()'"), there exists the following warning on the Loongson64 platform: loongson-pci 1a000000.pci: IO 0x0018020000..0x001803ffff -> 0x0000020000 loongson-pci 1a000000.pci: MEM 0x0040000000..0x007fffffff -> 0x0040000000 ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1 at arch/mips/pci/pci-generic.c:55 pci_remap_iospace+0x84/0x90 resource start address is not zero ... Call Trace: [] show_stack+0x40/0x120 [] dump_stack_lvl+0x58/0x74 [] __warn+0xe0/0x110 [] warn_slowpath_fmt+0xa4/0xd0 [] pci_remap_iospace+0x84/0x90 [] devm_pci_remap_iospace+0x5c/0xb8 [] devm_of_pci_bridge_init+0x178/0x1f8 [] devm_pci_alloc_host_bridge+0x78/0x98 [] loongson_pci_probe+0x34/0x160 [] platform_probe+0x6c/0xe0 [] really_probe+0xbc/0x340 [] __driver_probe_device+0x98/0x110 [] driver_probe_device+0x50/0x118 [] __driver_attach+0x80/0x118 [] bus_for_each_dev+0x80/0xc8 [] bus_add_driver+0x130/0x210 [] driver_register+0x8c/0x150 [] do_one_initcall+0x54/0x288 [] kernel_init_freeable+0x27c/0x2e4 [] kernel_init+0x2c/0x134 [] ret_from_kernel_thread+0x14/0x1c ---[ end trace e4a0efe10aa5cce6 ]--- loongson-pci 1a000000.pci: error -19: failed to map resource [io 0x20000-0x3ffff] We can see that the resource start address is 0x0000020000, because the ISA Bridge used the zero address which is defined in the dts file arch/mips/boot/dts/loongson/ls7a-pch.dtsi: ISA Bridge: /bus@10000000/isa@18000000 IO 0x0000000018000000..0x000000001801ffff -> 0x0000000000000000 Based on the above analysis, the architecture-specific pci_remap_iospace() is not suitable for Loongson64, we should only define pci_remap_iospace() for Ralink on MIPS based on the commit background. Fixes: 9f76779f2418 ("MIPS: implement architecture-specific 'pci_remap_iospace()'") Suggested-by: Thomas Bogendoerfer Signed-off-by: Tiezhu Yang Tested-by: Sergio Paracuellos Acked-by: Sergio Paracuellos Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/mach-ralink/spaces.h | 2 ++ arch/mips/include/asm/pci.h | 4 ---- arch/mips/pci/pci-generic.c | 2 ++ 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/include/asm/mach-ralink/spaces.h b/arch/mips/include/asm/mach-ralink/spaces.h index 05d14c21c417..f7af11ea2d61 100644 --- a/arch/mips/include/asm/mach-ralink/spaces.h +++ b/arch/mips/include/asm/mach-ralink/spaces.h @@ -6,5 +6,7 @@ #define PCI_IOSIZE SZ_64K #define IO_SPACE_LIMIT (PCI_IOSIZE - 1) +#define pci_remap_iospace pci_remap_iospace + #include #endif diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h index 421231f55935..9ffc8192adae 100644 --- a/arch/mips/include/asm/pci.h +++ b/arch/mips/include/asm/pci.h @@ -20,10 +20,6 @@ #include #include -#ifdef CONFIG_PCI_DRIVERS_GENERIC -#define pci_remap_iospace pci_remap_iospace -#endif - #ifdef CONFIG_PCI_DRIVERS_LEGACY /* diff --git a/arch/mips/pci/pci-generic.c b/arch/mips/pci/pci-generic.c index 18eb8a453a86..d2d68bac3d25 100644 --- a/arch/mips/pci/pci-generic.c +++ b/arch/mips/pci/pci-generic.c @@ -47,6 +47,7 @@ void pcibios_fixup_bus(struct pci_bus *bus) pci_read_bridge_bases(bus); } +#ifdef pci_remap_iospace int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr) { unsigned long vaddr; @@ -60,3 +61,4 @@ int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr) set_io_port_base(vaddr); return 0; } +#endif From 842470c4e211f284a224842849b1fa81b130c154 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 20 Oct 2021 18:57:40 +0200 Subject: [PATCH 024/251] Revert "drm/fb-helper: improve DRM fbdev emulation device names" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit b3484d2b03e4c940a9598aa841a52d69729c582a. That change attempted to improve the DRM drivers fbdev emulation device names to avoid having confusing names like "simpledrmdrmfb" in /proc/fb. But unfortunately, there are user-space programs such as pm-utils that match against the fbdev names and so broke after the mentioned commit. Since the names in /proc/fb are used by tools that consider it an uAPI, let's restore the old names even when this lead to silly names like the one mentioned above. Fixes: b3484d2b03e4 ("drm/fb-helper: improve DRM fbdev emulation device names") Reported-by: Johannes Stezenbach Signed-off-by: Javier Martinez Canillas Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20211020165740.3011927-1-javierm@redhat.com --- drivers/gpu/drm/drm_fb_helper.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 8e7a124d6c5a..22bf690910b2 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1743,7 +1743,13 @@ void drm_fb_helper_fill_info(struct fb_info *info, sizes->fb_width, sizes->fb_height); info->par = fb_helper; - snprintf(info->fix.id, sizeof(info->fix.id), "%s", + /* + * The DRM drivers fbdev emulation device name can be confusing if the + * driver name also has a "drm" suffix on it. Leading to names such as + * "simpledrmdrmfb" in /proc/fb. Unfortunately, it's an uAPI and can't + * be changed due user-space tools (e.g: pm-utils) matching against it. + */ + snprintf(info->fix.id, sizeof(info->fix.id), "%sdrmfb", fb_helper->dev->driver->name); } From 59ec71575ab440cd5ca0aa53b2a2985b3639fad4 Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Mon, 29 Nov 2021 21:37:25 +0100 Subject: [PATCH 025/251] ucounts: Fix rlimit max values check The semantics of the rlimit max values differs from ucounts itself. When creating a new userns, we store the current rlimit of the process in ucount_max. Thus, the value of the limit in the parent userns is saved in the created one. The problem is that now we are taking the maximum value for counter from the same userns. So for init_user_ns it will always be RLIM_INFINITY. To fix the problem we need to check the counter value with the max value stored in userns. Reproducer: su - test -c "ulimit -u 3; sleep 5 & sleep 6 & unshare -U --map-root-user sh -c 'sleep 7 & sleep 8 & date; wait'" Before: [1] 175 [2] 176 Fri Nov 26 13:48:20 UTC 2021 [1]- Done sleep 5 [2]+ Done sleep 6 After: [1] 167 [2] 168 sh: fork: retry: Resource temporarily unavailable sh: fork: retry: Resource temporarily unavailable sh: fork: retry: Resource temporarily unavailable sh: fork: retry: Resource temporarily unavailable sh: fork: retry: Resource temporarily unavailable sh: fork: retry: Resource temporarily unavailable sh: fork: retry: Resource temporarily unavailable sh: fork: Interrupted system call [1]- Done sleep 5 [2]+ Done sleep 6 Fixes: c54b245d0118 ("Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace") Reported-by: Gleb Fotengauer-Malinovskiy Signed-off-by: "Eric W. Biederman" Signed-off-by: Alexey Gladkov Link: https://lkml.kernel.org/r/024ec805f6e16896f0b23e094773790d171d2c1c.1638218242.git.legion@kernel.org Signed-off-by: Eric W. Biederman --- kernel/ucount.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/kernel/ucount.c b/kernel/ucount.c index 4f5613dac227..7b32c356ebc5 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -264,15 +264,16 @@ void dec_ucount(struct ucounts *ucounts, enum ucount_type type) long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v) { struct ucounts *iter; + long max = LONG_MAX; long ret = 0; for (iter = ucounts; iter; iter = iter->ns->ucounts) { - long max = READ_ONCE(iter->ns->ucount_max[type]); long new = atomic_long_add_return(v, &iter->ucount[type]); if (new < 0 || new > max) ret = LONG_MAX; else if (iter == ucounts) ret = new; + max = READ_ONCE(iter->ns->ucount_max[type]); } return ret; } @@ -312,15 +313,16 @@ long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type) { /* Caller must hold a reference to ucounts */ struct ucounts *iter; + long max = LONG_MAX; long dec, ret = 0; for (iter = ucounts; iter; iter = iter->ns->ucounts) { - long max = READ_ONCE(iter->ns->ucount_max[type]); long new = atomic_long_add_return(1, &iter->ucount[type]); if (new < 0 || new > max) goto unwind; if (iter == ucounts) ret = new; + max = READ_ONCE(iter->ns->ucount_max[type]); /* * Grab an extra ucount reference for the caller when * the rlimit count was previously 0. @@ -339,15 +341,16 @@ unwind: return 0; } -bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max) +bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long rlimit) { struct ucounts *iter; - if (get_ucounts_value(ucounts, type) > max) - return true; + long max = rlimit; + if (rlimit > LONG_MAX) + max = LONG_MAX; for (iter = ucounts; iter; iter = iter->ns->ucounts) { - max = READ_ONCE(iter->ns->ucount_max[type]); if (get_ucounts_value(iter, type) > max) return true; + max = READ_ONCE(iter->ns->ucount_max[type]); } return false; } From 266423e60ea1b953fcc0cd97f3dad85857e434d1 Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Mon, 6 Dec 2021 09:22:36 +0000 Subject: [PATCH 026/251] pinctrl: bcm2835: Change init order for gpio hogs ...and gpio-ranges pinctrl-bcm2835 is a combined pinctrl/gpio driver. Currently the gpio side is registered first, but this breaks gpio hogs (which are configured during gpiochip_add_data). Part of the hog initialisation is a call to pinctrl_gpio_request, and since the pinctrl driver hasn't yet been registered this results in an -EPROBE_DEFER from which it can never recover. Change the initialisation sequence to register the pinctrl driver first. This also solves a similar problem with the gpio-ranges property, which is required in order for released pins to be returned to inputs. Fixes: 73345a18d464b ("pinctrl: bcm2835: Pass irqchip when adding gpiochip") Signed-off-by: Phil Elwell Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20211206092237.4105895-2-phil@raspberrypi.com Signed-off-by: Linus Walleij --- drivers/pinctrl/bcm/pinctrl-bcm2835.c | 29 +++++++++++++++------------ 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/pinctrl/bcm/pinctrl-bcm2835.c b/drivers/pinctrl/bcm/pinctrl-bcm2835.c index 2abcc6ce4eba..b607d10e4cbd 100644 --- a/drivers/pinctrl/bcm/pinctrl-bcm2835.c +++ b/drivers/pinctrl/bcm/pinctrl-bcm2835.c @@ -1244,6 +1244,18 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev) raw_spin_lock_init(&pc->irq_lock[i]); } + pc->pctl_desc = *pdata->pctl_desc; + pc->pctl_dev = devm_pinctrl_register(dev, &pc->pctl_desc, pc); + if (IS_ERR(pc->pctl_dev)) { + gpiochip_remove(&pc->gpio_chip); + return PTR_ERR(pc->pctl_dev); + } + + pc->gpio_range = *pdata->gpio_range; + pc->gpio_range.base = pc->gpio_chip.base; + pc->gpio_range.gc = &pc->gpio_chip; + pinctrl_add_gpio_range(pc->pctl_dev, &pc->gpio_range); + girq = &pc->gpio_chip.irq; girq->chip = &bcm2835_gpio_irq_chip; girq->parent_handler = bcm2835_gpio_irq_handler; @@ -1251,8 +1263,10 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev) girq->parents = devm_kcalloc(dev, BCM2835_NUM_IRQS, sizeof(*girq->parents), GFP_KERNEL); - if (!girq->parents) + if (!girq->parents) { + pinctrl_remove_gpio_range(pc->pctl_dev, &pc->gpio_range); return -ENOMEM; + } if (is_7211) { pc->wake_irq = devm_kcalloc(dev, BCM2835_NUM_IRQS, @@ -1307,21 +1321,10 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev) err = gpiochip_add_data(&pc->gpio_chip, pc); if (err) { dev_err(dev, "could not add GPIO chip\n"); + pinctrl_remove_gpio_range(pc->pctl_dev, &pc->gpio_range); return err; } - pc->pctl_desc = *pdata->pctl_desc; - pc->pctl_dev = devm_pinctrl_register(dev, &pc->pctl_desc, pc); - if (IS_ERR(pc->pctl_dev)) { - gpiochip_remove(&pc->gpio_chip); - return PTR_ERR(pc->pctl_dev); - } - - pc->gpio_range = *pdata->gpio_range; - pc->gpio_range.base = pc->gpio_chip.base; - pc->gpio_range.gc = &pc->gpio_chip; - pinctrl_add_gpio_range(pc->pctl_dev, &pc->gpio_range); - return 0; } From 3fd6e12a401ead0345e4b7e6a73e117f0713e0c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Thu, 9 Dec 2021 21:18:13 -0800 Subject: [PATCH 027/251] Input: goodix - fix memory leak in goodix_firmware_upload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses-Coverity-ID: 1493934 ("Resource leak") Signed-off-by: José Expósito Link: https://lore.kernel.org/r/20211208173321.26659-1-jose.exposito89@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix_fwupload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/goodix_fwupload.c b/drivers/input/touchscreen/goodix_fwupload.c index c1e7a2413078..191d4f38d991 100644 --- a/drivers/input/touchscreen/goodix_fwupload.c +++ b/drivers/input/touchscreen/goodix_fwupload.c @@ -207,7 +207,7 @@ static int goodix_firmware_upload(struct goodix_ts_data *ts) error = goodix_reset_no_int_sync(ts); if (error) - return error; + goto release; error = goodix_enter_upload_mode(ts->client); if (error) From fce15c45d3fbd9fc1feaaf3210d8e3f8b33dfd3a Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 6 Nov 2021 10:02:44 -0700 Subject: [PATCH 028/251] hwmon: (lm90) Fix usage of CONFIG2 register in detect function The detect function had a comment "Make compiler happy" when id did not read the second configuration register. As it turns out, the code was checking the contents of this register for manufacturer ID 0xA1 (NXP Semiconductor/Philips), but never actually read the register. So it wasn't surprising that the compiler complained, and it indeed had a point. Fix the code to read the register contents for manufacturer ID 0xa1. At the same time, the code was reading the register for manufacturer ID 0x41 (Analog Devices), but it was not using the results. In effect it was just checking if reading the register returned an error. That doesn't really add much if any value, so stop doing that. Fixes: f90be42fb383 ("hwmon: (lm90) Refactor reading of config2 register") Signed-off-by: Guenter Roeck --- drivers/hwmon/lm90.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index 618052c6cdb6..b05d73c4fbe2 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -1465,12 +1465,11 @@ static int lm90_detect(struct i2c_client *client, if (man_id < 0 || chip_id < 0 || config1 < 0 || convrate < 0) return -ENODEV; - if (man_id == 0x01 || man_id == 0x5C || man_id == 0x41) { + if (man_id == 0x01 || man_id == 0x5C || man_id == 0xA1) { config2 = i2c_smbus_read_byte_data(client, LM90_REG_R_CONFIG2); if (config2 < 0) return -ENODEV; - } else - config2 = 0; /* Make compiler happy */ + } if ((address == 0x4C || address == 0x4D) && man_id == 0x01) { /* National Semiconductor */ From 55840b9eae5367b5d5b29619dc2fb7e4596dba46 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 17 Nov 2021 09:51:47 -0800 Subject: [PATCH 029/251] hwmon: (lm90) Prevent integer overflow/underflow in hysteresis calculations Commit b50aa49638c7 ("hwmon: (lm90) Prevent integer underflows of temperature calculations") addressed a number of underflow situations when writing temperature limits. However, it missed one situation, seen when an attempt is made to set the hysteresis value to MAX_LONG and the critical temperature limit is negative. Use clamp_val() when setting the hysteresis temperature to ensure that the provided value can never overflow or underflow. Fixes: b50aa49638c7 ("hwmon: (lm90) Prevent integer underflows of temperature calculations") Cc: Dmitry Osipenko Reviewed-by: Dmitry Osipenko Signed-off-by: Guenter Roeck --- drivers/hwmon/lm90.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index b05d73c4fbe2..72969ea83d82 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -1160,8 +1160,8 @@ static int lm90_set_temphyst(struct lm90_data *data, long val) else temp = temp_from_s8(data->temp8[LOCAL_CRIT]); - /* prevent integer underflow */ - val = max(val, -128000l); + /* prevent integer overflow/underflow */ + val = clamp_val(val, -128000l, 255000l); data->temp_hyst = hyst_to_reg(temp - val); err = i2c_smbus_write_byte_data(client, LM90_REG_W_TCRIT_HYST, From 16ba51b5dcd3f6dde2e51d5ccc86313119dcf889 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 13 Nov 2021 08:55:06 -0800 Subject: [PATCH 030/251] hwmon: (lm90) Drop critical attribute support for MAX6654 Tests with a real chip and a closer look into the datasheet show that MAX6654 does not support CRIT/THERM/OVERTEMP limits, so drop support of the respective attributes for this chip. Introduce LM90_HAVE_CRIT flag and use it to instantiate critical limit attributes to solve the problem. Cc: Josh Lehan Fixes: 229d495d8189 ("hwmon: (lm90) Add max6654 support to lm90 driver") Signed-off-by: Guenter Roeck --- drivers/hwmon/lm90.c | 86 +++++++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 37 deletions(-) diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index 72969ea83d82..6597d055e09d 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -35,13 +35,14 @@ * explicitly as max6659, or if its address is not 0x4c. * These chips lack the remote temperature offset feature. * - * This driver also supports the MAX6654 chip made by Maxim. This chip can - * be at 9 different addresses, similar to MAX6680/MAX6681. The MAX6654 is - * otherwise similar to MAX6657/MAX6658/MAX6659. Extended range is available - * by setting the configuration register accordingly, and is done during - * initialization. Extended precision is only available at conversion rates - * of 1 Hz and slower. Note that extended precision is not enabled by - * default, as this driver initializes all chips to 2 Hz by design. + * This driver also supports the MAX6654 chip made by Maxim. This chip can be + * at 9 different addresses, similar to MAX6680/MAX6681. The MAX6654 is similar + * to MAX6657/MAX6658/MAX6659, but does not support critical temperature + * limits. Extended range is available by setting the configuration register + * accordingly, and is done during initialization. Extended precision is only + * available at conversion rates of 1 Hz and slower. Note that extended + * precision is not enabled by default, as this driver initializes all chips + * to 2 Hz by design. * * This driver also supports the MAX6646, MAX6647, MAX6648, MAX6649 and * MAX6692 chips made by Maxim. These are again similar to the LM86, @@ -188,6 +189,7 @@ enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680, #define LM90_HAVE_BROKEN_ALERT (1 << 7) /* Broken alert */ #define LM90_HAVE_EXTENDED_TEMP (1 << 8) /* extended temperature support*/ #define LM90_PAUSE_FOR_CONFIG (1 << 9) /* Pause conversion for config */ +#define LM90_HAVE_CRIT (1 << 10)/* Chip supports CRIT/OVERT register */ /* LM90 status */ #define LM90_STATUS_LTHRM (1 << 0) /* local THERM limit tripped */ @@ -354,38 +356,43 @@ struct lm90_params { static const struct lm90_params lm90_params[] = { [adm1032] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT - | LM90_HAVE_BROKEN_ALERT, + | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 10, }, [adt7461] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT - | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP, + | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP + | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 10, }, [g781] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT - | LM90_HAVE_BROKEN_ALERT, + | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 8, }, [lm86] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT + | LM90_HAVE_CRIT, .alert_alarms = 0x7b, .max_convrate = 9, }, [lm90] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT + | LM90_HAVE_CRIT, .alert_alarms = 0x7b, .max_convrate = 9, }, [lm99] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT + | LM90_HAVE_CRIT, .alert_alarms = 0x7b, .max_convrate = 9, }, [max6646] = { + .flags = LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 6, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, @@ -396,50 +403,50 @@ static const struct lm90_params lm90_params[] = { .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [max6657] = { - .flags = LM90_PAUSE_FOR_CONFIG, + .flags = LM90_PAUSE_FOR_CONFIG | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 8, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [max6659] = { - .flags = LM90_HAVE_EMERGENCY, + .flags = LM90_HAVE_EMERGENCY | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 8, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [max6680] = { - .flags = LM90_HAVE_OFFSET, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 7, }, [max6696] = { .flags = LM90_HAVE_EMERGENCY - | LM90_HAVE_EMERGENCY_ALARM | LM90_HAVE_TEMP3, + | LM90_HAVE_EMERGENCY_ALARM | LM90_HAVE_TEMP3 | LM90_HAVE_CRIT, .alert_alarms = 0x1c7c, .max_convrate = 6, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [w83l771] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 8, }, [sa56004] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT | LM90_HAVE_CRIT, .alert_alarms = 0x7b, .max_convrate = 9, .reg_local_ext = SA56004_REG_R_LOCAL_TEMPL, }, [tmp451] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT - | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP, + | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 9, .reg_local_ext = TMP451_REG_R_LOCAL_TEMPL, }, [tmp461] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT - | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP, + | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 9, .reg_local_ext = TMP451_REG_R_LOCAL_TEMPL, @@ -668,20 +675,22 @@ static int lm90_update_limits(struct device *dev) struct i2c_client *client = data->client; int val; - val = lm90_read_reg(client, LM90_REG_R_LOCAL_CRIT); - if (val < 0) - return val; - data->temp8[LOCAL_CRIT] = val; + if (data->flags & LM90_HAVE_CRIT) { + val = lm90_read_reg(client, LM90_REG_R_LOCAL_CRIT); + if (val < 0) + return val; + data->temp8[LOCAL_CRIT] = val; - val = lm90_read_reg(client, LM90_REG_R_REMOTE_CRIT); - if (val < 0) - return val; - data->temp8[REMOTE_CRIT] = val; + val = lm90_read_reg(client, LM90_REG_R_REMOTE_CRIT); + if (val < 0) + return val; + data->temp8[REMOTE_CRIT] = val; - val = lm90_read_reg(client, LM90_REG_R_TCRIT_HYST); - if (val < 0) - return val; - data->temp_hyst = val; + val = lm90_read_reg(client, LM90_REG_R_TCRIT_HYST); + if (val < 0) + return val; + data->temp_hyst = val; + } val = lm90_read_reg(client, LM90_REG_R_REMOTE_LOWH); if (val < 0) @@ -1902,11 +1911,14 @@ static int lm90_probe(struct i2c_client *client) info->config = data->channel_config; data->channel_config[0] = HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX | - HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MIN_ALARM | - HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM; + HWMON_T_MIN_ALARM | HWMON_T_MAX_ALARM; data->channel_config[1] = HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX | - HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MIN_ALARM | - HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM | HWMON_T_FAULT; + HWMON_T_MIN_ALARM | HWMON_T_MAX_ALARM | HWMON_T_FAULT; + + if (data->flags & LM90_HAVE_CRIT) { + data->channel_config[0] |= HWMON_T_CRIT | HWMON_T_CRIT_ALARM | HWMON_T_CRIT_HYST; + data->channel_config[1] |= HWMON_T_CRIT | HWMON_T_CRIT_ALARM | HWMON_T_CRIT_HYST; + } if (data->flags & LM90_HAVE_OFFSET) data->channel_config[1] |= HWMON_T_OFFSET; From da7dc0568491104c7acb632e9d41ddce9aaabbb1 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 26 Nov 2021 22:43:39 -0800 Subject: [PATCH 031/251] hwmom: (lm90) Fix citical alarm status for MAX6680/MAX6681 Tests with a real chip and a closer look into the datasheet reveals that the local and remote critical alarm status bits are swapped for MAX6680/MAX6681. Signed-off-by: Guenter Roeck --- drivers/hwmon/lm90.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index 6597d055e09d..dd8612a9d536 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -190,6 +190,7 @@ enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680, #define LM90_HAVE_EXTENDED_TEMP (1 << 8) /* extended temperature support*/ #define LM90_PAUSE_FOR_CONFIG (1 << 9) /* Pause conversion for config */ #define LM90_HAVE_CRIT (1 << 10)/* Chip supports CRIT/OVERT register */ +#define LM90_HAVE_CRIT_ALRM_SWP (1 << 11)/* critical alarm bits swapped */ /* LM90 status */ #define LM90_STATUS_LTHRM (1 << 0) /* local THERM limit tripped */ @@ -415,7 +416,8 @@ static const struct lm90_params lm90_params[] = { .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [max6680] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_CRIT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_CRIT + | LM90_HAVE_CRIT_ALRM_SWP, .alert_alarms = 0x7c, .max_convrate = 7, }, @@ -1201,6 +1203,7 @@ static const u8 lm90_temp_emerg_index[3] = { static const u8 lm90_min_alarm_bits[3] = { 5, 3, 11 }; static const u8 lm90_max_alarm_bits[3] = { 6, 4, 12 }; static const u8 lm90_crit_alarm_bits[3] = { 0, 1, 9 }; +static const u8 lm90_crit_alarm_bits_swapped[3] = { 1, 0, 9 }; static const u8 lm90_emergency_alarm_bits[3] = { 15, 13, 14 }; static const u8 lm90_fault_bits[3] = { 0, 2, 10 }; @@ -1226,7 +1229,10 @@ static int lm90_temp_read(struct device *dev, u32 attr, int channel, long *val) *val = (data->alarms >> lm90_max_alarm_bits[channel]) & 1; break; case hwmon_temp_crit_alarm: - *val = (data->alarms >> lm90_crit_alarm_bits[channel]) & 1; + if (data->flags & LM90_HAVE_CRIT_ALRM_SWP) + *val = (data->alarms >> lm90_crit_alarm_bits_swapped[channel]) & 1; + else + *val = (data->alarms >> lm90_crit_alarm_bits[channel]) & 1; break; case hwmon_temp_emergency_alarm: *val = (data->alarms >> lm90_emergency_alarm_bits[channel]) & 1; From cdc5287acad9ede121924a9c9313544b80d15842 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 3 Dec 2021 13:42:22 -0800 Subject: [PATCH 032/251] hwmon: (lm90) Do not report 'busy' status bit as alarm Bit 7 of the status register indicates that the chip is busy doing a conversion. It does not indicate an alarm status. Stop reporting it as alarm status bit. Signed-off-by: Guenter Roeck --- drivers/hwmon/lm90.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index dd8612a9d536..74019dff2550 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -200,6 +200,7 @@ enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680, #define LM90_STATUS_RHIGH (1 << 4) /* remote high temp limit tripped */ #define LM90_STATUS_LLOW (1 << 5) /* local low temp limit tripped */ #define LM90_STATUS_LHIGH (1 << 6) /* local high temp limit tripped */ +#define LM90_STATUS_BUSY (1 << 7) /* conversion is ongoing */ #define MAX6696_STATUS2_R2THRM (1 << 1) /* remote2 THERM limit tripped */ #define MAX6696_STATUS2_R2OPEN (1 << 2) /* remote2 is an open circuit */ @@ -820,7 +821,7 @@ static int lm90_update_device(struct device *dev) val = lm90_read_reg(client, LM90_REG_R_STATUS); if (val < 0) return val; - data->alarms = val; /* lower 8 bit of alarms */ + data->alarms = val & ~LM90_STATUS_BUSY; if (data->kind == max6696) { val = lm90_select_remote_channel(data, 1); From 12f247ab590a08856441efdbd351cf2cc8f60a2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Sun, 12 Dec 2021 21:01:49 -0800 Subject: [PATCH 033/251] Input: atmel_mxt_ts - fix double free in mxt_read_info_block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "id_buf" buffer is stored in "data->raw_info_block" and freed by "mxt_free_object_table" in case of error. Return instead of jumping to avoid a double free. Addresses-Coverity-ID: 1474582 ("Double free") Fixes: 068bdb67ef74 ("Input: atmel_mxt_ts - fix the firmware update") Signed-off-by: José Expósito Link: https://lore.kernel.org/r/20211212194257.68879-1-jose.exposito89@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/atmel_mxt_ts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index 05de92c0293b..eb66cd2689b7 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -1882,7 +1882,7 @@ static int mxt_read_info_block(struct mxt_data *data) if (error) { dev_err(&client->dev, "Error %d parsing object table\n", error); mxt_free_object_table(data); - goto err_free_mem; + return error; } data->object_table = (struct mxt_object *)(id_buf + MXT_OBJECT_START); From 80936d68665be88dc3bf60884a71f2694eb6b1f1 Mon Sep 17 00:00:00 2001 From: Vignesh Raghavendra Date: Thu, 9 Dec 2021 23:39:56 +0530 Subject: [PATCH 034/251] dmaengine: ti: k3-udma: Fix smatch warnings Smatch reports below warnings [1] wrt dereferencing rm_res when it can potentially be ERR_PTR(). This is possible when entire range is allocated to Linux Fix this case by making sure, there is no deference of rm_res when its ERR_PTR(). [1]: drivers/dma/ti/k3-udma.c:4524 udma_setup_resources() error: 'rm_res' dereferencing possible ERR_PTR() drivers/dma/ti/k3-udma.c:4537 udma_setup_resources() error: 'rm_res' dereferencing possible ERR_PTR() drivers/dma/ti/k3-udma.c:4681 bcdma_setup_resources() error: 'rm_res' dereferencing possible ERR_PTR() drivers/dma/ti/k3-udma.c:4696 bcdma_setup_resources() error: 'rm_res' dereferencing possible ERR_PTR() drivers/dma/ti/k3-udma.c:4711 bcdma_setup_resources() error: 'rm_res' dereferencing possible ERR_PTR() drivers/dma/ti/k3-udma.c:4848 pktdma_setup_resources() error: 'rm_res' dereferencing possible ERR_PTR() drivers/dma/ti/k3-udma.c:4861 pktdma_setup_resources() error: 'rm_res' dereferencing possible ERR_PTR() Reported-by: Nishanth Menon Signed-off-by: Vignesh Raghavendra Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20211209180957.29036-1-vigneshr@ti.com Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-udma.c | 153 +++++++++++++++++++++++++++------------ 1 file changed, 105 insertions(+), 48 deletions(-) diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index 041d8e32d630..6e56d1cef5ee 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -4534,45 +4534,60 @@ static int udma_setup_resources(struct udma_dev *ud) rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN]; if (IS_ERR(rm_res)) { bitmap_zero(ud->tchan_map, ud->tchan_cnt); + irq_res.sets = 1; } else { bitmap_fill(ud->tchan_map, ud->tchan_cnt); for (i = 0; i < rm_res->sets; i++) udma_mark_resource_ranges(ud, ud->tchan_map, &rm_res->desc[i], "tchan"); + irq_res.sets = rm_res->sets; } - irq_res.sets = rm_res->sets; /* rchan and matching default flow ranges */ rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN]; if (IS_ERR(rm_res)) { bitmap_zero(ud->rchan_map, ud->rchan_cnt); + irq_res.sets++; } else { bitmap_fill(ud->rchan_map, ud->rchan_cnt); for (i = 0; i < rm_res->sets; i++) udma_mark_resource_ranges(ud, ud->rchan_map, &rm_res->desc[i], "rchan"); + irq_res.sets += rm_res->sets; } - irq_res.sets += rm_res->sets; irq_res.desc = kcalloc(irq_res.sets, sizeof(*irq_res.desc), GFP_KERNEL); + if (!irq_res.desc) + return -ENOMEM; rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN]; - for (i = 0; i < rm_res->sets; i++) { - irq_res.desc[i].start = rm_res->desc[i].start; - irq_res.desc[i].num = rm_res->desc[i].num; - irq_res.desc[i].start_sec = rm_res->desc[i].start_sec; - irq_res.desc[i].num_sec = rm_res->desc[i].num_sec; + if (IS_ERR(rm_res)) { + irq_res.desc[0].start = 0; + irq_res.desc[0].num = ud->tchan_cnt; + i = 1; + } else { + for (i = 0; i < rm_res->sets; i++) { + irq_res.desc[i].start = rm_res->desc[i].start; + irq_res.desc[i].num = rm_res->desc[i].num; + irq_res.desc[i].start_sec = rm_res->desc[i].start_sec; + irq_res.desc[i].num_sec = rm_res->desc[i].num_sec; + } } rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN]; - for (j = 0; j < rm_res->sets; j++, i++) { - if (rm_res->desc[j].num) { - irq_res.desc[i].start = rm_res->desc[j].start + - ud->soc_data->oes.udma_rchan; - irq_res.desc[i].num = rm_res->desc[j].num; - } - if (rm_res->desc[j].num_sec) { - irq_res.desc[i].start_sec = rm_res->desc[j].start_sec + - ud->soc_data->oes.udma_rchan; - irq_res.desc[i].num_sec = rm_res->desc[j].num_sec; + if (IS_ERR(rm_res)) { + irq_res.desc[i].start = 0; + irq_res.desc[i].num = ud->rchan_cnt; + } else { + for (j = 0; j < rm_res->sets; j++, i++) { + if (rm_res->desc[j].num) { + irq_res.desc[i].start = rm_res->desc[j].start + + ud->soc_data->oes.udma_rchan; + irq_res.desc[i].num = rm_res->desc[j].num; + } + if (rm_res->desc[j].num_sec) { + irq_res.desc[i].start_sec = rm_res->desc[j].start_sec + + ud->soc_data->oes.udma_rchan; + irq_res.desc[i].num_sec = rm_res->desc[j].num_sec; + } } } ret = ti_sci_inta_msi_domain_alloc_irqs(ud->dev, &irq_res); @@ -4690,14 +4705,15 @@ static int bcdma_setup_resources(struct udma_dev *ud) rm_res = tisci_rm->rm_ranges[RM_RANGE_BCHAN]; if (IS_ERR(rm_res)) { bitmap_zero(ud->bchan_map, ud->bchan_cnt); + irq_res.sets++; } else { bitmap_fill(ud->bchan_map, ud->bchan_cnt); for (i = 0; i < rm_res->sets; i++) udma_mark_resource_ranges(ud, ud->bchan_map, &rm_res->desc[i], "bchan"); + irq_res.sets += rm_res->sets; } - irq_res.sets += rm_res->sets; } /* tchan ranges */ @@ -4705,14 +4721,15 @@ static int bcdma_setup_resources(struct udma_dev *ud) rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN]; if (IS_ERR(rm_res)) { bitmap_zero(ud->tchan_map, ud->tchan_cnt); + irq_res.sets += 2; } else { bitmap_fill(ud->tchan_map, ud->tchan_cnt); for (i = 0; i < rm_res->sets; i++) udma_mark_resource_ranges(ud, ud->tchan_map, &rm_res->desc[i], "tchan"); + irq_res.sets += rm_res->sets * 2; } - irq_res.sets += rm_res->sets * 2; } /* rchan ranges */ @@ -4720,47 +4737,72 @@ static int bcdma_setup_resources(struct udma_dev *ud) rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN]; if (IS_ERR(rm_res)) { bitmap_zero(ud->rchan_map, ud->rchan_cnt); + irq_res.sets += 2; } else { bitmap_fill(ud->rchan_map, ud->rchan_cnt); for (i = 0; i < rm_res->sets; i++) udma_mark_resource_ranges(ud, ud->rchan_map, &rm_res->desc[i], "rchan"); + irq_res.sets += rm_res->sets * 2; } - irq_res.sets += rm_res->sets * 2; } irq_res.desc = kcalloc(irq_res.sets, sizeof(*irq_res.desc), GFP_KERNEL); + if (!irq_res.desc) + return -ENOMEM; if (ud->bchan_cnt) { rm_res = tisci_rm->rm_ranges[RM_RANGE_BCHAN]; - for (i = 0; i < rm_res->sets; i++) { - irq_res.desc[i].start = rm_res->desc[i].start + - oes->bcdma_bchan_ring; - irq_res.desc[i].num = rm_res->desc[i].num; + if (IS_ERR(rm_res)) { + irq_res.desc[0].start = oes->bcdma_bchan_ring; + irq_res.desc[0].num = ud->bchan_cnt; + i = 1; + } else { + for (i = 0; i < rm_res->sets; i++) { + irq_res.desc[i].start = rm_res->desc[i].start + + oes->bcdma_bchan_ring; + irq_res.desc[i].num = rm_res->desc[i].num; + } } } if (ud->tchan_cnt) { rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN]; - for (j = 0; j < rm_res->sets; j++, i += 2) { - irq_res.desc[i].start = rm_res->desc[j].start + - oes->bcdma_tchan_data; - irq_res.desc[i].num = rm_res->desc[j].num; + if (IS_ERR(rm_res)) { + irq_res.desc[i].start = oes->bcdma_tchan_data; + irq_res.desc[i].num = ud->tchan_cnt; + irq_res.desc[i + 1].start = oes->bcdma_tchan_ring; + irq_res.desc[i + 1].num = ud->tchan_cnt; + i += 2; + } else { + for (j = 0; j < rm_res->sets; j++, i += 2) { + irq_res.desc[i].start = rm_res->desc[j].start + + oes->bcdma_tchan_data; + irq_res.desc[i].num = rm_res->desc[j].num; - irq_res.desc[i + 1].start = rm_res->desc[j].start + - oes->bcdma_tchan_ring; - irq_res.desc[i + 1].num = rm_res->desc[j].num; + irq_res.desc[i + 1].start = rm_res->desc[j].start + + oes->bcdma_tchan_ring; + irq_res.desc[i + 1].num = rm_res->desc[j].num; + } } } if (ud->rchan_cnt) { rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN]; - for (j = 0; j < rm_res->sets; j++, i += 2) { - irq_res.desc[i].start = rm_res->desc[j].start + - oes->bcdma_rchan_data; - irq_res.desc[i].num = rm_res->desc[j].num; + if (IS_ERR(rm_res)) { + irq_res.desc[i].start = oes->bcdma_rchan_data; + irq_res.desc[i].num = ud->rchan_cnt; + irq_res.desc[i + 1].start = oes->bcdma_rchan_ring; + irq_res.desc[i + 1].num = ud->rchan_cnt; + i += 2; + } else { + for (j = 0; j < rm_res->sets; j++, i += 2) { + irq_res.desc[i].start = rm_res->desc[j].start + + oes->bcdma_rchan_data; + irq_res.desc[i].num = rm_res->desc[j].num; - irq_res.desc[i + 1].start = rm_res->desc[j].start + - oes->bcdma_rchan_ring; - irq_res.desc[i + 1].num = rm_res->desc[j].num; + irq_res.desc[i + 1].start = rm_res->desc[j].start + + oes->bcdma_rchan_ring; + irq_res.desc[i + 1].num = rm_res->desc[j].num; + } } } @@ -4858,39 +4900,54 @@ static int pktdma_setup_resources(struct udma_dev *ud) if (IS_ERR(rm_res)) { /* all rflows are assigned exclusively to Linux */ bitmap_zero(ud->rflow_in_use, ud->rflow_cnt); + irq_res.sets = 1; } else { bitmap_fill(ud->rflow_in_use, ud->rflow_cnt); for (i = 0; i < rm_res->sets; i++) udma_mark_resource_ranges(ud, ud->rflow_in_use, &rm_res->desc[i], "rflow"); + irq_res.sets = rm_res->sets; } - irq_res.sets = rm_res->sets; /* tflow ranges */ rm_res = tisci_rm->rm_ranges[RM_RANGE_TFLOW]; if (IS_ERR(rm_res)) { /* all tflows are assigned exclusively to Linux */ bitmap_zero(ud->tflow_map, ud->tflow_cnt); + irq_res.sets++; } else { bitmap_fill(ud->tflow_map, ud->tflow_cnt); for (i = 0; i < rm_res->sets; i++) udma_mark_resource_ranges(ud, ud->tflow_map, &rm_res->desc[i], "tflow"); + irq_res.sets += rm_res->sets; } - irq_res.sets += rm_res->sets; irq_res.desc = kcalloc(irq_res.sets, sizeof(*irq_res.desc), GFP_KERNEL); + if (!irq_res.desc) + return -ENOMEM; rm_res = tisci_rm->rm_ranges[RM_RANGE_TFLOW]; - for (i = 0; i < rm_res->sets; i++) { - irq_res.desc[i].start = rm_res->desc[i].start + - oes->pktdma_tchan_flow; - irq_res.desc[i].num = rm_res->desc[i].num; + if (IS_ERR(rm_res)) { + irq_res.desc[0].start = oes->pktdma_tchan_flow; + irq_res.desc[0].num = ud->tflow_cnt; + i = 1; + } else { + for (i = 0; i < rm_res->sets; i++) { + irq_res.desc[i].start = rm_res->desc[i].start + + oes->pktdma_tchan_flow; + irq_res.desc[i].num = rm_res->desc[i].num; + } } rm_res = tisci_rm->rm_ranges[RM_RANGE_RFLOW]; - for (j = 0; j < rm_res->sets; j++, i++) { - irq_res.desc[i].start = rm_res->desc[j].start + - oes->pktdma_rchan_flow; - irq_res.desc[i].num = rm_res->desc[j].num; + if (IS_ERR(rm_res)) { + irq_res.desc[i].start = oes->pktdma_rchan_flow; + irq_res.desc[i].num = ud->rflow_cnt; + } else { + for (j = 0; j < rm_res->sets; j++, i++) { + irq_res.desc[i].start = rm_res->desc[j].start + + oes->pktdma_rchan_flow; + irq_res.desc[i].num = rm_res->desc[j].num; + } } ret = ti_sci_inta_msi_domain_alloc_irqs(ud->dev, &irq_res); kfree(irq_res.desc); From 8affd8a4b5ce356c8900cfb037674f3a4a11fbdb Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 8 Dec 2021 10:01:27 -0700 Subject: [PATCH 035/251] dmaengine: idxd: fix missed completion on abort path Ming reported that with the abort path of the descriptor submission, there can be a window where a completed descriptor can be missed to be completed by the irq completion thread: CPU A CPU B Submit (successful) Submit (fail) irq_process_work_list() // empty llist_abort_desc() // remove all descs from pending list irq_process_pending_llist() // empty exit idxd_wq_thread() with no processing Add opportunistic descriptor completion in the abort path in order to remove the missed completion. Fixes: 6b4b87f2c31a ("dmaengine: idxd: fix submission race window") Reported-by: Ming Li Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163898288714.443911.16084982766671976640.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/submit.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index de76fb4abac2..83452fbbb168 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -106,6 +106,7 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, { struct idxd_desc *d, *t, *found = NULL; struct llist_node *head; + LIST_HEAD(flist); desc->completion->status = IDXD_COMP_DESC_ABORT; /* @@ -120,7 +121,11 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, found = desc; continue; } - list_add_tail(&desc->list, &ie->work_list); + + if (d->completion->status) + list_add_tail(&d->list, &flist); + else + list_add_tail(&d->list, &ie->work_list); } } @@ -130,6 +135,17 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, if (found) complete_desc(found, IDXD_COMPLETE_ABORT); + + /* + * complete_desc() will return desc to allocator and the desc can be + * acquired by a different process and the desc->list can be modified. + * Delete desc from list so the list trasversing does not get corrupted + * by the other process. + */ + list_for_each_entry_safe(d, t, &flist, list) { + list_del_init(&d->list); + complete_desc(d, IDXD_COMPLETE_NORMAL); + } } int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) From 822c9f2b833c53fc67e8adf6f63ecc3ea24d502c Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Thu, 25 Nov 2021 15:44:38 +0000 Subject: [PATCH 036/251] dmaengine: st_fdma: fix MODULE_ALIAS modprobe can't handle spaces in aliases. Fixes: 6b4cd727eaf1 ("dmaengine: st_fdma: Add STMicroelectronics FDMA engine driver support") Signed-off-by: Alyssa Ross Link: https://lore.kernel.org/r/20211125154441.2626214-1-hi@alyssa.is Signed-off-by: Vinod Koul --- drivers/dma/st_fdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/st_fdma.c b/drivers/dma/st_fdma.c index 962b6e05287b..d95c421877fb 100644 --- a/drivers/dma/st_fdma.c +++ b/drivers/dma/st_fdma.c @@ -874,4 +874,4 @@ MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("STMicroelectronics FDMA engine driver"); MODULE_AUTHOR("Ludovic.barre "); MODULE_AUTHOR("Peter Griffin "); -MODULE_ALIAS("platform: " DRIVER_NAME); +MODULE_ALIAS("platform:" DRIVER_NAME); From 2dee54b289fbc810669a1b2b8a0887fa1c9a14d7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 12 Dec 2021 17:20:25 +0000 Subject: [PATCH 037/251] ALSA: drivers: opl3: Fix incorrect use of vp->state Static analysis with scan-build has found an assignment to vp2 that is never used. It seems that the check on vp->state > 0 should be actually on vp2->state instead. Fix this. This dates back to 2002, I found the offending commit from the git history git://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git, commit 91e39521bbf6 ("[PATCH] ALSA patch for 2.5.4") Signed-off-by: Colin Ian King Cc: Link: https://lore.kernel.org/r/20211212172025.470367-1-colin.i.king@gmail.com Signed-off-by: Takashi Iwai --- sound/drivers/opl3/opl3_midi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/drivers/opl3/opl3_midi.c b/sound/drivers/opl3/opl3_midi.c index e1b69c65c3c8..e2b7be67f0e3 100644 --- a/sound/drivers/opl3/opl3_midi.c +++ b/sound/drivers/opl3/opl3_midi.c @@ -397,7 +397,7 @@ void snd_opl3_note_on(void *p, int note, int vel, struct snd_midi_channel *chan) } if (instr_4op) { vp2 = &opl3->voices[voice + 3]; - if (vp->state > 0) { + if (vp2->state > 0) { opl3_reg = reg_side | (OPL3_REG_KEYON_BLOCK + voice_offset + 3); reg_val = vp->keyon_reg & ~OPL3_KEYON_BIT; From c01c1db1dc632edafb0dff32d40daf4f9c1a4e19 Mon Sep 17 00:00:00 2001 From: Xiaoke Wang Date: Mon, 13 Dec 2021 15:39:31 +0800 Subject: [PATCH 038/251] ALSA: jack: Check the return value of kstrdup() kstrdup() can return NULL, it is better to check the return value of it. Signed-off-by: Xiaoke Wang Cc: Link: https://lore.kernel.org/r/tencent_094816F3522E0DC704056C789352EBBF0606@qq.com Signed-off-by: Takashi Iwai --- sound/core/jack.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/core/jack.c b/sound/core/jack.c index 32350c6aba84..537df1e98f8a 100644 --- a/sound/core/jack.c +++ b/sound/core/jack.c @@ -509,6 +509,10 @@ int snd_jack_new(struct snd_card *card, const char *id, int type, return -ENOMEM; jack->id = kstrdup(id, GFP_KERNEL); + if (jack->id == NULL) { + kfree(jack); + return -ENOMEM; + } /* don't creat input device for phantom jack */ if (!phantom_jack) { From 5cf06065bd1f7b94fbb80e7eeb033899f77ab5ba Mon Sep 17 00:00:00 2001 From: Alejandro Concepcion-Rodriguez Date: Sun, 12 Dec 2021 16:06:02 +0000 Subject: [PATCH 039/251] drm: simpledrm: fix wrong unit with pixel clock Pixel clock has to be set in kHz. Signed-off-by: Alejandro Concepcion-Rodriguez Fixes: 11e8f5fd223b ("drm: Add simpledrm driver") Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/6f8554ef-1305-0dda-821c-f7d2e5644a48@acoro.eu --- drivers/gpu/drm/tiny/simpledrm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c index 481b48bde047..5a6e89825bc2 100644 --- a/drivers/gpu/drm/tiny/simpledrm.c +++ b/drivers/gpu/drm/tiny/simpledrm.c @@ -458,7 +458,7 @@ static struct drm_display_mode simpledrm_mode(unsigned int width, { struct drm_display_mode mode = { SIMPLEDRM_MODE(width, height) }; - mode.clock = 60 /* Hz */ * mode.hdisplay * mode.vdisplay; + mode.clock = mode.hdisplay * mode.vdisplay * 60 / 1000 /* kHz */; drm_mode_set_name(&mode); return mode; From 3b8e19a0aa3933a785be9f1541afd8d398c4ec69 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Thu, 28 Oct 2021 09:43:11 +0200 Subject: [PATCH 040/251] drm/mediatek: hdmi: Perform NULL pointer check for mtk_hdmi_conf In commit 41ca9caaae0b ("drm/mediatek: hdmi: Add check for CEA modes only") a check for CEA modes was added to function mtk_hdmi_bridge_mode_valid() in order to address possible issues on MT8167; moreover, with commit c91026a938c2 ("drm/mediatek: hdmi: Add optional limit on maximal HDMI mode clock") another similar check was introduced. Unfortunately though, at the time of writing, MT8173 does not provide any mtk_hdmi_conf structure and this is crashing the kernel with NULL pointer upon entering mtk_hdmi_bridge_mode_valid(), which happens as soon as a HDMI cable gets plugged in. To fix this regression, add a NULL pointer check for hdmi->conf in the said function, restoring HDMI functionality and avoiding NULL pointer kernel panics. Fixes: 41ca9caaae0b ("drm/mediatek: hdmi: Add check for CEA modes only") Fixes: c91026a938c2 ("drm/mediatek: hdmi: Add optional limit on maximal HDMI mode clock") Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 5838c44cbf6f..3196189429bc 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1224,12 +1224,14 @@ static int mtk_hdmi_bridge_mode_valid(struct drm_bridge *bridge, return MODE_BAD; } - if (hdmi->conf->cea_modes_only && !drm_match_cea_mode(mode)) - return MODE_BAD; + if (hdmi->conf) { + if (hdmi->conf->cea_modes_only && !drm_match_cea_mode(mode)) + return MODE_BAD; - if (hdmi->conf->max_mode_clock && - mode->clock > hdmi->conf->max_mode_clock) - return MODE_CLOCK_HIGH; + if (hdmi->conf->max_mode_clock && + mode->clock > hdmi->conf->max_mode_clock) + return MODE_CLOCK_HIGH; + } if (mode->clock < 27000) return MODE_CLOCK_LOW; From 4bc5e64e6cf37007e436970024e5998ee0935651 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 26 Nov 2021 01:13:32 +0100 Subject: [PATCH 041/251] efi: Move efifb_setup_from_dmi() prototype from arch headers Commit 8633ef82f101 ("drivers/firmware: consolidate EFI framebuffer setup for all arches") made the Generic System Framebuffers (sysfb) driver able to be built on non-x86 architectures. But it left the efifb_setup_from_dmi() function prototype declaration in the architecture specific headers. This could lead to the following compiler warning as reported by the kernel test robot: drivers/firmware/efi/sysfb_efi.c:70:6: warning: no previous prototype for function 'efifb_setup_from_dmi' [-Wmissing-prototypes] void efifb_setup_from_dmi(struct screen_info *si, const char *opt) ^ drivers/firmware/efi/sysfb_efi.c:70:1: note: declare 'static' if the function is not intended to be used outside of this translation unit void efifb_setup_from_dmi(struct screen_info *si, const char *opt) Fixes: 8633ef82f101 ("drivers/firmware: consolidate EFI framebuffer setup for all arches") Reported-by: kernel test robot Cc: # 5.15.x Signed-off-by: Javier Martinez Canillas Acked-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20211126001333.555514-1-javierm@redhat.com Signed-off-by: Ard Biesheuvel --- arch/arm/include/asm/efi.h | 1 - arch/arm64/include/asm/efi.h | 1 - arch/riscv/include/asm/efi.h | 1 - arch/x86/include/asm/efi.h | 2 -- include/linux/efi.h | 6 ++++++ 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index a6f3b179e8a9..27218eabbf9a 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -17,7 +17,6 @@ #ifdef CONFIG_EFI void efi_init(void); -extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index d3e1825337be..ad55079abe47 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -14,7 +14,6 @@ #ifdef CONFIG_EFI extern void efi_init(void); -extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); #else #define efi_init() #endif diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h index 49b398fe99f1..cc4f6787f937 100644 --- a/arch/riscv/include/asm/efi.h +++ b/arch/riscv/include/asm/efi.h @@ -13,7 +13,6 @@ #ifdef CONFIG_EFI extern void efi_init(void); -extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); #else #define efi_init() #endif diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 4d0b126835b8..63158fd55856 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -197,8 +197,6 @@ static inline bool efi_runtime_supported(void) extern void parse_efi_setup(u64 phys_addr, u32 data_len); -extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); - extern void efi_thunk_runtime_setup(void); efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size, unsigned long descriptor_size, diff --git a/include/linux/efi.h b/include/linux/efi.h index dbd39b20e034..ef8dbc0a1522 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1283,4 +1283,10 @@ static inline struct efi_mokvar_table_entry *efi_mokvar_entry_find( } #endif +#ifdef CONFIG_SYSFB +extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); +#else +static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) { } +#endif + #endif /* _LINUX_EFI_H */ From 890d5b40908bfd1a79be018d2d297cf9df60f4ee Mon Sep 17 00:00:00 2001 From: Marian Postevca Date: Sat, 4 Dec 2021 23:49:12 +0200 Subject: [PATCH 042/251] usb: gadget: u_ether: fix race in setting MAC address in setup phase When listening for notifications through netlink of a new interface being registered, sporadically, it is possible for the MAC to be read as zero. The zero MAC address lasts a short period of time and then switches to a valid random MAC address. This causes problems for netd in Android, which assumes that the interface is malfunctioning and will not use it. In the good case we get this log: InterfaceController::getCfg() ifName usb0 hwAddr 92:a8:f0:73:79:5b ipv4Addr 0.0.0.0 flags 0x1002 In the error case we get these logs: InterfaceController::getCfg() ifName usb0 hwAddr 00:00:00:00:00:00 ipv4Addr 0.0.0.0 flags 0x1002 netd : interfaceGetCfg("usb0") netd : interfaceSetCfg() -> ServiceSpecificException (99, "[Cannot assign requested address] : ioctl() failed") The reason for the issue is the order in which the interface is setup, it is first registered through register_netdev() and after the MAC address is set. Fixed by first setting the MAC address of the net_device and after that calling register_netdev(). Fixes: bcd4a1c40bee885e ("usb: gadget: u_ether: construct with default values and add setters/getters") Cc: stable@vger.kernel.org Signed-off-by: Marian Postevca Link: https://lore.kernel.org/r/20211204214912.17627-1-posteuca@mutex.one Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_ether.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index e0ad5aed6ac9..6f5d45ef2e39 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "u_ether.h" @@ -863,19 +864,23 @@ int gether_register_netdev(struct net_device *net) { struct eth_dev *dev; struct usb_gadget *g; - struct sockaddr sa; int status; if (!net->dev.parent) return -EINVAL; dev = netdev_priv(net); g = dev->gadget; + + net->addr_assign_type = NET_ADDR_RANDOM; + eth_hw_addr_set(net, dev->dev_mac); + status = register_netdev(net); if (status < 0) { dev_dbg(&g->dev, "register_netdev failed, %d\n", status); return status; } else { INFO(dev, "HOST MAC %pM\n", dev->host_mac); + INFO(dev, "MAC %pM\n", dev->dev_mac); /* two kinds of host-initiated state changes: * - iff DATA transfer is active, carrier is "on" @@ -883,15 +888,6 @@ int gether_register_netdev(struct net_device *net) */ netif_carrier_off(net); } - sa.sa_family = net->type; - memcpy(sa.sa_data, dev->dev_mac, ETH_ALEN); - rtnl_lock(); - status = dev_set_mac_address(net, &sa, NULL); - rtnl_unlock(); - if (status) - pr_warn("cannot set self ethernet address: %d\n", status); - else - INFO(dev, "MAC %pM\n", dev->dev_mac); return status; } From ccc14c6cfd346e85c3ecb970975afd5132763437 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Thu, 9 Dec 2021 10:54:22 +0800 Subject: [PATCH 043/251] usb: xhci-mtk: fix list_del warning when enable list debug There is warning of 'list_del corruption' when enable list debug (CONFIG_DEBUG_LIST=y), fix it by using list_del_init() Fixes: 4ce186665e7c ("usb: xhci-mtk: Do not use xhci's virt_dev in drop_endpoint") Cc: stable Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/20211209025422.17108-1-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk-sch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mtk-sch.c b/drivers/usb/host/xhci-mtk-sch.c index 1edef7527c11..edbfa82c6565 100644 --- a/drivers/usb/host/xhci-mtk-sch.c +++ b/drivers/usb/host/xhci-mtk-sch.c @@ -781,7 +781,7 @@ int xhci_mtk_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) ret = xhci_check_bandwidth(hcd, udev); if (!ret) - INIT_LIST_HEAD(&mtk->bw_ep_chk_list); + list_del_init(&mtk->bw_ep_chk_list); return ret; } From 16f00d969afe60e233c1a91af7ac840df60d3536 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Fri, 10 Dec 2021 12:29:45 +0100 Subject: [PATCH 044/251] usb: cdnsp: Fix incorrect calling of cdnsp_died function Patch restrict calling of cdnsp_died function during removing modules or software disconnect. This function was called because after transition controller to HALT state the driver starts handling the deferred interrupt. In this case such interrupt can be simple ignored. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") cc: Reviewed-by: Peter Chen Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20211210112945.660-1-pawell@gli-login.cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-ring.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index 1b1438457fb0..e1ac6c398bd3 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -1523,7 +1523,14 @@ irqreturn_t cdnsp_thread_irq_handler(int irq, void *data) spin_lock_irqsave(&pdev->lock, flags); if (pdev->cdnsp_state & (CDNSP_STATE_HALTED | CDNSP_STATE_DYING)) { - cdnsp_died(pdev); + /* + * While removing or stopping driver there may still be deferred + * not handled interrupt which should not be treated as error. + * Driver should simply ignore it. + */ + if (pdev->gadget_driver) + cdnsp_died(pdev); + spin_unlock_irqrestore(&pdev->lock, flags); return IRQ_HANDLED; } From 50931ba27d1665c8b038cd1d16c5869301f32fd6 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Mon, 13 Dec 2021 06:06:09 +0100 Subject: [PATCH 045/251] usb: cdnsp: Fix issue in cdnsp_log_ep trace event Patch fixes incorrect order of __entry->stream_id and __entry->state parameters in TP_printk macro. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") cc: Reviewed-by: Peter Chen Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20211213050609.22640-1-pawell@gli-login.cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-trace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/cdns3/cdnsp-trace.h b/drivers/usb/cdns3/cdnsp-trace.h index 6a2571c6aa9e..5983dfb99653 100644 --- a/drivers/usb/cdns3/cdnsp-trace.h +++ b/drivers/usb/cdns3/cdnsp-trace.h @@ -57,9 +57,9 @@ DECLARE_EVENT_CLASS(cdnsp_log_ep, __entry->first_prime_det = pep->stream_info.first_prime_det; __entry->drbls_count = pep->stream_info.drbls_count; ), - TP_printk("%s: SID: %08x ep state: %x stream: enabled: %d num %d " + TP_printk("%s: SID: %08x, ep state: %x, stream: enabled: %d num %d " "tds %d, first prime: %d drbls %d", - __get_str(name), __entry->state, __entry->stream_id, + __get_str(name), __entry->stream_id, __entry->state, __entry->enabled, __entry->num_streams, __entry->td_count, __entry->first_prime_det, __entry->drbls_count) ); From 99ea221f2e2f2743314e348b25c1e2574b467528 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Tue, 7 Dec 2021 10:18:38 +0100 Subject: [PATCH 046/251] usb: cdnsp: Fix incorrect status for control request Patch fixes incorrect status for control request. Without this fix all usb_request objects were returned to upper drivers with usb_reqest->status field set to -EINPROGRESS. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") cc: Reported-by: Ken (Jian) He Reviewed-by: Peter Chen Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20211207091838.39572-1-pawell@gli-login.cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-ring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index e1ac6c398bd3..e45c3d6e1536 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -1029,6 +1029,8 @@ static void cdnsp_process_ctrl_td(struct cdnsp_device *pdev, return; } + *status = 0; + cdnsp_finish_td(pdev, td, event, pep, status); } From d800c65c2d4eccebb27ffb7808e842d5b533823c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 13 Dec 2021 09:04:01 -0700 Subject: [PATCH 047/251] io-wq: drop wqe lock before creating new worker We have two io-wq creation paths: - On queue enqueue - When a worker goes to sleep The latter invokes worker creation with the wqe->lock held, but that can run into problems if we end up exiting and need to cancel the queued work. syzbot caught this: ============================================ WARNING: possible recursive locking detected 5.16.0-rc4-syzkaller #0 Not tainted -------------------------------------------- iou-wrk-6468/6471 is trying to acquire lock: ffff88801aa98018 (&wqe->lock){+.+.}-{2:2}, at: io_worker_cancel_cb+0xb7/0x210 fs/io-wq.c:187 but task is already holding lock: ffff88801aa98018 (&wqe->lock){+.+.}-{2:2}, at: io_wq_worker_sleeping+0xb6/0x140 fs/io-wq.c:700 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&wqe->lock); lock(&wqe->lock); *** DEADLOCK *** May be due to missing lock nesting notation 1 lock held by iou-wrk-6468/6471: #0: ffff88801aa98018 (&wqe->lock){+.+.}-{2:2}, at: io_wq_worker_sleeping+0xb6/0x140 fs/io-wq.c:700 stack backtrace: CPU: 1 PID: 6471 Comm: iou-wrk-6468 Not tainted 5.16.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1dc/0x2d8 lib/dump_stack.c:106 print_deadlock_bug kernel/locking/lockdep.c:2956 [inline] check_deadlock kernel/locking/lockdep.c:2999 [inline] validate_chain+0x5984/0x8240 kernel/locking/lockdep.c:3788 __lock_acquire+0x1382/0x2b00 kernel/locking/lockdep.c:5027 lock_acquire+0x19f/0x4d0 kernel/locking/lockdep.c:5637 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:154 io_worker_cancel_cb+0xb7/0x210 fs/io-wq.c:187 io_wq_cancel_tw_create fs/io-wq.c:1220 [inline] io_queue_worker_create+0x3cf/0x4c0 fs/io-wq.c:372 io_wq_worker_sleeping+0xbe/0x140 fs/io-wq.c:701 sched_submit_work kernel/sched/core.c:6295 [inline] schedule+0x67/0x1f0 kernel/sched/core.c:6323 schedule_timeout+0xac/0x300 kernel/time/timer.c:1857 wait_woken+0xca/0x1b0 kernel/sched/wait.c:460 unix_msg_wait_data net/unix/unix_bpf.c:32 [inline] unix_bpf_recvmsg+0x7f9/0xe20 net/unix/unix_bpf.c:77 unix_stream_recvmsg+0x214/0x2c0 net/unix/af_unix.c:2832 sock_recvmsg_nosec net/socket.c:944 [inline] sock_recvmsg net/socket.c:962 [inline] sock_read_iter+0x3a7/0x4d0 net/socket.c:1035 call_read_iter include/linux/fs.h:2156 [inline] io_iter_do_read fs/io_uring.c:3501 [inline] io_read fs/io_uring.c:3558 [inline] io_issue_sqe+0x144c/0x9590 fs/io_uring.c:6671 io_wq_submit_work+0x2d8/0x790 fs/io_uring.c:6836 io_worker_handle_work+0x808/0xdd0 fs/io-wq.c:574 io_wqe_worker+0x395/0x870 fs/io-wq.c:630 ret_from_fork+0x1f/0x30 We can safely drop the lock before doing work creation, making the two contexts the same in that regard. Reported-by: syzbot+b18b8be69df33a3918e9@syzkaller.appspotmail.com Fixes: 71a85387546e ("io-wq: check for wq exit after adding new worker task_work") Signed-off-by: Jens Axboe --- fs/io-wq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io-wq.c b/fs/io-wq.c index 8d2bb818a3bb..5c4f582d6549 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -395,7 +395,9 @@ static void io_wqe_dec_running(struct io_worker *worker) if (atomic_dec_and_test(&acct->nr_running) && io_acct_run_queue(acct)) { atomic_inc(&acct->nr_running); atomic_inc(&wqe->wq->worker_refs); + raw_spin_unlock(&wqe->lock); io_queue_worker_create(worker, acct, create_worker_cb); + raw_spin_lock(&wqe->lock); } } From d341b427c3c3fd6a58263ce01e01700d16861c28 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 12 Dec 2021 02:11:45 +0300 Subject: [PATCH 048/251] ASoC: tegra: Add DAPM switches for headphones and mic jack UCM of Acer Chromebook (Nyan) uses DAPM switches of headphones and mic jack. These switches were lost by accident during unification of the machine drivers, restore them. Cc: Fixes: cc8f70f ("ASoC: tegra: Unify ASoC machine drivers") Reported-by: Thomas Graichen # T124 Nyan Big Tested-by: Thomas Graichen # T124 Nyan Big Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20211211231146.6137-1-digetx@gmail.com Signed-off-by: Mark Brown --- sound/soc/tegra/tegra_asoc_machine.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/tegra/tegra_asoc_machine.c b/sound/soc/tegra/tegra_asoc_machine.c index b95438c3dbf7..f3e86bd714b4 100644 --- a/sound/soc/tegra/tegra_asoc_machine.c +++ b/sound/soc/tegra/tegra_asoc_machine.c @@ -116,6 +116,8 @@ static const struct snd_kcontrol_new tegra_machine_controls[] = { SOC_DAPM_PIN_SWITCH("Headset Mic"), SOC_DAPM_PIN_SWITCH("Internal Mic 1"), SOC_DAPM_PIN_SWITCH("Internal Mic 2"), + SOC_DAPM_PIN_SWITCH("Headphones"), + SOC_DAPM_PIN_SWITCH("Mic Jack"), }; int tegra_asoc_machine_init(struct snd_soc_pcm_runtime *rtd) From db635ba4fadf3ba676d07537f3b3f58166aa7b0e Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 12 Dec 2021 02:11:46 +0300 Subject: [PATCH 049/251] ASoC: tegra: Restore headphones jack name on Nyan Big UCM of Acer Chromebook (Nyan) uses a different name for the headphones jack. The name was changed during unification of the machine drivers and UCM fails now to load because of that. Restore the old jack name. Cc: Fixes: cc8f70f ("ASoC: tegra: Unify ASoC machine drivers") Reported-by: Thomas Graichen # T124 Nyan Big Tested-by: Thomas Graichen # T124 Nyan Big Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20211211231146.6137-2-digetx@gmail.com Signed-off-by: Mark Brown --- sound/soc/tegra/tegra_asoc_machine.c | 9 ++++++++- sound/soc/tegra/tegra_asoc_machine.h | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/sound/soc/tegra/tegra_asoc_machine.c b/sound/soc/tegra/tegra_asoc_machine.c index f3e86bd714b4..a73404879aa1 100644 --- a/sound/soc/tegra/tegra_asoc_machine.c +++ b/sound/soc/tegra/tegra_asoc_machine.c @@ -124,10 +124,16 @@ int tegra_asoc_machine_init(struct snd_soc_pcm_runtime *rtd) { struct snd_soc_card *card = rtd->card; struct tegra_machine *machine = snd_soc_card_get_drvdata(card); + const char *jack_name; int err; if (machine->gpiod_hp_det && machine->asoc->add_hp_jack) { - err = snd_soc_card_jack_new(card, "Headphones Jack", + if (machine->asoc->hp_jack_name) + jack_name = machine->asoc->hp_jack_name; + else + jack_name = "Headphones Jack"; + + err = snd_soc_card_jack_new(card, jack_name, SND_JACK_HEADPHONE, &tegra_machine_hp_jack, tegra_machine_hp_jack_pins, @@ -660,6 +666,7 @@ static struct snd_soc_card snd_soc_tegra_max98090 = { static const struct tegra_asoc_data tegra_max98090_data = { .mclk_rate = tegra_machine_mclk_rate_12mhz, .card = &snd_soc_tegra_max98090, + .hp_jack_name = "Headphones", .add_common_dapm_widgets = true, .add_common_controls = true, .add_common_snd_ops = true, diff --git a/sound/soc/tegra/tegra_asoc_machine.h b/sound/soc/tegra/tegra_asoc_machine.h index d6a8d1320551..6f795d7dff7c 100644 --- a/sound/soc/tegra/tegra_asoc_machine.h +++ b/sound/soc/tegra/tegra_asoc_machine.h @@ -14,6 +14,7 @@ struct snd_soc_pcm_runtime; struct tegra_asoc_data { unsigned int (*mclk_rate)(unsigned int srate); const char *codec_dev_name; + const char *hp_jack_name; struct snd_soc_card *card; unsigned int mclk_id; bool hp_jack_gpio_active_low; From 2fe24343922e0428fb68674a4fae099171141bc7 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 13 Dec 2021 18:10:48 +0800 Subject: [PATCH 050/251] scsi: pm8001: Fix phys_to_virt() usage on dma_addr_t The driver supports a "direct" mode of operation, where the SMP req frame is directly copied into the command payload (and vice-versa for the SMP resp). To get at the SMP req frame data in the scatterlist the driver uses phys_to_virt() on the DMA mapped memory dma_addr_t . This is broken, and subsequently crashes as follows when an IOMMU is enabled: Unable to handle kernel paging request at virtual address ffff0000fcebfb00 ... pc : pm80xx_chip_smp_req+0x2d0/0x3d0 lr : pm80xx_chip_smp_req+0xac/0x3d0 pm80xx_chip_smp_req+0x2d0/0x3d0 pm8001_task_exec.constprop.0+0x368/0x520 pm8001_queue_command+0x1c/0x30 smp_execute_task_sg+0xdc/0x204 sas_discover_expander.part.0+0xac/0x6cc sas_discover_root_expander+0x8c/0x150 sas_discover_domain+0x3ac/0x6a0 process_one_work+0x1d0/0x354 worker_thread+0x13c/0x470 kthread+0x17c/0x190 ret_from_fork+0x10/0x20 Code: 371806e1 910006d6 6b16033f 54000249 (38766b05) ---[ end trace b91d59aaee98ea2d ]--- note: kworker/u192:0[7] exited with preempt_count 1 Instead use kmap_atomic(). -- Difference to v1: - use kmap_atomic() in both locations Difference to v2: - add whitespace around arithmetic (Damien) Link: https://lore.kernel.org/r/1639390248-213603-1-git-send-email-john.garry@huawei.com Reviewed-by: Damien Le Moal Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm80xx_hwi.c | 38 ++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index b9f6d83ff380..2101fc5761c3 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -3053,7 +3053,6 @@ mpi_smp_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) struct smp_completion_resp *psmpPayload; struct task_status_struct *ts; struct pm8001_device *pm8001_dev; - char *pdma_respaddr = NULL; psmpPayload = (struct smp_completion_resp *)(piomb + 4); status = le32_to_cpu(psmpPayload->status); @@ -3080,19 +3079,23 @@ mpi_smp_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) if (pm8001_dev) atomic_dec(&pm8001_dev->running_req); if (pm8001_ha->smp_exp_mode == SMP_DIRECT) { + struct scatterlist *sg_resp = &t->smp_task.smp_resp; + u8 *payload; + void *to; + pm8001_dbg(pm8001_ha, IO, "DIRECT RESPONSE Length:%d\n", param); - pdma_respaddr = (char *)(phys_to_virt(cpu_to_le64 - ((u64)sg_dma_address - (&t->smp_task.smp_resp)))); + to = kmap_atomic(sg_page(sg_resp)); + payload = to + sg_resp->offset; for (i = 0; i < param; i++) { - *(pdma_respaddr+i) = psmpPayload->_r_a[i]; + *(payload + i) = psmpPayload->_r_a[i]; pm8001_dbg(pm8001_ha, IO, "SMP Byte%d DMA data 0x%x psmp 0x%x\n", - i, *(pdma_respaddr + i), + i, *(payload + i), psmpPayload->_r_a[i]); } + kunmap_atomic(to); } break; case IO_ABORTED: @@ -4236,14 +4239,14 @@ static int pm80xx_chip_smp_req(struct pm8001_hba_info *pm8001_ha, struct sas_task *task = ccb->task; struct domain_device *dev = task->dev; struct pm8001_device *pm8001_dev = dev->lldd_dev; - struct scatterlist *sg_req, *sg_resp; + struct scatterlist *sg_req, *sg_resp, *smp_req; u32 req_len, resp_len; struct smp_req smp_cmd; u32 opc; struct inbound_queue_table *circularQ; - char *preq_dma_addr = NULL; - __le64 tmp_addr; u32 i, length; + u8 *payload; + u8 *to; memset(&smp_cmd, 0, sizeof(smp_cmd)); /* @@ -4280,8 +4283,9 @@ static int pm80xx_chip_smp_req(struct pm8001_hba_info *pm8001_ha, pm8001_ha->smp_exp_mode = SMP_INDIRECT; - tmp_addr = cpu_to_le64((u64)sg_dma_address(&task->smp_task.smp_req)); - preq_dma_addr = (char *)phys_to_virt(tmp_addr); + smp_req = &task->smp_task.smp_req; + to = kmap_atomic(sg_page(smp_req)); + payload = to + smp_req->offset; /* INDIRECT MODE command settings. Use DMA */ if (pm8001_ha->smp_exp_mode == SMP_INDIRECT) { @@ -4289,7 +4293,7 @@ static int pm80xx_chip_smp_req(struct pm8001_hba_info *pm8001_ha, /* for SPCv indirect mode. Place the top 4 bytes of * SMP Request header here. */ for (i = 0; i < 4; i++) - smp_cmd.smp_req16[i] = *(preq_dma_addr + i); + smp_cmd.smp_req16[i] = *(payload + i); /* exclude top 4 bytes for SMP req header */ smp_cmd.long_smp_req.long_req_addr = cpu_to_le64((u64)sg_dma_address @@ -4320,20 +4324,20 @@ static int pm80xx_chip_smp_req(struct pm8001_hba_info *pm8001_ha, pm8001_dbg(pm8001_ha, IO, "SMP REQUEST DIRECT MODE\n"); for (i = 0; i < length; i++) if (i < 16) { - smp_cmd.smp_req16[i] = *(preq_dma_addr+i); + smp_cmd.smp_req16[i] = *(payload + i); pm8001_dbg(pm8001_ha, IO, "Byte[%d]:%x (DMA data:%x)\n", i, smp_cmd.smp_req16[i], - *(preq_dma_addr)); + *(payload)); } else { - smp_cmd.smp_req[i] = *(preq_dma_addr+i); + smp_cmd.smp_req[i] = *(payload + i); pm8001_dbg(pm8001_ha, IO, "Byte[%d]:%x (DMA data:%x)\n", i, smp_cmd.smp_req[i], - *(preq_dma_addr)); + *(payload)); } } - + kunmap_atomic(to); build_smp_cmd(pm8001_dev->device_id, smp_cmd.tag, &smp_cmd, pm8001_ha->smp_exp_mode, length); rc = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &smp_cmd, From fea3fdf975dd9f3e5248afaab8fe023db313f005 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Tue, 14 Dec 2021 09:41:26 +0800 Subject: [PATCH 051/251] drm/ast: potential dereference of null pointer The return value of kzalloc() needs to be checked. To avoid use of null pointer '&ast_state->base' in case of the failure of alloc. Fixes: f0adbc382b8b ("drm/ast: Allocate initial CRTC state of the correct size") Signed-off-by: Jiasheng Jiang Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20211214014126.2211535-1-jiasheng@iscas.ac.cn --- drivers/gpu/drm/ast/ast_mode.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 1e30eaeb0e1b..d5c98f79d58d 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -1121,7 +1121,10 @@ static void ast_crtc_reset(struct drm_crtc *crtc) if (crtc->state) crtc->funcs->atomic_destroy_state(crtc, crtc->state); - __drm_atomic_helper_crtc_reset(crtc, &ast_state->base); + if (ast_state) + __drm_atomic_helper_crtc_reset(crtc, &ast_state->base); + else + __drm_atomic_helper_crtc_reset(crtc, NULL); } static struct drm_crtc_state * From 83b67041f3eaf33f98a075249aa7f4c7617c2f85 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 26 Nov 2021 10:43:48 +0100 Subject: [PATCH 052/251] USB: serial: cp210x: fix CP2105 GPIO registration When generalising GPIO support and adding support for CP2102N, the GPIO registration for some CP2105 devices accidentally broke. Specifically, when all the pins of a port are in "modem" mode, and thus unavailable for GPIO use, the GPIO chip would now be registered without having initialised the number of GPIO lines. This would in turn be rejected by gpiolib and some errors messages would be printed (but importantly probe would still succeed). Fix this by initialising the number of GPIO lines before registering the GPIO chip. Note that as for the other device types, and as when all CP2105 pins are muxed for LED function, the GPIO chip is registered also when no pins are available for GPIO use. Reported-by: Maarten Brock Link: https://lore.kernel.org/r/5eb560c81d2ea1a2b4602a92d9f48a89@vanmierlo.com Fixes: c8acfe0aadbe ("USB: serial: cp210x: implement GPIO support for CP2102N") Cc: stable@vger.kernel.org # 4.19 Cc: Karoly Pados Link: https://lore.kernel.org/r/20211126094348.31698-1-johan@kernel.org Reviewed-by: Greg Kroah-Hartman Tested-by: Maarten Brock Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 7705328034ca..8a60c0d56863 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -1635,6 +1635,8 @@ static int cp2105_gpioconf_init(struct usb_serial *serial) /* 2 banks of GPIO - One for the pins taken from each serial port */ if (intf_num == 0) { + priv->gc.ngpio = 2; + if (mode.eci == CP210X_PIN_MODE_MODEM) { /* mark all GPIOs of this interface as reserved */ priv->gpio_altfunc = 0xff; @@ -1645,8 +1647,9 @@ static int cp2105_gpioconf_init(struct usb_serial *serial) priv->gpio_pushpull = (u8)((le16_to_cpu(config.gpio_mode) & CP210X_ECI_GPIO_MODE_MASK) >> CP210X_ECI_GPIO_MODE_OFFSET); - priv->gc.ngpio = 2; } else if (intf_num == 1) { + priv->gc.ngpio = 3; + if (mode.sci == CP210X_PIN_MODE_MODEM) { /* mark all GPIOs of this interface as reserved */ priv->gpio_altfunc = 0xff; @@ -1657,7 +1660,6 @@ static int cp2105_gpioconf_init(struct usb_serial *serial) priv->gpio_pushpull = (u8)((le16_to_cpu(config.gpio_mode) & CP210X_SCI_GPIO_MODE_MASK) >> CP210X_SCI_GPIO_MODE_OFFSET); - priv->gc.ngpio = 3; } else { return -ENODEV; } From 2b503c8598d1b232e7fc7526bce9326d92331541 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Fri, 10 Dec 2021 11:07:14 +0100 Subject: [PATCH 053/251] USB: serial: option: add Telit FN990 compositions Add the following Telit FN990 compositions: 0x1070: tty, adb, rmnet, tty, tty, tty, tty 0x1071: tty, adb, mbim, tty, tty, tty, tty 0x1072: rndis, tty, adb, tty, tty, tty, tty 0x1073: tty, adb, ecm, tty, tty, tty, tty Signed-off-by: Daniele Palmas Link: https://lore.kernel.org/r/20211210100714.22587-1-dnlplm@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 546fce4617a8..42420bfc983c 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1219,6 +1219,14 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1063, 0xff), /* Telit LN920 (ECM) */ .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1070, 0xff), /* Telit FN990 (rmnet) */ + .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1071, 0xff), /* Telit FN990 (MBIM) */ + .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1072, 0xff), /* Telit FN990 (RNDIS) */ + .driver_info = NCTRL(2) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1073, 0xff), /* Telit FN990 (ECM) */ + .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), From 53b3495273282aa844c4613d19c3b30558c70c84 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Thu, 9 Dec 2021 20:41:24 -0800 Subject: [PATCH 054/251] drm/i915/display: Fix an unsigned subtraction which can never be negative. smatch warning: drivers/gpu/drm/i915/display/intel_dmc.c:601 parse_dmc_fw() warn: unsigned 'fw->size - offset' is never less than zero Firmware size is size_t and offset is u32. So the subtraction is unsigned which can never be less than zero. Fixes: 3d5928a168a9 ("drm/i915/xelpd: Pipe A DMC plugging") Signed-off-by: Harshit Mogalapalli Reviewed-by: Lucas De Marchi Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20211210044129.12422-1-harshit.m.mogalapalli@oracle.com (cherry picked from commit 87bb2a410dcfb617b88e4695edf4beb6336dc314) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_dmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 2dc9d632969d..aef69522f0be 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -596,7 +596,7 @@ static void parse_dmc_fw(struct drm_i915_private *dev_priv, continue; offset = readcount + dmc->dmc_info[id].dmc_offset * 4; - if (fw->size - offset < 0) { + if (offset > fw->size) { drm_err(&dev_priv->drm, "Reading beyond the fw_size\n"); continue; } From d296a74b7b59ff9116236c17edb25f26935dbf70 Mon Sep 17 00:00:00 2001 From: Bradley Scott Date: Mon, 13 Dec 2021 10:49:39 -0500 Subject: [PATCH 055/251] ALSA: hda/realtek: Amp init fixup for HP ZBook 15 G6 HP ZBook 15 G6 (SSID 103c:860f) needs the same speaker amplifier initialization as used on several other HP laptops using ALC285. Signed-off-by: Bradley Scott Cc: Link: https://lore.kernel.org/r/20211213154938.503201-1-Bradley.Scott@zebra.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3599f4c85ebf..d162662fe684 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8660,6 +8660,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x84da, "HP OMEN dc0019-ur", ALC295_FIXUP_HP_OMEN), SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8519, "HP Spectre x360 15-df0xxx", ALC285_FIXUP_HP_SPECTRE_X360), + SND_PCI_QUIRK(0x103c, 0x860f, "HP ZBook 15 G6", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x861f, "HP Elite Dragonfly G1", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO), From aa72394667e5cea3547e4c41ddff7ca8c632d764 Mon Sep 17 00:00:00 2001 From: Bradley Scott Date: Mon, 13 Dec 2021 11:22:47 -0500 Subject: [PATCH 056/251] ALSA: hda/realtek: Add new alc285-hp-amp-init model Adds a new "alc285-hp-amp-init" model that can be used to apply the ALC285 HP speaker amplifier initialization fixup to devices that are not already known by passing "hda_model=alc285-hp-amp-init" to the snd-sof-intel-hda-common module or "model=alc285-hp-amp-init" to the snd-hda-intel module, depending on which is being used. Signed-off-by: Bradley Scott Cc: Link: https://lore.kernel.org/r/20211213162246.506838-1-bscott@teksavvy.com Signed-off-by: Takashi Iwai --- Documentation/sound/hd-audio/models.rst | 2 ++ sound/pci/hda/patch_realtek.c | 1 + 2 files changed, 3 insertions(+) diff --git a/Documentation/sound/hd-audio/models.rst b/Documentation/sound/hd-audio/models.rst index 0ea967d34583..d25335993e55 100644 --- a/Documentation/sound/hd-audio/models.rst +++ b/Documentation/sound/hd-audio/models.rst @@ -326,6 +326,8 @@ usi-headset Headset support on USI machines dual-codecs Lenovo laptops with dual codecs +alc285-hp-amp-init + HP laptops which require speaker amplifier initialization (ALC285) ALC680 ====== diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d162662fe684..fc41f3e8ddc3 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9124,6 +9124,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP, .name = "alc287-ideapad-bass-spk-amp"}, {.id = ALC623_FIXUP_LENOVO_THINKSTATION_P340, .name = "alc623-lenovo-thinkstation-p340"}, {.id = ALC255_FIXUP_ACER_HEADPHONE_AND_MIC, .name = "alc255-acer-headphone-and-mic"}, + {.id = ALC285_FIXUP_HP_GPIO_AMP_INIT, .name = "alc285-hp-amp-init"}, {} }; #define ALC225_STANDARD_PINS \ From 8734b41b3efe0fc6082c1937b0e88556c396dc96 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Tue, 23 Nov 2021 18:15:20 +1000 Subject: [PATCH 057/251] powerpc/module_64: Fix livepatching for RO modules Livepatching a loaded module involves applying relocations through apply_relocate_add(), which attempts to write to read-only memory when CONFIG_STRICT_MODULE_RWX=y. Work around this by performing these writes through the text poke area by using patch_instruction(). R_PPC_REL24 is the only relocation type generated by the kpatch-build userspace tool or klp-convert kernel tree that I observed applying a relocation to a post-init module. A more comprehensive solution is planned, but using patch_instruction() for R_PPC_REL24 on should serve as a sufficient fix. This does have a performance impact, I observed ~15% overhead in module_load() on POWER8 bare metal with checksum verification off. Fixes: c35717c71e98 ("powerpc: Set ARCH_HAS_STRICT_MODULE_RWX") Cc: stable@vger.kernel.org # v5.14+ Reported-by: Joe Lawrence Signed-off-by: Russell Currey Tested-by: Joe Lawrence [mpe: Check return codes from patch_instruction()] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211214121248.777249-1-mpe@ellerman.id.au --- arch/powerpc/kernel/module_64.c | 42 ++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 6baa676e7cb6..5d77d3f5fbb5 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -422,11 +422,17 @@ static inline int create_stub(const Elf64_Shdr *sechdrs, const char *name) { long reladdr; + func_desc_t desc; + int i; if (is_mprofile_ftrace_call(name)) return create_ftrace_stub(entry, addr, me); - memcpy(entry->jump, ppc64_stub_insns, sizeof(ppc64_stub_insns)); + for (i = 0; i < sizeof(ppc64_stub_insns) / sizeof(u32); i++) { + if (patch_instruction(&entry->jump[i], + ppc_inst(ppc64_stub_insns[i]))) + return 0; + } /* Stub uses address relative to r2. */ reladdr = (unsigned long)entry - my_r2(sechdrs, me); @@ -437,10 +443,24 @@ static inline int create_stub(const Elf64_Shdr *sechdrs, } pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr); - entry->jump[0] |= PPC_HA(reladdr); - entry->jump[1] |= PPC_LO(reladdr); - entry->funcdata = func_desc(addr); - entry->magic = STUB_MAGIC; + if (patch_instruction(&entry->jump[0], + ppc_inst(entry->jump[0] | PPC_HA(reladdr)))) + return 0; + + if (patch_instruction(&entry->jump[1], + ppc_inst(entry->jump[1] | PPC_LO(reladdr)))) + return 0; + + // func_desc_t is 8 bytes if ABIv2, else 16 bytes + desc = func_desc(addr); + for (i = 0; i < sizeof(func_desc_t) / sizeof(u32); i++) { + if (patch_instruction(((u32 *)&entry->funcdata) + i, + ppc_inst(((u32 *)(&desc))[i]))) + return 0; + } + + if (patch_instruction(&entry->magic, ppc_inst(STUB_MAGIC))) + return 0; return 1; } @@ -495,8 +515,11 @@ static int restore_r2(const char *name, u32 *instruction, struct module *me) me->name, *instruction, instruction); return 0; } + /* ld r2,R2_STACK_OFFSET(r1) */ - *instruction = PPC_INST_LD_TOC; + if (patch_instruction(instruction, ppc_inst(PPC_INST_LD_TOC))) + return 0; + return 1; } @@ -636,9 +659,12 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, } /* Only replace bits 2 through 26 */ - *(uint32_t *)location - = (*(uint32_t *)location & ~0x03fffffc) + value = (*(uint32_t *)location & ~0x03fffffc) | (value & 0x03fffffc); + + if (patch_instruction((u32 *)location, ppc_inst(value))) + return -EFAULT; + break; case R_PPC64_REL64: From 83dbf898a2d45289be875deb580e93050ba67529 Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Tue, 14 Dec 2021 12:49:32 +0100 Subject: [PATCH 058/251] PCI/MSI: Mask MSI-X vectors only on success MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Masking all unused MSI-X entries is done to ensure that a crash kernel starts from a clean slate, which correponds to the reset state of the device as defined in the PCI-E specificion 3.0 and later: Vector Control for MSI-X Table Entries -------------------------------------- "00: Mask bit: When this bit is set, the function is prohibited from sending a message using this MSI-X Table entry. ... This bit’s state after reset is 1 (entry is masked)." A Marvell NVME device fails to deliver MSI interrupts after trying to enable MSI-X interrupts due to that masking. It seems to take the MSI-X mask bits into account even when MSI-X is disabled. While not specification compliant, this can be cured by moving the masking into the success path, so that the MSI-X table entries stay in device reset state when the MSI-X setup fails. [ tglx: Move it into the success path, add comment and amend changelog ] Fixes: aa8092c1d1f1 ("PCI/MSI: Mask all unused MSI-X entries") Signed-off-by: Stefan Roese Signed-off-by: Thomas Gleixner Cc: linux-pci@vger.kernel.org Cc: Bjorn Helgaas Cc: Michal Simek Cc: Marek Vasut Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211210161025.3287927-1-sr@denx.de --- drivers/pci/msi.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 48e3f4e47b29..6748cf9d7d90 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -722,9 +722,6 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, goto out_disable; } - /* Ensure that all table entries are masked. */ - msix_mask_all(base, tsize); - ret = msix_setup_entries(dev, base, entries, nvec, affd); if (ret) goto out_disable; @@ -751,6 +748,16 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, /* Set MSI-X enabled bits and unmask the function */ pci_intx_for_msi(dev, 0); dev->msix_enabled = 1; + + /* + * Ensure that all table entries are masked to prevent + * stale entries from firing in a crash kernel. + * + * Done late to deal with a broken Marvell NVME device + * which takes the MSI-X mask bits into account even + * when MSI-X is disabled, which prevents MSI delivery. + */ + msix_mask_all(base, tsize); pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0); pcibios_free_irq(dev); From 94185adbfad56815c2c8401e16d81bdb74a79201 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Dec 2021 12:42:14 +0100 Subject: [PATCH 059/251] PCI/MSI: Clear PCI_MSIX_FLAGS_MASKALL on error PCI_MSIX_FLAGS_MASKALL is set in the MSI-X control register at MSI-X interrupt setup time. It's cleared on success, but the error handling path only clears the PCI_MSIX_FLAGS_ENABLE bit. That's incorrect as the reset state of the PCI_MSIX_FLAGS_MASKALL bit is zero. That can be observed via lspci: Capabilities: [b0] MSI-X: Enable- Count=67 Masked+ Clear the bit in the error path to restore the reset state. Fixes: 438553958ba1 ("PCI/MSI: Enable and mask MSI-X early") Reported-by: Stefan Roese Signed-off-by: Thomas Gleixner Tested-by: Stefan Roese Cc: linux-pci@vger.kernel.org Cc: Bjorn Helgaas Cc: Michal Simek Cc: Marek Vasut Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/87tufevoqx.ffs@tglx --- drivers/pci/msi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 6748cf9d7d90..d84cf30bb279 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -784,7 +784,7 @@ out_free: free_msi_irqs(dev); out_disable: - pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0); + pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE, 0); return ret; } From 4fc7261dbab139d3c64c3b618262504e16cfe7ee Mon Sep 17 00:00:00 2001 From: Prathamesh Shete Date: Tue, 14 Dec 2021 17:06:53 +0530 Subject: [PATCH 060/251] mmc: sdhci-tegra: Fix switch to HS400ES mode When CMD13 is sent after switching to HS400ES mode, the bus is operating at either MMC_HIGH_26_MAX_DTR or MMC_HIGH_52_MAX_DTR. To meet Tegra SDHCI requirement at HS400ES mode, force SDHCI interface clock to MMC_HS200_MAX_DTR (200 MHz) so that host controller CAR clock and the interface clock are rate matched. Signed-off-by: Prathamesh Shete Acked-by: Adrian Hunter Fixes: dfc9700cef77 ("mmc: tegra: Implement HS400 enhanced strobe") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211214113653.4631-1-pshete@nvidia.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-tegra.c | 43 ++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index a5001875876b..9762ffab2e23 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -356,23 +356,6 @@ static void tegra_sdhci_set_tap(struct sdhci_host *host, unsigned int tap) } } -static void tegra_sdhci_hs400_enhanced_strobe(struct mmc_host *mmc, - struct mmc_ios *ios) -{ - struct sdhci_host *host = mmc_priv(mmc); - u32 val; - - val = sdhci_readl(host, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL); - - if (ios->enhanced_strobe) - val |= SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE; - else - val &= ~SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE; - - sdhci_writel(host, val, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL); - -} - static void tegra_sdhci_reset(struct sdhci_host *host, u8 mask) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); @@ -793,6 +776,32 @@ static void tegra_sdhci_set_clock(struct sdhci_host *host, unsigned int clock) } } +static void tegra_sdhci_hs400_enhanced_strobe(struct mmc_host *mmc, + struct mmc_ios *ios) +{ + struct sdhci_host *host = mmc_priv(mmc); + u32 val; + + val = sdhci_readl(host, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL); + + if (ios->enhanced_strobe) { + val |= SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE; + /* + * When CMD13 is sent from mmc_select_hs400es() after + * switching to HS400ES mode, the bus is operating at + * either MMC_HIGH_26_MAX_DTR or MMC_HIGH_52_MAX_DTR. + * To meet Tegra SDHCI requirement at HS400ES mode, force SDHCI + * interface clock to MMC_HS200_MAX_DTR (200 MHz) so that host + * controller CAR clock and the interface clock are rate matched. + */ + tegra_sdhci_set_clock(host, MMC_HS200_MAX_DTR); + } else { + val &= ~SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE; + } + + sdhci_writel(host, val, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL); +} + static unsigned int tegra_sdhci_get_max_clock(struct sdhci_host *host) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); From 8deb34a90f06374fd26f722c2a79e15160f66be7 Mon Sep 17 00:00:00 2001 From: Derek Fang Date: Tue, 14 Dec 2021 18:50:33 +0800 Subject: [PATCH 061/251] ASoC: rt5682: fix the wrong jack type detected Some powers were changed during the jack insert detection and clk's enable/disable in CCF. If in parallel, the influence has a chance to detect the wrong jack type, so add a lock. Signed-off-by: Derek Fang Link: https://lore.kernel.org/r/20211214105033.471-1-derek.fang@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5682.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index 5224123d0d3b..b34a8542077d 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -929,6 +929,8 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) unsigned int val, count; if (jack_insert) { + snd_soc_dapm_mutex_lock(dapm); + snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, RT5682_PWR_VREF2 | RT5682_PWR_MB, RT5682_PWR_VREF2 | RT5682_PWR_MB); @@ -979,6 +981,8 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) snd_soc_component_update_bits(component, RT5682_MICBIAS_2, RT5682_PWR_CLK25M_MASK | RT5682_PWR_CLK1M_MASK, RT5682_PWR_CLK25M_PU | RT5682_PWR_CLK1M_PU); + + snd_soc_dapm_mutex_unlock(dapm); } else { rt5682_enable_push_button_irq(component, false); snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, From edaa26334c117a584add6053f48d63a988d25a6e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 13 Dec 2021 14:14:43 -1000 Subject: [PATCH 062/251] iocost: Fix divide-by-zero on donation from low hweight cgroup The donation calculation logic assumes that the donor has non-zero after-donation hweight, so the lowest active hweight a donating cgroup can have is 2 so that it can donate 1 while keeping the other 1 for itself. Earlier, we only donated from cgroups with sizable surpluses so this condition was always true. However, with the precise donation algorithm implemented, f1de2439ec43 ("blk-iocost: revamp donation amount determination") made the donation amount calculation exact enabling even low hweight cgroups to donate. This means that in rare occasions, a cgroup with active hweight of 1 can enter donation calculation triggering the following warning and then a divide-by-zero oops. WARNING: CPU: 4 PID: 0 at block/blk-iocost.c:1928 transfer_surpluses.cold+0x0/0x53 [884/94867] ... RIP: 0010:transfer_surpluses.cold+0x0/0x53 Code: 92 ff 48 c7 c7 28 d1 ab b5 65 48 8b 34 25 00 ae 01 00 48 81 c6 90 06 00 00 e8 8b 3f fe ff 48 c7 c0 ea ff ff ff e9 95 ff 92 ff <0f> 0b 48 c7 c7 30 da ab b5 e8 71 3f fe ff 4c 89 e8 4d 85 ed 74 0 4 ... Call Trace: ioc_timer_fn+0x1043/0x1390 call_timer_fn+0xa1/0x2c0 __run_timers.part.0+0x1ec/0x2e0 run_timer_softirq+0x35/0x70 ... iocg: invalid donation weights in /a/b: active=1 donating=1 after=0 Fix it by excluding cgroups w/ active hweight < 2 from donating. Excluding these extreme low hweight donations shouldn't affect work conservation in any meaningful way. Signed-off-by: Tejun Heo Fixes: f1de2439ec43 ("blk-iocost: revamp donation amount determination") Cc: stable@vger.kernel.org # v5.10+ Link: https://lore.kernel.org/r/Ybfh86iSvpWKxhVM@slm.duckdns.org Signed-off-by: Jens Axboe --- block/blk-iocost.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index a5b37cc65b17..769b64394298 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2311,7 +2311,14 @@ static void ioc_timer_fn(struct timer_list *timer) hwm = current_hweight_max(iocg); new_hwi = hweight_after_donation(iocg, old_hwi, hwm, usage, &now); - if (new_hwi < hwm) { + /* + * Donation calculation assumes hweight_after_donation + * to be positive, a condition that a donor w/ hwa < 2 + * can't meet. Don't bother with donation if hwa is + * below 2. It's not gonna make a meaningful difference + * anyway. + */ + if (new_hwi < hwm && hwa >= 2) { iocg->hweight_donating = hwa; iocg->hweight_after_donation = new_hwi; list_add(&iocg->surplus_list, &surpluses); From f35838a6930296fc1988764cfa54cb3f705c0665 Mon Sep 17 00:00:00 2001 From: Jianglei Nie Date: Thu, 9 Dec 2021 14:56:31 +0800 Subject: [PATCH 063/251] btrfs: fix memory leak in __add_inode_ref() Line 1169 (#3) allocates a memory chunk for victim_name by kmalloc(), but when the function returns in line 1184 (#4) victim_name allocated by line 1169 (#3) is not freed, which will lead to a memory leak. There is a similar snippet of code in this function as allocating a memory chunk for victim_name in line 1104 (#1) as well as releasing the memory in line 1116 (#2). We should kfree() victim_name when the return value of backref_in_log() is less than zero and before the function returns in line 1184 (#4). 1057 static inline int __add_inode_ref(struct btrfs_trans_handle *trans, 1058 struct btrfs_root *root, 1059 struct btrfs_path *path, 1060 struct btrfs_root *log_root, 1061 struct btrfs_inode *dir, 1062 struct btrfs_inode *inode, 1063 u64 inode_objectid, u64 parent_objectid, 1064 u64 ref_index, char *name, int namelen, 1065 int *search_done) 1066 { 1104 victim_name = kmalloc(victim_name_len, GFP_NOFS); // #1: kmalloc (victim_name-1) 1105 if (!victim_name) 1106 return -ENOMEM; 1112 ret = backref_in_log(log_root, &search_key, 1113 parent_objectid, victim_name, 1114 victim_name_len); 1115 if (ret < 0) { 1116 kfree(victim_name); // #2: kfree (victim_name-1) 1117 return ret; 1118 } else if (!ret) { 1169 victim_name = kmalloc(victim_name_len, GFP_NOFS); // #3: kmalloc (victim_name-2) 1170 if (!victim_name) 1171 return -ENOMEM; 1180 ret = backref_in_log(log_root, &search_key, 1181 parent_objectid, victim_name, 1182 victim_name_len); 1183 if (ret < 0) { 1184 return ret; // #4: missing kfree (victim_name-2) 1185 } else if (!ret) { 1241 return 0; 1242 } Fixes: d3316c8233bb ("btrfs: Properly handle backref_in_log retval") CC: stable@vger.kernel.org # 5.10+ Reviewed-by: Qu Wenruo Reviewed-by: Filipe Manana Signed-off-by: Jianglei Nie Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 3e6f14e13918..8778401665c3 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1181,6 +1181,7 @@ again: parent_objectid, victim_name, victim_name_len); if (ret < 0) { + kfree(victim_name); return ret; } else if (!ret) { ret = -ENOENT; From 33fab972497ae66822c0b6846d4f9382938575b6 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 10 Dec 2021 19:02:18 +0000 Subject: [PATCH 064/251] btrfs: fix double free of anon_dev after failure to create subvolume When creating a subvolume, at create_subvol(), we allocate an anonymous device and later call btrfs_get_new_fs_root(), which in turn just calls btrfs_get_root_ref(). There we call btrfs_init_fs_root() which assigns the anonymous device to the root, but if after that call there's an error, when we jump to 'fail' label, we call btrfs_put_root(), which frees the anonymous device and then returns an error that is propagated back to create_subvol(). Than create_subvol() frees the anonymous device again. When this happens, if the anonymous device was not reallocated after the first time it was freed with btrfs_put_root(), we get a kernel message like the following: (...) [13950.282466] BTRFS: error (device dm-0) in create_subvol:663: errno=-5 IO failure [13950.283027] ida_free called for id=65 which is not allocated. [13950.285974] BTRFS info (device dm-0): forced readonly (...) If the anonymous device gets reallocated by another btrfs filesystem or any other kernel subsystem, then bad things can happen. So fix this by setting the root's anonymous device to 0 at btrfs_get_root_ref(), before we call btrfs_put_root(), if an error happened. Fixes: 2dfb1e43f57dd3 ("btrfs: preallocate anon block device at first phase of snapshot creation") CC: stable@vger.kernel.org # 5.10+ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 847aabb30676..28449ca66dbd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1732,6 +1732,14 @@ again: } return root; fail: + /* + * If our caller provided us an anonymous device, then it's his + * responsability to free it in case we fail. So we have to set our + * root's anon_dev to 0 to avoid a double free, once by btrfs_put_root() + * and once again by our caller. + */ + if (anon_dev) + root->anon_dev = 0; btrfs_put_root(root); return ERR_PTR(ret); } From 1b2e5e5c7feabb4f3041f637b96494944da6aeff Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 14 Dec 2021 11:29:01 +0000 Subject: [PATCH 065/251] btrfs: fix missing last dir item offset update when logging directory When logging a directory, once we finish processing a leaf that is full of dir items, if we find the next leaf was not modified in the current transaction, we grab the first key of that next leaf and log it as to mark the end of a key range boundary. However we did not update the value of ctx->last_dir_item_offset, which tracks the offset of the last logged key. This can result in subsequent logging of the same directory in the current transaction to not realize that key was already logged, and then add it to the middle of a batch that starts with a lower key, resulting later in a leaf with one key that is duplicated and at non-consecutive slots. When that happens we get an error later when writing out the leaf, reporting that there is a pair of keys in wrong order. The report is something like the following: Dec 13 21:44:50 kernel: BTRFS critical (device dm-0): corrupt leaf: root=18446744073709551610 block=118444032 slot=21, bad key order, prev (704687 84 4146773349) current (704687 84 1063561078) Dec 13 21:44:50 kernel: BTRFS info (device dm-0): leaf 118444032 gen 91449 total ptrs 39 free space 546 owner 18446744073709551610 Dec 13 21:44:50 kernel: item 0 key (704687 1 0) itemoff 3835 itemsize 160 Dec 13 21:44:50 kernel: inode generation 35532 size 1026 mode 40755 Dec 13 21:44:50 kernel: item 1 key (704687 12 704685) itemoff 3822 itemsize 13 Dec 13 21:44:50 kernel: item 2 key (704687 24 3817753667) itemoff 3736 itemsize 86 Dec 13 21:44:50 kernel: item 3 key (704687 60 0) itemoff 3728 itemsize 8 Dec 13 21:44:50 kernel: item 4 key (704687 72 0) itemoff 3720 itemsize 8 Dec 13 21:44:50 kernel: item 5 key (704687 84 140445108) itemoff 3666 itemsize 54 Dec 13 21:44:50 kernel: dir oid 704793 type 1 Dec 13 21:44:50 kernel: item 6 key (704687 84 298800632) itemoff 3599 itemsize 67 Dec 13 21:44:50 kernel: dir oid 707849 type 2 Dec 13 21:44:50 kernel: item 7 key (704687 84 476147658) itemoff 3532 itemsize 67 Dec 13 21:44:50 kernel: dir oid 707901 type 2 Dec 13 21:44:50 kernel: item 8 key (704687 84 633818382) itemoff 3471 itemsize 61 Dec 13 21:44:50 kernel: dir oid 704694 type 2 Dec 13 21:44:50 kernel: item 9 key (704687 84 654256665) itemoff 3403 itemsize 68 Dec 13 21:44:50 kernel: dir oid 707841 type 1 Dec 13 21:44:50 kernel: item 10 key (704687 84 995843418) itemoff 3331 itemsize 72 Dec 13 21:44:50 kernel: dir oid 2167736 type 1 Dec 13 21:44:50 kernel: item 11 key (704687 84 1063561078) itemoff 3278 itemsize 53 Dec 13 21:44:50 kernel: dir oid 704799 type 2 Dec 13 21:44:50 kernel: item 12 key (704687 84 1101156010) itemoff 3225 itemsize 53 Dec 13 21:44:50 kernel: dir oid 704696 type 1 Dec 13 21:44:50 kernel: item 13 key (704687 84 2521936574) itemoff 3173 itemsize 52 Dec 13 21:44:50 kernel: dir oid 704704 type 2 Dec 13 21:44:50 kernel: item 14 key (704687 84 2618368432) itemoff 3112 itemsize 61 Dec 13 21:44:50 kernel: dir oid 704738 type 1 Dec 13 21:44:50 kernel: item 15 key (704687 84 2676316190) itemoff 3046 itemsize 66 Dec 13 21:44:50 kernel: dir oid 2167729 type 1 Dec 13 21:44:50 kernel: item 16 key (704687 84 3319104192) itemoff 2986 itemsize 60 Dec 13 21:44:50 kernel: dir oid 704745 type 2 Dec 13 21:44:50 kernel: item 17 key (704687 84 3908046265) itemoff 2929 itemsize 57 Dec 13 21:44:50 kernel: dir oid 2167734 type 1 Dec 13 21:44:50 kernel: item 18 key (704687 84 3945713089) itemoff 2857 itemsize 72 Dec 13 21:44:50 kernel: dir oid 2167730 type 1 Dec 13 21:44:50 kernel: item 19 key (704687 84 4077169308) itemoff 2795 itemsize 62 Dec 13 21:44:50 kernel: dir oid 704688 type 1 Dec 13 21:44:50 kernel: item 20 key (704687 84 4146773349) itemoff 2727 itemsize 68 Dec 13 21:44:50 kernel: dir oid 707892 type 1 Dec 13 21:44:50 kernel: item 21 key (704687 84 1063561078) itemoff 2674 itemsize 53 Dec 13 21:44:50 kernel: dir oid 704799 type 2 Dec 13 21:44:50 kernel: item 22 key (704687 96 2) itemoff 2612 itemsize 62 Dec 13 21:44:50 kernel: item 23 key (704687 96 6) itemoff 2551 itemsize 61 Dec 13 21:44:50 kernel: item 24 key (704687 96 7) itemoff 2498 itemsize 53 Dec 13 21:44:50 kernel: item 25 key (704687 96 12) itemoff 2446 itemsize 52 Dec 13 21:44:50 kernel: item 26 key (704687 96 14) itemoff 2385 itemsize 61 Dec 13 21:44:50 kernel: item 27 key (704687 96 18) itemoff 2325 itemsize 60 Dec 13 21:44:50 kernel: item 28 key (704687 96 24) itemoff 2271 itemsize 54 Dec 13 21:44:50 kernel: item 29 key (704687 96 28) itemoff 2218 itemsize 53 Dec 13 21:44:50 kernel: item 30 key (704687 96 62) itemoff 2150 itemsize 68 Dec 13 21:44:50 kernel: item 31 key (704687 96 66) itemoff 2083 itemsize 67 Dec 13 21:44:50 kernel: item 32 key (704687 96 75) itemoff 2015 itemsize 68 Dec 13 21:44:50 kernel: item 33 key (704687 96 79) itemoff 1948 itemsize 67 Dec 13 21:44:50 kernel: item 34 key (704687 96 82) itemoff 1882 itemsize 66 Dec 13 21:44:50 kernel: item 35 key (704687 96 83) itemoff 1810 itemsize 72 Dec 13 21:44:50 kernel: item 36 key (704687 96 85) itemoff 1753 itemsize 57 Dec 13 21:44:50 kernel: item 37 key (704687 96 87) itemoff 1681 itemsize 72 Dec 13 21:44:50 kernel: item 38 key (704694 1 0) itemoff 1521 itemsize 160 Dec 13 21:44:50 kernel: inode generation 35534 size 30 mode 40755 Dec 13 21:44:50 kernel: BTRFS error (device dm-0): block=118444032 write time tree block corruption detected So fix that by adding the missing update of ctx->last_dir_item_offset with the offset of the boundary key. Reported-by: Chris Murphy Link: https://lore.kernel.org/linux-btrfs/CAJCQCtT+RSzpUjbMq+UfzNUMe1X5+1G+DnAGbHC=OZ=iRS24jg@mail.gmail.com/ Fixes: dc2872247ec0ca ("btrfs: keep track of the last logged keys when logging a directory") Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 8778401665c3..6993dcdba6f1 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3978,6 +3978,7 @@ search: goto done; } if (btrfs_header_generation(path->nodes[0]) != trans->transid) { + ctx->last_dir_item_offset = min_key.offset; ret = overwrite_item(trans, log, dst_path, path->nodes[0], path->slots[0], &min_key); From 80d5be1a057e05f01d66e986cfd34d71845e5190 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Mon, 6 Dec 2021 22:45:43 +0000 Subject: [PATCH 066/251] ASoC: tas2770: Fix setting of high sample rates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although the codec advertises support for 176.4 and 192 ksps, without this fix setting those sample rates fails with EINVAL at hw_params time. Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20211206224529.74656-1-povik@protonmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/tas2770.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/tas2770.c b/sound/soc/codecs/tas2770.c index 172e79cbe0da..6549e7fef3e3 100644 --- a/sound/soc/codecs/tas2770.c +++ b/sound/soc/codecs/tas2770.c @@ -291,11 +291,11 @@ static int tas2770_set_samplerate(struct tas2770_priv *tas2770, int samplerate) ramp_rate_val = TAS2770_TDM_CFG_REG0_SMP_44_1KHZ | TAS2770_TDM_CFG_REG0_31_88_2_96KHZ; break; - case 19200: + case 192000: ramp_rate_val = TAS2770_TDM_CFG_REG0_SMP_48KHZ | TAS2770_TDM_CFG_REG0_31_176_4_192KHZ; break; - case 17640: + case 176400: ramp_rate_val = TAS2770_TDM_CFG_REG0_SMP_44_1KHZ | TAS2770_TDM_CFG_REG0_31_176_4_192KHZ; break; From 1bcd326631dc4faa3322d60b4fc45e8b3747993e Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Mon, 6 Dec 2021 22:08:03 +0100 Subject: [PATCH 067/251] ASoC: meson: aiu: fifo: Add missing dma_coerce_mask_and_coherent() The FIFO registers which take an DMA-able address are only 32-bit wide on AIU. Add dma_coerce_mask_and_coherent() to make the DMA core aware of this limitation. Fixes: 6ae9ca9ce986bf ("ASoC: meson: aiu: add i2s and spdif support") Signed-off-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20211206210804.2512999-2-martin.blumenstingl@googlemail.com Signed-off-by: Mark Brown --- sound/soc/meson/aiu-fifo.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/meson/aiu-fifo.c b/sound/soc/meson/aiu-fifo.c index 4ad23267cace..d67ff4cdabd5 100644 --- a/sound/soc/meson/aiu-fifo.c +++ b/sound/soc/meson/aiu-fifo.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -179,6 +180,11 @@ int aiu_fifo_pcm_new(struct snd_soc_pcm_runtime *rtd, struct snd_card *card = rtd->card->snd_card; struct aiu_fifo *fifo = dai->playback_dma_data; size_t size = fifo->pcm->buffer_bytes_max; + int ret; + + ret = dma_coerce_mask_and_coherent(card->dev, DMA_BIT_MASK(32)); + if (ret) + return ret; snd_pcm_set_managed_buffer_all(rtd->pcm, SNDRV_DMA_TYPE_DEV, card->dev, size, size); From ee907afb0c39a41ee74b862882cfe12820c74b98 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Mon, 6 Dec 2021 22:08:04 +0100 Subject: [PATCH 068/251] ASoC: meson: aiu: Move AIU_I2S_MISC hold setting to aiu-fifo-i2s The out-of-tree vendor driver uses the following approach to set the AIU_I2S_MISC register: 1) write AIU_MEM_I2S_START_PTR and AIU_MEM_I2S_RD_PTR 2) configure AIU_I2S_MUTE_SWAP[15:0] 3) write AIU_MEM_I2S_END_PTR 4) set AIU_I2S_MISC[2] to 1 (documented as: "put I2S interface in hold mode") 5) set AIU_I2S_MISC[4] to 1 (depending on the driver revision it always stays at 1 while for older drivers this bit is unset in step 4) 6) set AIU_I2S_MISC[2] to 0 7) write AIU_MEM_I2S_MASKS 8) toggle AIU_MEM_I2S_CONTROL[0] 9) toggle AIU_MEM_I2S_BUF_CNTL[0] Move setting the AIU_I2S_MISC[2] bit to aiu_fifo_i2s_hw_params() so it resembles the flow in the vendor kernel more closely. While here also configure AIU_I2S_MISC[4] (documented as: "force each audio data to left or right according to the bit attached with the audio data") similar to how the vendor driver does this. This fixes the infamous and long-standing "machine gun noise" issue (a buffer underrun issue). Fixes: 6ae9ca9ce986bf ("ASoC: meson: aiu: add i2s and spdif support") Reported-by: Christian Hewitt Reported-by: Geraldo Nascimento Tested-by: Christian Hewitt Tested-by: Geraldo Nascimento Acked-by: Jerome Brunet Cc: stable@vger.kernel.org Signed-off-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20211206210804.2512999-3-martin.blumenstingl@googlemail.com Signed-off-by: Mark Brown --- sound/soc/meson/aiu-encoder-i2s.c | 33 ------------------------------- sound/soc/meson/aiu-fifo-i2s.c | 19 ++++++++++++++++++ 2 files changed, 19 insertions(+), 33 deletions(-) diff --git a/sound/soc/meson/aiu-encoder-i2s.c b/sound/soc/meson/aiu-encoder-i2s.c index 932224552146..67729de41a73 100644 --- a/sound/soc/meson/aiu-encoder-i2s.c +++ b/sound/soc/meson/aiu-encoder-i2s.c @@ -18,7 +18,6 @@ #define AIU_RST_SOFT_I2S_FAST BIT(0) #define AIU_I2S_DAC_CFG_MSB_FIRST BIT(2) -#define AIU_I2S_MISC_HOLD_EN BIT(2) #define AIU_CLK_CTRL_I2S_DIV_EN BIT(0) #define AIU_CLK_CTRL_I2S_DIV GENMASK(3, 2) #define AIU_CLK_CTRL_AOCLK_INVERT BIT(6) @@ -36,37 +35,6 @@ static void aiu_encoder_i2s_divider_enable(struct snd_soc_component *component, enable ? AIU_CLK_CTRL_I2S_DIV_EN : 0); } -static void aiu_encoder_i2s_hold(struct snd_soc_component *component, - bool enable) -{ - snd_soc_component_update_bits(component, AIU_I2S_MISC, - AIU_I2S_MISC_HOLD_EN, - enable ? AIU_I2S_MISC_HOLD_EN : 0); -} - -static int aiu_encoder_i2s_trigger(struct snd_pcm_substream *substream, int cmd, - struct snd_soc_dai *dai) -{ - struct snd_soc_component *component = dai->component; - - switch (cmd) { - case SNDRV_PCM_TRIGGER_START: - case SNDRV_PCM_TRIGGER_RESUME: - case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - aiu_encoder_i2s_hold(component, false); - return 0; - - case SNDRV_PCM_TRIGGER_STOP: - case SNDRV_PCM_TRIGGER_SUSPEND: - case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - aiu_encoder_i2s_hold(component, true); - return 0; - - default: - return -EINVAL; - } -} - static int aiu_encoder_i2s_setup_desc(struct snd_soc_component *component, struct snd_pcm_hw_params *params) { @@ -353,7 +321,6 @@ static void aiu_encoder_i2s_shutdown(struct snd_pcm_substream *substream, } const struct snd_soc_dai_ops aiu_encoder_i2s_dai_ops = { - .trigger = aiu_encoder_i2s_trigger, .hw_params = aiu_encoder_i2s_hw_params, .hw_free = aiu_encoder_i2s_hw_free, .set_fmt = aiu_encoder_i2s_set_fmt, diff --git a/sound/soc/meson/aiu-fifo-i2s.c b/sound/soc/meson/aiu-fifo-i2s.c index 2388a2d0b3a6..57e6e7160d2f 100644 --- a/sound/soc/meson/aiu-fifo-i2s.c +++ b/sound/soc/meson/aiu-fifo-i2s.c @@ -20,6 +20,8 @@ #define AIU_MEM_I2S_CONTROL_MODE_16BIT BIT(6) #define AIU_MEM_I2S_BUF_CNTL_INIT BIT(0) #define AIU_RST_SOFT_I2S_FAST BIT(0) +#define AIU_I2S_MISC_HOLD_EN BIT(2) +#define AIU_I2S_MISC_FORCE_LEFT_RIGHT BIT(4) #define AIU_FIFO_I2S_BLOCK 256 @@ -90,6 +92,10 @@ static int aiu_fifo_i2s_hw_params(struct snd_pcm_substream *substream, unsigned int val; int ret; + snd_soc_component_update_bits(component, AIU_I2S_MISC, + AIU_I2S_MISC_HOLD_EN, + AIU_I2S_MISC_HOLD_EN); + ret = aiu_fifo_hw_params(substream, params, dai); if (ret) return ret; @@ -117,6 +123,19 @@ static int aiu_fifo_i2s_hw_params(struct snd_pcm_substream *substream, snd_soc_component_update_bits(component, AIU_MEM_I2S_MASKS, AIU_MEM_I2S_MASKS_IRQ_BLOCK, val); + /* + * Most (all?) supported SoCs have this bit set by default. The vendor + * driver however sets it manually (depending on the version either + * while un-setting AIU_I2S_MISC_HOLD_EN or right before that). Follow + * the same approach for consistency with the vendor driver. + */ + snd_soc_component_update_bits(component, AIU_I2S_MISC, + AIU_I2S_MISC_FORCE_LEFT_RIGHT, + AIU_I2S_MISC_FORCE_LEFT_RIGHT); + + snd_soc_component_update_bits(component, AIU_I2S_MISC, + AIU_I2S_MISC_HOLD_EN, 0); + return 0; } From 6c3118c32129b4197999a8928ba776bcabd0f5c4 Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Fri, 10 Dec 2021 14:55:03 -0800 Subject: [PATCH 069/251] signal: Skip the altstack update when not needed == Background == Support for large, "dynamic" fpstates was recently merged. This included code to ensure that sigaltstacks are sufficiently sized for these large states. A new lock was added to remove races between enabling large features and setting up sigaltstacks. == Problem == The new lock (sigaltstack_lock()) is acquired in the sigreturn path before restoring the old sigaltstack. Unfortunately, contention on the new lock causes a measurable signal handling performance regression [1]. However, the common case is that no *changes* are made to the sigaltstack state at sigreturn. == Solution == do_sigaltstack() acquires sigaltstack_lock() and is used for both sys_sigaltstack() and restoring the sigaltstack in sys_sigreturn(). Check for changes to the sigaltstack before taking the lock. If no changes were made, return before acquiring the lock. This removes lock contention from the common-case sigreturn path. [1] https://lore.kernel.org/lkml/20211207012128.GA16074@xsang-OptiPlex-9020/ Fixes: 3aac3ebea08f ("x86/signal: Implement sigaltstack size validation") Reported-by: kernel test robot Signed-off-by: Chang S. Bae Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20211210225503.12734-1-chang.seok.bae@intel.com --- kernel/signal.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/signal.c b/kernel/signal.c index a629b11bf3e0..dfcee3888b00 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -4185,6 +4185,15 @@ do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp, ss_mode != 0)) return -EINVAL; + /* + * Return before taking any locks if no actual + * sigaltstack changes were requested. + */ + if (t->sas_ss_sp == (unsigned long)ss_sp && + t->sas_ss_size == ss_size && + t->sas_ss_flags == ss_flags) + return 0; + sigaltstack_lock(); if (ss_mode == SS_DISABLE) { ss_size = 0; From f3a8076eb28cae1553958c629aecec479394bbe2 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Sat, 4 Dec 2021 18:59:08 +0800 Subject: [PATCH 070/251] drm/amdgpu: correct register access for RLC_JUMP_TABLE_RESTORE should count on GC IP base address Signed-off-by: Le Ma Signed-off-by: Hawking Zhang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index b305fd39874f..edb3e3b08eed 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3070,8 +3070,8 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev) AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_GDS | AMD_PG_SUPPORT_RLC_SMU_HS)) { - WREG32(mmRLC_JUMP_TABLE_RESTORE, - adev->gfx.rlc.cp_table_gpu_addr >> 8); + WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE, + adev->gfx.rlc.cp_table_gpu_addr >> 8); gfx_v9_0_init_gfx_power_gating(adev); } } From 841933d5b8aa853abe68e63827f68f50fab37226 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Sat, 4 Dec 2021 19:22:12 +0800 Subject: [PATCH 071/251] drm/amdgpu: don't override default ECO_BITs setting Leave this bit as hardware default setting Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 1 - drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c | 1 - drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c | 1 - drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 1 - drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c | 1 - drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c | 1 - drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c | 1 - drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 2 -- 8 files changed, 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 480e41847d7c..ec4d5e15b766 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -162,7 +162,6 @@ static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index 14c1c1a297dd..6e0ace2fbfab 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -196,7 +196,6 @@ static void gfxhub_v2_0_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC); /* UC, uncached */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index e80d1dc43079..b4eddf6e98a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -197,7 +197,6 @@ static void gfxhub_v2_1_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC); /* UC, uncached */ diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index a99953833820..b3bede1dc41d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -145,7 +145,6 @@ static void mmhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c index f80a14a1b82d..f5f7181f9af5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c @@ -165,7 +165,6 @@ static void mmhub_v1_7_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 25f8e93e5ec3..3718ff610ab2 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -267,7 +267,6 @@ static void mmhub_v2_0_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC); /* UC, uncached */ diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index a11d60ec6321..9e16da28505a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -194,7 +194,6 @@ static void mmhub_v2_3_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC); /* UC, uncached */ diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index c4ef822bbe8c..ff49eeaf7882 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -189,8 +189,6 @@ static void mmhub_v9_4_init_tlb_regs(struct amdgpu_device *adev, int hubid) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, - ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, From dcd10d879a9d1d4e929d374c2f24aba8fac3252b Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 9 Dec 2021 12:13:53 -0600 Subject: [PATCH 072/251] drm/amd/pm: fix reading SMU FW version from amdgpu_firmware_info on YC This value does not get cached into adev->pm.fw_version during startup for smu13 like it does for other SMU like smu12. Signed-off-by: Mario Limonciello Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 35145db6eedf..19a5d2c39c8d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -198,6 +198,7 @@ int smu_v13_0_check_fw_status(struct smu_context *smu) int smu_v13_0_check_fw_version(struct smu_context *smu) { + struct amdgpu_device *adev = smu->adev; uint32_t if_version = 0xff, smu_version = 0xff; uint16_t smu_major; uint8_t smu_minor, smu_debug; @@ -210,6 +211,8 @@ int smu_v13_0_check_fw_version(struct smu_context *smu) smu_major = (smu_version >> 16) & 0xffff; smu_minor = (smu_version >> 8) & 0xff; smu_debug = (smu_version >> 0) & 0xff; + if (smu->is_apu) + adev->pm.fw_version = smu_version; switch (smu->adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 2): From 7e4d2f30df3fb48f75ce9e96867d42bdddab83ac Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 10 Dec 2021 15:03:59 -0800 Subject: [PATCH 073/251] drm/amd/display: Set exit_optimized_pwr_state for DCN31 [Why] SMU now respects the PHY refclk disable request from driver. This causes a hang during hotplug when PHY refclk was disabled because it's not being re-enabled and the transmitter control starts on dc_link_detect. [How] We normally would re-enable the clk with exit_optimized_pwr_state but this is only set on DCN21 and DCN301. Set it for dcn31 as well. This fixes DMCUB timeouts in the PHY. Fixes: 64b1d0e8d500 ("drm/amd/display: Add DCN3.1 HWSEQ") Reviewed-by: Eric Yang Acked-by: Pavle Kotarac Tested-by: Daniel Wheeler Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c index 05335a8c3c2d..4f6e639e9353 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c @@ -101,6 +101,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .z10_restore = dcn31_z10_restore, .z10_save_init = dcn31_z10_save_init, .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, + .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn20_update_visual_confirm_color, }; From 791255ca9fbe38042cfd55df5deb116dc11fef18 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 10 Dec 2021 15:04:05 -0800 Subject: [PATCH 074/251] drm/amd/display: Reset DMCUB before HW init [Why] If the firmware wasn't reset by PSP or HW and is currently running then the firmware will hang or perform underfined behavior when we modify its firmware state underneath it. [How] Reset DMCUB before setting up cache windows and performing HW init. Reviewed-by: Aurabindo Jayamohanan Pillai Acked-by: Pavle Kotarac Tested-by: Daniel Wheeler Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 122dae1a1813..e727f1dd2a9a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1051,6 +1051,11 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) return 0; } + /* Reset DMCUB if it was previously running - before we overwrite its memory. */ + status = dmub_srv_hw_reset(dmub_srv); + if (status != DMUB_STATUS_OK) + DRM_WARN("Error resetting DMUB HW: %d\n", status); + hdr = (const struct dmcub_firmware_header_v1_0 *)dmub_fw->data; fw_inst_const = dmub_fw->data + From 17c65d6fca844ee72a651944d8ce721e9040bf70 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 13 Dec 2021 14:38:38 +0800 Subject: [PATCH 075/251] drm/amdgpu: correct the wrong cached state for GMC on PICASSO Pair the operations did in GMC ->hw_init and ->hw_fini. That can help to maintain correct cached state for GMC and avoid unintention gate operation dropping due to wrong cached state. BugLink: https://gitlab.freedesktop.org/drm/amd/-/issues/1828 Signed-off-by: Evan Quan Acked-by: Guchun Chen Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 8 ++++---- drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c | 7 ++++++- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index cb82404df534..d84523cf5f75 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1808,6 +1808,14 @@ static int gmc_v9_0_hw_fini(void *handle) return 0; } + /* + * Pair the operations did in gmc_v9_0_hw_init and thus maintain + * a correct cached state for GMC. Otherwise, the "gate" again + * operation on S3 resuming will fail due to wrong cached state. + */ + if (adev->mmhub.funcs->update_power_gating) + adev->mmhub.funcs->update_power_gating(adev, false); + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index b3bede1dc41d..1da2ec692057 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -301,10 +301,10 @@ static void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, if (amdgpu_sriov_vf(adev)) return; - if (enable && adev->pg_flags & AMD_PG_SUPPORT_MMHUB) { - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GMC, true); - - } + if (adev->pg_flags & AMD_PG_SUPPORT_MMHUB) + amdgpu_dpm_set_powergating_by_smu(adev, + AMD_IP_BLOCK_TYPE_GMC, + enable); } static int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index 8d796ed3b7d1..619f8d305292 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -1328,7 +1328,12 @@ static int pp_set_powergating_by_smu(void *handle, pp_dpm_powergate_vce(handle, gate); break; case AMD_IP_BLOCK_TYPE_GMC: - pp_dpm_powergate_mmhub(handle); + /* + * For now, this is only used on PICASSO. + * And only "gate" operation is supported. + */ + if (gate) + pp_dpm_powergate_mmhub(handle); break; case AMD_IP_BLOCK_TYPE_GFX: ret = pp_dpm_powergate_gfx(handle, gate); From aa464957f7e660abd554f2546a588f6533720e21 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Tue, 14 Dec 2021 15:25:54 +0800 Subject: [PATCH 076/251] drm/amd/pm: fix a potential gpu_metrics_table memory leak Memory is allocated for gpu_metrics_table in renoir_init_smc_tables(), but not freed in int smu_v12_0_fini_smc_tables(). Free it! Fixes: 95868b85764a ("drm/amd/powerplay: add Renoir support for gpu metrics export") Signed-off-by: Lang Yu Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c index d60b8c5e8715..43028f2cd28b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c @@ -191,6 +191,9 @@ int smu_v12_0_fini_smc_tables(struct smu_context *smu) kfree(smu_table->watermarks_table); smu_table->watermarks_table = NULL; + kfree(smu_table->gpu_metrics_table); + smu_table->gpu_metrics_table = NULL; + return 0; } From 4ad8181426df92976feee5fbc55236293d069b37 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Thu, 9 Dec 2021 22:06:55 +0800 Subject: [PATCH 077/251] RDMA/hns: Fix RNR retransmission issue for HIP08 Due to the discrete nature of the HIP08 timer unit, a requester might finish the timeout period sooner, in elapsed real time, than its responder does, even when both sides share the identical RNR timeout length included in the RNR Nak packet and the responder indeed starts the timing prior to the requester. Furthermore, if a 'providential' resend packet arrived before the responder's timeout period expired, the responder is certainly entitled to drop the packet silently in the light of IB protocol. To address this problem, our team made good use of certain hardware facts: 1) The timing resolution regards the transmission arrangements is 1 microsecond, e.g. if cq_period field is set to 3, it would be interpreted as 3 microsecond by hardware 2) A QPC field shall inform the hardware how many timing unit (ticks) constitutes a full microsecond, which, by default, is 1000 3) It takes 14ns for the processor to handle a packet in the buffer, so the RNR timeout length of 10ns would ensure our processing mechanism is disabled during the entire timeout period and the packet won't be dropped silently To achieve (3), we permanently set the QPC field mentioned in (2) to zero which nominally indicates every time tick is equivalent to a microsecond in wall-clock time; now, a RNR timeout period at face value of 10 would only last 10 ticks, which is 10ns in wall-clock time. It's worth noting that we adapt the driver by magnifying certain configuration parameters(cq_period, eq_period and ack_timeout)by 1000 given the user assumes the configuring timing unit to be microseconds. Also, this particular improvisation is only deployed on HIP08 since other hardware has already solved this issue. Fixes: cfc85f3e4b7f ("RDMA/hns: Add profile support for hip08 driver") Link: https://lore.kernel.org/r/20211209140655.49493-1-liangwenpeng@huawei.com Signed-off-by: Yangyang Li Signed-off-by: Wenpeng Liang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 64 +++++++++++++++++++--- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 8 +++ 2 files changed, 65 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index bbfa1332dedc..eb0defa80d0d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1594,11 +1594,17 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev) { struct hns_roce_cmq_desc desc; struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data; + u32 clock_cycles_of_1us; hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GLOBAL_PARAM, false); - hr_reg_write(req, CFG_GLOBAL_PARAM_1US_CYCLES, 0x3e8); + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) + clock_cycles_of_1us = HNS_ROCE_1NS_CFG; + else + clock_cycles_of_1us = HNS_ROCE_1US_CFG; + + hr_reg_write(req, CFG_GLOBAL_PARAM_1US_CYCLES, clock_cycles_of_1us); hr_reg_write(req, CFG_GLOBAL_PARAM_UDP_PORT, ROCE_V2_UDP_DPORT); return hns_roce_cmq_send(hr_dev, &desc, 1); @@ -4802,6 +4808,30 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp, return ret; } +static bool check_qp_timeout_cfg_range(struct hns_roce_dev *hr_dev, u8 *timeout) +{ +#define QP_ACK_TIMEOUT_MAX_HIP08 20 +#define QP_ACK_TIMEOUT_OFFSET 10 +#define QP_ACK_TIMEOUT_MAX 31 + + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { + if (*timeout > QP_ACK_TIMEOUT_MAX_HIP08) { + ibdev_warn(&hr_dev->ib_dev, + "Local ACK timeout shall be 0 to 20.\n"); + return false; + } + *timeout += QP_ACK_TIMEOUT_OFFSET; + } else if (hr_dev->pci_dev->revision > PCI_REVISION_ID_HIP08) { + if (*timeout > QP_ACK_TIMEOUT_MAX) { + ibdev_warn(&hr_dev->ib_dev, + "Local ACK timeout shall be 0 to 31.\n"); + return false; + } + } + + return true; +} + static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, @@ -4811,6 +4841,7 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); int ret = 0; + u8 timeout; if (attr_mask & IB_QP_AV) { ret = hns_roce_v2_set_path(ibqp, attr, attr_mask, context, @@ -4820,12 +4851,10 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, } if (attr_mask & IB_QP_TIMEOUT) { - if (attr->timeout < 31) { - hr_reg_write(context, QPC_AT, attr->timeout); + timeout = attr->timeout; + if (check_qp_timeout_cfg_range(hr_dev, &timeout)) { + hr_reg_write(context, QPC_AT, timeout); hr_reg_clear(qpc_mask, QPC_AT); - } else { - ibdev_warn(&hr_dev->ib_dev, - "Local ACK timeout shall be 0 to 30.\n"); } } @@ -4882,7 +4911,9 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask); if (attr_mask & IB_QP_MIN_RNR_TIMER) { - hr_reg_write(context, QPC_MIN_RNR_TIME, attr->min_rnr_timer); + hr_reg_write(context, QPC_MIN_RNR_TIME, + hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 ? + HNS_ROCE_RNR_TIMER_10NS : attr->min_rnr_timer); hr_reg_clear(qpc_mask, QPC_MIN_RNR_TIME); } @@ -5499,6 +5530,16 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) hr_reg_write(cq_context, CQC_CQ_MAX_CNT, cq_count); hr_reg_clear(cqc_mask, CQC_CQ_MAX_CNT); + + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { + if (cq_period * HNS_ROCE_CLOCK_ADJUST > USHRT_MAX) { + dev_info(hr_dev->dev, + "cq_period(%u) reached the upper limit, adjusted to 65.\n", + cq_period); + cq_period = HNS_ROCE_MAX_CQ_PERIOD; + } + cq_period *= HNS_ROCE_CLOCK_ADJUST; + } hr_reg_write(cq_context, CQC_CQ_PERIOD, cq_period); hr_reg_clear(cqc_mask, CQC_CQ_PERIOD); @@ -5894,6 +5935,15 @@ static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq, hr_reg_write(eqc, EQC_EQ_PROD_INDX, HNS_ROCE_EQ_INIT_PROD_IDX); hr_reg_write(eqc, EQC_EQ_MAX_CNT, eq->eq_max_cnt); + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { + if (eq->eq_period * HNS_ROCE_CLOCK_ADJUST > USHRT_MAX) { + dev_info(hr_dev->dev, "eq_period(%u) reached the upper limit, adjusted to 65.\n", + eq->eq_period); + eq->eq_period = HNS_ROCE_MAX_EQ_PERIOD; + } + eq->eq_period *= HNS_ROCE_CLOCK_ADJUST; + } + hr_reg_write(eqc, EQC_EQ_PERIOD, eq->eq_period); hr_reg_write(eqc, EQC_EQE_REPORT_TIMER, HNS_ROCE_EQ_INIT_REPORT_TIMER); hr_reg_write(eqc, EQC_EQE_BA_L, bt_ba >> 3); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 4d904d5e82be..35c61da7ba15 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -1444,6 +1444,14 @@ struct hns_roce_dip { struct list_head node; /* all dips are on a list */ }; +/* only for RNR timeout issue of HIP08 */ +#define HNS_ROCE_CLOCK_ADJUST 1000 +#define HNS_ROCE_MAX_CQ_PERIOD 65 +#define HNS_ROCE_MAX_EQ_PERIOD 65 +#define HNS_ROCE_RNR_TIMER_10NS 1 +#define HNS_ROCE_1US_CFG 999 +#define HNS_ROCE_1NS_CFG 0 + #define HNS_ROCE_AEQ_DEFAULT_BURST_NUM 0x0 #define HNS_ROCE_AEQ_DEFAULT_INTERVAL 0x0 #define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x0 From bee90911e0138c76ee67458ac0d58b38a3190f65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Wed, 8 Dec 2021 18:52:38 +0100 Subject: [PATCH 078/251] IB/qib: Fix memory leak in qib_user_sdma_queue_pkts() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The wrong goto label was used for the error case and missed cleanup of the pkt allocation. Fixes: d39bf40e55e6 ("IB/qib: Protect from buffer overflow in struct qib_user_sdma_pkt fields") Link: https://lore.kernel.org/r/20211208175238.29983-1-jose.exposito89@gmail.com Addresses-Coverity-ID: 1493352 ("Resource leak") Signed-off-by: José Expósito Acked-by: Mike Marciniszyn Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_user_sdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index ac11943a5ddb..bf2f30d67949 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -941,7 +941,7 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, &addrlimit) || addrlimit > type_max(typeof(pkt->addrlimit))) { ret = -EINVAL; - goto free_pbc; + goto free_pkt; } pkt->addrlimit = addrlimit; From 12d3bbdd6bd2780b71cc466f3fbc6eb7d43bbc2a Mon Sep 17 00:00:00 2001 From: Jiacheng Shi Date: Fri, 10 Dec 2021 01:42:34 -0800 Subject: [PATCH 079/251] RDMA/hns: Replace kfree() with kvfree() Variables allocated by kvmalloc_array() should not be freed by kfree. Because they may be allocated by vmalloc. So we replace kfree() with kvfree() here. Fixes: 6fd610c5733d ("RDMA/hns: Support 0 hop addressing for SRQ buffer") Link: https://lore.kernel.org/r/20211210094234.5829-1-billsjc@sjtu.edu.cn Signed-off-by: Jiacheng Shi Acked-by: Wenpeng Liang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_srq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 6eee9deadd12..e64ef6903fb4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -259,7 +259,7 @@ static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) static void free_srq_wrid(struct hns_roce_srq *srq) { - kfree(srq->wrid); + kvfree(srq->wrid); srq->wrid = NULL; } From cb2ac2912a9ca7d3d26291c511939a41361d2d83 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 14 Dec 2021 07:03:24 -0700 Subject: [PATCH 080/251] block: reduce kblockd_mod_delayed_work_on() CPU consumption Dexuan reports that he's seeing spikes of very heavy CPU utilization when running 24 disks and using the 'none' scheduler. This happens off the sched restart path, because SCSI requires the queue to be restarted async, and hence we're hammering on mod_delayed_work_on() to ensure that the work item gets run appropriately. Avoid hammering on the timer and just use queue_work_on() if no delay has been specified. Reported-and-tested-by: Dexuan Cui Link: https://lore.kernel.org/linux-block/BYAPR21MB1270C598ED214C0490F47400BF719@BYAPR21MB1270.namprd21.prod.outlook.com/ Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/blk-core.c b/block/blk-core.c index 1378d084c770..c1833f95cb97 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1484,6 +1484,8 @@ EXPORT_SYMBOL(kblockd_schedule_work); int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay) { + if (!delay) + return queue_work_on(cpu, kblockd_workqueue, &dwork->work); return mod_delayed_work_on(cpu, kblockd_workqueue, dwork, delay); } EXPORT_SYMBOL(kblockd_mod_delayed_work_on); From aa97f6cdb7e92909e17c8ca63e622fcb81d57a57 Mon Sep 17 00:00:00 2001 From: Lin Feng Date: Fri, 12 Nov 2021 13:36:29 +0800 Subject: [PATCH 081/251] bcache: fix NULL pointer reference in cached_dev_detach_finish Commit 0259d4498ba4 ("bcache: move calc_cached_dev_sectors to proper place on backing device detach") tries to fix calc_cached_dev_sectors when bcache device detaches, but now we have: cached_dev_detach_finish ... bcache_device_detach(&dc->disk); ... closure_put(&d->c->caching); d->c = NULL; [*explicitly set dc->disk.c to NULL*] list_move(&dc->list, &uncached_devices); calc_cached_dev_sectors(dc->disk.c); [*passing a NULL pointer*] ... Upper codeflows shows how bug happens, this patch fix the problem by caching dc->disk.c beforehand, and cache_set won't be freed under us because c->caching closure at least holds a reference count and closure callback __cache_set_unregister only being called by bch_cache_set_stop which using closure_queue(&c->caching), that means c->caching closure callback for destroying cache_set won't be trigger by previous closure_put(&d->c->caching). So at this stage(while cached_dev_detach_finish is calling) it's safe to access cache_set dc->disk.c. Fixes: 0259d4498ba4 ("bcache: move calc_cached_dev_sectors to proper place on backing device detach") Signed-off-by: Lin Feng Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20211112053629.3437-2-colyli@suse.de Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 86b9e355c583..140f35dc0c45 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1139,6 +1139,7 @@ static void cancel_writeback_rate_update_dwork(struct cached_dev *dc) static void cached_dev_detach_finish(struct work_struct *w) { struct cached_dev *dc = container_of(w, struct cached_dev, detach); + struct cache_set *c = dc->disk.c; BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)); BUG_ON(refcount_read(&dc->count)); @@ -1156,7 +1157,7 @@ static void cached_dev_detach_finish(struct work_struct *w) bcache_device_detach(&dc->disk); list_move(&dc->list, &uncached_devices); - calc_cached_dev_sectors(dc->disk.c); + calc_cached_dev_sectors(c); clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags); clear_bit(BCACHE_DEV_UNLINK_DONE, &dc->disk.flags); From f7ac570d0f026cf5475d4cc4d8040bd947980b3a Mon Sep 17 00:00:00 2001 From: Jeremy Szu Date: Wed, 15 Dec 2021 00:41:54 +0800 Subject: [PATCH 082/251] ALSA: hda/realtek: fix mute/micmute LEDs for a HP ProBook There is a HP ProBook which using ALC236 codec and need the ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF quirk to make mute LED and micmute LED work. Signed-off-by: Jeremy Szu Cc: Link: https://lore.kernel.org/r/20211214164156.49711-1-jeremy.szu@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index fc41f3e8ddc3..e59ff75eea75 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8706,6 +8706,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x103c, 0x88d0, "HP Pavilion 15-eh1xxx (mainboard 88D0)", ALC287_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x89ca, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), From 58e138d62476fc5f889252dcf73848beeaa54789 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 13 Dec 2021 12:27:55 +0100 Subject: [PATCH 083/251] Revert "x86/boot: Mark prepare_command_line() __init" This reverts commit c0f2077baa4113f38f008b8e912b9fb3ff8d43df. Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211213112757.2612-2-bp@alien8.de --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 6a190c7f4d71..c410be738ae7 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -742,7 +742,7 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) return 0; } -static char * __init prepare_command_line(void) +static char *prepare_command_line(void) { #ifdef CONFIG_CMDLINE_BOOL #ifdef CONFIG_CMDLINE_OVERRIDE From fbe6183998546f8896ee0b620ece86deff5a2fd1 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 13 Dec 2021 12:27:56 +0100 Subject: [PATCH 084/251] Revert "x86/boot: Pull up cmdline preparation and early param parsing" This reverts commit 8d48bf8206f77aa8687f0e241e901e5197e52423. It turned out to be a bad idea as it broke supplying mem= cmdline parameters due to parse_memopt() requiring preparatory work like setting up the e820 table in e820__memory_setup() in order to be able to exclude the range specified by mem=. Pulling that up would've broken Xen PV again, see threads at https://lkml.kernel.org/r/20210920120421.29276-1-jgross@suse.com due to xen_memory_setup() needing the first reservations in early_reserve_memory() - kernel and initrd - to have happened already. This could be fixed again by having Xen do those reservations itself... Long story short, revert this and do a simpler fix in a later patch. Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211213112757.2612-3-bp@alien8.de --- arch/x86/kernel/setup.c | 66 +++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 39 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c410be738ae7..49b596db5631 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -742,28 +742,6 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) return 0; } -static char *prepare_command_line(void) -{ -#ifdef CONFIG_CMDLINE_BOOL -#ifdef CONFIG_CMDLINE_OVERRIDE - strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); -#else - if (builtin_cmdline[0]) { - /* append boot loader cmdline to builtin */ - strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE); - strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE); - strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); - } -#endif -#endif - - strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); - - parse_early_param(); - - return command_line; -} - /* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -852,23 +830,6 @@ void __init setup_arch(char **cmdline_p) x86_init.oem.arch_setup(); - /* - * x86_configure_nx() is called before parse_early_param() (called by - * prepare_command_line()) to detect whether hardware doesn't support - * NX (so that the early EHCI debug console setup can safely call - * set_fixmap()). It may then be called again from within noexec_setup() - * during parsing early parameters to honor the respective command line - * option. - */ - x86_configure_nx(); - - /* - * This parses early params and it needs to run before - * early_reserve_memory() because latter relies on such settings - * supplied as early params. - */ - *cmdline_p = prepare_command_line(); - /* * Do some memory reservations *before* memory is added to memblock, so * memblock allocations won't overwrite it. @@ -902,6 +863,33 @@ void __init setup_arch(char **cmdline_p) bss_resource.start = __pa_symbol(__bss_start); bss_resource.end = __pa_symbol(__bss_stop)-1; +#ifdef CONFIG_CMDLINE_BOOL +#ifdef CONFIG_CMDLINE_OVERRIDE + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); +#else + if (builtin_cmdline[0]) { + /* append boot loader cmdline to builtin */ + strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE); + strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE); + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); + } +#endif +#endif + + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); + *cmdline_p = command_line; + + /* + * x86_configure_nx() is called before parse_early_param() to detect + * whether hardware doesn't support NX (so that the early EHCI debug + * console setup can safely call set_fixmap()). It may then be called + * again from within noexec_setup() during parsing early parameters + * to honor the respective command line option. + */ + x86_configure_nx(); + + parse_early_param(); + #ifdef CONFIG_MEMORY_HOTPLUG /* * Memory used by the kernel cannot be hot-removed because Linux From 04e57a2d952bbd34bc45744e72be3eecdc344294 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 14 Dec 2021 10:45:26 +0100 Subject: [PATCH 085/251] tomoyo: Check exceeded quota early in tomoyo_domain_quota_is_ok(). If tomoyo is used in a testing/fuzzing environment in learning mode, for lots of domains the quota will be exceeded and stay exceeded for prolonged periods of time. In such cases it's pointless (and slow) to walk the whole acl list again and again just to rediscover that the quota is exceeded. We already have the TOMOYO_DIF_QUOTA_WARNED flag that notes the overflow condition. Check it early to avoid the slowdown. [penguin-kernel] This patch causes a user visible change that the learning mode will not be automatically resumed after the quota is increased. To resume the learning mode, administrator will need to explicitly clear TOMOYO_DIF_QUOTA_WARNED flag after increasing the quota. But I think that this change is generally preferable, for administrator likely wants to optimize the acl list for that domain before increasing the quota, or that domain likely hits the quota again. Therefore, don't try to care to clear TOMOYO_DIF_QUOTA_WARNED flag automatically when the quota for that domain changed. Signed-off-by: Dmitry Vyukov Signed-off-by: Tetsuo Handa --- security/tomoyo/util.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c index 1da2e3722b12..af8cd2af3466 100644 --- a/security/tomoyo/util.c +++ b/security/tomoyo/util.c @@ -1051,6 +1051,8 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r) return false; if (!domain) return true; + if (READ_ONCE(domain->flags[TOMOYO_DIF_QUOTA_WARNED])) + return false; list_for_each_entry_rcu(ptr, &domain->acl_info_list, list, srcu_read_lock_held(&tomoyo_ss)) { u16 perm; @@ -1096,14 +1098,12 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r) if (count < tomoyo_profile(domain->ns, domain->profile)-> pref[TOMOYO_PREF_MAX_LEARNING_ENTRY]) return true; - if (!domain->flags[TOMOYO_DIF_QUOTA_WARNED]) { - domain->flags[TOMOYO_DIF_QUOTA_WARNED] = true; - /* r->granted = false; */ - tomoyo_write_log(r, "%s", tomoyo_dif[TOMOYO_DIF_QUOTA_WARNED]); + WRITE_ONCE(domain->flags[TOMOYO_DIF_QUOTA_WARNED], true); + /* r->granted = false; */ + tomoyo_write_log(r, "%s", tomoyo_dif[TOMOYO_DIF_QUOTA_WARNED]); #ifndef CONFIG_SECURITY_TOMOYO_INSECURE_BUILTIN_SETTING - pr_warn("WARNING: Domain '%s' has too many ACLs to hold. Stopped learning mode.\n", - domain->domainname->name); + pr_warn("WARNING: Domain '%s' has too many ACLs to hold. Stopped learning mode.\n", + domain->domainname->name); #endif - } return false; } From f702e1107601230eec707739038a89018ea3468d Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 15 Dec 2021 20:13:55 +0900 Subject: [PATCH 086/251] tomoyo: use hwight16() in tomoyo_domain_quota_is_ok() hwight16() is much faster. While we are at it, no need to include "perm =" part into data_race() macro, for perm is a local variable that cannot be accessed by other threads. Signed-off-by: Tetsuo Handa --- security/tomoyo/util.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c index af8cd2af3466..6799b1122c9d 100644 --- a/security/tomoyo/util.c +++ b/security/tomoyo/util.c @@ -1056,7 +1056,6 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r) list_for_each_entry_rcu(ptr, &domain->acl_info_list, list, srcu_read_lock_held(&tomoyo_ss)) { u16 perm; - u8 i; if (ptr->is_deleted) continue; @@ -1067,23 +1066,23 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r) */ switch (ptr->type) { case TOMOYO_TYPE_PATH_ACL: - data_race(perm = container_of(ptr, struct tomoyo_path_acl, head)->perm); + perm = data_race(container_of(ptr, struct tomoyo_path_acl, head)->perm); break; case TOMOYO_TYPE_PATH2_ACL: - data_race(perm = container_of(ptr, struct tomoyo_path2_acl, head)->perm); + perm = data_race(container_of(ptr, struct tomoyo_path2_acl, head)->perm); break; case TOMOYO_TYPE_PATH_NUMBER_ACL: - data_race(perm = container_of(ptr, struct tomoyo_path_number_acl, head) + perm = data_race(container_of(ptr, struct tomoyo_path_number_acl, head) ->perm); break; case TOMOYO_TYPE_MKDEV_ACL: - data_race(perm = container_of(ptr, struct tomoyo_mkdev_acl, head)->perm); + perm = data_race(container_of(ptr, struct tomoyo_mkdev_acl, head)->perm); break; case TOMOYO_TYPE_INET_ACL: - data_race(perm = container_of(ptr, struct tomoyo_inet_acl, head)->perm); + perm = data_race(container_of(ptr, struct tomoyo_inet_acl, head)->perm); break; case TOMOYO_TYPE_UNIX_ACL: - data_race(perm = container_of(ptr, struct tomoyo_unix_acl, head)->perm); + perm = data_race(container_of(ptr, struct tomoyo_unix_acl, head)->perm); break; case TOMOYO_TYPE_MANUAL_TASK_ACL: perm = 0; @@ -1091,9 +1090,7 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r) default: perm = 1; } - for (i = 0; i < 16; i++) - if (perm & (1 << i)) - count++; + count += hweight16(perm); } if (count < tomoyo_profile(domain->ns, domain->profile)-> pref[TOMOYO_PREF_MAX_LEARNING_ENTRY]) From 2f5b3514c33fecad4003ce0f22ca9691492d310b Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 13 Dec 2021 12:27:57 +0100 Subject: [PATCH 087/251] x86/boot: Move EFI range reservation after cmdline parsing The memory reservation in arch/x86/platform/efi/efi.c depends on at least two command line parameters. Put it back later in the boot process and move efi_memblock_x86_reserve_range() out of early_memory_reserve(). An attempt to fix this was done in 8d48bf8206f7 ("x86/boot: Pull up cmdline preparation and early param parsing") but that caused other troubles so it got reverted. The bug this is addressing is: Dan reports that Anjaneya Chagam can no longer use the efi=nosoftreserve kernel command line parameter to suppress "soft reservation" behavior. This is due to the fact that the following call-chain happens at boot: early_reserve_memory |-> efi_memblock_x86_reserve_range |-> efi_fake_memmap_early which does if (!efi_soft_reserve_enabled()) return; and that would have set EFI_MEM_NO_SOFT_RESERVE after having parsed "nosoftreserve". However, parse_early_param() gets called *after* it, leading to the boot cmdline not being taken into account. See also https://lore.kernel.org/r/e8dd8993c38702ee6dd73b3c11f158617e665607.camel@intel.com [ bp: Turn into a proper patch. ] Signed-off-by: Mike Rapoport Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211213112757.2612-4-bp@alien8.de --- arch/x86/kernel/setup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 49b596db5631..e04f5e6eb33f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -713,9 +713,6 @@ static void __init early_reserve_memory(void) early_reserve_initrd(); - if (efi_enabled(EFI_BOOT)) - efi_memblock_x86_reserve_range(); - memblock_x86_reserve_range_setup_data(); reserve_ibft_region(); @@ -890,6 +887,9 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); + if (efi_enabled(EFI_BOOT)) + efi_memblock_x86_reserve_range(); + #ifdef CONFIG_MEMORY_HOTPLUG /* * Memory used by the kernel cannot be hot-removed because Linux From 651740a502411793327e2f0741104749c4eedcd1 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 13 Dec 2021 14:22:33 -0500 Subject: [PATCH 088/251] btrfs: check WRITE_ERR when trying to read an extent buffer Filipe reported a hang when we have errors on btrfs. This turned out to be a side-effect of my fix c2e39305299f01 ("btrfs: clear extent buffer uptodate when we fail to write it") which made it so we clear EXTENT_BUFFER_UPTODATE on an eb when we fail to write it out. Below is a paste of Filipe's analysis he got from using drgn to debug the hang """ btree readahead code calls read_extent_buffer_pages(), sets ->io_pages to a value while writeback of all pages has not yet completed: --> writeback for the first 3 pages finishes, we clear EXTENT_BUFFER_UPTODATE from eb on the first page when we get an error. --> at this point eb->io_pages is 1 and we cleared Uptodate bit from the first 3 pages --> read_extent_buffer_pages() does not see EXTENT_BUFFER_UPTODATE() so it continues, it's able to lock the pages since we obviously don't hold the pages locked during writeback --> read_extent_buffer_pages() then computes 'num_reads' as 3, and sets eb->io_pages to 3, since only the first page does not have Uptodate bit set at this point --> writeback for the remaining page completes, we ended decrementing eb->io_pages by 1, resulting in eb->io_pages == 2, and therefore never calling end_extent_buffer_writeback(), so EXTENT_BUFFER_WRITEBACK remains in the eb's flags --> of course, when the read bio completes, it doesn't and shouldn't call end_extent_buffer_writeback() --> we should clear EXTENT_BUFFER_UPTODATE only after all pages of the eb finished writeback? or maybe make the read pages code wait for writeback of all pages of the eb to complete before checking which pages need to be read, touch ->io_pages, submit read bio, etc writeback bit never cleared means we can hang when aborting a transaction, at: btrfs_cleanup_one_transaction() btrfs_destroy_marked_extents() wait_on_extent_buffer_writeback() """ This is a problem because our writes are not synchronized with reads in any way. We clear the UPTODATE flag and then we can easily come in and try to read the EB while we're still waiting on other bio's to complete. We have two options here, we could lock all the pages, and then check to see if eb->io_pages != 0 to know if we've already got an outstanding write on the eb. Or we can simply check to see if we have WRITE_ERR set on this extent buffer. We set this bit _before_ we clear UPTODATE, so if the read gets triggered because we aren't UPTODATE because of a write error we're guaranteed to have WRITE_ERR set, and in this case we can simply return -EIO. This will fix the reported hang. Reported-by: Filipe Manana Fixes: c2e39305299f01 ("btrfs: clear extent buffer uptodate when we fail to write it") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3258b6f01e85..9234d96a7fd5 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -6611,6 +6611,14 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num) if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) return 0; + /* + * We could have had EXTENT_BUFFER_UPTODATE cleared by the write + * operation, which could potentially still be in flight. In this case + * we simply want to return an error. + */ + if (unlikely(test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))) + return -EIO; + if (eb->fs_info->sectorsize < PAGE_SIZE) return read_extent_buffer_subpage(eb, wait, mirror_num); From 7a1636089acfee7562fe79aff7d1b4c57869896d Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 13 Dec 2021 08:45:12 +0000 Subject: [PATCH 089/251] btrfs: fix invalid delayed ref after subvolume creation failure When creating a subvolume, at ioctl.c:create_subvol(), if we fail to insert the new root's root item into the root tree, we are freeing the metadata extent we reserved for the new root to prevent a metadata extent leak, as we don't abort the transaction at that point (since there is nothing at that point that is irreversible). However we allocated the metadata extent for the new root which we are creating for the new subvolume, so its delayed reference refers to the ID of this new root. But when we free the metadata extent we pass the root of the subvolume where the new subvolume is located to btrfs_free_tree_block() - this is incorrect because this will generate a delayed reference that refers to the ID of the parent subvolume's root, and not to ID of the new root. This results in a failure when running delayed references that leads to a transaction abort and a trace like the following: [3868.738042] RIP: 0010:__btrfs_free_extent+0x709/0x950 [btrfs] [3868.739857] Code: 68 0f 85 e6 fb ff (...) [3868.742963] RSP: 0018:ffffb0e9045cf910 EFLAGS: 00010246 [3868.743908] RAX: 00000000fffffffe RBX: 00000000fffffffe RCX: 0000000000000002 [3868.745312] RDX: 00000000fffffffe RSI: 0000000000000002 RDI: ffff90b0cd793b88 [3868.746643] RBP: 000000000e5d8000 R08: 0000000000000000 R09: ffff90b0cd793b88 [3868.747979] R10: 0000000000000002 R11: 00014ded97944d68 R12: 0000000000000000 [3868.749373] R13: ffff90b09afe4a28 R14: 0000000000000000 R15: ffff90b0cd793b88 [3868.750725] FS: 00007f281c4a8b80(0000) GS:ffff90b3ada00000(0000) knlGS:0000000000000000 [3868.752275] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [3868.753515] CR2: 00007f281c6a5000 CR3: 0000000108a42006 CR4: 0000000000370ee0 [3868.754869] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [3868.756228] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [3868.757803] Call Trace: [3868.758281] [3868.758655] ? btrfs_merge_delayed_refs+0x178/0x1c0 [btrfs] [3868.759827] __btrfs_run_delayed_refs+0x2b1/0x1250 [btrfs] [3868.761047] btrfs_run_delayed_refs+0x86/0x210 [btrfs] [3868.762069] ? lock_acquired+0x19f/0x420 [3868.762829] btrfs_commit_transaction+0x69/0xb20 [btrfs] [3868.763860] ? _raw_spin_unlock+0x29/0x40 [3868.764614] ? btrfs_block_rsv_release+0x1c2/0x1e0 [btrfs] [3868.765870] create_subvol+0x1d8/0x9a0 [btrfs] [3868.766766] btrfs_mksubvol+0x447/0x4c0 [btrfs] [3868.767669] ? preempt_count_add+0x49/0xa0 [3868.768444] __btrfs_ioctl_snap_create+0x123/0x190 [btrfs] [3868.769639] ? _copy_from_user+0x66/0xa0 [3868.770391] btrfs_ioctl_snap_create_v2+0xbb/0x140 [btrfs] [3868.771495] btrfs_ioctl+0xd1e/0x35c0 [btrfs] [3868.772364] ? __slab_free+0x10a/0x360 [3868.773198] ? rcu_read_lock_sched_held+0x12/0x60 [3868.774121] ? lock_release+0x223/0x4a0 [3868.774863] ? lock_acquired+0x19f/0x420 [3868.775634] ? rcu_read_lock_sched_held+0x12/0x60 [3868.776530] ? trace_hardirqs_on+0x1b/0xe0 [3868.777373] ? _raw_spin_unlock_irqrestore+0x3e/0x60 [3868.778280] ? kmem_cache_free+0x321/0x3c0 [3868.779011] ? __x64_sys_ioctl+0x83/0xb0 [3868.779718] __x64_sys_ioctl+0x83/0xb0 [3868.780387] do_syscall_64+0x3b/0xc0 [3868.781059] entry_SYSCALL_64_after_hwframe+0x44/0xae [3868.781953] RIP: 0033:0x7f281c59e957 [3868.782585] Code: 3c 1c 48 f7 d8 4c (...) [3868.785867] RSP: 002b:00007ffe1f83e2b8 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 [3868.787198] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f281c59e957 [3868.788450] RDX: 00007ffe1f83e2c0 RSI: 0000000050009418 RDI: 0000000000000003 [3868.789748] RBP: 00007ffe1f83f300 R08: 0000000000000000 R09: 00007ffe1f83fe36 [3868.791214] R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000003 [3868.792468] R13: 0000000000000003 R14: 00007ffe1f83e2c0 R15: 00000000000003cc [3868.793765] [3868.794037] irq event stamp: 0 [3868.794548] hardirqs last enabled at (0): [<0000000000000000>] 0x0 [3868.795670] hardirqs last disabled at (0): [] copy_process+0x934/0x2040 [3868.797086] softirqs last enabled at (0): [] copy_process+0x934/0x2040 [3868.798309] softirqs last disabled at (0): [<0000000000000000>] 0x0 [3868.799284] ---[ end trace be24c7002fe27747 ]--- [3868.799928] BTRFS info (device dm-0): leaf 241188864 gen 1268 total ptrs 214 free space 469 owner 2 [3868.801133] BTRFS info (device dm-0): refs 2 lock_owner 225627 current 225627 [3868.802056] item 0 key (237436928 169 0) itemoff 16250 itemsize 33 [3868.802863] extent refs 1 gen 1265 flags 2 [3868.803447] ref#0: tree block backref root 1610 (...) [3869.064354] item 114 key (241008640 169 0) itemoff 12488 itemsize 33 [3869.065421] extent refs 1 gen 1268 flags 2 [3869.066115] ref#0: tree block backref root 1689 (...) [3869.403834] BTRFS error (device dm-0): unable to find ref byte nr 241008640 parent 0 root 1622 owner 0 offset 0 [3869.405641] BTRFS: error (device dm-0) in __btrfs_free_extent:3076: errno=-2 No such entry [3869.407138] BTRFS: error (device dm-0) in btrfs_run_delayed_refs:2159: errno=-2 No such entry Fix this by passing the new subvolume's root ID to btrfs_free_tree_block(). This requires changing the root argument of btrfs_free_tree_block() from struct btrfs_root * to a u64, since at this point during the subvolume creation we have not yet created the struct btrfs_root for the new subvolume, and btrfs_free_tree_block() only needs a root ID and nothing else from a struct btrfs_root. This was triggered by test case generic/475 from fstests. Fixes: 67addf29004c5b ("btrfs: fix metadata extent leak after failure to create subvolume") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Nikolay Borisov Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 17 +++++++++-------- fs/btrfs/ctree.h | 7 ++++++- fs/btrfs/extent-tree.c | 13 +++++++------ fs/btrfs/free-space-tree.c | 4 ++-- fs/btrfs/ioctl.c | 9 +++++---- fs/btrfs/qgroup.c | 3 ++- 6 files changed, 31 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 74c8e18f3720..64599625c7d7 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -462,8 +462,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, BUG_ON(ret < 0); rcu_assign_pointer(root->node, cow); - btrfs_free_tree_block(trans, root, buf, parent_start, - last_ref); + btrfs_free_tree_block(trans, btrfs_root_id(root), buf, + parent_start, last_ref); free_extent_buffer(buf); add_root_to_dirty_list(root); } else { @@ -484,8 +484,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, return ret; } } - btrfs_free_tree_block(trans, root, buf, parent_start, - last_ref); + btrfs_free_tree_block(trans, btrfs_root_id(root), buf, + parent_start, last_ref); } if (unlock_orig) btrfs_tree_unlock(buf); @@ -926,7 +926,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, free_extent_buffer(mid); root_sub_used(root, mid->len); - btrfs_free_tree_block(trans, root, mid, 0, 1); + btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1); /* once for the root ptr */ free_extent_buffer_stale(mid); return 0; @@ -985,7 +985,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, btrfs_tree_unlock(right); del_ptr(root, path, level + 1, pslot + 1); root_sub_used(root, right->len); - btrfs_free_tree_block(trans, root, right, 0, 1); + btrfs_free_tree_block(trans, btrfs_root_id(root), right, + 0, 1); free_extent_buffer_stale(right); right = NULL; } else { @@ -1030,7 +1031,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, btrfs_tree_unlock(mid); del_ptr(root, path, level + 1, pslot); root_sub_used(root, mid->len); - btrfs_free_tree_block(trans, root, mid, 0, 1); + btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1); free_extent_buffer_stale(mid); mid = NULL; } else { @@ -4031,7 +4032,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans, root_sub_used(root, leaf->len); atomic_inc(&leaf->refs); - btrfs_free_tree_block(trans, root, leaf, 0, 1); + btrfs_free_tree_block(trans, btrfs_root_id(root), leaf, 0, 1); free_extent_buffer_stale(leaf); } /* diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7553e9dc5f93..5fe5eccb3c87 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2257,6 +2257,11 @@ static inline bool btrfs_root_dead(const struct btrfs_root *root) return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_DEAD)) != 0; } +static inline u64 btrfs_root_id(const struct btrfs_root *root) +{ + return root->root_key.objectid; +} + /* struct btrfs_root_backup */ BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup, tree_root, 64); @@ -2719,7 +2724,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, u64 empty_size, enum btrfs_lock_nesting nest); void btrfs_free_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + u64 root_id, struct extent_buffer *buf, u64 parent, int last_ref); int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fc4895e6a62c..25ef6e3fd306 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3275,20 +3275,20 @@ out_delayed_unlock: } void btrfs_free_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + u64 root_id, struct extent_buffer *buf, u64 parent, int last_ref) { - struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_ref generic_ref = { 0 }; int ret; btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF, buf->start, buf->len, parent); btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf), - root->root_key.objectid, 0, false); + root_id, 0, false); - if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { + if (root_id != BTRFS_TREE_LOG_OBJECTID) { btrfs_ref_tree_mod(fs_info, &generic_ref); ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL); BUG_ON(ret); /* -ENOMEM */ @@ -3298,7 +3298,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, struct btrfs_block_group *cache; bool must_pin = false; - if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { + if (root_id != BTRFS_TREE_LOG_OBJECTID) { ret = check_ref_cleanup(trans, buf->start); if (!ret) { btrfs_redirty_list_add(trans->transaction, buf); @@ -5472,7 +5472,8 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, goto owner_mismatch; } - btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); + btrfs_free_tree_block(trans, btrfs_root_id(root), eb, parent, + wc->refs[level] == 1); out: wc->refs[level] = 0; wc->flags[level] = 0; diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index a33bca94d133..3abec44c6255 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1256,8 +1256,8 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info) btrfs_tree_lock(free_space_root->node); btrfs_clean_tree_block(free_space_root->node); btrfs_tree_unlock(free_space_root->node); - btrfs_free_tree_block(trans, free_space_root, free_space_root->node, - 0, 1); + btrfs_free_tree_block(trans, btrfs_root_id(free_space_root), + free_space_root->node, 0, 1); btrfs_put_root(free_space_root); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 1b85d98df66b..a7533416370a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -617,11 +617,12 @@ static noinline int create_subvol(struct user_namespace *mnt_userns, * Since we don't abort the transaction in this case, free the * tree block so that we don't leak space and leave the * filesystem in an inconsistent state (an extent item in the - * extent tree without backreferences). Also no need to have - * the tree block locked since it is not in any tree at this - * point, so no other task can find it and use it. + * extent tree with a backreference for a root that does not + * exists). Also no need to have the tree block locked since it + * is not in any tree at this point, so no other task can find + * it and use it. */ - btrfs_free_tree_block(trans, root, leaf, 0, 1); + btrfs_free_tree_block(trans, objectid, leaf, 0, 1); free_extent_buffer(leaf); goto fail; } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index db680f5be745..6c037f1252b7 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1219,7 +1219,8 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) btrfs_tree_lock(quota_root->node); btrfs_clean_tree_block(quota_root->node); btrfs_tree_unlock(quota_root->node); - btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); + btrfs_free_tree_block(trans, btrfs_root_id(quota_root), + quota_root->node, 0, 1); btrfs_put_root(quota_root); From 212a58fda9b9077e0efc20200a4feb76afacfd95 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 13 Dec 2021 08:45:13 +0000 Subject: [PATCH 090/251] btrfs: fix warning when freeing leaf after subvolume creation failure When creating a subvolume, at ioctl.c:create_subvol(), if we fail to insert the root item for the new subvolume into the root tree, we can trigger the following warning: [78961.741046] WARNING: CPU: 0 PID: 4079814 at fs/btrfs/extent-tree.c:3357 btrfs_free_tree_block+0x2af/0x310 [btrfs] [78961.743344] Modules linked in: [78961.749440] dm_snapshot dm_thin_pool (...) [78961.773648] CPU: 0 PID: 4079814 Comm: fsstress Not tainted 5.16.0-rc4-btrfs-next-108 #1 [78961.775198] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [78961.777266] RIP: 0010:btrfs_free_tree_block+0x2af/0x310 [btrfs] [78961.778398] Code: 17 00 48 85 (...) [78961.781067] RSP: 0018:ffffaa4001657b28 EFLAGS: 00010202 [78961.781877] RAX: 0000000000000213 RBX: ffff897f8a796910 RCX: 0000000000000000 [78961.782780] RDX: 0000000000000000 RSI: 0000000011004000 RDI: 00000000ffffffff [78961.783764] RBP: ffff8981f490e800 R08: 0000000000000001 R09: 0000000000000000 [78961.784740] R10: 0000000000000000 R11: 0000000000000001 R12: ffff897fc963fcc8 [78961.785665] R13: 0000000000000001 R14: ffff898063548000 R15: ffff898063548000 [78961.786620] FS: 00007f31283c6b80(0000) GS:ffff8982ace00000(0000) knlGS:0000000000000000 [78961.787717] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [78961.788598] CR2: 00007f31285c3000 CR3: 000000023fcc8003 CR4: 0000000000370ef0 [78961.789568] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [78961.790585] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [78961.791684] Call Trace: [78961.792082] [78961.792359] create_subvol+0x5d1/0x9a0 [btrfs] [78961.793054] btrfs_mksubvol+0x447/0x4c0 [btrfs] [78961.794009] ? preempt_count_add+0x49/0xa0 [78961.794705] __btrfs_ioctl_snap_create+0x123/0x190 [btrfs] [78961.795712] ? _copy_from_user+0x66/0xa0 [78961.796382] btrfs_ioctl_snap_create_v2+0xbb/0x140 [btrfs] [78961.797392] btrfs_ioctl+0xd1e/0x35c0 [btrfs] [78961.798172] ? __slab_free+0x10a/0x360 [78961.798820] ? rcu_read_lock_sched_held+0x12/0x60 [78961.799664] ? lock_release+0x223/0x4a0 [78961.800321] ? lock_acquired+0x19f/0x420 [78961.800992] ? rcu_read_lock_sched_held+0x12/0x60 [78961.801796] ? trace_hardirqs_on+0x1b/0xe0 [78961.802495] ? _raw_spin_unlock_irqrestore+0x3e/0x60 [78961.803358] ? kmem_cache_free+0x321/0x3c0 [78961.804071] ? __x64_sys_ioctl+0x83/0xb0 [78961.804711] __x64_sys_ioctl+0x83/0xb0 [78961.805348] do_syscall_64+0x3b/0xc0 [78961.805969] entry_SYSCALL_64_after_hwframe+0x44/0xae [78961.806830] RIP: 0033:0x7f31284bc957 [78961.807517] Code: 3c 1c 48 f7 d8 (...) This is because we are calling btrfs_free_tree_block() on an extent buffer that is dirty. Fix that by cleaning the extent buffer, with btrfs_clean_tree_block(), before freeing it. This was triggered by test case generic/475 from fstests. Fixes: 67addf29004c5b ("btrfs: fix metadata extent leak after failure to create subvolume") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Nikolay Borisov Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a7533416370a..8a442b59eee0 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -618,10 +618,11 @@ static noinline int create_subvol(struct user_namespace *mnt_userns, * tree block so that we don't leak space and leave the * filesystem in an inconsistent state (an extent item in the * extent tree with a backreference for a root that does not - * exists). Also no need to have the tree block locked since it - * is not in any tree at this point, so no other task can find - * it and use it. + * exists). */ + btrfs_tree_lock(leaf); + btrfs_clean_tree_block(leaf); + btrfs_tree_unlock(leaf); btrfs_free_tree_block(trans, objectid, leaf, 0, 1); free_extent_buffer(leaf); goto fail; From 4989d4a0aed3fb30f5b48787a689d7090de6f86d Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Wed, 15 Dec 2021 19:38:43 +0900 Subject: [PATCH 091/251] btrfs: fix missing blkdev_put() call in btrfs_scan_one_device() The function btrfs_scan_one_device() calls blkdev_get_by_path() and blkdev_put() to get and release its target block device. However, when btrfs_sb_log_location_bdev() fails, blkdev_put() is not called and the block device is left without clean up. This triggered failure of fstests generic/085. Fix the failure path of btrfs_sb_log_location_bdev() to call blkdev_put(). Fixes: 12659251ca5df ("btrfs: implement log-structured superblock for ZONED mode") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Nikolay Borisov Signed-off-by: Shin'ichiro Kawasaki Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index cc80f2a97a0b..b4da58fd0e1a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1370,8 +1370,10 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags, bytenr_orig = btrfs_sb_offset(0); ret = btrfs_sb_log_location_bdev(bdev, 0, READ, &bytenr); - if (ret) - return ERR_PTR(ret); + if (ret) { + device = ERR_PTR(ret); + goto error_bdev_put; + } disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr_orig); if (IS_ERR(disk_super)) { From f08adf5add9a071160c68bb2a61d697f39ab0758 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Dec 2021 19:46:21 +0100 Subject: [PATCH 092/251] USB: gadget: bRequestType is a bitfield, not a enum Szymon rightly pointed out that the previous check for the endpoint direction in bRequestType was not looking at only the bit involved, but rather the whole value. Normally this is ok, but for some request types, bits other than bit 8 could be set and the check for the endpoint length could not stall correctly. Fix that up by only checking the single bit. Fixes: 153a2d7e3350 ("USB: gadget: detect too-big endpoint 0 requests") Cc: Felipe Balbi Reported-by: Szymon Heidrich Link: https://lore.kernel.org/r/20211214184621.385828-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 6 +++--- drivers/usb/gadget/legacy/dbgp.c | 6 +++--- drivers/usb/gadget/legacy/inode.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 284eea9f6e4d..3789c329183c 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1680,14 +1680,14 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) u8 endp; if (w_length > USB_COMP_EP0_BUFSIZ) { - if (ctrl->bRequestType == USB_DIR_OUT) { - goto done; - } else { + if (ctrl->bRequestType & USB_DIR_IN) { /* Cast away the const, we are going to overwrite on purpose. */ __le16 *temp = (__le16 *)&ctrl->wLength; *temp = cpu_to_le16(USB_COMP_EP0_BUFSIZ); w_length = USB_COMP_EP0_BUFSIZ; + } else { + goto done; } } diff --git a/drivers/usb/gadget/legacy/dbgp.c b/drivers/usb/gadget/legacy/dbgp.c index 355bc7dab9d5..6bcbad382580 100644 --- a/drivers/usb/gadget/legacy/dbgp.c +++ b/drivers/usb/gadget/legacy/dbgp.c @@ -346,14 +346,14 @@ static int dbgp_setup(struct usb_gadget *gadget, u16 len = 0; if (length > DBGP_REQ_LEN) { - if (ctrl->bRequestType == USB_DIR_OUT) { - return err; - } else { + if (ctrl->bRequestType & USB_DIR_IN) { /* Cast away the const, we are going to overwrite on purpose. */ __le16 *temp = (__le16 *)&ctrl->wLength; *temp = cpu_to_le16(DBGP_REQ_LEN); length = DBGP_REQ_LEN; + } else { + return err; } } diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 63150e3889ef..3b58f4fc0a80 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -1334,14 +1334,14 @@ gadgetfs_setup (struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) u16 w_length = le16_to_cpu(ctrl->wLength); if (w_length > RBUF_SIZE) { - if (ctrl->bRequestType == USB_DIR_OUT) { - return value; - } else { + if (ctrl->bRequestType & USB_DIR_IN) { /* Cast away the const, we are going to overwrite on purpose. */ __le16 *temp = (__le16 *)&ctrl->wLength; *temp = cpu_to_le16(RBUF_SIZE); w_length = RBUF_SIZE; + } else { + return value; } } From fac6bf87c55f7f0733efb0375565fb6a50cf2caf Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Tue, 7 Dec 2021 13:45:10 +0100 Subject: [PATCH 093/251] usb: dwc2: fix STM ID/VBUS detection startup delay in dwc2_driver_probe When activate_stm_id_vb_detection is enabled, ID and Vbus detection relies on sensing comparators. This detection needs time to stabilize. A delay was already applied in dwc2_resume() when reactivating the detection, but it wasn't done in dwc2_probe(). This patch adds delay after enabling STM ID/VBUS detection. Then, ID state is good when initializing gadget and host, and avoid to get a wrong Connector ID Status Change interrupt. Fixes: a415083a11cc ("usb: dwc2: add support for STM32MP15 SoCs USB OTG HS and FS") Cc: stable Acked-by: Minas Harutyunyan Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20211207124510.268841-1-amelie.delaunay@foss.st.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/platform.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index c8f18f3ba9e3..c331a5128c2c 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -575,6 +575,9 @@ static int dwc2_driver_probe(struct platform_device *dev) ggpio |= GGPIO_STM32_OTG_GCCFG_IDEN; ggpio |= GGPIO_STM32_OTG_GCCFG_VBDEN; dwc2_writel(hsotg, ggpio, GGPIO); + + /* ID/VBUS detection startup time */ + usleep_range(5000, 7000); } retval = dwc2_drd_init(hsotg); From 1ee33b1ca2b8dabfcc17198ffd049a6b55674a86 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 15 Dec 2021 20:52:40 +0900 Subject: [PATCH 094/251] tty: n_hdlc: make n_hdlc_tty_wakeup() asynchronous syzbot is reporting that an unprivileged user who logged in from tty console can crash the system using a reproducer shown below [1], for n_hdlc_tty_wakeup() is synchronously calling n_hdlc_send_frames(). ---------- #include #include int main(int argc, char *argv[]) { const int disc = 0xd; ioctl(1, TIOCSETD, &disc); while (1) { ioctl(1, TCXONC, 0); write(1, "", 1); ioctl(1, TCXONC, 1); /* Kernel panic - not syncing: scheduling while atomic */ } } ---------- Linus suspected that "struct tty_ldisc"->ops->write_wakeup() must not sleep, and Jiri confirmed it from include/linux/tty_ldisc.h. Thus, defer n_hdlc_send_frames() from n_hdlc_tty_wakeup() to a WQ context like net/nfc/nci/uart.c does. Link: https://syzkaller.appspot.com/bug?extid=5f47a8cea6a12b77a876 [1] Reported-by: syzbot Cc: stable Analyzed-by: Fabio M. De Francesco Suggested-by: Linus Torvalds Confirmed-by: Jiri Slaby Reviewed-by: Fabio M. De Francesco Signed-off-by: Tetsuo Handa Link: https://lore.kernel.org/r/40de8b7e-a3be-4486-4e33-1b1d1da452f8@i-love.sakura.ne.jp Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_hdlc.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index 7e0884ecc74f..23ba1fc99df8 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -140,6 +140,8 @@ struct n_hdlc { struct n_hdlc_buf_list rx_buf_list; struct n_hdlc_buf_list tx_free_buf_list; struct n_hdlc_buf_list rx_free_buf_list; + struct work_struct write_work; + struct tty_struct *tty_for_write_work; }; /* @@ -154,6 +156,7 @@ static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *list); /* Local functions */ static struct n_hdlc *n_hdlc_alloc(void); +static void n_hdlc_tty_write_work(struct work_struct *work); /* max frame size for memory allocations */ static int maxframe = 4096; @@ -210,6 +213,8 @@ static void n_hdlc_tty_close(struct tty_struct *tty) wake_up_interruptible(&tty->read_wait); wake_up_interruptible(&tty->write_wait); + cancel_work_sync(&n_hdlc->write_work); + n_hdlc_free_buf_list(&n_hdlc->rx_free_buf_list); n_hdlc_free_buf_list(&n_hdlc->tx_free_buf_list); n_hdlc_free_buf_list(&n_hdlc->rx_buf_list); @@ -241,6 +246,8 @@ static int n_hdlc_tty_open(struct tty_struct *tty) return -ENFILE; } + INIT_WORK(&n_hdlc->write_work, n_hdlc_tty_write_work); + n_hdlc->tty_for_write_work = tty; tty->disc_data = n_hdlc; tty->receive_room = 65536; @@ -334,6 +341,20 @@ check_again: goto check_again; } /* end of n_hdlc_send_frames() */ +/** + * n_hdlc_tty_write_work - Asynchronous callback for transmit wakeup + * @work: pointer to work_struct + * + * Called when low level device driver can accept more send data. + */ +static void n_hdlc_tty_write_work(struct work_struct *work) +{ + struct n_hdlc *n_hdlc = container_of(work, struct n_hdlc, write_work); + struct tty_struct *tty = n_hdlc->tty_for_write_work; + + n_hdlc_send_frames(n_hdlc, tty); +} /* end of n_hdlc_tty_write_work() */ + /** * n_hdlc_tty_wakeup - Callback for transmit wakeup * @tty: pointer to associated tty instance data @@ -344,7 +365,7 @@ static void n_hdlc_tty_wakeup(struct tty_struct *tty) { struct n_hdlc *n_hdlc = tty->disc_data; - n_hdlc_send_frames(n_hdlc, tty); + schedule_work(&n_hdlc->write_work); } /* end of n_hdlc_tty_wakeup() */ /** From 6c33ff728812aa18792afffaf2c9873b898e7512 Mon Sep 17 00:00:00 2001 From: "Ji-Ze Hong (Peter Hong)" Date: Wed, 15 Dec 2021 15:58:35 +0800 Subject: [PATCH 095/251] serial: 8250_fintek: Fix garbled text for console Commit fab8a02b73eb ("serial: 8250_fintek: Enable high speed mode on Fintek F81866") introduced support to use high baudrate with Fintek SuperIO UARTs. It'll change clocksources when the UART probed. But when user add kernel parameter "console=ttyS0,115200 console=tty0" to make the UART as console output, the console will output garbled text after the following kernel message. [ 3.681188] Serial: 8250/16550 driver, 32 ports, IRQ sharing enabled The issue is occurs in following step: probe_setup_port() -> fintek_8250_goto_highspeed() It change clocksource from 115200 to 921600 with wrong time, it should change clocksource in set_termios() not in probed. The following 3 patches are implemented change clocksource in fintek_8250_set_termios(). Commit 58178914ae5b ("serial: 8250_fintek: UART dynamic clocksource on Fintek F81216H") Commit 195638b6d44f ("serial: 8250_fintek: UART dynamic clocksource on Fintek F81866") Commit 423d9118c624 ("serial: 8250_fintek: Add F81966 Support") Due to the high baud rate had implemented above 3 patches and the patch Commit fab8a02b73eb ("serial: 8250_fintek: Enable high speed mode on Fintek F81866") is bugged, So this patch will remove it. Fixes: fab8a02b73eb ("serial: 8250_fintek: Enable high speed mode on Fintek F81866") Signed-off-by: Ji-Ze Hong (Peter Hong) Link: https://lore.kernel.org/r/20211215075835.2072-1-hpeter+linux_kernel@gmail.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_fintek.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/tty/serial/8250/8250_fintek.c b/drivers/tty/serial/8250/8250_fintek.c index 31c9e83ea3cb..251f0018ae8c 100644 --- a/drivers/tty/serial/8250/8250_fintek.c +++ b/drivers/tty/serial/8250/8250_fintek.c @@ -290,25 +290,6 @@ static void fintek_8250_set_max_fifo(struct fintek_8250 *pdata) } } -static void fintek_8250_goto_highspeed(struct uart_8250_port *uart, - struct fintek_8250 *pdata) -{ - sio_write_reg(pdata, LDN, pdata->index); - - switch (pdata->pid) { - case CHIP_ID_F81966: - case CHIP_ID_F81866: /* set uart clock for high speed serial mode */ - sio_write_mask_reg(pdata, F81866_UART_CLK, - F81866_UART_CLK_MASK, - F81866_UART_CLK_14_769MHZ); - - uart->port.uartclk = 921600 * 16; - break; - default: /* leave clock speed untouched */ - break; - } -} - static void fintek_8250_set_termios(struct uart_port *port, struct ktermios *termios, struct ktermios *old) @@ -430,7 +411,6 @@ static int probe_setup_port(struct fintek_8250 *pdata, fintek_8250_set_irq_mode(pdata, level_mode); fintek_8250_set_max_fifo(pdata); - fintek_8250_goto_highspeed(uart, pdata); fintek_8250_exit_key(addr[i]); From f886d4fbb7c97b8f5f447c92d2dab99c841803c0 Mon Sep 17 00:00:00 2001 From: Nehal Bakulchandra Shah Date: Wed, 15 Dec 2021 15:02:16 +0530 Subject: [PATCH 096/251] usb: xhci: Extend support for runtime power management for AMD's Yellow carp. AMD's Yellow Carp platform has few more XHCI controllers, enable the runtime power management support for the same. Signed-off-by: Nehal Bakulchandra Shah Cc: stable Link: https://lore.kernel.org/r/20211215093216.1839065-1-Nehal-Bakulchandra.shah@amd.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 92adf6107864..3af017883231 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -71,6 +71,8 @@ #define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_4 0x161e #define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_5 0x15d6 #define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_6 0x15d7 +#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_7 0x161c +#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_8 0x161f #define PCI_DEVICE_ID_ASMEDIA_1042_XHCI 0x1042 #define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI 0x1142 @@ -330,7 +332,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_3 || pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_4 || pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_5 || - pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_6)) + pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_6 || + pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_7 || + pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_8)) xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; if (xhci->quirks & XHCI_RESET_ON_RESUME) From 0ad3bd562bb91853b9f42bda145b5db6255aee90 Mon Sep 17 00:00:00 2001 From: Jimmy Wang Date: Tue, 14 Dec 2021 09:26:50 +0800 Subject: [PATCH 097/251] USB: NO_LPM quirk Lenovo USB-C to Ethernet Adapher(RTL8153-04) This device doesn't work well with LPM, losing connectivity intermittently. Disable LPM to resolve the issue. Reviewed-by: Signed-off-by: Jimmy Wang Cc: stable Link: https://lore.kernel.org/r/20211214012652.4898-1-wangjm221@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 019351c0b52c..d3c14b5ed4a1 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -434,6 +434,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1532, 0x0116), .driver_info = USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, + /* Lenovo USB-C to Ethernet Adapter RTL8153-04 */ + { USB_DEVICE(0x17ef, 0x720c), .driver_info = USB_QUIRK_NO_LPM }, + /* Lenovo Powered USB-C Travel Hub (4X90S92381, RTL8153 GigE) */ { USB_DEVICE(0x17ef, 0x721e), .driver_info = USB_QUIRK_NO_LPM }, From 4c4e162d9cf38528c4f13df09d5755cbc06f6c77 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Tue, 14 Dec 2021 05:55:27 +0100 Subject: [PATCH 098/251] usb: cdnsp: Fix lack of spin_lock_irqsave/spin_lock_restore Patch puts content of cdnsp_gadget_pullup function inside spin_lock_irqsave and spin_lock_restore section. This construction is required here to keep the data consistency, otherwise some data can be changed e.g. from interrupt context. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Reported-by: Ken (Jian) He cc: Signed-off-by: Pawel Laszczak -- Changelog: v2: - added disable_irq/enable_irq as sugester by Peter Chen drivers/usb/cdns3/cdnsp-gadget.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) Reviewed-by: Peter Chen Link: https://lore.kernel.org/r/20211214045527.26823-1-pawell@gli-login.cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-gadget.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index 27df0c697897..e85bf768c66d 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -1541,15 +1541,27 @@ static int cdnsp_gadget_pullup(struct usb_gadget *gadget, int is_on) { struct cdnsp_device *pdev = gadget_to_cdnsp(gadget); struct cdns *cdns = dev_get_drvdata(pdev->dev); + unsigned long flags; trace_cdnsp_pullup(is_on); + /* + * Disable events handling while controller is being + * enabled/disabled. + */ + disable_irq(cdns->dev_irq); + spin_lock_irqsave(&pdev->lock, flags); + if (!is_on) { cdnsp_reset_device(pdev); cdns_clear_vbus(cdns); } else { cdns_set_vbus(cdns); } + + spin_unlock_irqrestore(&pdev->lock, flags); + enable_irq(cdns->dev_irq); + return 0; } From ca4d8344a72b91fb9d4c8bfbc22204b4c09c5d8f Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Thu, 9 Dec 2021 18:15:07 +0800 Subject: [PATCH 099/251] usb: typec: tcpm: fix tcpm unregister port but leave a pending timer In current design, when the tcpm port is unregisterd, the kthread_worker will be destroyed in the last step. Inside the kthread_destroy_worker(), the worker will flush all the works and wait for them to end. However, if one of the works calls hrtimer_start(), this hrtimer will be pending until timeout even though tcpm port is removed. Once the hrtimer timeout, many strange kernel dumps appear. Thus, we can first complete kthread_destroy_worker(), then cancel all the hrtimers. This will guarantee that no hrtimer is pending at the end. Fixes: 3ed8e1c2ac99 ("usb: typec: tcpm: Migrate workqueue to RT priority for processing events") cc: Reviewed-by: Guenter Roeck Acked-by: Heikki Krogerus Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20211209101507.499096-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 6010b9901126..59d4fa2443f2 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -324,6 +324,7 @@ struct tcpm_port { bool attached; bool connected; + bool registered; bool pd_supported; enum typec_port_type port_type; @@ -6291,7 +6292,8 @@ static enum hrtimer_restart state_machine_timer_handler(struct hrtimer *timer) { struct tcpm_port *port = container_of(timer, struct tcpm_port, state_machine_timer); - kthread_queue_work(port->wq, &port->state_machine); + if (port->registered) + kthread_queue_work(port->wq, &port->state_machine); return HRTIMER_NORESTART; } @@ -6299,7 +6301,8 @@ static enum hrtimer_restart vdm_state_machine_timer_handler(struct hrtimer *time { struct tcpm_port *port = container_of(timer, struct tcpm_port, vdm_state_machine_timer); - kthread_queue_work(port->wq, &port->vdm_state_machine); + if (port->registered) + kthread_queue_work(port->wq, &port->vdm_state_machine); return HRTIMER_NORESTART; } @@ -6307,7 +6310,8 @@ static enum hrtimer_restart enable_frs_timer_handler(struct hrtimer *timer) { struct tcpm_port *port = container_of(timer, struct tcpm_port, enable_frs_timer); - kthread_queue_work(port->wq, &port->enable_frs); + if (port->registered) + kthread_queue_work(port->wq, &port->enable_frs); return HRTIMER_NORESTART; } @@ -6315,7 +6319,8 @@ static enum hrtimer_restart send_discover_timer_handler(struct hrtimer *timer) { struct tcpm_port *port = container_of(timer, struct tcpm_port, send_discover_timer); - kthread_queue_work(port->wq, &port->send_discover_work); + if (port->registered) + kthread_queue_work(port->wq, &port->send_discover_work); return HRTIMER_NORESTART; } @@ -6403,6 +6408,7 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc) typec_port_register_altmodes(port->typec_port, &tcpm_altmode_ops, port, port->port_altmode, ALTMODE_DISCOVERY_MAX); + port->registered = true; mutex_lock(&port->lock); tcpm_init(port); @@ -6424,6 +6430,9 @@ void tcpm_unregister_port(struct tcpm_port *port) { int i; + port->registered = false; + kthread_destroy_worker(port->wq); + hrtimer_cancel(&port->send_discover_timer); hrtimer_cancel(&port->enable_frs_timer); hrtimer_cancel(&port->vdm_state_machine_timer); @@ -6435,7 +6444,6 @@ void tcpm_unregister_port(struct tcpm_port *port) typec_unregister_port(port->typec_port); usb_role_switch_put(port->role_sw); tcpm_debugfs_exit(port); - kthread_destroy_worker(port->wq); } EXPORT_SYMBOL_GPL(tcpm_unregister_port); From b67210cc217f9ca1c576909454d846970c13dfd4 Mon Sep 17 00:00:00 2001 From: Fabien Dessenne Date: Wed, 15 Dec 2021 10:58:08 +0100 Subject: [PATCH 100/251] pinctrl: stm32: consider the GPIO offset to expose all the GPIO lines Consider the GPIO controller offset (from "gpio-ranges") to compute the maximum GPIO line number. This fixes an issue where gpio-ranges uses a non-null offset. e.g.: gpio-ranges = <&pinctrl 6 86 10> In that case the last valid GPIO line is not 9 but 15 (6 + 10 - 1) Cc: stable@vger.kernel.org Fixes: 67e2996f72c7 ("pinctrl: stm32: fix the reported number of GPIO lines per bank") Reported-by: Christoph Fritz Signed-off-by: Fabien Dessenne Link: https://lore.kernel.org/r/20211215095808.621716-1-fabien.dessenne@foss.st.com Signed-off-by: Linus Walleij --- drivers/pinctrl/stm32/pinctrl-stm32.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index 24764ebcc936..9ed764731570 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -1251,10 +1251,10 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl, bank_nr = args.args[1] / STM32_GPIO_PINS_PER_BANK; bank->gpio_chip.base = args.args[1]; - npins = args.args[2]; - while (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, - ++i, &args)) - npins += args.args[2]; + /* get the last defined gpio line (offset + nb of pins) */ + npins = args.args[0] + args.args[2]; + while (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, ++i, &args)) + npins = max(npins, (int)(args.args[0] + args.args[2])); } else { bank_nr = pctl->nbanks; bank->gpio_chip.base = bank_nr * STM32_GPIO_PINS_PER_BANK; From 39e660687ac0c57499134765abbecf71cfd11eae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Haa=C3=9F?= Date: Sun, 12 Dec 2021 09:30:30 -0300 Subject: [PATCH 101/251] ARM: dts: imx6qdl-wandboard: Fix Ethernet support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the imx6q-wandboard Ethernet does not transmit any data. This issue has been exposed by commit f5d9aa79dfdf ("ARM: imx6q: remove clk-out fixup for the Atheros AR8031 and AR8035 PHYs"). Fix it by describing the qca,clk-out-frequency property as suggested by the commit above. Fixes: 77591e42458d ("ARM: dts: imx6qdl-wandboard: add ethernet PHY description") Signed-off-by: Martin Haaß Tested-by: Fabio Estevam Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-wandboard.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/imx6qdl-wandboard.dtsi b/arch/arm/boot/dts/imx6qdl-wandboard.dtsi index b62a0dbb033f..ec6fba5ee8fd 100644 --- a/arch/arm/boot/dts/imx6qdl-wandboard.dtsi +++ b/arch/arm/boot/dts/imx6qdl-wandboard.dtsi @@ -309,6 +309,7 @@ ethphy: ethernet-phy@1 { reg = <1>; + qca,clk-out-frequency = <125000000>; }; }; }; From 0fd08a34e8e3b67ec9bd8287ac0facf8374b844a Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 16 Dec 2021 08:24:08 +0100 Subject: [PATCH 102/251] xen/blkfront: harden blkfront against event channel storms The Xen blkfront driver is still vulnerable for an attack via excessive number of events sent by the backend. Fix that by using lateeoi event channels. This is part of XSA-391 Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- drivers/block/xen-blkfront.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 8e3983e456f3..286cf1afad78 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1512,9 +1512,12 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) unsigned long flags; struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id; struct blkfront_info *info = rinfo->dev_info; + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; - if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { + xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS); return IRQ_HANDLED; + } spin_lock_irqsave(&rinfo->ring_lock, flags); again: @@ -1530,6 +1533,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) unsigned long id; unsigned int op; + eoiflag = 0; + RING_COPY_RESPONSE(&rinfo->ring, i, &bret); id = bret.id; @@ -1646,6 +1651,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) spin_unlock_irqrestore(&rinfo->ring_lock, flags); + xen_irq_lateeoi(irq, eoiflag); + return IRQ_HANDLED; err: @@ -1653,6 +1660,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) spin_unlock_irqrestore(&rinfo->ring_lock, flags); + /* No EOI in order to avoid further interrupts. */ + pr_alert("%s disabled for further use\n", info->gd->disk_name); return IRQ_HANDLED; } @@ -1692,8 +1701,8 @@ static int setup_blkring(struct xenbus_device *dev, if (err) goto fail; - err = bind_evtchn_to_irqhandler(rinfo->evtchn, blkif_interrupt, 0, - "blkif", rinfo); + err = bind_evtchn_to_irqhandler_lateeoi(rinfo->evtchn, blkif_interrupt, + 0, "blkif", rinfo); if (err <= 0) { xenbus_dev_fatal(dev, err, "bind_evtchn_to_irqhandler failed"); From b27d47950e481f292c0a5ad57357edb9d95d03ba Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 16 Dec 2021 08:24:08 +0100 Subject: [PATCH 103/251] xen/netfront: harden netfront against event channel storms The Xen netfront driver is still vulnerable for an attack via excessive number of events sent by the backend. Fix that by using lateeoi event channels. For being able to detect the case of no rx responses being added while the carrier is down a new lock is needed in order to update and test rsp_cons and the number of seen unconsumed responses atomically. This is part of XSA-391 Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- V2: - don't eoi irq in case of interface set broken (Jan Beulich) - handle carrier off + no new responses added (Jan Beulich) V3: - add rx_ prefix to rsp_unconsumed (Jan Beulich) - correct xennet_set_rx_rsp_cons() spelling (Jan Beulich) --- drivers/net/xen-netfront.c | 127 +++++++++++++++++++++++++++---------- 1 file changed, 95 insertions(+), 32 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 911f43986a8c..d514d96027a6 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -148,6 +148,9 @@ struct netfront_queue { grant_ref_t gref_rx_head; grant_ref_t grant_rx_ref[NET_RX_RING_SIZE]; + unsigned int rx_rsp_unconsumed; + spinlock_t rx_cons_lock; + struct page_pool *page_pool; struct xdp_rxq_info xdp_rxq; }; @@ -376,12 +379,13 @@ static int xennet_open(struct net_device *dev) return 0; } -static void xennet_tx_buf_gc(struct netfront_queue *queue) +static bool xennet_tx_buf_gc(struct netfront_queue *queue) { RING_IDX cons, prod; unsigned short id; struct sk_buff *skb; bool more_to_do; + bool work_done = false; const struct device *dev = &queue->info->netdev->dev; BUG_ON(!netif_carrier_ok(queue->info->netdev)); @@ -398,6 +402,8 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue) for (cons = queue->tx.rsp_cons; cons != prod; cons++) { struct xen_netif_tx_response txrsp; + work_done = true; + RING_COPY_RESPONSE(&queue->tx, cons, &txrsp); if (txrsp.status == XEN_NETIF_RSP_NULL) continue; @@ -441,11 +447,13 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue) xennet_maybe_wake_tx(queue); - return; + return work_done; err: queue->info->broken = true; dev_alert(dev, "Disabled for further use\n"); + + return work_done; } struct xennet_gnttab_make_txreq { @@ -834,6 +842,16 @@ static int xennet_close(struct net_device *dev) return 0; } +static void xennet_set_rx_rsp_cons(struct netfront_queue *queue, RING_IDX val) +{ + unsigned long flags; + + spin_lock_irqsave(&queue->rx_cons_lock, flags); + queue->rx.rsp_cons = val; + queue->rx_rsp_unconsumed = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx); + spin_unlock_irqrestore(&queue->rx_cons_lock, flags); +} + static void xennet_move_rx_slot(struct netfront_queue *queue, struct sk_buff *skb, grant_ref_t ref) { @@ -885,7 +903,7 @@ static int xennet_get_extras(struct netfront_queue *queue, xennet_move_rx_slot(queue, skb, ref); } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE); - queue->rx.rsp_cons = cons; + xennet_set_rx_rsp_cons(queue, cons); return err; } @@ -1039,7 +1057,7 @@ next: } if (unlikely(err)) - queue->rx.rsp_cons = cons + slots; + xennet_set_rx_rsp_cons(queue, cons + slots); return err; } @@ -1093,7 +1111,8 @@ static int xennet_fill_frags(struct netfront_queue *queue, __pskb_pull_tail(skb, pull_to - skb_headlen(skb)); } if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) { - queue->rx.rsp_cons = ++cons + skb_queue_len(list); + xennet_set_rx_rsp_cons(queue, + ++cons + skb_queue_len(list)); kfree_skb(nskb); return -ENOENT; } @@ -1106,7 +1125,7 @@ static int xennet_fill_frags(struct netfront_queue *queue, kfree_skb(nskb); } - queue->rx.rsp_cons = cons; + xennet_set_rx_rsp_cons(queue, cons); return 0; } @@ -1229,7 +1248,9 @@ err: if (unlikely(xennet_set_skb_gso(skb, gso))) { __skb_queue_head(&tmpq, skb); - queue->rx.rsp_cons += skb_queue_len(&tmpq); + xennet_set_rx_rsp_cons(queue, + queue->rx.rsp_cons + + skb_queue_len(&tmpq)); goto err; } } @@ -1253,7 +1274,8 @@ err: __skb_queue_tail(&rxq, skb); - i = ++queue->rx.rsp_cons; + i = queue->rx.rsp_cons + 1; + xennet_set_rx_rsp_cons(queue, i); work_done++; } if (need_xdp_flush) @@ -1417,40 +1439,79 @@ static int xennet_set_features(struct net_device *dev, return 0; } -static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id) +static bool xennet_handle_tx(struct netfront_queue *queue, unsigned int *eoi) { - struct netfront_queue *queue = dev_id; unsigned long flags; - if (queue->info->broken) - return IRQ_HANDLED; + if (unlikely(queue->info->broken)) + return false; spin_lock_irqsave(&queue->tx_lock, flags); - xennet_tx_buf_gc(queue); + if (xennet_tx_buf_gc(queue)) + *eoi = 0; spin_unlock_irqrestore(&queue->tx_lock, flags); + return true; +} + +static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id) +{ + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; + + if (likely(xennet_handle_tx(dev_id, &eoiflag))) + xen_irq_lateeoi(irq, eoiflag); + return IRQ_HANDLED; } +static bool xennet_handle_rx(struct netfront_queue *queue, unsigned int *eoi) +{ + unsigned int work_queued; + unsigned long flags; + + if (unlikely(queue->info->broken)) + return false; + + spin_lock_irqsave(&queue->rx_cons_lock, flags); + work_queued = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx); + if (work_queued > queue->rx_rsp_unconsumed) { + queue->rx_rsp_unconsumed = work_queued; + *eoi = 0; + } else if (unlikely(work_queued < queue->rx_rsp_unconsumed)) { + const struct device *dev = &queue->info->netdev->dev; + + spin_unlock_irqrestore(&queue->rx_cons_lock, flags); + dev_alert(dev, "RX producer index going backwards\n"); + dev_alert(dev, "Disabled for further use\n"); + queue->info->broken = true; + return false; + } + spin_unlock_irqrestore(&queue->rx_cons_lock, flags); + + if (likely(netif_carrier_ok(queue->info->netdev) && work_queued)) + napi_schedule(&queue->napi); + + return true; +} + static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id) { - struct netfront_queue *queue = dev_id; - struct net_device *dev = queue->info->netdev; + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; - if (queue->info->broken) - return IRQ_HANDLED; - - if (likely(netif_carrier_ok(dev) && - RING_HAS_UNCONSUMED_RESPONSES(&queue->rx))) - napi_schedule(&queue->napi); + if (likely(xennet_handle_rx(dev_id, &eoiflag))) + xen_irq_lateeoi(irq, eoiflag); return IRQ_HANDLED; } static irqreturn_t xennet_interrupt(int irq, void *dev_id) { - xennet_tx_interrupt(irq, dev_id); - xennet_rx_interrupt(irq, dev_id); + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; + + if (xennet_handle_tx(dev_id, &eoiflag) && + xennet_handle_rx(dev_id, &eoiflag)) + xen_irq_lateeoi(irq, eoiflag); + return IRQ_HANDLED; } @@ -1768,9 +1829,10 @@ static int setup_netfront_single(struct netfront_queue *queue) if (err < 0) goto fail; - err = bind_evtchn_to_irqhandler(queue->tx_evtchn, - xennet_interrupt, - 0, queue->info->netdev->name, queue); + err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn, + xennet_interrupt, 0, + queue->info->netdev->name, + queue); if (err < 0) goto bind_fail; queue->rx_evtchn = queue->tx_evtchn; @@ -1798,18 +1860,18 @@ static int setup_netfront_split(struct netfront_queue *queue) snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name), "%s-tx", queue->name); - err = bind_evtchn_to_irqhandler(queue->tx_evtchn, - xennet_tx_interrupt, - 0, queue->tx_irq_name, queue); + err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn, + xennet_tx_interrupt, 0, + queue->tx_irq_name, queue); if (err < 0) goto bind_tx_fail; queue->tx_irq = err; snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name), "%s-rx", queue->name); - err = bind_evtchn_to_irqhandler(queue->rx_evtchn, - xennet_rx_interrupt, - 0, queue->rx_irq_name, queue); + err = bind_evtchn_to_irqhandler_lateeoi(queue->rx_evtchn, + xennet_rx_interrupt, 0, + queue->rx_irq_name, queue); if (err < 0) goto bind_rx_fail; queue->rx_irq = err; @@ -1911,6 +1973,7 @@ static int xennet_init_queue(struct netfront_queue *queue) spin_lock_init(&queue->tx_lock); spin_lock_init(&queue->rx_lock); + spin_lock_init(&queue->rx_cons_lock); timer_setup(&queue->rx_refill_timer, rx_refill_timeout, 0); From fe415186b43df0db1f17fa3a46275fd92107fe71 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 16 Dec 2021 08:24:08 +0100 Subject: [PATCH 104/251] xen/console: harden hvc_xen against event channel storms The Xen console driver is still vulnerable for an attack via excessive number of events sent by the backend. Fix that by using a lateeoi event channel. For the normal domU initial console this requires the introduction of bind_evtchn_to_irq_lateeoi() as there is no xenbus device available at the time the event channel is bound to the irq. As the decision whether an interrupt was spurious or not requires to test for bytes having been read from the backend, move sending the event into the if statement, as sending an event without having found any bytes to be read is making no sense at all. This is part of XSA-391 Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- V2: - slightly adapt spurious irq detection (Jan Beulich) V3: - fix spurious irq detection (Jan Beulich) --- drivers/tty/hvc/hvc_xen.c | 30 +++++++++++++++++++++++++++--- drivers/xen/events/events_base.c | 6 ++++++ include/xen/events.h | 1 + 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 71e0dd2c0ce5..ebaf7500f48f 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -37,6 +37,8 @@ struct xencons_info { struct xenbus_device *xbdev; struct xencons_interface *intf; unsigned int evtchn; + XENCONS_RING_IDX out_cons; + unsigned int out_cons_same; struct hvc_struct *hvc; int irq; int vtermno; @@ -138,6 +140,8 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len) XENCONS_RING_IDX cons, prod; int recv = 0; struct xencons_info *xencons = vtermno_to_xencons(vtermno); + unsigned int eoiflag = 0; + if (xencons == NULL) return -EINVAL; intf = xencons->intf; @@ -157,7 +161,27 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len) mb(); /* read ring before consuming */ intf->in_cons = cons; - notify_daemon(xencons); + /* + * When to mark interrupt having been spurious: + * - there was no new data to be read, and + * - the backend did not consume some output bytes, and + * - the previous round with no read data didn't see consumed bytes + * (we might have a race with an interrupt being in flight while + * updating xencons->out_cons, so account for that by allowing one + * round without any visible reason) + */ + if (intf->out_cons != xencons->out_cons) { + xencons->out_cons = intf->out_cons; + xencons->out_cons_same = 0; + } + if (recv) { + notify_daemon(xencons); + } else if (xencons->out_cons_same++ > 1) { + eoiflag = XEN_EOI_FLAG_SPURIOUS; + } + + xen_irq_lateeoi(xencons->irq, eoiflag); + return recv; } @@ -386,7 +410,7 @@ static int xencons_connect_backend(struct xenbus_device *dev, if (ret) return ret; info->evtchn = evtchn; - irq = bind_evtchn_to_irq(evtchn); + irq = bind_interdomain_evtchn_to_irq_lateeoi(dev, evtchn); if (irq < 0) return irq; info->irq = irq; @@ -551,7 +575,7 @@ static int __init xen_hvc_init(void) return r; info = vtermno_to_xencons(HVC_COOKIE); - info->irq = bind_evtchn_to_irq(info->evtchn); + info->irq = bind_evtchn_to_irq_lateeoi(info->evtchn); } if (info->irq < 0) info->irq = 0; /* NO_IRQ */ diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index a78704ae3618..46d9295d9a6e 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1251,6 +1251,12 @@ int bind_evtchn_to_irq(evtchn_port_t evtchn) } EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); +int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn) +{ + return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip, NULL); +} +EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi); + static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) { struct evtchn_bind_ipi bind_ipi; diff --git a/include/xen/events.h b/include/xen/events.h index c204262d9fc2..344081e71584 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -17,6 +17,7 @@ struct xenbus_device; unsigned xen_evtchn_nr_channels(void); int bind_evtchn_to_irq(evtchn_port_t evtchn); +int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn); int bind_evtchn_to_irqhandler(evtchn_port_t evtchn, irq_handler_t handler, unsigned long irqflags, const char *devname, From 6032046ec4b70176d247a71836186d47b25d1684 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 16 Dec 2021 08:25:12 +0100 Subject: [PATCH 105/251] xen/netback: fix rx queue stall detection Commit 1d5d48523900a4b ("xen-netback: require fewer guest Rx slots when not using GSO") introduced a security problem in netback, as an interface would only be regarded to be stalled if no slot is available in the rx queue ring page. In case the SKB at the head of the queued requests will need more than one rx slot and only one slot is free the stall detection logic will never trigger, as the test for that is only looking for at least one slot to be free. Fix that by testing for the needed number of slots instead of only one slot being available. In order to not have to take the rx queue lock that often, store the number of needed slots in the queue data. As all SKB dequeue operations happen in the rx queue kernel thread this is safe, as long as the number of needed slots is accessed via READ/WRITE_ONCE() only and updates are always done with the rx queue lock held. Add a small helper for obtaining the number of free slots. This is part of XSA-392 Fixes: 1d5d48523900a4b ("xen-netback: require fewer guest Rx slots when not using GSO") Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- drivers/net/xen-netback/common.h | 1 + drivers/net/xen-netback/rx.c | 65 ++++++++++++++++++++------------ 2 files changed, 42 insertions(+), 24 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 4a16d6e33c09..d9dea4829c86 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -203,6 +203,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */ unsigned int rx_queue_max; unsigned int rx_queue_len; unsigned long last_rx_time; + unsigned int rx_slots_needed; bool stalled; struct xenvif_copy_state rx_copy; diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index accc991d153f..a8511e27d6c1 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -33,28 +33,36 @@ #include #include +/* + * Update the needed ring page slots for the first SKB queued. + * Note that any call sequence outside the RX thread calling this function + * needs to wake up the RX thread via a call of xenvif_kick_thread() + * afterwards in order to avoid a race with putting the thread to sleep. + */ +static void xenvif_update_needed_slots(struct xenvif_queue *queue, + const struct sk_buff *skb) +{ + unsigned int needed = 0; + + if (skb) { + needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE); + if (skb_is_gso(skb)) + needed++; + if (skb->sw_hash) + needed++; + } + + WRITE_ONCE(queue->rx_slots_needed, needed); +} + static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) { RING_IDX prod, cons; - struct sk_buff *skb; - int needed; - unsigned long flags; + unsigned int needed; - spin_lock_irqsave(&queue->rx_queue.lock, flags); - - skb = skb_peek(&queue->rx_queue); - if (!skb) { - spin_unlock_irqrestore(&queue->rx_queue.lock, flags); + needed = READ_ONCE(queue->rx_slots_needed); + if (!needed) return false; - } - - needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE); - if (skb_is_gso(skb)) - needed++; - if (skb->sw_hash) - needed++; - - spin_unlock_irqrestore(&queue->rx_queue.lock, flags); do { prod = queue->rx.sring->req_prod; @@ -80,6 +88,9 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) spin_lock_irqsave(&queue->rx_queue.lock, flags); + if (skb_queue_empty(&queue->rx_queue)) + xenvif_update_needed_slots(queue, skb); + __skb_queue_tail(&queue->rx_queue, skb); queue->rx_queue_len += skb->len; @@ -100,6 +111,8 @@ static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue) skb = __skb_dequeue(&queue->rx_queue); if (skb) { + xenvif_update_needed_slots(queue, skb_peek(&queue->rx_queue)); + queue->rx_queue_len -= skb->len; if (queue->rx_queue_len < queue->rx_queue_max) { struct netdev_queue *txq; @@ -487,27 +500,31 @@ void xenvif_rx_action(struct xenvif_queue *queue) xenvif_rx_copy_flush(queue); } -static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue) +static RING_IDX xenvif_rx_queue_slots(const struct xenvif_queue *queue) { RING_IDX prod, cons; prod = queue->rx.sring->req_prod; cons = queue->rx.req_cons; + return prod - cons; +} + +static bool xenvif_rx_queue_stalled(const struct xenvif_queue *queue) +{ + unsigned int needed = READ_ONCE(queue->rx_slots_needed); + return !queue->stalled && - prod - cons < 1 && + xenvif_rx_queue_slots(queue) < needed && time_after(jiffies, queue->last_rx_time + queue->vif->stall_timeout); } static bool xenvif_rx_queue_ready(struct xenvif_queue *queue) { - RING_IDX prod, cons; + unsigned int needed = READ_ONCE(queue->rx_slots_needed); - prod = queue->rx.sring->req_prod; - cons = queue->rx.req_cons; - - return queue->stalled && prod - cons >= 1; + return queue->stalled && xenvif_rx_queue_slots(queue) >= needed; } bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread) From be81992f9086b230623ae3ebbc85ecee4d00a3d3 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 30 Nov 2021 08:36:12 +0100 Subject: [PATCH 106/251] xen/netback: don't queue unlimited number of packages In case a guest isn't consuming incoming network traffic as fast as it is coming in, xen-netback is buffering network packages in unlimited numbers today. This can result in host OOM situations. Commit f48da8b14d04ca8 ("xen-netback: fix unlimited guest Rx internal queue and carrier flapping") meant to introduce a mechanism to limit the amount of buffered data by stopping the Tx queue when reaching the data limit, but this doesn't work for cases like UDP. When hitting the limit don't queue further SKBs, but drop them instead. In order to be able to tell Rx packages have been dropped increment the rx_dropped statistics counter in this case. It should be noted that the old solution to continue queueing SKBs had the additional problem of an overflow of the 32-bit rx_queue_len value would result in intermittent Tx queue enabling. This is part of XSA-392 Fixes: f48da8b14d04ca8 ("xen-netback: fix unlimited guest Rx internal queue and carrier flapping") Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- drivers/net/xen-netback/rx.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index a8511e27d6c1..dbac4c03d21a 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -88,16 +88,19 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) spin_lock_irqsave(&queue->rx_queue.lock, flags); - if (skb_queue_empty(&queue->rx_queue)) - xenvif_update_needed_slots(queue, skb); - - __skb_queue_tail(&queue->rx_queue, skb); - - queue->rx_queue_len += skb->len; - if (queue->rx_queue_len > queue->rx_queue_max) { + if (queue->rx_queue_len >= queue->rx_queue_max) { struct net_device *dev = queue->vif->dev; netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id)); + kfree_skb(skb); + queue->vif->dev->stats.rx_dropped++; + } else { + if (skb_queue_empty(&queue->rx_queue)) + xenvif_update_needed_slots(queue, skb); + + __skb_queue_tail(&queue->rx_queue, skb); + + queue->rx_queue_len += skb->len; } spin_unlock_irqrestore(&queue->rx_queue.lock, flags); @@ -147,6 +150,7 @@ static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue) break; xenvif_rx_dequeue(queue); kfree_skb(skb); + queue->vif->dev->stats.rx_dropped++; } } From dfd0743f1d9ea76931510ed150334d571fbab49d Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Thu, 9 Dec 2021 15:59:37 +0100 Subject: [PATCH 107/251] tee: handle lookup of shm with reference count 0 Since the tee subsystem does not keep a strong reference to its idle shared memory buffers, it races with other threads that try to destroy a shared memory through a close of its dma-buf fd or by unmapping the memory. In tee_shm_get_from_id() when a lookup in teedev->idr has been successful, it is possible that the tee_shm is in the dma-buf teardown path, but that path is blocked by the teedev mutex. Since we don't have an API to tell if the tee_shm is in the dma-buf teardown path or not we must find another way of detecting this condition. Fix this by doing the reference counting directly on the tee_shm using a new refcount_t refcount field. dma-buf is replaced by using anon_inode_getfd() instead, this separates the life-cycle of the underlying file from the tee_shm. tee_shm_put() is updated to hold the mutex when decreasing the refcount to 0 and then remove the tee_shm from teedev->idr before releasing the mutex. This means that the tee_shm can never be found unless it has a refcount larger than 0. Fixes: 967c9cca2cc5 ("tee: generic TEE subsystem") Cc: stable@vger.kernel.org Reviewed-by: Greg Kroah-Hartman Reviewed-by: Lars Persson Reviewed-by: Sumit Garg Reported-by: Patrik Lantz Signed-off-by: Jens Wiklander --- drivers/tee/tee_shm.c | 174 +++++++++++++++------------------------- include/linux/tee_drv.h | 4 +- 2 files changed, 68 insertions(+), 110 deletions(-) diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index 8a8deb95e918..499fccba3d74 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -1,20 +1,17 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 2015-2016, Linaro Limited + * Copyright (c) 2015-2017, 2019-2021 Linaro Limited */ +#include #include -#include -#include #include +#include #include #include #include #include -#include #include "tee_private.h" -MODULE_IMPORT_NS(DMA_BUF); - static void release_registered_pages(struct tee_shm *shm) { if (shm->pages) { @@ -31,16 +28,8 @@ static void release_registered_pages(struct tee_shm *shm) } } -static void tee_shm_release(struct tee_shm *shm) +static void tee_shm_release(struct tee_device *teedev, struct tee_shm *shm) { - struct tee_device *teedev = shm->ctx->teedev; - - if (shm->flags & TEE_SHM_DMA_BUF) { - mutex_lock(&teedev->mutex); - idr_remove(&teedev->idr, shm->id); - mutex_unlock(&teedev->mutex); - } - if (shm->flags & TEE_SHM_POOL) { struct tee_shm_pool_mgr *poolm; @@ -67,45 +56,6 @@ static void tee_shm_release(struct tee_shm *shm) tee_device_put(teedev); } -static struct sg_table *tee_shm_op_map_dma_buf(struct dma_buf_attachment - *attach, enum dma_data_direction dir) -{ - return NULL; -} - -static void tee_shm_op_unmap_dma_buf(struct dma_buf_attachment *attach, - struct sg_table *table, - enum dma_data_direction dir) -{ -} - -static void tee_shm_op_release(struct dma_buf *dmabuf) -{ - struct tee_shm *shm = dmabuf->priv; - - tee_shm_release(shm); -} - -static int tee_shm_op_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) -{ - struct tee_shm *shm = dmabuf->priv; - size_t size = vma->vm_end - vma->vm_start; - - /* Refuse sharing shared memory provided by application */ - if (shm->flags & TEE_SHM_USER_MAPPED) - return -EINVAL; - - return remap_pfn_range(vma, vma->vm_start, shm->paddr >> PAGE_SHIFT, - size, vma->vm_page_prot); -} - -static const struct dma_buf_ops tee_shm_dma_buf_ops = { - .map_dma_buf = tee_shm_op_map_dma_buf, - .unmap_dma_buf = tee_shm_op_unmap_dma_buf, - .release = tee_shm_op_release, - .mmap = tee_shm_op_mmap, -}; - struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags) { struct tee_device *teedev = ctx->teedev; @@ -140,6 +90,7 @@ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags) goto err_dev_put; } + refcount_set(&shm->refcount, 1); shm->flags = flags | TEE_SHM_POOL; shm->ctx = ctx; if (flags & TEE_SHM_DMA_BUF) @@ -153,10 +104,7 @@ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags) goto err_kfree; } - if (flags & TEE_SHM_DMA_BUF) { - DEFINE_DMA_BUF_EXPORT_INFO(exp_info); - mutex_lock(&teedev->mutex); shm->id = idr_alloc(&teedev->idr, shm, 1, 0, GFP_KERNEL); mutex_unlock(&teedev->mutex); @@ -164,28 +112,11 @@ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags) ret = ERR_PTR(shm->id); goto err_pool_free; } - - exp_info.ops = &tee_shm_dma_buf_ops; - exp_info.size = shm->size; - exp_info.flags = O_RDWR; - exp_info.priv = shm; - - shm->dmabuf = dma_buf_export(&exp_info); - if (IS_ERR(shm->dmabuf)) { - ret = ERR_CAST(shm->dmabuf); - goto err_rem; - } } teedev_ctx_get(ctx); return shm; -err_rem: - if (flags & TEE_SHM_DMA_BUF) { - mutex_lock(&teedev->mutex); - idr_remove(&teedev->idr, shm->id); - mutex_unlock(&teedev->mutex); - } err_pool_free: poolm->ops->free(poolm, shm); err_kfree: @@ -246,6 +177,7 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, goto err; } + refcount_set(&shm->refcount, 1); shm->flags = flags | TEE_SHM_REGISTER; shm->ctx = ctx; shm->id = -1; @@ -306,22 +238,6 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, goto err; } - if (flags & TEE_SHM_DMA_BUF) { - DEFINE_DMA_BUF_EXPORT_INFO(exp_info); - - exp_info.ops = &tee_shm_dma_buf_ops; - exp_info.size = shm->size; - exp_info.flags = O_RDWR; - exp_info.priv = shm; - - shm->dmabuf = dma_buf_export(&exp_info); - if (IS_ERR(shm->dmabuf)) { - ret = ERR_CAST(shm->dmabuf); - teedev->desc->ops->shm_unregister(ctx, shm); - goto err; - } - } - return shm; err: if (shm) { @@ -339,6 +255,35 @@ err: } EXPORT_SYMBOL_GPL(tee_shm_register); +static int tee_shm_fop_release(struct inode *inode, struct file *filp) +{ + tee_shm_put(filp->private_data); + return 0; +} + +static int tee_shm_fop_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct tee_shm *shm = filp->private_data; + size_t size = vma->vm_end - vma->vm_start; + + /* Refuse sharing shared memory provided by application */ + if (shm->flags & TEE_SHM_USER_MAPPED) + return -EINVAL; + + /* check for overflowing the buffer's size */ + if (vma->vm_pgoff + vma_pages(vma) > shm->size >> PAGE_SHIFT) + return -EINVAL; + + return remap_pfn_range(vma, vma->vm_start, shm->paddr >> PAGE_SHIFT, + size, vma->vm_page_prot); +} + +static const struct file_operations tee_shm_fops = { + .owner = THIS_MODULE, + .release = tee_shm_fop_release, + .mmap = tee_shm_fop_mmap, +}; + /** * tee_shm_get_fd() - Increase reference count and return file descriptor * @shm: Shared memory handle @@ -351,10 +296,11 @@ int tee_shm_get_fd(struct tee_shm *shm) if (!(shm->flags & TEE_SHM_DMA_BUF)) return -EINVAL; - get_dma_buf(shm->dmabuf); - fd = dma_buf_fd(shm->dmabuf, O_CLOEXEC); + /* matched by tee_shm_put() in tee_shm_op_release() */ + refcount_inc(&shm->refcount); + fd = anon_inode_getfd("tee_shm", &tee_shm_fops, shm, O_RDWR); if (fd < 0) - dma_buf_put(shm->dmabuf); + tee_shm_put(shm); return fd; } @@ -364,17 +310,7 @@ int tee_shm_get_fd(struct tee_shm *shm) */ void tee_shm_free(struct tee_shm *shm) { - /* - * dma_buf_put() decreases the dmabuf reference counter and will - * call tee_shm_release() when the last reference is gone. - * - * In the case of driver private memory we call tee_shm_release - * directly instead as it doesn't have a reference counter. - */ - if (shm->flags & TEE_SHM_DMA_BUF) - dma_buf_put(shm->dmabuf); - else - tee_shm_release(shm); + tee_shm_put(shm); } EXPORT_SYMBOL_GPL(tee_shm_free); @@ -481,10 +417,15 @@ struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id) teedev = ctx->teedev; mutex_lock(&teedev->mutex); shm = idr_find(&teedev->idr, id); + /* + * If the tee_shm was found in the IDR it must have a refcount + * larger than 0 due to the guarantee in tee_shm_put() below. So + * it's safe to use refcount_inc(). + */ if (!shm || shm->ctx != ctx) shm = ERR_PTR(-EINVAL); - else if (shm->flags & TEE_SHM_DMA_BUF) - get_dma_buf(shm->dmabuf); + else + refcount_inc(&shm->refcount); mutex_unlock(&teedev->mutex); return shm; } @@ -496,7 +437,24 @@ EXPORT_SYMBOL_GPL(tee_shm_get_from_id); */ void tee_shm_put(struct tee_shm *shm) { - if (shm->flags & TEE_SHM_DMA_BUF) - dma_buf_put(shm->dmabuf); + struct tee_device *teedev = shm->ctx->teedev; + bool do_release = false; + + mutex_lock(&teedev->mutex); + if (refcount_dec_and_test(&shm->refcount)) { + /* + * refcount has reached 0, we must now remove it from the + * IDR before releasing the mutex. This will guarantee that + * the refcount_inc() in tee_shm_get_from_id() never starts + * from 0. + */ + if (shm->flags & TEE_SHM_DMA_BUF) + idr_remove(&teedev->idr, shm->id); + do_release = true; + } + mutex_unlock(&teedev->mutex); + + if (do_release) + tee_shm_release(teedev, shm); } EXPORT_SYMBOL_GPL(tee_shm_put); diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index a1f03461369b..cf5999626e28 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -195,7 +195,7 @@ int tee_session_calc_client_uuid(uuid_t *uuid, u32 connection_method, * @offset: offset of buffer in user space * @pages: locked pages from userspace * @num_pages: number of locked pages - * @dmabuf: dmabuf used to for exporting to user space + * @refcount: reference counter * @flags: defined by TEE_SHM_* in tee_drv.h * @id: unique id of a shared memory object on this device, shared * with user space @@ -214,7 +214,7 @@ struct tee_shm { unsigned int offset; struct page **pages; size_t num_pages; - struct dma_buf *dmabuf; + refcount_t refcount; u32 flags; int id; u64 sec_world_id; From 849e087ba68ac6956c11016ce34f9f10a09a4186 Mon Sep 17 00:00:00 2001 From: Zhang Ying-22455 Date: Tue, 14 Dec 2021 01:23:33 -0600 Subject: [PATCH 108/251] arm64: dts: lx2160a: fix scl-gpios property name Fix the typo in the property name. Fixes: d548c217c6a3c ("arm64: dts: add QorIQ LX2160A SoC support") Signed-off-by: Zhang Ying Signed-off-by: Li Yang Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi index dc8661ebd1f6..2433e6f2eda8 100644 --- a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi @@ -719,7 +719,7 @@ clock-names = "i2c"; clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL QORIQ_CLK_PLL_DIV(16)>; - scl-gpio = <&gpio2 15 GPIO_ACTIVE_HIGH>; + scl-gpios = <&gpio2 15 GPIO_ACTIVE_HIGH>; status = "disabled"; }; @@ -768,7 +768,7 @@ clock-names = "i2c"; clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL QORIQ_CLK_PLL_DIV(16)>; - scl-gpio = <&gpio2 16 GPIO_ACTIVE_HIGH>; + scl-gpios = <&gpio2 16 GPIO_ACTIVE_HIGH>; status = "disabled"; }; From 18549bf4b21c739a9def39f27dcac53e27286ab5 Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Thu, 16 Dec 2021 11:17:25 +0530 Subject: [PATCH 109/251] tee: optee: Fix incorrect page free bug Pointer to the allocated pages (struct page *page) has already progressed towards the end of allocation. It is incorrect to perform __free_pages(page, order) using this pointer as we would free any arbitrary pages. Fix this by stop modifying the page pointer. Fixes: ec185dd3ab25 ("optee: Fix memory leak when failing to register shm pages") Cc: stable@vger.kernel.org Reported-by: Patrik Lantz Signed-off-by: Sumit Garg Reviewed-by: Tyler Hicks Signed-off-by: Jens Wiklander --- drivers/tee/optee/core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index ab2edfcc6c70..2a66a5203d2f 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -48,10 +48,8 @@ int optee_pool_op_alloc_helper(struct tee_shm_pool_mgr *poolm, goto err; } - for (i = 0; i < nr_pages; i++) { - pages[i] = page; - page++; - } + for (i = 0; i < nr_pages; i++) + pages[i] = page + i; shm->flags |= TEE_SHM_REGISTER; rc = shm_register(shm->ctx, shm, pages, nr_pages, From 6add87fdae9bcb1d20b4503df5bd02ce5246cc8b Mon Sep 17 00:00:00 2001 From: Xiaolei Wang Date: Mon, 6 Dec 2021 20:05:33 +0800 Subject: [PATCH 110/251] optee: Suppress false positive kmemleak report in optee_handle_rpc() We observed the following kmemleak report: unreferenced object 0xffff000007904500 (size 128): comm "swapper/0", pid 1, jiffies 4294892671 (age 44.036s) hex dump (first 32 bytes): 00 47 90 07 00 00 ff ff 60 00 c0 ff 00 00 00 00 .G......`....... 60 00 80 13 00 80 ff ff a0 00 00 00 00 00 00 00 `............... backtrace: [<000000004c12b1c7>] kmem_cache_alloc+0x1ac/0x2f4 [<000000005d23eb4f>] tee_shm_alloc+0x78/0x230 [<00000000794dd22c>] optee_handle_rpc+0x60/0x6f0 [<00000000d9f7c52d>] optee_do_call_with_arg+0x17c/0x1dc [<00000000c35884da>] optee_open_session+0x128/0x1ec [<000000001748f2ff>] tee_client_open_session+0x28/0x40 [<00000000aecb5389>] optee_enumerate_devices+0x84/0x2a0 [<000000003df18bf1>] optee_probe+0x674/0x6cc [<000000003a4a534a>] platform_drv_probe+0x54/0xb0 [<000000000c51ce7d>] really_probe+0xe4/0x4d0 [<000000002f04c865>] driver_probe_device+0x58/0xc0 [<00000000b485397d>] device_driver_attach+0xc0/0xd0 [<00000000c835f0df>] __driver_attach+0x84/0x124 [<000000008e5a429c>] bus_for_each_dev+0x70/0xc0 [<000000001735e8a8>] driver_attach+0x24/0x30 [<000000006d94b04f>] bus_add_driver+0x104/0x1ec This is not a memory leak because we pass the share memory pointer to secure world and would get it from secure world before releasing it. Signed-off-by: Xiaolei Wang Signed-off-by: Jens Wiklander --- drivers/tee/optee/smc_abi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tee/optee/smc_abi.c b/drivers/tee/optee/smc_abi.c index 6196d7c3888f..cf2e3293567d 100644 --- a/drivers/tee/optee/smc_abi.c +++ b/drivers/tee/optee/smc_abi.c @@ -23,6 +23,7 @@ #include "optee_private.h" #include "optee_smc.h" #include "optee_rpc_cmd.h" +#include #define CREATE_TRACE_POINTS #include "optee_trace.h" @@ -783,6 +784,7 @@ static void optee_handle_rpc(struct tee_context *ctx, param->a4 = 0; param->a5 = 0; } + kmemleak_not_leak(shm); break; case OPTEE_SMC_RPC_FUNC_FREE: shm = reg_pair_to_ptr(param->a1, param->a2); From ef399469d9ceb9f2171cdd79863f9434b9fa3edc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 30 Nov 2021 15:50:47 +0300 Subject: [PATCH 111/251] ksmbd: fix error code in ndr_read_int32() This is a failure path and it should return -EINVAL instead of success. Otherwise it could result in the caller using uninitialized memory. Fixes: 303fff2b8c77 ("ksmbd: add validation for ndr read/write functions") Cc: stable@vger.kernel.org # v5.15 Acked-by: Namjae Jeon Signed-off-by: Dan Carpenter Signed-off-by: Steve French --- fs/ksmbd/ndr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ksmbd/ndr.c b/fs/ksmbd/ndr.c index 8317f7ca402b..5052be9261d9 100644 --- a/fs/ksmbd/ndr.c +++ b/fs/ksmbd/ndr.c @@ -148,7 +148,7 @@ static int ndr_read_int16(struct ndr *n, __u16 *value) static int ndr_read_int32(struct ndr *n, __u32 *value) { if (n->offset + sizeof(__u32) > n->length) - return 0; + return -EINVAL; if (value) *value = le32_to_cpu(*(__le32 *)ndr_get_field(n)); From f2e78affc48dee29b989c1d9b0d89b503dcd1204 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 1 Dec 2021 10:12:39 +0900 Subject: [PATCH 112/251] ksmbd: fix uninitialized symbol 'pntsd_size' No check for if "rc" is an error code for build_sec_desc(). This can cause problems with using uninitialized pntsd_size. Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") Cc: stable@vger.kernel.org # v5.15 Reported-by: Dan Carpenter Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 49c9da37315c..125590d5e940 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -2962,6 +2962,10 @@ int smb2_open(struct ksmbd_work *work) &pntsd_size, &fattr); posix_acl_release(fattr.cf_acls); posix_acl_release(fattr.cf_dacls); + if (rc) { + kfree(pntsd); + goto err_out; + } rc = ksmbd_vfs_set_sd_xattr(conn, user_ns, From cc274ae7763d9700a56659f3228641d7069e7a3f Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Wed, 15 Dec 2021 16:28:40 -0500 Subject: [PATCH 113/251] selinux: fix sleeping function called from invalid context selinux_sb_mnt_opts_compat() is called via sget_fc() under the sb_lock spinlock, so it can't use GFP_KERNEL allocations: [ 868.565200] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:230 [ 868.568246] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 4914, name: mount.nfs [ 868.569626] preempt_count: 1, expected: 0 [ 868.570215] RCU nest depth: 0, expected: 0 [ 868.570809] Preemption disabled at: [ 868.570810] [<0000000000000000>] 0x0 [ 868.571848] CPU: 1 PID: 4914 Comm: mount.nfs Kdump: loaded Tainted: G W 5.16.0-rc5.2585cf9dfa #1 [ 868.573273] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-4.fc34 04/01/2014 [ 868.574478] Call Trace: [ 868.574844] [ 868.575156] dump_stack_lvl+0x34/0x44 [ 868.575692] __might_resched.cold+0xd6/0x10f [ 868.576308] slab_pre_alloc_hook.constprop.0+0x89/0xf0 [ 868.577046] __kmalloc_track_caller+0x72/0x420 [ 868.577684] ? security_context_to_sid_core+0x48/0x2b0 [ 868.578569] kmemdup_nul+0x22/0x50 [ 868.579108] security_context_to_sid_core+0x48/0x2b0 [ 868.579854] ? _nfs4_proc_pathconf+0xff/0x110 [nfsv4] [ 868.580742] ? nfs_reconfigure+0x80/0x80 [nfs] [ 868.581355] security_context_str_to_sid+0x36/0x40 [ 868.581960] selinux_sb_mnt_opts_compat+0xb5/0x1e0 [ 868.582550] ? nfs_reconfigure+0x80/0x80 [nfs] [ 868.583098] security_sb_mnt_opts_compat+0x2a/0x40 [ 868.583676] nfs_compare_super+0x113/0x220 [nfs] [ 868.584249] ? nfs_try_mount_request+0x210/0x210 [nfs] [ 868.584879] sget_fc+0xb5/0x2f0 [ 868.585267] nfs_get_tree_common+0x91/0x4a0 [nfs] [ 868.585834] vfs_get_tree+0x25/0xb0 [ 868.586241] fc_mount+0xe/0x30 [ 868.586605] do_nfs4_mount+0x130/0x380 [nfsv4] [ 868.587160] nfs4_try_get_tree+0x47/0xb0 [nfsv4] [ 868.587724] vfs_get_tree+0x25/0xb0 [ 868.588193] do_new_mount+0x176/0x310 [ 868.588782] __x64_sys_mount+0x103/0x140 [ 868.589388] do_syscall_64+0x3b/0x90 [ 868.589935] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 868.590699] RIP: 0033:0x7f2b371c6c4e [ 868.591239] Code: 48 8b 0d dd 71 0e 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d aa 71 0e 00 f7 d8 64 89 01 48 [ 868.593810] RSP: 002b:00007ffc83775d88 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 [ 868.594691] RAX: ffffffffffffffda RBX: 00007ffc83775f10 RCX: 00007f2b371c6c4e [ 868.595504] RDX: 0000555d517247a0 RSI: 0000555d51724700 RDI: 0000555d51724540 [ 868.596317] RBP: 00007ffc83775f10 R08: 0000555d51726890 R09: 0000555d51726890 [ 868.597162] R10: 0000000000000000 R11: 0000000000000246 R12: 0000555d51726890 [ 868.598005] R13: 0000000000000003 R14: 0000555d517246e0 R15: 0000555d511ac925 [ 868.598826] Cc: stable@vger.kernel.org Fixes: 69c4a42d72eb ("lsm,selinux: add new hook to compare new mount to an existing mount") Signed-off-by: Scott Mayhew [PM: cleanup/line-wrap the backtrace] Signed-off-by: Paul Moore --- security/selinux/hooks.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 62d30c0a30c2..1afc06ffd969 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -611,10 +611,11 @@ static int bad_option(struct superblock_security_struct *sbsec, char flag, return 0; } -static int parse_sid(struct super_block *sb, const char *s, u32 *sid) +static int parse_sid(struct super_block *sb, const char *s, u32 *sid, + gfp_t gfp) { int rc = security_context_str_to_sid(&selinux_state, s, - sid, GFP_KERNEL); + sid, gfp); if (rc) pr_warn("SELinux: security_context_str_to_sid" "(%s) failed for (dev %s, type %s) errno=%d\n", @@ -685,7 +686,8 @@ static int selinux_set_mnt_opts(struct super_block *sb, */ if (opts) { if (opts->fscontext) { - rc = parse_sid(sb, opts->fscontext, &fscontext_sid); + rc = parse_sid(sb, opts->fscontext, &fscontext_sid, + GFP_KERNEL); if (rc) goto out; if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, @@ -694,7 +696,8 @@ static int selinux_set_mnt_opts(struct super_block *sb, sbsec->flags |= FSCONTEXT_MNT; } if (opts->context) { - rc = parse_sid(sb, opts->context, &context_sid); + rc = parse_sid(sb, opts->context, &context_sid, + GFP_KERNEL); if (rc) goto out; if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, @@ -703,7 +706,8 @@ static int selinux_set_mnt_opts(struct super_block *sb, sbsec->flags |= CONTEXT_MNT; } if (opts->rootcontext) { - rc = parse_sid(sb, opts->rootcontext, &rootcontext_sid); + rc = parse_sid(sb, opts->rootcontext, &rootcontext_sid, + GFP_KERNEL); if (rc) goto out; if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, @@ -712,7 +716,8 @@ static int selinux_set_mnt_opts(struct super_block *sb, sbsec->flags |= ROOTCONTEXT_MNT; } if (opts->defcontext) { - rc = parse_sid(sb, opts->defcontext, &defcontext_sid); + rc = parse_sid(sb, opts->defcontext, &defcontext_sid, + GFP_KERNEL); if (rc) goto out; if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, @@ -2702,14 +2707,14 @@ static int selinux_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts) return (sbsec->flags & SE_MNTMASK) ? 1 : 0; if (opts->fscontext) { - rc = parse_sid(sb, opts->fscontext, &sid); + rc = parse_sid(sb, opts->fscontext, &sid, GFP_NOWAIT); if (rc) return 1; if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, sid)) return 1; } if (opts->context) { - rc = parse_sid(sb, opts->context, &sid); + rc = parse_sid(sb, opts->context, &sid, GFP_NOWAIT); if (rc) return 1; if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, sid)) @@ -2719,14 +2724,14 @@ static int selinux_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts) struct inode_security_struct *root_isec; root_isec = backing_inode_security(sb->s_root); - rc = parse_sid(sb, opts->rootcontext, &sid); + rc = parse_sid(sb, opts->rootcontext, &sid, GFP_NOWAIT); if (rc) return 1; if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid)) return 1; } if (opts->defcontext) { - rc = parse_sid(sb, opts->defcontext, &sid); + rc = parse_sid(sb, opts->defcontext, &sid, GFP_NOWAIT); if (rc) return 1; if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, sid)) @@ -2749,14 +2754,14 @@ static int selinux_sb_remount(struct super_block *sb, void *mnt_opts) return 0; if (opts->fscontext) { - rc = parse_sid(sb, opts->fscontext, &sid); + rc = parse_sid(sb, opts->fscontext, &sid, GFP_KERNEL); if (rc) return rc; if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, sid)) goto out_bad_option; } if (opts->context) { - rc = parse_sid(sb, opts->context, &sid); + rc = parse_sid(sb, opts->context, &sid, GFP_KERNEL); if (rc) return rc; if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, sid)) @@ -2765,14 +2770,14 @@ static int selinux_sb_remount(struct super_block *sb, void *mnt_opts) if (opts->rootcontext) { struct inode_security_struct *root_isec; root_isec = backing_inode_security(sb->s_root); - rc = parse_sid(sb, opts->rootcontext, &sid); + rc = parse_sid(sb, opts->rootcontext, &sid, GFP_KERNEL); if (rc) return rc; if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid)) goto out_bad_option; } if (opts->defcontext) { - rc = parse_sid(sb, opts->defcontext, &sid); + rc = parse_sid(sb, opts->defcontext, &sid, GFP_KERNEL); if (rc) return rc; if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, sid)) From 5da5231bb47864e5dd6c6731151e98b6ee498827 Mon Sep 17 00:00:00 2001 From: George Kennedy Date: Tue, 14 Dec 2021 09:45:10 -0500 Subject: [PATCH 114/251] libata: if T_LENGTH is zero, dma direction should be DMA_NONE Avoid data corruption by rejecting pass-through commands where T_LENGTH is zero (No data is transferred) and the dma direction is not DMA_NONE. Cc: Reported-by: syzkaller Signed-off-by: George Kennedy Signed-off-by: Damien Le Moal --- drivers/ata/libata-scsi.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 1b84d5526d77..313e9475507b 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2859,8 +2859,19 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc) goto invalid_fld; } - if (ata_is_ncq(tf->protocol) && (cdb[2 + cdb_offset] & 0x3) == 0) - tf->protocol = ATA_PROT_NCQ_NODATA; + if ((cdb[2 + cdb_offset] & 0x3) == 0) { + /* + * When T_LENGTH is zero (No data is transferred), dir should + * be DMA_NONE. + */ + if (scmd->sc_data_direction != DMA_NONE) { + fp = 2 + cdb_offset; + goto invalid_fld; + } + + if (ata_is_ncq(tf->protocol)) + tf->protocol = ATA_PROT_NCQ_NODATA; + } /* enable LBA */ tf->flags |= ATA_TFLAG_LBA; From 27750a315aba7e6675bb1c3dfd4481c4f6888af1 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Wed, 17 Nov 2021 14:30:34 +0000 Subject: [PATCH 115/251] crypto: qat - do not handle PFVF sources for qat_4xxx The QAT driver does not have support for PFVF interrupts for GEN4 devices, therefore report the vf2pf sources as 0. This prevents a NULL pointer dereference in the function adf_msix_isr_ae() if the device triggers a spurious interrupt. Fixes: 993161d36ab5 ("crypto: qat - fix handling of VF to PF interrupts") Reported-by: Adam Guerin Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c index fa768f10635f..fd29861526d6 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -211,6 +211,12 @@ static u32 uof_get_ae_mask(u32 obj_num) return adf_4xxx_fw_config[obj_num].ae_mask; } +static u32 get_vf2pf_sources(void __iomem *pmisc_addr) +{ + /* For the moment do not report vf2pf sources */ + return 0; +} + void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data) { hw_data->dev_class = &adf_4xxx_class; @@ -254,6 +260,7 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data) hw_data->set_msix_rttable = set_msix_default_rttable; hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer; hw_data->enable_pfvf_comms = pfvf_comms_disabled; + hw_data->get_vf2pf_sources = get_vf2pf_sources; hw_data->disable_iov = adf_disable_sriov; hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; From ea81b91e4e256b0bb75d47ad3a5c230b2171a005 Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Tue, 16 Nov 2021 23:57:37 +0000 Subject: [PATCH 116/251] riscv: dts: sifive unmatched: Name gpio lines Follow the pin descriptions given in the version 3 of the board schematics. Signed-off-by: Vincent Pelletier Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index 3c796d64cf51..f8648ee1785a 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -247,4 +247,8 @@ &gpio { status = "okay"; + gpio-line-names = "J29.1", "PMICNTB", "PMICSHDN", "J8.1", "J8.3", + "PCIe_PWREN", "THERM", "UBRDG_RSTN", "PCIe_PERSTN", + "ULPI_RSTN", "J8.2", "UHUB_RSTN", "GEMGXL_RST", "J8.4", + "EN_VDD_SD", "SD_CD"; }; From 8120393b74b31bbaf293f59896de6b0d50febc48 Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Tue, 16 Nov 2021 23:57:38 +0000 Subject: [PATCH 117/251] riscv: dts: sifive unmatched: Expose the board ID eeprom Mark it as read-only as it is factory-programmed with identifying information, and no executable nor configuration: - eth MAC address - board model (PCB version, BoM version) - board serial number Accidental modification would cause misidentification which could brick the board, so marking read-only seem like both a safe and non-constraining choice. Signed-off-by: Vincent Pelletier Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index f8648ee1785a..d1f2289e529b 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -59,6 +59,16 @@ interrupts = <6 IRQ_TYPE_LEVEL_LOW>; }; + eeprom@54 { + compatible = "microchip,24c02", "atmel,24c02"; + reg = <0x54>; + vcc-supply = <&vdd_bpro>; + label = "board-id"; + pagesize = <16>; + read-only; + size = <256>; + }; + pmic@58 { compatible = "dlg,da9063"; reg = <0x58>; From cd29cc8ad2540a4f9a0a3e174394d39e648ef941 Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Tue, 16 Nov 2021 23:57:39 +0000 Subject: [PATCH 118/251] riscv: dts: sifive unmatched: Expose the PMIC sub-functions These sub-functions are available in the chip revision on this board, so expose them. Signed-off-by: Vincent Pelletier Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index d1f2289e529b..91b3e76b2bb2 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -76,6 +76,18 @@ interrupts = <1 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + onkey { + compatible = "dlg,da9063-onkey"; + }; + + rtc { + compatible = "dlg,da9063-rtc"; + }; + + wdt { + compatible = "dlg,da9063-watchdog"; + }; + regulators { vdd_bcore1: bcore1 { regulator-min-microvolt = <900000>; From ad931d9b3b2e21586de8e6b34346d0a30c13721d Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Tue, 16 Nov 2021 23:57:41 +0000 Subject: [PATCH 119/251] riscv: dts: sifive unmatched: Fix regulator for board rev3 The existing values are rejected by the da9063 regulator driver, as they are unachievable with the declared chip setup (non-merged vcore and bmem are unable to provide the declared curent). Fix voltages to match rev3 schematics, which also matches their boot-up configuration within the chip's available precision. Declare bcore1/bcore2 and bmem/bio as merged. Set ldo09 and ldo10 as always-on as their consumers are not declared but exist. Drop ldo current limits as there is no current limit feature for these regulators in the DA9063. Fixes warnings like: DA9063_LDO3: Operation of current configuration missing Signed-off-by: Vincent Pelletier Signed-off-by: Palmer Dabbelt --- .../boot/dts/sifive/hifive-unmatched-a00.dts | 84 ++++++------------- 1 file changed, 24 insertions(+), 60 deletions(-) diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index 91b3e76b2bb2..58de5a312fc9 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -89,47 +89,31 @@ }; regulators { - vdd_bcore1: bcore1 { - regulator-min-microvolt = <900000>; - regulator-max-microvolt = <900000>; - regulator-min-microamp = <5000000>; - regulator-max-microamp = <5000000>; - regulator-always-on; - }; - - vdd_bcore2: bcore2 { - regulator-min-microvolt = <900000>; - regulator-max-microvolt = <900000>; - regulator-min-microamp = <5000000>; - regulator-max-microamp = <5000000>; + vdd_bcore: bcores-merged { + regulator-min-microvolt = <1050000>; + regulator-max-microvolt = <1050000>; + regulator-min-microamp = <4800000>; + regulator-max-microamp = <4800000>; regulator-always-on; }; vdd_bpro: bpro { regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; - regulator-min-microamp = <2500000>; - regulator-max-microamp = <2500000>; + regulator-min-microamp = <2400000>; + regulator-max-microamp = <2400000>; regulator-always-on; }; vdd_bperi: bperi { - regulator-min-microvolt = <1050000>; - regulator-max-microvolt = <1050000>; + regulator-min-microvolt = <1060000>; + regulator-max-microvolt = <1060000>; regulator-min-microamp = <1500000>; regulator-max-microamp = <1500000>; regulator-always-on; }; - vdd_bmem: bmem { - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <1200000>; - regulator-min-microamp = <3000000>; - regulator-max-microamp = <3000000>; - regulator-always-on; - }; - - vdd_bio: bio { + vdd_bmem_bio: bmem-bio-merged { regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; regulator-min-microamp = <3000000>; @@ -140,86 +124,66 @@ vdd_ldo1: ldo1 { regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; - regulator-min-microamp = <100000>; - regulator-max-microamp = <100000>; regulator-always-on; }; vdd_ldo2: ldo2 { regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; regulator-always-on; }; vdd_ldo3: ldo3 { - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-always-on; }; vdd_ldo4: ldo4 { - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; + regulator-min-microvolt = <2500000>; + regulator-max-microvolt = <2500000>; regulator-always-on; }; vdd_ldo5: ldo5 { - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-min-microamp = <100000>; - regulator-max-microamp = <100000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-always-on; }; vdd_ldo6: ldo6 { - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; regulator-always-on; }; vdd_ldo7: ldo7 { - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-always-on; }; vdd_ldo8: ldo8 { - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-always-on; }; vdd_ld09: ldo9 { regulator-min-microvolt = <1050000>; regulator-max-microvolt = <1050000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; + regulator-always-on; }; vdd_ldo10: ldo10 { regulator-min-microvolt = <1000000>; regulator-max-microvolt = <1000000>; - regulator-min-microamp = <300000>; - regulator-max-microamp = <300000>; + regulator-always-on; }; vdd_ldo11: ldo11 { regulator-min-microvolt = <2500000>; regulator-max-microvolt = <2500000>; - regulator-min-microamp = <300000>; - regulator-max-microamp = <300000>; regulator-always-on; }; }; From f6f7fbb89bf8dc9132fde55cfe67483138eea880 Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Tue, 16 Nov 2021 23:57:42 +0000 Subject: [PATCH 120/251] riscv: dts: sifive unmatched: Link the tmp451 with its power supply Fixes the following probe warning: lm90 0-004c: Looking up vcc-supply from device tree lm90 0-004c: Looking up vcc-supply property in node /soc/i2c@10030000/temperature-sensor@4c failed lm90 0-004c: supply vcc not found, using dummy regulator Signed-off-by: Vincent Pelletier Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index 58de5a312fc9..6bfa1f24d3de 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -55,6 +55,7 @@ temperature-sensor@4c { compatible = "ti,tmp451"; reg = <0x4c>; + vcc-supply = <&vdd_bpro>; interrupt-parent = <&gpio>; interrupts = <6 IRQ_TYPE_LEVEL_LOW>; }; From 8ffea2599f63fdbee968b894eab78170abf3ec2c Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Fri, 17 Dec 2021 15:15:45 +0900 Subject: [PATCH 121/251] zonefs: add MODULE_ALIAS_FS Add MODULE_ALIAS_FS() to load the module automatically when you do "mount -t zonefs". Fixes: 8dcc1a9d90c1 ("fs: New zonefs file system") Cc: stable # 5.6+ Signed-off-by: Naohiro Aota Reviewed-by: Johannes Thumshirn Signed-off-by: Damien Le Moal --- fs/zonefs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 259ee2bda492..b76dfb310ab6 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -1787,5 +1787,6 @@ static void __exit zonefs_exit(void) MODULE_AUTHOR("Damien Le Moal"); MODULE_DESCRIPTION("Zone file system for zoned block devices"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_FS("zonefs"); module_init(zonefs_init); module_exit(zonefs_exit); From bce472f90952cc8be03dded25c4aa109d27e5924 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 17 Dec 2021 16:41:17 +0900 Subject: [PATCH 122/251] MAITAINERS: Change zonefs maintainer email address Update my email address from damien.lemoal@wdc.com to damien.lemoal@opensource.wdc.com. Signed-off-by: Damien Le Moal --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 13f9a84a617e..d01ae22c55f8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21059,7 +21059,7 @@ S: Maintained F: arch/x86/kernel/cpu/zhaoxin.c ZONEFS FILESYSTEM -M: Damien Le Moal +M: Damien Le Moal M: Naohiro Aota R: Johannes Thumshirn L: linux-fsdevel@vger.kernel.org From 7202216a6f34d571a22274e729f841256bf8b1ef Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Thu, 25 Nov 2021 12:05:19 +0100 Subject: [PATCH 123/251] ARM: 9160/1: NOMMU: Reload __secondary_data after PROCINFO_INITFUNC __secondary_data used to reside in r7 around call to PROCINFO_INITFUNC. After commit 95731b8ee63e ("ARM: 9059/1: cache-v7: get rid of mini-stack") r7 is used as a scratch register, so we have to reload __secondary_data before we setup the stack pointer. Fixes: 95731b8ee63e ("ARM: 9059/1: cache-v7: get rid of mini-stack") Signed-off-by: Vladimir Murzin Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/head-nommu.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index fadfee9e2b45..950bef83339f 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -114,6 +114,7 @@ ENTRY(secondary_startup) add r12, r12, r10 ret r12 1: bl __after_proc_init + ldr r7, __secondary_data @ reload r7 ldr sp, [r7, #12] @ set up the stack pointer ldr r0, [r7, #16] @ set up task pointer mov fp, #0 From 8536a5ef886005bc443c2da9b842d69fd3d7647f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 15 Dec 2021 09:31:36 +0100 Subject: [PATCH 124/251] ARM: 9169/1: entry: fix Thumb2 bug in iWMMXt exception handling The Thumb2 version of the FP exception handling entry code treats the register holding the CP number (R8) differently, resulting in the iWMMXT CP number check to be incorrect. Fix this by unifying the ARM and Thumb2 code paths, and switch the order of the additions of the TI_USED_CP offset and the shifted CP index. Cc: Fixes: b86040a59feb ("Thumb-2: Implementation of the unified start-up and exceptions code") Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/entry-armv.S | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index deff286eb5ea..5cd057859fe9 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -596,11 +596,9 @@ call_fpe: tstne r0, #0x04000000 @ bit 26 set on both ARM and Thumb-2 reteq lr and r8, r0, #0x00000f00 @ mask out CP number - THUMB( lsr r8, r8, #8 ) mov r7, #1 - add r6, r10, #TI_USED_CP - ARM( strb r7, [r6, r8, lsr #8] ) @ set appropriate used_cp[] - THUMB( strb r7, [r6, r8] ) @ set appropriate used_cp[] + add r6, r10, r8, lsr #8 @ add used_cp[] array offset first + strb r7, [r6, #TI_USED_CP] @ set appropriate used_cp[] #ifdef CONFIG_IWMMXT @ Test if we need to give access to iWMMXt coprocessors ldr r5, [r10, #TI_FLAGS] @@ -609,7 +607,7 @@ call_fpe: bcs iwmmxt_task_enable #endif ARM( add pc, pc, r8, lsr #6 ) - THUMB( lsl r8, r8, #2 ) + THUMB( lsr r8, r8, #6 ) THUMB( add pc, r8 ) nop From c4d936efa46d8ea183df16c0f3fa4423327da51d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 17 Dec 2021 16:24:30 +0100 Subject: [PATCH 125/251] Revert "usb: early: convert to readl_poll_timeout_atomic()" This reverts commit 796eed4b2342c9d6b26c958e92af91253a2390e1. This change causes boot lockups when using "arlyprintk=xdbc" because ktime can not be used at this point in time in the boot process. Also, it is not needed for very small delays like this. Reported-by: Mathias Nyman Reported-by: Peter Zijlstra Cc: Jann Horn Cc: Chunfeng Yun Fixes: 796eed4b2342 ("usb: early: convert to readl_poll_timeout_atomic()") Link: https://lore.kernel.org/r/c2b5c9bb-1b75-bf56-3754-b5b18812d65e@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/early/xhci-dbc.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/usb/early/xhci-dbc.c b/drivers/usb/early/xhci-dbc.c index 933d77ad0a64..4502108069cd 100644 --- a/drivers/usb/early/xhci-dbc.c +++ b/drivers/usb/early/xhci-dbc.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -136,9 +135,17 @@ static int handshake(void __iomem *ptr, u32 mask, u32 done, int wait, int delay) { u32 result; - return readl_poll_timeout_atomic(ptr, result, - ((result & mask) == done), - delay, wait); + /* Can not use readl_poll_timeout_atomic() for early boot things */ + do { + result = readl(ptr); + result &= mask; + if (result == done) + return 0; + udelay(delay); + wait -= delay; + } while (wait > 0); + + return -ETIMEDOUT; } static void __init xdbc_bios_handoff(void) From 544e737dea5ad1a457f25dbddf68761ff25e028b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 16 Dec 2021 20:30:18 +0100 Subject: [PATCH 126/251] PM: sleep: Fix error handling in dpm_prepare() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 2aa36604e824 ("PM: sleep: Avoid calling put_device() under dpm_list_mtx") forgot to update the while () loop termination condition to also break the loop if error is nonzero, which causes the loop to become infinite if device_prepare() returns an error for one device. Add the missing !error check. Fixes: 2aa36604e824 ("PM: sleep: Avoid calling put_device() under dpm_list_mtx") Signed-off-by: Rafael J. Wysocki Reported-by: Thomas Hellström Reviewed-by: Thomas Hellström Reviewed-by: Ulf Hansson Cc: All applicable --- drivers/base/power/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index f4d0c555de29..04ea92cbd9cf 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1902,7 +1902,7 @@ int dpm_prepare(pm_message_t state) device_block_probing(); mutex_lock(&dpm_list_mtx); - while (!list_empty(&dpm_list)) { + while (!list_empty(&dpm_list) && !error) { struct device *dev = to_device(dpm_list.next); get_device(dev); From 2b5160b12091285c5aca45980f100a9294af7b04 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 17 Dec 2021 12:44:09 -0300 Subject: [PATCH 127/251] ipmi: bail out if init_srcu_struct fails In case, init_srcu_struct fails (because of memory allocation failure), we might proceed with the driver initialization despite srcu_struct not being entirely initialized. Fixes: 913a89f009d9 ("ipmi: Don't initialize anything in the core until something uses it") Signed-off-by: Thadeu Lima de Souza Cascardo Cc: Corey Minyard Cc: stable@vger.kernel.org Message-Id: <20211217154410.1228673-1-cascardo@canonical.com> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_msghandler.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index c837d5416e0e..84975b21fff2 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -5392,7 +5392,9 @@ static int ipmi_init_msghandler(void) if (initialized) goto out; - init_srcu_struct(&ipmi_interfaces_srcu); + rv = init_srcu_struct(&ipmi_interfaces_srcu); + if (rv) + goto out; timer_setup(&ipmi_timer, ipmi_timeout, 0); mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES); From 75d70d76cb7b927cace2cb34265d68ebb3306b13 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 17 Dec 2021 12:44:10 -0300 Subject: [PATCH 128/251] ipmi: fix initialization when workqueue allocation fails If the workqueue allocation fails, the driver is marked as not initialized, and timer and panic_notifier will be left registered. Instead of removing those when workqueue allocation fails, do the workqueue initialization before doing it, and cleanup srcu_struct if it fails. Fixes: 1d49eb91e86e ("ipmi: Move remove_work to dedicated workqueue") Signed-off-by: Thadeu Lima de Souza Cascardo Cc: Corey Minyard Cc: Ioanna Alifieraki Cc: stable@vger.kernel.org Message-Id: <20211217154410.1228673-2-cascardo@canonical.com> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_msghandler.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 84975b21fff2..266c7bc58dda 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -5396,20 +5396,23 @@ static int ipmi_init_msghandler(void) if (rv) goto out; + remove_work_wq = create_singlethread_workqueue("ipmi-msghandler-remove-wq"); + if (!remove_work_wq) { + pr_err("unable to create ipmi-msghandler-remove-wq workqueue"); + rv = -ENOMEM; + goto out_wq; + } + timer_setup(&ipmi_timer, ipmi_timeout, 0); mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES); atomic_notifier_chain_register(&panic_notifier_list, &panic_block); - remove_work_wq = create_singlethread_workqueue("ipmi-msghandler-remove-wq"); - if (!remove_work_wq) { - pr_err("unable to create ipmi-msghandler-remove-wq workqueue"); - rv = -ENOMEM; - goto out; - } - initialized = true; +out_wq: + if (rv) + cleanup_srcu_struct(&ipmi_interfaces_srcu); out: mutex_unlock(&ipmi_interfaces_mutex); return rv; From 4e8c11b6b3f0b6a283e898344f154641eda94266 Mon Sep 17 00:00:00 2001 From: Yu Liao Date: Mon, 13 Dec 2021 21:57:27 +0800 Subject: [PATCH 129/251] timekeeping: Really make sure wall_to_monotonic isn't positive Even after commit e1d7ba873555 ("time: Always make sure wall_to_monotonic isn't positive") it is still possible to make wall_to_monotonic positive by running the following code: int main(void) { struct timespec time; clock_gettime(CLOCK_MONOTONIC, &time); time.tv_nsec = 0; clock_settime(CLOCK_REALTIME, &time); return 0; } The reason is that the second parameter of timespec64_compare(), ts_delta, may be unnormalized because the delta is calculated with an open coded substraction which causes the comparison of tv_sec to yield the wrong result: wall_to_monotonic = { .tv_sec = -10, .tv_nsec = 900000000 } ts_delta = { .tv_sec = -9, .tv_nsec = -900000000 } That makes timespec64_compare() claim that wall_to_monotonic < ts_delta, but actually the result should be wall_to_monotonic > ts_delta. After normalization, the result of timespec64_compare() is correct because the tv_sec comparison is not longer misleading: wall_to_monotonic = { .tv_sec = -10, .tv_nsec = 900000000 } ts_delta = { .tv_sec = -10, .tv_nsec = 100000000 } Use timespec64_sub() to ensure that ts_delta is normalized, which fixes the issue. Fixes: e1d7ba873555 ("time: Always make sure wall_to_monotonic isn't positive") Signed-off-by: Yu Liao Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211213135727.1656662-1-liaoyu15@huawei.com --- kernel/time/timekeeping.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b348749a9fc6..dcdcb85121e4 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1306,8 +1306,7 @@ int do_settimeofday64(const struct timespec64 *ts) timekeeping_forward_now(tk); xt = tk_xtime(tk); - ts_delta.tv_sec = ts->tv_sec - xt.tv_sec; - ts_delta.tv_nsec = ts->tv_nsec - xt.tv_nsec; + ts_delta = timespec64_sub(*ts, xt); if (timespec64_compare(&tk->wall_to_monotonic, &ts_delta) > 0) { ret = -EINVAL; From b774302e885697dde027825f8de9beb985d037bd Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Wed, 8 Dec 2021 16:33:19 +0000 Subject: [PATCH 130/251] cifs: ignore resource_id while getting fscache super cookie We have a cyclic dependency between fscache super cookie and root inode cookie. The super cookie relies on tcon->resource_id, which gets populated from the root inode number. However, fetching the root inode initializes inode cookie as a child of super cookie, which is yet to be populated. resource_id is only used as auxdata to check the validity of super cookie. We can completely avoid setting resource_id to remove the circular dependency. Since vol creation time and vol serial numbers are used for auxdata, we should be fine. Additionally, there will be auxiliary data check for each inode cookie as well. Fixes: 5bf91ef03d98 ("cifs: wait for tcon resource_id before getting fscache super") CC: David Howells Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/connect.c | 7 +++++++ fs/cifs/inode.c | 13 ------------- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 18448dbd762a..1060164b984a 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3064,6 +3064,13 @@ static int mount_get_conns(struct mount_ctx *mnt_ctx) (cifs_sb->ctx->rsize > server->ops->negotiate_rsize(tcon, ctx))) cifs_sb->ctx->rsize = server->ops->negotiate_rsize(tcon, ctx); + /* + * The cookie is initialized from volume info returned above. + * Inside cifs_fscache_get_super_cookie it checks + * that we do not get super cookie twice. + */ + cifs_fscache_get_super_cookie(tcon); + out: mnt_ctx->server = server; mnt_ctx->ses = ses; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 96d083db1737..279622e4eb1c 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1356,11 +1356,6 @@ iget_no_retry: goto out; } -#ifdef CONFIG_CIFS_FSCACHE - /* populate tcon->resource_id */ - tcon->resource_id = CIFS_I(inode)->uniqueid; -#endif - if (rc && tcon->pipe) { cifs_dbg(FYI, "ipc connection - fake read inode\n"); spin_lock(&inode->i_lock); @@ -1375,14 +1370,6 @@ iget_no_retry: iget_failed(inode); inode = ERR_PTR(rc); } - - /* - * The cookie is initialized from volume info returned above. - * Inside cifs_fscache_get_super_cookie it checks - * that we do not get super cookie twice. - */ - cifs_fscache_get_super_cookie(tcon); - out: kfree(path); free_xid(xid); From a31080899d5fdafcccf7f39dd214a814a2c82626 Mon Sep 17 00:00:00 2001 From: Thiago Rafael Becker Date: Fri, 17 Dec 2021 15:20:22 -0300 Subject: [PATCH 131/251] cifs: sanitize multiple delimiters in prepath mount.cifs can pass a device with multiple delimiters in it. This will cause rename(2) to fail with ENOENT. V2: - Make sanitize_path more readable. - Fix multiple delimiters between UNC and prepath. - Avoid a memory leak if a bad user starts putting a lot of delimiters in the path on purpose. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=2031200 Fixes: 24e0a1eff9e2 ("cifs: switch to new mount api") Cc: stable@vger.kernel.org # 5.11+ Acked-by: Ronnie Sahlberg Signed-off-by: Thiago Rafael Becker Signed-off-by: Steve French --- fs/cifs/fs_context.c | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 6a179ae753c1..e3ed25dc6f3f 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -434,6 +434,42 @@ out: return rc; } +/* + * Remove duplicate path delimiters. Windows is supposed to do that + * but there are some bugs that prevent rename from working if there are + * multiple delimiters. + * + * Returns a sanitized duplicate of @path. The caller is responsible for + * cleaning up the original. + */ +#define IS_DELIM(c) ((c) == '/' || (c) == '\\') +static char *sanitize_path(char *path) +{ + char *cursor1 = path, *cursor2 = path; + + /* skip all prepended delimiters */ + while (IS_DELIM(*cursor1)) + cursor1++; + + /* copy the first letter */ + *cursor2 = *cursor1; + + /* copy the remainder... */ + while (*(cursor1++)) { + /* ... skipping all duplicated delimiters */ + if (IS_DELIM(*cursor1) && IS_DELIM(*cursor2)) + continue; + *(++cursor2) = *cursor1; + } + + /* if the last character is a delimiter, skip it */ + if (IS_DELIM(*(cursor2 - 1))) + cursor2--; + + *(cursor2) = '\0'; + return kstrdup(path, GFP_KERNEL); +} + /* * Parse a devname into substrings and populate the ctx->UNC and ctx->prepath * fields with the result. Returns 0 on success and an error otherwise @@ -493,7 +529,7 @@ smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx) if (!*pos) return 0; - ctx->prepath = kstrdup(pos, GFP_KERNEL); + ctx->prepath = sanitize_path(pos); if (!ctx->prepath) return -ENOMEM; From 83912d6d55be10d65b5268d1871168b9ebe1ec4b Mon Sep 17 00:00:00 2001 From: Marcos Del Sol Vives Date: Thu, 16 Dec 2021 11:37:22 +0100 Subject: [PATCH 132/251] ksmbd: disable SMB2_GLOBAL_CAP_ENCRYPTION for SMB 3.1.1 According to the official Microsoft MS-SMB2 document section 3.3.5.4, this flag should be used only for 3.0 and 3.0.2 dialects. Setting it for 3.1.1 is a violation of the specification. This causes my Windows 10 client to detect an anomaly in the negotiation, and disable encryption entirely despite being explicitly enabled in ksmbd, causing all data transfers to go in plain text. Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") Cc: stable@vger.kernel.org # v5.15 Acked-by: Namjae Jeon Signed-off-by: Marcos Del Sol Vives Signed-off-by: Steve French --- fs/ksmbd/smb2ops.c | 3 --- fs/ksmbd/smb2pdu.c | 25 +++++++++++++++++++++---- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/fs/ksmbd/smb2ops.c b/fs/ksmbd/smb2ops.c index 0a5d8450e835..02a44d28bdaf 100644 --- a/fs/ksmbd/smb2ops.c +++ b/fs/ksmbd/smb2ops.c @@ -271,9 +271,6 @@ int init_smb3_11_server(struct ksmbd_conn *conn) if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING; - if (conn->cipher_type) - conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; - if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL; diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 125590d5e940..b8b3a4c28b74 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -915,6 +915,25 @@ static void decode_encrypt_ctxt(struct ksmbd_conn *conn, } } +/** + * smb3_encryption_negotiated() - checks if server and client agreed on enabling encryption + * @conn: smb connection + * + * Return: true if connection should be encrypted, else false + */ +static bool smb3_encryption_negotiated(struct ksmbd_conn *conn) +{ + if (!conn->ops->generate_encryptionkey) + return false; + + /* + * SMB 3.0 and 3.0.2 dialects use the SMB2_GLOBAL_CAP_ENCRYPTION flag. + * SMB 3.1.1 uses the cipher_type field. + */ + return (conn->vals->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) || + conn->cipher_type; +} + static void decode_compress_ctxt(struct ksmbd_conn *conn, struct smb2_compression_capabilities_context *pneg_ctxt) { @@ -1469,8 +1488,7 @@ static int ntlm_authenticate(struct ksmbd_work *work) (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED)) sess->sign = true; - if (conn->vals->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION && - conn->ops->generate_encryptionkey && + if (smb3_encryption_negotiated(conn) && !(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) { rc = conn->ops->generate_encryptionkey(sess); if (rc) { @@ -1559,8 +1577,7 @@ static int krb5_authenticate(struct ksmbd_work *work) (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED)) sess->sign = true; - if ((conn->vals->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) && - conn->ops->generate_encryptionkey) { + if (smb3_encryption_negotiated(conn)) { retval = conn->ops->generate_encryptionkey(sess); if (retval) { ksmbd_debug(SMB, From 8f556a326c93213927e683fc32bbf5be1b62540a Mon Sep 17 00:00:00 2001 From: Zqiang Date: Fri, 17 Dec 2021 15:42:07 +0800 Subject: [PATCH 133/251] locking/rtmutex: Fix incorrect condition in rtmutex_spin_on_owner() Optimistic spinning needs to be terminated when the spinning waiter is not longer the top waiter on the lock, but the condition is negated. It terminates if the waiter is the top waiter, which is defeating the whole purpose. Fixes: c3123c431447 ("locking/rtmutex: Dont dereference waiter lockless") Signed-off-by: Zqiang Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211217074207.77425-1-qiang1.zhang@intel.com --- kernel/locking/rtmutex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 0c6a48dfcecb..1f25a4d7de27 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1380,7 +1380,7 @@ static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock, * - the VCPU on which owner runs is preempted */ if (!owner->on_cpu || need_resched() || - rt_mutex_waiter_is_top_waiter(lock, waiter) || + !rt_mutex_waiter_is_top_waiter(lock, waiter) || vcpu_is_preempted(task_cpu(owner))) { res = false; break; From 0a515a06c5ebfa46fee3ac519e418f801e718da4 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Sun, 12 Dec 2021 06:25:02 +0000 Subject: [PATCH 134/251] perf expr: Fix missing check for return value of hashmap__new() The hashmap__new() function may return ERR_PTR(-ENOMEM) when malloc() fails, add IS_ERR() checking for ctx->ids. Signed-off-by: Miaoqian Lin Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20211212062504.25841-1-linmq006@gmail.com [ s/kfree()/free()/ and add missing linux/err.h include ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/expr.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 1d532b9fed29..254601060b39 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -12,6 +12,7 @@ #include "expr-bison.h" #include "expr-flex.h" #include "smt.h" +#include #include #include #include @@ -299,6 +300,10 @@ struct expr_parse_ctx *expr__ctx_new(void) return NULL; ctx->ids = hashmap__new(key_hash, key_equal, NULL); + if (IS_ERR(ctx->ids)) { + free(ctx); + return NULL; + } ctx->runtime = 0; return ctx; From 0c8e32fe48f549eef27c8c6b0a63530f83c3a643 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 13 Dec 2021 10:48:28 +0200 Subject: [PATCH 135/251] perf inject: Fix segfault due to close without open The fixed commit attempts to close inject.output even if it was never opened e.g. $ perf record uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.002 MB perf.data (7 samples) ] $ perf inject -i perf.data --vm-time-correlation=dry-run Segmentation fault (core dumped) $ gdb --quiet perf Reading symbols from perf... (gdb) r inject -i perf.data --vm-time-correlation=dry-run Starting program: /home/ahunter/bin/perf inject -i perf.data --vm-time-correlation=dry-run [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1". Program received signal SIGSEGV, Segmentation fault. 0x00007eff8afeef5b in _IO_new_fclose (fp=0x0) at iofclose.c:48 48 iofclose.c: No such file or directory. (gdb) bt #0 0x00007eff8afeef5b in _IO_new_fclose (fp=0x0) at iofclose.c:48 #1 0x0000557fc7b74f92 in perf_data__close (data=data@entry=0x7ffcdafa6578) at util/data.c:376 #2 0x0000557fc7a6b807 in cmd_inject (argc=, argv=) at builtin-inject.c:1085 #3 0x0000557fc7ac4783 in run_builtin (p=0x557fc8074878 , argc=4, argv=0x7ffcdafb6a60) at perf.c:313 #4 0x0000557fc7a25d5c in handle_internal_command (argv=, argc=) at perf.c:365 #5 run_argv (argcp=, argv=) at perf.c:409 #6 main (argc=4, argv=0x7ffcdafb6a60) at perf.c:539 (gdb) Fixes: 02e6246f5364d526 ("perf inject: Close inject.output on exit") Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Cc: Riccardo Mancini Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20211213084829.114772-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index b9d6306cc14e..af70f1c72052 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -1078,7 +1078,8 @@ out_delete: zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); out_close_output: - perf_data__close(&inject.output); + if (!inject.in_place_update) + perf_data__close(&inject.output); free(inject.itrace_synth_opts.vm_tm_corr_args); return ret; } From c271a55b0c6029fed0cac909fa57999a11467132 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 13 Dec 2021 10:48:29 +0200 Subject: [PATCH 136/251] perf inject: Fix segfault due to perf_data__fd() without open The fixed commit attempts to get the output file descriptor even if the file was never opened e.g. $ perf record uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.002 MB perf.data (7 samples) ] $ perf inject -i perf.data --vm-time-correlation=dry-run Segmentation fault (core dumped) $ gdb --quiet perf Reading symbols from perf... (gdb) r inject -i perf.data --vm-time-correlation=dry-run Starting program: /home/ahunter/bin/perf inject -i perf.data --vm-time-correlation=dry-run [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1". Program received signal SIGSEGV, Segmentation fault. __GI___fileno (fp=0x0) at fileno.c:35 35 fileno.c: No such file or directory. (gdb) bt #0 __GI___fileno (fp=0x0) at fileno.c:35 #1 0x00005621e48dd987 in perf_data__fd (data=0x7fff4c68bd08) at util/data.h:72 #2 perf_data__fd (data=0x7fff4c68bd08) at util/data.h:69 #3 cmd_inject (argc=, argv=0x7fff4c69c1f0) at builtin-inject.c:1017 #4 0x00005621e4936783 in run_builtin (p=0x5621e4ee6878 , argc=4, argv=0x7fff4c69c1f0) at perf.c:313 #5 0x00005621e4897d5c in handle_internal_command (argv=, argc=) at perf.c:365 #6 run_argv (argcp=, argv=) at perf.c:409 #7 main (argc=4, argv=0x7fff4c69c1f0) at perf.c:539 (gdb) Fixes: 0ae03893623dd1dd ("perf tools: Pass a fd to perf_file_header__read_pipe()") Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Cc: Riccardo Mancini Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20211213084829.114772-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index af70f1c72052..409b721666cb 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -755,12 +755,16 @@ static int parse_vm_time_correlation(const struct option *opt, const char *str, return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM; } +static int output_fd(struct perf_inject *inject) +{ + return inject->in_place_update ? -1 : perf_data__fd(&inject->output); +} + static int __cmd_inject(struct perf_inject *inject) { int ret = -EINVAL; struct perf_session *session = inject->session; - struct perf_data *data_out = &inject->output; - int fd = inject->in_place_update ? -1 : perf_data__fd(data_out); + int fd = output_fd(inject); u64 output_data_offset; signal(SIGINT, sig_handler); @@ -1015,7 +1019,7 @@ int cmd_inject(int argc, const char **argv) } inject.session = __perf_session__new(&data, repipe, - perf_data__fd(&inject.output), + output_fd(&inject), &inject.tool); if (IS_ERR(inject.session)) { ret = PTR_ERR(inject.session); From 53b1119a6e5028b125f431a0116ba73510d82a72 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 16 Dec 2021 11:12:11 -0500 Subject: [PATCH 137/251] NFSD: Fix READDIR buffer overflow If a client sends a READDIR count argument that is too small (say, zero), then the buffer size calculation in the new init_dirlist helper functions results in an underflow, allowing the XDR stream functions to write beyond the actual buffer. This calculation has always been suspect. NFSD has never sanity- checked the READDIR count argument, but the old entry encoders managed the problem correctly. With the commits below, entry encoding changed, exposing the underflow to the pointer arithmetic in xdr_reserve_space(). Modern NFS clients attempt to retrieve as much data as possible for each READDIR request. Also, we have no unit tests that exercise the behavior of READDIR at the lower bound of @count values. Thus this case was missed during testing. Reported-by: Anatoly Trosinenko Fixes: f5dcccd647da ("NFSD: Update the NFSv2 READDIR entry encoder to use struct xdr_stream") Fixes: 7f87fc2d34d4 ("NFSD: Update NFSv3 READDIR entry encoders to use struct xdr_stream") Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 11 ++++------- fs/nfsd/nfsproc.c | 8 ++++---- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 4418517f6f12..15dac36ca852 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -438,22 +438,19 @@ nfsd3_proc_link(struct svc_rqst *rqstp) static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp, struct nfsd3_readdirres *resp, - int count) + u32 count) { struct xdr_buf *buf = &resp->dirlist; struct xdr_stream *xdr = &resp->xdr; - count = min_t(u32, count, svc_max_payload(rqstp)); + count = clamp(count, (u32)(XDR_UNIT * 2), svc_max_payload(rqstp)); memset(buf, 0, sizeof(*buf)); /* Reserve room for the NULL ptr & eof flag (-2 words) */ buf->buflen = count - XDR_UNIT * 2; buf->pages = rqstp->rq_next_page; - while (count > 0) { - rqstp->rq_next_page++; - count -= PAGE_SIZE; - } + rqstp->rq_next_page += (buf->buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; /* This is xdr_init_encode(), but it assumes that * the head kvec has already been consumed. */ @@ -462,7 +459,7 @@ static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp, xdr->page_ptr = buf->pages; xdr->iov = NULL; xdr->p = page_address(*buf->pages); - xdr->end = xdr->p + (PAGE_SIZE >> 2); + xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE); xdr->rqst = NULL; } diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index eea5b59b6a6c..de282f3273c5 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -556,17 +556,17 @@ nfsd_proc_rmdir(struct svc_rqst *rqstp) static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp, struct nfsd_readdirres *resp, - int count) + u32 count) { struct xdr_buf *buf = &resp->dirlist; struct xdr_stream *xdr = &resp->xdr; - count = min_t(u32, count, PAGE_SIZE); + count = clamp(count, (u32)(XDR_UNIT * 2), svc_max_payload(rqstp)); memset(buf, 0, sizeof(*buf)); /* Reserve room for the NULL ptr & eof flag (-2 words) */ - buf->buflen = count - sizeof(__be32) * 2; + buf->buflen = count - XDR_UNIT * 2; buf->pages = rqstp->rq_next_page; rqstp->rq_next_page++; @@ -577,7 +577,7 @@ static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp, xdr->page_ptr = buf->pages; xdr->iov = NULL; xdr->p = page_address(*buf->pages); - xdr->end = xdr->p + (PAGE_SIZE >> 2); + xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE); xdr->rqst = NULL; } From 9a5875f14b0e3a13ae314883f1bb72b7f31fac07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= Date: Mon, 18 Oct 2021 13:22:01 +0200 Subject: [PATCH 138/251] gpio: dln2: Fix interrupts when replugging the device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When replugging the device the following message shows up: gpio gpiochip2: (dln2): detected irqchip that is shared with multiple gpiochips: please fix the driver. This also has the effect that interrupts won't work. The same problem would also show up if multiple devices where plugged in. Fix this by allocating the irq_chip data structure per instance like other drivers do. I don't know when this problem appeared, but it is present in 5.10. Cc: # 5.10+ Cc: Daniel Baluta Signed-off-by: Noralf Trønnes Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-dln2.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/gpio/gpio-dln2.c b/drivers/gpio/gpio-dln2.c index 026903e3ef54..08b9e2cf4f2d 100644 --- a/drivers/gpio/gpio-dln2.c +++ b/drivers/gpio/gpio-dln2.c @@ -46,6 +46,7 @@ struct dln2_gpio { struct platform_device *pdev; struct gpio_chip gpio; + struct irq_chip irqchip; /* * Cache pin direction to save us one transfer, since the hardware has @@ -383,15 +384,6 @@ static void dln2_irq_bus_unlock(struct irq_data *irqd) mutex_unlock(&dln2->irq_lock); } -static struct irq_chip dln2_gpio_irqchip = { - .name = "dln2-irq", - .irq_mask = dln2_irq_mask, - .irq_unmask = dln2_irq_unmask, - .irq_set_type = dln2_irq_set_type, - .irq_bus_lock = dln2_irq_bus_lock, - .irq_bus_sync_unlock = dln2_irq_bus_unlock, -}; - static void dln2_gpio_event(struct platform_device *pdev, u16 echo, const void *data, int len) { @@ -473,8 +465,15 @@ static int dln2_gpio_probe(struct platform_device *pdev) dln2->gpio.direction_output = dln2_gpio_direction_output; dln2->gpio.set_config = dln2_gpio_set_config; + dln2->irqchip.name = "dln2-irq", + dln2->irqchip.irq_mask = dln2_irq_mask, + dln2->irqchip.irq_unmask = dln2_irq_unmask, + dln2->irqchip.irq_set_type = dln2_irq_set_type, + dln2->irqchip.irq_bus_lock = dln2_irq_bus_lock, + dln2->irqchip.irq_bus_sync_unlock = dln2_irq_bus_unlock, + girq = &dln2->gpio.irq; - girq->chip = &dln2_gpio_irqchip; + girq->chip = &dln2->irqchip; /* The event comes from the outside so no parent handler */ girq->parent_handler = NULL; girq->num_parents = 0; From 87959fa16cfbcf76245c11559db1940069621274 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 19 Dec 2021 07:58:44 -0700 Subject: [PATCH 139/251] Revert "block: reduce kblockd_mod_delayed_work_on() CPU consumption" This reverts commit cb2ac2912a9ca7d3d26291c511939a41361d2d83. Alex and the kernel test robot report that this causes a significant performance regression with BFQ. I can reproduce that result, so let's revert this one as we're close to -rc6 and we there's no point in trying to rush a fix. Link: https://lore.kernel.org/linux-block/1639853092.524jxfaem2.none@localhost/ Link: https://lore.kernel.org/lkml/20211219141852.GH14057@xsang-OptiPlex-9020/ Reported-by: Alex Xu (Hello71) Reported-by: kernel test robot Signed-off-by: Jens Axboe --- block/blk-core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index c1833f95cb97..1378d084c770 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1484,8 +1484,6 @@ EXPORT_SYMBOL(kblockd_schedule_work); int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay) { - if (!delay) - return queue_work_on(cpu, kblockd_workqueue, &dwork->work); return mod_delayed_work_on(cpu, kblockd_workqueue, dwork, delay); } EXPORT_SYMBOL(kblockd_mod_delayed_work_on); From 1aa2abb33a419090c7c87d4ae842a6347078ee12 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 16 Dec 2021 17:52:13 +0100 Subject: [PATCH 140/251] KVM: x86: Drop guest CPUID check for host initiated writes to MSR_IA32_PERF_CAPABILITIES The ability to write to MSR_IA32_PERF_CAPABILITIES from the host should not depend on guest visible CPUID entries, even if just to allow creating/restoring guest MSRs and CPUIDs in any sequence. Fixes: 27461da31089 ("KVM: x86/pmu: Support full width counting") Suggested-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Message-Id: <20211216165213.338923-3-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0cf1082455df..9a2972fdae82 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3413,7 +3413,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!msr_info->host_initiated) return 1; - if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM) && kvm_get_msr_feature(&msr_ent)) + if (kvm_get_msr_feature(&msr_ent)) return 1; if (data & ~msr_ent.data) return 1; From 0b091a43d704997789c6d812b02167c8f5f9f061 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 16 Dec 2021 17:52:12 +0100 Subject: [PATCH 141/251] KVM: selftests: vmx_pmu_msrs_test: Drop tests mangling guest visible CPUIDs Host initiated writes to MSR_IA32_PERF_CAPABILITIES should not depend on guest visible CPUIDs and (incorrect) KVM logic implementing it is about to change. Also, KVM_SET_CPUID{,2} after KVM_RUN is now forbidden and causes test to fail. Reported-by: kernel test robot Fixes: feb627e8d6f6 ("KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN") Signed-off-by: Vitaly Kuznetsov Message-Id: <20211216165213.338923-2-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/x86_64/vmx_pmu_msrs_test.c | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c index 23051d84b907..2454a1f2ca0c 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c @@ -110,22 +110,5 @@ int main(int argc, char *argv[]) ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_LBR_FMT); TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail."); - /* testcase 4, set capabilities when we don't have PDCM bit */ - entry_1_0->ecx &= ~X86_FEATURE_PDCM; - vcpu_set_cpuid(vm, VCPU_ID, cpuid); - ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); - TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail."); - - /* testcase 5, set capabilities when we don't have PMU version bits */ - entry_1_0->ecx |= X86_FEATURE_PDCM; - eax.split.version_id = 0; - entry_1_0->ecx = eax.full; - vcpu_set_cpuid(vm, VCPU_ID, cpuid); - ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES); - TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail."); - - vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, 0); - ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), 0); - kvm_vm_free(vm); } From 18c841e1f4112d3fb742aca3429e84117fcb1e1c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 9 Dec 2021 06:05:46 +0000 Subject: [PATCH 142/251] KVM: x86: Retry page fault if MMU reload is pending and root has no sp Play nice with a NULL shadow page when checking for an obsolete root in the page fault handler by flagging the page fault as stale if there's no shadow page associated with the root and KVM_REQ_MMU_RELOAD is pending. Invalidating memslots, which is the only case where _all_ roots need to be reloaded, requests all vCPUs to reload their MMUs while holding mmu_lock for lock. The "special" roots, e.g. pae_root when KVM uses PAE paging, are not backed by a shadow page. Running with TDP disabled or with nested NPT explodes spectaculary due to dereferencing a NULL shadow page pointer. Skip the KVM_REQ_MMU_RELOAD check if there is a valid shadow page for the root. Zapping shadow pages in response to guest activity, e.g. when the guest frees a PGD, can trigger KVM_REQ_MMU_RELOAD even if the current vCPU isn't using the affected root. I.e. KVM_REQ_MMU_RELOAD can be seen with a completely valid root shadow page. This is a bit of a moot point as KVM currently unloads all roots on KVM_REQ_MMU_RELOAD, but that will be cleaned up in the future. Fixes: a955cad84cda ("KVM: x86/mmu: Retry page fault if root is invalidated by memslot update") Cc: stable@vger.kernel.org Cc: Maxim Levitsky Signed-off-by: Sean Christopherson Message-Id: <20211209060552.2956723-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e2e1d012df22..fcdf3f8bb59a 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3987,7 +3987,21 @@ out_retry: static bool is_page_fault_stale(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, int mmu_seq) { - if (is_obsolete_sp(vcpu->kvm, to_shadow_page(vcpu->arch.mmu->root_hpa))) + struct kvm_mmu_page *sp = to_shadow_page(vcpu->arch.mmu->root_hpa); + + /* Special roots, e.g. pae_root, are not backed by shadow pages. */ + if (sp && is_obsolete_sp(vcpu->kvm, sp)) + return true; + + /* + * Roots without an associated shadow page are considered invalid if + * there is a pending request to free obsolete roots. The request is + * only a hint that the current root _may_ be obsolete and needs to be + * reloaded, e.g. if the guest frees a PGD that KVM is tracking as a + * previous root, then __kvm_mmu_prepare_zap_page() signals all vCPUs + * to reload even if no vCPU is actively using the root. + */ + if (!sp && kvm_test_request(KVM_REQ_MMU_RELOAD, vcpu)) return true; return fault->slot && From 57690554abe135fee81d6ac33cc94d75a7e224bb Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Thu, 16 Dec 2021 00:08:56 +0000 Subject: [PATCH 143/251] x86/pkey: Fix undefined behaviour with PKRU_WD_BIT Both __pkru_allows_write() and arch_set_user_pkey_access() shift PKRU_WD_BIT (a signed constant) by up to 30 bits, hitting the sign bit. Use unsigned constants instead. Clearly pkey 15 has not been used in combination with UBSAN yet. Noticed by code inspection only. I can't actually provoke the compiler into generating incorrect logic as far as this shift is concerned. [ dhansen: add stable@ tag, plus minor changelog massaging, For anyone doing backports, these #defines were in arch/x86/include/asm/pgtable.h before 784a46618f6. ] Fixes: 33a709b25a76 ("mm/gup, x86/mm/pkeys: Check VMAs and PTEs for protection keys") Signed-off-by: Andrew Cooper Signed-off-by: Dave Hansen Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20211216000856.4480-1-andrew.cooper3@citrix.com --- arch/x86/include/asm/pkru.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/pkru.h b/arch/x86/include/asm/pkru.h index 4cd49afa0ca4..74f0a2d34ffd 100644 --- a/arch/x86/include/asm/pkru.h +++ b/arch/x86/include/asm/pkru.h @@ -4,8 +4,8 @@ #include -#define PKRU_AD_BIT 0x1 -#define PKRU_WD_BIT 0x2 +#define PKRU_AD_BIT 0x1u +#define PKRU_WD_BIT 0x2u #define PKRU_BITS_PER_PKEY 2 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS From a7904a538933c525096ca2ccde1e60d0ee62c08e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 19 Dec 2021 14:14:33 -0800 Subject: [PATCH 144/251] Linux 5.16-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 765115c99655..d85f1ff79f5c 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Gobble Gobble # *DOCUMENTATION* From 4ebfee2bbc1a9c343dd50565ba5ae249fac32267 Mon Sep 17 00:00:00 2001 From: Johnny Chuang Date: Mon, 20 Dec 2021 00:28:45 -0800 Subject: [PATCH 145/251] Input: elants_i2c - do not check Remark ID on eKTH3900/eKTH5312 The eKTH3900/eKTH5312 series do not support the firmware update rules of Remark ID. Exclude these two series from checking it when updating the firmware in touch controllers. Signed-off-by: Johnny Chuang Link: https://lore.kernel.org/r/1639619603-20616-1-git-send-email-johnny.chuang.emc@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/elants_i2c.c | 46 +++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c index 7e13a66a8a95..879a4d984c90 100644 --- a/drivers/input/touchscreen/elants_i2c.c +++ b/drivers/input/touchscreen/elants_i2c.c @@ -117,6 +117,19 @@ #define ELAN_POWERON_DELAY_USEC 500 #define ELAN_RESET_DELAY_MSEC 20 +/* FW boot code version */ +#define BC_VER_H_BYTE_FOR_EKTH3900x1_I2C 0x72 +#define BC_VER_H_BYTE_FOR_EKTH3900x2_I2C 0x82 +#define BC_VER_H_BYTE_FOR_EKTH3900x3_I2C 0x92 +#define BC_VER_H_BYTE_FOR_EKTH5312x1_I2C 0x6D +#define BC_VER_H_BYTE_FOR_EKTH5312x2_I2C 0x6E +#define BC_VER_H_BYTE_FOR_EKTH5312cx1_I2C 0x77 +#define BC_VER_H_BYTE_FOR_EKTH5312cx2_I2C 0x78 +#define BC_VER_H_BYTE_FOR_EKTH5312x1_I2C_USB 0x67 +#define BC_VER_H_BYTE_FOR_EKTH5312x2_I2C_USB 0x68 +#define BC_VER_H_BYTE_FOR_EKTH5312cx1_I2C_USB 0x74 +#define BC_VER_H_BYTE_FOR_EKTH5312cx2_I2C_USB 0x75 + enum elants_chip_id { EKTH3500, EKTF3624, @@ -736,6 +749,37 @@ static int elants_i2c_validate_remark_id(struct elants_data *ts, return 0; } +static bool elants_i2c_should_check_remark_id(struct elants_data *ts) +{ + struct i2c_client *client = ts->client; + const u8 bootcode_version = ts->iap_version; + bool check; + + /* I2C eKTH3900 and eKTH5312 are NOT support Remark ID */ + if ((bootcode_version == BC_VER_H_BYTE_FOR_EKTH3900x1_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH3900x2_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH3900x3_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312x1_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312x2_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312cx1_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312cx2_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312x1_I2C_USB) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312x2_I2C_USB) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312cx1_I2C_USB) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312cx2_I2C_USB)) { + dev_dbg(&client->dev, + "eKTH3900/eKTH5312(0x%02x) are not support remark id\n", + bootcode_version); + check = false; + } else if (bootcode_version >= 0x60) { + check = true; + } else { + check = false; + } + + return check; +} + static int elants_i2c_do_update_firmware(struct i2c_client *client, const struct firmware *fw, bool force) @@ -749,7 +793,7 @@ static int elants_i2c_do_update_firmware(struct i2c_client *client, u16 send_id; int page, n_fw_pages; int error; - bool check_remark_id = ts->iap_version >= 0x60; + bool check_remark_id = elants_i2c_should_check_remark_id(ts); /* Recovery mode detection! */ if (force) { From 9fb12fe5b93b94b9e607509ba461e17f4cc6a264 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 17 Dec 2021 07:49:34 -0500 Subject: [PATCH 146/251] KVM: x86: remove PMU FIXED_CTR3 from msrs_to_save_all The fixed counter 3 is used for the Topdown metrics, which hasn't been enabled for KVM guests. Userspace accessing to it will fail as it's not included in get_fixed_pmc(). This breaks KVM selftests on ICX+ machines, which have this counter. To reproduce it on ICX+ machines, ./state_test reports: ==== Test Assertion Failure ==== lib/x86_64/processor.c:1078: r == nmsrs pid=4564 tid=4564 - Argument list too long 1 0x000000000040b1b9: vcpu_save_state at processor.c:1077 2 0x0000000000402478: main at state_test.c:209 (discriminator 6) 3 0x00007fbe21ed5f92: ?? ??:0 4 0x000000000040264d: _start at ??:? Unexpected result from KVM_GET_MSRS, r: 17 (failed MSR was 0x30c) With this patch, it works well. Signed-off-by: Wei Wang Message-Id: <20211217124934.32893-1-wei.w.wang@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9a2972fdae82..d490b83d640c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1331,7 +1331,7 @@ static const u32 msrs_to_save_all[] = { MSR_IA32_UMWAIT_CONTROL, MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1, - MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3, + MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS, MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL, MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1, From 66c915d09b942fb3b2b0cb2f56562180901fba17 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 3 Dec 2021 15:15:54 +0100 Subject: [PATCH 147/251] mmc: core: Disable card detect during shutdown It's seems prone to problems by allowing card detect and its corresponding mmc_rescan() work to run, during platform shutdown. For example, we may end up turning off the power while initializing a card, which potentially could damage it. To avoid this scenario, let's add ->shutdown_pre() callback for the mmc host class device and then turn of the card detect from there. Reported-by: Al Cooper Suggested-by: Adrian Hunter Signed-off-by: Ulf Hansson Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211203141555.105351-1-ulf.hansson@linaro.org --- drivers/mmc/core/core.c | 7 ++++++- drivers/mmc/core/core.h | 1 + drivers/mmc/core/host.c | 9 +++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 240c5af793dc..368f10405e13 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -2264,7 +2264,7 @@ void mmc_start_host(struct mmc_host *host) _mmc_detect_change(host, 0, false); } -void mmc_stop_host(struct mmc_host *host) +void __mmc_stop_host(struct mmc_host *host) { if (host->slot.cd_irq >= 0) { mmc_gpio_set_cd_wake(host, false); @@ -2273,6 +2273,11 @@ void mmc_stop_host(struct mmc_host *host) host->rescan_disable = 1; cancel_delayed_work_sync(&host->detect); +} + +void mmc_stop_host(struct mmc_host *host) +{ + __mmc_stop_host(host); /* clear pm flags now and let card drivers set them as needed */ host->pm_flags = 0; diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index 7931a4f0137d..f5f3f623ea49 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -70,6 +70,7 @@ static inline void mmc_delay(unsigned int ms) void mmc_rescan(struct work_struct *work); void mmc_start_host(struct mmc_host *host); +void __mmc_stop_host(struct mmc_host *host); void mmc_stop_host(struct mmc_host *host); void _mmc_detect_change(struct mmc_host *host, unsigned long delay, diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index d4683b1d263f..cf140f4ec864 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -80,9 +80,18 @@ static void mmc_host_classdev_release(struct device *dev) kfree(host); } +static int mmc_host_classdev_shutdown(struct device *dev) +{ + struct mmc_host *host = cls_dev_to_mmc_host(dev); + + __mmc_stop_host(host); + return 0; +} + static struct class mmc_host_class = { .name = "mmc_host", .dev_release = mmc_host_classdev_release, + .shutdown_pre = mmc_host_classdev_shutdown, .pm = MMC_HOST_CLASS_DEV_PM_OPS, }; From f89b548ca66be7500dcd92ee8e61590f7d08ac91 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 19 Dec 2021 16:34:41 +0100 Subject: [PATCH 148/251] mmc: meson-mx-sdhc: Set MANUAL_STOP for multi-block SDIO commands The vendor driver implements special handling for multi-block SD_IO_RW_EXTENDED (and SD_IO_RW_DIRECT) commands which have data attached to them. It sets the MANUAL_STOP bit in the MESON_SDHC_MISC register for these commands. In all other cases this bit is cleared. Here we omit SD_IO_RW_DIRECT since that command never has any data attached to it. This fixes SDIO wifi using the brcmfmac driver which reported the following error without this change on a Netxeon S82 board using a Meson8 (S802) SoC: brcmf_fw_alloc_request: using brcm/brcmfmac43362-sdio for chip BCM43362/1 brcmf_sdiod_ramrw: membytes transfer failed brcmf_sdio_download_code_file: error -110 on writing 219557 membytes at 0x00000000 brcmf_sdio_download_firmware: dongle image file download failed And with this change: brcmf_fw_alloc_request: using brcm/brcmfmac43362-sdio for chip BCM43362/1 brcmf_c_process_clm_blob: no clm_blob available (err=-2), device may have limited channels available brcmf_c_preinit_dcmds: Firmware: BCM43362/1 wl0: Apr 22 2013 14:50:00 version 5.90.195.89.6 FWID 01-b30a427d Fixes: e4bf1b0970ef96 ("mmc: host: meson-mx-sdhc: new driver for the Amlogic Meson SDHC host") Signed-off-by: Martin Blumenstingl Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211219153442.463863-2-martin.blumenstingl@googlemail.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/meson-mx-sdhc-mmc.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/mmc/host/meson-mx-sdhc-mmc.c b/drivers/mmc/host/meson-mx-sdhc-mmc.c index 7cd9c0ec2fcf..8fdd0bbbfa21 100644 --- a/drivers/mmc/host/meson-mx-sdhc-mmc.c +++ b/drivers/mmc/host/meson-mx-sdhc-mmc.c @@ -135,6 +135,7 @@ static void meson_mx_sdhc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd) { struct meson_mx_sdhc_host *host = mmc_priv(mmc); + bool manual_stop = false; u32 ictl, send; int pack_len; @@ -172,12 +173,27 @@ static void meson_mx_sdhc_start_cmd(struct mmc_host *mmc, else /* software flush: */ ictl |= MESON_SDHC_ICTL_DATA_XFER_OK; + + /* + * Mimic the logic from the vendor driver where (only) + * SD_IO_RW_EXTENDED commands with more than one block set the + * MESON_SDHC_MISC_MANUAL_STOP bit. This fixes the firmware + * download in the brcmfmac driver for a BCM43362/1 card. + * Without this sdio_memcpy_toio() (with a size of 219557 + * bytes) times out if MESON_SDHC_MISC_MANUAL_STOP is not set. + */ + manual_stop = cmd->data->blocks > 1 && + cmd->opcode == SD_IO_RW_EXTENDED; } else { pack_len = 0; ictl |= MESON_SDHC_ICTL_RESP_OK; } + regmap_update_bits(host->regmap, MESON_SDHC_MISC, + MESON_SDHC_MISC_MANUAL_STOP, + manual_stop ? MESON_SDHC_MISC_MANUAL_STOP : 0); + if (cmd->opcode == MMC_STOP_TRANSMISSION) send |= MESON_SDHC_SEND_DATA_STOP; From 93a2207c254ca102ebbdae47b00f19bbfbfa7ecd Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Mon, 20 Dec 2021 10:51:20 +0100 Subject: [PATCH 149/251] HID: holtek: fix mouse probing An overlook from the previous commit: we don't even parse or start the device, meaning that the device is not presented to user space. Fixes: 93020953d0fa ("HID: check for valid USB device for many HID drivers") Cc: stable@vger.kernel.org Link: https://bugs.archlinux.org/task/73048 Link: https://bugzilla.kernel.org/show_bug.cgi?id=215341 Link: https://lore.kernel.org/r/e4efbf13-bd8d-0370-629b-6c80c0044b15@leemhuis.info/ Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-holtek-mouse.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/hid/hid-holtek-mouse.c b/drivers/hid/hid-holtek-mouse.c index b7172c48ef9f..7c907939bfae 100644 --- a/drivers/hid/hid-holtek-mouse.c +++ b/drivers/hid/hid-holtek-mouse.c @@ -65,8 +65,23 @@ static __u8 *holtek_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc, static int holtek_mouse_probe(struct hid_device *hdev, const struct hid_device_id *id) { + int ret; + if (!hid_is_usb(hdev)) return -EINVAL; + + ret = hid_parse(hdev); + if (ret) { + hid_err(hdev, "hid parse failed: %d\n", ret); + return ret; + } + + ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); + if (ret) { + hid_err(hdev, "hw start failed: %d\n", ret); + return ret; + } + return 0; } From 13251ce1dd9bb525da2becb9b26fdfb94ca58659 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Wed, 15 Dec 2021 16:36:05 +0800 Subject: [PATCH 150/251] HID: potential dereference of null pointer The return value of devm_kzalloc() needs to be checked. To avoid hdev->dev->driver_data to be null in case of the failure of alloc. Fixes: 14c9c014babe ("HID: add vivaldi HID driver") Cc: stable@vger.kernel.org Signed-off-by: Jiasheng Jiang Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20211215083605.117638-1-jiasheng@iscas.ac.cn --- drivers/hid/hid-vivaldi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hid/hid-vivaldi.c b/drivers/hid/hid-vivaldi.c index cd7ada48b1d9..72957a9f7117 100644 --- a/drivers/hid/hid-vivaldi.c +++ b/drivers/hid/hid-vivaldi.c @@ -57,6 +57,9 @@ static int vivaldi_probe(struct hid_device *hdev, int ret; drvdata = devm_kzalloc(&hdev->dev, sizeof(*drvdata), GFP_KERNEL); + if (!drvdata) + return -ENOMEM; + hid_set_drvdata(hdev, drvdata); ret = hid_parse(hdev); From 64d16aca3d4f130f35bbf1120e15f58a62f743d5 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 14 Dec 2021 09:04:54 -0800 Subject: [PATCH 151/251] drm/i915/guc: Use correct context lock when callig clr_context_registered s/ce/cn/ when grabbing guc_state.lock before calling clr_context_registered. Fixes: 0f7976506de61 ("drm/i915/guc: Rework and simplify locking") Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20211214170500.28569-2-matthew.brost@intel.com (cherry picked from commit b25db8c782ad7ae80d4cea2a09c222f4f8980bb9) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index c48557dfa04c..c50039a5ba1e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1664,9 +1664,9 @@ static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) list_del_init(&cn->guc_id.link); ce->guc_id = cn->guc_id; - spin_lock(&ce->guc_state.lock); + spin_lock(&cn->guc_state.lock); clr_context_registered(cn); - spin_unlock(&ce->guc_state.lock); + spin_unlock(&cn->guc_state.lock); set_context_guc_id_invalid(cn); From 7807bf28fe02a76bf112916c6b9194f282f5e43c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 14 Dec 2021 09:04:55 -0800 Subject: [PATCH 152/251] drm/i915/guc: Only assign guc_id.id when stealing guc_id Previously assigned whole guc_id structure (list, spin lock) which is incorrect, only assign the guc_id.id. Fixes: 0f7976506de61 ("drm/i915/guc: Rework and simplify locking") Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20211214170500.28569-3-matthew.brost@intel.com (cherry picked from commit 939d8e9c87e704fd5437e2c8b80929591fe540eb) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index c50039a5ba1e..302e9ff0602c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1662,7 +1662,7 @@ static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) GEM_BUG_ON(intel_context_is_parent(cn)); list_del_init(&cn->guc_id.link); - ce->guc_id = cn->guc_id; + ce->guc_id.id = cn->guc_id.id; spin_lock(&cn->guc_state.lock); clr_context_registered(cn); From 3a0f64de479cae75effb630a2e0a237ca0d0623c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 14 Dec 2021 03:35:28 +0000 Subject: [PATCH 153/251] KVM: x86/mmu: Don't advance iterator after restart due to yielding After dropping mmu_lock in the TDP MMU, restart the iterator during tdp_iter_next() and do not advance the iterator. Advancing the iterator results in skipping the top-level SPTE and all its children, which is fatal if any of the skipped SPTEs were not visited before yielding. When zapping all SPTEs, i.e. when min_level == root_level, restarting the iter and then invoking tdp_iter_next() is always fatal if the current gfn has as a valid SPTE, as advancing the iterator results in try_step_side() skipping the current gfn, which wasn't visited before yielding. Sprinkle WARNs on iter->yielded being true in various helpers that are often used in conjunction with yielding, and tag the helper with __must_check to reduce the probabily of improper usage. Failing to zap a top-level SPTE manifests in one of two ways. If a valid SPTE is skipped by both kvm_tdp_mmu_zap_all() and kvm_tdp_mmu_put_root(), the shadow page will be leaked and KVM will WARN accordingly. WARNING: CPU: 1 PID: 3509 at arch/x86/kvm/mmu/tdp_mmu.c:46 [kvm] RIP: 0010:kvm_mmu_uninit_tdp_mmu+0x3e/0x50 [kvm] Call Trace: kvm_arch_destroy_vm+0x130/0x1b0 [kvm] kvm_destroy_vm+0x162/0x2a0 [kvm] kvm_vcpu_release+0x34/0x60 [kvm] __fput+0x82/0x240 task_work_run+0x5c/0x90 do_exit+0x364/0xa10 ? futex_unqueue+0x38/0x60 do_group_exit+0x33/0xa0 get_signal+0x155/0x850 arch_do_signal_or_restart+0xed/0x750 exit_to_user_mode_prepare+0xc5/0x120 syscall_exit_to_user_mode+0x1d/0x40 do_syscall_64+0x48/0xc0 entry_SYSCALL_64_after_hwframe+0x44/0xae If kvm_tdp_mmu_zap_all() skips a gfn/SPTE but that SPTE is then zapped by kvm_tdp_mmu_put_root(), KVM triggers a use-after-free in the form of marking a struct page as dirty/accessed after it has been put back on the free list. This directly triggers a WARN due to encountering a page with page_count() == 0, but it can also lead to data corruption and additional errors in the kernel. WARNING: CPU: 7 PID: 1995658 at arch/x86/kvm/../../../virt/kvm/kvm_main.c:171 RIP: 0010:kvm_is_zone_device_pfn.part.0+0x9e/0xd0 [kvm] Call Trace: kvm_set_pfn_dirty+0x120/0x1d0 [kvm] __handle_changed_spte+0x92e/0xca0 [kvm] __handle_changed_spte+0x63c/0xca0 [kvm] __handle_changed_spte+0x63c/0xca0 [kvm] __handle_changed_spte+0x63c/0xca0 [kvm] zap_gfn_range+0x549/0x620 [kvm] kvm_tdp_mmu_put_root+0x1b6/0x270 [kvm] mmu_free_root_page+0x219/0x2c0 [kvm] kvm_mmu_free_roots+0x1b4/0x4e0 [kvm] kvm_mmu_unload+0x1c/0xa0 [kvm] kvm_arch_destroy_vm+0x1f2/0x5c0 [kvm] kvm_put_kvm+0x3b1/0x8b0 [kvm] kvm_vcpu_release+0x4e/0x70 [kvm] __fput+0x1f7/0x8c0 task_work_run+0xf8/0x1a0 do_exit+0x97b/0x2230 do_group_exit+0xda/0x2a0 get_signal+0x3be/0x1e50 arch_do_signal_or_restart+0x244/0x17f0 exit_to_user_mode_prepare+0xcb/0x120 syscall_exit_to_user_mode+0x1d/0x40 do_syscall_64+0x4d/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae Note, the underlying bug existed even before commit 1af4a96025b3 ("KVM: x86/mmu: Yield in TDU MMU iter even if no SPTES changed") moved calls to tdp_mmu_iter_cond_resched() to the beginning of loops, as KVM could still incorrectly advance past a top-level entry when yielding on a lower-level entry. But with respect to leaking shadow pages, the bug was introduced by yielding before processing the current gfn. Alternatively, tdp_mmu_iter_cond_resched() could simply fall through, or callers could jump to their "retry" label. The downside of that approach is that tdp_mmu_iter_cond_resched() _must_ be called before anything else in the loop, and there's no easy way to enfornce that requirement. Ideally, KVM would handling the cond_resched() fully within the iterator macro (the code is actually quite clean) and avoid this entire class of bugs, but that is extremely difficult do while also supporting yielding after tdp_mmu_set_spte_atomic() fails. Yielding after failing to set a SPTE is very desirable as the "owner" of the REMOVED_SPTE isn't strictly bounded, e.g. if it's zapping a high-level shadow page, the REMOVED_SPTE may block operations on the SPTE for a significant amount of time. Fixes: faaf05b00aec ("kvm: x86/mmu: Support zapping SPTEs in the TDP MMU") Fixes: 1af4a96025b3 ("KVM: x86/mmu: Yield in TDU MMU iter even if no SPTES changed") Reported-by: Ignat Korchagin Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20211214033528.123268-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_iter.c | 6 ++++++ arch/x86/kvm/mmu/tdp_iter.h | 6 ++++++ arch/x86/kvm/mmu/tdp_mmu.c | 29 ++++++++++++++++------------- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c index b3ed302c1a35..caa96c270b95 100644 --- a/arch/x86/kvm/mmu/tdp_iter.c +++ b/arch/x86/kvm/mmu/tdp_iter.c @@ -26,6 +26,7 @@ static gfn_t round_gfn_for_level(gfn_t gfn, int level) */ void tdp_iter_restart(struct tdp_iter *iter) { + iter->yielded = false; iter->yielded_gfn = iter->next_last_level_gfn; iter->level = iter->root_level; @@ -160,6 +161,11 @@ static bool try_step_up(struct tdp_iter *iter) */ void tdp_iter_next(struct tdp_iter *iter) { + if (iter->yielded) { + tdp_iter_restart(iter); + return; + } + if (try_step_down(iter)) return; diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h index b1748b988d3a..e19cabbcb65c 100644 --- a/arch/x86/kvm/mmu/tdp_iter.h +++ b/arch/x86/kvm/mmu/tdp_iter.h @@ -45,6 +45,12 @@ struct tdp_iter { * iterator walks off the end of the paging structure. */ bool valid; + /* + * True if KVM dropped mmu_lock and yielded in the middle of a walk, in + * which case tdp_iter_next() needs to restart the walk at the root + * level instead of advancing to the next entry. + */ + bool yielded; }; /* diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 1db8496259ad..1beb4ca90560 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -502,6 +502,8 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm, struct tdp_iter *iter, u64 new_spte) { + WARN_ON_ONCE(iter->yielded); + lockdep_assert_held_read(&kvm->mmu_lock); /* @@ -575,6 +577,8 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter, u64 new_spte, bool record_acc_track, bool record_dirty_log) { + WARN_ON_ONCE(iter->yielded); + lockdep_assert_held_write(&kvm->mmu_lock); /* @@ -640,18 +644,19 @@ static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm, * If this function should yield and flush is set, it will perform a remote * TLB flush before yielding. * - * If this function yields, it will also reset the tdp_iter's walk over the - * paging structure and the calling function should skip to the next - * iteration to allow the iterator to continue its traversal from the - * paging structure root. + * If this function yields, iter->yielded is set and the caller must skip to + * the next iteration, where tdp_iter_next() will reset the tdp_iter's walk + * over the paging structures to allow the iterator to continue its traversal + * from the paging structure root. * - * Return true if this function yielded and the iterator's traversal was reset. - * Return false if a yield was not needed. + * Returns true if this function yielded. */ -static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm, - struct tdp_iter *iter, bool flush, - bool shared) +static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm, + struct tdp_iter *iter, + bool flush, bool shared) { + WARN_ON(iter->yielded); + /* Ensure forward progress has been made before yielding. */ if (iter->next_last_level_gfn == iter->yielded_gfn) return false; @@ -671,12 +676,10 @@ static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm, WARN_ON(iter->gfn > iter->next_last_level_gfn); - tdp_iter_restart(iter); - - return true; + iter->yielded = true; } - return false; + return iter->yielded; } /* From c5063551bfcae4e48fec890b7bf369598b77526b Mon Sep 17 00:00:00 2001 From: Marc Orr Date: Thu, 9 Dec 2021 07:52:57 -0800 Subject: [PATCH 154/251] KVM: x86: Always set kvm_run->if_flag The kvm_run struct's if_flag is a part of the userspace/kernel API. The SEV-ES patches failed to set this flag because it's no longer needed by QEMU (according to the comment in the source code). However, other hypervisors may make use of this flag. Therefore, set the flag for guests with encrypted registers (i.e., with guest_state_protected set). Fixes: f1c6366e3043 ("KVM: SVM: Add required changes to support intercepts under SEV-ES") Signed-off-by: Marc Orr Message-Id: <20211209155257.128747-1-marcorr@google.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Reviewed-by: Maxim Levitsky --- arch/x86/include/asm/kvm-x86-ops.h | 1 + arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm/svm.c | 21 ++++++++++++--------- arch/x86/kvm/vmx/vmx.c | 6 ++++++ arch/x86/kvm/x86.c | 9 +-------- 5 files changed, 21 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index cefe1d81e2e8..9e50da3ed01a 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -47,6 +47,7 @@ KVM_X86_OP(set_dr7) KVM_X86_OP(cache_reg) KVM_X86_OP(get_rflags) KVM_X86_OP(set_rflags) +KVM_X86_OP(get_if_flag) KVM_X86_OP(tlb_flush_all) KVM_X86_OP(tlb_flush_current) KVM_X86_OP_NULL(tlb_remote_flush) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2164b9f4c7b0..555f4de47ef2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1349,6 +1349,7 @@ struct kvm_x86_ops { void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); + bool (*get_if_flag)(struct kvm_vcpu *vcpu); void (*tlb_flush_all)(struct kvm_vcpu *vcpu); void (*tlb_flush_current)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d0f68d11ec70..5151efa424ac 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1585,6 +1585,15 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) to_svm(vcpu)->vmcb->save.rflags = rflags; } +static bool svm_get_if_flag(struct kvm_vcpu *vcpu) +{ + struct vmcb *vmcb = to_svm(vcpu)->vmcb; + + return sev_es_guest(vcpu->kvm) + ? vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK + : kvm_get_rflags(vcpu) & X86_EFLAGS_IF; +} + static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) { switch (reg) { @@ -3568,14 +3577,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu) if (!gif_set(svm)) return true; - if (sev_es_guest(vcpu->kvm)) { - /* - * SEV-ES guests to not expose RFLAGS. Use the VMCB interrupt mask - * bit to determine the state of the IF flag. - */ - if (!(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK)) - return true; - } else if (is_guest_mode(vcpu)) { + if (is_guest_mode(vcpu)) { /* As long as interrupts are being delivered... */ if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) ? !(svm->vmcb01.ptr->save.rflags & X86_EFLAGS_IF) @@ -3586,7 +3588,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu) if (nested_exit_on_intr(svm)) return false; } else { - if (!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF)) + if (!svm_get_if_flag(vcpu)) return true; } @@ -4621,6 +4623,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .cache_reg = svm_cache_reg, .get_rflags = svm_get_rflags, .set_rflags = svm_set_rflags, + .get_if_flag = svm_get_if_flag, .tlb_flush_all = svm_flush_tlb, .tlb_flush_current = svm_flush_tlb, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 5aadad3e7367..9f7604cbba41 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1363,6 +1363,11 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) vmx->emulation_required = vmx_emulation_required(vcpu); } +static bool vmx_get_if_flag(struct kvm_vcpu *vcpu) +{ + return vmx_get_rflags(vcpu) & X86_EFLAGS_IF; +} + u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu) { u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); @@ -7579,6 +7584,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .cache_reg = vmx_cache_reg, .get_rflags = vmx_get_rflags, .set_rflags = vmx_set_rflags, + .get_if_flag = vmx_get_if_flag, .tlb_flush_all = vmx_flush_tlb_all, .tlb_flush_current = vmx_flush_tlb_current, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d490b83d640c..e50e97ac4408 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9001,14 +9001,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) { struct kvm_run *kvm_run = vcpu->run; - /* - * if_flag is obsolete and useless, so do not bother - * setting it for SEV-ES guests. Userspace can just - * use kvm_run->ready_for_interrupt_injection. - */ - kvm_run->if_flag = !vcpu->arch.guest_state_protected - && (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0; - + kvm_run->if_flag = static_call(kvm_x86_get_if_flag)(vcpu); kvm_run->cr8 = kvm_get_cr8(vcpu); kvm_run->apic_base = kvm_get_apic_base(vcpu); From 577e022b7b41854911dcfb03678d8d2b930e8a3f Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 14 Dec 2021 16:18:42 +0100 Subject: [PATCH 155/251] selftests: KVM: Fix non-x86 compiling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Attempting to compile on a non-x86 architecture fails with include/kvm_util.h: In function ‘vm_compute_max_gfn’: include/kvm_util.h:79:21: error: dereferencing pointer to incomplete type ‘struct kvm_vm’ return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1; ^~ This is because the declaration of struct kvm_vm is in lib/kvm_util_internal.h as an effort to make it private to the test lib code. We can still provide arch specific functions, though, by making the generic function symbols weak. Do that to fix the compile error. Fixes: c8cc43c1eae2 ("selftests: KVM: avoid failures due to reserved HyperTransport region") Cc: stable@vger.kernel.org Signed-off-by: Andrew Jones Message-Id: <20211214151842.848314-1-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/kvm_util.h | 10 +--------- tools/testing/selftests/kvm/lib/kvm_util.c | 5 +++++ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index da2b702da71a..2d62edc49d67 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -71,15 +71,6 @@ enum vm_guest_mode { #endif -#if defined(__x86_64__) -unsigned long vm_compute_max_gfn(struct kvm_vm *vm); -#else -static inline unsigned long vm_compute_max_gfn(struct kvm_vm *vm) -{ - return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1; -} -#endif - #define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT) #define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE) @@ -330,6 +321,7 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm); unsigned int vm_get_page_size(struct kvm_vm *vm); unsigned int vm_get_page_shift(struct kvm_vm *vm); +unsigned long vm_compute_max_gfn(struct kvm_vm *vm); uint64_t vm_get_max_gfn(struct kvm_vm *vm); int vm_get_fd(struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index daf6fdb217a7..53d2b5d04b82 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -2328,6 +2328,11 @@ unsigned int vm_get_page_shift(struct kvm_vm *vm) return vm->page_shift; } +unsigned long __attribute__((weak)) vm_compute_max_gfn(struct kvm_vm *vm) +{ + return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1; +} + uint64_t vm_get_max_gfn(struct kvm_vm *vm) { return vm->max_gfn; From a80dfc025924024d2c61a4c1b8ef62b2fce76a04 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 7 Dec 2021 19:30:03 +0000 Subject: [PATCH 156/251] KVM: VMX: Always clear vmx->fail on emulation_required Revert a relatively recent change that set vmx->fail if the vCPU is in L2 and emulation_required is true, as that behavior is completely bogus. Setting vmx->fail and synthesizing a VM-Exit is contradictory and wrong: (a) it's impossible to have both a VM-Fail and VM-Exit (b) vmcs.EXIT_REASON is not modified on VM-Fail (c) emulation_required refers to guest state and guest state checks are always VM-Exits, not VM-Fails. For KVM specifically, emulation_required is handled before nested exits in __vmx_handle_exit(), thus setting vmx->fail has no immediate effect, i.e. KVM calls into handle_invalid_guest_state() and vmx->fail is ignored. Setting vmx->fail can ultimately result in a WARN in nested_vmx_vmexit() firing when tearing down the VM as KVM never expects vmx->fail to be set when L2 is active, KVM always reflects those errors into L1. ------------[ cut here ]------------ WARNING: CPU: 0 PID: 21158 at arch/x86/kvm/vmx/nested.c:4548 nested_vmx_vmexit+0x16bd/0x17e0 arch/x86/kvm/vmx/nested.c:4547 Modules linked in: CPU: 0 PID: 21158 Comm: syz-executor.1 Not tainted 5.16.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:nested_vmx_vmexit+0x16bd/0x17e0 arch/x86/kvm/vmx/nested.c:4547 Code: <0f> 0b e9 2e f8 ff ff e8 57 b3 5d 00 0f 0b e9 00 f1 ff ff 89 e9 80 Call Trace: vmx_leave_nested arch/x86/kvm/vmx/nested.c:6220 [inline] nested_vmx_free_vcpu+0x83/0xc0 arch/x86/kvm/vmx/nested.c:330 vmx_free_vcpu+0x11f/0x2a0 arch/x86/kvm/vmx/vmx.c:6799 kvm_arch_vcpu_destroy+0x6b/0x240 arch/x86/kvm/x86.c:10989 kvm_vcpu_destroy+0x29/0x90 arch/x86/kvm/../../../virt/kvm/kvm_main.c:441 kvm_free_vcpus arch/x86/kvm/x86.c:11426 [inline] kvm_arch_destroy_vm+0x3ef/0x6b0 arch/x86/kvm/x86.c:11545 kvm_destroy_vm arch/x86/kvm/../../../virt/kvm/kvm_main.c:1189 [inline] kvm_put_kvm+0x751/0xe40 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1220 kvm_vcpu_release+0x53/0x60 arch/x86/kvm/../../../virt/kvm/kvm_main.c:3489 __fput+0x3fc/0x870 fs/file_table.c:280 task_work_run+0x146/0x1c0 kernel/task_work.c:164 exit_task_work include/linux/task_work.h:32 [inline] do_exit+0x705/0x24f0 kernel/exit.c:832 do_group_exit+0x168/0x2d0 kernel/exit.c:929 get_signal+0x1740/0x2120 kernel/signal.c:2852 arch_do_signal_or_restart+0x9c/0x730 arch/x86/kernel/signal.c:868 handle_signal_work kernel/entry/common.c:148 [inline] exit_to_user_mode_loop kernel/entry/common.c:172 [inline] exit_to_user_mode_prepare+0x191/0x220 kernel/entry/common.c:207 __syscall_exit_to_user_mode_work kernel/entry/common.c:289 [inline] syscall_exit_to_user_mode+0x2e/0x70 kernel/entry/common.c:300 do_syscall_64+0x53/0xd0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: c8607e4a086f ("KVM: x86: nVMX: don't fail nested VM entry on invalid guest state if !from_vmentry") Reported-by: syzbot+f1d2136db9c80d4733e8@syzkaller.appspotmail.com Reviewed-by: Maxim Levitsky Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20211207193006.120997-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 9f7604cbba41..5d4d74dd76f5 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6613,9 +6613,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) * consistency check VM-Exit due to invalid guest state and bail. */ if (unlikely(vmx->emulation_required)) { - - /* We don't emulate invalid state of a nested guest */ - vmx->fail = is_guest_mode(vcpu); + vmx->fail = 0; vmx->exit_reason.full = EXIT_REASON_INVALID_STATE; vmx->exit_reason.failed_vmentry = 1; From cd0e615c49e5e5d69885af9ac3b4fa7bb3387f58 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 7 Dec 2021 19:30:04 +0000 Subject: [PATCH 157/251] KVM: nVMX: Synthesize TRIPLE_FAULT for L2 if emulation is required MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Synthesize a triple fault if L2 guest state is invalid at the time of VM-Enter, which can happen if L1 modifies SMRAM or if userspace stuffs guest state via ioctls(), e.g. KVM_SET_SREGS. KVM should never emulate invalid guest state, since from L1's perspective, it's architecturally impossible for L2 to have invalid state while L2 is running in hardware. E.g. attempts to set CR0 or CR4 to unsupported values will either VM-Exit or #GP. Modifying vCPU state via RSM+SMRAM and ioctl() are the only paths that can trigger this scenario, as nested VM-Enter correctly rejects any attempt to enter L2 with invalid state. RSM is a straightforward case as (a) KVM follows AMD's SMRAM layout and behavior, and (b) Intel's SDM states that loading reserved CR0/CR4 bits via RSM results in shutdown, i.e. there is precedent for KVM's behavior. Following AMD's SMRAM layout is important as AMD's layout saves/restores the descriptor cache information, including CS.RPL and SS.RPL, and also defines all the fields relevant to invalid guest state as read-only, i.e. so long as the vCPU had valid state before the SMI, which is guaranteed for L2, RSM will generate valid state unless SMRAM was modified. Intel's layout saves/restores only the selector, which means that scenarios where the selector and cached RPL don't match, e.g. conforming code segments, would yield invalid guest state. Intel CPUs fudge around this issued by stuffing SS.RPL and CS.RPL on RSM. Per Intel's SDM on the "Default Treatment of RSM", paraphrasing for brevity: IF internal storage indicates that the [CPU was post-VMXON] THEN enter VMX operation (root or non-root); restore VMX-critical state as defined in Section 34.14.1; set to their fixed values any bits in CR0 and CR4 whose values must be fixed in VMX operation [unless coming from an unrestricted guest]; IF RFLAGS.VM = 0 AND (in VMX root operation OR the “unrestricted guest” VM-execution control is 0) THEN CS.RPL := SS.DPL; SS.RPL := SS.DPL; FI; restore current VMCS pointer; FI; Note that Intel CPUs also overwrite the fixed CR0/CR4 bits, whereas KVM will sythesize TRIPLE_FAULT in this scenario. KVM's behavior is allowed as both Intel and AMD define CR0/CR4 SMRAM fields as read-only, i.e. the only way for CR0 and/or CR4 to have illegal values is if they were modified by the L1 SMM handler, and Intel's SDM "SMRAM State Save Map" section states "modifying these registers will result in unpredictable behavior". KVM's ioctl() behavior is less straightforward. Because KVM allows ioctls() to be executed in any order, rejecting an ioctl() if it would result in invalid L2 guest state is not an option as KVM cannot know if a future ioctl() would resolve the invalid state, e.g. KVM_SET_SREGS, or drop the vCPU out of L2, e.g. KVM_SET_NESTED_STATE. Ideally, KVM would reject KVM_RUN if L2 contained invalid guest state, but that carries the risk of a false positive, e.g. if RSM loaded invalid guest state and KVM exited to userspace. Setting a flag/request to detect such a scenario is undesirable because (a) it's extremely unlikely to add value to KVM as a whole, and (b) KVM would need to consider ioctl() interactions with such a flag, e.g. if userspace migrated the vCPU while the flag were set. Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20211207193006.120997-3-seanjc@google.com> Reviewed-by: Maxim Levitsky Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 5d4d74dd76f5..5974a88c9d35 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5882,18 +5882,14 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) vmx_flush_pml_buffer(vcpu); /* - * We should never reach this point with a pending nested VM-Enter, and - * more specifically emulation of L2 due to invalid guest state (see - * below) should never happen as that means we incorrectly allowed a - * nested VM-Enter with an invalid vmcs12. + * KVM should never reach this point with a pending nested VM-Enter. + * More specifically, short-circuiting VM-Entry to emulate L2 due to + * invalid guest state should never happen as that means KVM knowingly + * allowed a nested VM-Enter with an invalid vmcs12. More below. */ if (KVM_BUG_ON(vmx->nested.nested_run_pending, vcpu->kvm)) return -EIO; - /* If guest state is invalid, start emulating */ - if (vmx->emulation_required) - return handle_invalid_guest_state(vcpu); - if (is_guest_mode(vcpu)) { /* * PML is never enabled when running L2, bail immediately if a @@ -5915,10 +5911,30 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) */ nested_mark_vmcs12_pages_dirty(vcpu); + /* + * Synthesize a triple fault if L2 state is invalid. In normal + * operation, nested VM-Enter rejects any attempt to enter L2 + * with invalid state. However, those checks are skipped if + * state is being stuffed via RSM or KVM_SET_NESTED_STATE. If + * L2 state is invalid, it means either L1 modified SMRAM state + * or userspace provided bad state. Synthesize TRIPLE_FAULT as + * doing so is architecturally allowed in the RSM case, and is + * the least awful solution for the userspace case without + * risking false positives. + */ + if (vmx->emulation_required) { + nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0); + return 1; + } + if (nested_vmx_reflect_vmexit(vcpu)) return 1; } + /* If guest state is invalid, start emulating. L2 is handled above. */ + if (vmx->emulation_required) + return handle_invalid_guest_state(vcpu); + if (exit_reason.failed_vmentry) { dump_vmcs(vcpu); vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; From 0ff29701ffad9a5d5a24344d8b09f3af7b96ffda Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 7 Dec 2021 19:30:05 +0000 Subject: [PATCH 158/251] KVM: VMX: Fix stale docs for kvm-intel.emulate_invalid_guest_state Update the documentation for kvm-intel's emulate_invalid_guest_state to rectify the description of KVM's default behavior, and to document that the behavior and thus parameter only applies to L1. Fixes: a27685c33acc ("KVM: VMX: Emulate invalid guest state by default") Signed-off-by: Sean Christopherson Message-Id: <20211207193006.120997-4-seanjc@google.com> Reviewed-by: Maxim Levitsky Signed-off-by: Paolo Bonzini --- Documentation/admin-guide/kernel-parameters.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 9725c546a0d4..fc34332c8d9a 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2413,8 +2413,12 @@ Default is 1 (enabled) kvm-intel.emulate_invalid_guest_state= - [KVM,Intel] Enable emulation of invalid guest states - Default is 0 (disabled) + [KVM,Intel] Disable emulation of invalid guest state. + Ignored if kvm-intel.enable_unrestricted_guest=1, as + guest state is never invalid for unrestricted guests. + This param doesn't apply to nested guests (L2), as KVM + never emulates invalid L2 guest state. + Default is 1 (enabled) kvm-intel.flexpriority= [KVM,Intel] Disable FlexPriority feature (TPR shadow). From ab1ef34416a65ba11f66ae6435fcf0251cb46fd4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 7 Dec 2021 19:30:06 +0000 Subject: [PATCH 159/251] KVM: selftests: Add test to verify TRIPLE_FAULT on invalid L2 guest state Add a selftest to attempt to enter L2 with invalid guests state by exiting to userspace via I/O from L2, and then using KVM_SET_SREGS to set invalid guest state (marking TR unusable is arbitrary chosen for its relative simplicity). This is a regression test for a bug introduced by commit c8607e4a086f ("KVM: x86: nVMX: don't fail nested VM entry on invalid guest state if !from_vmentry"), which incorrectly set vmx->fail=true when L2 had invalid guest state and ultimately triggered a WARN due to nested_vmx_vmexit() seeing vmx->fail==true while attempting to synthesize a nested VM-Exit. The is also a functional test to verify that KVM sythesizes TRIPLE_FAULT for L2, which is somewhat arbitrary behavior, instead of emulating L2. KVM should never emulate L2 due to invalid guest state, as it's architecturally impossible for L1 to run an L2 guest with invalid state as nested VM-Enter should always fail, i.e. L1 needs to do the emulation. Stuffing state via KVM ioctl() is a non-architctural, out-of-band case, hence the TRIPLE_FAULT being rather arbitrary. Signed-off-by: Sean Christopherson Message-Id: <20211207193006.120997-5-seanjc@google.com> Reviewed-by: Maxim Levitsky Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../x86_64/vmx_invalid_nested_guest_state.c | 105 ++++++++++++++++++ 3 files changed, 107 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 00814c0f87a6..3cb5ac5da087 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -35,6 +35,7 @@ /x86_64/vmx_apic_access_test /x86_64/vmx_close_while_nested_test /x86_64/vmx_dirty_log_test +/x86_64/vmx_invalid_nested_guest_state /x86_64/vmx_preemption_timer_test /x86_64/vmx_set_nested_state_test /x86_64/vmx_tsc_adjust_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index f307c9f61981..17342b575e85 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -64,6 +64,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test diff --git a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c new file mode 100644 index 000000000000..489fbed4ca6f --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#include +#include + +#include "kselftest.h" + +#define VCPU_ID 0 +#define ARBITRARY_IO_PORT 0x2000 + +static struct kvm_vm *vm; + +static void l2_guest_code(void) +{ + /* + * Generate an exit to L0 userspace, i.e. main(), via I/O to an + * arbitrary port. + */ + asm volatile("inb %%dx, %%al" + : : [port] "d" (ARBITRARY_IO_PORT) : "rax"); +} + +static void l1_guest_code(struct vmx_pages *vmx_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + /* Prepare the VMCS for L2 execution. */ + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* + * L2 must be run without unrestricted guest, verify that the selftests + * library hasn't enabled it. Because KVM selftests jump directly to + * 64-bit mode, unrestricted guest support isn't required. + */ + GUEST_ASSERT(!(vmreadz(CPU_BASED_VM_EXEC_CONTROL) & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) || + !(vmreadz(SECONDARY_VM_EXEC_CONTROL) & SECONDARY_EXEC_UNRESTRICTED_GUEST)); + + GUEST_ASSERT(!vmlaunch()); + + /* L2 should triple fault after main() stuffs invalid guest state. */ + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT); + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t vmx_pages_gva; + struct kvm_sregs sregs; + struct kvm_run *run; + struct ucall uc; + + nested_vmx_check_supported(); + + vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); + + /* Allocate VMX pages and shared descriptors (vmx_pages). */ + vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + + vcpu_run(vm, VCPU_ID); + + run = vcpu_state(vm, VCPU_ID); + + /* + * The first exit to L0 userspace should be an I/O access from L2. + * Running L1 should launch L2 without triggering an exit to userspace. + */ + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Expected KVM_EXIT_IO, got: %u (%s)\n", + run->exit_reason, exit_reason_str(run->exit_reason)); + + TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT, + "Expected IN from port %d from L2, got port %d", + ARBITRARY_IO_PORT, run->io.port); + + /* + * Stuff invalid guest state for L2 by making TR unusuable. The next + * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support + * emulating invalid guest state for L2. + */ + memset(&sregs, 0, sizeof(sregs)); + vcpu_sregs_get(vm, VCPU_ID, &sregs); + sregs.tr.unusable = 1; + vcpu_sregs_set(vm, VCPU_ID, &sregs); + + vcpu_run(vm, VCPU_ID); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_DONE: + break; + case UCALL_ABORT: + TEST_FAIL("%s", (const char *)uc.args[0]); + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } +} From 484730e5862f6b872dca13840bed40fd7c60fa26 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 8 Dec 2021 11:06:52 +0100 Subject: [PATCH 160/251] parisc: Clear stale IIR value on instruction access rights trap When a trap 7 (Instruction access rights) occurs, this means the CPU couldn't execute an instruction due to missing execute permissions on the memory region. In this case it seems the CPU didn't even fetched the instruction from memory and thus did not store it in the cr19 (IIR) register before calling the trap handler. So, the trap handler will find some random old stale value in cr19. This patch simply overwrites the stale IIR value with a constant magic "bad food" value (0xbaadf00d), in the hope people don't start to try to understand the various random IIR values in trap 7 dumps. Noticed-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/kernel/traps.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index b11fb26ce299..892b7fc8f3c4 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -730,6 +730,8 @@ void notrace handle_interruption(int code, struct pt_regs *regs) } mmap_read_unlock(current->mm); } + /* CPU could not fetch instruction, so clear stale IIR value. */ + regs->iir = 0xbaadf00d; fallthrough; case 27: /* Data memory protection ID trap */ From 8d84fca4375e3c35dadc16b8c7eee6821b2a575c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 3 Dec 2021 23:41:12 +1100 Subject: [PATCH 161/251] powerpc/ptdump: Fix DEBUG_WX since generic ptdump conversion In note_prot_wx() we bail out without reporting anything if CONFIG_PPC_DEBUG_WX is disabled. But CONFIG_PPC_DEBUG_WX was removed in the conversion to generic ptdump, we now need to use CONFIG_DEBUG_WX instead. Fixes: e084728393a5 ("powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP") Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Michael Ellerman Reviewed-by: Christophe Leroy Link: https://lore.kernel.org/r/20211203124112.2912562-1-mpe@ellerman.id.au --- arch/powerpc/mm/ptdump/ptdump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index bf251191e78d..32bfb215c485 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -183,7 +183,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) { pte_t pte = __pte(st->current_flags); - if (!IS_ENABLED(CONFIG_PPC_DEBUG_WX) || !st->check_wx) + if (!IS_ENABLED(CONFIG_DEBUG_WX) || !st->check_wx) return; if (!pte_write(pte) || !pte_exec(pte)) From b1e0887379422975f237d43d8839b751a6bcf154 Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Sat, 18 Dec 2021 02:18:40 +0000 Subject: [PATCH 162/251] usb: gadget: f_fs: Clear ffs_eventfd in ffs_data_clear. ffs_data_clear is indirectly called from both ffs_fs_kill_sb and ffs_ep0_release, so it ends up being called twice when userland closes ep0 and then unmounts f_fs. If userland provided an eventfd along with function's USB descriptors, it ends up calling eventfd_ctx_put as many times, causing a refcount underflow. NULL-ify ffs_eventfd to prevent these extraneous eventfd_ctx_put calls. Also, set epfiles to NULL right after de-allocating it, for readability. For completeness, ffs_data_clear actually ends up being called thrice, the last call being before the whole ffs structure gets freed, so when this specific sequence happens there is a second underflow happening (but not being reported): /sys/kernel/debug/tracing# modprobe usb_f_fs /sys/kernel/debug/tracing# echo ffs_data_clear > set_ftrace_filter /sys/kernel/debug/tracing# echo function > current_tracer /sys/kernel/debug/tracing# echo 1 > tracing_on (setup gadget, run and kill function userland process, teardown gadget) /sys/kernel/debug/tracing# echo 0 > tracing_on /sys/kernel/debug/tracing# cat trace smartcard-openp-436 [000] ..... 1946.208786: ffs_data_clear <-ffs_data_closed smartcard-openp-431 [000] ..... 1946.279147: ffs_data_clear <-ffs_data_closed smartcard-openp-431 [000] .n... 1946.905512: ffs_data_clear <-ffs_data_put Warning output corresponding to above trace: [ 1946.284139] WARNING: CPU: 0 PID: 431 at lib/refcount.c:28 refcount_warn_saturate+0x110/0x15c [ 1946.293094] refcount_t: underflow; use-after-free. [ 1946.298164] Modules linked in: usb_f_ncm(E) u_ether(E) usb_f_fs(E) hci_uart(E) btqca(E) btrtl(E) btbcm(E) btintel(E) bluetooth(E) nls_ascii(E) nls_cp437(E) vfat(E) fat(E) bcm2835_v4l2(CE) bcm2835_mmal_vchiq(CE) videobuf2_vmalloc(E) videobuf2_memops(E) sha512_generic(E) videobuf2_v4l2(E) sha512_arm(E) videobuf2_common(E) videodev(E) cpufreq_dt(E) snd_bcm2835(CE) brcmfmac(E) mc(E) vc4(E) ctr(E) brcmutil(E) snd_soc_core(E) snd_pcm_dmaengine(E) drbg(E) snd_pcm(E) snd_timer(E) snd(E) soundcore(E) drm_kms_helper(E) cec(E) ansi_cprng(E) rc_core(E) syscopyarea(E) raspberrypi_cpufreq(E) sysfillrect(E) sysimgblt(E) cfg80211(E) max17040_battery(OE) raspberrypi_hwmon(E) fb_sys_fops(E) regmap_i2c(E) ecdh_generic(E) rfkill(E) ecc(E) bcm2835_rng(E) rng_core(E) vchiq(CE) leds_gpio(E) libcomposite(E) fuse(E) configfs(E) ip_tables(E) x_tables(E) autofs4(E) ext4(E) crc16(E) mbcache(E) jbd2(E) crc32c_generic(E) sdhci_iproc(E) sdhci_pltfm(E) sdhci(E) [ 1946.399633] CPU: 0 PID: 431 Comm: smartcard-openp Tainted: G C OE 5.15.0-1-rpi #1 Debian 5.15.3-1 [ 1946.417950] Hardware name: BCM2835 [ 1946.425442] Backtrace: [ 1946.432048] [] (dump_backtrace) from [] (show_stack+0x20/0x24) [ 1946.448226] r7:00000009 r6:0000001c r5:c04a948c r4:c0a64e2c [ 1946.458412] [] (show_stack) from [] (dump_stack+0x28/0x30) [ 1946.470380] [] (dump_stack) from [] (__warn+0xe8/0x154) [ 1946.482067] r5:c04a948c r4:c0a71dc8 [ 1946.490184] [] (__warn) from [] (warn_slowpath_fmt+0xa0/0xe4) [ 1946.506758] r7:00000009 r6:0000001c r5:c0a71dc8 r4:c0a71e04 [ 1946.517070] [] (warn_slowpath_fmt) from [] (refcount_warn_saturate+0x110/0x15c) [ 1946.535309] r8:c0100224 r7:c0dfcb84 r6:ffffffff r5:c3b84c00 r4:c24a17c0 [ 1946.546708] [] (refcount_warn_saturate) from [] (eventfd_ctx_put+0x48/0x74) [ 1946.564476] [] (eventfd_ctx_put) from [] (ffs_data_clear+0xd0/0x118 [usb_f_fs]) [ 1946.582664] r5:c3b84c00 r4:c2695b00 [ 1946.590668] [] (ffs_data_clear [usb_f_fs]) from [] (ffs_data_closed+0x9c/0x150 [usb_f_fs]) [ 1946.609608] r5:bf54d014 r4:c2695b00 [ 1946.617522] [] (ffs_data_closed [usb_f_fs]) from [] (ffs_fs_kill_sb+0x2c/0x30 [usb_f_fs]) [ 1946.636217] r7:c0dfcb84 r6:c3a12260 r5:bf54d014 r4:c229f000 [ 1946.646273] [] (ffs_fs_kill_sb [usb_f_fs]) from [] (deactivate_locked_super+0x54/0x9c) [ 1946.664893] r5:bf54d014 r4:c229f000 [ 1946.672921] [] (deactivate_locked_super) from [] (deactivate_super+0x60/0x64) [ 1946.690722] r5:c2a09000 r4:c229f000 [ 1946.698706] [] (deactivate_super) from [] (cleanup_mnt+0xe4/0x14c) [ 1946.715553] r5:c2a09000 r4:00000000 [ 1946.723528] [] (cleanup_mnt) from [] (__cleanup_mnt+0x1c/0x20) [ 1946.739922] r7:c0dfcb84 r6:c3a12260 r5:c3a126fc r4:00000000 [ 1946.750088] [] (__cleanup_mnt) from [] (task_work_run+0x84/0xb8) [ 1946.766602] [] (task_work_run) from [] (do_work_pending+0x470/0x56c) [ 1946.783540] r7:5ac3c35a r6:c0d0424c r5:c200bfb0 r4:c200a000 [ 1946.793614] [] (do_work_pending) from [] (slow_work_pending+0xc/0x20) [ 1946.810553] Exception stack(0xc200bfb0 to 0xc200bff8) [ 1946.820129] bfa0: 00000000 00000000 000000aa b5e21430 [ 1946.837104] bfc0: bef867a0 00000001 bef86840 00000034 bef86838 bef86790 bef86794 bef867a0 [ 1946.854125] bfe0: 00000000 bef86798 b67b7a1c b6d626a4 60000010 b5a23760 [ 1946.865335] r10:00000000 r9:c200a000 r8:c0100224 r7:00000034 r6:bef86840 r5:00000001 [ 1946.881914] r4:bef867a0 [ 1946.888793] ---[ end trace 7387f2a9725b28d0 ]--- Fixes: 5e33f6fdf735 ("usb: gadget: ffs: add eventfd notification about ffs events") Cc: stable Signed-off-by: Vincent Pelletier Link: https://lore.kernel.org/r/f79eeea29f3f98de6782a064ec0f7351ad2f598f.1639793920.git.plr.vincent@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index e20c19a0f106..a7e069b18544 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1773,11 +1773,15 @@ static void ffs_data_clear(struct ffs_data *ffs) BUG_ON(ffs->gadget); - if (ffs->epfiles) + if (ffs->epfiles) { ffs_epfiles_destroy(ffs->epfiles, ffs->eps_count); + ffs->epfiles = NULL; + } - if (ffs->ffs_eventfd) + if (ffs->ffs_eventfd) { eventfd_ctx_put(ffs->ffs_eventfd); + ffs->ffs_eventfd = NULL; + } kfree(ffs->raw_descs_data); kfree(ffs->raw_strings); @@ -1790,7 +1794,6 @@ static void ffs_data_reset(struct ffs_data *ffs) ffs_data_clear(ffs); - ffs->epfiles = NULL; ffs->raw_descs_data = NULL; ffs->raw_descs = NULL; ffs->raw_strings = NULL; From e3d4621c22f90c33321ae6a6baab60cdb8e5a77c Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Sat, 18 Dec 2021 17:57:46 +0800 Subject: [PATCH 163/251] usb: mtu3: fix interval value for intr and isoc Use the Interval value from isoc/intr endpoint descriptor, no need minus one. The original code doesn't cause transfer error for normal cases, but it may have side effect with respond time of ERDY or tPingTimeout. Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/20211218095749.6250-1-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/mtu3/mtu3_gadget.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/mtu3/mtu3_gadget.c b/drivers/usb/mtu3/mtu3_gadget.c index a9a65b4bbfed..c51be015345b 100644 --- a/drivers/usb/mtu3/mtu3_gadget.c +++ b/drivers/usb/mtu3/mtu3_gadget.c @@ -77,7 +77,7 @@ static int mtu3_ep_enable(struct mtu3_ep *mep) if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) { interval = desc->bInterval; - interval = clamp_val(interval, 1, 16) - 1; + interval = clamp_val(interval, 1, 16); if (usb_endpoint_xfer_isoc(desc) && comp_desc) mult = comp_desc->bmAttributes; } @@ -89,7 +89,7 @@ static int mtu3_ep_enable(struct mtu3_ep *mep) if (usb_endpoint_xfer_isoc(desc) || usb_endpoint_xfer_int(desc)) { interval = desc->bInterval; - interval = clamp_val(interval, 1, 16) - 1; + interval = clamp_val(interval, 1, 16); mult = usb_endpoint_maxp_mult(desc) - 1; } break; From a7aae769ca626819a7f9f078ebdc69a8a1b00c81 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Sat, 18 Dec 2021 17:57:47 +0800 Subject: [PATCH 164/251] usb: mtu3: add memory barrier before set GPD's HWO There is a seldom issue that the controller access invalid address and trigger devapc or emimpu violation. That is due to memory access is out of order and cause gpd data is not correct. Add mb() to prohibit compiler or cpu from reordering to make sure GPD is fully written before setting its HWO. Fixes: 48e0d3735aa5 ("usb: mtu3: supports new QMU format") Cc: stable@vger.kernel.org Reported-by: Eddie Hung Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/20211218095749.6250-2-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/mtu3/mtu3_qmu.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/mtu3/mtu3_qmu.c b/drivers/usb/mtu3/mtu3_qmu.c index 3f414f91b589..2ea3157ddb6e 100644 --- a/drivers/usb/mtu3/mtu3_qmu.c +++ b/drivers/usb/mtu3/mtu3_qmu.c @@ -273,6 +273,8 @@ static int mtu3_prepare_tx_gpd(struct mtu3_ep *mep, struct mtu3_request *mreq) gpd->dw3_info |= cpu_to_le32(GPD_EXT_FLAG_ZLP); } + /* prevent reorder, make sure GPD's HWO is set last */ + mb(); gpd->dw0_info |= cpu_to_le32(GPD_FLAGS_IOC | GPD_FLAGS_HWO); mreq->gpd = gpd; @@ -306,6 +308,8 @@ static int mtu3_prepare_rx_gpd(struct mtu3_ep *mep, struct mtu3_request *mreq) gpd->next_gpd = cpu_to_le32(lower_32_bits(enq_dma)); ext_addr |= GPD_EXT_NGP(mtu, upper_32_bits(enq_dma)); gpd->dw3_info = cpu_to_le32(ext_addr); + /* prevent reorder, make sure GPD's HWO is set last */ + mb(); gpd->dw0_info |= cpu_to_le32(GPD_FLAGS_IOC | GPD_FLAGS_HWO); mreq->gpd = gpd; @@ -445,7 +449,8 @@ static void qmu_tx_zlp_error_handler(struct mtu3 *mtu, u8 epnum) return; } mtu3_setbits(mbase, MU3D_EP_TXCR0(mep->epnum), TX_TXPKTRDY); - + /* prevent reorder, make sure GPD's HWO is set last */ + mb(); /* by pass the current GDP */ gpd_current->dw0_info |= cpu_to_le32(GPD_FLAGS_BPS | GPD_FLAGS_HWO); From 8c313e3bfd9adae8d5c4ba1cc696dcbc86fbf9bf Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Sat, 18 Dec 2021 17:57:48 +0800 Subject: [PATCH 165/251] usb: mtu3: fix list_head check warning This is caused by uninitialization of list_head. BUG: KASAN: use-after-free in __list_del_entry_valid+0x34/0xe4 Call trace: dump_backtrace+0x0/0x298 show_stack+0x24/0x34 dump_stack+0x130/0x1a8 print_address_description+0x88/0x56c __kasan_report+0x1b8/0x2a0 kasan_report+0x14/0x20 __asan_load8+0x9c/0xa0 __list_del_entry_valid+0x34/0xe4 mtu3_req_complete+0x4c/0x300 [mtu3] mtu3_gadget_stop+0x168/0x448 [mtu3] usb_gadget_unregister_driver+0x204/0x3a0 unregister_gadget_item+0x44/0xa4 Fixes: 83374e035b62 ("usb: mtu3: add tracepoints to help debug") Cc: stable@vger.kernel.org Reported-by: Yuwen Ng Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/20211218095749.6250-3-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/mtu3/mtu3_gadget.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/mtu3/mtu3_gadget.c b/drivers/usb/mtu3/mtu3_gadget.c index c51be015345b..b6c8a4a99c4d 100644 --- a/drivers/usb/mtu3/mtu3_gadget.c +++ b/drivers/usb/mtu3/mtu3_gadget.c @@ -235,6 +235,7 @@ struct usb_request *mtu3_alloc_request(struct usb_ep *ep, gfp_t gfp_flags) mreq->request.dma = DMA_ADDR_INVALID; mreq->epnum = mep->epnum; mreq->mep = mep; + INIT_LIST_HEAD(&mreq->list); trace_mtu3_alloc_request(mreq); return &mreq->request; From 43f3b8cbcf93da7c2755af4a543280c31f4adf16 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Sat, 18 Dec 2021 17:57:49 +0800 Subject: [PATCH 166/251] usb: mtu3: set interval of FS intr and isoc endpoint Add support to set interval also for FS intr and isoc endpoint. Fixes: 4d79e042ed8b ("usb: mtu3: add support for usb3.1 IP") Cc: stable@vger.kernel.org Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/20211218095749.6250-4-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/mtu3/mtu3_gadget.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/mtu3/mtu3_gadget.c b/drivers/usb/mtu3/mtu3_gadget.c index b6c8a4a99c4d..9977600616d7 100644 --- a/drivers/usb/mtu3/mtu3_gadget.c +++ b/drivers/usb/mtu3/mtu3_gadget.c @@ -92,6 +92,13 @@ static int mtu3_ep_enable(struct mtu3_ep *mep) interval = clamp_val(interval, 1, 16); mult = usb_endpoint_maxp_mult(desc) - 1; } + break; + case USB_SPEED_FULL: + if (usb_endpoint_xfer_isoc(desc)) + interval = clamp_val(desc->bInterval, 1, 16); + else if (usb_endpoint_xfer_int(desc)) + interval = clamp_val(desc->bInterval, 1, 255); + break; default: break; /*others are ignored */ From cfd0d84ba28c18b531648c9d4a35ecca89ad9901 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Mon, 20 Dec 2021 11:01:50 -0800 Subject: [PATCH 167/251] binder: fix async_free_space accounting for empty parcels In 4.13, commit 74310e06be4d ("android: binder: Move buffer out of area shared with user space") fixed a kernel structure visibility issue. As part of that patch, sizeof(void *) was used as the buffer size for 0-length data payloads so the driver could detect abusive clients sending 0-length asynchronous transactions to a server by enforcing limits on async_free_size. Unfortunately, on the "free" side, the accounting of async_free_space did not add the sizeof(void *) back. The result was that up to 8-bytes of async_free_space were leaked on every async transaction of 8-bytes or less. These small transactions are uncommon, so this accounting issue has gone undetected for several years. The fix is to use "buffer_size" (the allocated buffer size) instead of "size" (the logical buffer size) when updating the async_free_space during the free operation. These are the same except for this corner case of asynchronous transactions with payloads < 8 bytes. Fixes: 74310e06be4d ("android: binder: Move buffer out of area shared with user space") Signed-off-by: Todd Kjos Cc: stable@vger.kernel.org # 4.14+ Link: https://lore.kernel.org/r/20211220190150.2107077-1-tkjos@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 340515f54498..47bc74a8c7b6 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -671,7 +671,7 @@ static void binder_free_buf_locked(struct binder_alloc *alloc, BUG_ON(buffer->user_data > alloc->buffer + alloc->buffer_size); if (buffer->async_transaction) { - alloc->free_async_space += size + sizeof(struct binder_buffer); + alloc->free_async_space += buffer_size + sizeof(struct binder_buffer); binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC, "%d: binder_free_buf size %zd async free %zd\n", From 3a0152b219523227c2a62a0a122cf99608287176 Mon Sep 17 00:00:00 2001 From: Andra Paraschiv Date: Mon, 20 Dec 2021 19:58:56 +0000 Subject: [PATCH 168/251] nitro_enclaves: Use get_user_pages_unlocked() call to handle mmap assert After commit 5b78ed24e8ec ("mm/pagemap: add mmap_assert_locked() annotations to find_vma*()"), the call to get_user_pages() will trigger the mmap assert. static inline void mmap_assert_locked(struct mm_struct *mm) { lockdep_assert_held(&mm->mmap_lock); VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm); } [ 62.521410] kernel BUG at include/linux/mmap_lock.h:156! ........................................................... [ 62.538938] RIP: 0010:find_vma+0x32/0x80 ........................................................... [ 62.605889] Call Trace: [ 62.608502] [ 62.610956] ? lock_timer_base+0x61/0x80 [ 62.614106] find_extend_vma+0x19/0x80 [ 62.617195] __get_user_pages+0x9b/0x6a0 [ 62.620356] __gup_longterm_locked+0x42d/0x450 [ 62.623721] ? finish_wait+0x41/0x80 [ 62.626748] ? __kmalloc+0x178/0x2f0 [ 62.629768] ne_set_user_memory_region_ioctl.isra.0+0x225/0x6a0 [nitro_enclaves] [ 62.635776] ne_enclave_ioctl+0x1cf/0x6d7 [nitro_enclaves] [ 62.639541] __x64_sys_ioctl+0x82/0xb0 [ 62.642620] do_syscall_64+0x3b/0x90 [ 62.645642] entry_SYSCALL_64_after_hwframe+0x44/0xae Use get_user_pages_unlocked() when setting the enclave memory regions. That's a similar pattern as mmap_read_lock() used together with get_user_pages(). Fixes: 5b78ed24e8ec ("mm/pagemap: add mmap_assert_locked() annotations to find_vma*()") Cc: stable@vger.kernel.org Signed-off-by: Andra Paraschiv Link: https://lore.kernel.org/r/20211220195856.6549-1-andraprs@amazon.com Signed-off-by: Greg Kroah-Hartman --- drivers/virt/nitro_enclaves/ne_misc_dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/virt/nitro_enclaves/ne_misc_dev.c b/drivers/virt/nitro_enclaves/ne_misc_dev.c index 8939612ee0e0..6894ccb868a6 100644 --- a/drivers/virt/nitro_enclaves/ne_misc_dev.c +++ b/drivers/virt/nitro_enclaves/ne_misc_dev.c @@ -886,8 +886,9 @@ static int ne_set_user_memory_region_ioctl(struct ne_enclave *ne_enclave, goto put_pages; } - gup_rc = get_user_pages(mem_region.userspace_addr + memory_size, 1, FOLL_GET, - ne_mem_region->pages + i, NULL); + gup_rc = get_user_pages_unlocked(mem_region.userspace_addr + memory_size, 1, + ne_mem_region->pages + i, FOLL_GET); + if (gup_rc < 0) { rc = gup_rc; From e4844092581ceec22489b66c42edc88bc6079783 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 21 Dec 2021 13:28:25 +0200 Subject: [PATCH 169/251] xhci: Fresco FL1100 controller should not have BROKEN_MSI quirk set. The Fresco Logic FL1100 controller needs the TRUST_TX_LENGTH quirk like other Fresco controllers, but should not have the BROKEN_MSI quirks set. BROKEN_MSI quirk causes issues in detecting usb drives connected to docks with this FL1100 controller. The BROKEN_MSI flag was apparently accidentally set together with the TRUST_TX_LENGTH quirk Original patch went to stable so this should go there as well. Fixes: ea0f69d82119 ("xhci: Enable trust tx length quirk for Fresco FL11 USB controller") Cc: stable@vger.kernel.org cc: Nikolay Martynov Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20211221112825.54690-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 3af017883231..5c351970cdf1 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -123,7 +123,6 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) /* Look for vendor-specific quirks */ if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC && (pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_PDK || - pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1100 || pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1400)) { if (pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_PDK && pdev->revision == 0x0) { @@ -158,6 +157,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1009) xhci->quirks |= XHCI_BROKEN_STREAMS; + if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC && + pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1100) + xhci->quirks |= XHCI_TRUST_TX_LENGTH; + if (pdev->vendor == PCI_VENDOR_ID_NEC) xhci->quirks |= XHCI_NEC_HOST; From ff31ee0a0f471776f67be5e5275c18d17736fc6b Mon Sep 17 00:00:00 2001 From: Yann Gautier Date: Wed, 15 Dec 2021 15:17:26 +0100 Subject: [PATCH 170/251] mmc: mmci: stm32: clear DLYB_CR after sending tuning command During test campaign, and especially after several unbind/bind sequences, it has been seen that the SD-card on SDMMC1 thread could freeze. The freeze always appear on a CMD23 following a CMD19. Checking SDMMC internal registers shows that the tuning command (CMD19) has failed. The freeze is then due to the delay block involved in the tuning sequence. To correct this, clear the delay block register DLYB_CR register after the tuning commands. Signed-off-by: Christophe Kerello Signed-off-by: Yann Gautier Reviewed-by: Linus Walleij Fixes: 1103f807a3b9 ("mmc: mmci_sdmmc: Add execute tuning with delay block") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211215141727.4901-4-yann.gautier@foss.st.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/mmci_stm32_sdmmc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c index fdaa11f92fe6..a75d3dd34d18 100644 --- a/drivers/mmc/host/mmci_stm32_sdmmc.c +++ b/drivers/mmc/host/mmci_stm32_sdmmc.c @@ -441,6 +441,8 @@ static int sdmmc_dlyb_phase_tuning(struct mmci_host *host, u32 opcode) return -EINVAL; } + writel_relaxed(0, dlyb->base + DLYB_CR); + phase = end_of_len - max_len / 2; sdmmc_dlyb_set_cfgr(dlyb, dlyb->unit, phase, false); From ffb76a86f8096a8206be03b14adda6092e18e275 Mon Sep 17 00:00:00 2001 From: Wu Bo Date: Tue, 21 Dec 2021 15:00:34 +0800 Subject: [PATCH 171/251] ipmi: Fix UAF when uninstall ipmi_si and ipmi_msghandler module Hi, When testing install and uninstall of ipmi_si.ko and ipmi_msghandler.ko, the system crashed. The log as follows: [ 141.087026] BUG: unable to handle kernel paging request at ffffffffc09b3a5a [ 141.087241] PGD 8fe4c0d067 P4D 8fe4c0d067 PUD 8fe4c0f067 PMD 103ad89067 PTE 0 [ 141.087464] Oops: 0010 [#1] SMP NOPTI [ 141.087580] CPU: 67 PID: 668 Comm: kworker/67:1 Kdump: loaded Not tainted 4.18.0.x86_64 #47 [ 141.088009] Workqueue: events 0xffffffffc09b3a40 [ 141.088009] RIP: 0010:0xffffffffc09b3a5a [ 141.088009] Code: Bad RIP value. [ 141.088009] RSP: 0018:ffffb9094e2c3e88 EFLAGS: 00010246 [ 141.088009] RAX: 0000000000000000 RBX: ffff9abfdb1f04a0 RCX: 0000000000000000 [ 141.088009] RDX: 0000000000000000 RSI: 0000000000000246 RDI: 0000000000000246 [ 141.088009] RBP: 0000000000000000 R08: ffff9abfffee3cb8 R09: 00000000000002e1 [ 141.088009] R10: ffffb9094cb73d90 R11: 00000000000f4240 R12: ffff9abfffee8700 [ 141.088009] R13: 0000000000000000 R14: ffff9abfdb1f04a0 R15: ffff9abfdb1f04a8 [ 141.088009] FS: 0000000000000000(0000) GS:ffff9abfffec0000(0000) knlGS:0000000000000000 [ 141.088009] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 141.088009] CR2: ffffffffc09b3a30 CR3: 0000008fe4c0a001 CR4: 00000000007606e0 [ 141.088009] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 141.088009] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 141.088009] PKRU: 55555554 [ 141.088009] Call Trace: [ 141.088009] ? process_one_work+0x195/0x390 [ 141.088009] ? worker_thread+0x30/0x390 [ 141.088009] ? process_one_work+0x390/0x390 [ 141.088009] ? kthread+0x10d/0x130 [ 141.088009] ? kthread_flush_work_fn+0x10/0x10 [ 141.088009] ? ret_from_fork+0x35/0x40] BUG: unable to handle kernel paging request at ffffffffc0b28a5a [ 200.223240] PGD 97fe00d067 P4D 97fe00d067 PUD 97fe00f067 PMD a580cbf067 PTE 0 [ 200.223464] Oops: 0010 [#1] SMP NOPTI [ 200.223579] CPU: 63 PID: 664 Comm: kworker/63:1 Kdump: loaded Not tainted 4.18.0.x86_64 #46 [ 200.224008] Workqueue: events 0xffffffffc0b28a40 [ 200.224008] RIP: 0010:0xffffffffc0b28a5a [ 200.224008] Code: Bad RIP value. [ 200.224008] RSP: 0018:ffffbf3c8e2a3e88 EFLAGS: 00010246 [ 200.224008] RAX: 0000000000000000 RBX: ffffa0799ad6bca0 RCX: 0000000000000000 [ 200.224008] RDX: 0000000000000000 RSI: 0000000000000246 RDI: 0000000000000246 [ 200.224008] RBP: 0000000000000000 R08: ffff9fe43fde3cb8 R09: 00000000000000d5 [ 200.224008] R10: ffffbf3c8cb53d90 R11: 00000000000f4240 R12: ffff9fe43fde8700 [ 200.224008] R13: 0000000000000000 R14: ffffa0799ad6bca0 R15: ffffa0799ad6bca8 [ 200.224008] FS: 0000000000000000(0000) GS:ffff9fe43fdc0000(0000) knlGS:0000000000000000 [ 200.224008] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 200.224008] CR2: ffffffffc0b28a30 CR3: 00000097fe00a002 CR4: 00000000007606e0 [ 200.224008] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 200.224008] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 200.224008] PKRU: 55555554 [ 200.224008] Call Trace: [ 200.224008] ? process_one_work+0x195/0x390 [ 200.224008] ? worker_thread+0x30/0x390 [ 200.224008] ? process_one_work+0x390/0x390 [ 200.224008] ? kthread+0x10d/0x130 [ 200.224008] ? kthread_flush_work_fn+0x10/0x10 [ 200.224008] ? ret_from_fork+0x35/0x40 [ 200.224008] kernel fault(0x1) notification starting on CPU 63 [ 200.224008] kernel fault(0x1) notification finished on CPU 63 [ 200.224008] CR2: ffffffffc0b28a5a [ 200.224008] ---[ end trace c82a412d93f57412 ]--- The reason is as follows: T1: rmmod ipmi_si. ->ipmi_unregister_smi() -> ipmi_bmc_unregister() -> __ipmi_bmc_unregister() -> kref_put(&bmc->usecount, cleanup_bmc_device); -> schedule_work(&bmc->remove_work); T2: rmmod ipmi_msghandler. ipmi_msghander module uninstalled, and the module space will be freed. T3: bmc->remove_work doing cleanup the bmc resource. -> cleanup_bmc_work() -> platform_device_unregister(&bmc->pdev); -> platform_device_del(pdev); -> device_del(&pdev->dev); -> kobject_uevent(&dev->kobj, KOBJ_REMOVE); -> kobject_uevent_env() -> dev_uevent() -> if (dev->type && dev->type->name) 'dev->type'(bmc_device_type) pointer space has freed when uninstall ipmi_msghander module, 'dev->type->name' cause the system crash. drivers/char/ipmi/ipmi_msghandler.c: 2820 static const struct device_type bmc_device_type = { 2821 .groups = bmc_dev_attr_groups, 2822 }; Steps to reproduce: Add a time delay in cleanup_bmc_work() function, and uninstall ipmi_si and ipmi_msghandler module. 2910 static void cleanup_bmc_work(struct work_struct *work) 2911 { 2912 struct bmc_device *bmc = container_of(work, struct bmc_device, 2913 remove_work); 2914 int id = bmc->pdev.id; /* Unregister overwrites id */ 2915 2916 msleep(3000); <--- 2917 platform_device_unregister(&bmc->pdev); 2918 ida_simple_remove(&ipmi_bmc_ida, id); 2919 } Use 'remove_work_wq' instead of 'system_wq' to solve this issues. Fixes: b2cfd8ab4add ("ipmi: Rework device id and guid handling to catch changing BMCs") Signed-off-by: Wu Bo Message-Id: <1640070034-56671-1-git-send-email-wubo40@huawei.com> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_msghandler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 266c7bc58dda..c59265146e9c 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -3031,7 +3031,7 @@ cleanup_bmc_device(struct kref *ref) * with removing the device attributes while reading a device * attribute. */ - schedule_work(&bmc->remove_work); + queue_work(remove_work_wq, &bmc->remove_work); } /* From 3e4d9a485029aa9e172dab5420abe775fd86f8e8 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Mon, 20 Dec 2021 14:06:56 +0100 Subject: [PATCH 172/251] gpio: virtio: remove timeout The driver imposes an arbitrary one second timeout on virtio requests, but the specification doesn't prevent the virtio device from taking longer to process requests, so remove this timeout to support all systems and device implementations. Fixes: 3a29355a22c0275fe86 ("gpio: Add virtio-gpio driver") Signed-off-by: Vincent Whitchurch Acked-by: Michael S. Tsirkin Acked-by: Viresh Kumar Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-virtio.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpio/gpio-virtio.c b/drivers/gpio/gpio-virtio.c index 84f96b78f32a..9f4941bc5760 100644 --- a/drivers/gpio/gpio-virtio.c +++ b/drivers/gpio/gpio-virtio.c @@ -100,11 +100,7 @@ static int _virtio_gpio_req(struct virtio_gpio *vgpio, u16 type, u16 gpio, virtqueue_kick(vgpio->request_vq); mutex_unlock(&vgpio->lock); - if (!wait_for_completion_timeout(&line->completion, HZ)) { - dev_err(dev, "GPIO operation timed out\n"); - ret = -ETIMEDOUT; - goto out; - } + wait_for_completion(&line->completion); if (unlikely(res->status != VIRTIO_GPIO_STATUS_OK)) { dev_err(dev, "GPIO request failed: %d\n", gpio); From 3f345e907a8e7c56fdebf7231cd67afc85d02aaa Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 21 Dec 2021 17:03:52 +0300 Subject: [PATCH 173/251] usb: typec: ucsi: Only check the contract if there is a connection The driver must make sure there is an actual connection before checking details about the USB Power Delivery contract. Those details are not valid unless there is a connection. This fixes NULL pointer dereference that is caused by an attempt to register bogus partner alternate mode that the firmware on some platform may report before the actual connection. Link: https://bugzilla.kernel.org/show_bug.cgi?id=215117 Fixes: 6cbe4b2d5a3f ("usb: typec: ucsi: Check the partner alt modes always if there is PD contract") Reported-by: Chris Hixon Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/eb34f98f-00ef-3238-2daa-80481116035d@leemhuis.info/ Link: https://lore.kernel.org/r/20211221140352.45501-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 6aa28384f77f..08561bf7c40c 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -1150,7 +1150,9 @@ static int ucsi_register_port(struct ucsi *ucsi, int index) ret = 0; } - if (UCSI_CONSTAT_PWR_OPMODE(con->status.flags) == UCSI_CONSTAT_PWR_OPMODE_PD) { + if (con->partner && + UCSI_CONSTAT_PWR_OPMODE(con->status.flags) == + UCSI_CONSTAT_PWR_OPMODE_PD) { ucsi_get_src_pdos(con); ucsi_check_altmodes(con); } From fdba608f15e2427419997b0898750a49a735afcb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 21 Dec 2021 10:37:00 -0500 Subject: [PATCH 174/251] KVM: VMX: Wake vCPU when delivering posted IRQ even if vCPU == this vCPU Drop a check that guards triggering a posted interrupt on the currently running vCPU, and more importantly guards waking the target vCPU if triggering a posted interrupt fails because the vCPU isn't IN_GUEST_MODE. If a vIRQ is delivered from asynchronous context, the target vCPU can be the currently running vCPU and can also be blocking, in which case skipping kvm_vcpu_wake_up() is effectively dropping what is supposed to be a wake event for the vCPU. The "do nothing" logic when "vcpu == running_vcpu" mostly works only because the majority of calls to ->deliver_posted_interrupt(), especially when using posted interrupts, come from synchronous KVM context. But if a device is exposed to the guest using vfio-pci passthrough, the VFIO IRQ and vCPU are bound to the same pCPU, and the IRQ is _not_ configured to use posted interrupts, wake events from the device will be delivered to KVM from IRQ context, e.g. vfio_msihandler() | |-> eventfd_signal() | |-> ... | |-> irqfd_wakeup() | |->kvm_arch_set_irq_inatomic() | |-> kvm_irq_delivery_to_apic_fast() | |-> kvm_apic_set_irq() This also aligns the non-nested and nested usage of triggering posted interrupts, and will allow for additional cleanups. Fixes: 379a3c8ee444 ("KVM: VMX: Optimize posted-interrupt delivery for timer fastpath") Cc: stable@vger.kernel.org Reported-by: Longpeng (Mike) Signed-off-by: Sean Christopherson Reviewed-by: Maxim Levitsky Message-Id: <20211208015236.1616697-18-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 5974a88c9d35..0dbf94eb954f 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3964,8 +3964,7 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) if (pi_test_and_set_on(&vmx->pi_desc)) return 0; - if (vcpu != kvm_get_running_vcpu() && - !kvm_vcpu_trigger_posted_interrupt(vcpu, false)) + if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false)) kvm_vcpu_kick(vcpu); return 0; From 804034c4ffc502795cea9b3867acb2ec7fad99ba Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Fri, 10 Dec 2021 07:07:53 +0000 Subject: [PATCH 175/251] platform/mellanox: mlxbf-pmc: Fix an IS_ERR() vs NULL bug in mlxbf_pmc_map_counters The devm_ioremap() function returns NULL on error, it doesn't return error pointers. Also according to doc of device_property_read_u64_array, values in info array are properties of device or NULL. Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20211210070753.10761-1-linmq006@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/mellanox/mlxbf-pmc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/mellanox/mlxbf-pmc.c b/drivers/platform/mellanox/mlxbf-pmc.c index 04bc3b50aa7a..65b4a819f1bd 100644 --- a/drivers/platform/mellanox/mlxbf-pmc.c +++ b/drivers/platform/mellanox/mlxbf-pmc.c @@ -1374,8 +1374,8 @@ static int mlxbf_pmc_map_counters(struct device *dev) pmc->block[i].counters = info[2]; pmc->block[i].type = info[3]; - if (IS_ERR(pmc->block[i].mmio_base)) - return PTR_ERR(pmc->block[i].mmio_base); + if (!pmc->block[i].mmio_base) + return -ENOMEM; ret = mlxbf_pmc_create_groups(dev, i); if (ret) From 09fc14061f3ed28899c23b8714c066946fdbd43e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 10 Dec 2021 08:35:29 -0600 Subject: [PATCH 176/251] platform/x86: amd-pmc: only use callbacks for suspend This driver is intended to be used exclusively for suspend to idle so callbacks to send OS_HINT during hibernate and S5 will set OS_HINT at the wrong time leading to an undefined behavior. Cc: stable@vger.kernel.org Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20211210143529.10594-1-mario.limonciello@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index 841c44cd64c2..230593ae5d6d 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -508,7 +508,8 @@ static int __maybe_unused amd_pmc_resume(struct device *dev) } static const struct dev_pm_ops amd_pmc_pm_ops = { - SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(amd_pmc_suspend, amd_pmc_resume) + .suspend_noirq = amd_pmc_suspend, + .resume_noirq = amd_pmc_resume, }; static const struct pci_device_id pmc_pci_ids[] = { From eb66fb03a727cde0ab9b1a3858de55c26f3007da Mon Sep 17 00:00:00 2001 From: Wang Qing Date: Tue, 14 Dec 2021 04:18:36 -0800 Subject: [PATCH 177/251] platform/x86: apple-gmux: use resource_size() with res This should be (res->end - res->start + 1) here actually, use resource_size() derectly. Signed-off-by: Wang Qing Link: https://lore.kernel.org/r/1639484316-75873-1-git-send-email-wangqing@vivo.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/apple-gmux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c index 9aae45a45200..57553f9b4d1d 100644 --- a/drivers/platform/x86/apple-gmux.c +++ b/drivers/platform/x86/apple-gmux.c @@ -625,7 +625,7 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id) } gmux_data->iostart = res->start; - gmux_data->iolen = res->end - res->start; + gmux_data->iolen = resource_size(res); if (gmux_data->iolen < GMUX_MIN_IO_LEN) { pr_err("gmux I/O region too small (%lu < %u)\n", From 8f66fce0f46560b9e910787ff7ad0974441c4f9c Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Tue, 21 Dec 2021 13:21:22 -0500 Subject: [PATCH 178/251] parisc: Correct completer in lws start The completer in the "or,ev %r1,%r30,%r30" instruction is reversed, so we are not clipping the LWS number when we are called from a 32-bit process (W=0). We need to nulify the following depdi instruction when the least-significant bit of %r30 is 1. If the %r20 register is not clipped, a user process could perform a LWS call that would branch to an undefined location in the kernel and potentially crash the machine. Signed-off-by: John David Anglin Cc: stable@vger.kernel.org # 4.19+ Signed-off-by: Helge Deller --- arch/parisc/kernel/syscall.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index d2497b339d13..65c88ca7a7ac 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -472,7 +472,7 @@ lws_start: extrd,u %r1,PSW_W_BIT,1,%r1 /* sp must be aligned on 4, so deposit the W bit setting into * the bottom of sp temporarily */ - or,ev %r1,%r30,%r30 + or,od %r1,%r30,%r30 /* Clip LWS number to a 32-bit value for 32-bit processes */ depdi 0, 31, 32, %r20 From d3a5a68cff47f6eead84504c3c28376b85053242 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Tue, 21 Dec 2021 13:33:16 -0500 Subject: [PATCH 179/251] parisc: Fix mask used to select futex spinlock The address bits used to select the futex spinlock need to match those used in the LWS code in syscall.S. The mask 0x3f8 only selects 7 bits. It should select 8 bits. This change fixes the glibc nptl/tst-cond24 and nptl/tst-cond25 tests. Signed-off-by: John David Anglin Fixes: 53a42b6324b8 ("parisc: Switch to more fine grained lws locks") Cc: stable@vger.kernel.org # 5.10+ Signed-off-by: Helge Deller --- arch/parisc/include/asm/futex.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/include/asm/futex.h b/arch/parisc/include/asm/futex.h index 70cf8f0a7617..9cd4dd6e63ad 100644 --- a/arch/parisc/include/asm/futex.h +++ b/arch/parisc/include/asm/futex.h @@ -14,7 +14,7 @@ static inline void _futex_spin_lock(u32 __user *uaddr) { extern u32 lws_lock_start[]; - long index = ((long)uaddr & 0x3f8) >> 1; + long index = ((long)uaddr & 0x7f8) >> 1; arch_spinlock_t *s = (arch_spinlock_t *)&lws_lock_start[index]; preempt_disable(); arch_spin_lock(s); @@ -24,7 +24,7 @@ static inline void _futex_spin_unlock(u32 __user *uaddr) { extern u32 lws_lock_start[]; - long index = ((long)uaddr & 0x3f8) >> 1; + long index = ((long)uaddr & 0x7f8) >> 1; arch_spinlock_t *s = (arch_spinlock_t *)&lws_lock_start[index]; arch_spin_unlock(s); preempt_enable(); From cb8747b7d2a9e3d687a19a007575071d4b71cd05 Mon Sep 17 00:00:00 2001 From: Ismael Luceno Date: Mon, 15 Nov 2021 14:46:47 +0100 Subject: [PATCH 180/251] uapi: Fix undefined __always_inline on non-glibc systems This macro is defined by glibc itself, which makes the issue go unnoticed on those systems. On non-glibc systems it causes build failures on several utilities and libraries, like bpftool and objtool. Fixes: 1d509f2a6ebc ("x86/insn: Support big endian cross-compiles") Fixes: 2d7ce0e8a704 ("tools/virtio: more stubs") Fixes: 3fb321fde22d ("selftests/net: ipv6 flowlabel") Fixes: 50b3ed57dee9 ("selftests/bpf: test bpf flow dissection") Fixes: 9cacf81f8161 ("bpf: Remove extra lock_sock for TCP_ZEROCOPY_RECEIVE") Fixes: a4b2061242ec ("tools include uapi: Grab a copy of linux/in.h") Fixes: b12d6ec09730 ("bpf: btf: add btf print functionality") Fixes: c0dd967818a2 ("tools, include: Grab a copy of linux/erspan.h") Fixes: c4b6014e8bb0 ("tools: Add copy of perf_event.h to tools/include/linux/") Signed-off-by: Ismael Luceno Acked-by: Masami Hiramatsu Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20211115134647.1921-1-ismael@iodev.co.uk Cc: Martin Schwidefsky Cc: Vasily Gorbik --- include/uapi/linux/byteorder/big_endian.h | 1 + include/uapi/linux/byteorder/little_endian.h | 1 + 2 files changed, 2 insertions(+) diff --git a/include/uapi/linux/byteorder/big_endian.h b/include/uapi/linux/byteorder/big_endian.h index 2199adc6a6c2..80aa5c41a763 100644 --- a/include/uapi/linux/byteorder/big_endian.h +++ b/include/uapi/linux/byteorder/big_endian.h @@ -9,6 +9,7 @@ #define __BIG_ENDIAN_BITFIELD #endif +#include #include #include diff --git a/include/uapi/linux/byteorder/little_endian.h b/include/uapi/linux/byteorder/little_endian.h index 601c904fd5cd..cd98982e7523 100644 --- a/include/uapi/linux/byteorder/little_endian.h +++ b/include/uapi/linux/byteorder/little_endian.h @@ -9,6 +9,7 @@ #define __LITTLE_ENDIAN_BITFIELD #endif +#include #include #include From dcce50e6cc4d86a63dc0a9a6ee7d4f948ccd53a1 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 8 Nov 2021 14:35:59 -0800 Subject: [PATCH 181/251] compiler.h: Fix annotation macro misplacement with Clang When building with Clang and CONFIG_TRACE_BRANCH_PROFILING, there are a lot of unreachable warnings, like: arch/x86/kernel/traps.o: warning: objtool: handle_xfd_event()+0x134: unreachable instruction Without an input to the inline asm, 'volatile' is ignored for some reason and Clang feels free to move the reachable() annotation away from its intended location. Fix that by re-adding the counter value to the inputs. Fixes: f1069a8756b9 ("compiler.h: Avoid using inline asm operand modifiers") Fixes: c199f64ff93c ("instrumentation.h: Avoid using inline asm operand modifiers") Reported-by: kernel test robot Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/0417e96909b97a406323409210de7bf13df0b170.1636410380.git.jpoimboe@redhat.com Cc: Peter Zijlstra Cc: x86@kernel.org Cc: Vasily Gorbik Cc: Miroslav Benes --- include/linux/compiler.h | 4 ++-- include/linux/instrumentation.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 3d5af56337bd..429dcebe2b99 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -121,7 +121,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, asm volatile(__stringify_label(c) ":\n\t" \ ".pushsection .discard.reachable\n\t" \ ".long " __stringify_label(c) "b - .\n\t" \ - ".popsection\n\t"); \ + ".popsection\n\t" : : "i" (c)); \ }) #define annotate_reachable() __annotate_reachable(__COUNTER__) @@ -129,7 +129,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, asm volatile(__stringify_label(c) ":\n\t" \ ".pushsection .discard.unreachable\n\t" \ ".long " __stringify_label(c) "b - .\n\t" \ - ".popsection\n\t"); \ + ".popsection\n\t" : : "i" (c)); \ }) #define annotate_unreachable() __annotate_unreachable(__COUNTER__) diff --git a/include/linux/instrumentation.h b/include/linux/instrumentation.h index fa2cd8c63dcc..24359b4a9605 100644 --- a/include/linux/instrumentation.h +++ b/include/linux/instrumentation.h @@ -11,7 +11,7 @@ asm volatile(__stringify(c) ": nop\n\t" \ ".pushsection .discard.instr_begin\n\t" \ ".long " __stringify(c) "b - .\n\t" \ - ".popsection\n\t"); \ + ".popsection\n\t" : : "i" (c)); \ }) #define instrumentation_begin() __instrumentation_begin(__COUNTER__) @@ -50,7 +50,7 @@ asm volatile(__stringify(c) ": nop\n\t" \ ".pushsection .discard.instr_end\n\t" \ ".long " __stringify(c) "b - .\n\t" \ - ".popsection\n\t"); \ + ".popsection\n\t" : : "i" (c)); \ }) #define instrumentation_end() __instrumentation_end(__COUNTER__) #else From aacb2016063dfa6da9378d76734cd9dc1e977619 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 13 Dec 2021 11:40:44 +0900 Subject: [PATCH 182/251] parisc: remove ARCH_DEFCONFIG Commit 2a86f6612164 ("kbuild: use KBUILD_DEFCONFIG as the fallback for DEFCONFIG_LIST") removed ARCH_DEFCONFIG because it does not make much sense. In the same development cycle, Commit ededa081ed20 ("parisc: Fix defconfig selection") added ARCH_DEFCONFIG for parisc. Please use KBUILD_DEFCONFIG in arch/*/Makefile for defconfig selection. Signed-off-by: Masahiro Yamada Acked-by: Helge Deller Signed-off-by: Helge Deller --- arch/parisc/Kconfig | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index b2188da09c73..011dc32fdb4d 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -85,11 +85,6 @@ config MMU config STACK_GROWSUP def_bool y -config ARCH_DEFCONFIG - string - default "arch/parisc/configs/generic-32bit_defconfig" if !64BIT - default "arch/parisc/configs/generic-64bit_defconfig" if 64BIT - config GENERIC_LOCKBREAK bool default y From d7f55471db2719629f773c2d6b5742a69595bfd3 Mon Sep 17 00:00:00 2001 From: Jackie Liu Date: Fri, 17 Dec 2021 10:07:54 +0800 Subject: [PATCH 183/251] memblock: fix memblock_phys_alloc() section mismatch error Fix modpost Section mismatch error in memblock_phys_alloc() [...] WARNING: modpost: vmlinux.o(.text.unlikely+0x1dcc): Section mismatch in reference from the function memblock_phys_alloc() to the function .init.text:memblock_phys_alloc_range() The function memblock_phys_alloc() references the function __init memblock_phys_alloc_range(). This is often because memblock_phys_alloc lacks a __init annotation or the annotation of memblock_phys_alloc_range is wrong. ERROR: modpost: Section mismatches detected. Set CONFIG_SECTION_MISMATCH_WARN_ONLY=y to allow them. [...] memblock_phys_alloc() is a one-line wrapper, make it __always_inline to avoid these section mismatches. Reported-by: k2ci Suggested-by: Mike Rapoport Signed-off-by: Jackie Liu [rppt: slightly massaged changelog ] Signed-off-by: Mike Rapoport Link: https://lore.kernel.org/r/20211217020754.2874872-1-liu.yun@linux.dev --- include/linux/memblock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 8adcf1fa8096..9dc7cb239d21 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -405,8 +405,8 @@ phys_addr_t memblock_alloc_range_nid(phys_addr_t size, phys_addr_t end, int nid, bool exact_nid); phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); -static inline phys_addr_t memblock_phys_alloc(phys_addr_t size, - phys_addr_t align) +static __always_inline phys_addr_t memblock_phys_alloc(phys_addr_t size, + phys_addr_t align) { return memblock_phys_alloc_range(size, align, 0, MEMBLOCK_ALLOC_ACCESSIBLE); From b6fd77472dea76b7a2bad3a338ade920152972b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 22 Dec 2021 16:53:50 +0200 Subject: [PATCH 184/251] ALSA: hda/hdmi: Disable silent stream on GLK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The silent stream stuff recurses back into i915 audio component .get_power() from the .pin_eld_notify() hook. On GLK this will deadlock as i915 may already be holding the relevant modeset locks during .pin_eld_notify() and the GLK audio vs. CDCLK workaround will try to grab the same locks from .get_power(). Until someone comes up with a better fix just disable the silent stream support on GLK. Cc: stable@vger.kernel.org Cc: Harsha Priya Cc: Emmanuel Jillela Cc: Kai Vehmanen Cc: Takashi Iwai Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2623 Fixes: 951894cf30f4 ("ALSA: hda/hdmi: Add Intel silent stream support") Signed-off-by: Ville Syrjälä Reviewed-by: Kai Vehmanen Link: https://lore.kernel.org/r/20211222145350.24342-1-ville.syrjala@linux.intel.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 415701bd10ac..ffcde7409d2a 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -2947,7 +2947,8 @@ static int parse_intel_hdmi(struct hda_codec *codec) /* Intel Haswell and onwards; audio component with eld notifier */ static int intel_hsw_common_init(struct hda_codec *codec, hda_nid_t vendor_nid, - const int *port_map, int port_num, int dev_num) + const int *port_map, int port_num, int dev_num, + bool send_silent_stream) { struct hdmi_spec *spec; int err; @@ -2980,7 +2981,7 @@ static int intel_hsw_common_init(struct hda_codec *codec, hda_nid_t vendor_nid, * Enable silent stream feature, if it is enabled via * module param or Kconfig option */ - if (enable_silent_stream) + if (send_silent_stream) spec->send_silent_stream = true; return parse_intel_hdmi(codec); @@ -2988,12 +2989,18 @@ static int intel_hsw_common_init(struct hda_codec *codec, hda_nid_t vendor_nid, static int patch_i915_hsw_hdmi(struct hda_codec *codec) { - return intel_hsw_common_init(codec, 0x08, NULL, 0, 3); + return intel_hsw_common_init(codec, 0x08, NULL, 0, 3, + enable_silent_stream); } static int patch_i915_glk_hdmi(struct hda_codec *codec) { - return intel_hsw_common_init(codec, 0x0b, NULL, 0, 3); + /* + * Silent stream calls audio component .get_power() from + * .pin_eld_notify(). On GLK this will deadlock in i915 due + * to the audio vs. CDCLK workaround. + */ + return intel_hsw_common_init(codec, 0x0b, NULL, 0, 3, false); } static int patch_i915_icl_hdmi(struct hda_codec *codec) @@ -3004,7 +3011,8 @@ static int patch_i915_icl_hdmi(struct hda_codec *codec) */ static const int map[] = {0x0, 0x4, 0x6, 0x8, 0xa, 0xb}; - return intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map), 3); + return intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map), 3, + enable_silent_stream); } static int patch_i915_tgl_hdmi(struct hda_codec *codec) @@ -3016,7 +3024,8 @@ static int patch_i915_tgl_hdmi(struct hda_codec *codec) static const int map[] = {0x4, 0x6, 0x8, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf}; int ret; - ret = intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map), 4); + ret = intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map), 4, + enable_silent_stream); if (!ret) { struct hdmi_spec *spec = codec->spec; From 385f287f9853da402d94278e59f594501c1d1dad Mon Sep 17 00:00:00 2001 From: Libin Yang Date: Tue, 21 Dec 2021 09:08:16 +0800 Subject: [PATCH 185/251] ALSA: hda: intel-sdw-acpi: harden detection of controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The existing code currently sets a pointer to an ACPI handle before checking that it's actually a SoundWire controller. This can lead to issues where the graph walk continues and eventually fails, but the pointer was set already. This patch changes the logic so that the information provided to the caller is set when a controller is found. Reviewed-by: Péter Ujfalusi Signed-off-by: Libin Yang Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20211221010817.23636-2-yung-chuan.liao@linux.intel.com Signed-off-by: Takashi Iwai --- sound/hda/intel-sdw-acpi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c index c0123bc31c0d..ba8a872a2901 100644 --- a/sound/hda/intel-sdw-acpi.c +++ b/sound/hda/intel-sdw-acpi.c @@ -132,8 +132,6 @@ static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level, return AE_NOT_FOUND; } - info->handle = handle; - /* * On some Intel platforms, multiple children of the HDAS * device can be found, but only one of them is the SoundWire @@ -144,6 +142,9 @@ static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level, if (FIELD_GET(GENMASK(31, 28), adr) != SDW_LINK_TYPE) return AE_OK; /* keep going */ + /* found the correct SoundWire controller */ + info->handle = handle; + /* device found, stop namespace walk */ return AE_CTRL_TERMINATE; } From 78ea40efb48e978756db2ce45fcfa55bac056b91 Mon Sep 17 00:00:00 2001 From: Libin Yang Date: Tue, 21 Dec 2021 09:08:17 +0800 Subject: [PATCH 186/251] ALSA: hda: intel-sdw-acpi: go through HDAS ACPI at max depth of 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the HDAS ACPI scope, the SoundWire may not be the direct child of HDAS. It needs to go through the ACPI table at max depth of 2 to find the SoundWire device from HDAS. Reviewed-by: Péter Ujfalusi Signed-off-by: Libin Yang Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20211221010817.23636-3-yung-chuan.liao@linux.intel.com Signed-off-by: Takashi Iwai --- sound/hda/intel-sdw-acpi.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c index ba8a872a2901..b7758dbe2371 100644 --- a/sound/hda/intel-sdw-acpi.c +++ b/sound/hda/intel-sdw-acpi.c @@ -165,8 +165,14 @@ int sdw_intel_acpi_scan(acpi_handle *parent_handle, acpi_status status; info->handle = NULL; + /* + * In the HDAS ACPI scope, 'SNDW' may be either the child of + * 'HDAS' or the grandchild of 'HDAS'. So let's go through + * the ACPI from 'HDAS' at max depth of 2 to find the 'SNDW' + * device. + */ status = acpi_walk_namespace(ACPI_TYPE_DEVICE, - parent_handle, 1, + parent_handle, 2, sdw_intel_acpi_cb, NULL, info, NULL); if (ACPI_FAILURE(status) || info->handle == NULL) From 39a8fc4971a00d22536aeb7d446ee4a97810611b Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Sat, 18 Dec 2021 13:39:25 +0100 Subject: [PATCH 187/251] ALSA: rawmidi - fix the uninitalized user_pversion The user_pversion was uninitialized for the user space file structure in the open function, because the file private structure use kmalloc for the allocation. The kernel ALSA sequencer code clears the file structure, so no additional fixes are required. Cc: stable@kernel.org Cc: broonie@kernel.org BugLink: https://github.com/alsa-project/alsa-lib/issues/178 Fixes: 09d23174402d ("ALSA: rawmidi: introduce SNDRV_RAWMIDI_IOCTL_USER_PVERSION") Reported-by: syzbot+88412ee8811832b00dbe@syzkaller.appspotmail.com Signed-off-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20211218123925.2583847-1-perex@perex.cz Signed-off-by: Takashi Iwai --- sound/core/rawmidi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 6f30231bdb88..befa9809ff00 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -447,6 +447,7 @@ static int snd_rawmidi_open(struct inode *inode, struct file *file) err = -ENOMEM; goto __error; } + rawmidi_file->user_pversion = 0; init_waitqueue_entry(&wait, current); add_wait_queue(&rmidi->open_wait, &wait); while (1) { From edca7cc4b0accfa69dc032442fe0684e59c691b8 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Wed, 15 Dec 2021 20:16:46 +0100 Subject: [PATCH 188/251] ALSA: hda/realtek: Fix quirk for Clevo NJ51CU The Clevo NJ51CU comes either with the ALC293 or the ALC256 codec, but uses the 0x8686 subproduct id in both cases. The ALC256 codec needs a different quirk for the headset microphone working and and edditional quirk for sound working after suspend and resume. When waking up from s3 suspend the Coef 0x10 is set to 0x0220 instead of 0x0020 on the ALC256 codec. Setting the value manually makes the sound work again. This patch does this automatically. [ minor coding style fix by tiwai ] Signed-off-by: Werner Sembach Fixes: b5acfe152abaa ("ALSA: hda/realtek: Add some Clove SSID in the ALC293(ALC1220)") Cc: Link: https://lore.kernel.org/r/20211215191646.844644-1-wse@tuxedocomputers.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e59ff75eea75..28255e752c4a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6546,6 +6546,23 @@ static void alc233_fixup_no_audio_jack(struct hda_codec *codec, alc_process_coef_fw(codec, alc233_fixup_no_audio_jack_coefs); } +static void alc256_fixup_mic_no_presence_and_resume(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + /* + * The Clevo NJ51CU comes either with the ALC293 or the ALC256 codec, + * but uses the 0x8686 subproduct id in both cases. The ALC256 codec + * needs an additional quirk for sound working after suspend and resume. + */ + if (codec->core.vendor_id == 0x10ec0256) { + alc_update_coef_idx(codec, 0x10, 1<<9, 0); + snd_hda_codec_set_pincfg(codec, 0x19, 0x04a11120); + } else { + snd_hda_codec_set_pincfg(codec, 0x1a, 0x04a1113c); + } +} + enum { ALC269_FIXUP_GPIO2, ALC269_FIXUP_SONY_VAIO, @@ -6766,6 +6783,7 @@ enum { ALC256_FIXUP_SET_COEF_DEFAULTS, ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE, ALC233_FIXUP_NO_AUDIO_JACK, + ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME, }; static const struct hda_fixup alc269_fixups[] = { @@ -8490,6 +8508,12 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc233_fixup_no_audio_jack, }, + [ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc256_fixup_mic_no_presence_and_resume, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -8831,7 +8855,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x8562, "Clevo NH[57][0-9]RZ[Q]", ALC269_FIXUP_DMIC), SND_PCI_QUIRK(0x1558, 0x8668, "Clevo NP50B[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8680, "Clevo NJ50LU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1558, 0x8686, "Clevo NH50[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8686, "Clevo NH50[CZ]U", ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME), SND_PCI_QUIRK(0x1558, 0x8a20, "Clevo NH55DCQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8a51, "Clevo NH70RCQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8d50, "Clevo NH55RCQ-M", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), From 7b9762a5e8837b92a027d58d396a9d27f6440c36 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 22 Dec 2021 20:26:56 -0700 Subject: [PATCH 189/251] io_uring: zero iocb->ki_pos for stream file types io_uring supports using offset == -1 for using the current file position, and we read that in as part of read/write command setup. For the non-iter read/write types we pass in NULL for the position pointer, but for the iter types we should not be passing any anything but 0 for the position for a stream. Clear kiocb->ki_pos if the file is a stream, don't leave it as -1. If we do, then the request will error with -ESPIPE. Fixes: ba04291eb66e ("io_uring: allow use of offset == -1 to mean file position") Link: https://github.com/axboe/liburing/discussions/501 Reported-by: Samuel Williams Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d5ab0e9a3f29..fb2a0cb4aaf8 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2891,9 +2891,13 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT; kiocb->ki_pos = READ_ONCE(sqe->off); - if (kiocb->ki_pos == -1 && !(file->f_mode & FMODE_STREAM)) { - req->flags |= REQ_F_CUR_POS; - kiocb->ki_pos = file->f_pos; + if (kiocb->ki_pos == -1) { + if (!(file->f_mode & FMODE_STREAM)) { + req->flags |= REQ_F_CUR_POS; + kiocb->ki_pos = file->f_pos; + } else { + kiocb->ki_pos = 0; + } } kiocb->ki_flags = iocb_flags(file); ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags)); From 6b8b42585886c59a008015083282aae434349094 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Wed, 22 Dec 2021 06:54:53 +0000 Subject: [PATCH 190/251] net/mlx5: DR, Fix NULL vs IS_ERR checking in dr_domain_init_resources The mlx5_get_uars_page() function returns error pointers. Using IS_ERR() to check the return value to fix this. Fixes: 4ec9e7b02697 ("net/mlx5: DR, Expose steering domain functionality") Signed-off-by: Miaoqian Lin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index 8cbd36c82b3b..f6e6d9209766 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019 Mellanox Technologies. */ #include +#include #include "dr_types.h" #define DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, dmn_type) \ @@ -72,9 +73,9 @@ static int dr_domain_init_resources(struct mlx5dr_domain *dmn) } dmn->uar = mlx5_get_uars_page(dmn->mdev); - if (!dmn->uar) { + if (IS_ERR(dmn->uar)) { mlx5dr_err(dmn, "Couldn't allocate UAR\n"); - ret = -ENOMEM; + ret = PTR_ERR(dmn->uar); goto clean_pd; } From 624bf42c2e3930acca9fcfc340b2fa38e712da84 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 12 Dec 2021 16:19:58 +0200 Subject: [PATCH 191/251] net/mlx5: DR, Fix querying eswitch manager vport for ECPF On BlueField the E-Switch manager is the ECPF (vport 0xFFFE), but when querying capabilities of ECPF eswitch manager, need to query vport 0 with other_vport = 0. Fixes: 9091b821aaa4 ("net/mlx5: DR, Handle eswitch manager and uplink vports separately") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index f6e6d9209766..c54cc45f63dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -164,9 +164,7 @@ static int dr_domain_query_vport(struct mlx5dr_domain *dmn, static int dr_domain_query_esw_mngr(struct mlx5dr_domain *dmn) { - return dr_domain_query_vport(dmn, - dmn->info.caps.is_ecpf ? MLX5_VPORT_ECPF : 0, - false, + return dr_domain_query_vport(dmn, 0, false, &dmn->info.caps.vports.esw_manager_caps); } From 26a7993c93a74a3fee83a37b46e00e69e49e57c2 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 26 Oct 2021 08:25:19 +0300 Subject: [PATCH 192/251] net/mlx5: Use first online CPU instead of hard coded CPU Hard coded CPU (0 in our case) might be offline. Hence, use the first online CPU instead. Fixes: f891b7cdbdcd ("net/mlx5: Enable single IRQ for PCI Function") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 830444f927d4..0e84c005d160 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -398,7 +398,7 @@ irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx, cpumask_copy(irq->mask, affinity); if (!irq_pool_is_sf_pool(pool) && !pool->xa_num_irqs.max && cpumask_empty(irq->mask)) - cpumask_set_cpu(0, irq->mask); + cpumask_set_cpu(cpumask_first(cpu_online_mask), irq->mask); irq_set_affinity_hint(irq->irqn, irq->mask); unlock: mutex_unlock(&pool->lock); From aa968f922039706f6d13e8870b49e424d0a8d9ad Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 24 Nov 2021 23:10:57 +0200 Subject: [PATCH 193/251] net/mlx5: Fix error print in case of IRQ request failed In case IRQ layer failed to find or to request irq, the driver is printing the first cpu of the provided affinity as part of the error print. Empty affinity is a valid input for the IRQ layer, and it is an error to call cpumask_first() on empty affinity. Remove the first cpu print from the error message. Fixes: c36326d38d93 ("net/mlx5: Round-Robin EQs over IRQs") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 0e84c005d160..bcee30f5de0a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -356,8 +356,8 @@ static struct mlx5_irq *irq_pool_request_affinity(struct mlx5_irq_pool *pool, new_irq = irq_pool_create_irq(pool, affinity); if (IS_ERR(new_irq)) { if (!least_loaded_irq) { - mlx5_core_err(pool->dev, "Didn't find IRQ for cpu = %u\n", - cpumask_first(affinity)); + mlx5_core_err(pool->dev, "Didn't find a matching IRQ. err = %ld\n", + PTR_ERR(new_irq)); mutex_unlock(&pool->lock); return new_irq; } From 33de865f7bce3968676e43b0182af0a2dd359dae Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Tue, 23 Nov 2021 20:08:13 +0200 Subject: [PATCH 194/251] net/mlx5: Fix SF health recovery flow SF do not directly control the PCI device. During recovery flow SF should not be allowed to do pci disable or pci reset, its PF will do it. It fixes the following kernel trace: mlx5_core.sf mlx5_core.sf.25: mlx5_health_try_recover:387:(pid 40948): starting health recovery flow mlx5_core 0000:03:00.0: mlx5_pci_slot_reset was called mlx5_core 0000:03:00.0: wait vital counter value 0xab175 after 1 iterations mlx5_core.sf mlx5_core.sf.25: firmware version: 24.32.532 mlx5_core.sf mlx5_core.sf.23: mlx5_health_try_recover:387:(pid 40946): starting health recovery flow mlx5_core 0000:03:00.0: mlx5_pci_slot_reset was called mlx5_core 0000:03:00.0: wait vital counter value 0xab193 after 1 iterations mlx5_core.sf mlx5_core.sf.23: firmware version: 24.32.532 mlx5_core.sf mlx5_core.sf.25: mlx5_cmd_check:813:(pid 40948): ENABLE_HCA(0x104) op_mod(0x0) failed, status bad resource state(0x9), syndrome (0x658908) mlx5_core.sf mlx5_core.sf.25: mlx5_function_setup:1292:(pid 40948): enable hca failed mlx5_core.sf mlx5_core.sf.25: mlx5_health_try_recover:389:(pid 40948): health recovery failed Fixes: 1958fc2f0712 ("net/mlx5: SF, Add auxiliary device driver") Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 7df9c7f8d9c8..65083496f913 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1809,12 +1809,13 @@ void mlx5_disable_device(struct mlx5_core_dev *dev) int mlx5_recover_device(struct mlx5_core_dev *dev) { - int ret = -EIO; + if (!mlx5_core_is_sf(dev)) { + mlx5_pci_disable_device(dev); + if (mlx5_pci_slot_reset(dev->pdev) != PCI_ERS_RESULT_RECOVERED) + return -EIO; + } - mlx5_pci_disable_device(dev); - if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED) - ret = mlx5_load_one(dev); - return ret; + return mlx5_load_one(dev); } static struct pci_driver mlx5_core_driver = { From d671e109bd8548d067b27e39e183a484430bf102 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Tue, 14 Dec 2021 03:52:53 +0200 Subject: [PATCH 195/251] net/mlx5: Fix tc max supported prio for nic mode Only prio 1 is supported if firmware doesn't support ignore flow level for nic mode. The offending commit removed the check wrongly. Add it back. Fixes: 9a99c8f1253a ("net/mlx5e: E-Switch, Offload all chain 0 priorities when modify header and forward action is not supported") Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index 97e5845b4cfd..d5e47630e284 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -121,6 +121,9 @@ u32 mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains) u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains) { + if (!mlx5_chains_prios_supported(chains)) + return 1; + if (mlx5_chains_ignore_flow_level_supported(chains)) return UINT_MAX; From 918fc3855a6507a200e9cf22c20be852c0982687 Mon Sep 17 00:00:00 2001 From: Amir Tzin Date: Tue, 30 Nov 2021 16:05:44 +0200 Subject: [PATCH 196/251] net/mlx5e: Wrap the tx reporter dump callback to extract the sq Function mlx5e_tx_reporter_dump_sq() casts its void * argument to struct mlx5e_txqsq *, but in TX-timeout-recovery flow the argument is actually of type struct mlx5e_tx_timeout_ctx *. mlx5_core 0000:08:00.1 enp8s0f1: TX timeout detected mlx5_core 0000:08:00.1 enp8s0f1: TX timeout on queue: 1, SQ: 0x11ec, CQ: 0x146d, SQ Cons: 0x0 SQ Prod: 0x1, usecs since last trans: 21565000 BUG: stack guard page was hit at 0000000093f1a2de (stack is 00000000b66ea0dc..000000004d932dae) kernel stack overflow (page fault): 0000 [#1] SMP NOPTI CPU: 5 PID: 95 Comm: kworker/u20:1 Tainted: G W OE 5.13.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Workqueue: mlx5e mlx5e_tx_timeout_work [mlx5_core] RIP: 0010:mlx5e_tx_reporter_dump_sq+0xd3/0x180 [mlx5_core] Call Trace: mlx5e_tx_reporter_dump+0x43/0x1c0 [mlx5_core] devlink_health_do_dump.part.91+0x71/0xd0 devlink_health_report+0x157/0x1b0 mlx5e_reporter_tx_timeout+0xb9/0xf0 [mlx5_core] ? mlx5e_tx_reporter_err_cqe_recover+0x1d0/0x1d0 [mlx5_core] ? mlx5e_health_queue_dump+0xd0/0xd0 [mlx5_core] ? update_load_avg+0x19b/0x550 ? set_next_entity+0x72/0x80 ? pick_next_task_fair+0x227/0x340 ? finish_task_switch+0xa2/0x280 mlx5e_tx_timeout_work+0x83/0xb0 [mlx5_core] process_one_work+0x1de/0x3a0 worker_thread+0x2d/0x3c0 ? process_one_work+0x3a0/0x3a0 kthread+0x115/0x130 ? kthread_park+0x90/0x90 ret_from_fork+0x1f/0x30 --[ end trace 51ccabea504edaff ]--- RIP: 0010:mlx5e_tx_reporter_dump_sq+0xd3/0x180 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled end Kernel panic - not syncing: Fatal exception To fix this bug add a wrapper for mlx5e_tx_reporter_dump_sq() which extracts the sq from struct mlx5e_tx_timeout_ctx and set it as the TX-timeout-recovery flow dump callback. Fixes: 5f29458b77d5 ("net/mlx5e: Support dump callback in TX reporter") Signed-off-by: Aya Levin Signed-off-by: Amir Tzin Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 4f4bc8726ec4..614cd9477600 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -466,6 +466,14 @@ static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fms return mlx5e_health_fmsg_named_obj_nest_end(fmsg); } +static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5e_tx_timeout_ctx *to_ctx = ctx; + + return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq); +} + static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg) { @@ -561,7 +569,7 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) to_ctx.sq = sq; err_ctx.ctx = &to_ctx; err_ctx.recover = mlx5e_tx_reporter_timeout_recover; - err_ctx.dump = mlx5e_tx_reporter_dump_sq; + err_ctx.dump = mlx5e_tx_reporter_timeout_dump; snprintf(err_str, sizeof(err_str), "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u", sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, From a0cb909644c36230a3c48904d14b91732de79fc0 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 13 Dec 2021 11:05:11 +0200 Subject: [PATCH 197/251] net/mlx5e: Fix skb memory leak when TC classifier action offloads are disabled When TC classifier action offloads are disabled (CONFIG_MLX5_CLS_ACT in Kconfig), the mlx5e_rep_tc_receive() function which is responsible for passing the skb to the stack (or freeing it) is defined as a nop, and results in leaking the skb memory. Replace the nop with a call to napi_gro_receive() to resolve the leak. Fixes: 28e7606fa8f1 ("net/mlx5e: Refactor rx handler of represetor device") Signed-off-by: Gal Pressman Reviewed-by: Ariel Levkovich Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h index d6c7c81690eb..7c9dd3a75f8a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h @@ -66,7 +66,7 @@ mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, static inline void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, - struct sk_buff *skb) {} + struct sk_buff *skb) { napi_gro_receive(rq->cq.napi, skb); } #endif /* CONFIG_MLX5_CLS_ACT */ From 17958d7cd731b977ae7d4af38d891c3a1235b5f1 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 12 Oct 2021 19:40:09 +0300 Subject: [PATCH 198/251] net/mlx5e: Fix interoperability between XSK and ICOSQ recovery flow Both regular RQ and XSKRQ use the same ICOSQ for UMRs. When doing recovery for the ICOSQ, don't forget to deactivate XSKRQ. XSK can be opened and closed while channels are active, so a new mutex prevents the ICOSQ recovery from running at the same time. The ICOSQ recovery deactivates and reactivates XSKRQ, so any parallel change in XSK state would break consistency. As the regular RQ is running, it's not enough to just flush the recovery work, because it can be rescheduled. Fixes: be5323c8379f ("net/mlx5e: Report and recover from CQE error on ICOSQ") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 ++ .../ethernet/mellanox/mlx5/core/en/health.h | 2 ++ .../mellanox/mlx5/core/en/reporter_rx.c | 35 ++++++++++++++++++- .../mellanox/mlx5/core/en/xsk/setup.c | 16 ++++++++- .../net/ethernet/mellanox/mlx5/core/en_main.c | 7 ++-- 5 files changed, 58 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index f0ac6b0d9653..f42067adc79d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -783,6 +783,8 @@ struct mlx5e_channel { DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); int ix; int cpu; + /* Sync between icosq recovery and XSK enable/disable. */ + struct mutex icosq_recovery_lock; }; struct mlx5e_ptp; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h index d5b7110a4265..0107e4e73bb0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h @@ -30,6 +30,8 @@ void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv); void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq); void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq); void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq); +void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c); +void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c); #define MLX5E_REPORTER_PER_Q_MAX_LEN 256 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 74086eb556ae..2684e9da9f41 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -62,6 +62,7 @@ static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq) static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) { + struct mlx5e_rq *xskrq = NULL; struct mlx5_core_dev *mdev; struct mlx5e_icosq *icosq; struct net_device *dev; @@ -70,7 +71,13 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) int err; icosq = ctx; + + mutex_lock(&icosq->channel->icosq_recovery_lock); + + /* mlx5e_close_rq cancels this work before RQ and ICOSQ are killed. */ rq = &icosq->channel->rq; + if (test_bit(MLX5E_RQ_STATE_ENABLED, &icosq->channel->xskrq.state)) + xskrq = &icosq->channel->xskrq; mdev = icosq->channel->mdev; dev = icosq->channel->netdev; err = mlx5_core_query_sq_state(mdev, icosq->sqn, &state); @@ -84,6 +91,9 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) goto out; mlx5e_deactivate_rq(rq); + if (xskrq) + mlx5e_deactivate_rq(xskrq); + err = mlx5e_wait_for_icosq_flush(icosq); if (err) goto out; @@ -97,15 +107,28 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) goto out; mlx5e_reset_icosq_cc_pc(icosq); + mlx5e_free_rx_in_progress_descs(rq); + if (xskrq) + mlx5e_free_rx_in_progress_descs(xskrq); + clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); mlx5e_activate_icosq(icosq); - mlx5e_activate_rq(rq); + mlx5e_activate_rq(rq); rq->stats->recover++; + + if (xskrq) { + mlx5e_activate_rq(xskrq); + xskrq->stats->recover++; + } + + mutex_unlock(&icosq->channel->icosq_recovery_lock); + return 0; out: clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); + mutex_unlock(&icosq->channel->icosq_recovery_lock); return err; } @@ -706,6 +729,16 @@ void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq) mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); } +void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c) +{ + mutex_lock(&c->icosq_recovery_lock); +} + +void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c) +{ + mutex_unlock(&c->icosq_recovery_lock); +} + static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = { .name = "rx", .recover = mlx5e_rx_reporter_recover, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 538bc2419bd8..8526a5fbbf0b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -4,6 +4,7 @@ #include "setup.h" #include "en/params.h" #include "en/txrx.h" +#include "en/health.h" /* It matches XDP_UMEM_MIN_CHUNK_SIZE, but as this constant is private and may * change unexpectedly, and mlx5e has a minimum valid stride size for striding @@ -170,7 +171,13 @@ void mlx5e_close_xsk(struct mlx5e_channel *c) void mlx5e_activate_xsk(struct mlx5e_channel *c) { + /* ICOSQ recovery deactivates RQs. Suspend the recovery to avoid + * activating XSKRQ in the middle of recovery. + */ + mlx5e_reporter_icosq_suspend_recovery(c); set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); + mlx5e_reporter_icosq_resume_recovery(c); + /* TX queue is created active. */ spin_lock_bh(&c->async_icosq_lock); @@ -180,6 +187,13 @@ void mlx5e_activate_xsk(struct mlx5e_channel *c) void mlx5e_deactivate_xsk(struct mlx5e_channel *c) { - mlx5e_deactivate_rq(&c->xskrq); + /* ICOSQ recovery may reactivate XSKRQ if clear_bit is called in the + * middle of recovery. Suspend the recovery to avoid it. + */ + mlx5e_reporter_icosq_suspend_recovery(c); + clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); + mlx5e_reporter_icosq_resume_recovery(c); + synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */ + /* TX queue is disabled on close. */ } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 65571593ec5c..a572fc9690ed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1087,8 +1087,6 @@ void mlx5e_deactivate_rq(struct mlx5e_rq *rq) void mlx5e_close_rq(struct mlx5e_rq *rq) { cancel_work_sync(&rq->dim.work); - if (rq->icosq) - cancel_work_sync(&rq->icosq->recover_work); cancel_work_sync(&rq->recover_work); mlx5e_destroy_rq(rq); mlx5e_free_rx_descs(rq); @@ -2088,6 +2086,8 @@ static int mlx5e_open_queues(struct mlx5e_channel *c, if (err) goto err_close_xdpsq_cq; + mutex_init(&c->icosq_recovery_lock); + err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq); if (err) goto err_close_async_icosq; @@ -2156,9 +2156,12 @@ static void mlx5e_close_queues(struct mlx5e_channel *c) mlx5e_close_xdpsq(&c->xdpsq); if (c->xdp) mlx5e_close_xdpsq(&c->rq_xdpsq); + /* The same ICOSQ is used for UMRs for both RQ and XSKRQ. */ + cancel_work_sync(&c->icosq.recover_work); mlx5e_close_rq(&c->rq); mlx5e_close_sqs(c); mlx5e_close_icosq(&c->icosq); + mutex_destroy(&c->icosq_recovery_lock); mlx5e_close_icosq(&c->async_icosq); if (c->xdp) mlx5e_close_cq(&c->rq_xdpsq.cq); From 19c4aba2d4e23997061fb11aed8a3e41334bfa14 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 22 Jul 2020 16:32:44 +0300 Subject: [PATCH 199/251] net/mlx5e: Fix ICOSQ recovery flow for XSK There are two ICOSQs per channel: one is needed for RX, and the other for async operations (XSK TX, kTLS offload). Currently, the recovery flow for both is the same, and async ICOSQ is mistakenly treated like the regular ICOSQ. This patch prevents running the regular ICOSQ recovery on async ICOSQ. The purpose of async ICOSQ is to handle XSK wakeup requests and post kTLS offload RX parameters, it has nothing to do with RQ and XSKRQ UMRs, so the regular recovery sequence is not applicable here. Fixes: be5323c8379f ("net/mlx5e: Report and recover from CQE error on ICOSQ") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Aya Levin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 -- .../net/ethernet/mellanox/mlx5/core/en_main.c | 30 ++++++++++++++----- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index f42067adc79d..b47a0d3ef22f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1016,9 +1016,6 @@ int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param); void mlx5e_destroy_rq(struct mlx5e_rq *rq); struct mlx5e_sq_param; -int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_sq_param *param, struct mlx5e_icosq *sq); -void mlx5e_close_icosq(struct mlx5e_icosq *sq); int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool, struct mlx5e_xdpsq *sq, bool is_redirect); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a572fc9690ed..3b0f3a831216 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1214,9 +1214,20 @@ static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work) mlx5e_reporter_icosq_cqe_err(sq); } +static void mlx5e_async_icosq_err_cqe_work(struct work_struct *recover_work) +{ + struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq, + recover_work); + + /* Not implemented yet. */ + + netdev_warn(sq->channel->netdev, "async_icosq recovery is not implemented\n"); +} + static int mlx5e_alloc_icosq(struct mlx5e_channel *c, struct mlx5e_sq_param *param, - struct mlx5e_icosq *sq) + struct mlx5e_icosq *sq, + work_func_t recover_work_func) { void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); struct mlx5_core_dev *mdev = c->mdev; @@ -1237,7 +1248,7 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, if (err) goto err_sq_wq_destroy; - INIT_WORK(&sq->recover_work, mlx5e_icosq_err_cqe_work); + INIT_WORK(&sq->recover_work, recover_work_func); return 0; @@ -1573,13 +1584,14 @@ void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) mlx5e_reporter_tx_err_cqe(sq); } -int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_sq_param *param, struct mlx5e_icosq *sq) +static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct mlx5e_icosq *sq, + work_func_t recover_work_func) { struct mlx5e_create_sq_param csp = {}; int err; - err = mlx5e_alloc_icosq(c, param, sq); + err = mlx5e_alloc_icosq(c, param, sq, recover_work_func); if (err) return err; @@ -1618,7 +1630,7 @@ void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq) synchronize_net(); /* Sync with NAPI. */ } -void mlx5e_close_icosq(struct mlx5e_icosq *sq) +static void mlx5e_close_icosq(struct mlx5e_icosq *sq) { struct mlx5e_channel *c = sq->channel; @@ -2082,13 +2094,15 @@ static int mlx5e_open_queues(struct mlx5e_channel *c, spin_lock_init(&c->async_icosq_lock); - err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq); + err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq, + mlx5e_async_icosq_err_cqe_work); if (err) goto err_close_xdpsq_cq; mutex_init(&c->icosq_recovery_lock); - err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq); + err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq, + mlx5e_icosq_err_cqe_work); if (err) goto err_close_async_icosq; From 2820110d945923ab2f4901753e4ccbb2a506fa8e Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Thu, 2 Dec 2021 11:18:02 +0800 Subject: [PATCH 200/251] net/mlx5e: Delete forward rule for ct or sample action When there is ct or sample action, the ct or sample rule will be deleted and return. But if there is an extra mirror action, the forward rule can't be deleted because of the return. Fix it by removing the return. Fixes: 69e2916ebce4 ("net/mlx5: CT: Add support for mirroring") Fixes: f94d6389f6a8 ("net/mlx5e: TC, Add support to offload sample action") Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3d45f4ae80c0..f633448c3cc7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1196,21 +1196,16 @@ void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) goto offload_rule_0; - if (flow_flag_test(flow, CT)) { - mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); - return; - } - - if (flow_flag_test(flow, SAMPLE)) { - mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr); - return; - } - if (attr->esw_attr->split_count) mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); + if (flow_flag_test(flow, CT)) + mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); + else if (flow_flag_test(flow, SAMPLE)) + mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr); + else offload_rule_0: - mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); + mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); } struct mlx5_flow_handle * From 4390c6edc0fb390e699d0f886f45575dfeafeb4b Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 6 Nov 2021 18:08:11 +0100 Subject: [PATCH 201/251] net/mlx5: Fix some error handling paths in 'mlx5e_tc_add_fdb_flow()' All the error handling paths of 'mlx5e_tc_add_fdb_flow()' end to 'err_out' where 'flow_flag_set(flow, FAILED);' is called. All but the new error handling paths added by the commits given in the Fixes tag below. Fix these error handling paths and branch to 'err_out'. Fixes: 166f431ec6be ("net/mlx5e: Add indirect tc offload of ovs internal port") Fixes: b16eb3c81fe2 ("net/mlx5: Support internal port as decap route device") Signed-off-by: Christophe JAILLET Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed (cherry picked from commit 31108d142f3632970f6f3e0224bd1c6781c9f87d) --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index f633448c3cc7..a60c7680fd2b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1440,7 +1440,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG, metadata); if (err) - return err; + goto err_out; } } @@ -1456,13 +1456,15 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, if (attr->chain) { NL_SET_ERR_MSG_MOD(extack, "Internal port rule is only supported on chain 0"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_out; } if (attr->dest_chain) { NL_SET_ERR_MSG_MOD(extack, "Internal port rule offload doesn't support goto action"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_out; } int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv), @@ -1470,8 +1472,10 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, flow_flag_test(flow, EGRESS) ? MLX5E_TC_INT_PORT_EGRESS : MLX5E_TC_INT_PORT_INGRESS); - if (IS_ERR(int_port)) - return PTR_ERR(int_port); + if (IS_ERR(int_port)) { + err = PTR_ERR(int_port); + goto err_out; + } esw_attr->int_port = int_port; } From c4499272566d677075c6a84f46baeb826a6a7182 Mon Sep 17 00:00:00 2001 From: Tim Crawford Date: Wed, 22 Dec 2021 11:51:54 -0700 Subject: [PATCH 202/251] platform/x86: system76_acpi: Guard System76 EC specific functionality Certain functionality or its implementation in System76 EC firmware may be different to the proprietary ODM EC firmware. Introduce a new bool, `has_open_ec`, to guard our specific logic. Detect the use of this by looking for a custom ACPI method name used in System76 firmware. Signed-off-by: Tim Crawford Link: https://lore.kernel.org/r/20211222185154.4560-1-tcrawford@system76.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/system76_acpi.c | 58 ++++++++++++++-------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/drivers/platform/x86/system76_acpi.c b/drivers/platform/x86/system76_acpi.c index 8b292ee95a14..7299ad08c838 100644 --- a/drivers/platform/x86/system76_acpi.c +++ b/drivers/platform/x86/system76_acpi.c @@ -35,6 +35,7 @@ struct system76_data { union acpi_object *nfan; union acpi_object *ntmp; struct input_dev *input; + bool has_open_ec; }; static const struct acpi_device_id device_ids[] = { @@ -279,20 +280,12 @@ static struct acpi_battery_hook system76_battery_hook = { static void system76_battery_init(void) { - acpi_handle handle; - - handle = ec_get_handle(); - if (handle && acpi_has_method(handle, "GBCT")) - battery_hook_register(&system76_battery_hook); + battery_hook_register(&system76_battery_hook); } static void system76_battery_exit(void) { - acpi_handle handle; - - handle = ec_get_handle(); - if (handle && acpi_has_method(handle, "GBCT")) - battery_hook_unregister(&system76_battery_hook); + battery_hook_unregister(&system76_battery_hook); } // Get the airplane mode LED brightness @@ -673,6 +666,10 @@ static int system76_add(struct acpi_device *acpi_dev) acpi_dev->driver_data = data; data->acpi_dev = acpi_dev; + // Some models do not run open EC firmware. Check for an ACPI method + // that only exists on open EC to guard functionality specific to it. + data->has_open_ec = acpi_has_method(acpi_device_handle(data->acpi_dev), "NFAN"); + err = system76_get(data, "INIT"); if (err) return err; @@ -718,27 +715,31 @@ static int system76_add(struct acpi_device *acpi_dev) if (err) goto error; - err = system76_get_object(data, "NFAN", &data->nfan); - if (err) - goto error; + if (data->has_open_ec) { + err = system76_get_object(data, "NFAN", &data->nfan); + if (err) + goto error; - err = system76_get_object(data, "NTMP", &data->ntmp); - if (err) - goto error; + err = system76_get_object(data, "NTMP", &data->ntmp); + if (err) + goto error; - data->therm = devm_hwmon_device_register_with_info(&acpi_dev->dev, - "system76_acpi", data, &thermal_chip_info, NULL); - err = PTR_ERR_OR_ZERO(data->therm); - if (err) - goto error; + data->therm = devm_hwmon_device_register_with_info(&acpi_dev->dev, + "system76_acpi", data, &thermal_chip_info, NULL); + err = PTR_ERR_OR_ZERO(data->therm); + if (err) + goto error; - system76_battery_init(); + system76_battery_init(); + } return 0; error: - kfree(data->ntmp); - kfree(data->nfan); + if (data->has_open_ec) { + kfree(data->ntmp); + kfree(data->nfan); + } return err; } @@ -749,14 +750,15 @@ static int system76_remove(struct acpi_device *acpi_dev) data = acpi_driver_data(acpi_dev); - system76_battery_exit(); + if (data->has_open_ec) { + system76_battery_exit(); + kfree(data->nfan); + kfree(data->ntmp); + } devm_led_classdev_unregister(&acpi_dev->dev, &data->ap_led); devm_led_classdev_unregister(&acpi_dev->dev, &data->kb_led); - kfree(data->nfan); - kfree(data->ntmp); - system76_get(data, "FINI"); return 0; From 4f6c131c3c31b9f68470ebd01320d5403d8719bb Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 22 Dec 2021 21:49:41 +0200 Subject: [PATCH 203/251] platform/x86/intel: Remove X86_PLATFORM_DRIVERS_INTEL While introduction of this menu brings a nice view in the configuration tools, it brought more issues than solves, i.e. it prevents to locate files in the intel/ subfolder without touching non-related Kconfig dependencies elsewhere. Drop X86_PLATFORM_DRIVERS_INTEL altogether. Note, on x86 it's enabled by default and it's quite unlikely anybody wants to disable all of the modules in this submenu. Fixes: 8bd836feb6ca ("platform/x86: intel_skl_int3472: Move to intel/ subfolder") Suggested-by: Hans de Goede Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20211222194941.76054-1-andriy.shevchenko@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/Makefile | 2 +- drivers/platform/x86/intel/Kconfig | 15 --------------- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index 219478061683..253a096b5dd8 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -68,7 +68,7 @@ obj-$(CONFIG_THINKPAD_ACPI) += thinkpad_acpi.o obj-$(CONFIG_THINKPAD_LMI) += think-lmi.o # Intel -obj-$(CONFIG_X86_PLATFORM_DRIVERS_INTEL) += intel/ +obj-y += intel/ # MSI obj-$(CONFIG_MSI_LAPTOP) += msi-laptop.o diff --git a/drivers/platform/x86/intel/Kconfig b/drivers/platform/x86/intel/Kconfig index 38ce3e344589..40096b25994a 100644 --- a/drivers/platform/x86/intel/Kconfig +++ b/drivers/platform/x86/intel/Kconfig @@ -3,19 +3,6 @@ # Intel x86 Platform Specific Drivers # -menuconfig X86_PLATFORM_DRIVERS_INTEL - bool "Intel x86 Platform Specific Device Drivers" - default y - help - Say Y here to get to see options for device drivers for - various Intel x86 platforms, including vendor-specific - drivers. This option alone does not add any kernel code. - - If you say N, all options in this submenu will be skipped - and disabled. - -if X86_PLATFORM_DRIVERS_INTEL - source "drivers/platform/x86/intel/atomisp2/Kconfig" source "drivers/platform/x86/intel/int1092/Kconfig" source "drivers/platform/x86/intel/int33fe/Kconfig" @@ -183,5 +170,3 @@ config INTEL_UNCORE_FREQ_CONTROL To compile this driver as a module, choose M here: the module will be called intel-uncore-frequency. - -endif # X86_PLATFORM_DRIVERS_INTEL From 26a8b09437804fabfb1db080d676b96c0de68e7c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 22 Dec 2021 11:50:23 +0100 Subject: [PATCH 204/251] platform/x86: intel_pmc_core: fix memleak on registration failure In case device registration fails during module initialisation, the platform device structure needs to be freed using platform_device_put() to properly free all resources (e.g. the device name). Fixes: 938835aa903a ("platform/x86: intel_pmc_core: do not create a static struct device") Cc: stable@vger.kernel.org # 5.9 Signed-off-by: Johan Hovold Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20211222105023.6205-1-johan@kernel.org Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/pmc/pltdrv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/pmc/pltdrv.c b/drivers/platform/x86/intel/pmc/pltdrv.c index 73797680b895..15ca8afdd973 100644 --- a/drivers/platform/x86/intel/pmc/pltdrv.c +++ b/drivers/platform/x86/intel/pmc/pltdrv.c @@ -65,7 +65,7 @@ static int __init pmc_core_platform_init(void) retval = platform_device_register(pmc_core_device); if (retval) - kfree(pmc_core_device); + platform_device_put(pmc_core_device); return retval; } From 736ef37fd9a44f5966e25319d08ff7ea99ac79e8 Mon Sep 17 00:00:00 2001 From: Coco Li Date: Thu, 23 Dec 2021 22:24:40 +0000 Subject: [PATCH 205/251] udp: using datalen to cap ipv6 udp max gso segments The max number of UDP gso segments is intended to cap to UDP_MAX_SEGMENTS, this is checked in udp_send_skb(). skb->len contains network and transport header len here, we should use only data len instead. This is the ipv6 counterpart to the below referenced commit, which missed the ipv6 change Fixes: 158390e45612 ("udp: using datalen to cap max gso segments") Signed-off-by: Coco Li Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20211223222441.2975883-1-lixiaoyan@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index a2caca6ccf11..8cde9efd7919 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1204,7 +1204,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, kfree_skb(skb); return -EINVAL; } - if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { + if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); return -EINVAL; } From 5471d5226c3b39b3d2f7011c082d5715795bd65c Mon Sep 17 00:00:00 2001 From: Coco Li Date: Thu, 23 Dec 2021 22:24:41 +0000 Subject: [PATCH 206/251] selftests: Calculate udpgso segment count without header adjustment The below referenced commit correctly updated the computation of number of segments (gso_size) by using only the gso payload size and removing the header lengths. With this change the regression test started failing. Update the tests to match this new behavior. Both IPv4 and IPv6 tests are updated, as a separate patch in this series will update udp_v6_send_skb to match this change in udp_send_skb. Fixes: 158390e45612 ("udp: using datalen to cap max gso segments") Signed-off-by: Coco Li Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20211223222441.2975883-2-lixiaoyan@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/udpgso.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index c66da6ffd6d8..7badaf215de2 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -156,13 +156,13 @@ struct testcase testcases_v4[] = { }, { /* send max number of min sized segments */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4, + .tlen = UDP_MAX_SEGMENTS, .gso_len = 1, - .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4, + .r_num_mss = UDP_MAX_SEGMENTS, }, { /* send max number + 1 of min sized segments: fail */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1, + .tlen = UDP_MAX_SEGMENTS + 1, .gso_len = 1, .tfail = true, }, @@ -259,13 +259,13 @@ struct testcase testcases_v6[] = { }, { /* send max number of min sized segments */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6, + .tlen = UDP_MAX_SEGMENTS, .gso_len = 1, - .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6, + .r_num_mss = UDP_MAX_SEGMENTS, }, { /* send max number + 1 of min sized segments: fail */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1, + .tlen = UDP_MAX_SEGMENTS + 1, .gso_len = 1, .tfail = true, }, From 4eb1782eaa9fa1c224ad1fa0d13a9f09c3ab2d80 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 23 Dec 2021 17:43:14 +0100 Subject: [PATCH 207/251] recordmcount.pl: fix typo in s390 mcount regex Commit 85bf17b28f97 ("recordmcount.pl: look for jgnop instruction as well as bcrl on s390") added a new alternative mnemonic for the existing brcl instruction. This is required for the combination old gcc version (pre 9.0) and binutils since version 2.37. However at the same time this commit introduced a typo, replacing brcl with bcrl. As a result no mcount locations are detected anymore with old gcc versions (pre 9.0) and binutils before version 2.37. Fix this by using the correct mnemonic again. Reported-by: Miroslav Benes Cc: Jerome Marchand Cc: Fixes: 85bf17b28f97 ("recordmcount.pl: look for jgnop instruction as well as bcrl on s390") Link: https://lore.kernel.org/r/alpine.LSU.2.21.2112230949520.19849@pobox.suse.cz Signed-off-by: Heiko Carstens --- scripts/recordmcount.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 52a000b057a5..3ccb2c70add4 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -219,7 +219,7 @@ if ($arch eq "x86_64") { } elsif ($arch eq "s390" && $bits == 64) { if ($cc =~ /-DCC_USING_HOTPATCH/) { - $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*c0 04 00 00 00 00\\s*(bcrl\\s*0,|jgnop\\s*)[0-9a-f]+ <([^\+]*)>\$"; + $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*c0 04 00 00 00 00\\s*(brcl\\s*0,|jgnop\\s*)[0-9a-f]+ <([^\+]*)>\$"; $mcount_adjust = 0; } $alignment = 8; From b45396afa4177f2b1ddfeff7185da733fade1dc3 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Fri, 24 Dec 2021 02:14:59 +0000 Subject: [PATCH 208/251] net: phy: fixed_phy: Fix NULL vs IS_ERR() checking in __fixed_phy_register The fixed_phy_get_gpiod function() returns NULL, it doesn't return error pointers, using NULL checking to fix this.i Fixes: 5468e82f7034 ("net: phy: fixed-phy: Drop GPIO from fixed_phy_add()") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20211224021500.10362-1-linmq006@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/fixed_phy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index c65fb5f5d2dc..a0c256bd5441 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -239,8 +239,8 @@ static struct phy_device *__fixed_phy_register(unsigned int irq, /* Check if we have a GPIO associated with this fixed phy */ if (!gpiod) { gpiod = fixed_phy_get_gpiod(np); - if (IS_ERR(gpiod)) - return ERR_CAST(gpiod); + if (!gpiod) + return ERR_PTR(-EINVAL); } /* Get the next available PHY address, up to PHY_MAX_ADDR */ From 5ec7d18d1813a5bead0b495045606c93873aecbb Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 23 Dec 2021 13:04:30 -0500 Subject: [PATCH 209/251] sctp: use call_rcu to free endpoint This patch is to delay the endpoint free by calling call_rcu() to fix another use-after-free issue in sctp_sock_dump(): BUG: KASAN: use-after-free in __lock_acquire+0x36d9/0x4c20 Call Trace: __lock_acquire+0x36d9/0x4c20 kernel/locking/lockdep.c:3218 lock_acquire+0x1ed/0x520 kernel/locking/lockdep.c:3844 __raw_spin_lock_bh include/linux/spinlock_api_smp.h:135 [inline] _raw_spin_lock_bh+0x31/0x40 kernel/locking/spinlock.c:168 spin_lock_bh include/linux/spinlock.h:334 [inline] __lock_sock+0x203/0x350 net/core/sock.c:2253 lock_sock_nested+0xfe/0x120 net/core/sock.c:2774 lock_sock include/net/sock.h:1492 [inline] sctp_sock_dump+0x122/0xb20 net/sctp/diag.c:324 sctp_for_each_transport+0x2b5/0x370 net/sctp/socket.c:5091 sctp_diag_dump+0x3ac/0x660 net/sctp/diag.c:527 __inet_diag_dump+0xa8/0x140 net/ipv4/inet_diag.c:1049 inet_diag_dump+0x9b/0x110 net/ipv4/inet_diag.c:1065 netlink_dump+0x606/0x1080 net/netlink/af_netlink.c:2244 __netlink_dump_start+0x59a/0x7c0 net/netlink/af_netlink.c:2352 netlink_dump_start include/linux/netlink.h:216 [inline] inet_diag_handler_cmd+0x2ce/0x3f0 net/ipv4/inet_diag.c:1170 __sock_diag_cmd net/core/sock_diag.c:232 [inline] sock_diag_rcv_msg+0x31d/0x410 net/core/sock_diag.c:263 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2477 sock_diag_rcv+0x2a/0x40 net/core/sock_diag.c:274 This issue occurs when asoc is peeled off and the old sk is freed after getting it by asoc->base.sk and before calling lock_sock(sk). To prevent the sk free, as a holder of the sk, ep should be alive when calling lock_sock(). This patch uses call_rcu() and moves sock_put and ep free into sctp_endpoint_destroy_rcu(), so that it's safe to try to hold the ep under rcu_read_lock in sctp_transport_traverse_process(). If sctp_endpoint_hold() returns true, it means this ep is still alive and we have held it and can continue to dump it; If it returns false, it means this ep is dead and can be freed after rcu_read_unlock, and we should skip it. In sctp_sock_dump(), after locking the sk, if this ep is different from tsp->asoc->ep, it means during this dumping, this asoc was peeled off before calling lock_sock(), and the sk should be skipped; If this ep is the same with tsp->asoc->ep, it means no peeloff happens on this asoc, and due to lock_sock, no peeloff will happen either until release_sock. Note that delaying endpoint free won't delay the port release, as the port release happens in sctp_endpoint_destroy() before calling call_rcu(). Also, freeing endpoint by call_rcu() makes it safe to access the sk by asoc->base.sk in sctp_assocs_seq_show() and sctp_rcv(). Thanks Jones to bring this issue up. v1->v2: - improve the changelog. - add kfree(ep) into sctp_endpoint_destroy_rcu(), as Jakub noticed. Reported-by: syzbot+9276d76e83e3bcde6c99@syzkaller.appspotmail.com Reported-by: Lee Jones Fixes: d25adbeb0cdb ("sctp: fix an use-after-free issue in sctp_sock_dump") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 6 +++--- include/net/sctp/structs.h | 3 ++- net/sctp/diag.c | 12 ++++++------ net/sctp/endpointola.c | 23 +++++++++++++++-------- net/sctp/socket.c | 23 +++++++++++++++-------- 5 files changed, 41 insertions(+), 26 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 189fdb9db162..d314a180ab93 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -105,6 +105,7 @@ extern struct percpu_counter sctp_sockets_allocated; int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); +typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *); void sctp_transport_walk_start(struct rhashtable_iter *iter); void sctp_transport_walk_stop(struct rhashtable_iter *iter); struct sctp_transport *sctp_transport_get_next(struct net *net, @@ -115,9 +116,8 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, void *p); -int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - int (*cb_done)(struct sctp_transport *, void *), - struct net *net, int *pos, void *p); +int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, + struct net *net, int *pos, void *p); int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p); int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, struct sctp_info *info); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 899c29c326ba..8dabd8800006 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1355,6 +1355,7 @@ struct sctp_endpoint { reconf_enable:1; __u8 strreset_enable; + struct rcu_head rcu; }; /* Recover the outter endpoint structure. */ @@ -1370,7 +1371,7 @@ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base) struct sctp_endpoint *sctp_endpoint_new(struct sock *, gfp_t); void sctp_endpoint_free(struct sctp_endpoint *); void sctp_endpoint_put(struct sctp_endpoint *); -void sctp_endpoint_hold(struct sctp_endpoint *); +int sctp_endpoint_hold(struct sctp_endpoint *ep); void sctp_endpoint_add_asoc(struct sctp_endpoint *, struct sctp_association *); struct sctp_association *sctp_endpoint_lookup_assoc( const struct sctp_endpoint *ep, diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 760b367644c1..a7d623171501 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -290,9 +290,8 @@ out: return err; } -static int sctp_sock_dump(struct sctp_transport *tsp, void *p) +static int sctp_sock_dump(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p) { - struct sctp_endpoint *ep = tsp->asoc->ep; struct sctp_comm_param *commp = p; struct sock *sk = ep->base.sk; struct sk_buff *skb = commp->skb; @@ -302,6 +301,8 @@ static int sctp_sock_dump(struct sctp_transport *tsp, void *p) int err = 0; lock_sock(sk); + if (ep != tsp->asoc->ep) + goto release; list_for_each_entry(assoc, &ep->asocs, asocs) { if (cb->args[4] < cb->args[1]) goto next; @@ -344,9 +345,8 @@ release: return err; } -static int sctp_sock_filter(struct sctp_transport *tsp, void *p) +static int sctp_sock_filter(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p) { - struct sctp_endpoint *ep = tsp->asoc->ep; struct sctp_comm_param *commp = p; struct sock *sk = ep->base.sk; const struct inet_diag_req_v2 *r = commp->r; @@ -505,8 +505,8 @@ skip: if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE))) goto done; - sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump, - net, &pos, &commp); + sctp_transport_traverse_process(sctp_sock_filter, sctp_sock_dump, + net, &pos, &commp); cb->args[2] = pos; done: diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 48c9c2c7602f..efffde7f2328 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -184,6 +184,18 @@ void sctp_endpoint_free(struct sctp_endpoint *ep) } /* Final destructor for endpoint. */ +static void sctp_endpoint_destroy_rcu(struct rcu_head *head) +{ + struct sctp_endpoint *ep = container_of(head, struct sctp_endpoint, rcu); + struct sock *sk = ep->base.sk; + + sctp_sk(sk)->ep = NULL; + sock_put(sk); + + kfree(ep); + SCTP_DBG_OBJCNT_DEC(ep); +} + static void sctp_endpoint_destroy(struct sctp_endpoint *ep) { struct sock *sk; @@ -213,18 +225,13 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) if (sctp_sk(sk)->bind_hash) sctp_put_port(sk); - sctp_sk(sk)->ep = NULL; - /* Give up our hold on the sock */ - sock_put(sk); - - kfree(ep); - SCTP_DBG_OBJCNT_DEC(ep); + call_rcu(&ep->rcu, sctp_endpoint_destroy_rcu); } /* Hold a reference to an endpoint. */ -void sctp_endpoint_hold(struct sctp_endpoint *ep) +int sctp_endpoint_hold(struct sctp_endpoint *ep) { - refcount_inc(&ep->base.refcnt); + return refcount_inc_not_zero(&ep->base.refcnt); } /* Release a reference to an endpoint and clean up if there are diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 33391254fa82..ad5028a07b18 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5338,11 +5338,12 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), } EXPORT_SYMBOL_GPL(sctp_transport_lookup_process); -int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - int (*cb_done)(struct sctp_transport *, void *), - struct net *net, int *pos, void *p) { +int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, + struct net *net, int *pos, void *p) +{ struct rhashtable_iter hti; struct sctp_transport *tsp; + struct sctp_endpoint *ep; int ret; again: @@ -5351,26 +5352,32 @@ again: tsp = sctp_transport_get_idx(net, &hti, *pos + 1); for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) { - ret = cb(tsp, p); - if (ret) - break; + ep = tsp->asoc->ep; + if (sctp_endpoint_hold(ep)) { /* asoc can be peeled off */ + ret = cb(ep, tsp, p); + if (ret) + break; + sctp_endpoint_put(ep); + } (*pos)++; sctp_transport_put(tsp); } sctp_transport_walk_stop(&hti); if (ret) { - if (cb_done && !cb_done(tsp, p)) { + if (cb_done && !cb_done(ep, tsp, p)) { (*pos)++; + sctp_endpoint_put(ep); sctp_transport_put(tsp); goto again; } + sctp_endpoint_put(ep); sctp_transport_put(tsp); } return ret; } -EXPORT_SYMBOL_GPL(sctp_for_each_transport); +EXPORT_SYMBOL_GPL(sctp_transport_traverse_process); /* 7.2.1 Association Status (SCTP_STATUS) From e6007b85dfa284c4726c249e3c2fc4181ca8e179 Mon Sep 17 00:00:00 2001 From: Ma Xinjian Date: Fri, 24 Dec 2021 17:59:28 +0800 Subject: [PATCH 210/251] selftests: mptcp: Remove the deprecated config NFT_COUNTER NFT_COUNTER was removed since 390ad4295aa ("netfilter: nf_tables: make counter support built-in") LKP/0Day will check if all configs listing under selftests are able to be enabled properly. For the missing configs, it will report something like: LKP WARN miss config CONFIG_NFT_COUNTER= of net/mptcp/config - it's not reasonable to keep the deprecated configs. - configs under kselftests are recommended by corresponding tests. So if some configs are missing, it will impact the testing results Reported-by: kernel test robot Signed-off-by: Ma Xinjian Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/config | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 0faaccd21447..2b82628decb1 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -9,7 +9,6 @@ CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NETFILTER_NETLINK=m CONFIG_NF_TABLES=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XTABLES=m CONFIG_NETFILTER_XT_MATCH_BPF=m From 0129ab1f268b6cf88825eae819b9b84aa0a85634 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Fri, 24 Dec 2021 21:12:32 -0800 Subject: [PATCH 211/251] kfence: fix memory leak when cat kfence objects Hulk robot reported a kmemleak problem: unreferenced object 0xffff93d1d8cc02e8 (size 248): comm "cat", pid 23327, jiffies 4624670141 (age 495992.217s) hex dump (first 32 bytes): 00 40 85 19 d4 93 ff ff 00 10 00 00 00 00 00 00 .@.............. 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: seq_open+0x2a/0x80 full_proxy_open+0x167/0x1e0 do_dentry_open+0x1e1/0x3a0 path_openat+0x961/0xa20 do_filp_open+0xae/0x120 do_sys_openat2+0x216/0x2f0 do_sys_open+0x57/0x80 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 unreferenced object 0xffff93d419854000 (size 4096): comm "cat", pid 23327, jiffies 4624670141 (age 495992.217s) hex dump (first 32 bytes): 6b 66 65 6e 63 65 2d 23 32 35 30 3a 20 30 78 30 kfence-#250: 0x0 30 30 30 30 30 30 30 37 35 34 62 64 61 31 32 2d 0000000754bda12- backtrace: seq_read_iter+0x313/0x440 seq_read+0x14b/0x1a0 full_proxy_read+0x56/0x80 vfs_read+0xa5/0x1b0 ksys_read+0xa0/0xf0 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 I find that we can easily reproduce this problem with the following commands: cat /sys/kernel/debug/kfence/objects echo scan > /sys/kernel/debug/kmemleak cat /sys/kernel/debug/kmemleak The leaked memory is allocated in the stack below: do_syscall_64 do_sys_open do_dentry_open full_proxy_open seq_open ---> alloc seq_file vfs_read full_proxy_read seq_read seq_read_iter traverse ---> alloc seq_buf And it should have been released in the following process: do_syscall_64 syscall_exit_to_user_mode exit_to_user_mode_prepare task_work_run ____fput __fput full_proxy_release ---> free here However, the release function corresponding to file_operations is not implemented in kfence. As a result, a memory leak occurs. Therefore, the solution to this problem is to implement the corresponding release function. Link: https://lkml.kernel.org/r/20211206133628.2822545-1-libaokun1@huawei.com Fixes: 0ce20dd84089 ("mm: add Kernel Electric-Fence infrastructure") Signed-off-by: Baokun Li Reported-by: Hulk Robot Acked-by: Marco Elver Reviewed-by: Kefeng Wang Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Yu Kuai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kfence/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 09945784df9e..a19154a8d196 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -683,6 +683,7 @@ static const struct file_operations objects_fops = { .open = open_objects, .read = seq_read, .llseek = seq_lseek, + .release = seq_release, }; static int __init kfence_debugfs_init(void) From 338635340669d5b317c7e8dcf4fff4a0f3651d87 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 24 Dec 2021 21:12:35 -0800 Subject: [PATCH 212/251] mm: mempolicy: fix THP allocations escaping mempolicy restrictions alloc_pages_vma() may try to allocate THP page on the local NUMA node first: page = __alloc_pages_node(hpage_node, gfp | __GFP_THISNODE | __GFP_NORETRY, order); And if the allocation fails it retries allowing remote memory: if (!page && (gfp & __GFP_DIRECT_RECLAIM)) page = __alloc_pages_node(hpage_node, gfp, order); However, this retry allocation completely ignores memory policy nodemask allowing allocation to escape restrictions. The first appearance of this bug seems to be the commit ac5b2c18911f ("mm: thp: relax __GFP_THISNODE for MADV_HUGEPAGE mappings"). The bug disappeared later in the commit 89c83fb539f9 ("mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask") and reappeared again in slightly different form in the commit 76e654cc91bb ("mm, page_alloc: allow hugepage fallback to remote nodes when madvised") Fix this by passing correct nodemask to the __alloc_pages() call. The demonstration/reproducer of the problem: $ mount -oremount,size=4G,huge=always /dev/shm/ $ echo always > /sys/kernel/mm/transparent_hugepage/defrag $ cat mbind_thp.c #include #include #include #include #include #include #include #include #define SIZE 2ULL << 30 int main(int argc, char **argv) { int fd; unsigned long long i; char *addr; pid_t pid; char buf[100]; unsigned long nodemask = 1; fd = open("/dev/shm/test", O_RDWR|O_CREAT); assert(fd > 0); assert(ftruncate(fd, SIZE) == 0); addr = mmap(NULL, SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); assert(mbind(addr, SIZE, MPOL_BIND, &nodemask, 2, MPOL_MF_STRICT|MPOL_MF_MOVE)==0); for (i = 0; i < SIZE; i+=4096) { addr[i] = 1; } pid = getpid(); snprintf(buf, sizeof(buf), "grep shm /proc/%d/numa_maps", pid); system(buf); sleep(10000); return 0; } $ gcc mbind_thp.c -o mbind_thp -lnuma $ numactl -H available: 2 nodes (0-1) node 0 cpus: 0 2 node 0 size: 1918 MB node 0 free: 1595 MB node 1 cpus: 1 3 node 1 size: 2014 MB node 1 free: 1731 MB node distances: node 0 1 0: 10 20 1: 20 10 $ rm -f /dev/shm/test; taskset -c 0 ./mbind_thp 7fd970a00000 bind:0 file=/dev/shm/test dirty=524288 active=0 N0=396800 N1=127488 kernelpagesize_kB=4 Link: https://lkml.kernel.org/r/20211208165343.22349-1-arbn@yandex-team.com Fixes: ac5b2c18911f ("mm: thp: relax __GFP_THISNODE for MADV_HUGEPAGE mappings") Signed-off-by: Andrey Ryabinin Acked-by: Michal Hocko Acked-by: Mel Gorman Acked-by: David Rientjes Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 10e9c87260ed..f6248affaf38 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2140,8 +2140,7 @@ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, * memory with both reclaim and compact as well. */ if (!page && (gfp & __GFP_DIRECT_RECLAIM)) - page = __alloc_pages_node(hpage_node, - gfp, order); + page = __alloc_pages(gfp, order, hpage_node, nmask); goto out; } From 71d2bcec2d4d69ff109c497e6611d6c53c8926d4 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Fri, 24 Dec 2021 21:12:39 -0800 Subject: [PATCH 213/251] kernel/crash_core: suppress unknown crashkernel parameter warning When booting with crashkernel= on the kernel command line a warning similar to Kernel command line: ro console=ttyS0 crashkernel=256M Unknown kernel command line parameters "crashkernel=256M", will be passed to user space. is printed. This comes from crashkernel= being parsed independent from the kernel parameter handling mechanism. So the code in init/main.c doesn't know that crashkernel= is a valid kernel parameter and prints this incorrect warning. Suppress the warning by adding a dummy early_param handler for crashkernel=. Link: https://lkml.kernel.org/r/20211208133443.6867-1-prudo@redhat.com Fixes: 86d1919a4fb0 ("init: print out unknown kernel parameters") Signed-off-by: Philipp Rudo Acked-by: Baoquan He Cc: Andrew Halaney Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/crash_core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/crash_core.c b/kernel/crash_core.c index eb53f5ec62c9..256cf6db573c 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -295,6 +296,16 @@ int __init parse_crashkernel_low(char *cmdline, "crashkernel=", suffix_tbl[SUFFIX_LOW]); } +/* + * Add a dummy early_param handler to mark crashkernel= as a known command line + * parameter and suppress incorrect warnings in init/main.c. + */ +static int __init parse_crashkernel_dummy(char *arg) +{ + return 0; +} +early_param("crashkernel", parse_crashkernel_dummy); + Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, void *data, size_t data_len) { From 7e5b901e4609441fc6bb94701c4743b39b6c277e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 24 Dec 2021 21:12:42 -0800 Subject: [PATCH 214/251] MAINTAINERS: mark more list instances as moderated Some lists that are moderated are not marked as moderated consistently, so mark them all as moderated. Link: https://lkml.kernel.org/r/20211209001330.18558-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Cc: Miquel Raynal Cc: Conor Culhane Cc: Ryder Lee Cc: Jianjun Wang Cc: Alexandre Belloni Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8912b2c1260c..fb18ce7168aa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14845,7 +14845,7 @@ PCIE DRIVER FOR MEDIATEK M: Ryder Lee M: Jianjun Wang L: linux-pci@vger.kernel.org -L: linux-mediatek@lists.infradead.org +L: linux-mediatek@lists.infradead.org (moderated for non-subscribers) S: Supported F: Documentation/devicetree/bindings/pci/mediatek* F: drivers/pci/controller/*mediatek* @@ -17423,7 +17423,7 @@ F: drivers/video/fbdev/sm712* SILVACO I3C DUAL-ROLE MASTER M: Miquel Raynal M: Conor Culhane -L: linux-i3c@lists.infradead.org +L: linux-i3c@lists.infradead.org (moderated for non-subscribers) S: Maintained F: Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml F: drivers/i3c/master/svc-i3c-master.c From e37e7b0b3bd52ec4f8ab71b027bcec08f57f1b3b Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 24 Dec 2021 21:12:45 -0800 Subject: [PATCH 215/251] mm, hwpoison: fix condition in free hugetlb page path When a memory error hits a tail page of a free hugepage, __page_handle_poison() is expected to be called to isolate the error in 4kB unit, but it's not called due to the outdated if-condition in memory_failure_hugetlb(). This loses the chance to isolate the error in the finer unit, so it's not optimal. Drop the condition. This "(p != head && TestSetPageHWPoison(head)" condition is based on the old semantics of PageHWPoison on hugepage (where PG_hwpoison flag was set on the subpage), so it's not necessray any more. By getting to set PG_hwpoison on head page for hugepages, concurrent error events on different subpages in a single hugepage can be prevented by TestSetPageHWPoison(head) at the beginning of memory_failure_hugetlb(). So dropping the condition should not reopen the race window originally mentioned in commit b985194c8c0a ("hwpoison, hugetlb: lock_page/unlock_page does not match for handling a free hugepage") [naoya.horiguchi@linux.dev: fix "HardwareCorrupted" counter] Link: https://lkml.kernel.org/r/20211220084851.GA1460264@u2004 Link: https://lkml.kernel.org/r/20211210110208.879740-1-naoya.horiguchi@linux.dev Signed-off-by: Naoya Horiguchi Reported-by: Fei Luo Reviewed-by: Mike Kravetz Cc: [5.14+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 07c875fdeaf0..682828b94ab6 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1470,17 +1470,12 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags) if (!(flags & MF_COUNT_INCREASED)) { res = get_hwpoison_page(p, flags); if (!res) { - /* - * Check "filter hit" and "race with other subpage." - */ lock_page(head); - if (PageHWPoison(head)) { - if ((hwpoison_filter(p) && TestClearPageHWPoison(p)) - || (p != head && TestSetPageHWPoison(head))) { + if (hwpoison_filter(p)) { + if (TestClearPageHWPoison(head)) num_poisoned_pages_dec(); - unlock_page(head); - return 0; - } + unlock_page(head); + return 0; } unlock_page(head); res = MF_FAILED; From 94ab10dd42a70acc5208a41325617e3d9cf81a70 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 24 Dec 2021 21:12:48 -0800 Subject: [PATCH 216/251] mm: delete unsafe BUG from page_cache_add_speculative() It is not easily reproducible, but on 5.16-rc I have several times hit the VM_BUG_ON_PAGE(PageTail(page), page) in page_cache_add_speculative(): usually from filemap_get_read_batch() for an ext4 read, yesterday from next_uptodate_page() from filemap_map_pages() for a shmem fault. That BUG used to be placed where page_ref_add_unless() had succeeded, but now it is placed before folio_ref_add_unless() is attempted: that is not safe, since it is only the acquired reference which makes the page safe from racing THP collapse or split. We could keep the BUG, checking PageTail only when folio_ref_try_add_rcu() has succeeded; but I don't think it adds much value - just delete it. Link: https://lkml.kernel.org/r/8b98fc6f-3439-8614-c3f3-945c659a1aba@google.com Fixes: 020853b6f5ea ("mm: Add folio_try_get_rcu()") Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Reviewed-by: Matthew Wilcox (Oracle) Cc: Vlastimil Babka Cc: William Kucharski Cc: Christoph Hellwig Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 605246452305..d150a9082b31 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -285,7 +285,6 @@ static inline struct inode *folio_inode(struct folio *folio) static inline bool page_cache_add_speculative(struct page *page, int count) { - VM_BUG_ON_PAGE(PageTail(page), page); return folio_ref_try_add_rcu((struct folio *)page, count); } From 595ec1973c276f6c0c1de8aca5eef8dfd81f9b49 Mon Sep 17 00:00:00 2001 From: Thibaut Sautereau Date: Fri, 24 Dec 2021 21:12:51 -0800 Subject: [PATCH 217/251] mm/page_alloc: fix __alloc_size attribute for alloc_pages_exact_nid The second parameter of alloc_pages_exact_nid is the one indicating the size of memory pointed by the returned pointer. Link: https://lkml.kernel.org/r/YbjEgwhn4bGblp//@coeus Fixes: abd58f38dfb4 ("mm/page_alloc: add __alloc_size attributes for better bounds checking") Signed-off-by: Thibaut Sautereau Acked-by: Kees Cook Cc: Daniel Micay Cc: Levente Polyak Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index b976c4177299..8fcc38467af6 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -624,7 +624,7 @@ extern unsigned long get_zeroed_page(gfp_t gfp_mask); void *alloc_pages_exact(size_t size, gfp_t gfp_mask) __alloc_size(1); void free_pages_exact(void *virt, size_t size); -__meminit void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) __alloc_size(1); +__meminit void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) __alloc_size(2); #define __get_free_page(gfp_mask) \ __get_free_pages((gfp_mask), 0) From 34796417964b8d0aef45a99cf6c2d20cebe33733 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 24 Dec 2021 21:12:54 -0800 Subject: [PATCH 218/251] mm/damon/dbgfs: protect targets destructions with kdamond_lock DAMON debugfs interface iterates current monitoring targets in 'dbgfs_target_ids_read()' while holding the corresponding 'kdamond_lock'. However, it also destructs the monitoring targets in 'dbgfs_before_terminate()' without holding the lock. This can result in a use_after_free bug. This commit avoids the race by protecting the destruction with the corresponding 'kdamond_lock'. Link: https://lkml.kernel.org/r/20211221094447.2241-1-sj@kernel.org Reported-by: Sangwoo Bae Fixes: 4bc05954d007 ("mm/damon: implement a debugfs-based user space interface") Signed-off-by: SeongJae Park Cc: [5.15.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/damon/dbgfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c index 1efac0022e9a..4fbd729edc9e 100644 --- a/mm/damon/dbgfs.c +++ b/mm/damon/dbgfs.c @@ -650,10 +650,12 @@ static void dbgfs_before_terminate(struct damon_ctx *ctx) if (!targetid_is_pid(ctx)) return; + mutex_lock(&ctx->kdamond_lock); damon_for_each_target_safe(t, next, ctx) { put_pid((struct pid *)t->id); damon_destroy_target(t); } + mutex_unlock(&ctx->kdamond_lock); } static struct damon_ctx *dbgfs_new_ctx(void) From 2a57d83c78f889bf3f54eede908d0643c40d5418 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Fri, 24 Dec 2021 21:12:58 -0800 Subject: [PATCH 219/251] mm/hwpoison: clear MF_COUNT_INCREASED before retrying get_any_page() Hulk Robot reported a panic in put_page_testzero() when testing madvise() with MADV_SOFT_OFFLINE. The BUG() is triggered when retrying get_any_page(). This is because we keep MF_COUNT_INCREASED flag in second try but the refcnt is not increased. page dumped because: VM_BUG_ON_PAGE(page_ref_count(page) == 0) ------------[ cut here ]------------ kernel BUG at include/linux/mm.h:737! invalid opcode: 0000 [#1] PREEMPT SMP CPU: 5 PID: 2135 Comm: sshd Tainted: G B 5.16.0-rc6-dirty #373 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 RIP: release_pages+0x53f/0x840 Call Trace: free_pages_and_swap_cache+0x64/0x80 tlb_flush_mmu+0x6f/0x220 unmap_page_range+0xe6c/0x12c0 unmap_single_vma+0x90/0x170 unmap_vmas+0xc4/0x180 exit_mmap+0xde/0x3a0 mmput+0xa3/0x250 do_exit+0x564/0x1470 do_group_exit+0x3b/0x100 __do_sys_exit_group+0x13/0x20 __x64_sys_exit_group+0x16/0x20 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Modules linked in: ---[ end trace e99579b570fe0649 ]--- RIP: 0010:release_pages+0x53f/0x840 Link: https://lkml.kernel.org/r/20211221074908.3910286-1-liushixin2@huawei.com Fixes: b94e02822deb ("mm,hwpoison: try to narrow window race for free pages") Signed-off-by: Liu Shixin Reported-by: Hulk Robot Reviewed-by: Oscar Salvador Acked-by: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 682828b94ab6..3a274468f193 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2234,6 +2234,7 @@ retry: } else if (ret == 0) { if (soft_offline_free_page(page) && try_again) { try_again = false; + flags &= ~MF_COUNT_INCREASED; goto retry; } } From fc74e0a40e4f9fd0468e34045b0c45bba11dcbb2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 26 Dec 2021 13:17:17 -0800 Subject: [PATCH 220/251] Linux 5.16-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d85f1ff79f5c..17b4319ad2ff 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Gobble Gobble # *DOCUMENTATION* From c1833c3964d5bd8c163bd4e01736a38bc473cb8a Mon Sep 17 00:00:00 2001 From: William Zhao Date: Thu, 23 Dec 2021 12:33:16 -0500 Subject: [PATCH 221/251] ip6_vti: initialize __ip6_tnl_parm struct in vti6_siocdevprivate The "__ip6_tnl_parm" struct was left uninitialized causing an invalid load of random data when the "__ip6_tnl_parm" struct was used elsewhere. As an example, in the function "ip6_tnl_xmit_ctl()", it tries to access the "collect_md" member. With "__ip6_tnl_parm" being uninitialized and containing random data, the UBSAN detected that "collect_md" held a non-boolean value. The UBSAN issue is as follows: =============================================================== UBSAN: invalid-load in net/ipv6/ip6_tunnel.c:1025:14 load of value 30 is not a valid value for type '_Bool' CPU: 1 PID: 228 Comm: kworker/1:3 Not tainted 5.16.0-rc4+ #8 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 Workqueue: ipv6_addrconf addrconf_dad_work Call Trace: dump_stack_lvl+0x44/0x57 ubsan_epilogue+0x5/0x40 __ubsan_handle_load_invalid_value+0x66/0x70 ? __cpuhp_setup_state+0x1d3/0x210 ip6_tnl_xmit_ctl.cold.52+0x2c/0x6f [ip6_tunnel] vti6_tnl_xmit+0x79c/0x1e96 [ip6_vti] ? lock_is_held_type+0xd9/0x130 ? vti6_rcv+0x100/0x100 [ip6_vti] ? lock_is_held_type+0xd9/0x130 ? rcu_read_lock_bh_held+0xc0/0xc0 ? lock_acquired+0x262/0xb10 dev_hard_start_xmit+0x1e6/0x820 __dev_queue_xmit+0x2079/0x3340 ? mark_lock.part.52+0xf7/0x1050 ? netdev_core_pick_tx+0x290/0x290 ? kvm_clock_read+0x14/0x30 ? kvm_sched_clock_read+0x5/0x10 ? sched_clock_cpu+0x15/0x200 ? find_held_lock+0x3a/0x1c0 ? lock_release+0x42f/0xc90 ? lock_downgrade+0x6b0/0x6b0 ? mark_held_locks+0xb7/0x120 ? neigh_connected_output+0x31f/0x470 ? lockdep_hardirqs_on+0x79/0x100 ? neigh_connected_output+0x31f/0x470 ? ip6_finish_output2+0x9b0/0x1d90 ? rcu_read_lock_bh_held+0x62/0xc0 ? ip6_finish_output2+0x9b0/0x1d90 ip6_finish_output2+0x9b0/0x1d90 ? ip6_append_data+0x330/0x330 ? ip6_mtu+0x166/0x370 ? __ip6_finish_output+0x1ad/0xfb0 ? nf_hook_slow+0xa6/0x170 ip6_output+0x1fb/0x710 ? nf_hook.constprop.32+0x317/0x430 ? ip6_finish_output+0x180/0x180 ? __ip6_finish_output+0xfb0/0xfb0 ? lock_is_held_type+0xd9/0x130 ndisc_send_skb+0xb33/0x1590 ? __sk_mem_raise_allocated+0x11cf/0x1560 ? dst_output+0x4a0/0x4a0 ? ndisc_send_rs+0x432/0x610 addrconf_dad_completed+0x30c/0xbb0 ? addrconf_rs_timer+0x650/0x650 ? addrconf_dad_work+0x73c/0x10e0 addrconf_dad_work+0x73c/0x10e0 ? addrconf_dad_completed+0xbb0/0xbb0 ? rcu_read_lock_sched_held+0xaf/0xe0 ? rcu_read_lock_bh_held+0xc0/0xc0 process_one_work+0x97b/0x1740 ? pwq_dec_nr_in_flight+0x270/0x270 worker_thread+0x87/0xbf0 ? process_one_work+0x1740/0x1740 kthread+0x3ac/0x490 ? set_kthread_struct+0x100/0x100 ret_from_fork+0x22/0x30 =============================================================== The solution is to initialize "__ip6_tnl_parm" struct to zeros in the "vti6_siocdevprivate()" function. Signed-off-by: William Zhao Signed-off-by: David S. Miller --- net/ipv6/ip6_vti.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 527e9ead7449..5e9474bc54fc 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -808,6 +808,8 @@ vti6_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data struct net *net = dev_net(dev); struct vti6_net *ip6n = net_generic(net, vti6_net_id); + memset(&p1, 0, sizeof(p1)); + switch (cmd) { case SIOCGETTUNNEL: if (dev == ip6n->fb_tnl_dev) { From 6d7373dabfd3933ee30c40fc8c09d2a788f6ece1 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 27 Dec 2021 14:35:30 +0100 Subject: [PATCH 222/251] net/smc: fix using of uninitialized completions In smc_wr_tx_send_wait() the completion on index specified by pend->idx is initialized and after smc_wr_tx_send() was called the wait for completion starts. pend->idx is used to get the correct index for the wait, but the pend structure could already be cleared in smc_wr_tx_process_cqe(). Introduce pnd_idx to hold and use a local copy of the correct index. Fixes: 09c61d24f96d ("net/smc: wait for departure of an IB message") Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_wr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 600ab5889227..79a7431f534e 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -358,18 +358,20 @@ int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv, unsigned long timeout) { struct smc_wr_tx_pend *pend; + u32 pnd_idx; int rc; pend = container_of(priv, struct smc_wr_tx_pend, priv); pend->compl_requested = 1; - init_completion(&link->wr_tx_compl[pend->idx]); + pnd_idx = pend->idx; + init_completion(&link->wr_tx_compl[pnd_idx]); rc = smc_wr_tx_send(link, priv); if (rc) return rc; /* wait for completion by smc_wr_tx_process_cqe() */ rc = wait_for_completion_interruptible_timeout( - &link->wr_tx_compl[pend->idx], timeout); + &link->wr_tx_compl[pnd_idx], timeout); if (rc <= 0) rc = -ENODATA; if (rc > 0) From 6c25449e1a32c594d743df8e8258e8ef870b6a77 Mon Sep 17 00:00:00 2001 From: yangxingwu Date: Mon, 27 Dec 2021 16:29:51 +0800 Subject: [PATCH 223/251] net: udp: fix alignment problem in udp4_seq_show() $ cat /pro/net/udp before: sl local_address rem_address st tx_queue rx_queue tr tm->when 26050: 0100007F:0035 00000000:0000 07 00000000:00000000 00:00000000 26320: 0100007F:0143 00000000:0000 07 00000000:00000000 00:00000000 27135: 00000000:8472 00000000:0000 07 00000000:00000000 00:00000000 after: sl local_address rem_address st tx_queue rx_queue tr tm->when 26050: 0100007F:0035 00000000:0000 07 00000000:00000000 00:00000000 26320: 0100007F:0143 00000000:0000 07 00000000:00000000 00:00000000 27135: 00000000:8472 00000000:0000 07 00000000:00000000 00:00000000 Signed-off-by: yangxingwu Signed-off-by: David S. Miller --- net/ipv4/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 15c6b450b8db..0cd6b857e7ec 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -3075,7 +3075,7 @@ int udp4_seq_show(struct seq_file *seq, void *v) { seq_setwidth(seq, 127); if (v == SEQ_START_TOKEN) - seq_puts(seq, " sl local_address rem_address st tx_queue " + seq_puts(seq, " sl local_address rem_address st tx_queue " "rx_queue tr tm->when retrnsmt uid timeout " "inode ref pointer drops"); else { From 5f50153288452e10b6edd69ec9112c49442b054a Mon Sep 17 00:00:00 2001 From: Zekun Shen Date: Sun, 26 Dec 2021 21:32:45 -0500 Subject: [PATCH 224/251] atlantic: Fix buff_ring OOB in aq_ring_rx_clean The function obtain the next buffer without boundary check. We should return with I/O error code. The bug is found by fuzzing and the crash report is attached. It is an OOB bug although reported as use-after-free. [ 4.804724] BUG: KASAN: use-after-free in aq_ring_rx_clean+0x1e88/0x2730 [atlantic] [ 4.805661] Read of size 4 at addr ffff888034fe93a8 by task ksoftirqd/0/9 [ 4.806505] [ 4.806703] CPU: 0 PID: 9 Comm: ksoftirqd/0 Tainted: G W 5.6.0 #34 [ 4.809030] Call Trace: [ 4.809343] dump_stack+0x76/0xa0 [ 4.809755] print_address_description.constprop.0+0x16/0x200 [ 4.810455] ? aq_ring_rx_clean+0x1e88/0x2730 [atlantic] [ 4.811234] ? aq_ring_rx_clean+0x1e88/0x2730 [atlantic] [ 4.813183] __kasan_report.cold+0x37/0x7c [ 4.813715] ? aq_ring_rx_clean+0x1e88/0x2730 [atlantic] [ 4.814393] kasan_report+0xe/0x20 [ 4.814837] aq_ring_rx_clean+0x1e88/0x2730 [atlantic] [ 4.815499] ? hw_atl_b0_hw_ring_rx_receive+0x9a5/0xb90 [atlantic] [ 4.816290] aq_vec_poll+0x179/0x5d0 [atlantic] [ 4.816870] ? _GLOBAL__sub_I_65535_1_aq_pci_func_init+0x20/0x20 [atlantic] [ 4.817746] ? __next_timer_interrupt+0xba/0xf0 [ 4.818322] net_rx_action+0x363/0xbd0 [ 4.818803] ? call_timer_fn+0x240/0x240 [ 4.819302] ? __switch_to_asm+0x40/0x70 [ 4.819809] ? napi_busy_loop+0x520/0x520 [ 4.820324] __do_softirq+0x18c/0x634 [ 4.820797] ? takeover_tasklets+0x5f0/0x5f0 [ 4.821343] run_ksoftirqd+0x15/0x20 [ 4.821804] smpboot_thread_fn+0x2f1/0x6b0 [ 4.822331] ? smpboot_unregister_percpu_thread+0x160/0x160 [ 4.823041] ? __kthread_parkme+0x80/0x100 [ 4.823571] ? smpboot_unregister_percpu_thread+0x160/0x160 [ 4.824301] kthread+0x2b5/0x3b0 [ 4.824723] ? kthread_create_on_node+0xd0/0xd0 [ 4.825304] ret_from_fork+0x35/0x40 Signed-off-by: Zekun Shen Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 81b3756417ec..77e76c9efd32 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -366,6 +366,10 @@ int aq_ring_rx_clean(struct aq_ring_s *self, if (!buff->is_eop) { buff_ = buff; do { + if (buff_->next >= self->size) { + err = -EIO; + goto err_exit; + } next_ = buff_->next, buff_ = &self->buff_ring[next_]; is_rsc_completed = @@ -389,6 +393,10 @@ int aq_ring_rx_clean(struct aq_ring_s *self, (buff->is_lro && buff->is_cso_err)) { buff_ = buff; do { + if (buff_->next >= self->size) { + err = -EIO; + goto err_exit; + } next_ = buff_->next, buff_ = &self->buff_ring[next_]; From ca506fca461b260ab32952b610c3d4aadc6c11fd Mon Sep 17 00:00:00 2001 From: Matthias-Christian Ott Date: Sun, 26 Dec 2021 23:12:08 +0100 Subject: [PATCH 225/251] net: usb: pegasus: Do not drop long Ethernet frames The D-Link DSB-650TX (2001:4002) is unable to receive Ethernet frames that are longer than 1518 octets, for example, Ethernet frames that contain 802.1Q VLAN tags. The frames are sent to the pegasus driver via USB but the driver discards them because they have the Long_pkt field set to 1 in the received status report. The function read_bulk_callback of the pegasus driver treats such received "packets" (in the terminology of the hardware) as errors but the field simply does just indicate that the Ethernet frame (MAC destination to FCS) is longer than 1518 octets. It seems that in the 1990s there was a distinction between "giant" (> 1518) and "runt" (< 64) frames and the hardware includes flags to indicate this distinction. It seems that the purpose of the distinction "giant" frames was to not allow infinitely long frames due to transmission errors and to allow hardware to have an upper limit of the frame size. However, the hardware already has such limit with its 2048 octet receive buffer and, therefore, Long_pkt is merely a convention and should not be treated as a receive error. Actually, the hardware is even able to receive Ethernet frames with 2048 octets which exceeds the claimed limit frame size limit of the driver of 1536 octets (PEGASUS_MTU). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Matthias-Christian Ott Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/usb/pegasus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index c4cd40b090fd..feb247e355f7 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -493,11 +493,11 @@ static void read_bulk_callback(struct urb *urb) goto goon; rx_status = buf[count - 2]; - if (rx_status & 0x1e) { + if (rx_status & 0x1c) { netif_dbg(pegasus, rx_err, net, "RX packet error %x\n", rx_status); net->stats.rx_errors++; - if (rx_status & 0x06) /* long or runt */ + if (rx_status & 0x04) /* runt */ net->stats.rx_length_errors++; if (rx_status & 0x08) net->stats.rx_crc_errors++; From 7175f02c4e5f5a9430113ab9ca0fd0ce98b28a51 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Sun, 26 Dec 2021 16:01:27 +0300 Subject: [PATCH 226/251] uapi: fix linux/nfc.h userspace compilation errors Replace sa_family_t with __kernel_sa_family_t to fix the following linux/nfc.h userspace compilation errors: /usr/include/linux/nfc.h:266:2: error: unknown type name 'sa_family_t' sa_family_t sa_family; /usr/include/linux/nfc.h:274:2: error: unknown type name 'sa_family_t' sa_family_t sa_family; Fixes: 23b7869c0fd0 ("NFC: add the NFC socket raw protocol") Fixes: d646960f7986 ("NFC: Initial LLCP support") Cc: Signed-off-by: Dmitry V. Levin Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- include/uapi/linux/nfc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index f6e3c8c9c744..aadad43d943a 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -263,7 +263,7 @@ enum nfc_sdp_attr { #define NFC_SE_ENABLED 0x1 struct sockaddr_nfc { - sa_family_t sa_family; + __kernel_sa_family_t sa_family; __u32 dev_idx; __u32 target_idx; __u32 nfc_protocol; @@ -271,7 +271,7 @@ struct sockaddr_nfc { #define NFC_LLCP_MAX_SERVICE_NAME 63 struct sockaddr_nfc_llcp { - sa_family_t sa_family; + __kernel_sa_family_t sa_family; __u32 dev_idx; __u32 target_idx; __u32 nfc_protocol; From 79b69a83705e621b258ac6d8ae6d3bfdb4b930aa Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 26 Dec 2021 13:03:47 +0100 Subject: [PATCH 227/251] nfc: uapi: use kernel size_t to fix user-space builds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix user-space builds if it includes /usr/include/linux/nfc.h before some of other headers: /usr/include/linux/nfc.h:281:9: error: unknown type name ‘size_t’ 281 | size_t service_name_len; | ^~~~~~ Fixes: d646960f7986 ("NFC: Initial LLCP support") Cc: Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- include/uapi/linux/nfc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index aadad43d943a..4fa4e979e948 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -278,7 +278,7 @@ struct sockaddr_nfc_llcp { __u8 dsap; /* Destination SAP, if known */ __u8 ssap; /* Source SAP to be bound to */ char service_name[NFC_LLCP_MAX_SERVICE_NAME]; /* Service name URI */; - size_t service_name_len; + __kernel_size_t service_name_len; }; /* NFC socket protocols */ From 732bc2ff080c447f8524f40c970c481f5da6eed3 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 24 Dec 2021 07:07:39 -0800 Subject: [PATCH 228/251] selinux: initialize proto variable in selinux_ip_postroute_compat() Clang static analysis reports this warning hooks.c:5765:6: warning: 4th function call argument is an uninitialized value if (selinux_xfrm_postroute_last(sksec->sid, skb, &ad, proto)) ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ selinux_parse_skb() can return ok without setting proto. The later call to selinux_xfrm_postroute_last() does an early check of proto and can return ok if the garbage proto value matches. So initialize proto. Cc: stable@vger.kernel.org Fixes: eef9b41622f2 ("selinux: cleanup selinux_xfrm_sock_rcv_skb() and selinux_xfrm_postroute_last()") Signed-off-by: Tom Rix [PM: typo/spelling and checkpatch.pl description fixes] Signed-off-by: Paul Moore --- security/selinux/hooks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 1afc06ffd969..dde4ecc0cd18 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5785,7 +5785,7 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb, struct sk_security_struct *sksec; struct common_audit_data ad; struct lsm_network_audit net = {0,}; - u8 proto; + u8 proto = 0; sk = skb_to_full_sk(skb); if (sk == NULL) From 8b5fdfc57cc2471179d1c51081424ded833c16c8 Mon Sep 17 00:00:00 2001 From: wolfgang huang Date: Tue, 28 Dec 2021 16:01:20 +0800 Subject: [PATCH 229/251] mISDN: change function names to avoid conflicts As we build for mips, we meet following error. l1_init error with multiple definition. Some architecture devices usually marked with l1, l2, lxx as the start-up phase. so we change the mISDN function names, align with Isdnl2_xxx. mips-linux-gnu-ld: drivers/isdn/mISDN/layer1.o: in function `l1_init': (.text+0x890): multiple definition of `l1_init'; \ arch/mips/kernel/bmips_5xxx_init.o:(.text+0xf0): first defined here make[1]: *** [home/mips/kernel-build/linux/Makefile:1161: vmlinux] Error 1 Signed-off-by: wolfgang huang Reported-by: k2ci Signed-off-by: David S. Miller --- drivers/isdn/mISDN/core.c | 6 +++--- drivers/isdn/mISDN/core.h | 4 ++-- drivers/isdn/mISDN/layer1.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/isdn/mISDN/core.c b/drivers/isdn/mISDN/core.c index 55891e420446..a41b4b264594 100644 --- a/drivers/isdn/mISDN/core.c +++ b/drivers/isdn/mISDN/core.c @@ -381,7 +381,7 @@ mISDNInit(void) err = mISDN_inittimer(&debug); if (err) goto error2; - err = l1_init(&debug); + err = Isdnl1_Init(&debug); if (err) goto error3; err = Isdnl2_Init(&debug); @@ -395,7 +395,7 @@ mISDNInit(void) error5: Isdnl2_cleanup(); error4: - l1_cleanup(); + Isdnl1_cleanup(); error3: mISDN_timer_cleanup(); error2: @@ -408,7 +408,7 @@ static void mISDN_cleanup(void) { misdn_sock_cleanup(); Isdnl2_cleanup(); - l1_cleanup(); + Isdnl1_cleanup(); mISDN_timer_cleanup(); class_unregister(&mISDN_class); diff --git a/drivers/isdn/mISDN/core.h b/drivers/isdn/mISDN/core.h index 23b44d303327..42599f49c189 100644 --- a/drivers/isdn/mISDN/core.h +++ b/drivers/isdn/mISDN/core.h @@ -60,8 +60,8 @@ struct Bprotocol *get_Bprotocol4id(u_int); extern int mISDN_inittimer(u_int *); extern void mISDN_timer_cleanup(void); -extern int l1_init(u_int *); -extern void l1_cleanup(void); +extern int Isdnl1_Init(u_int *); +extern void Isdnl1_cleanup(void); extern int Isdnl2_Init(u_int *); extern void Isdnl2_cleanup(void); diff --git a/drivers/isdn/mISDN/layer1.c b/drivers/isdn/mISDN/layer1.c index 98a3bc6c1700..7b31c25a550e 100644 --- a/drivers/isdn/mISDN/layer1.c +++ b/drivers/isdn/mISDN/layer1.c @@ -398,7 +398,7 @@ create_l1(struct dchannel *dch, dchannel_l1callback *dcb) { EXPORT_SYMBOL(create_l1); int -l1_init(u_int *deb) +Isdnl1_Init(u_int *deb) { debug = deb; l1fsm_s.state_count = L1S_STATE_COUNT; @@ -409,7 +409,7 @@ l1_init(u_int *deb) } void -l1_cleanup(void) +Isdnl1_cleanup(void) { mISDN_FsmFree(&l1fsm_s); } From 1cd5384c88af5b59bf9f3b6c1a151bc14b88c2cd Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 26 Dec 2021 18:51:44 +0100 Subject: [PATCH 230/251] net: ag71xx: Fix a potential double free in error handling paths 'ndev' is a managed resource allocated with devm_alloc_etherdev(), so there is no need to call free_netdev() explicitly or there will be a double free(). Simplify all error handling paths accordingly. Fixes: d51b6ce441d3 ("net: ethernet: add ag71xx driver") Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/ag71xx.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index 88d2ab748399..4579ddf9c427 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -1913,15 +1913,12 @@ static int ag71xx_probe(struct platform_device *pdev) ag->mac_reset = devm_reset_control_get(&pdev->dev, "mac"); if (IS_ERR(ag->mac_reset)) { netif_err(ag, probe, ndev, "missing mac reset\n"); - err = PTR_ERR(ag->mac_reset); - goto err_free; + return PTR_ERR(ag->mac_reset); } ag->mac_base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); - if (!ag->mac_base) { - err = -ENOMEM; - goto err_free; - } + if (!ag->mac_base) + return -ENOMEM; ndev->irq = platform_get_irq(pdev, 0); err = devm_request_irq(&pdev->dev, ndev->irq, ag71xx_interrupt, @@ -1929,7 +1926,7 @@ static int ag71xx_probe(struct platform_device *pdev) if (err) { netif_err(ag, probe, ndev, "unable to request IRQ %d\n", ndev->irq); - goto err_free; + return err; } ndev->netdev_ops = &ag71xx_netdev_ops; @@ -1957,10 +1954,8 @@ static int ag71xx_probe(struct platform_device *pdev) ag->stop_desc = dmam_alloc_coherent(&pdev->dev, sizeof(struct ag71xx_desc), &ag->stop_desc_dma, GFP_KERNEL); - if (!ag->stop_desc) { - err = -ENOMEM; - goto err_free; - } + if (!ag->stop_desc) + return -ENOMEM; ag->stop_desc->data = 0; ag->stop_desc->ctrl = 0; @@ -1975,7 +1970,7 @@ static int ag71xx_probe(struct platform_device *pdev) err = of_get_phy_mode(np, &ag->phy_if_mode); if (err) { netif_err(ag, probe, ndev, "missing phy-mode property in DT\n"); - goto err_free; + return err; } netif_napi_add(ndev, &ag->napi, ag71xx_poll, AG71XX_NAPI_WEIGHT); @@ -1983,7 +1978,7 @@ static int ag71xx_probe(struct platform_device *pdev) err = clk_prepare_enable(ag->clk_eth); if (err) { netif_err(ag, probe, ndev, "Failed to enable eth clk.\n"); - goto err_free; + return err; } ag71xx_wr(ag, AG71XX_REG_MAC_CFG1, 0); @@ -2019,8 +2014,6 @@ err_mdio_remove: ag71xx_mdio_remove(ag); err_put_clk: clk_disable_unprepare(ag->clk_eth); -err_free: - free_netdev(ndev); return err; } From 5be60a945329d82f06fc755a43eeefbfc5f77d72 Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Mon, 27 Dec 2021 17:22:03 +0100 Subject: [PATCH 231/251] net: lantiq_xrx200: fix statistics of received bytes Received frames have FCS truncated. There is no need to subtract FCS length from the statistics. Fixes: fe1a56420cf2 ("net: lantiq: Add Lantiq / Intel VRX200 Ethernet driver") Signed-off-by: Aleksander Jan Bajkowski Signed-off-by: David S. Miller --- drivers/net/ethernet/lantiq_xrx200.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 96bd6f2b21ed..80bfaf2fec92 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -224,7 +224,7 @@ static int xrx200_hw_receive(struct xrx200_chan *ch) skb->protocol = eth_type_trans(skb, net_dev); netif_receive_skb(skb); net_dev->stats.rx_packets++; - net_dev->stats.rx_bytes += len - ETH_FCS_LEN; + net_dev->stats.rx_bytes += len; return 0; } From 1b9dadba502234eea7244879b8d5d126bfaf9f0c Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 28 Dec 2021 12:48:11 +0000 Subject: [PATCH 232/251] NFC: st21nfca: Fix memory leak in device probe and remove 'phy->pending_skb' is alloced when device probe, but forgot to free in the error handling path and remove path, this cause memory leak as follows: unreferenced object 0xffff88800bc06800 (size 512): comm "8", pid 11775, jiffies 4295159829 (age 9.032s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000d66c09ce>] __kmalloc_node_track_caller+0x1ed/0x450 [<00000000c93382b3>] kmalloc_reserve+0x37/0xd0 [<000000005fea522c>] __alloc_skb+0x124/0x380 [<0000000019f29f9a>] st21nfca_hci_i2c_probe+0x170/0x8f2 Fix it by freeing 'pending_skb' in error and remove. Fixes: 68957303f44a ("NFC: ST21NFCA: Add driver for STMicroelectronics ST21NFCA NFC Chip") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/nfc/st21nfca/i2c.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c index f126ce96a7df..35b32fb90906 100644 --- a/drivers/nfc/st21nfca/i2c.c +++ b/drivers/nfc/st21nfca/i2c.c @@ -524,7 +524,8 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client, phy->gpiod_ena = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW); if (IS_ERR(phy->gpiod_ena)) { nfc_err(dev, "Unable to get ENABLE GPIO\n"); - return PTR_ERR(phy->gpiod_ena); + r = PTR_ERR(phy->gpiod_ena); + goto out_free; } phy->se_status.is_ese_present = @@ -535,7 +536,7 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client, r = st21nfca_hci_platform_init(phy); if (r < 0) { nfc_err(&client->dev, "Unable to reboot st21nfca\n"); - return r; + goto out_free; } r = devm_request_threaded_irq(&client->dev, client->irq, NULL, @@ -544,15 +545,23 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client, ST21NFCA_HCI_DRIVER_NAME, phy); if (r < 0) { nfc_err(&client->dev, "Unable to register IRQ handler\n"); - return r; + goto out_free; } - return st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME, - ST21NFCA_FRAME_HEADROOM, - ST21NFCA_FRAME_TAILROOM, - ST21NFCA_HCI_LLC_MAX_PAYLOAD, - &phy->hdev, - &phy->se_status); + r = st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME, + ST21NFCA_FRAME_HEADROOM, + ST21NFCA_FRAME_TAILROOM, + ST21NFCA_HCI_LLC_MAX_PAYLOAD, + &phy->hdev, + &phy->se_status); + if (r) + goto out_free; + + return 0; + +out_free: + kfree_skb(phy->pending_skb); + return r; } static int st21nfca_hci_i2c_remove(struct i2c_client *client) @@ -563,6 +572,8 @@ static int st21nfca_hci_i2c_remove(struct i2c_client *client) if (phy->powered) st21nfca_hci_i2c_disable(phy); + if (phy->pending_skb) + kfree_skb(phy->pending_skb); return 0; } From 90cee52f2e780345d3629e278291aea5ac74f40f Mon Sep 17 00:00:00 2001 From: Dust Li Date: Tue, 28 Dec 2021 17:03:24 +0800 Subject: [PATCH 233/251] net/smc: don't send CDC/LLC message if link not ready We found smc_llc_send_link_delete_all() sometimes wait for 2s timeout when testing with RDMA link up/down. It is possible when a smc_link is in ACTIVATING state, the underlaying QP is still in RESET or RTR state, which cannot send any messages out. smc_llc_send_link_delete_all() use smc_link_usable() to checks whether the link is usable, if the QP is still in RESET or RTR state, but the smc_link is in ACTIVATING, this LLC message will always fail without any CQE entering the CQ, and we will always wait 2s before timeout. Since we cannot send any messages through the QP before the QP enter RTS. I add a wrapper smc_link_sendable() which checks the state of QP along with the link state. And replace smc_link_usable() with smc_link_sendable() in all LLC & CDC message sending routine. Fixes: 5f08318f617b ("smc: connection data control (CDC)") Signed-off-by: Dust Li Signed-off-by: David S. Miller --- net/smc/smc_core.c | 2 +- net/smc/smc_core.h | 6 ++++++ net/smc/smc_llc.c | 2 +- net/smc/smc_wr.c | 4 ++-- net/smc/smc_wr.h | 2 +- 5 files changed, 11 insertions(+), 5 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 387d28b2f8dd..55ca175e8d57 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -647,7 +647,7 @@ static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr) for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { struct smc_link *lnk = &lgr->lnk[i]; - if (smc_link_usable(lnk)) + if (smc_link_sendable(lnk)) lnk->state = SMC_LNK_INACTIVE; } wake_up_all(&lgr->llc_msg_waiter); diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 59cef3b830d8..d63b08274197 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -415,6 +415,12 @@ static inline bool smc_link_usable(struct smc_link *lnk) return true; } +static inline bool smc_link_sendable(struct smc_link *lnk) +{ + return smc_link_usable(lnk) && + lnk->qp_attr.cur_qp_state == IB_QPS_RTS; +} + static inline bool smc_link_active(struct smc_link *lnk) { return lnk->state == SMC_LNK_ACTIVE; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index b102680296b8..3e9fd8a3124c 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -1630,7 +1630,7 @@ void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn) delllc.reason = htonl(rsn); for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (!smc_link_usable(&lgr->lnk[i])) + if (!smc_link_sendable(&lgr->lnk[i])) continue; if (!smc_llc_send_message_wait(&lgr->lnk[i], &delllc)) break; diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 79a7431f534e..df1dc225cbab 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -188,7 +188,7 @@ void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context) static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx) { *idx = link->wr_tx_cnt; - if (!smc_link_usable(link)) + if (!smc_link_sendable(link)) return -ENOLINK; for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) { if (!test_and_set_bit(*idx, link->wr_tx_mask)) @@ -231,7 +231,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, } else { rc = wait_event_interruptible_timeout( link->wr_tx_wait, - !smc_link_usable(link) || + !smc_link_sendable(link) || lgr->terminating || (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY), SMC_WR_TX_WAIT_FREE_SLOT_TIME); diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index f353311e6f84..48ed9b08ac7a 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -62,7 +62,7 @@ static inline void smc_wr_tx_set_wr_id(atomic_long_t *wr_tx_id, long val) static inline bool smc_wr_tx_link_hold(struct smc_link *link) { - if (!smc_link_usable(link)) + if (!smc_link_sendable(link)) return false; atomic_inc(&link->wr_tx_refcnt); return true; From 349d43127dac00c15231e8ffbcaabd70f7b0e544 Mon Sep 17 00:00:00 2001 From: Dust Li Date: Tue, 28 Dec 2021 17:03:25 +0800 Subject: [PATCH 234/251] net/smc: fix kernel panic caused by race of smc_sock A crash occurs when smc_cdc_tx_handler() tries to access smc_sock but smc_release() has already freed it. [ 4570.695099] BUG: unable to handle page fault for address: 000000002eae9e88 [ 4570.696048] #PF: supervisor write access in kernel mode [ 4570.696728] #PF: error_code(0x0002) - not-present page [ 4570.697401] PGD 0 P4D 0 [ 4570.697716] Oops: 0002 [#1] PREEMPT SMP NOPTI [ 4570.698228] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.16.0-rc4+ #111 [ 4570.699013] Hardware name: Alibaba Cloud Alibaba Cloud ECS, BIOS 8c24b4c 04/0 [ 4570.699933] RIP: 0010:_raw_spin_lock+0x1a/0x30 <...> [ 4570.711446] Call Trace: [ 4570.711746] [ 4570.711992] smc_cdc_tx_handler+0x41/0xc0 [ 4570.712470] smc_wr_tx_tasklet_fn+0x213/0x560 [ 4570.712981] ? smc_cdc_tx_dismisser+0x10/0x10 [ 4570.713489] tasklet_action_common.isra.17+0x66/0x140 [ 4570.714083] __do_softirq+0x123/0x2f4 [ 4570.714521] irq_exit_rcu+0xc4/0xf0 [ 4570.714934] common_interrupt+0xba/0xe0 Though smc_cdc_tx_handler() checked the existence of smc connection, smc_release() may have already dismissed and released the smc socket before smc_cdc_tx_handler() further visits it. smc_cdc_tx_handler() |smc_release() if (!conn) | | |smc_cdc_tx_dismiss_slots() | smc_cdc_tx_dismisser() | |sock_put(&smc->sk) <- last sock_put, | smc_sock freed bh_lock_sock(&smc->sk) (panic) | To make sure we won't receive any CDC messages after we free the smc_sock, add a refcount on the smc_connection for inflight CDC message(posted to the QP but haven't received related CQE), and don't release the smc_connection until all the inflight CDC messages haven been done, for both success or failed ones. Using refcount on CDC messages brings another problem: when the link is going to be destroyed, smcr_link_clear() will reset the QP, which then remove all the pending CQEs related to the QP in the CQ. To make sure all the CQEs will always come back so the refcount on the smc_connection can always reach 0, smc_ib_modify_qp_reset() was replaced by smc_ib_modify_qp_error(). And remove the timeout in smc_wr_tx_wait_no_pending_sends() since we need to wait for all pending WQEs done, or we may encounter use-after- free when handling CQEs. For IB device removal routine, we need to wait for all the QPs on that device been destroyed before we can destroy CQs on the device, or the refcount on smc_connection won't reach 0 and smc_sock cannot be released. Fixes: 5f08318f617b ("smc: connection data control (CDC)") Reported-by: Wen Gu Signed-off-by: Dust Li Signed-off-by: David S. Miller --- net/smc/smc.h | 5 +++++ net/smc/smc_cdc.c | 52 +++++++++++++++++++++------------------------- net/smc/smc_cdc.h | 2 +- net/smc/smc_core.c | 25 +++++++++++++++++----- net/smc/smc_ib.c | 4 ++-- net/smc/smc_ib.h | 1 + net/smc/smc_wr.c | 41 +++--------------------------------- net/smc/smc_wr.h | 3 +-- 8 files changed, 57 insertions(+), 76 deletions(-) diff --git a/net/smc/smc.h b/net/smc/smc.h index f4286ca1f228..1a4fc1c6c4ab 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -180,6 +180,11 @@ struct smc_connection { u16 tx_cdc_seq; /* sequence # for CDC send */ u16 tx_cdc_seq_fin; /* sequence # - tx completed */ spinlock_t send_lock; /* protect wr_sends */ + atomic_t cdc_pend_tx_wr; /* number of pending tx CDC wqe + * - inc when post wqe, + * - dec on polled tx cqe + */ + wait_queue_head_t cdc_pend_tx_wq; /* wakeup on no cdc_pend_tx_wr*/ struct delayed_work tx_work; /* retry of smc_cdc_msg_send */ u32 tx_off; /* base offset in peer rmb */ diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 99acd337ba90..84c8a4374fdd 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -31,10 +31,6 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, struct smc_sock *smc; int diff; - if (!conn) - /* already dismissed */ - return; - smc = container_of(conn, struct smc_sock, conn); bh_lock_sock(&smc->sk); if (!wc_status) { @@ -51,6 +47,12 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, conn); conn->tx_cdc_seq_fin = cdcpend->ctrl_seq; } + + if (atomic_dec_and_test(&conn->cdc_pend_tx_wr) && + unlikely(wq_has_sleeper(&conn->cdc_pend_tx_wq))) + wake_up(&conn->cdc_pend_tx_wq); + WARN_ON(atomic_read(&conn->cdc_pend_tx_wr) < 0); + smc_tx_sndbuf_nonfull(smc); bh_unlock_sock(&smc->sk); } @@ -107,6 +109,10 @@ int smc_cdc_msg_send(struct smc_connection *conn, conn->tx_cdc_seq++; conn->local_tx_ctrl.seqno = conn->tx_cdc_seq; smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, conn, &cfed); + + atomic_inc(&conn->cdc_pend_tx_wr); + smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */ + rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); if (!rc) { smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn); @@ -114,6 +120,7 @@ int smc_cdc_msg_send(struct smc_connection *conn, } else { conn->tx_cdc_seq--; conn->local_tx_ctrl.seqno = conn->tx_cdc_seq; + atomic_dec(&conn->cdc_pend_tx_wr); } return rc; @@ -136,7 +143,18 @@ int smcr_cdc_msg_send_validation(struct smc_connection *conn, peer->token = htonl(local->token); peer->prod_flags.failover_validation = 1; + /* We need to set pend->conn here to make sure smc_cdc_tx_handler() + * can handle properly + */ + smc_cdc_add_pending_send(conn, pend); + + atomic_inc(&conn->cdc_pend_tx_wr); + smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */ + rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); + if (unlikely(rc)) + atomic_dec(&conn->cdc_pend_tx_wr); + return rc; } @@ -193,31 +211,9 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) return rc; } -static bool smc_cdc_tx_filter(struct smc_wr_tx_pend_priv *tx_pend, - unsigned long data) +void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn) { - struct smc_connection *conn = (struct smc_connection *)data; - struct smc_cdc_tx_pend *cdc_pend = - (struct smc_cdc_tx_pend *)tx_pend; - - return cdc_pend->conn == conn; -} - -static void smc_cdc_tx_dismisser(struct smc_wr_tx_pend_priv *tx_pend) -{ - struct smc_cdc_tx_pend *cdc_pend = - (struct smc_cdc_tx_pend *)tx_pend; - - cdc_pend->conn = NULL; -} - -void smc_cdc_tx_dismiss_slots(struct smc_connection *conn) -{ - struct smc_link *link = conn->lnk; - - smc_wr_tx_dismiss_slots(link, SMC_CDC_MSG_TYPE, - smc_cdc_tx_filter, smc_cdc_tx_dismisser, - (unsigned long)conn); + wait_event(conn->cdc_pend_tx_wq, !atomic_read(&conn->cdc_pend_tx_wr)); } /* Send a SMC-D CDC header. diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index 0a0a89abd38b..696cc11f2303 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -291,7 +291,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn, struct smc_wr_buf **wr_buf, struct smc_rdma_wr **wr_rdma_buf, struct smc_cdc_tx_pend **pend); -void smc_cdc_tx_dismiss_slots(struct smc_connection *conn); +void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn); int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, struct smc_cdc_tx_pend *pend); int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 55ca175e8d57..a6849362f4dd 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1127,7 +1127,7 @@ void smc_conn_free(struct smc_connection *conn) smc_ism_unset_conn(conn); tasklet_kill(&conn->rx_tsklet); } else { - smc_cdc_tx_dismiss_slots(conn); + smc_cdc_wait_pend_tx_wr(conn); if (current_work() != &conn->abort_work) cancel_work_sync(&conn->abort_work); } @@ -1204,7 +1204,7 @@ void smcr_link_clear(struct smc_link *lnk, bool log) smc_llc_link_clear(lnk, log); smcr_buf_unmap_lgr(lnk); smcr_rtoken_clear_link(lnk); - smc_ib_modify_qp_reset(lnk); + smc_ib_modify_qp_error(lnk); smc_wr_free_link(lnk); smc_ib_destroy_queue_pair(lnk); smc_ib_dealloc_protection_domain(lnk); @@ -1336,7 +1336,7 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft) else tasklet_unlock_wait(&conn->rx_tsklet); } else { - smc_cdc_tx_dismiss_slots(conn); + smc_cdc_wait_pend_tx_wr(conn); } smc_lgr_unregister_conn(conn); smc_close_active_abort(smc); @@ -1459,11 +1459,16 @@ void smc_smcd_terminate_all(struct smcd_dev *smcd) /* Called when an SMCR device is removed or the smc module is unloaded. * If smcibdev is given, all SMCR link groups using this device are terminated. * If smcibdev is NULL, all SMCR link groups are terminated. + * + * We must wait here for QPs been destroyed before we destroy the CQs, + * or we won't received any CQEs and cdc_pend_tx_wr cannot reach 0 thus + * smc_sock cannot be released. */ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) { struct smc_link_group *lgr, *lg; LIST_HEAD(lgr_free_list); + LIST_HEAD(lgr_linkdown_list); int i; spin_lock_bh(&smc_lgr_list.lock); @@ -1475,7 +1480,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) { for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { if (lgr->lnk[i].smcibdev == smcibdev) - smcr_link_down_cond_sched(&lgr->lnk[i]); + list_move_tail(&lgr->list, &lgr_linkdown_list); } } } @@ -1487,6 +1492,16 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) __smc_lgr_terminate(lgr, false); } + list_for_each_entry_safe(lgr, lg, &lgr_linkdown_list, list) { + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (lgr->lnk[i].smcibdev == smcibdev) { + mutex_lock(&lgr->llc_conf_mutex); + smcr_link_down_cond(&lgr->lnk[i]); + mutex_unlock(&lgr->llc_conf_mutex); + } + } + } + if (smcibdev) { if (atomic_read(&smcibdev->lnk_cnt)) wait_event(smcibdev->lnks_deleted, @@ -1586,7 +1601,6 @@ static void smcr_link_down(struct smc_link *lnk) if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list)) return; - smc_ib_modify_qp_reset(lnk); to_lnk = smc_switch_conns(lgr, lnk, true); if (!to_lnk) { /* no backup link available */ smcr_link_clear(lnk, true); @@ -1824,6 +1838,7 @@ create: conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; conn->urg_state = SMC_URG_READ; + init_waitqueue_head(&conn->cdc_pend_tx_wq); INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work); if (ini->is_smcd) { conn->rx_off = sizeof(struct smcd_cdc_msg); diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index d93055ec17ae..fe5d5399c4e8 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -109,12 +109,12 @@ int smc_ib_modify_qp_rts(struct smc_link *lnk) IB_QP_MAX_QP_RD_ATOMIC); } -int smc_ib_modify_qp_reset(struct smc_link *lnk) +int smc_ib_modify_qp_error(struct smc_link *lnk) { struct ib_qp_attr qp_attr; memset(&qp_attr, 0, sizeof(qp_attr)); - qp_attr.qp_state = IB_QPS_RESET; + qp_attr.qp_state = IB_QPS_ERR; return ib_modify_qp(lnk->roce_qp, &qp_attr, IB_QP_STATE); } diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index 07585937370e..bfa1c6bf6313 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -90,6 +90,7 @@ int smc_ib_create_queue_pair(struct smc_link *lnk); int smc_ib_ready_link(struct smc_link *lnk); int smc_ib_modify_qp_rts(struct smc_link *lnk); int smc_ib_modify_qp_reset(struct smc_link *lnk); +int smc_ib_modify_qp_error(struct smc_link *lnk); long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev); int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, struct smc_buf_desc *buf_slot, u8 link_idx); diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index df1dc225cbab..c6cfdea8b71b 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -62,13 +62,9 @@ static inline bool smc_wr_is_tx_pend(struct smc_link *link) } /* wait till all pending tx work requests on the given link are completed */ -int smc_wr_tx_wait_no_pending_sends(struct smc_link *link) +void smc_wr_tx_wait_no_pending_sends(struct smc_link *link) { - if (wait_event_timeout(link->wr_tx_wait, !smc_wr_is_tx_pend(link), - SMC_WR_TX_WAIT_PENDING_TIME)) - return 0; - else /* timeout */ - return -EPIPE; + wait_event(link->wr_tx_wait, !smc_wr_is_tx_pend(link)); } static inline int smc_wr_tx_find_pending_index(struct smc_link *link, u64 wr_id) @@ -87,7 +83,6 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) struct smc_wr_tx_pend pnd_snd; struct smc_link *link; u32 pnd_snd_idx; - int i; link = wc->qp->qp_context; @@ -128,14 +123,6 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) } if (wc->status) { - for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { - /* clear full struct smc_wr_tx_pend including .priv */ - memset(&link->wr_tx_pends[i], 0, - sizeof(link->wr_tx_pends[i])); - memset(&link->wr_tx_bufs[i], 0, - sizeof(link->wr_tx_bufs[i])); - clear_bit(i, link->wr_tx_mask); - } if (link->lgr->smc_version == SMC_V2) { memset(link->wr_tx_v2_pend, 0, sizeof(*link->wr_tx_v2_pend)); @@ -421,25 +408,6 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) return rc; } -void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_tx_hdr_type, - smc_wr_tx_filter filter, - smc_wr_tx_dismisser dismisser, - unsigned long data) -{ - struct smc_wr_tx_pend_priv *tx_pend; - struct smc_wr_rx_hdr *wr_tx; - int i; - - for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { - wr_tx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i]; - if (wr_tx->type != wr_tx_hdr_type) - continue; - tx_pend = &link->wr_tx_pends[i].priv; - if (filter(tx_pend, data)) - dismisser(tx_pend); - } -} - /****************************** receive queue ********************************/ int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler) @@ -675,10 +643,7 @@ void smc_wr_free_link(struct smc_link *lnk) smc_wr_wakeup_reg_wait(lnk); smc_wr_wakeup_tx_wait(lnk); - if (smc_wr_tx_wait_no_pending_sends(lnk)) - memset(lnk->wr_tx_mask, 0, - BITS_TO_LONGS(SMC_WR_BUF_CNT) * - sizeof(*lnk->wr_tx_mask)); + smc_wr_tx_wait_no_pending_sends(lnk); wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt))); wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt))); diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 48ed9b08ac7a..47512ccce5ef 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -22,7 +22,6 @@ #define SMC_WR_BUF_CNT 16 /* # of ctrl buffers per link */ #define SMC_WR_TX_WAIT_FREE_SLOT_TIME (10 * HZ) -#define SMC_WR_TX_WAIT_PENDING_TIME (5 * HZ) #define SMC_WR_TX_SIZE 44 /* actual size of wr_send data (<=SMC_WR_BUF_SIZE) */ @@ -130,7 +129,7 @@ void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type, smc_wr_tx_filter filter, smc_wr_tx_dismisser dismisser, unsigned long data); -int smc_wr_tx_wait_no_pending_sends(struct smc_link *link); +void smc_wr_tx_wait_no_pending_sends(struct smc_link *link); int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler); int smc_wr_rx_post_init(struct smc_link *link); From 1e81dcc1ab7de7a789e60042ce82d5a612632599 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Mon, 13 Dec 2021 16:39:49 -0800 Subject: [PATCH 235/251] igc: Do not enable crosstimestamping for i225-V models It was reported that when PCIe PTM is enabled, some lockups could be observed with some integrated i225-V models. While the issue is investigated, we can disable crosstimestamp for those models and see no loss of functionality, because those models don't have any support for time synchronization. Fixes: a90ec8483732 ("igc: Add support for PTP getcrosststamp()") Link: https://lore.kernel.org/all/924175a188159f4e03bd69908a91e606b574139b.camel@gmx.de/ Reported-by: Stefan Dietrich Signed-off-by: Vinicius Costa Gomes Tested-by: Nechama Kraus Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ptp.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 30568e3544cd..4f9245aa79a1 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -768,7 +768,20 @@ int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr) */ static bool igc_is_crosststamp_supported(struct igc_adapter *adapter) { - return IS_ENABLED(CONFIG_X86_TSC) ? pcie_ptm_enabled(adapter->pdev) : false; + if (!IS_ENABLED(CONFIG_X86_TSC)) + return false; + + /* FIXME: it was noticed that enabling support for PCIe PTM in + * some i225-V models could cause lockups when bringing the + * interface up/down. There should be no downsides to + * disabling crosstimestamping support for i225-V, as it + * doesn't have any PTP support. That way we gain some time + * while root causing the issue. + */ + if (adapter->pdev->device == IGC_DEV_ID_I225_V) + return false; + + return pcie_ptm_enabled(adapter->pdev); } static struct system_counterval_t igc_device_tstamp_to_system(u64 tstamp) From f85846bbf43de38fb2c89fe7d2a085608c4eb25a Mon Sep 17 00:00:00 2001 From: James McLaughlin Date: Fri, 17 Dec 2021 16:49:33 -0700 Subject: [PATCH 236/251] igc: Fix TX timestamp support for non-MSI-X platforms Time synchronization was not properly enabled on non-MSI-X platforms. Fixes: 2c344ae24501 ("igc: Add support for TX timestamping") Signed-off-by: James McLaughlin Reviewed-by: Vinicius Costa Gomes Tested-by: Nechama Kraus Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 8e448288ee26..d28a80a00953 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -5467,6 +5467,9 @@ static irqreturn_t igc_intr_msi(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } + if (icr & IGC_ICR_TS) + igc_tsync_interrupt(adapter); + napi_schedule(&q_vector->napi); return IRQ_HANDLED; @@ -5510,6 +5513,9 @@ static irqreturn_t igc_intr(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } + if (icr & IGC_ICR_TS) + igc_tsync_interrupt(adapter); + napi_schedule(&q_vector->napi); return IRQ_HANDLED; From 140c7bc7d1195750342ea0e6ab76179499ae7cd7 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 26 Dec 2021 15:06:17 +0100 Subject: [PATCH 237/251] ionic: Initialize the 'lif->dbid_inuse' bitmap When allocated, this bitmap is not initialized. Only the first bit is set a few lines below. Use bitmap_zalloc() to make sure that it is cleared before being used. Fixes: 6461b446f2a0 ("ionic: Add interrupts and doorbells") Signed-off-by: Christophe JAILLET Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/6a478eae0b5e6c63774e1f0ddb1a3f8c38fa8ade.1640527506.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 63f8a8163b5f..2ff7be17e5af 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -3135,7 +3135,7 @@ int ionic_lif_init(struct ionic_lif *lif) return -EINVAL; } - lif->dbid_inuse = bitmap_alloc(lif->dbid_count, GFP_KERNEL); + lif->dbid_inuse = bitmap_zalloc(lif->dbid_count, GFP_KERNEL); if (!lif->dbid_inuse) { dev_err(dev, "Failed alloc doorbell id bitmap, aborting\n"); return -ENOMEM; From 077cdda764c7f147e03e6065ba0cd1dbc1bf00d1 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 22 Dec 2021 09:20:58 +0200 Subject: [PATCH 238/251] net/mlx5e: TC, Fix memory leak with rules with internal port Fix a memory leak with decap rule with internal port as destination device. The driver allocates a modify hdr action but doesn't set the flow attr modify hdr action which results in skipping releasing the modify hdr action when releasing the flow. backtrace: [<000000005f8c651c>] krealloc+0x83/0xd0 [<000000009f59b143>] alloc_mod_hdr_actions+0x156/0x310 [mlx5_core] [<000000002257f342>] mlx5e_tc_match_to_reg_set_and_get_id+0x12a/0x360 [mlx5_core] [<00000000b44ea75a>] mlx5e_tc_add_fdb_flow+0x962/0x1470 [mlx5_core] [<0000000003e384a0>] __mlx5e_add_fdb_flow+0x54c/0xb90 [mlx5_core] [<00000000ed8b22b6>] mlx5e_configure_flower+0xe45/0x4af0 [mlx5_core] [<00000000024f4ab5>] mlx5e_rep_indr_offload.isra.0+0xfe/0x1b0 [mlx5_core] [<000000006c3bb494>] mlx5e_rep_indr_setup_tc_cb+0x90/0x130 [mlx5_core] [<00000000d3dac2ea>] tc_setup_cb_add+0x1d2/0x420 Fixes: b16eb3c81fe2 ("net/mlx5: Support internal port as decap route device") Signed-off-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index a60c7680fd2b..5e454a14428f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1441,6 +1441,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, metadata); if (err) goto err_out; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; } } From 992d8a4e38f0527f24e273ce3a9cd6dea1a6a436 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 29 Nov 2021 11:08:41 +0200 Subject: [PATCH 239/251] net/mlx5e: Fix wrong features assignment in case of error In case of an error in mlx5e_set_features(), 'netdev->features' must be updated with the correct state of the device to indicate which features were updated successfully. To do that we maintain a copy of 'netdev->features' and update it after successful feature changes, so we can assign it to back to 'netdev->features' if needed. However, since not all netdev features are handled by the driver (e.g. GRO/TSO/etc), some features may not be updated correctly in case of an error updating another feature. For example, while requesting to disable TSO (feature which is not handled by the driver) and enable HW-GRO, if an error occurs during HW-GRO enable, 'oper_features' will be assigned with 'netdev->features' and HW-GRO turned off. TSO will remain enabled in such case, which is a bug. To solve that, instead of using 'netdev->features' as the baseline of 'oper_features' and changing it on set feature success, use 'features' instead and update it in case of errors. Fixes: 75b81ce719b7 ("net/mlx5e: Don't override netdev features field unless in error flow") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3b0f3a831216..41379844eee1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3741,12 +3741,11 @@ static int set_feature_arfs(struct net_device *netdev, bool enable) static int mlx5e_handle_feature(struct net_device *netdev, netdev_features_t *features, - netdev_features_t wanted_features, netdev_features_t feature, mlx5e_feature_handler feature_handler) { - netdev_features_t changes = wanted_features ^ netdev->features; - bool enable = !!(wanted_features & feature); + netdev_features_t changes = *features ^ netdev->features; + bool enable = !!(*features & feature); int err; if (!(changes & feature)) @@ -3754,22 +3753,22 @@ static int mlx5e_handle_feature(struct net_device *netdev, err = feature_handler(netdev, enable); if (err) { + MLX5E_SET_FEATURE(features, feature, !enable); netdev_err(netdev, "%s feature %pNF failed, err %d\n", enable ? "Enable" : "Disable", &feature, err); return err; } - MLX5E_SET_FEATURE(features, feature, enable); return 0; } int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) { - netdev_features_t oper_features = netdev->features; + netdev_features_t oper_features = features; int err = 0; #define MLX5E_HANDLE_FEATURE(feature, handler) \ - mlx5e_handle_feature(netdev, &oper_features, features, feature, handler) + mlx5e_handle_feature(netdev, &oper_features, feature, handler) err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro); err |= MLX5E_HANDLE_FEATURE(NETIF_F_GRO_HW, set_feature_hw_gro); From 5bec7ca2be6955ca1aa0d7bae2b981de9b1c9844 Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Mon, 20 Dec 2021 15:52:50 +0000 Subject: [PATCH 240/251] xsk: Initialise xskb free_list_node This commit initialises the xskb's free_list_node when the xskb is allocated. This prevents a potential false negative returned from a call to list_empty for that node, such as the one introduced in commit 199d983bc015 ("xsk: Fix crash on double free in buffer pool") In my environment this issue caused packets to not be received by the xdpsock application if the traffic was running prior to application launch. This happened when the first batch of packets failed the xskmap lookup and XDP_PASS was returned from the bpf program. This action is handled in the i40e zc driver (and others) by allocating an skbuff, freeing the xdp_buff and adding the associated xskb to the xsk_buff_pool's free_list if it hadn't been added already. Without this fix, the xskb is not added to the free_list because the check to determine if it was added already returns an invalid positive result. Later, this caused allocation errors in the driver and the failure to receive packets. Fixes: 199d983bc015 ("xsk: Fix crash on double free in buffer pool") Fixes: 2b43470add8c ("xsk: Introduce AF_XDP buffer allocation API") Signed-off-by: Ciara Loftus Acked-by: Magnus Karlsson Link: https://lore.kernel.org/r/20211220155250.2746-1-ciara.loftus@intel.com Signed-off-by: Jakub Kicinski --- net/xdp/xsk_buff_pool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index bc4ad48ea4f0..fd39bb660ebc 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -83,6 +83,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, xskb = &pool->heads[i]; xskb->pool = pool; xskb->xdp.frame_sz = umem->chunk_size - umem->headroom; + INIT_LIST_HEAD(&xskb->free_list_node); if (pool->unaligned) pool->free_heads[i] = xskb; else From fb7bc9204095090731430c8921f9e629740c110a Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Wed, 29 Dec 2021 15:09:47 -0500 Subject: [PATCH 241/251] ipv6: raw: check passed optlen before reading Add a check that the user-provided option is at least as long as the number of bytes we intend to read. Before this patch we would blindly read sizeof(int) bytes even in cases where the user passed optlen Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20211229200947.2862255-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/raw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 60f1e4f5be5a..c51d5ce3711c 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1020,6 +1020,9 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, struct raw6_sock *rp = raw6_sk(sk); int val; + if (optlen < sizeof(val)) + return -EINVAL; + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; From 99b40610956a8a8755653a67392e2a8b772453be Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 27 Dec 2021 19:21:15 +0200 Subject: [PATCH 242/251] net: bridge: mcast: add and enforce query interval minimum As reported[1] if query interval is set too low and we have multiple bridges or even a single bridge with multiple querier vlans configured we can crash the machine. Add a 1 second minimum which must be enforced by overwriting the value if set lower (i.e. without returning an error) to avoid breaking user-space. If that happens a log message is emitted to let the administrator know that the interval has been set to the minimum. The issue has been present since these intervals could be user-controlled. [1] https://lore.kernel.org/netdev/e8b9ce41-57b9-b6e2-a46a-ff9c791cf0ba@gmail.com/ Fixes: d902eee43f19 ("bridge: Add multicast count/interval sysfs entries") Reported-by: Eric Dumazet Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 16 ++++++++++++++++ net/bridge/br_netlink.c | 2 +- net/bridge/br_private.h | 3 +++ net/bridge/br_sysfs_br.c | 2 +- net/bridge/br_vlan_options.c | 2 +- 5 files changed, 22 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index f3d751105343..998da4a2d209 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -4522,6 +4522,22 @@ int br_multicast_set_mld_version(struct net_bridge_mcast *brmctx, } #endif +void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx, + unsigned long val) +{ + unsigned long intvl_jiffies = clock_t_to_jiffies(val); + + if (intvl_jiffies < BR_MULTICAST_QUERY_INTVL_MIN) { + br_info(brmctx->br, + "trying to set multicast query interval below minimum, setting to %lu (%ums)\n", + jiffies_to_clock_t(BR_MULTICAST_QUERY_INTVL_MIN), + jiffies_to_msecs(BR_MULTICAST_QUERY_INTVL_MIN)); + intvl_jiffies = BR_MULTICAST_QUERY_INTVL_MIN; + } + + brmctx->multicast_query_interval = intvl_jiffies; +} + /** * br_multicast_list_adjacent - Returns snooped multicast addresses * @dev: The bridge port adjacent to which to retrieve addresses diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 0c8b5f1a15bc..701dd8b8455e 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1357,7 +1357,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], if (data[IFLA_BR_MCAST_QUERY_INTVL]) { u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_INTVL]); - br->multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val); + br_multicast_set_query_intvl(&br->multicast_ctx, val); } if (data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c0efd697865a..4ed7f11042e8 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -28,6 +28,7 @@ #define BR_MAX_PORTS (1<multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val); + br_multicast_set_query_intvl(&br->multicast_ctx, val); return 0; } diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c index 8ffd4ed2563c..bf1ac0874279 100644 --- a/net/bridge/br_vlan_options.c +++ b/net/bridge/br_vlan_options.c @@ -521,7 +521,7 @@ static int br_vlan_process_global_one_opts(const struct net_bridge *br, u64 val; val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL]); - v->br_mcast_ctx.multicast_query_interval = clock_t_to_jiffies(val); + br_multicast_set_query_intvl(&v->br_mcast_ctx, val); *changed = true; } if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL]) { From f83a112bd91a494cdee671aec74e777470fb4a07 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 27 Dec 2021 19:21:16 +0200 Subject: [PATCH 243/251] net: bridge: mcast: add and enforce startup query interval minimum As reported[1] if startup query interval is set too low in combination with large number of startup queries and we have multiple bridges or even a single bridge with multiple querier vlans configured we can crash the machine. Add a 1 second minimum which must be enforced by overwriting the value if set lower (i.e. without returning an error) to avoid breaking user-space. If that happens a log message is emitted to let the admin know that the startup interval has been set to the minimum. It doesn't make sense to make the startup interval lower than the normal query interval so use the same value of 1 second. The issue has been present since these intervals could be user-controlled. [1] https://lore.kernel.org/netdev/e8b9ce41-57b9-b6e2-a46a-ff9c791cf0ba@gmail.com/ Fixes: d902eee43f19 ("bridge: Add multicast count/interval sysfs entries") Reported-by: Eric Dumazet Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 16 ++++++++++++++++ net/bridge/br_netlink.c | 2 +- net/bridge/br_private.h | 3 +++ net/bridge/br_sysfs_br.c | 2 +- net/bridge/br_vlan_options.c | 2 +- 5 files changed, 22 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 998da4a2d209..de2409889489 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -4538,6 +4538,22 @@ void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx, brmctx->multicast_query_interval = intvl_jiffies; } +void br_multicast_set_startup_query_intvl(struct net_bridge_mcast *brmctx, + unsigned long val) +{ + unsigned long intvl_jiffies = clock_t_to_jiffies(val); + + if (intvl_jiffies < BR_MULTICAST_STARTUP_QUERY_INTVL_MIN) { + br_info(brmctx->br, + "trying to set multicast startup query interval below minimum, setting to %lu (%ums)\n", + jiffies_to_clock_t(BR_MULTICAST_STARTUP_QUERY_INTVL_MIN), + jiffies_to_msecs(BR_MULTICAST_STARTUP_QUERY_INTVL_MIN)); + intvl_jiffies = BR_MULTICAST_STARTUP_QUERY_INTVL_MIN; + } + + brmctx->multicast_startup_query_interval = intvl_jiffies; +} + /** * br_multicast_list_adjacent - Returns snooped multicast addresses * @dev: The bridge port adjacent to which to retrieve addresses diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 701dd8b8455e..2ff83d84230d 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1369,7 +1369,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], if (data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]) { u64 val = nla_get_u64(data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]); - br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val); + br_multicast_set_startup_query_intvl(&br->multicast_ctx, val); } if (data[IFLA_BR_MCAST_STATS_ENABLED]) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 4ed7f11042e8..2187a0c3fd22 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -29,6 +29,7 @@ #define BR_MULTICAST_DEFAULT_HASH_MAX 4096 #define BR_MULTICAST_QUERY_INTVL_MIN msecs_to_jiffies(1000) +#define BR_MULTICAST_STARTUP_QUERY_INTVL_MIN BR_MULTICAST_QUERY_INTVL_MIN #define BR_HWDOM_MAX BITS_PER_LONG @@ -966,6 +967,8 @@ size_t br_multicast_querier_state_size(void); size_t br_rports_size(const struct net_bridge_mcast *brmctx); void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx, unsigned long val); +void br_multicast_set_startup_query_intvl(struct net_bridge_mcast *brmctx, + unsigned long val); static inline bool br_group_is_l2(const struct br_ip *group) { diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index f5bd1114a434..7b0c19772111 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -706,7 +706,7 @@ static ssize_t multicast_startup_query_interval_show( static int set_startup_query_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val); + br_multicast_set_startup_query_intvl(&br->multicast_ctx, val); return 0; } diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c index bf1ac0874279..a6382973b3e7 100644 --- a/net/bridge/br_vlan_options.c +++ b/net/bridge/br_vlan_options.c @@ -535,7 +535,7 @@ static int br_vlan_process_global_one_opts(const struct net_bridge *br, u64 val; val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL]); - v->br_mcast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val); + br_multicast_set_startup_query_intvl(&v->br_mcast_ctx, val); *changed = true; } if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER]) { From 9c1952aeaa98b3cfc49e2a79cb2c7d6a674213e9 Mon Sep 17 00:00:00 2001 From: wujianguo Date: Wed, 29 Dec 2021 18:58:10 +0800 Subject: [PATCH 244/251] selftests/net: udpgso_bench_tx: fix dst ip argument udpgso_bench_tx call setup_sockaddr() for dest address before parsing all arguments, if we specify "-p ${dst_port}" after "-D ${dst_ip}", then ${dst_port} will be ignored, and using default cfg_port 8000. This will cause test case "multiple GRO socks" failed in udpgro.sh. Setup sockaddr after parsing all arguments. Fixes: 3a687bef148d ("selftests: udp gso benchmark") Signed-off-by: Jianguo Wu Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/ff620d9f-5b52-06ab-5286-44b945453002@163.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/udpgso_bench_tx.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c index 17512a43885e..f1fdaa270291 100644 --- a/tools/testing/selftests/net/udpgso_bench_tx.c +++ b/tools/testing/selftests/net/udpgso_bench_tx.c @@ -419,6 +419,7 @@ static void usage(const char *filepath) static void parse_opts(int argc, char **argv) { + const char *bind_addr = NULL; int max_len, hdrlen; int c; @@ -446,7 +447,7 @@ static void parse_opts(int argc, char **argv) cfg_cpu = strtol(optarg, NULL, 0); break; case 'D': - setup_sockaddr(cfg_family, optarg, &cfg_dst_addr); + bind_addr = optarg; break; case 'l': cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000; @@ -492,6 +493,11 @@ static void parse_opts(int argc, char **argv) } } + if (!bind_addr) + bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0"; + + setup_sockaddr(cfg_family, bind_addr, &cfg_dst_addr); + if (optind != argc) usage(argv[0]); From add25d6d6c85f7b6d00a055ee0a4169acf845681 Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Wed, 29 Dec 2021 15:27:30 +0800 Subject: [PATCH 245/251] selftests: net: Fix a typo in udpgro_fwd.sh $rvs -> $rcv Fixes: a062260a9d5f ("selftests: net: add UDP GRO forwarding self-tests") Signed-off-by: Jianguo Wu Link: https://lore.kernel.org/r/d247d7c8-a03a-0abf-3c71-4006a051d133@163.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/udpgro_fwd.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 7f26591f236b..6a3985b8cd7f 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -132,7 +132,7 @@ run_test() { local rcv=`ip netns exec $NS_DST $ipt"-save" -c | grep 'dport 8000' | \ sed -e 's/\[//' -e 's/:.*//'` if [ $rcv != $pkts ]; then - echo " fail - received $rvs packets, expected $pkts" + echo " fail - received $rcv packets, expected $pkts" ret=1 return fi From e22e45fc9e41bf9fcc1e92cfb78eb92786728ef0 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 28 Dec 2021 18:41:45 +0800 Subject: [PATCH 246/251] net: fix use-after-free in tw_timer_handler A real world panic issue was found as follow in Linux 5.4. BUG: unable to handle page fault for address: ffffde49a863de28 PGD 7e6fe62067 P4D 7e6fe62067 PUD 7e6fe63067 PMD f51e064067 PTE 0 RIP: 0010:tw_timer_handler+0x20/0x40 Call Trace: call_timer_fn+0x2b/0x120 run_timer_softirq+0x1ef/0x450 __do_softirq+0x10d/0x2b8 irq_exit+0xc7/0xd0 smp_apic_timer_interrupt+0x68/0x120 apic_timer_interrupt+0xf/0x20 This issue was also reported since 2017 in the thread [1], unfortunately, the issue was still can be reproduced after fixing DCCP. The ipv4_mib_exit_net is called before tcp_sk_exit_batch when a net namespace is destroyed since tcp_sk_ops is registered befrore ipv4_mib_ops, which means tcp_sk_ops is in the front of ipv4_mib_ops in the list of pernet_list. There will be a use-after-free on net->mib.net_statistics in tw_timer_handler after ipv4_mib_exit_net if there are some inflight time-wait timers. This bug is not introduced by commit f2bf415cfed7 ("mib: add net to NET_ADD_STATS_BH") since the net_statistics is a global variable instead of dynamic allocation and freeing. Actually, commit 61a7e26028b9 ("mib: put net statistics on struct net") introduces the bug since it put net statistics on struct net and free it when net namespace is destroyed. Moving init_ipv4_mibs() to the front of tcp_init() to fix this bug and replace pr_crit() with panic() since continuing is meaningless when init_ipv4_mibs() fails. [1] https://groups.google.com/g/syzkaller/c/p1tn-_Kc6l4/m/smuL_FMAAgAJ?pli=1 Fixes: 61a7e26028b9 ("mib: put net statistics on struct net") Signed-off-by: Muchun Song Cc: Cong Wang Cc: Fam Zheng Cc: Link: https://lore.kernel.org/r/20211228104145.9426-1-songmuchun@bytedance.com Signed-off-by: Jakub Kicinski --- net/ipv4/af_inet.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 6b5956500436..5f70ffdae1b5 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1994,6 +1994,10 @@ static int __init inet_init(void) ip_init(); + /* Initialise per-cpu ipv4 mibs */ + if (init_ipv4_mibs()) + panic("%s: Cannot init ipv4 mibs\n", __func__); + /* Setup TCP slab cache for open requests. */ tcp_init(); @@ -2024,12 +2028,6 @@ static int __init inet_init(void) if (init_inet_pernet_ops()) pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__); - /* - * Initialise per-cpu ipv4 mibs - */ - - if (init_ipv4_mibs()) - pr_crit("%s: Cannot init ipv4 mibs\n", __func__); ipv4_proc_init(); From 168fed986b3a7ec7b98cab1fe84e2f282b9e6a8f Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 28 Dec 2021 17:31:42 +0200 Subject: [PATCH 247/251] net: bridge: mcast: fix br_multicast_ctx_vlan_global_disabled helper We need to first check if the context is a vlan one, then we need to check the global bridge multicast vlan snooping flag, and finally the vlan's multicast flag, otherwise we will unnecessarily enable vlan mcast processing (e.g. querier timers). Fixes: 7b54aaaf53cb ("net: bridge: multicast: add vlan state initialization and control") Signed-off-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20211228153142.536969-1-nikolay@nvidia.com Signed-off-by: Jakub Kicinski --- net/bridge/br_private.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 2187a0c3fd22..e8c6ee322c71 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1153,9 +1153,9 @@ br_multicast_port_ctx_get_global(const struct net_bridge_mcast_port *pmctx) static inline bool br_multicast_ctx_vlan_global_disabled(const struct net_bridge_mcast *brmctx) { - return br_opt_get(brmctx->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) && - br_multicast_ctx_is_vlan(brmctx) && - !(brmctx->vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED); + return br_multicast_ctx_is_vlan(brmctx) && + (!br_opt_get(brmctx->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) || + !(brmctx->vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED)); } static inline bool From 92a34ab169f9eefe29cd420ce96b0a0a2a1da853 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Wed, 29 Dec 2021 11:21:18 +0800 Subject: [PATCH 248/251] net/ncsi: check for error return from call to nla_put_u32 As we can see from the comment of the nla_put() that it could return -EMSGSIZE if the tailroom of the skb is insufficient. Therefore, it should be better to check the return value of the nla_put_u32 and return the error code if error accurs. Also, there are many other functions have the same problem, and if this patch is correct, I will commit a new version to fix all. Fixes: 955dc68cb9b2 ("net/ncsi: Add generic netlink family") Signed-off-by: Jiasheng Jiang Link: https://lore.kernel.org/r/20211229032118.1706294-1-jiasheng@iscas.ac.cn Signed-off-by: Jakub Kicinski --- net/ncsi/ncsi-netlink.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index bb5f1650f11c..c189b4c8a182 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -112,7 +112,11 @@ static int ncsi_write_package_info(struct sk_buff *skb, pnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR); if (!pnest) return -ENOMEM; - nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id); + rc = nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id); + if (rc) { + nla_nest_cancel(skb, pnest); + return rc; + } if ((0x1 << np->id) == ndp->package_whitelist) nla_put_flag(skb, NCSI_PKG_ATTR_FORCED); cnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR_CHANNEL_LIST); From be1c5b53227ba8280f1ebb01c6f5da3c9eebdaad Mon Sep 17 00:00:00 2001 From: xu xin Date: Thu, 30 Dec 2021 03:28:56 +0000 Subject: [PATCH 249/251] Documentation: fix outdated interpretation of ip_no_pmtu_disc The updating way of pmtu has changed, but documentation is still in the old way. So this patch updates the interpretation of ip_no_pmtu_disc and min_pmtu. See commit 28d35bcdd3925 ("net: ipv4: don't let PMTU updates increase route MTU") Reported-by: Zeal Robot Signed-off-by: xu xin Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index c04431144f7a..2572eecc3e86 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -25,7 +25,8 @@ ip_default_ttl - INTEGER ip_no_pmtu_disc - INTEGER Disable Path MTU Discovery. If enabled in mode 1 and a fragmentation-required ICMP is received, the PMTU to this - destination will be set to min_pmtu (see below). You will need + destination will be set to the smallest of the old MTU to + this destination and min_pmtu (see below). You will need to raise min_pmtu to the smallest interface MTU on your system manually if you want to avoid locally generated fragments. @@ -49,7 +50,8 @@ ip_no_pmtu_disc - INTEGER Default: FALSE min_pmtu - INTEGER - default 552 - minimum discovered Path MTU + default 552 - minimum Path MTU. Unless this is changed mannually, + each cached pmtu will never be lower than this setting. ip_forward_use_pmtu - BOOLEAN By default we don't trust protocol path MTUs while forwarding From 8b3170e07539855ee91bc5e2fa7780a4c9b5c7aa Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Thu, 30 Dec 2021 18:40:29 +0800 Subject: [PATCH 250/251] selftests: net: using ping6 for IPv6 in udpgro_fwd.sh udpgro_fwd.sh output following message: ping: 2001:db8:1::100: Address family for hostname not supported Using ping6 when pinging IPv6 addresses. Fixes: a062260a9d5f ("selftests: net: add UDP GRO forwarding self-tests") Signed-off-by: Jianguo Wu Signed-off-by: David S. Miller --- tools/testing/selftests/net/udpgro_fwd.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 6a3985b8cd7f..3ea73013d956 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -185,6 +185,7 @@ for family in 4 6; do IPT=iptables SUFFIX=24 VXDEV=vxlan + PING=ping if [ $family = 6 ]; then BM_NET=$BM_NET_V6 @@ -192,6 +193,7 @@ for family in 4 6; do SUFFIX="64 nodad" VXDEV=vxlan6 IPT=ip6tables + PING="ping6" fi echo "IPv$family" @@ -237,7 +239,7 @@ for family in 4 6; do # load arp cache before running the test to reduce the amount of # stray traffic on top of the UDP tunnel - ip netns exec $NS_SRC ping -q -c 1 $OL_NET$DST_NAT >/dev/null + ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST cleanup From bf2b09fedc17248b315f80fb249087b7d28a69a6 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 30 Dec 2021 12:26:27 +0000 Subject: [PATCH 251/251] fsl/fman: Fix missing put_device() call in fman_port_probe The reference taken by 'of_find_device_by_node()' must be released when not needed anymore. Add the corresponding 'put_device()' in the and error handling paths. Fixes: 18a6c85fcc78 ("fsl/fman: Add FMan Port Support") Signed-off-by: Miaoqian Lin Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fman/fman_port.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c index d9baac0dbc7d..4c9d05c45c03 100644 --- a/drivers/net/ethernet/freescale/fman/fman_port.c +++ b/drivers/net/ethernet/freescale/fman/fman_port.c @@ -1805,7 +1805,7 @@ static int fman_port_probe(struct platform_device *of_dev) fman = dev_get_drvdata(&fm_pdev->dev); if (!fman) { err = -EINVAL; - goto return_err; + goto put_device; } err = of_property_read_u32(port_node, "cell-index", &val); @@ -1813,7 +1813,7 @@ static int fman_port_probe(struct platform_device *of_dev) dev_err(port->dev, "%s: reading cell-index for %pOF failed\n", __func__, port_node); err = -EINVAL; - goto return_err; + goto put_device; } port_id = (u8)val; port->dts_params.id = port_id; @@ -1847,7 +1847,7 @@ static int fman_port_probe(struct platform_device *of_dev) } else { dev_err(port->dev, "%s: Illegal port type\n", __func__); err = -EINVAL; - goto return_err; + goto put_device; } port->dts_params.type = port_type; @@ -1861,7 +1861,7 @@ static int fman_port_probe(struct platform_device *of_dev) dev_err(port->dev, "%s: incorrect qman-channel-id\n", __func__); err = -EINVAL; - goto return_err; + goto put_device; } port->dts_params.qman_channel_id = qman_channel_id; } @@ -1871,7 +1871,7 @@ static int fman_port_probe(struct platform_device *of_dev) dev_err(port->dev, "%s: of_address_to_resource() failed\n", __func__); err = -ENOMEM; - goto return_err; + goto put_device; } port->dts_params.fman = fman; @@ -1896,6 +1896,8 @@ static int fman_port_probe(struct platform_device *of_dev) return 0; +put_device: + put_device(&fm_pdev->dev); return_err: of_node_put(port_node); free_port: