From 6c9762a78c325107dc37d20ee21002b841679209 Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Fri, 23 Apr 2021 15:54:02 +0200 Subject: [PATCH 001/730] ASoC: max98088: fix ni clock divider calculation The ni1/ni2 ratio formula [1] uses the pclk which is the prescaled mclk. The max98088 datasheet [2] has no such formula but table-12 equals so we can assume that it is the same for both devices. While on it make use of DIV_ROUND_CLOSEST_ULL(). [1] https://datasheets.maximintegrated.com/en/ds/MAX98089.pdf; page 86 [2] https://datasheets.maximintegrated.com/en/ds/MAX98088.pdf; page 82 Signed-off-by: Marco Felsch Link: https://lore.kernel.org/r/20210423135402.32105-1-m.felsch@pengutronix.de Signed-off-by: Mark Brown --- sound/soc/codecs/max98088.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/max98088.c b/sound/soc/codecs/max98088.c index 4be24e7f51c8..f8e49e45ce33 100644 --- a/sound/soc/codecs/max98088.c +++ b/sound/soc/codecs/max98088.c @@ -41,6 +41,7 @@ struct max98088_priv { enum max98088_type devtype; struct max98088_pdata *pdata; struct clk *mclk; + unsigned char mclk_prescaler; unsigned int sysclk; struct max98088_cdata dai[2]; int eq_textcnt; @@ -998,13 +999,16 @@ static int max98088_dai1_hw_params(struct snd_pcm_substream *substream, /* Configure NI when operating as master */ if (snd_soc_component_read(component, M98088_REG_14_DAI1_FORMAT) & M98088_DAI_MAS) { + unsigned long pclk; + if (max98088->sysclk == 0) { dev_err(component->dev, "Invalid system clock frequency\n"); return -EINVAL; } ni = 65536ULL * (rate < 50000 ? 96ULL : 48ULL) * (unsigned long long int)rate; - do_div(ni, (unsigned long long int)max98088->sysclk); + pclk = DIV_ROUND_CLOSEST(max98088->sysclk, max98088->mclk_prescaler); + ni = DIV_ROUND_CLOSEST_ULL(ni, pclk); snd_soc_component_write(component, M98088_REG_12_DAI1_CLKCFG_HI, (ni >> 8) & 0x7F); snd_soc_component_write(component, M98088_REG_13_DAI1_CLKCFG_LO, @@ -1065,13 +1069,16 @@ static int max98088_dai2_hw_params(struct snd_pcm_substream *substream, /* Configure NI when operating as master */ if (snd_soc_component_read(component, M98088_REG_1C_DAI2_FORMAT) & M98088_DAI_MAS) { + unsigned long pclk; + if (max98088->sysclk == 0) { dev_err(component->dev, "Invalid system clock frequency\n"); return -EINVAL; } ni = 65536ULL * (rate < 50000 ? 96ULL : 48ULL) * (unsigned long long int)rate; - do_div(ni, (unsigned long long int)max98088->sysclk); + pclk = DIV_ROUND_CLOSEST(max98088->sysclk, max98088->mclk_prescaler); + ni = DIV_ROUND_CLOSEST_ULL(ni, pclk); snd_soc_component_write(component, M98088_REG_1A_DAI2_CLKCFG_HI, (ni >> 8) & 0x7F); snd_soc_component_write(component, M98088_REG_1B_DAI2_CLKCFG_LO, @@ -1113,8 +1120,10 @@ static int max98088_dai_set_sysclk(struct snd_soc_dai *dai, */ if ((freq >= 10000000) && (freq < 20000000)) { snd_soc_component_write(component, M98088_REG_10_SYS_CLK, 0x10); + max98088->mclk_prescaler = 1; } else if ((freq >= 20000000) && (freq < 30000000)) { snd_soc_component_write(component, M98088_REG_10_SYS_CLK, 0x20); + max98088->mclk_prescaler = 2; } else { dev_err(component->dev, "Invalid master clock frequency\n"); return -EINVAL; From 366db3ac3cdf97e90695282b959c75d5ea58cf00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Wed, 21 Apr 2021 17:02:20 +0200 Subject: [PATCH 002/730] arm64: dts: renesas: aistarvision-mipi-adapter-2.1: Fix CSI40 ports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the DTS schema by explicitly stating that the input is port@0. This fixes a schema validation error but has no runtime effect as the default port number is 0 if not specified. Signed-off-by: Niklas Söderlund Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/20210421150221.3202955-2-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Geert Uytterhoeven --- arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts b/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts index e7b4a929bb17..2e3d1981cac4 100644 --- a/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts +++ b/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts @@ -33,7 +33,7 @@ status = "okay"; ports { - port { + port@0 { csi40_in: endpoint { clock-lanes = <0>; data-lanes = <1 2>; From 0a96c05995ef1085f9c5e6bf005a04915dd2ec6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Wed, 21 Apr 2021 17:02:21 +0200 Subject: [PATCH 003/730] arm64: dts: renesas: Add port@0 node for all CSI-2 nodes to dtsi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The port@0 is a mandatory port, add or move the declaration to the CSI-2 nodes top declared in dtsi files instead of depending on dts files adding them when describing the external connection. This fixes validation warnings for DTB outputs that do not connect all CSI-2 receivers to transmitters and thus declaring all port@0 nodes in dts files. Signed-off-by: Niklas Söderlund Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/20210421150221.3202955-3-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Geert Uytterhoeven --- ...hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi | 2 -- arch/arm64/boot/dts/renesas/r8a774a1.dtsi | 8 ++++++++ arch/arm64/boot/dts/renesas/r8a774b1.dtsi | 8 ++++++++ arch/arm64/boot/dts/renesas/r8a774c0.dtsi | 4 ++++ arch/arm64/boot/dts/renesas/r8a774e1.dtsi | 8 ++++++++ arch/arm64/boot/dts/renesas/r8a77950.dtsi | 4 ++++ arch/arm64/boot/dts/renesas/r8a77951.dtsi | 12 ++++++++++++ arch/arm64/boot/dts/renesas/r8a77960.dtsi | 8 ++++++++ arch/arm64/boot/dts/renesas/r8a77961.dtsi | 8 ++++++++ arch/arm64/boot/dts/renesas/r8a77965.dtsi | 8 ++++++++ arch/arm64/boot/dts/renesas/r8a77970.dtsi | 4 ++++ arch/arm64/boot/dts/renesas/r8a77980.dtsi | 8 ++++++++ arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts | 2 -- arch/arm64/boot/dts/renesas/r8a77990.dtsi | 4 ++++ arch/arm64/boot/dts/renesas/salvator-common.dtsi | 3 --- 15 files changed, 84 insertions(+), 7 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi index c62ddb9b2ba5..3771144a2ce4 100644 --- a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi +++ b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi @@ -14,7 +14,6 @@ ports { port@0 { - reg = <0>; csi20_in: endpoint { clock-lanes = <0>; data-lanes = <1 2>; @@ -29,7 +28,6 @@ ports { port@0 { - reg = <0>; csi40_in: endpoint { clock-lanes = <0>; data-lanes = <1 2>; diff --git a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi index d64fb8b1b86c..46f8dbf68904 100644 --- a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi @@ -2573,6 +2573,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2628,6 +2632,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a774b1.dtsi b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi index 5b05474dc272..d16a4be5ef77 100644 --- a/arch/arm64/boot/dts/renesas/r8a774b1.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi @@ -2419,6 +2419,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2474,6 +2478,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi index 20fa3caa050e..1aef34447abd 100644 --- a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi @@ -1823,6 +1823,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi index 8eb006cbd9af..1f51237ab0a6 100644 --- a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi @@ -2709,6 +2709,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2764,6 +2768,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77950.dtsi b/arch/arm64/boot/dts/renesas/r8a77950.dtsi index 25b87da32eeb..b643d3079db1 100644 --- a/arch/arm64/boot/dts/renesas/r8a77950.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77950.dtsi @@ -192,6 +192,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77951.dtsi b/arch/arm64/boot/dts/renesas/r8a77951.dtsi index 5c39152e4570..85d66d15465a 100644 --- a/arch/arm64/boot/dts/renesas/r8a77951.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77951.dtsi @@ -3097,6 +3097,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -3152,6 +3156,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -3191,6 +3199,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77960.dtsi b/arch/arm64/boot/dts/renesas/r8a77960.dtsi index 25d947a81b29..12476e354d74 100644 --- a/arch/arm64/boot/dts/renesas/r8a77960.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77960.dtsi @@ -2761,6 +2761,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2816,6 +2820,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77961.dtsi b/arch/arm64/boot/dts/renesas/r8a77961.dtsi index ab081f14af9a..d9804768425a 100644 --- a/arch/arm64/boot/dts/renesas/r8a77961.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77961.dtsi @@ -2499,6 +2499,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2554,6 +2558,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77965.dtsi b/arch/arm64/boot/dts/renesas/r8a77965.dtsi index 657b20d3533b..dcb9df861d74 100644 --- a/arch/arm64/boot/dts/renesas/r8a77965.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77965.dtsi @@ -2575,6 +2575,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2630,6 +2634,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77970.dtsi b/arch/arm64/boot/dts/renesas/r8a77970.dtsi index 5a5d5649332a..e8f6352c3665 100644 --- a/arch/arm64/boot/dts/renesas/r8a77970.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77970.dtsi @@ -1106,6 +1106,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77980.dtsi b/arch/arm64/boot/dts/renesas/r8a77980.dtsi index 1ffa4a995a7a..7b51d464de0e 100644 --- a/arch/arm64/boot/dts/renesas/r8a77980.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77980.dtsi @@ -1439,6 +1439,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -1478,6 +1482,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts index 295d34f1d216..4715e4a4abe0 100644 --- a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts +++ b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts @@ -298,8 +298,6 @@ ports { port@0 { - reg = <0>; - csi40_in: endpoint { clock-lanes = <0>; data-lanes = <1 2>; diff --git a/arch/arm64/boot/dts/renesas/r8a77990.dtsi b/arch/arm64/boot/dts/renesas/r8a77990.dtsi index 5010f23fafcc..0eaea58f4210 100644 --- a/arch/arm64/boot/dts/renesas/r8a77990.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77990.dtsi @@ -1970,6 +1970,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi index e18747df219f..453ffcef24fa 100644 --- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi +++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi @@ -349,7 +349,6 @@ ports { port@0 { - reg = <0>; csi20_in: endpoint { clock-lanes = <0>; data-lanes = <1>; @@ -364,8 +363,6 @@ ports { port@0 { - reg = <0>; - csi40_in: endpoint { clock-lanes = <0>; data-lanes = <1 2 3 4>; From d9cd78edb2e6b7e26747c0ec312be31e7ef196fe Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 22 Apr 2021 12:02:29 +0300 Subject: [PATCH 004/730] firmware: arm_scpi: Prevent the ternary sign expansion bug How the type promotion works in ternary expressions is a bit tricky. The problem is that scpi_clk_get_val() returns longs, "ret" is a int which holds a negative error code, and le32_to_cpu() is an unsigned int. We want the negative error code to be cast to a negative long. But because le32_to_cpu() is an u32 then "ret" is type promoted to u32 and becomes a high positive and then it is promoted to long and it is still a high positive value. Fix this by getting rid of the ternary. Link: https://lore.kernel.org/r/YIE7pdqV/h10tEAK@mwanda Fixes: 8cb7cf56c9fe ("firmware: add support for ARM System Control and Power Interface(SCPI) protocol") Reviewed-by: Cristian Marussi Signed-off-by: Dan Carpenter [sudeep.holla: changed to return 0 as clock rate on error] Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scpi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/arm_scpi.c b/drivers/firmware/arm_scpi.c index d0dee37ad522..4ceba5ef7895 100644 --- a/drivers/firmware/arm_scpi.c +++ b/drivers/firmware/arm_scpi.c @@ -552,8 +552,10 @@ static unsigned long scpi_clk_get_val(u16 clk_id) ret = scpi_send_message(CMD_GET_CLOCK_VALUE, &le_clk_id, sizeof(le_clk_id), &rate, sizeof(rate)); + if (ret) + return 0; - return ret ? ret : le32_to_cpu(rate); + return le32_to_cpu(rate); } static int scpi_clk_set_val(u16 clk_id, unsigned long rate) From 03f840c49207e8c125b3df8c29c13137c6675d42 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Tue, 27 Apr 2021 11:30:31 +0800 Subject: [PATCH 005/730] firmware: arm_scmi: Remove duplicate declaration of struct scmi_protocol_handle struct scmi_protocol_handle is declared twice, let us remove the duplicate declaration. Link: https://lore.kernel.org/r/20210427033031.4580-1-wanjiabing@vivo.com Reviewed-by: Cristian Marussi Signed-off-by: Wan Jiabing [sudeep.holla: minor updates to the title and the changelog] Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/notify.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/firmware/arm_scmi/notify.h b/drivers/firmware/arm_scmi/notify.h index ce0324be6c71..4e9b627edfef 100644 --- a/drivers/firmware/arm_scmi/notify.h +++ b/drivers/firmware/arm_scmi/notify.h @@ -79,8 +79,6 @@ struct scmi_protocol_events { int scmi_notification_init(struct scmi_handle *handle); void scmi_notification_exit(struct scmi_handle *handle); - -struct scmi_protocol_handle; int scmi_register_protocol_events(const struct scmi_handle *handle, u8 proto_id, const struct scmi_protocol_handle *ph, const struct scmi_protocol_events *ee); From 774cda6f12d5ad11410c4cda223554c3735ee862 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 27 Apr 2021 22:59:55 +0200 Subject: [PATCH 006/730] dt-bindings: nvmem: mediatek: remove duplicate mt8192 line The same patch was accidentally merged twice, resulting in a duplicate line for the mt8192 SoC. Fixes: f2674c0c7488 ("dt-bindings: nvmem: mediatek: add support for MediaTek mt8192 SoC") Fixes: 2a1405a14c3a ("dt-bindings: nvmem: mediatek: add support for MediaTek mt8192 SoC") Signed-off-by: Arnd Bergmann --- Documentation/devicetree/bindings/nvmem/mtk-efuse.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/nvmem/mtk-efuse.txt b/Documentation/devicetree/bindings/nvmem/mtk-efuse.txt index d479ad977e24..b6791702bcfc 100644 --- a/Documentation/devicetree/bindings/nvmem/mtk-efuse.txt +++ b/Documentation/devicetree/bindings/nvmem/mtk-efuse.txt @@ -9,7 +9,6 @@ Required properties: "mediatek,mt8173-efuse" or "mediatek,efuse": for MT8173 "mediatek,mt8192-efuse", "mediatek,efuse": for MT8192 "mediatek,mt8516-efuse", "mediatek,efuse": for MT8516 - "mediatek,mt8192-efuse", "mediatek,efuse": for MT8192 - reg: Should contain registers location and length = Data cells = From c019d92457826bb7b2091c86f36adb5de08405f9 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 23 Apr 2021 17:09:28 +0200 Subject: [PATCH 007/730] openrisc: Fix a memory leak 'setup_find_cpu_node()' take a reference on the node it returns. This reference must be decremented when not needed anymore, or there will be a leak. Add the missing 'of_node_put(cpu)'. Note that 'setup_cpuinfo()' that also calls this function already has a correct 'of_node_put(cpu)' at its end. Fixes: 9d02a4283e9c ("OpenRISC: Boot code") Signed-off-by: Christophe JAILLET Signed-off-by: Stafford Horne --- arch/openrisc/kernel/setup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index 2416a9f91533..c6f9e7b9f7cb 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -278,6 +278,8 @@ void calibrate_delay(void) pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n", loops_per_jiffy / (500000 / HZ), (loops_per_jiffy / (5000 / HZ)) % 100, loops_per_jiffy); + + of_node_put(cpu); } void __init setup_arch(char **cmdline_p) From a0695853e5906a9558eef9f79856e07659b7a1e6 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 28 Apr 2021 14:26:31 +0200 Subject: [PATCH 008/730] ASoC: stm32: do not request a new clock consummer reference This reverts commit 65d1cce726d4912793d0a84c55ecdb0ef5832130. There is problem with clk_hw_get_hw(). Using it pins the clock provider to itself, making it impossible to remove the module. Revert commit 65d1cce726d4 ("ASoC: stm32: properly get clk from the provider") until this gets sorted out. Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20210428122632.46244-2-jbrunet@baylibre.com Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai_sub.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index c1561237ee24..3aa1cf262402 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -484,10 +484,7 @@ static int stm32_sai_add_mclk_provider(struct stm32_sai_sub_data *sai) dev_err(dev, "mclk register returned %d\n", ret); return ret; } - - sai->sai_mclk = devm_clk_hw_get_clk(dev, hw, NULL); - if (IS_ERR(sai->sai_mclk)) - return PTR_ERR(sai->sai_mclk); + sai->sai_mclk = hw->clk; /* register mclk provider */ return devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, hw); From 97c733654ab4a5ac910216b4b74e605acf3e1cce Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 28 Apr 2021 14:26:32 +0200 Subject: [PATCH 009/730] ASoC: da7219: do not request a new clock consummer reference This reverts commit 12f8127fe9e6154dd4197df97e44f3fd67583071. There is problem with clk_hw_get_hw(). Using it pins the clock provider to itself, making it impossible to remove the module. Revert commit 12f8127fe9e6 ("ASoC: da7219: properly get clk from the provider") until this gets sorted out. Reported-by: Pierre-Louis Bossart Signed-off-by: Jerome Brunet Tested-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20210428122632.46244-3-jbrunet@baylibre.com Signed-off-by: Mark Brown --- sound/soc/codecs/da7219.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sound/soc/codecs/da7219.c b/sound/soc/codecs/da7219.c index bd3c523a8617..13009d08b09a 100644 --- a/sound/soc/codecs/da7219.c +++ b/sound/soc/codecs/da7219.c @@ -2181,10 +2181,7 @@ static int da7219_register_dai_clks(struct snd_soc_component *component) ret); goto err; } - - da7219->dai_clks[i] = devm_clk_hw_get_clk(dev, dai_clk_hw, NULL); - if (IS_ERR(da7219->dai_clks[i])) - return PTR_ERR(da7219->dai_clks[i]); + da7219->dai_clks[i] = dai_clk_hw->clk; /* For DT setup onecell data, otherwise create lookup */ if (np) { From 6879e8e759bf9e05eaee85e32ca1a936e6b46da1 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Wed, 28 Apr 2021 01:53:31 +0530 Subject: [PATCH 010/730] ASoC: amd: fix for pcm_read() error Below phython script throwing pcm_read() error. import subprocess p = subprocess.Popen(["aplay -t raw -D plughw:1,0 /dev/zero"], shell=True) subprocess.call(["arecord -Dhw:1,0 --dump-hw-params"], shell=True) subprocess.call(["arecord -Dhw:1,0 -fdat -d1 /dev/null"], shell=True) p.kill() Handling ACP global external interrupt enable register causing this issue. This register got updated wrongly when there is active stream causing interrupts disabled for active stream. Refactored code to handle enabling and disabling external interrupts. Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/1619555017-29858-1-git-send-email-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/raven/acp3x-pcm-dma.c | 10 ---------- sound/soc/amd/raven/acp3x.h | 1 + sound/soc/amd/raven/pci-acp3x.c | 15 +++++++++++++++ 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/sound/soc/amd/raven/acp3x-pcm-dma.c b/sound/soc/amd/raven/acp3x-pcm-dma.c index 417cda24030c..2447a1e6e913 100644 --- a/sound/soc/amd/raven/acp3x-pcm-dma.c +++ b/sound/soc/amd/raven/acp3x-pcm-dma.c @@ -237,10 +237,6 @@ static int acp3x_dma_open(struct snd_soc_component *component, return ret; } - if (!adata->play_stream && !adata->capture_stream && - !adata->i2ssp_play_stream && !adata->i2ssp_capture_stream) - rv_writel(1, adata->acp3x_base + mmACP_EXTERNAL_INTR_ENB); - i2s_data->acp3x_base = adata->acp3x_base; runtime->private_data = i2s_data; return ret; @@ -367,12 +363,6 @@ static int acp3x_dma_close(struct snd_soc_component *component, } } - /* Disable ACP irq, when the current stream is being closed and - * another stream is also not active. - */ - if (!adata->play_stream && !adata->capture_stream && - !adata->i2ssp_play_stream && !adata->i2ssp_capture_stream) - rv_writel(0, adata->acp3x_base + mmACP_EXTERNAL_INTR_ENB); return 0; } diff --git a/sound/soc/amd/raven/acp3x.h b/sound/soc/amd/raven/acp3x.h index 03fe93913e12..c3f0c8b7545d 100644 --- a/sound/soc/amd/raven/acp3x.h +++ b/sound/soc/amd/raven/acp3x.h @@ -77,6 +77,7 @@ #define ACP_POWER_OFF_IN_PROGRESS 0x03 #define ACP3x_ITER_IRER_SAMP_LEN_MASK 0x38 +#define ACP_EXT_INTR_STAT_CLEAR_MASK 0xFFFFFFFF struct acp3x_platform_info { u16 play_i2s_instance; diff --git a/sound/soc/amd/raven/pci-acp3x.c b/sound/soc/amd/raven/pci-acp3x.c index d3536fd6a124..a013a607b3d4 100644 --- a/sound/soc/amd/raven/pci-acp3x.c +++ b/sound/soc/amd/raven/pci-acp3x.c @@ -76,6 +76,19 @@ static int acp3x_reset(void __iomem *acp3x_base) return -ETIMEDOUT; } +static void acp3x_enable_interrupts(void __iomem *acp_base) +{ + rv_writel(0x01, acp_base + mmACP_EXTERNAL_INTR_ENB); +} + +static void acp3x_disable_interrupts(void __iomem *acp_base) +{ + rv_writel(ACP_EXT_INTR_STAT_CLEAR_MASK, acp_base + + mmACP_EXTERNAL_INTR_STAT); + rv_writel(0x00, acp_base + mmACP_EXTERNAL_INTR_CNTL); + rv_writel(0x00, acp_base + mmACP_EXTERNAL_INTR_ENB); +} + static int acp3x_init(struct acp3x_dev_data *adata) { void __iomem *acp3x_base = adata->acp3x_base; @@ -93,6 +106,7 @@ static int acp3x_init(struct acp3x_dev_data *adata) pr_err("ACP3x reset failed\n"); return ret; } + acp3x_enable_interrupts(acp3x_base); return 0; } @@ -100,6 +114,7 @@ static int acp3x_deinit(void __iomem *acp3x_base) { int ret; + acp3x_disable_interrupts(acp3x_base); /* Reset */ ret = acp3x_reset(acp3x_base); if (ret) { From c7299fea67696db5bd09d924d1f1080d894f92ef Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Mon, 26 Apr 2021 16:56:38 -0700 Subject: [PATCH 011/730] spi: Fix spi device unregister flow When an SPI device is unregistered, the spi->controller->cleanup() is called in the device's release callback. That's wrong for a couple of reasons: 1. spi_dev_put() can be called before spi_add_device() is called. And it's spi_add_device() that calls spi_setup(). This will cause clean() to get called without the spi device ever being setup. 2. There's no guarantee that the controller's driver would be present by the time the spi device's release function gets called. 3. It also causes "sleeping in atomic context" stack dump[1] when device link deletion code does a put_device() on the spi device. Fix these issues by simply moving the cleanup from the device release callback to the actual spi_unregister_device() function. [1] - https://lore.kernel.org/lkml/CAHp75Vc=FCGcUyS0v6fnxme2YJ+qD+Y-hQDQLa2JhWNON9VmsQ@mail.gmail.com/ Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20210426235638.1285530-1-saravanak@google.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index ba425b9c7700..f9885c096563 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -47,10 +47,6 @@ static void spidev_release(struct device *dev) { struct spi_device *spi = to_spi_device(dev); - /* spi controllers may cleanup for released devices */ - if (spi->controller->cleanup) - spi->controller->cleanup(spi); - spi_controller_put(spi->controller); kfree(spi->driver_override); kfree(spi); @@ -558,6 +554,12 @@ static int spi_dev_check(struct device *dev, void *data) return 0; } +static void spi_cleanup(struct spi_device *spi) +{ + if (spi->controller->cleanup) + spi->controller->cleanup(spi); +} + /** * spi_add_device - Add spi_device allocated with spi_alloc_device * @spi: spi_device to register @@ -622,11 +624,13 @@ int spi_add_device(struct spi_device *spi) /* Device may be bound to an active driver when this returns */ status = device_add(&spi->dev); - if (status < 0) + if (status < 0) { dev_err(dev, "can't add %s, status %d\n", dev_name(&spi->dev), status); - else + spi_cleanup(spi); + } else { dev_dbg(dev, "registered child %s\n", dev_name(&spi->dev)); + } done: mutex_unlock(&spi_add_lock); @@ -710,6 +714,8 @@ void spi_unregister_device(struct spi_device *spi) if (!spi) return; + spi_cleanup(spi); + if (spi->dev.of_node) { of_node_clear_flag(spi->dev.of_node, OF_POPULATED); of_node_put(spi->dev.of_node); From 41f48a29ebd5ce944e412f491f1876b5abeff1d6 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 27 Apr 2021 16:38:42 +0200 Subject: [PATCH 012/730] spi: altera: Make SPI_ALTERA_CORE invisible The SPI_ALTERA_CORE config symbol controls compilation of the Altera SPI Controller core code. It is already selected by all of its users, so there is no reason to make it visible, unless compile-testing. Fixes: b0c3d9354de1f87e ("spi: altera: separate core code from platform code") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/f0cb8e66baba4506db6f42fca74dc51b76883507.1619534253.git.geert+renesas@glider.be Signed-off-by: Mark Brown --- drivers/spi/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index 8b161ec4943b..f4481fe48bf0 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -65,7 +65,7 @@ config SPI_ALTERA This is the driver for the Altera SPI Controller. config SPI_ALTERA_CORE - tristate "Altera SPI Controller core code" + tristate "Altera SPI Controller core code" if COMPILE_TEST select REGMAP help "The core code for the Altera SPI Controller" From 121271f08809e5dc01d15d3e529988ac5d740af6 Mon Sep 17 00:00:00 2001 From: Amit Kumar Mahapatra Date: Wed, 28 Apr 2021 23:38:01 -0600 Subject: [PATCH 013/730] spi: spi-zynq-qspi: Fix kernel-doc warning Fix kernel-doc warning. Signed-off-by: Amit Kumar Mahapatra Link: https://lore.kernel.org/r/20210429053802.17650-2-amit.kumar-mahapatra@xilinx.com Signed-off-by: Mark Brown --- drivers/spi/spi-zynq-qspi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-zynq-qspi.c b/drivers/spi/spi-zynq-qspi.c index 5d8a5ee62fa2..1acde9e24973 100644 --- a/drivers/spi/spi-zynq-qspi.c +++ b/drivers/spi/spi-zynq-qspi.c @@ -367,7 +367,7 @@ static int zynq_qspi_config_op(struct zynq_qspi *xqspi, struct spi_device *spi) } /** - * zynq_qspi_setup - Configure the QSPI controller + * zynq_qspi_setup_op - Configure the QSPI controller * @spi: Pointer to the spi_device structure * * Sets the operational mode of QSPI controller for the next QSPI transfer, baud From 6d5ff8e632a4f2389c331e5554cd1c2a9a28c7aa Mon Sep 17 00:00:00 2001 From: Karen Dombroski Date: Wed, 28 Apr 2021 23:38:02 -0600 Subject: [PATCH 014/730] spi: spi-zynq-qspi: Fix stack violation bug When the number of bytes for the op is greater than one, the read could run off the end of the function stack and cause a crash. This patch restores the behaviour of safely reading out of the original opcode location. Signed-off-by: Karen Dombroski Signed-off-by: Amit Kumar Mahapatra Link: https://lore.kernel.org/r/20210429053802.17650-3-amit.kumar-mahapatra@xilinx.com Signed-off-by: Mark Brown --- drivers/spi/spi-zynq-qspi.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-zynq-qspi.c b/drivers/spi/spi-zynq-qspi.c index 1acde9e24973..5a3d81c31d04 100644 --- a/drivers/spi/spi-zynq-qspi.c +++ b/drivers/spi/spi-zynq-qspi.c @@ -528,18 +528,17 @@ static int zynq_qspi_exec_mem_op(struct spi_mem *mem, struct zynq_qspi *xqspi = spi_controller_get_devdata(mem->spi->master); int err = 0, i; u8 *tmpbuf; - u8 opcode = op->cmd.opcode; dev_dbg(xqspi->dev, "cmd:%#x mode:%d.%d.%d.%d\n", - opcode, op->cmd.buswidth, op->addr.buswidth, + op->cmd.opcode, op->cmd.buswidth, op->addr.buswidth, op->dummy.buswidth, op->data.buswidth); zynq_qspi_chipselect(mem->spi, true); zynq_qspi_config_op(xqspi, mem->spi); - if (op->cmd.nbytes) { + if (op->cmd.opcode) { reinit_completion(&xqspi->data_completion); - xqspi->txbuf = &opcode; + xqspi->txbuf = (u8 *)&op->cmd.opcode; xqspi->rxbuf = NULL; xqspi->tx_bytes = op->cmd.nbytes; xqspi->rx_bytes = op->cmd.nbytes; From 9f015b3765bf593b3ed5d3b588e409dc0ffa9f85 Mon Sep 17 00:00:00 2001 From: Rijo Thomas Date: Wed, 14 Apr 2021 23:08:27 +0530 Subject: [PATCH 015/730] tee: amdtee: unload TA only when its refcount becomes 0 Same Trusted Application (TA) can be loaded in multiple TEE contexts. If it is a single instance TA, the TA should not get unloaded from AMD Secure Processor, while it is still in use in another TEE context. Therefore reference count TA and unload it when the count becomes zero. Fixes: 757cc3e9ff1d ("tee: add AMD-TEE driver") Reviewed-by: Devaraj Rangasamy Signed-off-by: Rijo Thomas Acked-by: Dan Carpenter Signed-off-by: Jens Wiklander --- drivers/tee/amdtee/amdtee_private.h | 13 ++++ drivers/tee/amdtee/call.c | 94 ++++++++++++++++++++++++++--- drivers/tee/amdtee/core.c | 15 +++-- 3 files changed, 106 insertions(+), 16 deletions(-) diff --git a/drivers/tee/amdtee/amdtee_private.h b/drivers/tee/amdtee/amdtee_private.h index 337c8d82f74e..6d0f7062bb87 100644 --- a/drivers/tee/amdtee/amdtee_private.h +++ b/drivers/tee/amdtee/amdtee_private.h @@ -21,6 +21,7 @@ #define TEEC_SUCCESS 0x00000000 #define TEEC_ERROR_GENERIC 0xFFFF0000 #define TEEC_ERROR_BAD_PARAMETERS 0xFFFF0006 +#define TEEC_ERROR_OUT_OF_MEMORY 0xFFFF000C #define TEEC_ERROR_COMMUNICATION 0xFFFF000E #define TEEC_ORIGIN_COMMS 0x00000002 @@ -93,6 +94,18 @@ struct amdtee_shm_data { u32 buf_id; }; +/** + * struct amdtee_ta_data - Keeps track of all TAs loaded in AMD Secure + * Processor + * @ta_handle: Handle to TA loaded in TEE + * @refcount: Reference count for the loaded TA + */ +struct amdtee_ta_data { + struct list_head list_node; + u32 ta_handle; + u32 refcount; +}; + #define LOWER_TWO_BYTE_MASK 0x0000FFFF /** diff --git a/drivers/tee/amdtee/call.c b/drivers/tee/amdtee/call.c index 096dd4d92d39..07f36ac834c8 100644 --- a/drivers/tee/amdtee/call.c +++ b/drivers/tee/amdtee/call.c @@ -121,15 +121,69 @@ static int amd_params_to_tee_params(struct tee_param *tee, u32 count, return ret; } +static DEFINE_MUTEX(ta_refcount_mutex); +static struct list_head ta_list = LIST_HEAD_INIT(ta_list); + +static u32 get_ta_refcount(u32 ta_handle) +{ + struct amdtee_ta_data *ta_data; + u32 count = 0; + + /* Caller must hold a mutex */ + list_for_each_entry(ta_data, &ta_list, list_node) + if (ta_data->ta_handle == ta_handle) + return ++ta_data->refcount; + + ta_data = kzalloc(sizeof(*ta_data), GFP_KERNEL); + if (ta_data) { + ta_data->ta_handle = ta_handle; + ta_data->refcount = 1; + count = ta_data->refcount; + list_add(&ta_data->list_node, &ta_list); + } + + return count; +} + +static u32 put_ta_refcount(u32 ta_handle) +{ + struct amdtee_ta_data *ta_data; + u32 count = 0; + + /* Caller must hold a mutex */ + list_for_each_entry(ta_data, &ta_list, list_node) + if (ta_data->ta_handle == ta_handle) { + count = --ta_data->refcount; + if (count == 0) { + list_del(&ta_data->list_node); + kfree(ta_data); + break; + } + } + + return count; +} + int handle_unload_ta(u32 ta_handle) { struct tee_cmd_unload_ta cmd = {0}; - u32 status; + u32 status, count; int ret; if (!ta_handle) return -EINVAL; + mutex_lock(&ta_refcount_mutex); + + count = put_ta_refcount(ta_handle); + + if (count) { + pr_debug("unload ta: not unloading %u count %u\n", + ta_handle, count); + ret = -EBUSY; + goto unlock; + } + cmd.ta_handle = ta_handle; ret = psp_tee_process_cmd(TEE_CMD_ID_UNLOAD_TA, (void *)&cmd, @@ -137,8 +191,12 @@ int handle_unload_ta(u32 ta_handle) if (!ret && status != 0) { pr_err("unload ta: status = 0x%x\n", status); ret = -EBUSY; + } else { + pr_debug("unloaded ta handle %u\n", ta_handle); } +unlock: + mutex_unlock(&ta_refcount_mutex); return ret; } @@ -340,7 +398,8 @@ int handle_open_session(struct tee_ioctl_open_session_arg *arg, u32 *info, int handle_load_ta(void *data, u32 size, struct tee_ioctl_open_session_arg *arg) { - struct tee_cmd_load_ta cmd = {0}; + struct tee_cmd_unload_ta unload_cmd = {}; + struct tee_cmd_load_ta load_cmd = {}; phys_addr_t blob; int ret; @@ -353,21 +412,36 @@ int handle_load_ta(void *data, u32 size, struct tee_ioctl_open_session_arg *arg) return -EINVAL; } - cmd.hi_addr = upper_32_bits(blob); - cmd.low_addr = lower_32_bits(blob); - cmd.size = size; + load_cmd.hi_addr = upper_32_bits(blob); + load_cmd.low_addr = lower_32_bits(blob); + load_cmd.size = size; - ret = psp_tee_process_cmd(TEE_CMD_ID_LOAD_TA, (void *)&cmd, - sizeof(cmd), &arg->ret); + mutex_lock(&ta_refcount_mutex); + + ret = psp_tee_process_cmd(TEE_CMD_ID_LOAD_TA, (void *)&load_cmd, + sizeof(load_cmd), &arg->ret); if (ret) { arg->ret_origin = TEEC_ORIGIN_COMMS; arg->ret = TEEC_ERROR_COMMUNICATION; - } else { - set_session_id(cmd.ta_handle, 0, &arg->session); + } else if (arg->ret == TEEC_SUCCESS) { + ret = get_ta_refcount(load_cmd.ta_handle); + if (!ret) { + arg->ret_origin = TEEC_ORIGIN_COMMS; + arg->ret = TEEC_ERROR_OUT_OF_MEMORY; + + /* Unload the TA on error */ + unload_cmd.ta_handle = load_cmd.ta_handle; + psp_tee_process_cmd(TEE_CMD_ID_UNLOAD_TA, + (void *)&unload_cmd, + sizeof(unload_cmd), &ret); + } else { + set_session_id(load_cmd.ta_handle, 0, &arg->session); + } } + mutex_unlock(&ta_refcount_mutex); pr_debug("load TA: TA handle = 0x%x, RO = 0x%x, ret = 0x%x\n", - cmd.ta_handle, arg->ret_origin, arg->ret); + load_cmd.ta_handle, arg->ret_origin, arg->ret); return 0; } diff --git a/drivers/tee/amdtee/core.c b/drivers/tee/amdtee/core.c index 8a6a8f30bb42..da6b88e80dc0 100644 --- a/drivers/tee/amdtee/core.c +++ b/drivers/tee/amdtee/core.c @@ -59,10 +59,9 @@ static void release_session(struct amdtee_session *sess) continue; handle_close_session(sess->ta_handle, sess->session_info[i]); + handle_unload_ta(sess->ta_handle); } - /* Unload Trusted Application once all sessions are closed */ - handle_unload_ta(sess->ta_handle); kfree(sess); } @@ -224,8 +223,6 @@ static void destroy_session(struct kref *ref) struct amdtee_session *sess = container_of(ref, struct amdtee_session, refcount); - /* Unload the TA from TEE */ - handle_unload_ta(sess->ta_handle); mutex_lock(&session_list_mutex); list_del(&sess->list_node); mutex_unlock(&session_list_mutex); @@ -238,7 +235,7 @@ int amdtee_open_session(struct tee_context *ctx, { struct amdtee_context_data *ctxdata = ctx->data; struct amdtee_session *sess = NULL; - u32 session_info; + u32 session_info, ta_handle; size_t ta_size; int rc, i; void *ta; @@ -259,11 +256,14 @@ int amdtee_open_session(struct tee_context *ctx, if (arg->ret != TEEC_SUCCESS) goto out; + ta_handle = get_ta_handle(arg->session); + mutex_lock(&session_list_mutex); sess = alloc_session(ctxdata, arg->session); mutex_unlock(&session_list_mutex); if (!sess) { + handle_unload_ta(ta_handle); rc = -ENOMEM; goto out; } @@ -277,6 +277,7 @@ int amdtee_open_session(struct tee_context *ctx, if (i >= TEE_NUM_SESSIONS) { pr_err("reached maximum session count %d\n", TEE_NUM_SESSIONS); + handle_unload_ta(ta_handle); kref_put(&sess->refcount, destroy_session); rc = -ENOMEM; goto out; @@ -289,12 +290,13 @@ int amdtee_open_session(struct tee_context *ctx, spin_lock(&sess->lock); clear_bit(i, sess->sess_mask); spin_unlock(&sess->lock); + handle_unload_ta(ta_handle); kref_put(&sess->refcount, destroy_session); goto out; } sess->session_info[i] = session_info; - set_session_id(sess->ta_handle, i, &arg->session); + set_session_id(ta_handle, i, &arg->session); out: free_pages((u64)ta, get_order(ta_size)); return rc; @@ -329,6 +331,7 @@ int amdtee_close_session(struct tee_context *ctx, u32 session) /* Close the session */ handle_close_session(ta_handle, session_info); + handle_unload_ta(ta_handle); kref_put(&sess->refcount, destroy_session); From 682ae59ca2876f83396ccc5674235da99beed06c Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Tue, 4 May 2021 18:04:24 +0800 Subject: [PATCH 016/730] ASoC: rt711-sdca: fix the function number of SDCA control for feature unit 0x1E The function number should be FUNC_NUM_MIC_ARRAY(0x2) for the feature unit 0x1E. Fixes: ca5118c0c00f6 ('ASoC: rt711-sdca: change capture switch controls') Signed-off-by: Shuming Fan Link: https://lore.kernel.org/r/20210504100424.8760-1-shumingf@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt711-sdca.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt711-sdca.c b/sound/soc/codecs/rt711-sdca.c index cc36739f7fcf..24a084e0b48a 100644 --- a/sound/soc/codecs/rt711-sdca.c +++ b/sound/soc/codecs/rt711-sdca.c @@ -683,13 +683,13 @@ static int rt711_sdca_set_fu1e_capture_ctl(struct rt711_sdca_priv *rt711) ch_r = (rt711->fu1e_dapm_mute || rt711->fu1e_mixer_r_mute) ? 0x01 : 0x00; err = regmap_write(rt711->regmap, - SDW_SDCA_CTL(FUNC_NUM_JACK_CODEC, RT711_SDCA_ENT_USER_FU1E, + SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT711_SDCA_ENT_USER_FU1E, RT711_SDCA_CTL_FU_MUTE, CH_L), ch_l); if (err < 0) return err; err = regmap_write(rt711->regmap, - SDW_SDCA_CTL(FUNC_NUM_JACK_CODEC, RT711_SDCA_ENT_USER_FU1E, + SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT711_SDCA_ENT_USER_FU1E, RT711_SDCA_CTL_FU_MUTE, CH_R), ch_r); if (err < 0) return err; From 9683e5775c75097c46bd24e65411b16ac6c6cbb3 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 May 2021 16:49:10 -0700 Subject: [PATCH 017/730] libbpf: Add NULL check to add_dummy_ksym_var Avoids a segv if btf isn't present. Seen on the call path __bpf_object__open calling bpf_object__collect_externs. Fixes: 5bd022ec01f0 (libbpf: Support extern kernel function) Suggested-by: Stanislav Fomichev Suggested-by: Petar Penkov Signed-off-by: Ian Rogers Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210504234910.976501-1-irogers@google.com --- tools/lib/bpf/libbpf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e2a3cf437814..c41d9b2b59ac 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3216,6 +3216,9 @@ static int add_dummy_ksym_var(struct btf *btf) const struct btf_var_secinfo *vs; const struct btf_type *sec; + if (!btf) + return 0; + sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC, BTF_KIND_DATASEC); if (sec_btf_id < 0) From 3b80d106e110d39d3f678954d3b55078669cf07e Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Thu, 6 May 2021 14:43:49 +0200 Subject: [PATCH 018/730] samples/bpf: Consider frame size in tx_only of xdpsock sample Fix the tx_only micro-benchmark in xdpsock to take frame size into consideration. It was hardcoded to the default value of frame_size which is 4K. Changing this on the command line to 2K made half of the packets illegal as they were outside the umem and were therefore discarded by the kernel. Fixes: 46738f73ea4f ("samples/bpf: add use of need_wakeup flag in xdpsock") Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Acked-by: Maciej Fijalkowski Link: https://lore.kernel.org/bpf/20210506124349.6666-1-magnus.karlsson@gmail.com --- samples/bpf/xdpsock_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index aa696854be78..53e300f860bb 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -1255,7 +1255,7 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size) for (i = 0; i < batch_size; i++) { struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); - tx_desc->addr = (*frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT; + tx_desc->addr = (*frame_nb + i) * opt_xsk_frame_size; tx_desc->len = PKT_SIZE; } From 31379397dcc364a59ce764fabb131b645c43e340 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 5 May 2021 15:25:29 +0200 Subject: [PATCH 019/730] bpf: Forbid trampoline attach for functions with variable arguments We can't currently allow to attach functions with variable arguments. The problem is that we should save all the registers for arguments, which is probably doable, but if caller uses more than 6 arguments, we need stack data, which will be wrong, because of the extra stack frame we do in bpf trampoline, so we could crash. Also currently there's malformed trampoline code generated for such functions at the moment as described in: https://lore.kernel.org/bpf/20210429212834.82621-1-jolsa@kernel.org/ Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210505132529.401047-1-jolsa@kernel.org --- kernel/bpf/btf.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 0600ed325fa0..f982a9f0dbc4 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5206,6 +5206,12 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, m->ret_size = ret; for (i = 0; i < nargs; i++) { + if (i == nargs - 1 && args[i].type == 0) { + bpf_log(log, + "The function %s with variable args is unsupported.\n", + tname); + return -EINVAL; + } ret = __get_type_size(btf, args[i].type, &t); if (ret < 0) { bpf_log(log, @@ -5213,6 +5219,12 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, tname, i, btf_kind_str[BTF_INFO_KIND(t->info)]); return -EINVAL; } + if (ret == 0) { + bpf_log(log, + "The function %s has malformed void argument.\n", + tname); + return -EINVAL; + } m->arg_size[i] = ret; } m->nr_args = nargs; From 8822702f6e4c8917c83ba79e0ebf2c8c218910d4 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 7 May 2021 10:44:52 +0800 Subject: [PATCH 020/730] ALSA: hda/realtek: reset eapd coeff to default value for alc287 Ubuntu users reported an audio bug on the Lenovo Yoga Slim 7 14IIL05, he installed dual OS (Windows + Linux), if he booted to the Linux from Windows, the Speaker can't work well, it has crackling noise, if he poweroff the machine first after Windows, the Speaker worked well. Before rebooting or shutdown from Windows, the Windows changes the codec eapd coeff value, but the BIOS doesn't re-initialize its value, when booting into the Linux from Windows, the eapd coeff value is not correct. To fix it, set the codec default value to that coeff register in the alsa driver. BugLink: http://bugs.launchpad.net/bugs/1925057 Suggested-by: Kailang Yang Cc: Signed-off-by: Hui Wang Link: https://lore.kernel.org/r/20210507024452.8300-1-hui.wang@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6d58f24c9702..a5f3e78ec04e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -395,7 +395,6 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0282: case 0x10ec0283: case 0x10ec0286: - case 0x10ec0287: case 0x10ec0288: case 0x10ec0285: case 0x10ec0298: @@ -406,6 +405,10 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0275: alc_update_coef_idx(codec, 0xe, 0, 1<<0); break; + case 0x10ec0287: + alc_update_coef_idx(codec, 0x10, 1<<9, 0); + alc_write_coef_idx(codec, 0x8, 0x4ab7); + break; case 0x10ec0293: alc_update_coef_idx(codec, 0xa, 1<<13, 0); break; From 285c0faddcebdf360412fc9ef9cde63cf98da7f6 Mon Sep 17 00:00:00 2001 From: Bharat Jauhari Date: Thu, 25 Mar 2021 18:15:40 +0200 Subject: [PATCH 021/730] habanalabs: expose ASIC specific PLL index Currently the user cannot interpret the PLL information based on index as its exposed as an integer. This commit exposes ASIC specific PLL indexes and maps it to a generic FW compatible index. Signed-off-by: Bharat Jauhari Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 42 ++++++++------- drivers/misc/habanalabs/common/habanalabs.h | 16 +++--- drivers/misc/habanalabs/common/sysfs.c | 4 +- drivers/misc/habanalabs/gaudi/gaudi.c | 55 +++++++------------- drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c | 12 ++--- drivers/misc/habanalabs/goya/goya.c | 47 +++++++---------- drivers/misc/habanalabs/goya/goya_hwmgr.c | 40 +++++++------- include/uapi/misc/habanalabs.h | 33 ++++++++++++ 8 files changed, 131 insertions(+), 118 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 832dd5c5bb06..7cf82da67dab 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -661,18 +661,13 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy) return rc; } -int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index, +int get_used_pll_index(struct hl_device *hdev, u32 input_pll_index, enum pll_index *pll_index) { struct asic_fixed_properties *prop = &hdev->asic_prop; u8 pll_byte, pll_bit_off; bool dynamic_pll; - - if (input_pll_index >= PLL_MAX) { - dev_err(hdev->dev, "PLL index %d is out of range\n", - input_pll_index); - return -EINVAL; - } + int fw_pll_idx; dynamic_pll = prop->fw_security_status_valid && (prop->fw_app_security_map & CPU_BOOT_DEV_STS0_DYN_PLL_EN); @@ -680,28 +675,39 @@ int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index, if (!dynamic_pll) { /* * in case we are working with legacy FW (each asic has unique - * PLL numbering) extract the legacy numbering + * PLL numbering) use the driver based index as they are + * aligned with fw legacy numbering */ - *pll_index = hdev->legacy_pll_map[input_pll_index]; + *pll_index = input_pll_index; return 0; } - /* PLL map is a u8 array */ - pll_byte = prop->cpucp_info.pll_map[input_pll_index >> 3]; - pll_bit_off = input_pll_index & 0x7; - - if (!(pll_byte & BIT(pll_bit_off))) { - dev_err(hdev->dev, "PLL index %d is not supported\n", - input_pll_index); + /* retrieve a FW compatible PLL index based on + * ASIC specific user request + */ + fw_pll_idx = hdev->asic_funcs->map_pll_idx_to_fw_idx(input_pll_index); + if (fw_pll_idx < 0) { + dev_err(hdev->dev, "Invalid PLL index (%u) error %d\n", + input_pll_index, fw_pll_idx); return -EINVAL; } - *pll_index = input_pll_index; + /* PLL map is a u8 array */ + pll_byte = prop->cpucp_info.pll_map[fw_pll_idx >> 3]; + pll_bit_off = fw_pll_idx & 0x7; + + if (!(pll_byte & BIT(pll_bit_off))) { + dev_err(hdev->dev, "PLL index %d is not supported\n", + fw_pll_idx); + return -EINVAL; + } + + *pll_index = fw_pll_idx; return 0; } -int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index, +int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index, u16 *pll_freq_arr) { struct cpucp_packet pkt; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 44e89da30b4a..91291a8e201e 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -930,6 +930,9 @@ enum div_select_defs { * driver is ready to receive asynchronous events. This * function should be called during the first init and * after every hard-reset of the device + * @get_msi_info: Retrieve asic-specific MSI ID of the f/w async event + * @map_pll_idx_to_fw_idx: convert driver specific per asic PLL index to + * generic f/w compatible PLL Indexes */ struct hl_asic_funcs { int (*early_init)(struct hl_device *hdev); @@ -1054,6 +1057,7 @@ struct hl_asic_funcs { u32 block_id, u32 block_size); void (*enable_events_from_fw)(struct hl_device *hdev); void (*get_msi_info)(u32 *table); + int (*map_pll_idx_to_fw_idx)(u32 pll_idx); }; @@ -1950,8 +1954,6 @@ struct hl_mmu_funcs { * @aggregated_cs_counters: aggregated cs counters among all contexts * @mmu_priv: device-specific MMU data. * @mmu_func: device-related MMU functions. - * @legacy_pll_map: map holding map between dynamic (common) PLL indexes and - * static (asic specific) PLL indexes. * @dram_used_mem: current DRAM memory consumption. * @timeout_jiffies: device CS timeout value. * @max_power: the max power of the device, as configured by the sysadmin. This @@ -2071,8 +2073,6 @@ struct hl_device { struct hl_mmu_priv mmu_priv; struct hl_mmu_funcs mmu_func[MMU_NUM_PGT_LOCATIONS]; - enum pll_index *legacy_pll_map; - atomic64_t dram_used_mem; u64 timeout_jiffies; u64 max_power; @@ -2387,9 +2387,9 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, struct hl_info_pci_counters *counters); int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy); -int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index, +int get_used_pll_index(struct hl_device *hdev, u32 input_pll_index, enum pll_index *pll_index); -int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index, +int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index, u16 *pll_freq_arr); int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power); int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, @@ -2411,9 +2411,9 @@ int hl_pci_set_outbound_region(struct hl_device *hdev, int hl_pci_init(struct hl_device *hdev); void hl_pci_fini(struct hl_device *hdev); -long hl_get_frequency(struct hl_device *hdev, enum pll_index pll_index, +long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr); -void hl_set_frequency(struct hl_device *hdev, enum pll_index pll_index, +void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq); int hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr, long *value); diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c index 9fa61573a89d..c9f649b31e3a 100644 --- a/drivers/misc/habanalabs/common/sysfs.c +++ b/drivers/misc/habanalabs/common/sysfs.c @@ -9,7 +9,7 @@ #include -long hl_get_frequency(struct hl_device *hdev, enum pll_index pll_index, +long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) { struct cpucp_packet pkt; @@ -44,7 +44,7 @@ long hl_get_frequency(struct hl_device *hdev, enum pll_index pll_index, return (long) result; } -void hl_set_frequency(struct hl_device *hdev, enum pll_index pll_index, +void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) { struct cpucp_packet pkt; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index b751652f80a8..81155f06c126 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -105,36 +105,6 @@ #define GAUDI_PLL_MAX 10 -/* - * this enum kept here for compatibility with old FW (in which each asic has - * unique PLL numbering - */ -enum gaudi_pll_index { - GAUDI_CPU_PLL = 0, - GAUDI_PCI_PLL, - GAUDI_SRAM_PLL, - GAUDI_HBM_PLL, - GAUDI_NIC_PLL, - GAUDI_DMA_PLL, - GAUDI_MESH_PLL, - GAUDI_MME_PLL, - GAUDI_TPC_PLL, - GAUDI_IF_PLL, -}; - -static enum pll_index gaudi_pll_map[PLL_MAX] = { - [CPU_PLL] = GAUDI_CPU_PLL, - [PCI_PLL] = GAUDI_PCI_PLL, - [SRAM_PLL] = GAUDI_SRAM_PLL, - [HBM_PLL] = GAUDI_HBM_PLL, - [NIC_PLL] = GAUDI_NIC_PLL, - [DMA_PLL] = GAUDI_DMA_PLL, - [MESH_PLL] = GAUDI_MESH_PLL, - [MME_PLL] = GAUDI_MME_PLL, - [TPC_PLL] = GAUDI_TPC_PLL, - [IF_PLL] = GAUDI_IF_PLL, -}; - static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = { "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3", "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3", @@ -810,7 +780,7 @@ static int gaudi_fetch_psoc_frequency(struct hl_device *hdev) freq = 0; } } else { - rc = hl_fw_cpucp_pll_info_get(hdev, CPU_PLL, pll_freq_arr); + rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr); if (rc) return rc; @@ -1652,9 +1622,6 @@ static int gaudi_sw_init(struct hl_device *hdev) hdev->asic_specific = gaudi; - /* store legacy PLL map */ - hdev->legacy_pll_map = gaudi_pll_map; - /* Create DMA pool for small allocations */ hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0); @@ -8783,6 +8750,23 @@ static void gaudi_enable_events_from_fw(struct hl_device *hdev) WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER); } +static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx) +{ + switch (pll_idx) { + case HL_GAUDI_CPU_PLL: return CPU_PLL; + case HL_GAUDI_PCI_PLL: return PCI_PLL; + case HL_GAUDI_NIC_PLL: return NIC_PLL; + case HL_GAUDI_DMA_PLL: return DMA_PLL; + case HL_GAUDI_MESH_PLL: return MESH_PLL; + case HL_GAUDI_MME_PLL: return MME_PLL; + case HL_GAUDI_TPC_PLL: return TPC_PLL; + case HL_GAUDI_IF_PLL: return IF_PLL; + case HL_GAUDI_SRAM_PLL: return SRAM_PLL; + case HL_GAUDI_HBM_PLL: return HBM_PLL; + default: return -EINVAL; + } +} + static const struct hl_asic_funcs gaudi_funcs = { .early_init = gaudi_early_init, .early_fini = gaudi_early_fini, @@ -8866,7 +8850,8 @@ static const struct hl_asic_funcs gaudi_funcs = { .ack_protection_bits_errors = gaudi_ack_protection_bits_errors, .get_hw_block_id = gaudi_get_hw_block_id, .hw_block_mmap = gaudi_block_mmap, - .enable_events_from_fw = gaudi_enable_events_from_fw + .enable_events_from_fw = gaudi_enable_events_from_fw, + .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx }; /** diff --git a/drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c b/drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c index 8c49da4bcbd5..9b60eadd4c35 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c +++ b/drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c @@ -13,7 +13,7 @@ void gaudi_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq) struct gaudi_device *gaudi = hdev->asic_specific; if (freq == PLL_LAST) - hl_set_frequency(hdev, MME_PLL, gaudi->max_freq_value); + hl_set_frequency(hdev, HL_GAUDI_MME_PLL, gaudi->max_freq_value); } int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk) @@ -23,7 +23,7 @@ int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk) if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, MME_PLL, false); + value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, false); if (value < 0) { dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n", @@ -33,7 +33,7 @@ int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk) *max_clk = (value / 1000 / 1000); - value = hl_get_frequency(hdev, MME_PLL, true); + value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, true); if (value < 0) { dev_err(hdev->dev, @@ -57,7 +57,7 @@ static ssize_t clk_max_freq_mhz_show(struct device *dev, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, MME_PLL, false); + value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, false); gaudi->max_freq_value = value; @@ -85,7 +85,7 @@ static ssize_t clk_max_freq_mhz_store(struct device *dev, gaudi->max_freq_value = value * 1000 * 1000; - hl_set_frequency(hdev, MME_PLL, gaudi->max_freq_value); + hl_set_frequency(hdev, HL_GAUDI_MME_PLL, gaudi->max_freq_value); fail: return count; @@ -100,7 +100,7 @@ static ssize_t clk_cur_freq_mhz_show(struct device *dev, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, MME_PLL, true); + value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, true); return sprintf(buf, "%lu\n", (value / 1000 / 1000)); } diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index e27338f4aad2..e0ad2a269779 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -118,30 +118,6 @@ #define IS_MME_IDLE(mme_arch_sts) \ (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK) -/* - * this enum kept here for compatibility with old FW (in which each asic has - * unique PLL numbering - */ -enum goya_pll_index { - GOYA_CPU_PLL = 0, - GOYA_IC_PLL, - GOYA_MC_PLL, - GOYA_MME_PLL, - GOYA_PCI_PLL, - GOYA_EMMC_PLL, - GOYA_TPC_PLL, -}; - -static enum pll_index goya_pll_map[PLL_MAX] = { - [CPU_PLL] = GOYA_CPU_PLL, - [IC_PLL] = GOYA_IC_PLL, - [MC_PLL] = GOYA_MC_PLL, - [MME_PLL] = GOYA_MME_PLL, - [PCI_PLL] = GOYA_PCI_PLL, - [EMMC_PLL] = GOYA_EMMC_PLL, - [TPC_PLL] = GOYA_TPC_PLL, -}; - static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = { "goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3", "goya cq 4", "goya cpu eq" @@ -775,7 +751,8 @@ static void goya_fetch_psoc_frequency(struct hl_device *hdev) freq = 0; } } else { - rc = hl_fw_cpucp_pll_info_get(hdev, PCI_PLL, pll_freq_arr); + rc = hl_fw_cpucp_pll_info_get(hdev, HL_GOYA_PCI_PLL, + pll_freq_arr); if (rc) return; @@ -897,9 +874,6 @@ static int goya_sw_init(struct hl_device *hdev) hdev->asic_specific = goya; - /* store legacy PLL map */ - hdev->legacy_pll_map = goya_pll_map; - /* Create DMA pool for small allocations */ hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0); @@ -5512,6 +5486,20 @@ static void goya_enable_events_from_fw(struct hl_device *hdev) GOYA_ASYNC_EVENT_ID_INTS_REGISTER); } +static int goya_map_pll_idx_to_fw_idx(u32 pll_idx) +{ + switch (pll_idx) { + case HL_GOYA_CPU_PLL: return CPU_PLL; + case HL_GOYA_PCI_PLL: return PCI_PLL; + case HL_GOYA_MME_PLL: return MME_PLL; + case HL_GOYA_TPC_PLL: return TPC_PLL; + case HL_GOYA_IC_PLL: return IC_PLL; + case HL_GOYA_MC_PLL: return MC_PLL; + case HL_GOYA_EMMC_PLL: return EMMC_PLL; + default: return -EINVAL; + } +} + static const struct hl_asic_funcs goya_funcs = { .early_init = goya_early_init, .early_fini = goya_early_fini, @@ -5595,7 +5583,8 @@ static const struct hl_asic_funcs goya_funcs = { .ack_protection_bits_errors = goya_ack_protection_bits_errors, .get_hw_block_id = goya_get_hw_block_id, .hw_block_mmap = goya_block_mmap, - .enable_events_from_fw = goya_enable_events_from_fw + .enable_events_from_fw = goya_enable_events_from_fw, + .map_pll_idx_to_fw_idx = goya_map_pll_idx_to_fw_idx }; /* diff --git a/drivers/misc/habanalabs/goya/goya_hwmgr.c b/drivers/misc/habanalabs/goya/goya_hwmgr.c index 3acb36a1a902..7d007125727f 100644 --- a/drivers/misc/habanalabs/goya/goya_hwmgr.c +++ b/drivers/misc/habanalabs/goya/goya_hwmgr.c @@ -13,19 +13,19 @@ void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq) switch (freq) { case PLL_HIGH: - hl_set_frequency(hdev, MME_PLL, hdev->high_pll); - hl_set_frequency(hdev, TPC_PLL, hdev->high_pll); - hl_set_frequency(hdev, IC_PLL, hdev->high_pll); + hl_set_frequency(hdev, HL_GOYA_MME_PLL, hdev->high_pll); + hl_set_frequency(hdev, HL_GOYA_TPC_PLL, hdev->high_pll); + hl_set_frequency(hdev, HL_GOYA_IC_PLL, hdev->high_pll); break; case PLL_LOW: - hl_set_frequency(hdev, MME_PLL, GOYA_PLL_FREQ_LOW); - hl_set_frequency(hdev, TPC_PLL, GOYA_PLL_FREQ_LOW); - hl_set_frequency(hdev, IC_PLL, GOYA_PLL_FREQ_LOW); + hl_set_frequency(hdev, HL_GOYA_MME_PLL, GOYA_PLL_FREQ_LOW); + hl_set_frequency(hdev, HL_GOYA_TPC_PLL, GOYA_PLL_FREQ_LOW); + hl_set_frequency(hdev, HL_GOYA_IC_PLL, GOYA_PLL_FREQ_LOW); break; case PLL_LAST: - hl_set_frequency(hdev, MME_PLL, goya->mme_clk); - hl_set_frequency(hdev, TPC_PLL, goya->tpc_clk); - hl_set_frequency(hdev, IC_PLL, goya->ic_clk); + hl_set_frequency(hdev, HL_GOYA_MME_PLL, goya->mme_clk); + hl_set_frequency(hdev, HL_GOYA_TPC_PLL, goya->tpc_clk); + hl_set_frequency(hdev, HL_GOYA_IC_PLL, goya->ic_clk); break; default: dev_err(hdev->dev, "unknown frequency setting\n"); @@ -39,7 +39,7 @@ int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk) if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, MME_PLL, false); + value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, false); if (value < 0) { dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n", @@ -49,7 +49,7 @@ int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk) *max_clk = (value / 1000 / 1000); - value = hl_get_frequency(hdev, MME_PLL, true); + value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, true); if (value < 0) { dev_err(hdev->dev, @@ -72,7 +72,7 @@ static ssize_t mme_clk_show(struct device *dev, struct device_attribute *attr, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, MME_PLL, false); + value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, false); if (value < 0) return value; @@ -105,7 +105,7 @@ static ssize_t mme_clk_store(struct device *dev, struct device_attribute *attr, goto fail; } - hl_set_frequency(hdev, MME_PLL, value); + hl_set_frequency(hdev, HL_GOYA_MME_PLL, value); goya->mme_clk = value; fail: @@ -121,7 +121,7 @@ static ssize_t tpc_clk_show(struct device *dev, struct device_attribute *attr, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, TPC_PLL, false); + value = hl_get_frequency(hdev, HL_GOYA_TPC_PLL, false); if (value < 0) return value; @@ -154,7 +154,7 @@ static ssize_t tpc_clk_store(struct device *dev, struct device_attribute *attr, goto fail; } - hl_set_frequency(hdev, TPC_PLL, value); + hl_set_frequency(hdev, HL_GOYA_TPC_PLL, value); goya->tpc_clk = value; fail: @@ -170,7 +170,7 @@ static ssize_t ic_clk_show(struct device *dev, struct device_attribute *attr, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, IC_PLL, false); + value = hl_get_frequency(hdev, HL_GOYA_IC_PLL, false); if (value < 0) return value; @@ -203,7 +203,7 @@ static ssize_t ic_clk_store(struct device *dev, struct device_attribute *attr, goto fail; } - hl_set_frequency(hdev, IC_PLL, value); + hl_set_frequency(hdev, HL_GOYA_IC_PLL, value); goya->ic_clk = value; fail: @@ -219,7 +219,7 @@ static ssize_t mme_clk_curr_show(struct device *dev, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, MME_PLL, true); + value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, true); if (value < 0) return value; @@ -236,7 +236,7 @@ static ssize_t tpc_clk_curr_show(struct device *dev, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, TPC_PLL, true); + value = hl_get_frequency(hdev, HL_GOYA_TPC_PLL, true); if (value < 0) return value; @@ -253,7 +253,7 @@ static ssize_t ic_clk_curr_show(struct device *dev, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, IC_PLL, true); + value = hl_get_frequency(hdev, HL_GOYA_IC_PLL, true); if (value < 0) return value; diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index d3e017b5f0db..6d2d34c9f375 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -239,6 +239,39 @@ enum gaudi_engine_id { GAUDI_ENGINE_ID_SIZE }; +/* + * ASIC specific PLL index + * + * Used to retrieve in frequency info of different IPs via + * HL_INFO_PLL_FREQUENCY under HL_IOCTL_INFO IOCTL. The enums need to be + * used as an index in struct hl_pll_frequency_info + */ + +enum hl_goya_pll_index { + HL_GOYA_CPU_PLL = 0, + HL_GOYA_IC_PLL, + HL_GOYA_MC_PLL, + HL_GOYA_MME_PLL, + HL_GOYA_PCI_PLL, + HL_GOYA_EMMC_PLL, + HL_GOYA_TPC_PLL, + HL_GOYA_PLL_MAX +}; + +enum hl_gaudi_pll_index { + HL_GAUDI_CPU_PLL = 0, + HL_GAUDI_PCI_PLL, + HL_GAUDI_SRAM_PLL, + HL_GAUDI_HBM_PLL, + HL_GAUDI_NIC_PLL, + HL_GAUDI_DMA_PLL, + HL_GAUDI_MESH_PLL, + HL_GAUDI_MME_PLL, + HL_GAUDI_TPC_PLL, + HL_GAUDI_IF_PLL, + HL_GAUDI_PLL_MAX +}; + enum hl_device_status { HL_DEVICE_STATUS_OPERATIONAL, HL_DEVICE_STATUS_IN_RESET, From 001d5f66c156f2c30b6bf85346de09de8db49b59 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 11 Apr 2021 21:06:05 +0300 Subject: [PATCH 022/730] habanalabs: skip reading f/w errors on bad status If we read all FF from the boot status register, then something is totally wrong and there is no point of reading specific errors. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 7cf82da67dab..fff29f057b6d 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -850,8 +850,13 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, if (rc) { dev_err(hdev->dev, "Failed to read preboot version\n"); detect_cpu_boot_status(hdev, status); - fw_read_errors(hdev, boot_err0_reg, - cpu_security_boot_status_reg); + + /* If we read all FF, then something is totally wrong, no point + * of reading specific errors + */ + if (status != -1) + fw_read_errors(hdev, boot_err0_reg, + cpu_security_boot_status_reg); return -EIO; } From b5fd82a7af198db04408e218f64dc3d4178d585a Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 12 Apr 2021 09:38:22 +0300 Subject: [PATCH 023/730] habanalabs: change error level of security not ready This error indicates a problem in the security initialization inside the f/w so we need to stop the device loading because it won't be usable. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index fff29f057b6d..377a7ca886fe 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -362,12 +362,9 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg, } if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY) { - dev_warn(hdev->dev, + dev_err(hdev->dev, "Device boot warning - security not ready\n"); - /* This is a warning so we don't want it to disable the - * device - */ - err_val &= ~CPU_BOOT_ERR0_SECURITY_NOT_RDY; + err_exists = true; } if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL) { From 27a9e35daad080f3770401a1a11eda2f9f7732dd Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 12 Apr 2021 09:52:05 +0300 Subject: [PATCH 024/730] habanalabs: ignore f/w status error In case firmware has a bug and erroneously reports a status error (e.g. device unusable) during boot, allow the user to tell the driver to continue the boot regardless of the error status. This will be done via kernel parameter which exposes a mask. The user that loads the driver can decide exactly which status error to ignore and which to take into account. The bitmask is according to defines in hl_boot_if.h Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 3 ++- drivers/misc/habanalabs/common/habanalabs.h | 7 +++++++ drivers/misc/habanalabs/common/habanalabs_drv.c | 7 +++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 377a7ca886fe..0713b2c12d54 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -400,7 +400,8 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg, err_exists = true; } - if (err_exists) + if (err_exists && ((err_val & ~CPU_BOOT_ERR0_ENABLED) & + lower_32_bits(hdev->boot_error_status_mask))) return -EIO; return 0; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 91291a8e201e..6579f8767abd 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1962,6 +1962,12 @@ struct hl_mmu_funcs { * @clock_gating_mask: is clock gating enabled. bitmask that represents the * different engines. See debugfs-driver-habanalabs for * details. + * @boot_error_status_mask: contains a mask of the device boot error status. + * Each bit represents a different error, according to + * the defines in hl_boot_if.h. If the bit is cleared, + * the error will be ignored by the driver during + * device initialization. Mainly used to debug and + * workaround firmware bugs * @in_reset: is device in reset flow. * @curr_pll_profile: current PLL profile. * @card_type: Various ASICs have several card types. This indicates the card @@ -2077,6 +2083,7 @@ struct hl_device { u64 timeout_jiffies; u64 max_power; u64 clock_gating_mask; + u64 boot_error_status_mask; atomic_t in_reset; enum hl_pll_frequency curr_pll_profile; enum cpucp_card_types card_type; diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index 7135f1e03864..64d1530db985 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -30,6 +30,7 @@ static DEFINE_MUTEX(hl_devs_idr_lock); static int timeout_locked = 30; static int reset_on_lockup = 1; static int memory_scrub = 1; +static ulong boot_error_status_mask = ULONG_MAX; module_param(timeout_locked, int, 0444); MODULE_PARM_DESC(timeout_locked, @@ -43,6 +44,10 @@ module_param(memory_scrub, int, 0444); MODULE_PARM_DESC(memory_scrub, "Scrub device memory in various states (0 = no, 1 = yes, default yes)"); +module_param(boot_error_status_mask, ulong, 0444); +MODULE_PARM_DESC(boot_error_status_mask, + "Mask of the error status during device CPU boot (If bitX is cleared then error X is masked. Default all 1's)"); + #define PCI_VENDOR_ID_HABANALABS 0x1da3 #define PCI_IDS_GOYA 0x0001 @@ -319,6 +324,8 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev, hdev->major = hl_major; hdev->reset_on_lockup = reset_on_lockup; hdev->memory_scrub = memory_scrub; + hdev->boot_error_status_mask = boot_error_status_mask; + hdev->pldm = 0; set_driver_behavior_per_device(hdev); From 24a107097fbd8fb6a48a0dcb31e64c1de6831a1d Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Tue, 27 Apr 2021 17:49:25 +0300 Subject: [PATCH 025/730] habanalabs: wait for interrupt wrong timeout calculation Wait for interrupt timeout calculation is wrong, hence timeout occurs when user waits on an interrupt with certain timeout values. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index ff8791a651fd..af3c497defb1 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -2017,7 +2017,7 @@ wait_again: if (completion_value >= target_value) { *status = CS_WAIT_STATUS_COMPLETED; } else { - timeout -= jiffies_to_usecs(completion_rc); + timeout = completion_rc; goto wait_again; } } else { From 115726c5d312b462c9d9931ea42becdfa838a076 Mon Sep 17 00:00:00 2001 From: Lv Yunlong Date: Mon, 26 Apr 2021 06:43:46 -0700 Subject: [PATCH 026/730] habanalabs/gaudi: Fix a potential use after free in gaudi_memset_device_memory Our code analyzer reported a uaf. In gaudi_memset_device_memory, cb is get via hl_cb_kernel_create() with 2 refcount. If hl_cs_allocate_job() failed, the execution runs into release_cb branch. One ref of cb is dropped by hl_cb_put(cb) and could be freed if other thread also drops one ref. Then cb is used by cb->id later, which is a potential uaf. My patch add a variable 'id' to accept the value of cb->id before the hl_cb_put(cb) is called, to avoid the potential uaf. Fixes: 423815bf02e25 ("habanalabs/gaudi: remove PCI access to SM block") Signed-off-by: Lv Yunlong Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 81155f06c126..9e4a6bb3acd1 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -5579,6 +5579,7 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, struct hl_cs_job *job; u32 cb_size, ctl, err_cause; struct hl_cb *cb; + u64 id; int rc; cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false); @@ -5645,8 +5646,9 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, } release_cb: + id = cb->id; hl_cb_put(cb); - hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); + hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT); return rc; } From c1b55029493879f5bd585ff79f326e71f0bc05e3 Mon Sep 17 00:00:00 2001 From: Daniel Cordova A Date: Fri, 7 May 2021 12:31:16 -0500 Subject: [PATCH 027/730] ALSA: hda: fixup headset for ASUS GU502 laptop The GU502 requires a few steps to make headset i/o works properly: pincfg, verbs to unmute headphone out and callback to toggle output between speakers and headphone using jack. Signed-off-by: Daniel Cordova A Cc: Link: https://lore.kernel.org/r/20210507173116.12043-1-danesc87@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 62 +++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a5f3e78ec04e..b4b71609dff1 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6254,6 +6254,35 @@ static void alc294_fixup_gx502_hp(struct hda_codec *codec, } } +static void alc294_gu502_toggle_output(struct hda_codec *codec, + struct hda_jack_callback *cb) +{ + /* Windows sets 0x10 to 0x8420 for Node 0x20 which is + * responsible from changes between speakers and headphones + */ + if (snd_hda_jack_detect_state(codec, 0x21) == HDA_JACK_PRESENT) + alc_write_coef_idx(codec, 0x10, 0x8420); + else + alc_write_coef_idx(codec, 0x10, 0x0a20); +} + +static void alc294_fixup_gu502_hp(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + if (!is_jack_detectable(codec, 0x21)) + return; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + snd_hda_jack_detect_enable_callback(codec, 0x21, + alc294_gu502_toggle_output); + break; + case HDA_FIXUP_ACT_INIT: + alc294_gu502_toggle_output(codec, NULL); + break; + } +} + static void alc285_fixup_hp_gpio_amp_init(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -6471,6 +6500,9 @@ enum { ALC294_FIXUP_ASUS_GX502_HP, ALC294_FIXUP_ASUS_GX502_PINS, ALC294_FIXUP_ASUS_GX502_VERBS, + ALC294_FIXUP_ASUS_GU502_HP, + ALC294_FIXUP_ASUS_GU502_PINS, + ALC294_FIXUP_ASUS_GU502_VERBS, ALC285_FIXUP_HP_GPIO_LED, ALC285_FIXUP_HP_MUTE_LED, ALC236_FIXUP_HP_GPIO_LED, @@ -7712,6 +7744,35 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc294_fixup_gx502_hp, }, + [ALC294_FIXUP_ASUS_GU502_PINS] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x01a11050 }, /* rear HP mic */ + { 0x1a, 0x01a11830 }, /* rear external mic */ + { 0x21, 0x012110f0 }, /* rear HP out */ + { } + }, + .chained = true, + .chain_id = ALC294_FIXUP_ASUS_GU502_VERBS + }, + [ALC294_FIXUP_ASUS_GU502_VERBS] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + /* set 0x15 to HP-OUT ctrl */ + { 0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, 0xc0 }, + /* unmute the 0x15 amp */ + { 0x15, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000 }, + /* set 0x1b to HP-OUT */ + { 0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 }, + { } + }, + .chained = true, + .chain_id = ALC294_FIXUP_ASUS_GU502_HP + }, + [ALC294_FIXUP_ASUS_GU502_HP] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc294_fixup_gu502_hp, + }, [ALC294_FIXUP_ASUS_COEF_1B] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -8256,6 +8317,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502), + SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), From 4eff124347191d1548eb4e14e20e77513dcbd0fe Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sun, 9 May 2021 12:11:02 +0300 Subject: [PATCH 028/730] openrisc: mm/init.c: remove unused memblock_region variable in map_ram() Kernel test robot reports: cppcheck possible warnings: (new ones prefixed by >>, may not real problems) >> arch/openrisc/mm/init.c:125:10: warning: Uninitialized variable: region [uninitvar] region->base, region->base + region->size); ^ Replace usage of memblock_region fields with 'start' and 'end' variables that are initialized in for_each_mem_range() and remove the declaration of region. Fixes: b10d6bca8720 ("arch, drivers: replace for_each_membock() with for_each_mem_range()") Reported-by: kernel test robot Signed-off-by: Mike Rapoport Signed-off-by: Stafford Horne --- arch/openrisc/mm/init.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index bf9b2310fc93..f3fa02b8838a 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -75,7 +75,6 @@ static void __init map_ram(void) /* These mark extents of read-only kernel pages... * ...from vmlinux.lds.S */ - struct memblock_region *region; v = PAGE_OFFSET; @@ -121,7 +120,7 @@ static void __init map_ram(void) } printk(KERN_INFO "%s: Memory: 0x%x-0x%x\n", __func__, - region->base, region->base + region->size); + start, end); } } From 371dcaee1ade4b1eefd541ae6ee048b5ce15b37c Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sun, 9 May 2021 12:11:03 +0300 Subject: [PATCH 029/730] openrisc: mm/init.c: remove unused variable 'end' in paging_init() A build with W=1 enabled produces the following warning: CC arch/openrisc/mm/init.o arch/openrisc/mm/init.c: In function 'paging_init': arch/openrisc/mm/init.c:131:16: warning: variable 'end' set but not used [-Wunused-but-set-variable] 131 | unsigned long end; | ^~~ Remove the unused variable 'end'. Signed-off-by: Mike Rapoport Signed-off-by: Stafford Horne --- arch/openrisc/mm/init.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index f3fa02b8838a..6e38ec96cab8 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -128,7 +128,6 @@ void __init paging_init(void) { extern void tlb_init(void); - unsigned long end; int i; printk(KERN_INFO "Setting up paging and PTEs.\n"); @@ -144,8 +143,6 @@ void __init paging_init(void) */ current_pgd[smp_processor_id()] = init_mm.pgd; - end = (unsigned long)__va(max_low_pfn * PAGE_SIZE); - map_ram(); zone_sizes_init(); From c5a80540e425a5f9a82b0f3163e3b6a4331f33bc Mon Sep 17 00:00:00 2001 From: Dominik Andreas Schorpp Date: Thu, 22 Apr 2021 09:58:52 +0200 Subject: [PATCH 030/730] USB: serial: ftdi_sio: add IDs for IDS GmbH Products Add the IDS GmbH Vendor ID and the Product IDs for SI31A (2xRS232) and CM31A (LoRaWAN Modem). Signed-off-by: Dominik Andreas Schorpp Signed-off-by: Juergen Borleis Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/ftdi_sio.c | 3 +++ drivers/usb/serial/ftdi_sio_ids.h | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 6f2659e59b2e..369ef140df78 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1034,6 +1034,9 @@ static const struct usb_device_id id_table_combined[] = { /* Sienna devices */ { USB_DEVICE(FTDI_VID, FTDI_SIENNA_PID) }, { USB_DEVICE(ECHELON_VID, ECHELON_U20_PID) }, + /* IDS GmbH devices */ + { USB_DEVICE(IDS_VID, IDS_SI31A_PID) }, + { USB_DEVICE(IDS_VID, IDS_CM31A_PID) }, /* U-Blox devices */ { USB_DEVICE(UBLOX_VID, UBLOX_C099F9P_ZED_PID) }, { USB_DEVICE(UBLOX_VID, UBLOX_C099F9P_ODIN_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 3d47c6d72256..d854e04a4286 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -1567,6 +1567,13 @@ #define UNJO_VID 0x22B7 #define UNJO_ISODEBUG_V1_PID 0x150D +/* + * IDS GmbH + */ +#define IDS_VID 0x2CAF +#define IDS_SI31A_PID 0x13A2 +#define IDS_CM31A_PID 0x13A3 + /* * U-Blox products (http://www.u-blox.com). */ From e467714f822b5d167a7fb03d34af91b5b6af1827 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Wed, 28 Apr 2021 09:26:34 +0200 Subject: [PATCH 031/730] USB: serial: option: add Telit LE910-S1 compositions 0x7010, 0x7011 Add support for the following Telit LE910-S1 compositions: 0x7010: rndis, tty, tty, tty 0x7011: ecm, tty, tty, tty Signed-off-by: Daniele Palmas Link: https://lore.kernel.org/r/20210428072634.5091-1-dnlplm@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 3e79a543d3e7..7608584ef4fe 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1240,6 +1240,10 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1901, 0xff), /* Telit LN940 (MBIM) */ .driver_info = NCTRL(0) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x7010, 0xff), /* Telit LE910-S1 (RNDIS) */ + .driver_info = NCTRL(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x7011, 0xff), /* Telit LE910-S1 (ECM) */ + .driver_info = NCTRL(2) }, { USB_DEVICE(TELIT_VENDOR_ID, 0x9010), /* Telit SBL FN980 flashing device */ .driver_info = NCTRL(0) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */ From 89b1a3d811e6f8065d6ae8a25e7682329b4a31e2 Mon Sep 17 00:00:00 2001 From: Sean MacLennan Date: Sat, 1 May 2021 20:40:45 -0400 Subject: [PATCH 032/730] USB: serial: ti_usb_3410_5052: add startech.com device id This adds support for the Startech.com generic serial to USB converter. It seems to be a bone stock TI_3410. I have been using this patch for years. Signed-off-by: Sean MacLennan Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/ti_usb_3410_5052.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index caa46ac23db9..310db5abea9d 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -37,6 +37,7 @@ /* Vendor and product ids */ #define TI_VENDOR_ID 0x0451 #define IBM_VENDOR_ID 0x04b3 +#define STARTECH_VENDOR_ID 0x14b0 #define TI_3410_PRODUCT_ID 0x3410 #define IBM_4543_PRODUCT_ID 0x4543 #define IBM_454B_PRODUCT_ID 0x454b @@ -370,6 +371,7 @@ static const struct usb_device_id ti_id_table_3410[] = { { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1131_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1150_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1151_PRODUCT_ID) }, + { USB_DEVICE(STARTECH_VENDOR_ID, TI_3410_PRODUCT_ID) }, { } /* terminator */ }; @@ -408,6 +410,7 @@ static const struct usb_device_id ti_id_table_combined[] = { { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1131_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1150_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1151_PRODUCT_ID) }, + { USB_DEVICE(STARTECH_VENDOR_ID, TI_3410_PRODUCT_ID) }, { } /* terminator */ }; From f8e8c1b2f782e7391e8a1c25648ce756e2a7d481 Mon Sep 17 00:00:00 2001 From: Zolton Jheng Date: Mon, 10 May 2021 10:32:00 +0800 Subject: [PATCH 033/730] USB: serial: pl2303: add device id for ADLINK ND-6530 GC This adds the device id for the ADLINK ND-6530 which is a PL2303GC based device. Signed-off-by: Zolton Jheng Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/pl2303.c | 1 + drivers/usb/serial/pl2303.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index fd773d252691..940050c31482 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -113,6 +113,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(SONY_VENDOR_ID, SONY_QN3USB_PRODUCT_ID) }, { USB_DEVICE(SANWA_VENDOR_ID, SANWA_PRODUCT_ID) }, { USB_DEVICE(ADLINK_VENDOR_ID, ADLINK_ND6530_PRODUCT_ID) }, + { USB_DEVICE(ADLINK_VENDOR_ID, ADLINK_ND6530GC_PRODUCT_ID) }, { USB_DEVICE(SMART_VENDOR_ID, SMART_PRODUCT_ID) }, { USB_DEVICE(AT_VENDOR_ID, AT_VTKIT3_PRODUCT_ID) }, { } /* Terminating entry */ diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index 0f681ddbfd28..6097ee8fccb2 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -158,6 +158,7 @@ /* ADLINK ND-6530 RS232,RS485 and RS422 adapter */ #define ADLINK_VENDOR_ID 0x0b63 #define ADLINK_ND6530_PRODUCT_ID 0x6530 +#define ADLINK_ND6530GC_PRODUCT_ID 0x653a /* SMART USB Serial Adapter */ #define SMART_VENDOR_ID 0x0b8c From d4335d058f8430a0ce2b43dab9531f3a3cf9fe2c Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 10 May 2021 11:38:44 +0100 Subject: [PATCH 034/730] ASoC: codecs: lpass-rx-macro: add missing MODULE_DEVICE_TABLE Fix module loading by adding missing MODULE_DEVICE_TABLE. Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210510103844.1532-1-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/lpass-rx-macro.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/lpass-rx-macro.c b/sound/soc/codecs/lpass-rx-macro.c index 4f1b569d7c47..e074c7908c23 100644 --- a/sound/soc/codecs/lpass-rx-macro.c +++ b/sound/soc/codecs/lpass-rx-macro.c @@ -3579,6 +3579,7 @@ static const struct of_device_id rx_macro_dt_match[] = { { .compatible = "qcom,sm8250-lpass-rx-macro" }, { } }; +MODULE_DEVICE_TABLE(of, rx_macro_dt_match); static struct platform_driver rx_macro_driver = { .driver = { From 14c0c423746fe7232a093a68809a4bc6233eed60 Mon Sep 17 00:00:00 2001 From: Bixuan Cui Date: Sat, 8 May 2021 11:15:12 +0800 Subject: [PATCH 035/730] ASoC: codecs: lpass-tx-macro: add missing MODULE_DEVICE_TABLE This patch adds missing MODULE_DEVICE_TABLE definition which generates correct modalias for automatic loading of this driver when it is built as an external module. Reported-by: Hulk Robot Signed-off-by: Bixuan Cui Reviewed-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210508031512.53783-1-cuibixuan@huawei.com Signed-off-by: Mark Brown --- sound/soc/codecs/lpass-tx-macro.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/lpass-tx-macro.c b/sound/soc/codecs/lpass-tx-macro.c index 4eede9ad57bf..3d3a6e31551b 100644 --- a/sound/soc/codecs/lpass-tx-macro.c +++ b/sound/soc/codecs/lpass-tx-macro.c @@ -1846,6 +1846,7 @@ static const struct of_device_id tx_macro_dt_match[] = { { .compatible = "qcom,sm8250-lpass-tx-macro" }, { } }; +MODULE_DEVICE_TABLE(of, tx_macro_dt_match); static struct platform_driver tx_macro_driver = { .driver = { .name = "tx_macro", From b23584d6ce0212b9ad6cb7be19a7123461ed9e09 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Sat, 8 May 2021 18:46:47 +0800 Subject: [PATCH 036/730] ASoC: ak5558: Correct the dai name for ak5552 Correct the dai name for ak5552. The name should be "ak5552-aif". Fixes: d8c5c82e4e5b ("ASoC: ak5558: Add support for ak5552") Signed-off-by: Shengjiu Wang Link: https://lore.kernel.org/r/1620470807-12056-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/codecs/ak5558.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/ak5558.c b/sound/soc/codecs/ak5558.c index 34aed80db0eb..37d4600b6f2c 100644 --- a/sound/soc/codecs/ak5558.c +++ b/sound/soc/codecs/ak5558.c @@ -307,7 +307,7 @@ static struct snd_soc_dai_driver ak5558_dai = { }; static struct snd_soc_dai_driver ak5552_dai = { - .name = "ak5558-aif", + .name = "ak5552-aif", .capture = { .stream_name = "Capture", .channels_min = 1, From 680ec0549a055eb464dce6ffb4bfb736ef87236e Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 9 May 2021 21:12:27 +0200 Subject: [PATCH 037/730] spi: spi-fsl-dspi: Fix a resource leak in an error handling path 'dspi_request_dma()' should be undone by a 'dspi_release_dma()' call in the error handling path of the probe function, as already done in the remove function Fixes: 90ba37033cb9 ("spi: spi-fsl-dspi: Add DMA support for Vybrid") Signed-off-by: Christophe JAILLET Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/d51caaac747277a1099ba8dea07acd85435b857e.1620587472.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-dspi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index 028736687488..fb45e6af6638 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -1375,11 +1375,13 @@ poll_mode: ret = spi_register_controller(ctlr); if (ret != 0) { dev_err(&pdev->dev, "Problem registering DSPI ctlr\n"); - goto out_free_irq; + goto out_release_dma; } return ret; +out_release_dma: + dspi_release_dma(dspi); out_free_irq: if (dspi->irq) free_irq(dspi->irq, dspi); From dc5fa590273890a8541ce6e999d606bfb2d73797 Mon Sep 17 00:00:00 2001 From: Leilk Liu Date: Sat, 8 May 2021 14:02:14 +0800 Subject: [PATCH 038/730] spi: take the SPI IO-mutex in the spi_set_cs_timing method this patch takes the io_mutex to prevent an unprotected HW register modification in the set_cs_timing callback. Fixes: 4cea6b8cc34e ("spi: add power control when set_cs_timing") Signed-off-by: Leilk Liu Link: https://lore.kernel.org/r/20210508060214.1485-1-leilk.liu@mediatek.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index f9885c096563..a565e7d6bf3b 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -3457,9 +3457,12 @@ int spi_set_cs_timing(struct spi_device *spi, struct spi_delay *setup, if (spi->controller->set_cs_timing && !(spi->cs_gpiod || gpio_is_valid(spi->cs_gpio))) { + mutex_lock(&spi->controller->io_mutex); + if (spi->controller->auto_runtime_pm) { status = pm_runtime_get_sync(parent); if (status < 0) { + mutex_unlock(&spi->controller->io_mutex); pm_runtime_put_noidle(parent); dev_err(&spi->controller->dev, "Failed to power device: %d\n", status); @@ -3470,11 +3473,13 @@ int spi_set_cs_timing(struct spi_device *spi, struct spi_delay *setup, hold, inactive); pm_runtime_mark_last_busy(parent); pm_runtime_put_autosuspend(parent); - return status; } else { - return spi->controller->set_cs_timing(spi, setup, hold, + status = spi->controller->set_cs_timing(spi, setup, hold, inactive); } + + mutex_unlock(&spi->controller->io_mutex); + return status; } if ((setup && setup->unit == SPI_DELAY_UNIT_SCK) || From cabb1bb60e88ccaaa122ba01862403cd44e8e8f8 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 26 Apr 2021 19:55:58 +0200 Subject: [PATCH 039/730] mmc: meson-gx: make replace WARN_ONCE with dev_warn_once about scatterlist offset alignment Some drivers like ath10k can sometimg give an sg buffer with an offset whose alignment is not compatible with the Amlogic DMA descriptor engine requirements. Simply replace with dev_warn_once() to inform user this should be fixed to avoid degraded performance. This should be ultimately fixed in ath10k, but since it's only a performance issue the warning should be removed. Fixes: 79ed05e329c3 ("mmc: meson-gx: add support for descriptor chain mode") Cc: stable@vger.kernel.org Reported-by: Christian Hewitt Signed-off-by: Neil Armstrong Acked-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20210426175559.3110575-1-narmstrong@baylibre.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/meson-gx-mmc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index b8b771b643cc..1c61f0f24c09 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -258,7 +258,9 @@ static void meson_mmc_get_transfer_mode(struct mmc_host *mmc, for_each_sg(data->sg, sg, data->sg_len, i) { /* check for 8 byte alignment */ if (sg->offset % 8) { - WARN_ONCE(1, "unaligned scatterlist buffer\n"); + dev_warn_once(mmc_dev(mmc), + "unaligned sg offset %u, disabling descriptor DMA for transfer\n", + sg->offset); return; } } From 9b81354d7ebc1fd17f666a168dcabf27dae290bd Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 26 Apr 2021 19:55:59 +0200 Subject: [PATCH 040/730] mmc: meson-gx: also check SD_IO_RW_EXTENDED for scatterlist size alignment The brcmfmac driver can generate a scatterlist from a skb with each packets not aligned to the block size. This is not supported by the Amlogic Descriptor dma engine where each descriptor must match a multiple of the block size. The sg list is valid, since the sum of the sg buffers is a multiple of the block size, but we must discard those when in SD_IO_RW_EXTENDED mode since SDIO block mode can be used under the hood even with data->blocks == 1. Those transfers are very rare, thus can be replaced by a bounce buffer without real performance loss. Fixes: 7412dee9f1fd ("mmc: meson-gx: replace WARN_ONCE with dev_warn_once about scatterlist size alignment in block mode") Cc: stable@vger.kernel.org Reported-by: Christian Hewitt Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20210426175559.3110575-2-narmstrong@baylibre.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/meson-gx-mmc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index 1c61f0f24c09..016a6106151a 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -236,7 +236,8 @@ static void meson_mmc_get_transfer_mode(struct mmc_host *mmc, if (host->dram_access_quirk) return; - if (data->blocks > 1) { + /* SD_IO_RW_EXTENDED (CMD53) can also use block mode under the hood */ + if (data->blocks > 1 || mrq->cmd->opcode == SD_IO_RW_EXTENDED) { /* * In block mode DMA descriptor format, "length" field indicates * number of blocks and there is no way to pass DMA size that From a1149a6c06ee094a6e62886b0c0e8e66967a728a Mon Sep 17 00:00:00 2001 From: Daniel Beer Date: Sat, 24 Apr 2021 20:16:52 +1200 Subject: [PATCH 041/730] mmc: sdhci-pci-gli: increase 1.8V regulator wait Inserting an SD-card on an Intel NUC10i3FNK4 (which contains a GL9755) results in the message: mmc0: 1.8V regulator output did not become stable Following this message, some cards work (sometimes), but most cards fail with EILSEQ. This behaviour is observed on Debian 10 running kernel 4.19.188, but also with 5.8.18 and 5.11.15. The driver currently waits 5ms after switching on the 1.8V regulator for it to become stable. Increasing this to 10ms gets rid of the warning about stability, but most cards still fail. Increasing it to 20ms gets some cards working (a 32GB Samsung micro SD works, a 128GB ADATA doesn't). At 50ms, the ADATA works most of the time, and at 100ms both cards work reliably. Signed-off-by: Daniel Beer Acked-by: Ben Chuang Fixes: e51df6ce668a ("mmc: host: sdhci-pci: Add Genesys Logic GL975x support") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210424081652.GA16047@nyquist.nev Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-gli.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index 592d79082f58..061618aa247f 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -627,8 +627,13 @@ static void sdhci_gli_voltage_switch(struct sdhci_host *host) * * Wait 5ms after set 1.8V signal enable in Host Control 2 register * to ensure 1.8V signal enable bit is set by GL9750/GL9755. + * + * ...however, the controller in the NUC10i3FNK4 (a 9755) requires + * slightly longer than 5ms before the control register reports that + * 1.8V is ready, and far longer still before the card will actually + * work reliably. */ - usleep_range(5000, 5500); + usleep_range(100000, 110000); } static void sdhci_gl9750_reset(struct sdhci_host *host, u8 mask) From be1c2bb3ba5a39c20b1d54e01ffbcb2b1ca7e46c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 27 Apr 2021 09:00:28 +0100 Subject: [PATCH 042/730] ARM: PXA: Fix cplds irqdesc allocation when using legacy mode The Mainstone PXA platform uses CONFIG_SPARSE_IRQ, and thus we cannot rely on the irq descriptors to be readilly allocated before creating the irqdomain in legacy mode. The kernel then complains loudly about not being able to associate the interrupt in the domain -- can't blame it. Fix it by allocating the irqdescs upfront in the legacy case. Fixes: b68761da0111 ("ARM: PXA: Kill use of irq_create_strict_mappings()") Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210426223942.GA213931@roeck-us.net --- arch/arm/mach-pxa/pxa_cplds_irqs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-pxa/pxa_cplds_irqs.c b/arch/arm/mach-pxa/pxa_cplds_irqs.c index ec0d9b094744..bddfc7cd5d40 100644 --- a/arch/arm/mach-pxa/pxa_cplds_irqs.c +++ b/arch/arm/mach-pxa/pxa_cplds_irqs.c @@ -121,8 +121,13 @@ static int cplds_probe(struct platform_device *pdev) return fpga->irq; base_irq = platform_get_irq(pdev, 1); - if (base_irq < 0) + if (base_irq < 0) { base_irq = 0; + } else { + ret = devm_irq_alloc_descs(&pdev->dev, base_irq, base_irq, CPLDS_NB_IRQ, 0); + if (ret < 0) + return ret; + } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); fpga->base = devm_ioremap_resource(&pdev->dev, res); From 5b44955dc19808fa209444ccb192343050e95ab0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 13 Apr 2021 14:21:58 +0200 Subject: [PATCH 043/730] irqchip/apple-aic: APPLE_AIC should depend on ARCH_APPLE The Apple Interrupt Controller is only present on Apple Silicon SoCs. Hence add a dependency on ARCH_APPLE, to prevent asking the user about this driver when configuring a kernel without Apple Silicon SoC support. Drop the default, as ARCH_APPLE already selects APPLE_AIC. Fixes: 76cde26394114f6a ("irqchip/apple-aic: Add support for the Apple Interrupt Controller") Signed-off-by: Geert Uytterhoeven Acked-by: Hector Martin Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/f37e8daea37d50651d2164b0b3dad90780188548.1618316398.git.geert+renesas@glider.be --- drivers/irqchip/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index b90e825df7e1..62543a4eccc0 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -596,7 +596,7 @@ config IRQ_IDT3243X config APPLE_AIC bool "Apple Interrupt Controller (AIC)" depends on ARM64 - default ARCH_APPLE + depends on ARCH_APPLE || COMPILE_TEST help Support for the Apple Interrupt Controller found on Apple Silicon SoCs, such as the M1. From 8c721cb0f742f9a01f2f1985b274b544f89904f4 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 28 Apr 2021 10:44:19 +0200 Subject: [PATCH 044/730] quota: Use 'hlist_for_each_entry' to simplify code Use 'hlist_for_each_entry' instead of hand writing it. This saves a few lines of code. Link: https://lore.kernel.org/r/f82d3e33964dcbd2aac19866735e0a8381c8a735.1619599407.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Signed-off-by: Jan Kara --- fs/quota/dquot.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 4f1373463766..22d904bde6ab 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -288,14 +288,12 @@ static inline void remove_dquot_hash(struct dquot *dquot) static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb, struct kqid qid) { - struct hlist_node *node; struct dquot *dquot; - hlist_for_each (node, dquot_hash+hashent) { - dquot = hlist_entry(node, struct dquot, dq_hash); + hlist_for_each_entry(dquot, dquot_hash+hashent, dq_hash) if (dquot->dq_sb == sb && qid_eq(dquot->dq_id, qid)) return dquot; - } + return NULL; } From e84749a78dc82bc545f12ce009e3dbcc2c5a8a91 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 10 May 2021 17:06:59 +0200 Subject: [PATCH 045/730] ALSA: usb-audio: Validate MS endpoint descriptors snd_usbmidi_get_ms_info() may access beyond the border when a malformed descriptor is passed. This patch adds the sanity checks of the given MS endpoint descriptors, and skips invalid ones. Reported-by: syzbot+6bb23a5d5548b93c94aa@syzkaller.appspotmail.com Cc: Link: https://lore.kernel.org/r/20210510150659.17710-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/midi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/usb/midi.c b/sound/usb/midi.c index a10ac75969a8..649eb8d1ab7d 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1956,8 +1956,12 @@ static int snd_usbmidi_get_ms_info(struct snd_usb_midi *umidi, ms_ep = find_usb_ms_endpoint_descriptor(hostep); if (!ms_ep) continue; + if (ms_ep->bLength <= sizeof(*ms_ep)) + continue; if (ms_ep->bNumEmbMIDIJack > 0x10) continue; + if (ms_ep->bLength < sizeof(*ms_ep) + ms_ep->bNumEmbMIDIJack) + continue; if (usb_endpoint_dir_out(ep)) { if (endpoints[epidx].out_ep) { if (++epidx >= MIDI_MAX_ENDPOINTS) { From 7ee06ddc4038f936b0d4459d37a7d4d844fb03db Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 7 May 2021 11:38:10 -0400 Subject: [PATCH 046/730] dm snapshot: fix a crash when an origin has no snapshots If an origin target has no snapshots, o->split_boundary is set to 0. This causes BUG_ON(sectors <= 0) in block/bio.c:bio_split(). Fix this by initializing chunk_size, and in turn split_boundary, to rounddown_pow_of_two(UINT_MAX) -- the largest power of two that fits into "unsigned" type. Reported-by: Michael Tokarev Tested-by: Michael Tokarev Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-snap.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index a2acb014c13a..2a51ddd840b4 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -855,12 +855,11 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new) static uint32_t __minimum_chunk_size(struct origin *o) { struct dm_snapshot *snap; - unsigned chunk_size = 0; + unsigned chunk_size = rounddown_pow_of_two(UINT_MAX); if (o) list_for_each_entry(snap, &o->snapshots, list) - chunk_size = min_not_zero(chunk_size, - snap->store->chunk_size); + chunk_size = min(chunk_size, snap->store->chunk_size); return (uint32_t) chunk_size; } From 56a8d3fd1f342d10ee7b27e9ac0f4d00b5fbb91c Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 13 Apr 2021 18:18:34 +0200 Subject: [PATCH 047/730] mtd: rawnand: cs553x: Fix external use of SW Hamming ECC helper Since the Hamming software ECC engine has been updated to become a proper and independent ECC engine, it is now mandatory to either initialize the engine before using any one of his functions or use one of the bare helpers which only perform the calculations. As there is no actual need for a proper ECC initialization, let's just use the bare helper instead of the rawnand one. Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions") Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-2-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/cs553x_nand.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/cs553x_nand.c b/drivers/mtd/nand/raw/cs553x_nand.c index 6edf78c16fc8..df40927e5678 100644 --- a/drivers/mtd/nand/raw/cs553x_nand.c +++ b/drivers/mtd/nand/raw/cs553x_nand.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -240,6 +241,15 @@ static int cs_calculate_ecc(struct nand_chip *this, const u_char *dat, return 0; } +static int cs553x_ecc_correct(struct nand_chip *chip, + unsigned char *buf, + unsigned char *read_ecc, + unsigned char *calc_ecc) +{ + return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc, + chip->ecc.size, false); +} + static struct cs553x_nand_controller *controllers[4]; static int cs553x_attach_chip(struct nand_chip *chip) @@ -251,7 +261,7 @@ static int cs553x_attach_chip(struct nand_chip *chip) chip->ecc.bytes = 3; chip->ecc.hwctl = cs_enable_hwecc; chip->ecc.calculate = cs_calculate_ecc; - chip->ecc.correct = rawnand_sw_hamming_correct; + chip->ecc.correct = cs553x_ecc_correct; chip->ecc.strength = 1; return 0; From ad9ffdce453934cdc22fac0a0268119bd630260f Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 13 Apr 2021 18:18:35 +0200 Subject: [PATCH 048/730] mtd: rawnand: fsmc: Fix external use of SW Hamming ECC helper Since the Hamming software ECC engine has been updated to become a proper and independent ECC engine, it is now mandatory to either initialize the engine before using any one of his functions or use one of the bare helpers which only perform the calculations. As there is no actual need for a proper ECC initialization, let's just use the bare helper instead of the rawnand one. Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions") Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-3-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/fsmc_nand.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c index bf695255b43a..a3e66155ae40 100644 --- a/drivers/mtd/nand/raw/fsmc_nand.c +++ b/drivers/mtd/nand/raw/fsmc_nand.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -432,6 +433,15 @@ static int fsmc_read_hwecc_ecc1(struct nand_chip *chip, const u8 *data, return 0; } +static int fsmc_correct_ecc1(struct nand_chip *chip, + unsigned char *buf, + unsigned char *read_ecc, + unsigned char *calc_ecc) +{ + return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc, + chip->ecc.size, false); +} + /* Count the number of 0's in buff upto a max of max_bits */ static int count_written_bits(u8 *buff, int size, int max_bits) { @@ -917,7 +927,7 @@ static int fsmc_nand_attach_chip(struct nand_chip *nand) case NAND_ECC_ENGINE_TYPE_ON_HOST: dev_info(host->dev, "Using 1-bit HW ECC scheme\n"); nand->ecc.calculate = fsmc_read_hwecc_ecc1; - nand->ecc.correct = rawnand_sw_hamming_correct; + nand->ecc.correct = fsmc_correct_ecc1; nand->ecc.hwctl = fsmc_enable_hwecc; nand->ecc.bytes = 3; nand->ecc.strength = 1; From c4b7d7c480d607e4f52d310d9d16b194868d0917 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 13 Apr 2021 18:18:36 +0200 Subject: [PATCH 049/730] mtd: rawnand: lpc32xx_slc: Fix external use of SW Hamming ECC helper Since the Hamming software ECC engine has been updated to become a proper and independent ECC engine, it is now mandatory to either initialize the engine before using any one of his functions or use one of the bare helpers which only perform the calculations. As there is no actual need for a proper ECC initialization, let's just use the bare helper instead of the rawnand one. Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions") Cc: stable@vger.kernel.org Cc: Vladimir Zapolskiy Reported-by: Trevor Woerner Signed-off-by: Miquel Raynal Tested-by: Trevor Woerner Acked-by: Vladimir Zapolskiy Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-4-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/lpc32xx_slc.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c index 6b7269cfb7d8..d7dfc6fd85ca 100644 --- a/drivers/mtd/nand/raw/lpc32xx_slc.c +++ b/drivers/mtd/nand/raw/lpc32xx_slc.c @@ -27,6 +27,7 @@ #include #include #include +#include #define LPC32XX_MODNAME "lpc32xx-nand" @@ -344,6 +345,18 @@ static int lpc32xx_nand_ecc_calculate(struct nand_chip *chip, return 0; } +/* + * Corrects the data + */ +static int lpc32xx_nand_ecc_correct(struct nand_chip *chip, + unsigned char *buf, + unsigned char *read_ecc, + unsigned char *calc_ecc) +{ + return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc, + chip->ecc.size, false); +} + /* * Read a single byte from NAND device */ @@ -802,7 +815,7 @@ static int lpc32xx_nand_attach_chip(struct nand_chip *chip) chip->ecc.write_oob = lpc32xx_nand_write_oob_syndrome; chip->ecc.read_oob = lpc32xx_nand_read_oob_syndrome; chip->ecc.calculate = lpc32xx_nand_ecc_calculate; - chip->ecc.correct = rawnand_sw_hamming_correct; + chip->ecc.correct = lpc32xx_nand_ecc_correct; chip->ecc.hwctl = lpc32xx_nand_ecc_enable; /* From 3e09c0252501829b14b10f14e1982aaab77d0b80 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 13 Apr 2021 18:18:37 +0200 Subject: [PATCH 050/730] mtd: rawnand: ndfc: Fix external use of SW Hamming ECC helper Since the Hamming software ECC engine has been updated to become a proper and independent ECC engine, it is now mandatory to either initialize the engine before using any one of his functions or use one of the bare helpers which only perform the calculations. As there is no actual need for a proper ECC initialization, let's just use the bare helper instead of the rawnand one. Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions") Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-5-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/ndfc.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/ndfc.c b/drivers/mtd/nand/raw/ndfc.c index 338d6b1a189e..98d5a94c3a24 100644 --- a/drivers/mtd/nand/raw/ndfc.c +++ b/drivers/mtd/nand/raw/ndfc.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -100,6 +101,15 @@ static int ndfc_calculate_ecc(struct nand_chip *chip, return 0; } +static int ndfc_correct_ecc(struct nand_chip *chip, + unsigned char *buf, + unsigned char *read_ecc, + unsigned char *calc_ecc) +{ + return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc, + chip->ecc.size, false); +} + /* * Speedups for buffer read/write/verify * @@ -145,7 +155,7 @@ static int ndfc_chip_init(struct ndfc_controller *ndfc, chip->controller = &ndfc->ndfc_control; chip->legacy.read_buf = ndfc_read_buf; chip->legacy.write_buf = ndfc_write_buf; - chip->ecc.correct = rawnand_sw_hamming_correct; + chip->ecc.correct = ndfc_correct_ecc; chip->ecc.hwctl = ndfc_enable_hwecc; chip->ecc.calculate = ndfc_calculate_ecc; chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; From 46fcb57e6b7283533ebf8ba17a6bd30fa88bdc9f Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 13 Apr 2021 18:18:38 +0200 Subject: [PATCH 051/730] mtd: rawnand: sharpsl: Fix external use of SW Hamming ECC helper Since the Hamming software ECC engine has been updated to become a proper and independent ECC engine, it is now mandatory to either initialize the engine before using any one of his functions or use one of the bare helpers which only perform the calculations. As there is no actual need for a proper ECC initialization, let's just use the bare helper instead of the rawnand one. Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions") Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-6-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/sharpsl.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/sharpsl.c b/drivers/mtd/nand/raw/sharpsl.c index 5612ee628425..2f1fe464e663 100644 --- a/drivers/mtd/nand/raw/sharpsl.c +++ b/drivers/mtd/nand/raw/sharpsl.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -96,6 +97,15 @@ static int sharpsl_nand_calculate_ecc(struct nand_chip *chip, return readb(sharpsl->io + ECCCNTR) != 0; } +static int sharpsl_nand_correct_ecc(struct nand_chip *chip, + unsigned char *buf, + unsigned char *read_ecc, + unsigned char *calc_ecc) +{ + return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc, + chip->ecc.size, false); +} + static int sharpsl_attach_chip(struct nand_chip *chip) { if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST) @@ -106,7 +116,7 @@ static int sharpsl_attach_chip(struct nand_chip *chip) chip->ecc.strength = 1; chip->ecc.hwctl = sharpsl_nand_enable_hwecc; chip->ecc.calculate = sharpsl_nand_calculate_ecc; - chip->ecc.correct = rawnand_sw_hamming_correct; + chip->ecc.correct = sharpsl_nand_correct_ecc; return 0; } From 6a4c5ada577467a5f79e06f2c5e69c09983c22fb Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 13 Apr 2021 18:18:39 +0200 Subject: [PATCH 052/730] mtd: rawnand: tmio: Fix external use of SW Hamming ECC helper Since the Hamming software ECC engine has been updated to become a proper and independent ECC engine, it is now mandatory to either initialize the engine before using any one of his functions or use one of the bare helpers which only perform the calculations. As there is no actual need for a proper ECC initialization, let's just use the bare helper instead of the rawnand one. Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions") Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-7-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/tmio_nand.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/nand/raw/tmio_nand.c b/drivers/mtd/nand/raw/tmio_nand.c index de8e919d0ebe..6d93dd31969b 100644 --- a/drivers/mtd/nand/raw/tmio_nand.c +++ b/drivers/mtd/nand/raw/tmio_nand.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -292,11 +293,12 @@ static int tmio_nand_correct_data(struct nand_chip *chip, unsigned char *buf, int r0, r1; /* assume ecc.size = 512 and ecc.bytes = 6 */ - r0 = rawnand_sw_hamming_correct(chip, buf, read_ecc, calc_ecc); + r0 = ecc_sw_hamming_correct(buf, read_ecc, calc_ecc, + chip->ecc.size, false); if (r0 < 0) return r0; - r1 = rawnand_sw_hamming_correct(chip, buf + 256, read_ecc + 3, - calc_ecc + 3); + r1 = ecc_sw_hamming_correct(buf + 256, read_ecc + 3, calc_ecc + 3, + chip->ecc.size, false); if (r1 < 0) return r1; return r0 + r1; From 3d227a0b0ce319edbff6fd0d8af4d66689e477cc Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 13 Apr 2021 18:18:40 +0200 Subject: [PATCH 053/730] mtd: rawnand: txx9ndfmc: Fix external use of SW Hamming ECC helper Since the Hamming software ECC engine has been updated to become a proper and independent ECC engine, it is now mandatory to either initialize the engine before using any one of his functions or use one of the bare helpers which only perform the calculations. As there is no actual need for a proper ECC initialization, let's just use the bare helper instead of the rawnand one. Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions") Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-8-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/txx9ndfmc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c index 1a9449e53bf9..b8894ac27073 100644 --- a/drivers/mtd/nand/raw/txx9ndfmc.c +++ b/drivers/mtd/nand/raw/txx9ndfmc.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -193,8 +194,8 @@ static int txx9ndfmc_correct_data(struct nand_chip *chip, unsigned char *buf, int stat; for (eccsize = chip->ecc.size; eccsize > 0; eccsize -= 256) { - stat = rawnand_sw_hamming_correct(chip, buf, read_ecc, - calc_ecc); + stat = ecc_sw_hamming_correct(buf, read_ecc, calc_ecc, + chip->ecc.size, false); if (stat < 0) return stat; corrected += stat; From 562b4e91d3b221f737f84ff78ee7d348c8a6891f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sat, 8 May 2021 19:32:14 +0200 Subject: [PATCH 054/730] mtd: parsers: ofpart: fix parsing subpartitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ofpart was recently patched to not scan random partition nodes as subpartitions. That change unfortunately broke scanning valid subpartitions like: partitions { compatible = "fixed-partitions"; #address-cells = <1>; #size-cells = <1>; partition@0 { compatible = "fixed-partitions"; label = "bootloader"; reg = <0x0 0x100000>; partition@0 { label = "config"; reg = <0x80000 0x80000>; }; }; }; Fix that regression by adding 1 more code path. We actually need 3 conditional blocks to support 3 possible cases. This change also makes code easier to understand & follow. Reported-by: David Bauer Fixes: 2d751203aacf ("mtd: parsers: ofpart: limit parsing of deprecated DT syntax Signed-off-by: Rafał Miłecki Tested-by: Andrew Cameron Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210508173214.28365-1-zajec5@gmail.com --- drivers/mtd/parsers/ofpart_core.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/mtd/parsers/ofpart_core.c b/drivers/mtd/parsers/ofpart_core.c index 0fd8d2a0db97..192190c42fc8 100644 --- a/drivers/mtd/parsers/ofpart_core.c +++ b/drivers/mtd/parsers/ofpart_core.c @@ -57,20 +57,22 @@ static int parse_fixed_partitions(struct mtd_info *master, if (!mtd_node) return 0; - ofpart_node = of_get_child_by_name(mtd_node, "partitions"); - if (!ofpart_node && !master->parent) { - /* - * We might get here even when ofpart isn't used at all (e.g., - * when using another parser), so don't be louder than - * KERN_DEBUG - */ - pr_debug("%s: 'partitions' subnode not found on %pOF. Trying to parse direct subnodes as partitions.\n", - master->name, mtd_node); + if (!master->parent) { /* Master */ + ofpart_node = of_get_child_by_name(mtd_node, "partitions"); + if (!ofpart_node) { + /* + * We might get here even when ofpart isn't used at all (e.g., + * when using another parser), so don't be louder than + * KERN_DEBUG + */ + pr_debug("%s: 'partitions' subnode not found on %pOF. Trying to parse direct subnodes as partitions.\n", + master->name, mtd_node); + ofpart_node = mtd_node; + dedicated = false; + } + } else { /* Partition */ ofpart_node = mtd_node; - dedicated = false; } - if (!ofpart_node) - return 0; of_id = of_match_node(parse_ofpart_match_table, ofpart_node); if (dedicated && !of_id) { From 5311221304fa60e357aada75efdf2f2da8c30a57 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Thu, 6 May 2021 19:49:39 +0800 Subject: [PATCH 055/730] dt-bindings: phy: cadence-torrent: update reference file of docs In commit fd7abc3c5b87 ("phy: cadence-torrent: Use a common header file for Cadence SERDES"), phy-cadence-torrent.h was renamed to phy-cadence.h. Fix it of the Documentation. Signed-off-by: Wan Jiabing Link: https://lore.kernel.org/r/20210506114940.22215-1-wanjiabing@vivo.com Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml b/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml index 01dcd14e7b2a..320a232c7208 100644 --- a/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml +++ b/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml @@ -118,7 +118,7 @@ patternProperties: description: Specifies the Spread Spectrum Clocking mode used. It can be NO_SSC, EXTERNAL_SSC or INTERNAL_SSC. - Refer include/dt-bindings/phy/phy-cadence-torrent.h for the constants to be used. + Refer include/dt-bindings/phy/phy-cadence.h for the constants to be used. $ref: /schemas/types.yaml#/definitions/uint32 enum: [0, 1, 2] default: 0 From a568814a55a0e82bbc7c7b51333d0c38e8fb5520 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 9 May 2021 14:39:21 +0300 Subject: [PATCH 056/730] RDMA/siw: Properly check send and receive CQ pointers The check for the NULL of pointer received from container_of() is incorrect by definition as it points to some offset from NULL. Change such check with proper NULL check of SIW QP attributes. Fixes: 303ae1cdfdf7 ("rdma/siw: application interface") Link: https://lore.kernel.org/r/a7535a82925f6f4c1f062abaa294f3ae6e54bdd2.1620560310.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky Reviewed-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_verbs.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index d2313efb26db..917c8a919f38 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -300,7 +300,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, struct siw_ucontext *uctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); - struct siw_cq *scq = NULL, *rcq = NULL; unsigned long flags; int num_sqe, num_rqe, rv = 0; size_t length; @@ -343,10 +342,8 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, rv = -EINVAL; goto err_out; } - scq = to_siw_cq(attrs->send_cq); - rcq = to_siw_cq(attrs->recv_cq); - if (!scq || (!rcq && !attrs->srq)) { + if (!attrs->send_cq || (!attrs->recv_cq && !attrs->srq)) { siw_dbg(base_dev, "send CQ or receive CQ invalid\n"); rv = -EINVAL; goto err_out; @@ -401,8 +398,8 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, } } qp->pd = pd; - qp->scq = scq; - qp->rcq = rcq; + qp->scq = to_siw_cq(attrs->send_cq); + qp->rcq = to_siw_cq(attrs->recv_cq); if (attrs->srq) { /* From a3d83276d98886879b5bf7b30b7c29882754e4df Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 9 May 2021 14:41:38 +0300 Subject: [PATCH 057/730] RDMA/siw: Release xarray entry The xarray entry is allocated in siw_qp_add(), but release was missed in case zero-sized SQ was discovered. Fixes: 661f385961f0 ("RDMA/siw: Fix handling of zero-sized Read and Receive Queues.") Link: https://lore.kernel.org/r/f070b59d5a1114d5a4e830346755c2b3f141cde5.1620560472.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky Reviewed-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 917c8a919f38..3f175f220a22 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -375,7 +375,7 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, else { /* Zero sized SQ is not supported */ rv = -EINVAL; - goto err_out; + goto err_out_xa; } if (num_rqe) num_rqe = roundup_pow_of_two(num_rqe); From 54d87913f147a983589923c7f651f97de9af5be1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 10 May 2021 17:46:00 +0300 Subject: [PATCH 058/730] RDMA/core: Prevent divide-by-zero error triggered by the user The user_entry_size is supplied by the user and later used as a denominator to calculate number of entries. The zero supplied by the user will trigger the following divide-by-zero error: divide error: 0000 [#1] SMP KASAN PTI CPU: 4 PID: 497 Comm: c_repro Not tainted 5.13.0-rc1+ #281 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:ib_uverbs_handler_UVERBS_METHOD_QUERY_GID_TABLE+0x1b1/0x510 Code: 87 59 03 00 00 e8 9f ab 1e ff 48 8d bd a8 00 00 00 e8 d3 70 41 ff 44 0f b7 b5 a8 00 00 00 e8 86 ab 1e ff 31 d2 4c 89 f0 31 ff <49> f7 f5 48 89 d6 48 89 54 24 10 48 89 04 24 e8 1b ad 1e ff 48 8b RSP: 0018:ffff88810416f828 EFLAGS: 00010246 RAX: 0000000000000008 RBX: 1ffff1102082df09 RCX: ffffffff82183f3d RDX: 0000000000000000 RSI: ffff888105f2da00 RDI: 0000000000000000 RBP: ffff88810416fa98 R08: 0000000000000001 R09: ffffed102082df5f R10: ffff88810416faf7 R11: ffffed102082df5e R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000008 R15: ffff88810416faf0 FS: 00007f5715efa740(0000) GS:ffff88811a700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000840 CR3: 000000010c2e0001 CR4: 0000000000370ea0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? ib_uverbs_handler_UVERBS_METHOD_INFO_HANDLES+0x4b0/0x4b0 ib_uverbs_cmd_verbs+0x1546/0x1940 ib_uverbs_ioctl+0x186/0x240 __x64_sys_ioctl+0x38a/0x1220 do_syscall_64+0x3f/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: 9f85cbe50aa0 ("RDMA/uverbs: Expose the new GID query API to user space") Link: https://lore.kernel.org/r/b971cc70a8b240a8b5eda33c99fa0558a0071be2.1620657876.git.leonro@nvidia.com Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_std_types_device.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c index 9ec6971056fa..a03021d94e11 100644 --- a/drivers/infiniband/core/uverbs_std_types_device.c +++ b/drivers/infiniband/core/uverbs_std_types_device.c @@ -331,6 +331,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)( if (ret) return ret; + if (!user_entry_size) + return -EINVAL; + max_entries = uverbs_attr_ptr_get_array_size( attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES, user_entry_size); From 8ab78863e9eff11910e1ac8bcf478060c29b379e Mon Sep 17 00:00:00 2001 From: Jeimon Date: Sat, 8 May 2021 11:52:30 +0800 Subject: [PATCH 059/730] net/nfc/rawsock.c: fix a permission check bug The function rawsock_create() calls a privileged function sk_alloc(), which requires a ns-aware check to check net->user_ns, i.e., ns_capable(). However, the original code checks the init_user_ns using capable(). So we replace the capable() with ns_capable(). Signed-off-by: Jeimon Signed-off-by: David S. Miller --- net/nfc/rawsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 9c7eb8455ba8..5f1d438a0a23 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -329,7 +329,7 @@ static int rawsock_create(struct net *net, struct socket *sock, return -ESOCKTNOSUPPORT; if (sock->type == SOCK_RAW) { - if (!capable(CAP_NET_RAW)) + if (!ns_capable(net->user_ns, CAP_NET_RAW)) return -EPERM; sock->ops = &rawsock_raw_ops; } else { From ddb6e00f8413e885ff826e32521cff7924661de0 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 8 May 2021 07:38:22 +0200 Subject: [PATCH 060/730] net: netcp: Fix an error message 'ret' is known to be 0 here. The expected error code is stored in 'tx_pipe->dma_queue', so use it instead. While at it, switch from %d to %pe which is more user friendly. Fixes: 84640e27f230 ("net: netcp: Add Keystone NetCP core ethernet driver") Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/netcp_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 9030e619e543..97942b0e3897 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1350,8 +1350,8 @@ int netcp_txpipe_open(struct netcp_tx_pipe *tx_pipe) tx_pipe->dma_queue = knav_queue_open(name, tx_pipe->dma_queue_id, KNAV_QUEUE_SHARED); if (IS_ERR(tx_pipe->dma_queue)) { - dev_err(dev, "Could not open DMA queue for channel \"%s\": %d\n", - name, ret); + dev_err(dev, "Could not open DMA queue for channel \"%s\": %pe\n", + name, tx_pipe->dma_queue); ret = PTR_ERR(tx_pipe->dma_queue); goto err; } From a269333fa5c0c8e53c92b5a28a6076a28cde3e83 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 8 May 2021 16:30:35 +0300 Subject: [PATCH 061/730] net: dsa: fix a crash if ->get_sset_count() fails If ds->ops->get_sset_count() fails then it "count" is a negative error code such as -EOPNOTSUPP. Because "i" is an unsigned int, the negative error code is type promoted to a very high value and the loop will corrupt memory until the system crashes. Fix this by checking for error codes and changing the type of "i" to just int. Fixes: badf3ada60ab ("net: dsa: Provide CPU port statistics to master netdev") Signed-off-by: Dan Carpenter Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/dsa/master.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/dsa/master.c b/net/dsa/master.c index 052a977914a6..63adbc21a735 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -147,8 +147,7 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, struct dsa_switch *ds = cpu_dp->ds; int port = cpu_dp->index; int len = ETH_GSTRING_LEN; - int mcount = 0, count; - unsigned int i; + int mcount = 0, count, i; uint8_t pfx[4]; uint8_t *ndata; @@ -178,6 +177,8 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, */ ds->ops->get_strings(ds, port, stringset, ndata); count = ds->ops->get_sset_count(ds, port, stringset); + if (count < 0) + return; for (i = 0; i < count; i++) { memmove(ndata + (i * len + sizeof(pfx)), ndata + i * len, len - sizeof(pfx)); From db825feefc6868896fed5e361787ba3bee2fd906 Mon Sep 17 00:00:00 2001 From: Vladyslav Tarasiuk Date: Sun, 9 May 2021 09:43:18 +0300 Subject: [PATCH 062/730] net/mlx4: Fix EEPROM dump support Fix SFP and QSFP* EEPROM queries by setting i2c_address, offset and page number correctly. For SFP set the following params: - I2C address for offsets 0-255 is 0x50. For 256-511 - 0x51. - Page number is zero. - Offset is 0-255. At the same time, QSFP* parameters are different: - I2C address is always 0x50. - Page number is not limited to zero. - Offset is 0-255 for page zero and 128-255 for others. To set parameters accordingly to cable used, implement function to query module ID and implement respective helper functions to set parameters correctly. Fixes: 135dd9594f12 ("net/mlx4_en: ethtool, Remove unsupported SFP EEPROM high pages query") Signed-off-by: Vladyslav Tarasiuk Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx4/en_ethtool.c | 4 +- drivers/net/ethernet/mellanox/mlx4/port.c | 107 +++++++++++++++++- 2 files changed, 104 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 1434df66fcf2..3616b77caa0a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -2027,8 +2027,6 @@ static int mlx4_en_set_tunable(struct net_device *dev, return ret; } -#define MLX4_EEPROM_PAGE_LEN 256 - static int mlx4_en_get_module_info(struct net_device *dev, struct ethtool_modinfo *modinfo) { @@ -2063,7 +2061,7 @@ static int mlx4_en_get_module_info(struct net_device *dev, break; case MLX4_MODULE_ID_SFP: modinfo->type = ETH_MODULE_SFF_8472; - modinfo->eeprom_len = MLX4_EEPROM_PAGE_LEN; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; break; default: return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index ba6ac31a339d..256a06b3c096 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -1973,6 +1973,7 @@ EXPORT_SYMBOL(mlx4_get_roce_gid_from_slave); #define I2C_ADDR_LOW 0x50 #define I2C_ADDR_HIGH 0x51 #define I2C_PAGE_SIZE 256 +#define I2C_HIGH_PAGE_SIZE 128 /* Module Info Data */ struct mlx4_cable_info { @@ -2026,6 +2027,88 @@ static inline const char *cable_info_mad_err_str(u16 mad_status) return "Unknown Error"; } +static int mlx4_get_module_id(struct mlx4_dev *dev, u8 port, u8 *module_id) +{ + struct mlx4_cmd_mailbox *inbox, *outbox; + struct mlx4_mad_ifc *inmad, *outmad; + struct mlx4_cable_info *cable_info; + int ret; + + inbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(inbox)) + return PTR_ERR(inbox); + + outbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(outbox)) { + mlx4_free_cmd_mailbox(dev, inbox); + return PTR_ERR(outbox); + } + + inmad = (struct mlx4_mad_ifc *)(inbox->buf); + outmad = (struct mlx4_mad_ifc *)(outbox->buf); + + inmad->method = 0x1; /* Get */ + inmad->class_version = 0x1; + inmad->mgmt_class = 0x1; + inmad->base_version = 0x1; + inmad->attr_id = cpu_to_be16(0xFF60); /* Module Info */ + + cable_info = (struct mlx4_cable_info *)inmad->data; + cable_info->dev_mem_address = 0; + cable_info->page_num = 0; + cable_info->i2c_addr = I2C_ADDR_LOW; + cable_info->size = cpu_to_be16(1); + + ret = mlx4_cmd_box(dev, inbox->dma, outbox->dma, port, 3, + MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_NATIVE); + if (ret) + goto out; + + if (be16_to_cpu(outmad->status)) { + /* Mad returned with bad status */ + ret = be16_to_cpu(outmad->status); + mlx4_warn(dev, + "MLX4_CMD_MAD_IFC Get Module ID attr(%x) port(%d) i2c_addr(%x) offset(%d) size(%d): Response Mad Status(%x) - %s\n", + 0xFF60, port, I2C_ADDR_LOW, 0, 1, ret, + cable_info_mad_err_str(ret)); + ret = -ret; + goto out; + } + cable_info = (struct mlx4_cable_info *)outmad->data; + *module_id = cable_info->data[0]; +out: + mlx4_free_cmd_mailbox(dev, inbox); + mlx4_free_cmd_mailbox(dev, outbox); + return ret; +} + +static void mlx4_sfp_eeprom_params_set(u8 *i2c_addr, u8 *page_num, u16 *offset) +{ + *i2c_addr = I2C_ADDR_LOW; + *page_num = 0; + + if (*offset < I2C_PAGE_SIZE) + return; + + *i2c_addr = I2C_ADDR_HIGH; + *offset -= I2C_PAGE_SIZE; +} + +static void mlx4_qsfp_eeprom_params_set(u8 *i2c_addr, u8 *page_num, u16 *offset) +{ + /* Offsets 0-255 belong to page 0. + * Offsets 256-639 belong to pages 01, 02, 03. + * For example, offset 400 is page 02: 1 + (400 - 256) / 128 = 2 + */ + if (*offset < I2C_PAGE_SIZE) + *page_num = 0; + else + *page_num = 1 + (*offset - I2C_PAGE_SIZE) / I2C_HIGH_PAGE_SIZE; + *i2c_addr = I2C_ADDR_LOW; + *offset -= *page_num * I2C_HIGH_PAGE_SIZE; +} + /** * mlx4_get_module_info - Read cable module eeprom data * @dev: mlx4_dev. @@ -2045,12 +2128,30 @@ int mlx4_get_module_info(struct mlx4_dev *dev, u8 port, struct mlx4_cmd_mailbox *inbox, *outbox; struct mlx4_mad_ifc *inmad, *outmad; struct mlx4_cable_info *cable_info; - u16 i2c_addr; + u8 module_id, i2c_addr, page_num; int ret; if (size > MODULE_INFO_MAX_READ) size = MODULE_INFO_MAX_READ; + ret = mlx4_get_module_id(dev, port, &module_id); + if (ret) + return ret; + + switch (module_id) { + case MLX4_MODULE_ID_SFP: + mlx4_sfp_eeprom_params_set(&i2c_addr, &page_num, &offset); + break; + case MLX4_MODULE_ID_QSFP: + case MLX4_MODULE_ID_QSFP_PLUS: + case MLX4_MODULE_ID_QSFP28: + mlx4_qsfp_eeprom_params_set(&i2c_addr, &page_num, &offset); + break; + default: + mlx4_err(dev, "Module ID not recognized: %#x\n", module_id); + return -EINVAL; + } + inbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(inbox)) return PTR_ERR(inbox); @@ -2076,11 +2177,9 @@ int mlx4_get_module_info(struct mlx4_dev *dev, u8 port, */ size -= offset + size - I2C_PAGE_SIZE; - i2c_addr = I2C_ADDR_LOW; - cable_info = (struct mlx4_cable_info *)inmad->data; cable_info->dev_mem_address = cpu_to_be16(offset); - cable_info->page_num = 0; + cable_info->page_num = page_num; cable_info->i2c_addr = i2c_addr; cable_info->size = cpu_to_be16(size); From b94cbc909f1d80378a1f541968309e5c1178c98b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 9 May 2021 22:33:38 +0300 Subject: [PATCH 063/730] net: dsa: fix error code getting shifted with 4 in dsa_slave_get_sset_count DSA implements a bunch of 'standardized' ethtool statistics counters, namely tx_packets, tx_bytes, rx_packets, rx_bytes. So whatever the hardware driver returns in .get_sset_count(), we need to add 4 to that. That is ok, except that .get_sset_count() can return a negative error code, for example: b53_get_sset_count -> phy_ethtool_get_sset_count -> return -EIO -EIO is -5, and with 4 added to it, it becomes -1, aka -EPERM. One can imagine that certain error codes may even become positive, although based on code inspection I did not see instances of that. Check the error code first, if it is negative return it as-is. Based on a similar patch for dsa_master_get_strings from Dan Carpenter: https://patchwork.kernel.org/project/netdevbpf/patch/YJaSe3RPgn7gKxZv@mwanda/ Fixes: 91da11f870f0 ("net: Distributed Switch Architecture protocol support") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/slave.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 8c0f3c6ab365..d4756b920108 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -776,13 +776,15 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset) struct dsa_switch *ds = dp->ds; if (sset == ETH_SS_STATS) { - int count; + int count = 0; - count = 4; - if (ds->ops->get_sset_count) - count += ds->ops->get_sset_count(ds, dp->index, sset); + if (ds->ops->get_sset_count) { + count = ds->ops->get_sset_count(ds, dp->index, sset); + if (count < 0) + return count; + } - return count; + return count + 4; } else if (sset == ETH_SS_TEST) { return net_selftest_get_count(); } From 3058e01d31bbdbe50e02cafece2b22817a6a0eae Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Mon, 10 May 2021 09:57:38 +0700 Subject: [PATCH 064/730] tipc: make node link identity publish thread safe The using of the node address and node link identity are not thread safe, meaning that two publications may be published the same values, as result one of them will get failure because of already existing in the name table. To avoid this we have to use the node address and node link identity values from inside the node item's write lock protection. Fixes: 50a3499ab853 ("tipc: simplify signature of tipc_namtbl_publish()") Acked-by: Jon Maloy Signed-off-by: Hoang Le Signed-off-by: David S. Miller --- net/tipc/node.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 8217905348f4..81af92954c6c 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -423,18 +423,18 @@ static void tipc_node_write_unlock(struct tipc_node *n) write_unlock_bh(&n->lock); if (flags & TIPC_NOTIFY_NODE_DOWN) - tipc_publ_notify(net, publ_list, n->addr, n->capabilities); + tipc_publ_notify(net, publ_list, sk.node, n->capabilities); if (flags & TIPC_NOTIFY_NODE_UP) - tipc_named_node_up(net, n->addr, n->capabilities); + tipc_named_node_up(net, sk.node, n->capabilities); if (flags & TIPC_NOTIFY_LINK_UP) { - tipc_mon_peer_up(net, n->addr, bearer_id); - tipc_nametbl_publish(net, &ua, &sk, n->link_id); + tipc_mon_peer_up(net, sk.node, bearer_id); + tipc_nametbl_publish(net, &ua, &sk, sk.ref); } if (flags & TIPC_NOTIFY_LINK_DOWN) { - tipc_mon_peer_down(net, n->addr, bearer_id); - tipc_nametbl_withdraw(net, &ua, &sk, n->link_id); + tipc_mon_peer_down(net, sk.node, bearer_id); + tipc_nametbl_withdraw(net, &ua, &sk, sk.ref); } } From 297c4de6f780b63b6d2af75a730720483bf1904a Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 10 May 2021 13:07:08 +0200 Subject: [PATCH 065/730] net: dsa: felix: re-enable TAS guard band mode Commit 316bcffe4479 ("net: dsa: felix: disable always guard band bit for TAS config") disabled the guard band and broke 802.3Qbv compliance. There are two issues here: (1) Without the guard band the end of the scheduling window could be overrun by a frame in transit. (2) Frames that don't fit into a configured window will still be sent. The reason for both issues is that the switch will schedule the _start_ of a frame transmission inside the predefined window without taking the length of the frame into account. Thus, we'll need the guard band which will close the gate early, so that a complete frame can still be sent. Revert the commit and add a note. For a lengthy discussion see [1]. [1] https://lore.kernel.org/netdev/c7618025da6723418c56a54fe4683bd7@walle.cc/ Fixes: 316bcffe4479 ("net: dsa: felix: disable always guard band bit for TAS config") Signed-off-by: Michael Walle Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix_vsc9959.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 2473bebe48e6..f966a253d1c7 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1227,12 +1227,17 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port, if (taprio->num_entries > VSC9959_TAS_GCL_ENTRY_MAX) return -ERANGE; - /* Set port num and disable ALWAYS_GUARD_BAND_SCH_Q, which means set - * guard band to be implemented for nonschedule queues to schedule - * queues transition. + /* Enable guard band. The switch will schedule frames without taking + * their length into account. Thus we'll always need to enable the + * guard band which reserves the time of a maximum sized frame at the + * end of the time window. + * + * Although the ALWAYS_GUARD_BAND_SCH_Q bit is global for all ports, we + * need to set PORT_NUM, because subsequent writes to PARAM_CFG_REG_n + * operate on the port number. */ - ocelot_rmw(ocelot, - QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM(port), + ocelot_rmw(ocelot, QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM(port) | + QSYS_TAS_PARAM_CFG_CTRL_ALWAYS_GUARD_BAND_SCH_Q, QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM_M | QSYS_TAS_PARAM_CFG_CTRL_ALWAYS_GUARD_BAND_SCH_Q, QSYS_TAS_PARAM_CFG_CTRL); From a00593737f8bac2c9e97b696e7ff84a4446653e8 Mon Sep 17 00:00:00 2001 From: Subbaraman Narayanamurthy Date: Thu, 22 Apr 2021 11:36:10 -0700 Subject: [PATCH 066/730] interconnect: qcom: bcm-voter: add a missing of_node_put() Add a missing of_node_put() in of_bcm_voter_get() to avoid the reference leak. Signed-off-by: Subbaraman Narayanamurthy Reviewed-by: Matthias Kaehlcke Link: https://lore.kernel.org/r/1619116570-13308-1-git-send-email-subbaram@codeaurora.org Fixes: 976daac4a1c5 ("interconnect: qcom: Consolidate interconnect RPMh support") Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/bcm-voter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/interconnect/qcom/bcm-voter.c b/drivers/interconnect/qcom/bcm-voter.c index d1591a28b743..547f4c2593f4 100644 --- a/drivers/interconnect/qcom/bcm-voter.c +++ b/drivers/interconnect/qcom/bcm-voter.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2020, The Linux Foundation. All rights reserved. + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. */ #include @@ -205,6 +205,7 @@ struct bcm_voter *of_bcm_voter_get(struct device *dev, const char *name) } mutex_unlock(&bcm_voter_lock); + of_node_put(node); return voter; } EXPORT_SYMBOL_GPL(of_bcm_voter_get); From 1fd86e280d8b21762901e43d42d66dbfe8b8e0d3 Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Tue, 11 May 2021 11:44:33 +0800 Subject: [PATCH 067/730] interconnect: qcom: Add missing MODULE_DEVICE_TABLE This patch adds missing MODULE_DEVICE_TABLE definition which generates correct modalias for automatic loading of this driver when it is built as an external module. Reported-by: Hulk Robot Signed-off-by: Zou Wei Link: https://lore.kernel.org/r/1620704673-104205-1-git-send-email-zou_wei@huawei.com Fixes: 976daac4a1c5 ("interconnect: qcom: Consolidate interconnect RPMh support") Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/bcm-voter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/interconnect/qcom/bcm-voter.c b/drivers/interconnect/qcom/bcm-voter.c index 547f4c2593f4..8f385f9c2dd3 100644 --- a/drivers/interconnect/qcom/bcm-voter.c +++ b/drivers/interconnect/qcom/bcm-voter.c @@ -363,6 +363,7 @@ static const struct of_device_id bcm_voter_of_match[] = { { .compatible = "qcom,bcm-voter" }, { } }; +MODULE_DEVICE_TABLE(of, bcm_voter_of_match); static struct platform_driver qcom_icc_bcm_voter_driver = { .probe = qcom_icc_bcm_voter_probe, From 07adc0225484fc199e3dc15ec889f75f498c4fca Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Mon, 12 Apr 2021 13:49:07 +0800 Subject: [PATCH 068/730] usb: cdns3: Fix runtime PM imbalance on error When cdns3_gadget_start() fails, a pairing PM usage counter decrement is needed to keep the counter balanced. Signed-off-by: Dinghao Liu Link: https://lore.kernel.org/r/20210412054908.7975-1-dinghao.liu@zju.edu.cn Signed-off-by: Peter Chen --- drivers/usb/cdns3/cdns3-gadget.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index 9b1bd417cec0..a8b7b50abf64 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -3268,8 +3268,10 @@ static int __cdns3_gadget_init(struct cdns *cdns) pm_runtime_get_sync(cdns->dev); ret = cdns3_gadget_start(cdns); - if (ret) + if (ret) { + pm_runtime_put_sync(cdns->dev); return ret; + } /* * Because interrupt line can be shared with other components in From 3b414d1b0107fa51ad6063de9752d4b2a8063980 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Tue, 20 Apr 2021 06:28:13 +0200 Subject: [PATCH 069/730] usb: cdnsp: Fix lack of removing request from pending list. Patch fixes lack of removing request from ep->pending_list on failure of the stop endpoint command. Driver even after failing this command must remove request from ep->pending_list. Without this fix driver can stuck in cdnsp_gadget_ep_disable function in loop: while (!list_empty(&pep->pending_list)) { preq = next_request(&pep->pending_list); cdnsp_ep_dequeue(pep, preq); } Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20210420042813.34917-1-pawell@gli-login.cadence.com Signed-off-by: Peter Chen --- drivers/usb/cdns3/cdnsp-gadget.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index 56707b6b0f57..c083985e387b 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -422,17 +422,17 @@ unmap: int cdnsp_ep_dequeue(struct cdnsp_ep *pep, struct cdnsp_request *preq) { struct cdnsp_device *pdev = pep->pdev; - int ret; + int ret_stop = 0; + int ret_rem; trace_cdnsp_request_dequeue(preq); - if (GET_EP_CTX_STATE(pep->out_ctx) == EP_STATE_RUNNING) { - ret = cdnsp_cmd_stop_ep(pdev, pep); - if (ret) - return ret; - } + if (GET_EP_CTX_STATE(pep->out_ctx) == EP_STATE_RUNNING) + ret_stop = cdnsp_cmd_stop_ep(pdev, pep); - return cdnsp_remove_request(pdev, preq, pep); + ret_rem = cdnsp_remove_request(pdev, preq, pep); + + return ret_rem ? ret_rem : ret_stop; } static void cdnsp_zero_in_ctx(struct cdnsp_device *pdev) From 049c4e13714ecbca567b4d5f6d563f05d431c80e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 10 May 2021 13:10:44 +0000 Subject: [PATCH 070/730] bpf: Fix alu32 const subreg bound tracking on bitwise operations Fix a bug in the verifier's scalar32_min_max_*() functions which leads to incorrect tracking of 32 bit bounds for the simulation of and/or/xor bitops. When both the src & dst subreg is a known constant, then the assumption is that scalar_min_max_*() will take care to update bounds correctly. However, this is not the case, for example, consider a register R2 which has a tnum of 0xffffffff00000000, meaning, lower 32 bits are known constant and in this case of value 0x00000001. R2 is then and'ed with a register R3 which is a 64 bit known constant, here, 0x100000002. What can be seen in line '10:' is that 32 bit bounds reach an invalid state where {u,s}32_min_value > {u,s}32_max_value. The reason is scalar32_min_max_*() delegates 32 bit bounds updates to scalar_min_max_*(), however, that really only takes place when both the 64 bit src & dst register is a known constant. Given scalar32_min_max_*() is intended to be designed as closely as possible to scalar_min_max_*(), update the 32 bit bounds in this situation through __mark_reg32_known() which will set all {u,s}32_{min,max}_value to the correct constant, which is 0x00000000 after the fix (given 0x00000001 & 0x00000002 in 32 bit space). This is possible given var32_off already holds the final value as dst_reg->var_off is updated before calling scalar32_min_max_*(). Before fix, invalid tracking of R2: [...] 9: R0_w=inv1337 R1=ctx(id=0,off=0,imm=0) R2_w=inv(id=0,smin_value=-9223372036854775807 (0x8000000000000001),smax_value=9223372032559808513 (0x7fffffff00000001),umin_value=1,umax_value=0xffffffff00000001,var_off=(0x1; 0xffffffff00000000),s32_min_value=1,s32_max_value=1,u32_min_value=1,u32_max_value=1) R3_w=inv4294967298 R10=fp0 9: (5f) r2 &= r3 10: R0_w=inv1337 R1=ctx(id=0,off=0,imm=0) R2_w=inv(id=0,smin_value=0,smax_value=4294967296 (0x100000000),umin_value=0,umax_value=0x100000000,var_off=(0x0; 0x100000000),s32_min_value=1,s32_max_value=0,u32_min_value=1,u32_max_value=0) R3_w=inv4294967298 R10=fp0 [...] After fix, correct tracking of R2: [...] 9: R0_w=inv1337 R1=ctx(id=0,off=0,imm=0) R2_w=inv(id=0,smin_value=-9223372036854775807 (0x8000000000000001),smax_value=9223372032559808513 (0x7fffffff00000001),umin_value=1,umax_value=0xffffffff00000001,var_off=(0x1; 0xffffffff00000000),s32_min_value=1,s32_max_value=1,u32_min_value=1,u32_max_value=1) R3_w=inv4294967298 R10=fp0 9: (5f) r2 &= r3 10: R0_w=inv1337 R1=ctx(id=0,off=0,imm=0) R2_w=inv(id=0,smin_value=0,smax_value=4294967296 (0x100000000),umin_value=0,umax_value=0x100000000,var_off=(0x0; 0x100000000),s32_min_value=0,s32_max_value=0,u32_min_value=0,u32_max_value=0) R3_w=inv4294967298 R10=fp0 [...] Fixes: 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking") Fixes: 2921c90d4718 ("bpf: Fix a verifier failure with xor") Reported-by: Manfred Paul (@_manfp) Reported-by: Thadeu Lima de Souza Cascardo Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 757476c91c98..9352a1b7de2d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7084,11 +7084,10 @@ static void scalar32_min_max_and(struct bpf_reg_state *dst_reg, s32 smin_val = src_reg->s32_min_value; u32 umax_val = src_reg->u32_max_value; - /* Assuming scalar64_min_max_and will be called so its safe - * to skip updating register for known 32-bit case. - */ - if (src_known && dst_known) + if (src_known && dst_known) { + __mark_reg32_known(dst_reg, var32_off.value); return; + } /* We get our minimum from the var_off, since that's inherently * bitwise. Our maximum is the minimum of the operands' maxima. @@ -7108,7 +7107,6 @@ static void scalar32_min_max_and(struct bpf_reg_state *dst_reg, dst_reg->s32_min_value = dst_reg->u32_min_value; dst_reg->s32_max_value = dst_reg->u32_max_value; } - } static void scalar_min_max_and(struct bpf_reg_state *dst_reg, @@ -7155,11 +7153,10 @@ static void scalar32_min_max_or(struct bpf_reg_state *dst_reg, s32 smin_val = src_reg->s32_min_value; u32 umin_val = src_reg->u32_min_value; - /* Assuming scalar64_min_max_or will be called so it is safe - * to skip updating register for known case. - */ - if (src_known && dst_known) + if (src_known && dst_known) { + __mark_reg32_known(dst_reg, var32_off.value); return; + } /* We get our maximum from the var_off, and our minimum is the * maximum of the operands' minima @@ -7224,11 +7221,10 @@ static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg, struct tnum var32_off = tnum_subreg(dst_reg->var_off); s32 smin_val = src_reg->s32_min_value; - /* Assuming scalar64_min_max_xor will be called so it is safe - * to skip updating register for known case. - */ - if (src_known && dst_known) + if (src_known && dst_known) { + __mark_reg32_known(dst_reg, var32_off.value); return; + } /* We get both minimum and maximum from the var32_off. */ dst_reg->u32_min_value = var32_off.value; From 35f3f8504c3b60a1ae5576e178b27fc0ddd6157d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 May 2021 16:12:42 +0300 Subject: [PATCH 071/730] spi: Switch to signed types for *_native_cs SPI controller fields While fixing undefined behaviour the commit f60d7270c8a3 ("spi: Avoid undefined behaviour when counting unused native CSs") missed the case when all CSs are GPIOs and thus unused_native_cs will be evaluated to -1 in unsigned representation. This will falsely trigger a condition in the spi_get_gpio_descs(). Switch to signed types for *_native_cs SPI controller fields to fix above. Fixes: f60d7270c8a3 ("spi: Avoid undefined behaviour when counting unused native CSs") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210510131242.49455-1-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 360a3bc767ca..74239d65c7fd 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -644,8 +644,8 @@ struct spi_controller { int *cs_gpios; struct gpio_desc **cs_gpiods; bool use_gpio_descriptors; - u8 unused_native_cs; - u8 max_native_cs; + s8 unused_native_cs; + s8 max_native_cs; /* statistics */ struct spi_statistics statistics; From 91e02557f377b6837d4f82b14229d92cae231001 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 11 May 2021 11:05:00 +0200 Subject: [PATCH 072/730] ALSA: usb-audio: Fix potential out-of-bounce access in MIDI EP parser The recently introduced MIDI endpoint parser code has an access to the field without the size validation, hence it might lead to out-of-bounce access. Add the sanity checks for the descriptor sizes. Fixes: eb596e0fd13c ("ALSA: usb-audio: generate midi streaming substream names from jack names") Link: https://lore.kernel.org/r/20210511090500.2637-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/midi.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/usb/midi.c b/sound/usb/midi.c index 649eb8d1ab7d..2c01649c70f6 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1750,7 +1750,7 @@ static struct usb_midi_in_jack_descriptor *find_usb_in_jack_descriptor( struct usb_midi_in_jack_descriptor *injd = (struct usb_midi_in_jack_descriptor *)extra; - if (injd->bLength > 4 && + if (injd->bLength >= sizeof(*injd) && injd->bDescriptorType == USB_DT_CS_INTERFACE && injd->bDescriptorSubtype == UAC_MIDI_IN_JACK && injd->bJackID == jack_id) @@ -1773,7 +1773,7 @@ static struct usb_midi_out_jack_descriptor *find_usb_out_jack_descriptor( struct usb_midi_out_jack_descriptor *outjd = (struct usb_midi_out_jack_descriptor *)extra; - if (outjd->bLength > 4 && + if (outjd->bLength >= sizeof(*outjd) && outjd->bDescriptorType == USB_DT_CS_INTERFACE && outjd->bDescriptorSubtype == UAC_MIDI_OUT_JACK && outjd->bJackID == jack_id) @@ -1820,7 +1820,8 @@ static void snd_usbmidi_init_substream(struct snd_usb_midi *umidi, outjd = find_usb_out_jack_descriptor(hostif, jack_id); if (outjd) { sz = USB_DT_MIDI_OUT_SIZE(outjd->bNrInputPins); - iJack = *(((uint8_t *) outjd) + sz - sizeof(uint8_t)); + if (outjd->bLength >= sz) + iJack = *(((uint8_t *) outjd) + sz - sizeof(uint8_t)); } } else { /* and out jacks connect to ins */ From 4b81ccebaeee885ab1aa1438133f2991e3a2b6ea Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Tue, 27 Apr 2021 10:12:12 -0300 Subject: [PATCH 073/730] bpf, ringbuf: Deny reserve of buffers larger than ringbuf A BPF program might try to reserve a buffer larger than the ringbuf size. If the consumer pointer is way ahead of the producer, that would be successfully reserved, allowing the BPF program to read or write out of the ringbuf allocated area. Reported-by: Ryota Shiga (Flatt Security) Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it") Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Alexei Starovoitov --- kernel/bpf/ringbuf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index f25b719ac786..b86d80c9cd59 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -315,6 +315,9 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size) return NULL; len = round_up(size + BPF_RINGBUF_HDR_SZ, 8); + if (len > rb->mask + 1) + return NULL; + cons_pos = smp_load_acquire(&rb->consumer_pos); if (in_nmi()) { From 04ea3086c4d73da7009de1e84962a904139af219 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 4 May 2021 16:38:00 -0700 Subject: [PATCH 074/730] bpf: Prevent writable memory-mapping of read-only ringbuf pages Only the very first page of BPF ringbuf that contains consumer position counter is supposed to be mapped as writeable by user-space. Producer position is read-only and can be modified only by the kernel code. BPF ringbuf data pages are read-only as well and are not meant to be modified by user-code to maintain integrity of per-record headers. This patch allows to map only consumer position page as writeable and everything else is restricted to be read-only. remap_vmalloc_range() internally adds VM_DONTEXPAND, so all the established memory mappings can't be extended, which prevents any future violations through mremap()'ing. Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it") Reported-by: Ryota Shiga (Flatt Security) Reported-by: Thadeu Lima de Souza Cascardo Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov --- kernel/bpf/ringbuf.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index b86d80c9cd59..84b3b35fc0d0 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -221,25 +221,20 @@ static int ringbuf_map_get_next_key(struct bpf_map *map, void *key, return -ENOTSUPP; } -static size_t bpf_ringbuf_mmap_page_cnt(const struct bpf_ringbuf *rb) -{ - size_t data_pages = (rb->mask + 1) >> PAGE_SHIFT; - - /* consumer page + producer page + 2 x data pages */ - return RINGBUF_POS_PAGES + 2 * data_pages; -} - static int ringbuf_map_mmap(struct bpf_map *map, struct vm_area_struct *vma) { struct bpf_ringbuf_map *rb_map; - size_t mmap_sz; rb_map = container_of(map, struct bpf_ringbuf_map, map); - mmap_sz = bpf_ringbuf_mmap_page_cnt(rb_map->rb) << PAGE_SHIFT; - - if (vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start) > mmap_sz) - return -EINVAL; + if (vma->vm_flags & VM_WRITE) { + /* allow writable mapping for the consumer_pos only */ + if (vma->vm_pgoff != 0 || vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EPERM; + } else { + vma->vm_flags &= ~VM_MAYWRITE; + } + /* remap_vmalloc_range() checks size and offset constraints */ return remap_vmalloc_range(vma, rb_map->rb, vma->vm_pgoff + RINGBUF_PGOFF); } From ff67dbd554b2aaa22be933eced32610ff90209dd Mon Sep 17 00:00:00 2001 From: Qiu Wenbo Date: Wed, 28 Apr 2021 13:06:36 +0800 Subject: [PATCH 075/730] platform/x86: ideapad-laptop: fix a NULL pointer dereference The third parameter of dytc_cql_command should not be NULL since it will be dereferenced immediately. Fixes: ff36b0d953dc4 ("platform/x86: ideapad-laptop: rework and create new ACPI helpers") Signed-off-by: Qiu Wenbo Acked-by: Ike Panhc Link: https://lore.kernel.org/r/20210428050636.8003-1-qiuwenbo@kylinos.com.cn Signed-off-by: Hans de Goede --- drivers/platform/x86/ideapad-laptop.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 6cb5ad4be231..8f871151f0cc 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -809,6 +809,7 @@ static int dytc_profile_set(struct platform_profile_handler *pprof, { struct ideapad_dytc_priv *dytc = container_of(pprof, struct ideapad_dytc_priv, pprof); struct ideapad_private *priv = dytc->priv; + unsigned long output; int err; err = mutex_lock_interruptible(&dytc->mutex); @@ -829,7 +830,7 @@ static int dytc_profile_set(struct platform_profile_handler *pprof, /* Determine if we are in CQL mode. This alters the commands we do */ err = dytc_cql_command(priv, DYTC_SET_COMMAND(DYTC_FUNCTION_MMC, perfmode, 1), - NULL); + &output); if (err) goto unlock; } From b09aaa3f2c0edeeed670cd29961a0e35bddc78cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Barnab=C3=A1s=20P=C5=91cze?= Date: Fri, 7 May 2021 23:53:44 +0000 Subject: [PATCH 076/730] platform/x86: ideapad-laptop: fix method name typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "smbc" should be "sbmc". `eval_smbc()` incorrectly called the SMBC ACPI method instead of SBMC. This resulted in partial loss of functionality. Rectify that by calling the correct ACPI method (SBMC), and also rename methods and constants. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=212985 Fixes: 0b765671cb80 ("platform/x86: ideapad-laptop: group and separate (un)related constants into enums") Fixes: ff36b0d953dc ("platform/x86: ideapad-laptop: rework and create new ACPI helpers") Cc: stable@vger.kernel.org # 5.12 Signed-off-by: Barnabás Pőcze Link: https://lore.kernel.org/r/20210507235333.286505-1-pobrn@protonmail.com Signed-off-by: Hans de Goede --- drivers/platform/x86/ideapad-laptop.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 8f871151f0cc..387817290921 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -57,8 +57,8 @@ enum { }; enum { - SMBC_CONSERVATION_ON = 3, - SMBC_CONSERVATION_OFF = 5, + SBMC_CONSERVATION_ON = 3, + SBMC_CONSERVATION_OFF = 5, }; enum { @@ -182,9 +182,9 @@ static int eval_gbmd(acpi_handle handle, unsigned long *res) return eval_int(handle, "GBMD", res); } -static int exec_smbc(acpi_handle handle, unsigned long arg) +static int exec_sbmc(acpi_handle handle, unsigned long arg) { - return exec_simple_method(handle, "SMBC", arg); + return exec_simple_method(handle, "SBMC", arg); } static int eval_hals(acpi_handle handle, unsigned long *res) @@ -477,7 +477,7 @@ static ssize_t conservation_mode_store(struct device *dev, if (err) return err; - err = exec_smbc(priv->adev->handle, state ? SMBC_CONSERVATION_ON : SMBC_CONSERVATION_OFF); + err = exec_sbmc(priv->adev->handle, state ? SBMC_CONSERVATION_ON : SBMC_CONSERVATION_OFF); if (err) return err; From 79d341e26ebcdbc622348aaaab6f8f89b6fdb25f Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 30 Apr 2021 14:07:35 +0800 Subject: [PATCH 077/730] platform/x86: hp_accel: Avoid invoking _INI to speed up resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hp_accel can take almost two seconds to resume on some HP laptops. The bottleneck is on evaluating _INI, which is only needed to run once. Resolve the issue by only invoking _INI when it's necessary. Namely, on probe and on hibernation restore. Signed-off-by: Kai-Heng Feng Acked-by: Éric Piel Link: https://lore.kernel.org/r/20210430060736.590321-1-kai.heng.feng@canonical.com Signed-off-by: Hans de Goede --- drivers/misc/lis3lv02d/lis3lv02d.h | 1 + drivers/platform/x86/hp_accel.c | 22 +++++++++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/misc/lis3lv02d/lis3lv02d.h b/drivers/misc/lis3lv02d/lis3lv02d.h index c394c0b08519..7ac788fae1b8 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d.h +++ b/drivers/misc/lis3lv02d/lis3lv02d.h @@ -271,6 +271,7 @@ struct lis3lv02d { int regs_size; u8 *reg_cache; bool regs_stored; + bool init_required; u8 odr_mask; /* ODR bit mask */ u8 whoami; /* indicates measurement precision */ s16 (*read_data) (struct lis3lv02d *lis3, int reg); diff --git a/drivers/platform/x86/hp_accel.c b/drivers/platform/x86/hp_accel.c index 799cbe2ffcf3..8c0867bda828 100644 --- a/drivers/platform/x86/hp_accel.c +++ b/drivers/platform/x86/hp_accel.c @@ -88,6 +88,9 @@ MODULE_DEVICE_TABLE(acpi, lis3lv02d_device_ids); static int lis3lv02d_acpi_init(struct lis3lv02d *lis3) { struct acpi_device *dev = lis3->bus_priv; + if (!lis3->init_required) + return 0; + if (acpi_evaluate_object(dev->handle, METHOD_NAME__INI, NULL, NULL) != AE_OK) return -EINVAL; @@ -356,6 +359,7 @@ static int lis3lv02d_add(struct acpi_device *device) } /* call the core layer do its init */ + lis3_dev.init_required = true; ret = lis3lv02d_init_device(&lis3_dev); if (ret) return ret; @@ -403,11 +407,27 @@ static int lis3lv02d_suspend(struct device *dev) static int lis3lv02d_resume(struct device *dev) { + lis3_dev.init_required = false; lis3lv02d_poweron(&lis3_dev); return 0; } -static SIMPLE_DEV_PM_OPS(hp_accel_pm, lis3lv02d_suspend, lis3lv02d_resume); +static int lis3lv02d_restore(struct device *dev) +{ + lis3_dev.init_required = true; + lis3lv02d_poweron(&lis3_dev); + return 0; +} + +static const struct dev_pm_ops hp_accel_pm = { + .suspend = lis3lv02d_suspend, + .resume = lis3lv02d_resume, + .freeze = lis3lv02d_suspend, + .thaw = lis3lv02d_resume, + .poweroff = lis3lv02d_suspend, + .restore = lis3lv02d_restore, +}; + #define HP_ACCEL_PM (&hp_accel_pm) #else #define HP_ACCEL_PM NULL From f2be77fee648ddd6d0d259d3527344ba0120e314 Mon Sep 17 00:00:00 2001 From: Elia Devito Date: Tue, 11 May 2021 14:46:49 +0200 Subject: [PATCH 078/730] ALSA: hda/realtek: Add fixup for HP Spectre x360 15-df0xxx Fixup to enable all 4 speaker on HP Spectre x360 15-df0xxx and probably on similar models. 0x14 pin config override is required to enable all speakers and alc285-speaker2-to-dac1 fixup to enable volume adjustment. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=189331 Signed-off-by: Elia Devito Cc: Link: https://lore.kernel.org/r/20210511124651.4802-1-eliadevito@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b4b71609dff1..3e269de84079 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6542,6 +6542,7 @@ enum { ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST, ALC295_FIXUP_ASUS_DACS, ALC295_FIXUP_HP_OMEN, + ALC285_FIXUP_HP_SPECTRE_X360, }; static const struct hda_fixup alc269_fixups[] = { @@ -8099,6 +8100,15 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HP_LINE1_MIC1_LED, }, + [ALC285_FIXUP_HP_SPECTRE_X360] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x14, 0x90170110 }, /* enable top speaker */ + {} + }, + .chained = true, + .chain_id = ALC285_FIXUP_SPEAKER2_TO_DAC1, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -8259,6 +8269,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8497, "HP Envy x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x84da, "HP OMEN dc0019-ur", ALC295_FIXUP_HP_OMEN), SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), + SND_PCI_QUIRK(0x103c, 0x8519, "HP Spectre x360 15-df0xxx", ALC285_FIXUP_HP_SPECTRE_X360), SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO), SND_PCI_QUIRK(0x103c, 0x8724, "HP EliteBook 850 G7", ALC285_FIXUP_HP_GPIO_LED), @@ -8665,6 +8676,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC274_FIXUP_HP_MIC, .name = "alc274-hp-mic-detect"}, {.id = ALC245_FIXUP_HP_X360_AMP, .name = "alc245-hp-x360-amp"}, {.id = ALC295_FIXUP_HP_OMEN, .name = "alc295-hp-omen"}, + {.id = ALC285_FIXUP_HP_SPECTRE_X360, .name = "alc285-hp-spectre-x360"}, {} }; #define ALC225_STANDARD_PINS \ From 0919a3acc0c87049a7d787c4b8b9e64bd7c59eb3 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 11 May 2021 10:17:07 +0900 Subject: [PATCH 079/730] ASoC: simple-card: add simple_parse_node() Original commit 59c35c44a9cf89a83a9 ("ASoC: simple-card: add simple_parse_node()") was reverted, and this is remake version. Parse dai/tdm/clk are common for both CPU/Codec node. This patch creates simple_parse_node() for it and share the code. Reported-by: "kernelci.org bot" Fixes: 25c4a9b614f101bb9f3 ("ASoC: simple-card: Fix breakage on kontron-sl28-var3-ads2") Fixes: 59c35c44a9cf89a83a9 ("ASoC: simple-card: add simple_parse_node()") Signed-off-by: Kuninori Morimoto Tested-by: Michael Walle Link: https://lore.kernel.org/r/87h7jaax2k.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/generic/simple-card.c | 107 ++++++++++++++++---------------- 1 file changed, 53 insertions(+), 54 deletions(-) diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c index a1373be4558f..57ab89be1b4b 100644 --- a/sound/soc/generic/simple-card.c +++ b/sound/soc/generic/simple-card.c @@ -93,12 +93,11 @@ static void simple_parse_convert(struct device *dev, } static void simple_parse_mclk_fs(struct device_node *top, - struct device_node *cpu, - struct device_node *codec, + struct device_node *np, struct simple_dai_props *props, char *prefix) { - struct device_node *node = of_get_parent(cpu); + struct device_node *node = of_get_parent(np); char prop[128]; snprintf(prop, sizeof(prop), "%smclk-fs", PREFIX); @@ -106,12 +105,50 @@ static void simple_parse_mclk_fs(struct device_node *top, snprintf(prop, sizeof(prop), "%smclk-fs", prefix); of_property_read_u32(node, prop, &props->mclk_fs); - of_property_read_u32(cpu, prop, &props->mclk_fs); - of_property_read_u32(codec, prop, &props->mclk_fs); + of_property_read_u32(np, prop, &props->mclk_fs); of_node_put(node); } +static int simple_parse_node(struct asoc_simple_priv *priv, + struct device_node *np, + struct link_info *li, + char *prefix, + int *cpu) +{ + struct device *dev = simple_priv_to_dev(priv); + struct device_node *top = dev->of_node; + struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, li->link); + struct simple_dai_props *dai_props = simple_priv_to_props(priv, li->link); + struct snd_soc_dai_link_component *dlc; + struct asoc_simple_dai *dai; + int ret; + + if (cpu) { + dlc = asoc_link_to_cpu(dai_link, 0); + dai = simple_props_to_dai_cpu(dai_props, 0); + } else { + dlc = asoc_link_to_codec(dai_link, 0); + dai = simple_props_to_dai_codec(dai_props, 0); + } + + simple_parse_mclk_fs(top, np, dai_props, prefix); + + ret = asoc_simple_parse_dai(np, dlc, cpu); + if (ret) + return ret; + + ret = asoc_simple_parse_clk(dev, np, dai, dlc); + if (ret) + return ret; + + ret = asoc_simple_parse_tdm(np, dai); + if (ret) + return ret; + + return 0; +} + static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, struct device_node *np, struct device_node *codec, @@ -121,10 +158,6 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, struct device *dev = simple_priv_to_dev(priv); struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, li->link); struct simple_dai_props *dai_props = simple_priv_to_props(priv, li->link); - struct asoc_simple_dai *dai; - struct snd_soc_dai_link_component *cpus = asoc_link_to_cpu(dai_link, 0); - struct snd_soc_dai_link_component *codecs = asoc_link_to_codec(dai_link, 0); - struct snd_soc_dai_link_component *platforms = asoc_link_to_platform(dai_link, 0); struct device_node *top = dev->of_node; struct device_node *node = of_get_parent(np); char *prefix = ""; @@ -132,13 +165,13 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, dev_dbg(dev, "link_of DPCM (%pOF)\n", np); - li->link++; - /* For single DAI link & old style of DT node */ if (is_top) prefix = PREFIX; if (li->cpu) { + struct snd_soc_dai_link_component *cpus = asoc_link_to_cpu(dai_link, 0); + struct snd_soc_dai_link_component *platforms = asoc_link_to_platform(dai_link, 0); int is_single_links = 0; /* Codec is dummy */ @@ -147,13 +180,7 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, dai_link->dynamic = 1; dai_link->dpcm_merged_format = 1; - dai = simple_props_to_dai_cpu(dai_props, 0); - - ret = asoc_simple_parse_dai(np, cpus, &is_single_links); - if (ret) - goto out_put_node; - - ret = asoc_simple_parse_clk(dev, np, dai, cpus); + ret = simple_parse_node(priv, np, li, prefix, &is_single_links); if (ret < 0) goto out_put_node; @@ -166,6 +193,7 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, asoc_simple_canonicalize_cpu(cpus, is_single_links); asoc_simple_canonicalize_platform(platforms, cpus); } else { + struct snd_soc_dai_link_component *codecs = asoc_link_to_codec(dai_link, 0); struct snd_soc_codec_conf *cconf; /* CPU is dummy */ @@ -174,14 +202,9 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, dai_link->no_pcm = 1; dai_link->be_hw_params_fixup = asoc_simple_be_hw_params_fixup; - dai = simple_props_to_dai_codec(dai_props, 0); cconf = simple_props_to_codec_conf(dai_props, 0); - ret = asoc_simple_parse_dai(np, codecs, NULL); - if (ret < 0) - goto out_put_node; - - ret = asoc_simple_parse_clk(dev, np, dai, codecs); + ret = simple_parse_node(priv, np, li, prefix, NULL); if (ret < 0) goto out_put_node; @@ -201,11 +224,6 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, } simple_parse_convert(dev, np, &dai_props->adata); - simple_parse_mclk_fs(top, np, codec, dai_props, prefix); - - ret = asoc_simple_parse_tdm(np, dai); - if (ret) - goto out_put_node; ret = asoc_simple_parse_daifmt(dev, node, codec, prefix, &dai_link->dai_fmt); @@ -218,6 +236,8 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, dai_link->init = asoc_simple_dai_init; out_put_node: + li->link++; + of_node_put(node); return ret; } @@ -230,13 +250,9 @@ static int simple_dai_link_of(struct asoc_simple_priv *priv, { struct device *dev = simple_priv_to_dev(priv); struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, li->link); - struct simple_dai_props *dai_props = simple_priv_to_props(priv, li->link); - struct asoc_simple_dai *cpu_dai = simple_props_to_dai_cpu(dai_props, 0); - struct asoc_simple_dai *codec_dai = simple_props_to_dai_codec(dai_props, 0); struct snd_soc_dai_link_component *cpus = asoc_link_to_cpu(dai_link, 0); struct snd_soc_dai_link_component *codecs = asoc_link_to_codec(dai_link, 0); struct snd_soc_dai_link_component *platforms = asoc_link_to_platform(dai_link, 0); - struct device_node *top = dev->of_node; struct device_node *cpu = NULL; struct device_node *node = NULL; struct device_node *plat = NULL; @@ -246,7 +262,6 @@ static int simple_dai_link_of(struct asoc_simple_priv *priv, cpu = np; node = of_get_parent(np); - li->link++; dev_dbg(dev, "link_of (%pOF)\n", node); @@ -262,13 +277,11 @@ static int simple_dai_link_of(struct asoc_simple_priv *priv, if (ret < 0) goto dai_link_of_err; - simple_parse_mclk_fs(top, cpu, codec, dai_props, prefix); - - ret = asoc_simple_parse_dai(cpu, cpus, &single_cpu); + ret = simple_parse_node(priv, cpu, li, prefix, &single_cpu); if (ret < 0) goto dai_link_of_err; - ret = asoc_simple_parse_dai(codec, codecs, NULL); + ret = simple_parse_node(priv, codec, li, prefix, NULL); if (ret < 0) goto dai_link_of_err; @@ -276,22 +289,6 @@ static int simple_dai_link_of(struct asoc_simple_priv *priv, if (ret < 0) goto dai_link_of_err; - ret = asoc_simple_parse_tdm(cpu, cpu_dai); - if (ret < 0) - goto dai_link_of_err; - - ret = asoc_simple_parse_tdm(codec, codec_dai); - if (ret < 0) - goto dai_link_of_err; - - ret = asoc_simple_parse_clk(dev, cpu, cpu_dai, cpus); - if (ret < 0) - goto dai_link_of_err; - - ret = asoc_simple_parse_clk(dev, codec, codec_dai, codecs); - if (ret < 0) - goto dai_link_of_err; - ret = asoc_simple_set_dailink_name(dev, dai_link, "%s-%s", cpus->dai_name, @@ -309,6 +306,8 @@ dai_link_of_err: of_node_put(plat); of_node_put(node); + li->link++; + return ret; } From 6ad76b573bb63ef229cf60386cc38c6e7c7625d7 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 11 May 2021 10:17:47 +0900 Subject: [PATCH 080/730] ASoC: simple-card: add simple_link_init() Original commit 434392271afcff350fe ("ASoC: simple-card: add simple_link_init()") are rejected, and this is remake version of it. This patch adds simple_link_init() and share dai_link setting code. Reported-by: "kernelci.org bot" Fixes: 25c4a9b614f101bb9f3 ("ASoC: simple-card: Fix breakage on kontron-sl28-var3-ads2") Fixes: 434392271afcff350fe ("ASoC: simple-card: add simple_link_init()") Signed-off-by: Kuninori Morimoto Tested-by: Michael Walle Link: https://lore.kernel.org/r/87fsyuax1g.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/generic/simple-card.c | 61 ++++++++++++++++----------------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c index 57ab89be1b4b..0015f534d42d 100644 --- a/sound/soc/generic/simple-card.c +++ b/sound/soc/generic/simple-card.c @@ -149,6 +149,27 @@ static int simple_parse_node(struct asoc_simple_priv *priv, return 0; } +static int simple_link_init(struct asoc_simple_priv *priv, + struct device_node *node, + struct device_node *codec, + struct link_info *li, + char *prefix, char *name) +{ + struct device *dev = simple_priv_to_dev(priv); + struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, li->link); + int ret; + + ret = asoc_simple_parse_daifmt(dev, node, codec, + prefix, &dai_link->dai_fmt); + if (ret < 0) + return 0; + + dai_link->init = asoc_simple_dai_init; + dai_link->ops = &simple_ops; + + return asoc_simple_set_dailink_name(dev, dai_link, name); +} + static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, struct device_node *np, struct device_node *codec, @@ -161,6 +182,7 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, struct device_node *top = dev->of_node; struct device_node *node = of_get_parent(np); char *prefix = ""; + char dai_name[64]; int ret; dev_dbg(dev, "link_of DPCM (%pOF)\n", np); @@ -184,11 +206,7 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, if (ret < 0) goto out_put_node; - ret = asoc_simple_set_dailink_name(dev, dai_link, - "fe.%s", - cpus->dai_name); - if (ret < 0) - goto out_put_node; + snprintf(dai_name, sizeof(dai_name), "fe.%s", cpus->dai_name); asoc_simple_canonicalize_cpu(cpus, is_single_links); asoc_simple_canonicalize_platform(platforms, cpus); @@ -208,11 +226,7 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, if (ret < 0) goto out_put_node; - ret = asoc_simple_set_dailink_name(dev, dai_link, - "be.%s", - codecs->dai_name); - if (ret < 0) - goto out_put_node; + snprintf(dai_name, sizeof(dai_name), "be.%s", codecs->dai_name); /* check "prefix" from top node */ snd_soc_of_parse_node_prefix(top, cconf, codecs->of_node, @@ -225,15 +239,9 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, simple_parse_convert(dev, np, &dai_props->adata); - ret = asoc_simple_parse_daifmt(dev, node, codec, - prefix, &dai_link->dai_fmt); - if (ret < 0) - goto out_put_node; - snd_soc_dai_link_set_capabilities(dai_link); - dai_link->ops = &simple_ops; - dai_link->init = asoc_simple_dai_init; + ret = simple_link_init(priv, node, codec, li, prefix, dai_name); out_put_node: li->link++; @@ -256,6 +264,7 @@ static int simple_dai_link_of(struct asoc_simple_priv *priv, struct device_node *cpu = NULL; struct device_node *node = NULL; struct device_node *plat = NULL; + char dai_name[64]; char prop[128]; char *prefix = ""; int ret, single_cpu = 0; @@ -272,11 +281,6 @@ static int simple_dai_link_of(struct asoc_simple_priv *priv, snprintf(prop, sizeof(prop), "%splat", prefix); plat = of_get_child_by_name(node, prop); - ret = asoc_simple_parse_daifmt(dev, node, codec, - prefix, &dai_link->dai_fmt); - if (ret < 0) - goto dai_link_of_err; - ret = simple_parse_node(priv, cpu, li, prefix, &single_cpu); if (ret < 0) goto dai_link_of_err; @@ -289,19 +293,14 @@ static int simple_dai_link_of(struct asoc_simple_priv *priv, if (ret < 0) goto dai_link_of_err; - ret = asoc_simple_set_dailink_name(dev, dai_link, - "%s-%s", - cpus->dai_name, - codecs->dai_name); - if (ret < 0) - goto dai_link_of_err; - - dai_link->ops = &simple_ops; - dai_link->init = asoc_simple_dai_init; + snprintf(dai_name, sizeof(dai_name), + "%s-%s", cpus->dai_name, codecs->dai_name); asoc_simple_canonicalize_cpu(cpus, single_cpu); asoc_simple_canonicalize_platform(platforms, cpus); + ret = simple_link_init(priv, node, codec, li, prefix, dai_name); + dai_link_of_err: of_node_put(plat); of_node_put(node); From 28c268d3acdd4cbcd2ac320b85609e77f84e74a7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 8 May 2021 17:01:45 +0200 Subject: [PATCH 081/730] ASoC: Intel: bytcr_rt5640: Add quirk for the Glavey TM800A550L tablet Add a quirk for the Glavey TM800A550L tablet, this BYTCR tablet has no CHAN package in its ACPI tables and uses SSP0-AIF1 rather then SSP0-AIF2 which is the default for BYTCR devices. Signed-off-by: Hans de Goede Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20210508150146.28403-1-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_rt5640.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index df2f5d55e8ff..b42fa292d408 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -574,6 +574,17 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_SSP0_AIF1 | BYT_RT5640_MCLK_EN), }, + { /* Glavey TM800A550L */ + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), + DMI_MATCH(DMI_BOARD_NAME, "Aptio CRB"), + /* Above strings are too generic, also match on BIOS version */ + DMI_MATCH(DMI_BIOS_VERSION, "ZY-8-BI-PX4S70VTR400-X423B-005-D"), + }, + .driver_data = (void *)(BYTCR_INPUT_DEFAULTS | + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, { .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), From f0353e1f53f92f7b3da91e6669f5d58ee222ebe8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 8 May 2021 17:01:46 +0200 Subject: [PATCH 082/730] ASoC: Intel: bytcr_rt5640: Add quirk for the Lenovo Miix 3-830 tablet The Lenovo Miix 3-830 tablet has only 1 speaker, has an internal analog mic on IN1 and uses JD2 for jack-detect, add a quirk to automatically apply these settings on Lenovo Miix 3-830 tablets. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210508150146.28403-2-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_rt5640.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index b42fa292d408..22dbd9d93c1e 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -663,6 +663,20 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_MONO_SPEAKER | BYT_RT5640_MCLK_EN), }, + { /* Lenovo Miix 3-830 */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo MIIX 3-830"), + }, + .driver_data = (void *)(BYT_RT5640_IN1_MAP | + BYT_RT5640_JD_SRC_JD2_IN4N | + BYT_RT5640_OVCD_TH_2000UA | + BYT_RT5640_OVCD_SF_0P75 | + BYT_RT5640_MONO_SPEAKER | + BYT_RT5640_DIFF_MIC | + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, { /* Linx Linx7 tablet */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LINX"), From f8090ffc91ffd788a73d4e6b5ca3107c94d9ec27 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 11 May 2021 10:17:57 +0900 Subject: [PATCH 083/730] ASoC: audio-graph: tidyup graph_dai_link_of_dpcm() Use local variable at local area only. Signed-off-by: Kuninori Morimoto Tested-by: Michael Walle Link: https://lore.kernel.org/r/87eeeeax16.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/generic/audio-graph-card.c | 30 +++++++++++++--------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/sound/soc/generic/audio-graph-card.c b/sound/soc/generic/audio-graph-card.c index 2c8a2fcb7922..0159a4576e9c 100644 --- a/sound/soc/generic/audio-graph-card.c +++ b/sound/soc/generic/audio-graph-card.c @@ -276,24 +276,19 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv, struct link_info *li) { struct device *dev = simple_priv_to_dev(priv); - struct snd_soc_card *card = simple_priv_to_card(priv); struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, li->link); struct simple_dai_props *dai_props = simple_priv_to_props(priv, li->link); struct device_node *top = dev->of_node; struct device_node *ep = li->cpu ? cpu_ep : codec_ep; - struct device_node *port; - struct device_node *ports; - struct snd_soc_dai_link_component *cpus = asoc_link_to_cpu(dai_link, 0); - struct snd_soc_dai_link_component *codecs = asoc_link_to_codec(dai_link, 0); char dai_name[64]; int ret; - port = of_get_parent(ep); - ports = of_get_parent(port); - dev_dbg(dev, "link_of DPCM (%pOF)\n", ep); if (li->cpu) { + struct snd_soc_card *card = simple_priv_to_card(priv); + struct snd_soc_dai_link_component *cpus = asoc_link_to_cpu(dai_link, 0); + /* Codec is dummy */ /* FE settings */ @@ -302,7 +297,7 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv, ret = graph_parse_node(priv, cpu_ep, li, 1); if (ret) - goto out_put_node; + return ret; snprintf(dai_name, sizeof(dai_name), "fe.%pOFP.%s", cpus->of_node, cpus->dai_name); @@ -319,7 +314,10 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv, if (card->component_chaining && !soc_component_is_pcm(cpus)) dai_link->no_pcm = 1; } else { - struct snd_soc_codec_conf *cconf; + struct snd_soc_codec_conf *cconf = simple_props_to_codec_conf(dai_props, 0); + struct snd_soc_dai_link_component *codecs = asoc_link_to_codec(dai_link, 0); + struct device_node *port; + struct device_node *ports; /* CPU is dummy */ @@ -327,22 +325,25 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv, dai_link->no_pcm = 1; dai_link->be_hw_params_fixup = asoc_simple_be_hw_params_fixup; - cconf = simple_props_to_codec_conf(dai_props, 0); - ret = graph_parse_node(priv, codec_ep, li, 0); if (ret < 0) - goto out_put_node; + return ret; snprintf(dai_name, sizeof(dai_name), "be.%pOFP.%s", codecs->of_node, codecs->dai_name); /* check "prefix" from top node */ + port = of_get_parent(ep); + ports = of_get_parent(port); snd_soc_of_parse_node_prefix(top, cconf, codecs->of_node, "prefix"); if (of_node_name_eq(ports, "ports")) snd_soc_of_parse_node_prefix(ports, cconf, codecs->of_node, "prefix"); snd_soc_of_parse_node_prefix(port, cconf, codecs->of_node, "prefix"); + + of_node_put(ports); + of_node_put(port); } graph_parse_convert(dev, ep, &dai_props->adata); @@ -351,11 +352,8 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv, ret = graph_link_init(priv, cpu_ep, codec_ep, li, dai_name); -out_put_node: li->link++; - of_node_put(ports); - of_node_put(port); return ret; } From 582f3503f96543f3afbaaaa085755fd167a0f71e Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 11 May 2021 10:18:48 +0900 Subject: [PATCH 084/730] ASoC: audio-graph: tidyup graph_parse_node() audio-graph is using cpus->dai_name / codecs->dai_name for dailink->name. In graph_parse_node(), xxx->dai_name is got by snd_soc_get_dai_name(), but it might be removed soon by asoc_simple_canonicalize_cpu(). The order should be *1) call snd_soc_get_dai_name() 2) create dailink name *3) call asoc_simple_canonicalize_cpu() * are implemented in graph_parse_node(). This patch remove 3) from graph_parse_node() Reported-by: "kernelci.org bot" Fixes: 8859f809c7d5813 ("ASoC: audio-graph: add graph_parse_node()") Fixes: e51237b8d305225 ("ASoC: audio-graph: add graph_link_init()") Signed-off-by: Kuninori Morimoto Tested-by: Michael Walle Link: https://lore.kernel.org/r/87cztyawzr.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/generic/audio-graph-card.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/sound/soc/generic/audio-graph-card.c b/sound/soc/generic/audio-graph-card.c index 0159a4576e9c..5e71382467e8 100644 --- a/sound/soc/generic/audio-graph-card.c +++ b/sound/soc/generic/audio-graph-card.c @@ -209,7 +209,7 @@ static void graph_parse_mclk_fs(struct device_node *top, static int graph_parse_node(struct asoc_simple_priv *priv, struct device_node *ep, struct link_info *li, - int is_cpu) + int *cpu) { struct device *dev = simple_priv_to_dev(priv); struct device_node *top = dev->of_node; @@ -217,9 +217,9 @@ static int graph_parse_node(struct asoc_simple_priv *priv, struct simple_dai_props *dai_props = simple_priv_to_props(priv, li->link); struct snd_soc_dai_link_component *dlc; struct asoc_simple_dai *dai; - int ret, single = 0; + int ret; - if (is_cpu) { + if (cpu) { dlc = asoc_link_to_cpu(dai_link, 0); dai = simple_props_to_dai_cpu(dai_props, 0); } else { @@ -229,7 +229,7 @@ static int graph_parse_node(struct asoc_simple_priv *priv, graph_parse_mclk_fs(top, ep, dai_props); - ret = asoc_simple_parse_dai(ep, dlc, &single); + ret = asoc_simple_parse_dai(ep, dlc, cpu); if (ret < 0) return ret; @@ -241,9 +241,6 @@ static int graph_parse_node(struct asoc_simple_priv *priv, if (ret < 0) return ret; - if (is_cpu) - asoc_simple_canonicalize_cpu(dlc, single); - return 0; } @@ -288,6 +285,7 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv, if (li->cpu) { struct snd_soc_card *card = simple_priv_to_card(priv); struct snd_soc_dai_link_component *cpus = asoc_link_to_cpu(dai_link, 0); + int is_single_links = 0; /* Codec is dummy */ @@ -295,7 +293,7 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv, dai_link->dynamic = 1; dai_link->dpcm_merged_format = 1; - ret = graph_parse_node(priv, cpu_ep, li, 1); + ret = graph_parse_node(priv, cpu_ep, li, &is_single_links); if (ret) return ret; @@ -313,6 +311,8 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv, */ if (card->component_chaining && !soc_component_is_pcm(cpus)) dai_link->no_pcm = 1; + + asoc_simple_canonicalize_cpu(cpus, is_single_links); } else { struct snd_soc_codec_conf *cconf = simple_props_to_codec_conf(dai_props, 0); struct snd_soc_dai_link_component *codecs = asoc_link_to_codec(dai_link, 0); @@ -325,7 +325,7 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv, dai_link->no_pcm = 1; dai_link->be_hw_params_fixup = asoc_simple_be_hw_params_fixup; - ret = graph_parse_node(priv, codec_ep, li, 0); + ret = graph_parse_node(priv, codec_ep, li, NULL); if (ret < 0) return ret; @@ -367,20 +367,23 @@ static int graph_dai_link_of(struct asoc_simple_priv *priv, struct snd_soc_dai_link_component *cpus = asoc_link_to_cpu(dai_link, 0); struct snd_soc_dai_link_component *codecs = asoc_link_to_codec(dai_link, 0); char dai_name[64]; - int ret; + int ret, is_single_links = 0; dev_dbg(dev, "link_of (%pOF)\n", cpu_ep); - ret = graph_parse_node(priv, cpu_ep, li, 1); + ret = graph_parse_node(priv, cpu_ep, li, &is_single_links); if (ret < 0) return ret; - ret = graph_parse_node(priv, codec_ep, li, 0); + ret = graph_parse_node(priv, codec_ep, li, NULL); if (ret < 0) return ret; snprintf(dai_name, sizeof(dai_name), "%s-%s", cpus->dai_name, codecs->dai_name); + + asoc_simple_canonicalize_cpu(cpus, is_single_links); + ret = graph_link_init(priv, cpu_ep, codec_ep, li, dai_name); if (ret < 0) return ret; From 0fad605fb0bdc00d8ad78696300ff2fbdee6e048 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 11 May 2021 14:28:55 +0100 Subject: [PATCH 085/730] ASoC: cs42l42: Regmap must use_single_read/write cs42l42 does not support standard burst transfers so the use_single_read and use_single_write flags must be set in the regmap config. Because of this bug, the patch: commit 0a0eb567e1d4 ("ASoC: cs42l42: Minor error paths fixups") broke cs42l42 probe() because without the use_single_* flags it causes regmap to issue a burst read. However, the missing use_single_* could cause problems anyway because the regmap cache can attempt burst transfers if these flags are not set. Fixes: 2c394ca79604 ("ASoC: Add support for CS42L42 codec") Signed-off-by: Richard Fitzgerald Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20210511132855.27159-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index bf982e145e94..77473c226f9e 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -399,6 +399,9 @@ static const struct regmap_config cs42l42_regmap = { .reg_defaults = cs42l42_reg_defaults, .num_reg_defaults = ARRAY_SIZE(cs42l42_reg_defaults), .cache_type = REGCACHE_RBTREE, + + .use_single_read = true, + .use_single_write = true, }; static DECLARE_TLV_DB_SCALE(adc_tlv, -9600, 100, false); From 965a7d72e798eb7af0aa67210e37cf7ecd1c9cad Mon Sep 17 00:00:00 2001 From: Mathy Vanhoef Date: Tue, 11 May 2021 20:02:42 +0200 Subject: [PATCH 086/730] mac80211: assure all fragments are encrypted Do not mix plaintext and encrypted fragments in protected Wi-Fi networks. This fixes CVE-2020-26147. Previously, an attacker was able to first forward a legitimate encrypted fragment towards a victim, followed by a plaintext fragment. The encrypted and plaintext fragment would then be reassembled. For further details see Section 6.3 and Appendix D in the paper "Fragment and Forge: Breaking Wi-Fi Through Frame Aggregation and Fragmentation". Because of this change there are now two equivalent conditions in the code to determine if a received fragment requires sequential PNs, so we also move this test to a separate function to make the code easier to maintain. Cc: stable@vger.kernel.org Signed-off-by: Mathy Vanhoef Link: https://lore.kernel.org/r/20210511200110.30c4394bb835.I5acfdb552cc1d20c339c262315950b3eac491397@changeid Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 62047e93e217..65fc674e27cc 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2194,6 +2194,16 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata, return NULL; } +static bool requires_sequential_pn(struct ieee80211_rx_data *rx, __le16 fc) +{ + return rx->key && + (rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP || + rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256 || + rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP || + rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP_256) && + ieee80211_has_protected(fc); +} + static ieee80211_rx_result debug_noinline ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) { @@ -2238,12 +2248,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) /* This is the first fragment of a new frame. */ entry = ieee80211_reassemble_add(rx->sdata, frag, seq, rx->seqno_idx, &(rx->skb)); - if (rx->key && - (rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP || - rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256 || - rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP || - rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP_256) && - ieee80211_has_protected(fc)) { + if (requires_sequential_pn(rx, fc)) { int queue = rx->security_idx; /* Store CCMP/GCMP PN so that we can verify that the @@ -2285,11 +2290,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) u8 pn[IEEE80211_CCMP_PN_LEN], *rpn; int queue; - if (!rx->key || - (rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP && - rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256 && - rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP && - rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP_256)) + if (!requires_sequential_pn(rx, fc)) return RX_DROP_UNUSABLE; memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN); for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) { From 94034c40ab4a3fcf581fbc7f8fdf4e29943c4a24 Mon Sep 17 00:00:00 2001 From: Mathy Vanhoef Date: Tue, 11 May 2021 20:02:43 +0200 Subject: [PATCH 087/730] mac80211: prevent mixed key and fragment cache attacks Simultaneously prevent mixed key attacks (CVE-2020-24587) and fragment cache attacks (CVE-2020-24586). This is accomplished by assigning a unique color to every key (per interface) and using this to track which key was used to decrypt a fragment. When reassembling frames, it is now checked whether all fragments were decrypted using the same key. To assure that fragment cache attacks are also prevented, the ID that is assigned to keys is unique even over (re)associations and (re)connects. This means fragments separated by a (re)association or (re)connect will not be reassembled. Because mac80211 now also prevents the reassembly of mixed encrypted and plaintext fragments, all cache attacks are prevented. Cc: stable@vger.kernel.org Signed-off-by: Mathy Vanhoef Link: https://lore.kernel.org/r/20210511200110.3f8290e59823.I622a67769ed39257327a362cfc09c812320eb979@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 1 + net/mac80211/key.c | 7 +++++++ net/mac80211/key.h | 2 ++ net/mac80211/rx.c | 6 ++++++ 4 files changed, 16 insertions(+) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8fcbaa1eedf3..874ffe7819e5 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -97,6 +97,7 @@ struct ieee80211_fragment_entry { u8 rx_queue; bool check_sequential_pn; /* needed for CCMP/GCMP */ u8 last_pn[6]; /* PN of the last fragment if CCMP was used */ + unsigned int key_color; }; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 56c068cb49c4..f695fc80088b 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -799,6 +799,7 @@ int ieee80211_key_link(struct ieee80211_key *key, struct ieee80211_sub_if_data *sdata, struct sta_info *sta) { + static atomic_t key_color = ATOMIC_INIT(0); struct ieee80211_key *old_key; int idx = key->conf.keyidx; bool pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE; @@ -850,6 +851,12 @@ int ieee80211_key_link(struct ieee80211_key *key, key->sdata = sdata; key->sta = sta; + /* + * Assign a unique ID to every key so we can easily prevent mixed + * key and fragment cache attacks. + */ + key->color = atomic_inc_return(&key_color); + increment_tailroom_need_count(sdata); ret = ieee80211_key_replace(sdata, sta, pairwise, old_key, key); diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 7ad72e9b4991..1e326c89d721 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -128,6 +128,8 @@ struct ieee80211_key { } debugfs; #endif + unsigned int color; + /* * key config, must be last because it contains key * material as variable length member diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 65fc674e27cc..531232b91bc4 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2255,6 +2255,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) * next fragment has a sequential PN value. */ entry->check_sequential_pn = true; + entry->key_color = rx->key->color; memcpy(entry->last_pn, rx->key->u.ccmp.rx_pn[queue], IEEE80211_CCMP_PN_LEN); @@ -2292,6 +2293,11 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) if (!requires_sequential_pn(rx, fc)) return RX_DROP_UNUSABLE; + + /* Prevent mixed key and fragment cache attacks */ + if (entry->key_color != rx->key->color) + return RX_DROP_UNUSABLE; + memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN); for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) { pn[i]++; From a1d5ff5651ea592c67054233b14b30bf4452999c Mon Sep 17 00:00:00 2001 From: Mathy Vanhoef Date: Tue, 11 May 2021 20:02:44 +0200 Subject: [PATCH 088/730] mac80211: properly handle A-MSDUs that start with an RFC 1042 header Properly parse A-MSDUs whose first 6 bytes happen to equal a rfc1042 header. This can occur in practice when the destination MAC address equals AA:AA:03:00:00:00. More importantly, this simplifies the next patch to mitigate A-MSDU injection attacks. Cc: stable@vger.kernel.org Signed-off-by: Mathy Vanhoef Link: https://lore.kernel.org/r/20210511200110.0b2b886492f0.I23dd5d685fe16d3b0ec8106e8f01b59f499dffed@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++-- net/mac80211/rx.c | 2 +- net/wireless/util.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5224f885a99a..58c2cd417e89 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5760,7 +5760,7 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr); */ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, const u8 *addr, enum nl80211_iftype iftype, - u8 data_offset); + u8 data_offset, bool is_amsdu); /** * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3 @@ -5772,7 +5772,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, enum nl80211_iftype iftype) { - return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0); + return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0, false); } /** diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 531232b91bc4..f14d32a5001d 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2682,7 +2682,7 @@ __ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset) if (ieee80211_data_to_8023_exthdr(skb, ðhdr, rx->sdata->vif.addr, rx->sdata->vif.type, - data_offset)) + data_offset, true)) return RX_DROP_UNUSABLE; ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr, diff --git a/net/wireless/util.c b/net/wireless/util.c index 382c5262d997..39966a873e40 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -542,7 +542,7 @@ EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen); int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, const u8 *addr, enum nl80211_iftype iftype, - u8 data_offset) + u8 data_offset, bool is_amsdu) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct { @@ -629,7 +629,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, skb_copy_bits(skb, hdrlen, &payload, sizeof(payload)); tmp.h_proto = payload.proto; - if (likely((ether_addr_equal(payload.hdr, rfc1042_header) && + if (likely((!is_amsdu && ether_addr_equal(payload.hdr, rfc1042_header) && tmp.h_proto != htons(ETH_P_AARP) && tmp.h_proto != htons(ETH_P_IPX)) || ether_addr_equal(payload.hdr, bridge_tunnel_header))) From 2b8a1fee3488c602aca8bea004a087e60806a5cf Mon Sep 17 00:00:00 2001 From: Mathy Vanhoef Date: Tue, 11 May 2021 20:02:45 +0200 Subject: [PATCH 089/730] cfg80211: mitigate A-MSDU aggregation attacks Mitigate A-MSDU injection attacks (CVE-2020-24588) by detecting if the destination address of a subframe equals an RFC1042 (i.e., LLC/SNAP) header, and if so dropping the complete A-MSDU frame. This mitigates known attacks, although new (unknown) aggregation-based attacks may remain possible. This defense works because in A-MSDU aggregation injection attacks, a normal encrypted Wi-Fi frame is turned into an A-MSDU frame. This means the first 6 bytes of the first A-MSDU subframe correspond to an RFC1042 header. In other words, the destination MAC address of the first A-MSDU subframe contains the start of an RFC1042 header during an aggregation attack. We can detect this and thereby prevent this specific attack. For details, see Section 7.2 of "Fragment and Forge: Breaking Wi-Fi Through Frame Aggregation and Fragmentation". Note that for kernel 4.9 and above this patch depends on "mac80211: properly handle A-MSDUs that start with a rfc1042 header". Otherwise this patch has no impact and attacks will remain possible. Cc: stable@vger.kernel.org Signed-off-by: Mathy Vanhoef Link: https://lore.kernel.org/r/20210511200110.25d93176ddaf.I9e265b597f2cd23eb44573f35b625947b386a9de@changeid Signed-off-by: Johannes Berg --- net/wireless/util.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/wireless/util.c b/net/wireless/util.c index 39966a873e40..7ec021a610ae 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -771,6 +771,9 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, remaining = skb->len - offset; if (subframe_len > remaining) goto purge; + /* mitigate A-MSDU aggregation injection attacks */ + if (ether_addr_equal(eth.h_dest, rfc1042_header)) + goto purge; offset += sizeof(struct ethhdr); last = remaining <= subframe_len + padding; From 270032a2a9c4535799736142e1e7c413ca7b836e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 11 May 2021 20:02:46 +0200 Subject: [PATCH 090/730] mac80211: drop A-MSDUs on old ciphers With old ciphers (WEP and TKIP) we shouldn't be using A-MSDUs since A-MSDUs are only supported if we know that they are, and the only practical way for that is HT support which doesn't support old ciphers. However, we would normally accept them anyway. Since we check the MMIC before deaggregating A-MSDUs, and the A-MSDU bit in the QoS header is not protected in TKIP (or WEP), this enables attacks similar to CVE-2020-24588. To prevent that, drop A-MSDUs completely with old ciphers. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210511200110.076543300172.I548e6e71f1ee9cad4b9a37bf212ae7db723587aa@changeid Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f14d32a5001d..8a72d48ad6e0 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -6,7 +6,7 @@ * Copyright 2007-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation */ #include @@ -2739,6 +2739,23 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) if (is_multicast_ether_addr(hdr->addr1)) return RX_DROP_UNUSABLE; + if (rx->key) { + /* + * We should not receive A-MSDUs on pre-HT connections, + * and HT connections cannot use old ciphers. Thus drop + * them, as in those cases we couldn't even have SPP + * A-MSDUs or such. + */ + switch (rx->key->conf.cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: + case WLAN_CIPHER_SUITE_TKIP: + return RX_DROP_UNUSABLE; + default: + break; + } + } + return __ieee80211_rx_h_amsdu(rx, 0); } From 3a11ce08c45b50d69c891d71760b7c5b92074709 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 11 May 2021 20:02:47 +0200 Subject: [PATCH 091/730] mac80211: add fragment cache to sta_info Prior patches protected against fragmentation cache attacks by coloring keys, but this shows that it can lead to issues when multiple stations use the same sequence number. Add a fragment cache to struct sta_info (in addition to the one in the interface) to separate fragments for different stations properly. This then automatically clear most of the fragment cache when a station disconnects (or reassociates) from an AP, or when client interfaces disconnect from the network, etc. On the way, also fix the comment there since this brings us in line with the recommendation in 802.11-2016 ("An AP should support ..."). Additionally, remove a useless condition (since there's no problem purging an already empty list). Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210511200110.fc35046b0d52.I1ef101e3784d13e8f6600d83de7ec9a3a45bcd52@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 26 ++++-------------------- net/mac80211/iface.c | 11 +++------- net/mac80211/rx.c | 41 ++++++++++++++++++++++++++++---------- net/mac80211/sta_info.c | 6 +++++- net/mac80211/sta_info.h | 32 ++++++++++++++++++++++++++++- 5 files changed, 73 insertions(+), 43 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 874ffe7819e5..4c714375bad0 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -50,12 +50,6 @@ struct ieee80211_local; #define IEEE80211_ENCRYPT_HEADROOM 8 #define IEEE80211_ENCRYPT_TAILROOM 18 -/* IEEE 802.11 (Ch. 9.5 Defragmentation) requires support for concurrent - * reception of at least three fragmented frames. This limit can be increased - * by changing this define, at the cost of slower frame reassembly and - * increased memory use (about 2 kB of RAM per entry). */ -#define IEEE80211_FRAGMENT_MAX 4 - /* power level hasn't been configured (or set to automatic) */ #define IEEE80211_UNSET_POWER_LEVEL INT_MIN @@ -88,19 +82,6 @@ extern const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS]; #define IEEE80211_MAX_NAN_INSTANCE_ID 255 -struct ieee80211_fragment_entry { - struct sk_buff_head skb_list; - unsigned long first_frag_time; - u16 seq; - u16 extra_len; - u16 last_frag; - u8 rx_queue; - bool check_sequential_pn; /* needed for CCMP/GCMP */ - u8 last_pn[6]; /* PN of the last fragment if CCMP was used */ - unsigned int key_color; -}; - - struct ieee80211_bss { u32 device_ts_beacon, device_ts_presp; @@ -903,9 +884,7 @@ struct ieee80211_sub_if_data { char name[IFNAMSIZ]; - /* Fragment table for host-based reassembly */ - struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX]; - unsigned int fragment_next; + struct ieee80211_fragment_cache frags; /* TID bitmap for NoAck policy */ u16 noack_map; @@ -2321,4 +2300,7 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw, #define debug_noinline #endif +void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache); +void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache); + #endif /* IEEE80211_I_H */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 7032a2b59249..2e2f73a4aa73 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -8,7 +8,7 @@ * Copyright 2008, Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (c) 2016 Intel Deutschland GmbH - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation */ #include #include @@ -677,16 +677,12 @@ static void ieee80211_set_multicast_list(struct net_device *dev) */ static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata) { - int i; - /* free extra data */ ieee80211_free_keys(sdata, false); ieee80211_debugfs_remove_netdev(sdata); - for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) - __skb_queue_purge(&sdata->fragments[i].skb_list); - sdata->fragment_next = 0; + ieee80211_destroy_frag_cache(&sdata->frags); if (ieee80211_vif_is_mesh(&sdata->vif)) ieee80211_mesh_teardown_sdata(sdata); @@ -1930,8 +1926,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, sdata->wdev.wiphy = local->hw.wiphy; sdata->local = local; - for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) - skb_queue_head_init(&sdata->fragments[i].skb_list); + ieee80211_init_frag_cache(&sdata->frags); INIT_LIST_HEAD(&sdata->key_list); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 8a72d48ad6e0..7212a1bebd0c 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2123,19 +2123,34 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) return result; } +void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cache->entries); i++) + skb_queue_head_init(&cache->entries[i].skb_list); +} + +void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cache->entries); i++) + __skb_queue_purge(&cache->entries[i].skb_list); +} + static inline struct ieee80211_fragment_entry * -ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata, +ieee80211_reassemble_add(struct ieee80211_fragment_cache *cache, unsigned int frag, unsigned int seq, int rx_queue, struct sk_buff **skb) { struct ieee80211_fragment_entry *entry; - entry = &sdata->fragments[sdata->fragment_next++]; - if (sdata->fragment_next >= IEEE80211_FRAGMENT_MAX) - sdata->fragment_next = 0; + entry = &cache->entries[cache->next++]; + if (cache->next >= IEEE80211_FRAGMENT_MAX) + cache->next = 0; - if (!skb_queue_empty(&entry->skb_list)) - __skb_queue_purge(&entry->skb_list); + __skb_queue_purge(&entry->skb_list); __skb_queue_tail(&entry->skb_list, *skb); /* no need for locking */ *skb = NULL; @@ -2150,14 +2165,14 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata, } static inline struct ieee80211_fragment_entry * -ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata, +ieee80211_reassemble_find(struct ieee80211_fragment_cache *cache, unsigned int frag, unsigned int seq, int rx_queue, struct ieee80211_hdr *hdr) { struct ieee80211_fragment_entry *entry; int i, idx; - idx = sdata->fragment_next; + idx = cache->next; for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) { struct ieee80211_hdr *f_hdr; struct sk_buff *f_skb; @@ -2166,7 +2181,7 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata, if (idx < 0) idx = IEEE80211_FRAGMENT_MAX - 1; - entry = &sdata->fragments[idx]; + entry = &cache->entries[idx]; if (skb_queue_empty(&entry->skb_list) || entry->seq != seq || entry->rx_queue != rx_queue || entry->last_frag + 1 != frag) @@ -2207,6 +2222,7 @@ static bool requires_sequential_pn(struct ieee80211_rx_data *rx, __le16 fc) static ieee80211_rx_result debug_noinline ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) { + struct ieee80211_fragment_cache *cache = &rx->sdata->frags; struct ieee80211_hdr *hdr; u16 sc; __le16 fc; @@ -2228,6 +2244,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) goto out_no_led; } + if (rx->sta) + cache = &rx->sta->frags; + if (likely(!ieee80211_has_morefrags(fc) && frag == 0)) goto out; @@ -2246,7 +2265,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) if (frag == 0) { /* This is the first fragment of a new frame. */ - entry = ieee80211_reassemble_add(rx->sdata, frag, seq, + entry = ieee80211_reassemble_add(cache, frag, seq, rx->seqno_idx, &(rx->skb)); if (requires_sequential_pn(rx, fc)) { int queue = rx->security_idx; @@ -2274,7 +2293,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) /* This is a fragment for a frame that should already be pending in * fragment cache. Add this fragment to the end of the pending entry. */ - entry = ieee80211_reassemble_find(rx->sdata, frag, seq, + entry = ieee80211_reassemble_find(cache, frag, seq, rx->seqno_idx, hdr); if (!entry) { I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index ec6973ee88ef..f2fb69da9b6e 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -4,7 +4,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation */ #include @@ -392,6 +392,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, u64_stats_init(&sta->rx_stats.syncp); + ieee80211_init_frag_cache(&sta->frags); + sta->sta_state = IEEE80211_STA_NONE; /* Mark TID as unreserved */ @@ -1102,6 +1104,8 @@ static void __sta_info_destroy_part2(struct sta_info *sta) ieee80211_sta_debugfs_remove(sta); + ieee80211_destroy_frag_cache(&sta->frags); + cleanup_single_sta(sta); } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 78b9d0c7cc58..5c56d29a619e 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -3,7 +3,7 @@ * Copyright 2002-2005, Devicescape Software, Inc. * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright(c) 2020 Intel Corporation + * Copyright(c) 2020-2021 Intel Corporation */ #ifndef STA_INFO_H @@ -438,6 +438,33 @@ struct ieee80211_sta_rx_stats { u64 msdu[IEEE80211_NUM_TIDS + 1]; }; +/* + * IEEE 802.11-2016 (10.6 "Defragmentation") recommends support for "concurrent + * reception of at least one MSDU per access category per associated STA" + * on APs, or "at least one MSDU per access category" on other interface types. + * + * This limit can be increased by changing this define, at the cost of slower + * frame reassembly and increased memory use while fragments are pending. + */ +#define IEEE80211_FRAGMENT_MAX 4 + +struct ieee80211_fragment_entry { + struct sk_buff_head skb_list; + unsigned long first_frag_time; + u16 seq; + u16 extra_len; + u16 last_frag; + u8 rx_queue; + bool check_sequential_pn; /* needed for CCMP/GCMP */ + u8 last_pn[6]; /* PN of the last fragment if CCMP was used */ + unsigned int key_color; +}; + +struct ieee80211_fragment_cache { + struct ieee80211_fragment_entry entries[IEEE80211_FRAGMENT_MAX]; + unsigned int next; +}; + /* * The bandwidth threshold below which the per-station CoDel parameters will be * scaled to be more lenient (to prevent starvation of slow stations). This @@ -531,6 +558,7 @@ struct ieee80211_sta_rx_stats { * @status_stats.last_ack_signal: last ACK signal * @status_stats.ack_signal_filled: last ACK signal validity * @status_stats.avg_ack_signal: average ACK signal + * @frags: fragment cache */ struct sta_info { /* General information, mostly static */ @@ -639,6 +667,8 @@ struct sta_info { struct cfg80211_chan_def tdls_chandef; + struct ieee80211_fragment_cache frags; + /* keep last! */ struct ieee80211_sta sta; }; From bf30ca922a0c0176007e074b0acc77ed345e9990 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 11 May 2021 20:02:48 +0200 Subject: [PATCH 092/730] mac80211: check defrag PN against current frame As pointed out by Mathy Vanhoef, we implement the RX PN check on fragmented frames incorrectly - we check against the last received PN prior to the new frame, rather than to the one in this frame itself. Prior patches addressed the security issue here, but in order to be able to reason better about the code, fix it to really compare against the current frame's PN, not the last stored one. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210511200110.bfbc340ff071.Id0b690e581da7d03d76df90bb0e3fd55930bc8a0@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 11 +++++++++-- net/mac80211/rx.c | 5 ++--- net/mac80211/wpa.c | 13 +++++++++---- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 4c714375bad0..214404a558fb 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -223,8 +223,15 @@ struct ieee80211_rx_data { */ int security_idx; - u32 tkip_iv32; - u16 tkip_iv16; + union { + struct { + u32 iv32; + u16 iv16; + } tkip; + struct { + u8 pn[IEEE80211_CCMP_PN_LEN]; + } ccm_gcm; + }; }; struct ieee80211_csa_settings { diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 7212a1bebd0c..b619c47e1d12 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2308,7 +2308,6 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) if (entry->check_sequential_pn) { int i; u8 pn[IEEE80211_CCMP_PN_LEN], *rpn; - int queue; if (!requires_sequential_pn(rx, fc)) return RX_DROP_UNUSABLE; @@ -2323,8 +2322,8 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) if (pn[i]) break; } - queue = rx->security_idx; - rpn = rx->key->u.ccmp.rx_pn[queue]; + + rpn = rx->ccm_gcm.pn; if (memcmp(pn, rpn, IEEE80211_CCMP_PN_LEN)) return RX_DROP_UNUSABLE; memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN); diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 91bf32af55e9..bca47fad5a16 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -3,6 +3,7 @@ * Copyright 2002-2004, Instant802 Networks, Inc. * Copyright 2008, Jouni Malinen * Copyright (C) 2016-2017 Intel Deutschland GmbH + * Copyright (C) 2020-2021 Intel Corporation */ #include @@ -167,8 +168,8 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) update_iv: /* update IV in key information to be able to detect replays */ - rx->key->u.tkip.rx[rx->security_idx].iv32 = rx->tkip_iv32; - rx->key->u.tkip.rx[rx->security_idx].iv16 = rx->tkip_iv16; + rx->key->u.tkip.rx[rx->security_idx].iv32 = rx->tkip.iv32; + rx->key->u.tkip.rx[rx->security_idx].iv16 = rx->tkip.iv16; return RX_CONTINUE; @@ -294,8 +295,8 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) key, skb->data + hdrlen, skb->len - hdrlen, rx->sta->sta.addr, hdr->addr1, hwaccel, rx->security_idx, - &rx->tkip_iv32, - &rx->tkip_iv16); + &rx->tkip.iv32, + &rx->tkip.iv16); if (res != TKIP_DECRYPT_OK) return RX_DROP_UNUSABLE; @@ -553,6 +554,8 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, } memcpy(key->u.ccmp.rx_pn[queue], pn, IEEE80211_CCMP_PN_LEN); + if (unlikely(ieee80211_is_frag(hdr))) + memcpy(rx->ccm_gcm.pn, pn, IEEE80211_CCMP_PN_LEN); } /* Remove CCMP header and MIC */ @@ -781,6 +784,8 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) } memcpy(key->u.gcmp.rx_pn[queue], pn, IEEE80211_GCMP_PN_LEN); + if (unlikely(ieee80211_is_frag(hdr))) + memcpy(rx->ccm_gcm.pn, pn, IEEE80211_CCMP_PN_LEN); } /* Remove GCMP header and MIC */ From 7e44a0b597f04e67eee8cdcbe7ee706c6f5de38b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 11 May 2021 20:02:49 +0200 Subject: [PATCH 093/730] mac80211: prevent attacks on TKIP/WEP as well Similar to the issues fixed in previous patches, TKIP and WEP should be protected even if for TKIP we have the Michael MIC protecting it, and WEP is broken anyway. However, this also somewhat protects potential other algorithms that drivers might implement. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210511200110.430e8c202313.Ia37e4e5b6b3eaab1a5ae050e015f6c92859dbe27@changeid Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 12 ++++++++++++ net/mac80211/sta_info.h | 3 ++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index b619c47e1d12..4454ec47283f 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2274,6 +2274,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) * next fragment has a sequential PN value. */ entry->check_sequential_pn = true; + entry->is_protected = true; entry->key_color = rx->key->color; memcpy(entry->last_pn, rx->key->u.ccmp.rx_pn[queue], @@ -2286,6 +2287,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) sizeof(rx->key->u.gcmp.rx_pn[queue])); BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN != IEEE80211_GCMP_PN_LEN); + } else if (rx->key && ieee80211_has_protected(fc)) { + entry->is_protected = true; + entry->key_color = rx->key->color; } return RX_QUEUED; } @@ -2327,6 +2331,14 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) if (memcmp(pn, rpn, IEEE80211_CCMP_PN_LEN)) return RX_DROP_UNUSABLE; memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN); + } else if (entry->is_protected && + (!rx->key || !ieee80211_has_protected(fc) || + rx->key->color != entry->key_color)) { + /* Drop this as a mixed key or fragment cache attack, even + * if for TKIP Michael MIC should protect us, and WEP is a + * lost cause anyway. + */ + return RX_DROP_UNUSABLE; } skb_pull(rx->skb, ieee80211_hdrlen(fc)); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 5c56d29a619e..0333072ebd98 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -455,7 +455,8 @@ struct ieee80211_fragment_entry { u16 extra_len; u16 last_frag; u8 rx_queue; - bool check_sequential_pn; /* needed for CCMP/GCMP */ + u8 check_sequential_pn:1, /* needed for CCMP/GCMP */ + is_protected:1; u8 last_pn[6]; /* PN of the last fragment if CCMP was used */ unsigned int key_color; }; From a8c4d76a8dd4fb9666fc8919a703d85fb8f44ed8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 11 May 2021 20:02:50 +0200 Subject: [PATCH 094/730] mac80211: do not accept/forward invalid EAPOL frames EAPOL frames are used for authentication and key management between the AP and each individual STA associated in the BSS. Those frames are not supposed to be sent by one associated STA to another associated STA (either unicast for broadcast/multicast). Similarly, in 802.11 they're supposed to be sent to the authenticator (AP) address. Since it is possible for unexpected EAPOL frames to result in misbehavior in supplicant implementations, it is better for the AP to not allow such cases to be forwarded to other clients either directly, or indirectly if the AP interface is part of a bridge. Accept EAPOL (control port) frames only if they're transmitted to the own address, or, due to interoperability concerns, to the PAE group address. Disable forwarding of EAPOL (or well, the configured control port protocol) frames back to wireless medium in all cases. Previously, these frames were accepted from fully authenticated and authorized stations and also from unauthenticated stations for one of the cases. Additionally, to avoid forwarding by the bridge, rewrite the PAE group address case to the local MAC address. Cc: stable@vger.kernel.org Co-developed-by: Jouni Malinen Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20210511200110.cb327ed0cabe.Ib7dcffa2a31f0913d660de65ba3c8aca75b1d10f@changeid Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 4454ec47283f..22a925899a9e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2531,13 +2531,13 @@ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx, __le16 fc) struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data; /* - * Allow EAPOL frames to us/the PAE group address regardless - * of whether the frame was encrypted or not. + * Allow EAPOL frames to us/the PAE group address regardless of + * whether the frame was encrypted or not, and always disallow + * all other destination addresses for them. */ - if (ehdr->h_proto == rx->sdata->control_port_protocol && - (ether_addr_equal(ehdr->h_dest, rx->sdata->vif.addr) || - ether_addr_equal(ehdr->h_dest, pae_group_addr))) - return true; + if (unlikely(ehdr->h_proto == rx->sdata->control_port_protocol)) + return ether_addr_equal(ehdr->h_dest, rx->sdata->vif.addr) || + ether_addr_equal(ehdr->h_dest, pae_group_addr); if (ieee80211_802_1x_port_control(rx) || ieee80211_drop_unencrypted(rx, fc)) @@ -2562,8 +2562,28 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb, cfg80211_rx_control_port(dev, skb, noencrypt); dev_kfree_skb(skb); } else { + struct ethhdr *ehdr = (void *)skb_mac_header(skb); + memset(skb->cb, 0, sizeof(skb->cb)); + /* + * 802.1X over 802.11 requires that the authenticator address + * be used for EAPOL frames. However, 802.1X allows the use of + * the PAE group address instead. If the interface is part of + * a bridge and we pass the frame with the PAE group address, + * then the bridge will forward it to the network (even if the + * client was not associated yet), which isn't supposed to + * happen. + * To avoid that, rewrite the destination address to our own + * address, so that the authenticator (e.g. hostapd) will see + * the frame, but bridge won't forward it anywhere else. Note + * that due to earlier filtering, the only other address can + * be the PAE group address. + */ + if (unlikely(skb->protocol == sdata->control_port_protocol && + !ether_addr_equal(ehdr->h_dest, sdata->vif.addr))) + ether_addr_copy(ehdr->h_dest, sdata->vif.addr); + /* deliver to local stack */ if (rx->list) list_add_tail(&skb->list, rx->list); @@ -2603,6 +2623,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) if ((sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) && !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) && + ehdr->h_proto != rx->sdata->control_port_protocol && (sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) { if (is_multicast_ether_addr(ehdr->h_dest) && ieee80211_vif_get_num_mcast_if(sdata) != 0) { From 3edc6b0d6c061a70d8ca3c3c72eb1f58ce29bfb1 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Tue, 11 May 2021 20:02:51 +0200 Subject: [PATCH 095/730] mac80211: extend protection against mixed key and fragment cache attacks For some chips/drivers, e.g., QCA6174 with ath10k, the decryption is done by the hardware, and the Protected bit in the Frame Control field is cleared in the lower level driver before the frame is passed to mac80211. In such cases, the condition for ieee80211_has_protected() is not met in ieee80211_rx_h_defragment() of mac80211 and the new security validation steps are not executed. Extend mac80211 to cover the case where the Protected bit has been cleared, but the frame is indicated as having been decrypted by the hardware. This extends protection against mixed key and fragment cache attack for additional drivers/chips. This fixes CVE-2020-24586 and CVE-2020-24587 for such cases. Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00110-QCARMSWP-1 Cc: stable@vger.kernel.org Signed-off-by: Wen Gong Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20210511200110.037aa5ca0390.I7bb888e2965a0db02a67075fcb5deb50eb7408aa@changeid Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 22a925899a9e..1bb43edd47b6 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2229,6 +2229,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) unsigned int frag, seq; struct ieee80211_fragment_entry *entry; struct sk_buff *skb; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); hdr = (struct ieee80211_hdr *)rx->skb->data; fc = hdr->frame_control; @@ -2287,7 +2288,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) sizeof(rx->key->u.gcmp.rx_pn[queue])); BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN != IEEE80211_GCMP_PN_LEN); - } else if (rx->key && ieee80211_has_protected(fc)) { + } else if (rx->key && + (ieee80211_has_protected(fc) || + (status->flag & RX_FLAG_DECRYPTED))) { entry->is_protected = true; entry->key_color = rx->key->color; } @@ -2332,13 +2335,19 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN); } else if (entry->is_protected && - (!rx->key || !ieee80211_has_protected(fc) || + (!rx->key || + (!ieee80211_has_protected(fc) && + !(status->flag & RX_FLAG_DECRYPTED)) || rx->key->color != entry->key_color)) { /* Drop this as a mixed key or fragment cache attack, even * if for TKIP Michael MIC should protect us, and WEP is a * lost cause anyway. */ return RX_DROP_UNUSABLE; + } else if (entry->is_protected && rx->key && + entry->key_color != rx->key->color && + (status->flag & RX_FLAG_DECRYPTED)) { + return RX_DROP_UNUSABLE; } skb_pull(rx->skb, ieee80211_hdrlen(fc)); From a1166b2653db2f3de7338b9fb8a0f6e924b904ee Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Tue, 11 May 2021 20:02:52 +0200 Subject: [PATCH 096/730] ath10k: add CCMP PN replay protection for fragmented frames for PCIe PN replay check for not fragmented frames is finished in the firmware, but this was not done for fragmented frames when ath10k is used with QCA6174/QCA6377 PCIe. mac80211 has the function ieee80211_rx_h_defragment() for PN replay check for fragmented frames, but this does not get checked with QCA6174 due to the ieee80211_has_protected() condition not matching the cleared Protected bit case. Validate the PN of received fragmented frames within ath10k when CCMP is used and drop the fragment if the PN is not correct (incremented by exactly one from the previous fragment). This applies only for QCA6174/QCA6377 PCIe. Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00110-QCARMSWP-1 Cc: stable@vger.kernel.org Signed-off-by: Wen Gong Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20210511200110.9ba2664866a4.I756e47b67e210dba69966d989c4711ffc02dc6bc@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/htt.h | 1 + drivers/net/wireless/ath/ath10k/htt_rx.c | 99 +++++++++++++++++++++++- 2 files changed, 96 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h index 956157946106..dbc8aef82a65 100644 --- a/drivers/net/wireless/ath/ath10k/htt.h +++ b/drivers/net/wireless/ath/ath10k/htt.h @@ -845,6 +845,7 @@ enum htt_security_types { #define ATH10K_HTT_TXRX_PEER_SECURITY_MAX 2 #define ATH10K_TXRX_NUM_EXT_TIDS 19 +#define ATH10K_TXRX_NON_QOS_TID 16 enum htt_security_flags { #define HTT_SECURITY_TYPE_MASK 0x7F diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 1a08156d5011..f1e5bce8b14f 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -1746,16 +1746,87 @@ static void ath10k_htt_rx_h_csum_offload(struct sk_buff *msdu) msdu->ip_summed = ath10k_htt_rx_get_csum_state(msdu); } +static u64 ath10k_htt_rx_h_get_pn(struct ath10k *ar, struct sk_buff *skb, + u16 offset, + enum htt_rx_mpdu_encrypt_type enctype) +{ + struct ieee80211_hdr *hdr; + u64 pn = 0; + u8 *ehdr; + + hdr = (struct ieee80211_hdr *)(skb->data + offset); + ehdr = skb->data + offset + ieee80211_hdrlen(hdr->frame_control); + + if (enctype == HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2) { + pn = ehdr[0]; + pn |= (u64)ehdr[1] << 8; + pn |= (u64)ehdr[4] << 16; + pn |= (u64)ehdr[5] << 24; + pn |= (u64)ehdr[6] << 32; + pn |= (u64)ehdr[7] << 40; + } + return pn; +} + +static bool ath10k_htt_rx_h_frag_pn_check(struct ath10k *ar, + struct sk_buff *skb, + u16 peer_id, + u16 offset, + enum htt_rx_mpdu_encrypt_type enctype) +{ + struct ath10k_peer *peer; + union htt_rx_pn_t *last_pn, new_pn = {0}; + struct ieee80211_hdr *hdr; + bool more_frags; + u8 tid, frag_number; + u32 seq; + + peer = ath10k_peer_find_by_id(ar, peer_id); + if (!peer) { + ath10k_dbg(ar, ATH10K_DBG_HTT, "invalid peer for frag pn check\n"); + return false; + } + + hdr = (struct ieee80211_hdr *)(skb->data + offset); + if (ieee80211_is_data_qos(hdr->frame_control)) + tid = ieee80211_get_tid(hdr); + else + tid = ATH10K_TXRX_NON_QOS_TID; + + last_pn = &peer->frag_tids_last_pn[tid]; + new_pn.pn48 = ath10k_htt_rx_h_get_pn(ar, skb, offset, enctype); + more_frags = ieee80211_has_morefrags(hdr->frame_control); + frag_number = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_FRAG; + seq = (__le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_SEQ) >> 4; + + if (frag_number == 0) { + last_pn->pn48 = new_pn.pn48; + peer->frag_tids_seq[tid] = seq; + } else { + if (seq != peer->frag_tids_seq[tid]) + return false; + + if (new_pn.pn48 != last_pn->pn48 + 1) + return false; + + last_pn->pn48 = new_pn.pn48; + } + + return true; +} + static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, struct sk_buff_head *amsdu, struct ieee80211_rx_status *status, bool fill_crypt_header, u8 *rx_hdr, - enum ath10k_pkt_rx_err *err) + enum ath10k_pkt_rx_err *err, + u16 peer_id, + bool frag) { struct sk_buff *first; struct sk_buff *last; - struct sk_buff *msdu; + struct sk_buff *msdu, *temp; struct htt_rx_desc *rxd; struct ieee80211_hdr *hdr; enum htt_rx_mpdu_encrypt_type enctype; @@ -1768,6 +1839,7 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, bool is_decrypted; bool is_mgmt; u32 attention; + bool frag_pn_check = true; if (skb_queue_empty(amsdu)) return; @@ -1866,6 +1938,24 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, } skb_queue_walk(amsdu, msdu) { + if (frag && !fill_crypt_header && is_decrypted && + enctype == HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2) + frag_pn_check = ath10k_htt_rx_h_frag_pn_check(ar, + msdu, + peer_id, + 0, + enctype); + + if (!frag_pn_check) { + /* Discard the fragment with invalid PN */ + temp = msdu->prev; + __skb_unlink(msdu, amsdu); + dev_kfree_skb_any(msdu); + msdu = temp; + frag_pn_check = true; + continue; + } + ath10k_htt_rx_h_csum_offload(msdu); ath10k_htt_rx_h_undecap(ar, msdu, status, first_hdr, enctype, is_decrypted); @@ -2071,7 +2161,8 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt) ath10k_htt_rx_h_unchain(ar, &amsdu, &drop_cnt, &unchain_cnt); ath10k_htt_rx_h_filter(ar, &amsdu, rx_status, &drop_cnt_filter); - ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status, true, first_hdr, &err); + ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status, true, first_hdr, &err, 0, + false); msdus_to_queue = skb_queue_len(&amsdu); ath10k_htt_rx_h_enqueue(ar, &amsdu, rx_status); @@ -3027,7 +3118,7 @@ static int ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) ath10k_htt_rx_h_ppdu(ar, &amsdu, status, vdev_id); ath10k_htt_rx_h_filter(ar, &amsdu, status, NULL); ath10k_htt_rx_h_mpdu(ar, &amsdu, status, false, NULL, - NULL); + NULL, peer_id, frag); ath10k_htt_rx_h_enqueue(ar, &amsdu, status); break; case -EAGAIN: From 65c415a144ad8132b6a6d97d4a1919ffc728e2d1 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Tue, 11 May 2021 20:02:53 +0200 Subject: [PATCH 097/730] ath10k: drop fragments with multicast DA for PCIe Fragmentation is not used with multicast frames. Discard unexpected fragments with multicast DA. This fixes CVE-2020-26145. Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00110-QCARMSWP-1 Cc: stable@vger.kernel.org Signed-off-by: Wen Gong Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20210511200110.5a0bd289bda8.Idd6ebea20038fb1cfee6de924aa595e5647c9eae@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/htt_rx.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index f1e5bce8b14f..cb04848ed5cb 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -1768,6 +1768,16 @@ static u64 ath10k_htt_rx_h_get_pn(struct ath10k *ar, struct sk_buff *skb, return pn; } +static bool ath10k_htt_rx_h_frag_multicast_check(struct ath10k *ar, + struct sk_buff *skb, + u16 offset) +{ + struct ieee80211_hdr *hdr; + + hdr = (struct ieee80211_hdr *)(skb->data + offset); + return !is_multicast_ether_addr(hdr->addr1); +} + static bool ath10k_htt_rx_h_frag_pn_check(struct ath10k *ar, struct sk_buff *skb, u16 peer_id, @@ -1839,7 +1849,7 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, bool is_decrypted; bool is_mgmt; u32 attention; - bool frag_pn_check = true; + bool frag_pn_check = true, multicast_check = true; if (skb_queue_empty(amsdu)) return; @@ -1946,13 +1956,20 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, 0, enctype); - if (!frag_pn_check) { - /* Discard the fragment with invalid PN */ + if (frag) + multicast_check = ath10k_htt_rx_h_frag_multicast_check(ar, + msdu, + 0); + + if (!frag_pn_check || !multicast_check) { + /* Discard the fragment with invalid PN or multicast DA + */ temp = msdu->prev; __skb_unlink(msdu, amsdu); dev_kfree_skb_any(msdu); msdu = temp; frag_pn_check = true; + multicast_check = true; continue; } From 40e7462dad6f3d06efdb17d26539e61ab6e34db1 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Tue, 11 May 2021 20:02:54 +0200 Subject: [PATCH 098/730] ath10k: drop fragments with multicast DA for SDIO Fragmentation is not used with multicast frames. Discard unexpected fragments with multicast DA. This fixes CVE-2020-26145. Tested-on: QCA6174 hw3.2 SDIO WLAN.RMH.4.4.1-00049 Cc: stable@vger.kernel.org Signed-off-by: Wen Gong Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20210511200110.9ca6ca7945a9.I1e18b514590af17c155bda86699bc3a971a8dcf4@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/htt_rx.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index cb04848ed5cb..b1d93ff5215a 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -2617,6 +2617,13 @@ static bool ath10k_htt_rx_proc_rx_frag_ind_hl(struct ath10k_htt *htt, rx_desc = (struct htt_hl_rx_desc *)(skb->data + tot_hdr_len); rx_desc_info = __le32_to_cpu(rx_desc->info); + hdr = (struct ieee80211_hdr *)((u8 *)rx_desc + rx_hl->fw_desc.len); + + if (is_multicast_ether_addr(hdr->addr1)) { + /* Discard the fragment with multicast DA */ + goto err; + } + if (!MS(rx_desc_info, HTT_RX_DESC_HL_INFO_ENCRYPTED)) { spin_unlock_bh(&ar->data_lock); return ath10k_htt_rx_proc_rx_ind_hl(htt, &resp->rx_ind_hl, skb, @@ -2624,8 +2631,6 @@ static bool ath10k_htt_rx_proc_rx_frag_ind_hl(struct ath10k_htt *htt, HTT_RX_NON_TKIP_MIC); } - hdr = (struct ieee80211_hdr *)((u8 *)rx_desc + rx_hl->fw_desc.len); - if (ieee80211_has_retry(hdr->frame_control)) goto err; From 079a108feba474b4b32bd3471db03e11f2f83b81 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Tue, 11 May 2021 20:02:55 +0200 Subject: [PATCH 099/730] ath10k: drop MPDU which has discard flag set by firmware for SDIO When the discard flag is set by the firmware for an MPDU, it should be dropped. This allows a mitigation for CVE-2020-24588 to be implemented in the firmware. Tested-on: QCA6174 hw3.2 SDIO WLAN.RMH.4.4.1-00049 Cc: stable@vger.kernel.org Signed-off-by: Wen Gong Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20210511200110.11968c725b5c.Idd166365ebea2771c0c0a38c78b5060750f90e17@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/htt_rx.c | 5 +++++ drivers/net/wireless/ath/ath10k/rx_desc.h | 14 +++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index b1d93ff5215a..12451ab66a19 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -2312,6 +2312,11 @@ static bool ath10k_htt_rx_proc_rx_ind_hl(struct ath10k_htt *htt, fw_desc = &rx->fw_desc; rx_desc_len = fw_desc->len; + if (fw_desc->u.bits.discard) { + ath10k_dbg(ar, ATH10K_DBG_HTT, "htt discard mpdu\n"); + goto err; + } + /* I have not yet seen any case where num_mpdu_ranges > 1. * qcacld does not seem handle that case either, so we introduce the * same limitiation here as well. diff --git a/drivers/net/wireless/ath/ath10k/rx_desc.h b/drivers/net/wireless/ath/ath10k/rx_desc.h index f2b6bf8f0d60..705b6295e466 100644 --- a/drivers/net/wireless/ath/ath10k/rx_desc.h +++ b/drivers/net/wireless/ath/ath10k/rx_desc.h @@ -1282,7 +1282,19 @@ struct fw_rx_desc_base { #define FW_RX_DESC_UDP (1 << 6) struct fw_rx_desc_hl { - u8 info0; + union { + struct { + u8 discard:1, + forward:1, + any_err:1, + dup_err:1, + reserved:1, + inspect:1, + extension:2; + } bits; + u8 info0; + } u; + u8 version; u8 len; u8 flags; From 0dc267b13f3a7e8424a898815dd357211b737330 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Tue, 11 May 2021 20:02:56 +0200 Subject: [PATCH 100/730] ath10k: Fix TKIP Michael MIC verification for PCIe TKIP Michael MIC was not verified properly for PCIe cases since the validation steps in ieee80211_rx_h_michael_mic_verify() in mac80211 did not get fully executed due to unexpected flag values in ieee80211_rx_status. Fix this by setting the flags property to meet mac80211 expectations for performing Michael MIC validation there. This fixes CVE-2020-26141. It does the same as ath10k_htt_rx_proc_rx_ind_hl() for SDIO which passed MIC verification case. This applies only to QCA6174/QCA9377 PCIe. Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00110-QCARMSWP-1 Cc: stable@vger.kernel.org Signed-off-by: Wen Gong Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20210511200110.c3f1d42c6746.I795593fcaae941c471425b8c7d5f7bb185d29142@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/htt_rx.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 12451ab66a19..87196f9bbdea 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -1974,6 +1974,11 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, } ath10k_htt_rx_h_csum_offload(msdu); + + if (frag && !fill_crypt_header && + enctype == HTT_RX_MPDU_ENCRYPT_TKIP_WPA) + status->flag &= ~RX_FLAG_MMIC_STRIPPED; + ath10k_htt_rx_h_undecap(ar, msdu, status, first_hdr, enctype, is_decrypted); @@ -1991,6 +1996,11 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, hdr = (void *)msdu->data; hdr->frame_control &= ~__cpu_to_le16(IEEE80211_FCTL_PROTECTED); + + if (frag && !fill_crypt_header && + enctype == HTT_RX_MPDU_ENCRYPT_TKIP_WPA) + status->flag &= ~RX_FLAG_IV_STRIPPED & + ~RX_FLAG_MMIC_STRIPPED; } } From 62a8ff67eba52dae9b107e1fb8827054ed00a265 Mon Sep 17 00:00:00 2001 From: Sriram R Date: Tue, 11 May 2021 20:02:57 +0200 Subject: [PATCH 101/730] ath10k: Validate first subframe of A-MSDU before processing the list In certain scenarios a normal MSDU can be received as an A-MSDU when the A-MSDU present bit of a QoS header gets flipped during reception. Since this bit is unauthenticated, the hardware crypto engine can pass the frame to the driver without any error indication. This could result in processing unintended subframes collected in the A-MSDU list. Hence, validate A-MSDU list by checking if the first frame has a valid subframe header. Comparing the non-aggregated MSDU and an A-MSDU, the fields of the first subframe DA matches the LLC/SNAP header fields of a normal MSDU. In order to avoid processing such frames, add a validation to filter such A-MSDU frames where the first subframe header DA matches with the LLC/SNAP header pattern. Tested-on: QCA9984 hw1.0 PCI 10.4-3.10-00047 Cc: stable@vger.kernel.org Signed-off-by: Sriram R Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20210511200110.e6f5eb7b9847.I38a77ae26096862527a5eab73caebd7346af8b66@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/htt_rx.c | 61 ++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 87196f9bbdea..7ffb5d5b2a70 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -2108,14 +2108,62 @@ static void ath10k_htt_rx_h_unchain(struct ath10k *ar, ath10k_unchain_msdu(amsdu, unchain_cnt); } +static bool ath10k_htt_rx_validate_amsdu(struct ath10k *ar, + struct sk_buff_head *amsdu) +{ + u8 *subframe_hdr; + struct sk_buff *first; + bool is_first, is_last; + struct htt_rx_desc *rxd; + struct ieee80211_hdr *hdr; + size_t hdr_len, crypto_len; + enum htt_rx_mpdu_encrypt_type enctype; + int bytes_aligned = ar->hw_params.decap_align_bytes; + + first = skb_peek(amsdu); + + rxd = (void *)first->data - sizeof(*rxd); + hdr = (void *)rxd->rx_hdr_status; + + is_first = !!(rxd->msdu_end.common.info0 & + __cpu_to_le32(RX_MSDU_END_INFO0_FIRST_MSDU)); + is_last = !!(rxd->msdu_end.common.info0 & + __cpu_to_le32(RX_MSDU_END_INFO0_LAST_MSDU)); + + /* Return in case of non-aggregated msdu */ + if (is_first && is_last) + return true; + + /* First msdu flag is not set for the first msdu of the list */ + if (!is_first) + return false; + + enctype = MS(__le32_to_cpu(rxd->mpdu_start.info0), + RX_MPDU_START_INFO0_ENCRYPT_TYPE); + + hdr_len = ieee80211_hdrlen(hdr->frame_control); + crypto_len = ath10k_htt_rx_crypto_param_len(ar, enctype); + + subframe_hdr = (u8 *)hdr + round_up(hdr_len, bytes_aligned) + + crypto_len; + + /* Validate if the amsdu has a proper first subframe. + * There are chances a single msdu can be received as amsdu when + * the unauthenticated amsdu flag of a QoS header + * gets flipped in non-SPP AMSDU's, in such cases the first + * subframe has llc/snap header in place of a valid da. + * return false if the da matches rfc1042 pattern + */ + if (ether_addr_equal(subframe_hdr, rfc1042_header)) + return false; + + return true; +} + static bool ath10k_htt_rx_amsdu_allowed(struct ath10k *ar, struct sk_buff_head *amsdu, struct ieee80211_rx_status *rx_status) { - /* FIXME: It might be a good idea to do some fuzzy-testing to drop - * invalid/dangerous frames. - */ - if (!rx_status->freq) { ath10k_dbg(ar, ATH10K_DBG_HTT, "no channel configured; ignoring frame(s)!\n"); return false; @@ -2126,6 +2174,11 @@ static bool ath10k_htt_rx_amsdu_allowed(struct ath10k *ar, return false; } + if (!ath10k_htt_rx_validate_amsdu(ar, amsdu)) { + ath10k_dbg(ar, ATH10K_DBG_HTT, "invalid amsdu received\n"); + return false; + } + return true; } From c3944a5621026c176001493d48ee66ff94e1a39a Mon Sep 17 00:00:00 2001 From: Sriram R Date: Tue, 11 May 2021 20:02:58 +0200 Subject: [PATCH 102/730] ath11k: Clear the fragment cache during key install Currently the fragment cache setup during peer assoc is cleared only during peer delete. In case a key reinstallation happens with the same peer, the same fragment cache with old fragments added before key installation could be clubbed with fragments received after. This might be exploited to mix fragments of different data resulting in a proper unintended reassembled packet to be passed up the stack. Hence flush the fragment cache on every key installation to prevent potential attacks (CVE-2020-24587). Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.4.0.1-01734-QCAHKSWPL_SILICONZ-1 v2 Cc: stable@vger.kernel.org Signed-off-by: Sriram R Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20210511200110.218dc777836f.I9af6fc76215a35936c4152552018afb5079c5d8c@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath11k/dp_rx.c | 18 ++++++++++++++++++ drivers/net/wireless/ath/ath11k/dp_rx.h | 1 + drivers/net/wireless/ath/ath11k/mac.c | 6 ++++++ 3 files changed, 25 insertions(+) diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c index 1d9aa1bb6b6e..3382f8bfcb48 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.c +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c @@ -852,6 +852,24 @@ static void ath11k_dp_rx_frags_cleanup(struct dp_rx_tid *rx_tid, bool rel_link_d __skb_queue_purge(&rx_tid->rx_frags); } +void ath11k_peer_frags_flush(struct ath11k *ar, struct ath11k_peer *peer) +{ + struct dp_rx_tid *rx_tid; + int i; + + lockdep_assert_held(&ar->ab->base_lock); + + for (i = 0; i <= IEEE80211_NUM_TIDS; i++) { + rx_tid = &peer->rx_tid[i]; + + spin_unlock_bh(&ar->ab->base_lock); + del_timer_sync(&rx_tid->frag_timer); + spin_lock_bh(&ar->ab->base_lock); + + ath11k_dp_rx_frags_cleanup(rx_tid, true); + } +} + void ath11k_peer_rx_tid_cleanup(struct ath11k *ar, struct ath11k_peer *peer) { struct dp_rx_tid *rx_tid; diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.h b/drivers/net/wireless/ath/ath11k/dp_rx.h index bf399312b5ff..623da3bf9dc8 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.h +++ b/drivers/net/wireless/ath/ath11k/dp_rx.h @@ -49,6 +49,7 @@ int ath11k_dp_peer_rx_pn_replay_config(struct ath11k_vif *arvif, const u8 *peer_addr, enum set_key_cmd key_cmd, struct ieee80211_key_conf *key); +void ath11k_peer_frags_flush(struct ath11k *ar, struct ath11k_peer *peer); void ath11k_peer_rx_tid_cleanup(struct ath11k *ar, struct ath11k_peer *peer); void ath11k_peer_rx_tid_delete(struct ath11k *ar, struct ath11k_peer *peer, u8 tid); diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 4df425dd31a2..9d0ff150ec30 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -2779,6 +2779,12 @@ static int ath11k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, */ spin_lock_bh(&ab->base_lock); peer = ath11k_peer_find(ab, arvif->vdev_id, peer_addr); + + /* flush the fragments cache during key (re)install to + * ensure all frags in the new frag list belong to the same key. + */ + if (peer && cmd == SET_KEY) + ath11k_peer_frags_flush(ar, peer); spin_unlock_bh(&ab->base_lock); if (!peer) { From 210f563b097997ce917e82feab356b298bfd12b0 Mon Sep 17 00:00:00 2001 From: Sriram R Date: Tue, 11 May 2021 20:02:59 +0200 Subject: [PATCH 103/730] ath11k: Drop multicast fragments Fragmentation is used only with unicast frames. Drop multicast fragments to avoid any undesired behavior. Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.4.0.1-01734-QCAHKSWPL_SILICONZ-1 v2 Cc: stable@vger.kernel.org Signed-off-by: Sriram R Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20210511200110.1d53bfd20a8b.Ibb63283051bb5e2c45951932c6e1f351d5a73dc3@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath11k/dp_rx.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c index 3382f8bfcb48..603d2f93ac18 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.c +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c @@ -260,6 +260,16 @@ static void ath11k_dp_rxdesc_set_msdu_len(struct ath11k_base *ab, ab->hw_params.hw_ops->rx_desc_set_msdu_len(desc, len); } +static bool ath11k_dp_rx_h_attn_is_mcbc(struct ath11k_base *ab, + struct hal_rx_desc *desc) +{ + struct rx_attention *attn = ath11k_dp_rx_get_attention(ab, desc); + + return ath11k_dp_rx_h_msdu_end_first_msdu(ab, desc) && + (!!FIELD_GET(RX_ATTENTION_INFO1_MCAST_BCAST, + __le32_to_cpu(attn->info1))); +} + static void ath11k_dp_service_mon_ring(struct timer_list *t) { struct ath11k_base *ab = from_timer(ab, t, mon_reap_timer); @@ -3468,6 +3478,7 @@ static int ath11k_dp_rx_frag_h_mpdu(struct ath11k *ar, u8 tid; int ret = 0; bool more_frags; + bool is_mcbc; rx_desc = (struct hal_rx_desc *)msdu->data; peer_id = ath11k_dp_rx_h_mpdu_start_peer_id(ar->ab, rx_desc); @@ -3475,6 +3486,11 @@ static int ath11k_dp_rx_frag_h_mpdu(struct ath11k *ar, seqno = ath11k_dp_rx_h_mpdu_start_seq_no(ar->ab, rx_desc); frag_no = ath11k_dp_rx_h_mpdu_start_frag_no(ar->ab, msdu); more_frags = ath11k_dp_rx_h_mpdu_start_more_frags(ar->ab, msdu); + is_mcbc = ath11k_dp_rx_h_attn_is_mcbc(ar->ab, rx_desc); + + /* Multicast/Broadcast fragments are not expected */ + if (is_mcbc) + return -EINVAL; if (!ath11k_dp_rx_h_mpdu_start_seq_ctrl_valid(ar->ab, rx_desc) || !ath11k_dp_rx_h_mpdu_start_fc_valid(ar->ab, rx_desc) || From 0bd50826a40e012a35c58ed3576b3873643e7a7d Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Thu, 6 May 2021 15:08:24 +0800 Subject: [PATCH 104/730] leds: Fix reference file name of documentation In commit 56b01acc1c79a ("dt-bindings: gpio: fairchild,74hc595: Convert to json-schema"), gpio-74x164.txt was deleted and replaced by fairchild,74hc595.yaml. Fix the reference file name. Signed-off-by: Wan Jiabing Acked-by: Pavel Machek Link: https://lore.kernel.org/r/20210506070824.10965-1-wanjiabing@vivo.com Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/leds/leds-bcm6328.txt | 4 ++-- Documentation/devicetree/bindings/leds/leds-bcm6358.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/leds/leds-bcm6328.txt b/Documentation/devicetree/bindings/leds/leds-bcm6328.txt index ccebce597f37..a555d94084b7 100644 --- a/Documentation/devicetree/bindings/leds/leds-bcm6328.txt +++ b/Documentation/devicetree/bindings/leds/leds-bcm6328.txt @@ -4,8 +4,8 @@ This controller is present on BCM6318, BCM6328, BCM6362 and BCM63268. In these SoCs it's possible to control LEDs both as GPIOs or by hardware. However, on some devices there are Serial LEDs (LEDs connected to a 74x164 controller), which can either be controlled by software (exporting the 74x164 -as spi-gpio. See Documentation/devicetree/bindings/gpio/gpio-74x164.txt), or -by hardware using this driver. +as spi-gpio. See Documentation/devicetree/bindings/gpio/fairchild,74hc595.yaml), +or by hardware using this driver. Some of these Serial LEDs are hardware controlled (e.g. ethernet LEDs) and exporting the 74x164 as spi-gpio prevents those LEDs to be hardware controlled, so the only chance to keep them working is by using this driver. diff --git a/Documentation/devicetree/bindings/leds/leds-bcm6358.txt b/Documentation/devicetree/bindings/leds/leds-bcm6358.txt index da5708e7b43b..6e51c6b91ee5 100644 --- a/Documentation/devicetree/bindings/leds/leds-bcm6358.txt +++ b/Documentation/devicetree/bindings/leds/leds-bcm6358.txt @@ -3,7 +3,7 @@ LEDs connected to Broadcom BCM6358 controller This controller is present on BCM6358 and BCM6368. In these SoCs there are Serial LEDs (LEDs connected to a 74x164 controller), which can either be controlled by software (exporting the 74x164 as spi-gpio. -See Documentation/devicetree/bindings/gpio/gpio-74x164.txt), or +See Documentation/devicetree/bindings/gpio/fairchild,74hc595.yaml), or by hardware using this driver. Required properties: From 67f29896fdc83298eed5a6576ff8f9873f709228 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 11 May 2021 10:26:03 +0300 Subject: [PATCH 105/730] RDMA/rxe: Clear all QP fields if creation failed rxe_qp_do_cleanup() relies on valid pointer values in QP for the properly created ones, but in case rxe_qp_from_init() failed it was filled with garbage and caused tot the following error. refcount_t: underflow; use-after-free. WARNING: CPU: 1 PID: 12560 at lib/refcount.c:28 refcount_warn_saturate+0x1d1/0x1e0 lib/refcount.c:28 Modules linked in: CPU: 1 PID: 12560 Comm: syz-executor.4 Not tainted 5.12.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:refcount_warn_saturate+0x1d1/0x1e0 lib/refcount.c:28 Code: e9 db fe ff ff 48 89 df e8 2c c2 ea fd e9 8a fe ff ff e8 72 6a a7 fd 48 c7 c7 e0 b2 c1 89 c6 05 dc 3a e6 09 01 e8 ee 74 fb 04 <0f> 0b e9 af fe ff ff 0f 1f 84 00 00 00 00 00 41 56 41 55 41 54 55 RSP: 0018:ffffc900097ceba8 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000040000 RSI: ffffffff815bb075 RDI: fffff520012f9d67 RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000000000000 R10: ffffffff815b4eae R11: 0000000000000000 R12: ffff8880322a4800 R13: ffff8880322a4940 R14: ffff888033044e00 R15: 0000000000000000 FS: 00007f6eb2be3700(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fdbe5d41000 CR3: 000000001d181000 CR4: 00000000001506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __refcount_sub_and_test include/linux/refcount.h:283 [inline] __refcount_dec_and_test include/linux/refcount.h:315 [inline] refcount_dec_and_test include/linux/refcount.h:333 [inline] kref_put include/linux/kref.h:64 [inline] rxe_qp_do_cleanup+0x96f/0xaf0 drivers/infiniband/sw/rxe/rxe_qp.c:805 execute_in_process_context+0x37/0x150 kernel/workqueue.c:3327 rxe_elem_release+0x9f/0x180 drivers/infiniband/sw/rxe/rxe_pool.c:391 kref_put include/linux/kref.h:65 [inline] rxe_create_qp+0x2cd/0x310 drivers/infiniband/sw/rxe/rxe_verbs.c:425 _ib_create_qp drivers/infiniband/core/core_priv.h:331 [inline] ib_create_named_qp+0x2ad/0x1370 drivers/infiniband/core/verbs.c:1231 ib_create_qp include/rdma/ib_verbs.h:3644 [inline] create_mad_qp+0x177/0x2d0 drivers/infiniband/core/mad.c:2920 ib_mad_port_open drivers/infiniband/core/mad.c:3001 [inline] ib_mad_init_device+0xd6f/0x1400 drivers/infiniband/core/mad.c:3092 add_client_context+0x405/0x5e0 drivers/infiniband/core/device.c:717 enable_device_and_get+0x1cd/0x3b0 drivers/infiniband/core/device.c:1331 ib_register_device drivers/infiniband/core/device.c:1413 [inline] ib_register_device+0x7c7/0xa50 drivers/infiniband/core/device.c:1365 rxe_register_device+0x3d5/0x4a0 drivers/infiniband/sw/rxe/rxe_verbs.c:1147 rxe_add+0x12fe/0x16d0 drivers/infiniband/sw/rxe/rxe.c:247 rxe_net_add+0x8c/0xe0 drivers/infiniband/sw/rxe/rxe_net.c:503 rxe_newlink drivers/infiniband/sw/rxe/rxe.c:269 [inline] rxe_newlink+0xb7/0xe0 drivers/infiniband/sw/rxe/rxe.c:250 nldev_newlink+0x30e/0x550 drivers/infiniband/core/nldev.c:1555 rdma_nl_rcv_msg+0x36d/0x690 drivers/infiniband/core/netlink.c:195 rdma_nl_rcv_skb drivers/infiniband/core/netlink.c:239 [inline] rdma_nl_rcv+0x2ee/0x430 drivers/infiniband/core/netlink.c:259 netlink_unicast_kernel net/netlink/af_netlink.c:1312 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1338 netlink_sendmsg+0x856/0xd90 net/netlink/af_netlink.c:1927 sock_sendmsg_nosec net/socket.c:654 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:674 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2350 ___sys_sendmsg+0xf3/0x170 net/socket.c:2404 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2433 do_syscall_64+0x3a/0xb0 arch/x86/entry/common.c:47 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/7bf8d548764d406dbbbaf4b574960ebfd5af8387.1620717918.git.leonro@nvidia.com Reported-by: syzbot+36a7f280de4e11c6f04e@syzkaller.appspotmail.com Signed-off-by: Leon Romanovsky Reviewed-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_qp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 34ae957a315c..b0f350d674fd 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -242,6 +242,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, if (err) { vfree(qp->sq.queue->buf); kfree(qp->sq.queue); + qp->sq.queue = NULL; return err; } @@ -295,6 +296,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, if (err) { vfree(qp->rq.queue->buf); kfree(qp->rq.queue); + qp->rq.queue = NULL; return err; } } @@ -355,6 +357,11 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, err2: rxe_queue_cleanup(qp->sq.queue); err1: + qp->pd = NULL; + qp->rcq = NULL; + qp->scq = NULL; + qp->srq = NULL; + if (srq) rxe_drop_ref(srq); rxe_drop_ref(scq); From b24abcff918a5cbf44b0c982bd3477a93e8e4911 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 11 May 2021 22:35:16 +0200 Subject: [PATCH 106/730] bpf, kconfig: Add consolidated menu entry for bpf with core options Right now, all core BPF related options are scattered in different Kconfig locations mainly due to historic reasons. Moving forward, lets add a proper subsystem entry under ... General setup ---> BPF subsystem ---> ... in order to have all knobs in a single location and thus ease BPF related configuration. Networking related bits such as sockmap are out of scope for the general setup and therefore better suited to remain in net/Kconfig. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/f23f58765a4d59244ebd8037da7b6a6b2fb58446.1620765074.git.daniel@iogearbox.net --- init/Kconfig | 41 +----------------------- kernel/bpf/Kconfig | 78 ++++++++++++++++++++++++++++++++++++++++++++++ net/Kconfig | 27 ---------------- 3 files changed, 79 insertions(+), 67 deletions(-) create mode 100644 kernel/bpf/Kconfig diff --git a/init/Kconfig b/init/Kconfig index ca559ccdaa32..2282a6842dc6 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -439,6 +439,7 @@ config AUDITSYSCALL source "kernel/irq/Kconfig" source "kernel/time/Kconfig" +source "kernel/bpf/Kconfig" source "kernel/Kconfig.preempt" menu "CPU/Task time and stats accounting" @@ -1705,46 +1706,6 @@ config KALLSYMS_BASE_RELATIVE # syscall, maps, verifier -config BPF_LSM - bool "LSM Instrumentation with BPF" - depends on BPF_EVENTS - depends on BPF_SYSCALL - depends on SECURITY - depends on BPF_JIT - help - Enables instrumentation of the security hooks with eBPF programs for - implementing dynamic MAC and Audit Policies. - - If you are unsure how to answer this question, answer N. - -config BPF_SYSCALL - bool "Enable bpf() system call" - select BPF - select IRQ_WORK - select TASKS_TRACE_RCU - select BINARY_PRINTF - select NET_SOCK_MSG if INET - default n - help - Enable the bpf() system call that allows to manipulate eBPF - programs and maps via file descriptors. - -config ARCH_WANT_DEFAULT_BPF_JIT - bool - -config BPF_JIT_ALWAYS_ON - bool "Permanently enable BPF JIT and remove BPF interpreter" - depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT - help - Enables BPF JIT and removes BPF interpreter to avoid - speculative execution of BPF instructions by the interpreter - -config BPF_JIT_DEFAULT_ON - def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON - depends on HAVE_EBPF_JIT && BPF_JIT - -source "kernel/bpf/preload/Kconfig" - config USERFAULTFD bool "Enable userfaultfd() system call" depends on MMU diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig new file mode 100644 index 000000000000..b4edaefc6255 --- /dev/null +++ b/kernel/bpf/Kconfig @@ -0,0 +1,78 @@ +# SPDX-License-Identifier: GPL-2.0-only + +# BPF interpreter that, for example, classic socket filters depend on. +config BPF + bool + +# Used by archs to tell that they support BPF JIT compiler plus which +# flavour. Only one of the two can be selected for a specific arch since +# eBPF JIT supersedes the cBPF JIT. + +# Classic BPF JIT (cBPF) +config HAVE_CBPF_JIT + bool + +# Extended BPF JIT (eBPF) +config HAVE_EBPF_JIT + bool + +# Used by archs to tell that they want the BPF JIT compiler enabled by +# default for kernels that were compiled with BPF JIT support. +config ARCH_WANT_DEFAULT_BPF_JIT + bool + +menu "BPF subsystem" + +config BPF_SYSCALL + bool "Enable bpf() system call" + select BPF + select IRQ_WORK + select TASKS_TRACE_RCU + select BINARY_PRINTF + select NET_SOCK_MSG if INET + default n + help + Enable the bpf() system call that allows to manipulate BPF programs + and maps via file descriptors. + +config BPF_JIT + bool "Enable BPF Just In Time compiler" + depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT + depends on MODULES + help + BPF programs are normally handled by a BPF interpreter. This option + allows the kernel to generate native code when a program is loaded + into the kernel. This will significantly speed-up processing of BPF + programs. + + Note, an admin should enable this feature changing: + /proc/sys/net/core/bpf_jit_enable + /proc/sys/net/core/bpf_jit_harden (optional) + /proc/sys/net/core/bpf_jit_kallsyms (optional) + +config BPF_JIT_ALWAYS_ON + bool "Permanently enable BPF JIT and remove BPF interpreter" + depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT + help + Enables BPF JIT and removes BPF interpreter to avoid speculative + execution of BPF instructions by the interpreter. + +config BPF_JIT_DEFAULT_ON + def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON + depends on HAVE_EBPF_JIT && BPF_JIT + +source "kernel/bpf/preload/Kconfig" + +config BPF_LSM + bool "Enable BPF LSM Instrumentation" + depends on BPF_EVENTS + depends on BPF_SYSCALL + depends on SECURITY + depends on BPF_JIT + help + Enables instrumentation of the security hooks with BPF programs for + implementing dynamic MAC and Audit Policies. + + If you are unsure how to answer this question, answer N. + +endmenu # "BPF subsystem" diff --git a/net/Kconfig b/net/Kconfig index f5ee7c65e6b4..c7392c449b25 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -302,21 +302,6 @@ config BQL select DQL default y -config BPF_JIT - bool "enable BPF Just In Time compiler" - depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT - depends on MODULES - help - Berkeley Packet Filter filtering capabilities are normally handled - by an interpreter. This option allows kernel to generate a native - code when filter is loaded in memory. This should speedup - packet sniffing (libpcap/tcpdump). - - Note, admin should enable this feature changing: - /proc/sys/net/core/bpf_jit_enable - /proc/sys/net/core/bpf_jit_harden (optional) - /proc/sys/net/core/bpf_jit_kallsyms (optional) - config BPF_STREAM_PARSER bool "enable BPF STREAM_PARSER" depends on INET @@ -470,15 +455,3 @@ config ETHTOOL_NETLINK e.g. notification messages. endif # if NET - -# Used by archs to tell that they support BPF JIT compiler plus which flavour. -# Only one of the two can be selected for a specific arch since eBPF JIT supersedes -# the cBPF JIT. - -# Classic BPF JIT (cBPF) -config HAVE_CBPF_JIT - bool - -# Extended BPF JIT (eBPF) -config HAVE_EBPF_JIT - bool From 08389d888287c3823f80b0216766b71e17f0aba5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 11 May 2021 22:35:17 +0200 Subject: [PATCH 107/730] bpf: Add kconfig knob for disabling unpriv bpf by default Add a kconfig knob which allows for unprivileged bpf to be disabled by default. If set, the knob sets /proc/sys/kernel/unprivileged_bpf_disabled to value of 2. This still allows a transition of 2 -> {0,1} through an admin. Similarly, this also still keeps 1 -> {1} behavior intact, so that once set to permanently disabled, it cannot be undone aside from a reboot. We've also added extra2 with max of 2 for the procfs handler, so that an admin still has a chance to toggle between 0 <-> 2. Either way, as an additional alternative, applications can make use of CAP_BPF that we added a while ago. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/74ec548079189e4e4dffaeb42b8987bb3c852eee.1620765074.git.daniel@iogearbox.net --- Documentation/admin-guide/sysctl/kernel.rst | 17 +++++++++--- kernel/bpf/Kconfig | 10 +++++++ kernel/bpf/syscall.c | 3 ++- kernel/sysctl.c | 29 +++++++++++++++++---- 4 files changed, 50 insertions(+), 9 deletions(-) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 1d56a6b73a4e..24ab20d7a50a 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -1457,11 +1457,22 @@ unprivileged_bpf_disabled ========================= Writing 1 to this entry will disable unprivileged calls to ``bpf()``; -once disabled, calling ``bpf()`` without ``CAP_SYS_ADMIN`` will return -``-EPERM``. +once disabled, calling ``bpf()`` without ``CAP_SYS_ADMIN`` or ``CAP_BPF`` +will return ``-EPERM``. Once set to 1, this can't be cleared from the +running kernel anymore. -Once set, this can't be cleared. +Writing 2 to this entry will also disable unprivileged calls to ``bpf()``, +however, an admin can still change this setting later on, if needed, by +writing 0 or 1 to this entry. +If ``BPF_UNPRIV_DEFAULT_OFF`` is enabled in the kernel config, then this +entry will default to 2 instead of 0. + += ============================================================= +0 Unprivileged calls to ``bpf()`` are enabled +1 Unprivileged calls to ``bpf()`` are disabled without recovery +2 Unprivileged calls to ``bpf()`` are disabled += ============================================================= watchdog ======== diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig index b4edaefc6255..26b591e23f16 100644 --- a/kernel/bpf/Kconfig +++ b/kernel/bpf/Kconfig @@ -61,6 +61,16 @@ config BPF_JIT_DEFAULT_ON def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON depends on HAVE_EBPF_JIT && BPF_JIT +config BPF_UNPRIV_DEFAULT_OFF + bool "Disable unprivileged BPF by default" + depends on BPF_SYSCALL + help + Disables unprivileged BPF by default by setting the corresponding + /proc/sys/kernel/unprivileged_bpf_disabled knob to 2. An admin can + still reenable it by setting it to 0 later on, or permanently + disable it by setting it to 1 (from which no other transition to + 0 is possible anymore). + source "kernel/bpf/preload/Kconfig" config BPF_LSM diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 941ca06d9dfa..ea04b0deb5ce 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -50,7 +50,8 @@ static DEFINE_SPINLOCK(map_idr_lock); static DEFINE_IDR(link_idr); static DEFINE_SPINLOCK(link_idr_lock); -int sysctl_unprivileged_bpf_disabled __read_mostly; +int sysctl_unprivileged_bpf_disabled __read_mostly = + IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0; static const struct bpf_map_ops * const bpf_map_types[] = { #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f91d327273c1..6df7c81f7cdd 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -225,7 +225,27 @@ static int bpf_stats_handler(struct ctl_table *table, int write, mutex_unlock(&bpf_stats_enabled_mutex); return ret; } -#endif + +static int bpf_unpriv_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int ret, unpriv_enable = *(int *)table->data; + bool locked_state = unpriv_enable == 1; + struct ctl_table tmp = *table; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + tmp.data = &unpriv_enable; + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + if (write && !ret) { + if (locked_state && unpriv_enable != 1) + return -EPERM; + *(int *)table->data = unpriv_enable; + } + return ret; +} +#endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */ /* * /proc/sys support @@ -2600,10 +2620,9 @@ static struct ctl_table kern_table[] = { .data = &sysctl_unprivileged_bpf_disabled, .maxlen = sizeof(sysctl_unprivileged_bpf_disabled), .mode = 0644, - /* only handle a transition from default "0" to "1" */ - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ONE, - .extra2 = SYSCTL_ONE, + .proc_handler = bpf_unpriv_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = &two, }, { .procname = "bpf_stats_enabled", From 35e3815fa8102fab4dee75f3547472c66581125d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 29 Apr 2021 13:47:12 +0200 Subject: [PATCH 108/730] bpf: Add deny list of btf ids check for tracing programs The recursion check in __bpf_prog_enter and __bpf_prog_exit leaves some (not inlined) functions unprotected: In __bpf_prog_enter: - migrate_disable is called before prog->active is checked In __bpf_prog_exit: - migrate_enable,rcu_read_unlock_strict are called after prog->active is decreased When attaching trampoline to them we get panic like: traps: PANIC: double fault, error_code: 0x0 double fault: 0000 [#1] SMP PTI RIP: 0010:__bpf_prog_enter+0x4/0x50 ... Call Trace: bpf_trampoline_6442466513_0+0x18/0x1000 migrate_disable+0x5/0x50 __bpf_prog_enter+0x9/0x50 bpf_trampoline_6442466513_0+0x18/0x1000 migrate_disable+0x5/0x50 __bpf_prog_enter+0x9/0x50 bpf_trampoline_6442466513_0+0x18/0x1000 migrate_disable+0x5/0x50 __bpf_prog_enter+0x9/0x50 bpf_trampoline_6442466513_0+0x18/0x1000 migrate_disable+0x5/0x50 ... Fixing this by adding deny list of btf ids for tracing programs and checking btf id during program verification. Adding above functions to this list. Suggested-by: Alexei Starovoitov Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210429114712.43783-1-jolsa@kernel.org --- kernel/bpf/verifier.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9352a1b7de2d..c58598ef4b5b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -13196,6 +13196,17 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, return 0; } +BTF_SET_START(btf_id_deny) +BTF_ID_UNUSED +#ifdef CONFIG_SMP +BTF_ID(func, migrate_disable) +BTF_ID(func, migrate_enable) +#endif +#if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU +BTF_ID(func, rcu_read_unlock_strict) +#endif +BTF_SET_END(btf_id_deny) + static int check_attach_btf_id(struct bpf_verifier_env *env) { struct bpf_prog *prog = env->prog; @@ -13255,6 +13266,9 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) ret = bpf_lsm_verify_prog(&env->log, prog); if (ret < 0) return ret; + } else if (prog->type == BPF_PROG_TYPE_TRACING && + btf_id_set_contains(&btf_id_deny, btf_id)) { + return -EINVAL; } key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id); From e2d5b2bb769fa5f500760caba76436ba3a10a895 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Tue, 11 May 2021 10:10:54 +0200 Subject: [PATCH 109/730] bpf: Fix nested bpf_bprintf_prepare with more per-cpu buffers The bpf_seq_printf, bpf_trace_printk and bpf_snprintf helpers share one per-cpu buffer that they use to store temporary data (arguments to bprintf). They "get" that buffer with try_get_fmt_tmp_buf and "put" it by the end of their scope with bpf_bprintf_cleanup. If one of these helpers gets called within the scope of one of these helpers, for example: a first bpf program gets called, uses bpf_trace_printk which calls raw_spin_lock_irqsave which is traced by another bpf program that calls bpf_snprintf, then the second "get" fails. Essentially, these helpers are not re-entrant. They would return -EBUSY and print a warning message once. This patch triples the number of bprintf buffers to allow three levels of nesting. This is very similar to what was done for tracepoints in "9594dc3c7e7 bpf: fix nested bpf tracepoints with per-cpu data" Fixes: d9c9e4db186a ("bpf: Factorize bpf_trace_printk and bpf_seq_printf") Reported-by: syzbot+63122d0bc347f18c1884@syzkaller.appspotmail.com Signed-off-by: Florent Revest Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210511081054.2125874-1-revest@chromium.org --- kernel/bpf/helpers.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 544773970dbc..ef658a9ea5c9 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -696,34 +696,35 @@ static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, */ #define MAX_PRINTF_BUF_LEN 512 -struct bpf_printf_buf { - char tmp_buf[MAX_PRINTF_BUF_LEN]; +/* Support executing three nested bprintf helper calls on a given CPU */ +struct bpf_bprintf_buffers { + char tmp_bufs[3][MAX_PRINTF_BUF_LEN]; }; -static DEFINE_PER_CPU(struct bpf_printf_buf, bpf_printf_buf); -static DEFINE_PER_CPU(int, bpf_printf_buf_used); +static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs); +static DEFINE_PER_CPU(int, bpf_bprintf_nest_level); static int try_get_fmt_tmp_buf(char **tmp_buf) { - struct bpf_printf_buf *bufs; - int used; + struct bpf_bprintf_buffers *bufs; + int nest_level; preempt_disable(); - used = this_cpu_inc_return(bpf_printf_buf_used); - if (WARN_ON_ONCE(used > 1)) { - this_cpu_dec(bpf_printf_buf_used); + nest_level = this_cpu_inc_return(bpf_bprintf_nest_level); + if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bufs->tmp_bufs))) { + this_cpu_dec(bpf_bprintf_nest_level); preempt_enable(); return -EBUSY; } - bufs = this_cpu_ptr(&bpf_printf_buf); - *tmp_buf = bufs->tmp_buf; + bufs = this_cpu_ptr(&bpf_bprintf_bufs); + *tmp_buf = bufs->tmp_bufs[nest_level - 1]; return 0; } void bpf_bprintf_cleanup(void) { - if (this_cpu_read(bpf_printf_buf_used)) { - this_cpu_dec(bpf_printf_buf_used); + if (this_cpu_read(bpf_bprintf_nest_level)) { + this_cpu_dec(bpf_bprintf_nest_level); preempt_enable(); } } From 67e7ec0bd4535fc6e6d3f5d174f80e10a8a80c6e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 8 May 2021 12:22:12 -0300 Subject: [PATCH 110/730] libbpf: Provide GELF_ST_VISIBILITY() define for older libelf Where that macro isn't available. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/YJaspEh0qZr4LYOc@kernel.org --- tools/lib/bpf/libbpf_internal.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index ee426226928f..acbcf6c7bdf8 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -41,6 +41,11 @@ #define ELF_C_READ_MMAP ELF_C_READ #endif +/* Older libelf all end up in this expression, for both 32 and 64 bit */ +#ifndef GELF_ST_VISIBILITY +#define GELF_ST_VISIBILITY(o) ((o) & 0x03) +#endif + #define BTF_INFO_ENC(kind, kind_flag, vlen) \ ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) #define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type) From 096eccdef0b32f47e9354231ddc3aaaf9527d51c Mon Sep 17 00:00:00 2001 From: Jussi Maki Date: Wed, 5 May 2021 08:59:25 +0000 Subject: [PATCH 111/730] selftests/bpf: Rewrite test_tc_redirect.sh as prog_tests/tc_redirect.c As discussed in [0], this ports test_tc_redirect.sh to the test_progs framework and removes the old test. This makes it more in line with rest of the tests and makes it possible to run this test case with vmtest.sh and under the bpf CI. The upcoming skb_change_head() helper fix in [0] is depending on it and extending the test case to redirect a packet from L3 device to veth. [0] https://lore.kernel.org/bpf/20210427135550.807355-1-joamaki@gmail.com Signed-off-by: Jussi Maki Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210505085925.783985-1-joamaki@gmail.com --- tools/testing/selftests/bpf/network_helpers.c | 2 +- tools/testing/selftests/bpf/network_helpers.h | 1 + .../selftests/bpf/prog_tests/tc_redirect.c | 589 ++++++++++++++++++ .../selftests/bpf/progs/test_tc_neigh.c | 33 +- .../selftests/bpf/progs/test_tc_neigh_fib.c | 9 +- .../selftests/bpf/progs/test_tc_peer.c | 33 +- .../testing/selftests/bpf/test_tc_redirect.sh | 216 ------- 7 files changed, 617 insertions(+), 266 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/tc_redirect.c delete mode 100755 tools/testing/selftests/bpf/test_tc_redirect.sh diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 12ee40284da0..2060bc122c53 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -40,7 +40,7 @@ struct ipv6_packet pkt_v6 = { .tcp.doff = 5, }; -static int settimeo(int fd, int timeout_ms) +int settimeo(int fd, int timeout_ms) { struct timeval timeout = { .tv_sec = 3 }; diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 7205f8afdba1..5e0d51c07b63 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -33,6 +33,7 @@ struct ipv6_packet { } __packed; extern struct ipv6_packet pkt_v6; +int settimeo(int fd, int timeout_ms); int start_server(int family, int type, const char *addr, __u16 port, int timeout_ms); int connect_to_fd(int server_fd, int timeout_ms); diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c new file mode 100644 index 000000000000..95ef9fcd31d8 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -0,0 +1,589 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* + * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link + * between src and dst. The netns fwd has veth links to each src and dst. The + * client is in src and server in dst. The test installs a TC BPF program to each + * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the + * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace + * switch from ingress side; it also installs a checker prog on the egress side + * to drop unexpected traffic. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test_progs.h" +#include "network_helpers.h" +#include "test_tc_neigh_fib.skel.h" +#include "test_tc_neigh.skel.h" +#include "test_tc_peer.skel.h" + +#define NS_SRC "ns_src" +#define NS_FWD "ns_fwd" +#define NS_DST "ns_dst" + +#define IP4_SRC "172.16.1.100" +#define IP4_DST "172.16.2.100" +#define IP4_PORT 9004 + +#define IP6_SRC "::1:dead:beef:cafe" +#define IP6_DST "::2:dead:beef:cafe" +#define IP6_PORT 9006 + +#define IP4_SLL "169.254.0.1" +#define IP4_DLL "169.254.0.2" +#define IP4_NET "169.254.0.0" + +#define IFADDR_STR_LEN 18 +#define PING_ARGS "-c 3 -w 10 -q" + +#define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src" +#define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst" +#define CHK_PROG_PIN_FILE "/sys/fs/bpf/test_tc_chk" + +#define TIMEOUT_MILLIS 10000 + +#define MAX_PROC_MODS 128 +#define MAX_PROC_VALUE_LEN 16 + +#define log_err(MSG, ...) \ + fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ + __FILE__, __LINE__, strerror(errno), ##__VA_ARGS__) + +struct proc_mod { + char path[PATH_MAX]; + char oldval[MAX_PROC_VALUE_LEN]; + int oldlen; +}; + +static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL}; +static int root_netns_fd = -1; +static int num_proc_mods; +static struct proc_mod proc_mods[MAX_PROC_MODS]; + +/** + * modify_proc() - Modify entry in /proc + * + * Modifies an entry in /proc and saves the original value for later + * restoration with restore_proc(). + */ +static int modify_proc(const char *path, const char *newval) +{ + struct proc_mod *mod; + FILE *f; + + if (num_proc_mods + 1 > MAX_PROC_MODS) + return -1; + + f = fopen(path, "r+"); + if (!f) + return -1; + + mod = &proc_mods[num_proc_mods]; + num_proc_mods++; + + strncpy(mod->path, path, PATH_MAX); + + if (!fread(mod->oldval, 1, MAX_PROC_VALUE_LEN, f)) { + log_err("reading from %s failed", path); + goto fail; + } + rewind(f); + if (fwrite(newval, strlen(newval), 1, f) != 1) { + log_err("writing to %s failed", path); + goto fail; + } + + fclose(f); + return 0; + +fail: + fclose(f); + num_proc_mods--; + return -1; +} + +/** + * restore_proc() - Restore all /proc modifications + */ +static void restore_proc(void) +{ + int i; + + for (i = 0; i < num_proc_mods; i++) { + struct proc_mod *mod = &proc_mods[i]; + FILE *f; + + f = fopen(mod->path, "w"); + if (!f) { + log_err("fopen of %s failed", mod->path); + continue; + } + + if (fwrite(mod->oldval, mod->oldlen, 1, f) != 1) + log_err("fwrite to %s failed", mod->path); + + fclose(f); + } + num_proc_mods = 0; +} + +/** + * setns_by_name() - Set networks namespace by name + */ +static int setns_by_name(const char *name) +{ + int nsfd; + char nspath[PATH_MAX]; + int err; + + snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name); + nsfd = open(nspath, O_RDONLY | O_CLOEXEC); + if (nsfd < 0) + return nsfd; + + err = setns(nsfd, CLONE_NEWNET); + close(nsfd); + + return err; +} + +/** + * setns_root() - Set network namespace to original (root) namespace + * + * Not expected to ever fail, so error not returned, but failure logged + * and test marked as failed. + */ +static void setns_root(void) +{ + ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "setns root"); +} + +static int netns_setup_namespaces(const char *verb) +{ + const char * const *ns = namespaces; + char cmd[128]; + + while (*ns) { + snprintf(cmd, sizeof(cmd), "ip netns %s %s", verb, *ns); + if (!ASSERT_OK(system(cmd), cmd)) + return -1; + ns++; + } + return 0; +} + +struct netns_setup_result { + int ifindex_veth_src_fwd; + int ifindex_veth_dst_fwd; +}; + +static int get_ifaddr(const char *name, char *ifaddr) +{ + char path[PATH_MAX]; + FILE *f; + int ret; + + snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name); + f = fopen(path, "r"); + if (!ASSERT_OK_PTR(f, path)) + return -1; + + ret = fread(ifaddr, 1, IFADDR_STR_LEN, f); + if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) { + fclose(f); + return -1; + } + fclose(f); + return 0; +} + +static int get_ifindex(const char *name) +{ + char path[PATH_MAX]; + char buf[32]; + FILE *f; + int ret; + + snprintf(path, PATH_MAX, "/sys/class/net/%s/ifindex", name); + f = fopen(path, "r"); + if (!ASSERT_OK_PTR(f, path)) + return -1; + + ret = fread(buf, 1, sizeof(buf), f); + if (!ASSERT_GT(ret, 0, "fread ifindex")) { + fclose(f); + return -1; + } + fclose(f); + return atoi(buf); +} + +#define SYS(fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + if (!ASSERT_OK(system(cmd), cmd)) \ + goto fail; \ + }) + +static int netns_setup_links_and_routes(struct netns_setup_result *result) +{ + char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {}; + char veth_dst_fwd_addr[IFADDR_STR_LEN+1] = {}; + + SYS("ip link add veth_src type veth peer name veth_src_fwd"); + SYS("ip link add veth_dst type veth peer name veth_dst_fwd"); + if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr)) + goto fail; + if (get_ifaddr("veth_dst_fwd", veth_dst_fwd_addr)) + goto fail; + + result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd"); + if (result->ifindex_veth_src_fwd < 0) + goto fail; + result->ifindex_veth_dst_fwd = get_ifindex("veth_dst_fwd"); + if (result->ifindex_veth_dst_fwd < 0) + goto fail; + + SYS("ip link set veth_src netns " NS_SRC); + SYS("ip link set veth_src_fwd netns " NS_FWD); + SYS("ip link set veth_dst_fwd netns " NS_FWD); + SYS("ip link set veth_dst netns " NS_DST); + + /** setup in 'src' namespace */ + if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src")) + goto fail; + + SYS("ip addr add " IP4_SRC "/32 dev veth_src"); + SYS("ip addr add " IP6_SRC "/128 dev veth_src nodad"); + SYS("ip link set dev veth_src up"); + + SYS("ip route add " IP4_DST "/32 dev veth_src scope global"); + SYS("ip route add " IP4_NET "/16 dev veth_src scope global"); + SYS("ip route add " IP6_DST "/128 dev veth_src scope global"); + + SYS("ip neigh add " IP4_DST " dev veth_src lladdr %s", + veth_src_fwd_addr); + SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s", + veth_src_fwd_addr); + + /** setup in 'fwd' namespace */ + if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd")) + goto fail; + + /* The fwd netns automatically gets a v6 LL address / routes, but also + * needs v4 one in order to start ARP probing. IP4_NET route is added + * to the endpoints so that the ARP processing will reply. + */ + SYS("ip addr add " IP4_SLL "/32 dev veth_src_fwd"); + SYS("ip addr add " IP4_DLL "/32 dev veth_dst_fwd"); + SYS("ip link set dev veth_src_fwd up"); + SYS("ip link set dev veth_dst_fwd up"); + + SYS("ip route add " IP4_SRC "/32 dev veth_src_fwd scope global"); + SYS("ip route add " IP6_SRC "/128 dev veth_src_fwd scope global"); + SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global"); + SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global"); + + /** setup in 'dst' namespace */ + if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst")) + goto fail; + + SYS("ip addr add " IP4_DST "/32 dev veth_dst"); + SYS("ip addr add " IP6_DST "/128 dev veth_dst nodad"); + SYS("ip link set dev veth_dst up"); + + SYS("ip route add " IP4_SRC "/32 dev veth_dst scope global"); + SYS("ip route add " IP4_NET "/16 dev veth_dst scope global"); + SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global"); + + SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr %s", + veth_dst_fwd_addr); + SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr %s", + veth_dst_fwd_addr); + + setns_root(); + return 0; +fail: + setns_root(); + return -1; +} + +static int netns_load_bpf(void) +{ + if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd")) + return -1; + + SYS("tc qdisc add dev veth_src_fwd clsact"); + SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned " + SRC_PROG_PIN_FILE); + SYS("tc filter add dev veth_src_fwd egress bpf da object-pinned " + CHK_PROG_PIN_FILE); + + SYS("tc qdisc add dev veth_dst_fwd clsact"); + SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned " + DST_PROG_PIN_FILE); + SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned " + CHK_PROG_PIN_FILE); + + setns_root(); + return -1; +fail: + setns_root(); + return -1; +} + +static int netns_unload_bpf(void) +{ + if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd")) + goto fail; + SYS("tc qdisc delete dev veth_src_fwd clsact"); + SYS("tc qdisc delete dev veth_dst_fwd clsact"); + + setns_root(); + return 0; +fail: + setns_root(); + return -1; +} + + +static void test_tcp(int family, const char *addr, __u16 port) +{ + int listen_fd = -1, accept_fd = -1, client_fd = -1; + char buf[] = "testing testing"; + int n; + + if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst")) + return; + + listen_fd = start_server(family, SOCK_STREAM, addr, port, 0); + if (!ASSERT_GE(listen_fd, 0, "listen")) + goto done; + + if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src")) + goto done; + + client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS); + if (!ASSERT_GE(client_fd, 0, "connect_to_fd")) + goto done; + + accept_fd = accept(listen_fd, NULL, NULL); + if (!ASSERT_GE(accept_fd, 0, "accept")) + goto done; + + if (!ASSERT_OK(settimeo(accept_fd, TIMEOUT_MILLIS), "settimeo")) + goto done; + + n = write(client_fd, buf, sizeof(buf)); + if (!ASSERT_EQ(n, sizeof(buf), "send to server")) + goto done; + + n = read(accept_fd, buf, sizeof(buf)); + ASSERT_EQ(n, sizeof(buf), "recv from server"); + +done: + setns_root(); + if (listen_fd >= 0) + close(listen_fd); + if (accept_fd >= 0) + close(accept_fd); + if (client_fd >= 0) + close(client_fd); +} + +static int test_ping(int family, const char *addr) +{ + const char *ping = family == AF_INET6 ? "ping6" : "ping"; + + SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s", ping, addr); + return 0; +fail: + return -1; +} + +static void test_connectivity(void) +{ + test_tcp(AF_INET, IP4_DST, IP4_PORT); + test_ping(AF_INET, IP4_DST); + test_tcp(AF_INET6, IP6_DST, IP6_PORT); + test_ping(AF_INET6, IP6_DST); +} + +static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result) +{ + struct test_tc_neigh_fib *skel; + int err; + + skel = test_tc_neigh_fib__open(); + if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open")) + return; + + if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load")) { + test_tc_neigh_fib__destroy(skel); + return; + } + + err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) + goto done; + + err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) + goto done; + + err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) + goto done; + + if (netns_load_bpf()) + goto done; + + /* bpf_fib_lookup() checks if forwarding is enabled */ + if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd")) + goto done; + + err = modify_proc("/proc/sys/net/ipv4/ip_forward", "1"); + if (!ASSERT_OK(err, "set ipv4.ip_forward")) + goto done; + + err = modify_proc("/proc/sys/net/ipv6/conf/all/forwarding", "1"); + if (!ASSERT_OK(err, "set ipv6.forwarding")) + goto done; + setns_root(); + + test_connectivity(); +done: + bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE); + bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); + bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE); + test_tc_neigh_fib__destroy(skel); + netns_unload_bpf(); + setns_root(); + restore_proc(); +} + +static void test_tc_redirect_neigh(struct netns_setup_result *setup_result) +{ + struct test_tc_neigh *skel; + int err; + + skel = test_tc_neigh__open(); + if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open")) + return; + + skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; + skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + + err = test_tc_neigh__load(skel); + if (!ASSERT_OK(err, "test_tc_neigh__load")) { + test_tc_neigh__destroy(skel); + return; + } + + err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) + goto done; + + err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) + goto done; + + err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) + goto done; + + if (netns_load_bpf()) + goto done; + + test_connectivity(); + +done: + bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE); + bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); + bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE); + test_tc_neigh__destroy(skel); + netns_unload_bpf(); + setns_root(); +} + +static void test_tc_redirect_peer(struct netns_setup_result *setup_result) +{ + struct test_tc_peer *skel; + int err; + + skel = test_tc_peer__open(); + if (!ASSERT_OK_PTR(skel, "test_tc_peer__open")) + return; + + skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; + skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + + err = test_tc_peer__load(skel); + if (!ASSERT_OK(err, "test_tc_peer__load")) { + test_tc_peer__destroy(skel); + return; + } + + err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) + goto done; + + err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) + goto done; + + err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) + goto done; + + if (netns_load_bpf()) + goto done; + + test_connectivity(); + +done: + bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE); + bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); + bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE); + test_tc_peer__destroy(skel); + netns_unload_bpf(); + setns_root(); +} + +void test_tc_redirect(void) +{ + struct netns_setup_result setup_result; + + root_netns_fd = open("/proc/self/ns/net", O_RDONLY); + if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net")) + return; + + if (netns_setup_namespaces("add")) + goto done; + + if (netns_setup_links_and_routes(&setup_result)) + goto done; + + if (test__start_subtest("tc_redirect_peer")) + test_tc_redirect_peer(&setup_result); + + if (test__start_subtest("tc_redirect_neigh")) + test_tc_redirect_neigh(&setup_result); + + if (test__start_subtest("tc_redirect_neigh_fib")) + test_tc_redirect_neigh_fib(&setup_result); + +done: + close(root_netns_fd); + netns_setup_namespaces("delete"); +} diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c index b985ac4e7a81..90f64a85998f 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_neigh.c +++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c @@ -33,17 +33,8 @@ a.s6_addr32[3] == b.s6_addr32[3]) #endif -enum { - dev_src, - dev_dst, -}; - -struct bpf_map_def SEC("maps") ifindex_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 2, -}; +static volatile const __u32 IFINDEX_SRC; +static volatile const __u32 IFINDEX_DST; static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb, __be32 addr) @@ -79,14 +70,8 @@ static __always_inline bool is_remote_ep_v6(struct __sk_buff *skb, return v6_equal(ip6h->daddr, addr); } -static __always_inline int get_dev_ifindex(int which) -{ - int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which); - - return ifindex ? *ifindex : 0; -} - -SEC("chk_egress") int tc_chk(struct __sk_buff *skb) +SEC("classifier/chk_egress") +int tc_chk(struct __sk_buff *skb) { void *data_end = ctx_ptr(skb->data_end); void *data = ctx_ptr(skb->data); @@ -98,7 +83,8 @@ SEC("chk_egress") int tc_chk(struct __sk_buff *skb) return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK; } -SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) +SEC("classifier/dst_ingress") +int tc_dst(struct __sk_buff *skb) { __u8 zero[ETH_ALEN * 2]; bool redirect = false; @@ -119,10 +105,11 @@ SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) return TC_ACT_SHOT; - return bpf_redirect_neigh(get_dev_ifindex(dev_src), NULL, 0, 0); + return bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0); } -SEC("src_ingress") int tc_src(struct __sk_buff *skb) +SEC("classifier/src_ingress") +int tc_src(struct __sk_buff *skb) { __u8 zero[ETH_ALEN * 2]; bool redirect = false; @@ -143,7 +130,7 @@ SEC("src_ingress") int tc_src(struct __sk_buff *skb) if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) return TC_ACT_SHOT; - return bpf_redirect_neigh(get_dev_ifindex(dev_dst), NULL, 0, 0); + return bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0); } char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c index d82ed3457030..f7ab69cf018e 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c +++ b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c @@ -75,7 +75,8 @@ static __always_inline int fill_fib_params_v6(struct __sk_buff *skb, return 0; } -SEC("chk_egress") int tc_chk(struct __sk_buff *skb) +SEC("classifier/chk_egress") +int tc_chk(struct __sk_buff *skb) { void *data_end = ctx_ptr(skb->data_end); void *data = ctx_ptr(skb->data); @@ -142,12 +143,14 @@ static __always_inline int tc_redir(struct __sk_buff *skb) /* these are identical, but keep them separate for compatibility with the * section names expected by test_tc_redirect.sh */ -SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) +SEC("classifier/dst_ingress") +int tc_dst(struct __sk_buff *skb) { return tc_redir(skb); } -SEC("src_ingress") int tc_src(struct __sk_buff *skb) +SEC("classifier/src_ingress") +int tc_src(struct __sk_buff *skb) { return tc_redir(skb); } diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c index fc84a7685aa2..72c72950c3bb 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_peer.c +++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c @@ -8,38 +8,25 @@ #include -enum { - dev_src, - dev_dst, -}; +static volatile const __u32 IFINDEX_SRC; +static volatile const __u32 IFINDEX_DST; -struct bpf_map_def SEC("maps") ifindex_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 2, -}; - -static __always_inline int get_dev_ifindex(int which) -{ - int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which); - - return ifindex ? *ifindex : 0; -} - -SEC("chk_egress") int tc_chk(struct __sk_buff *skb) +SEC("classifier/chk_egress") +int tc_chk(struct __sk_buff *skb) { return TC_ACT_SHOT; } -SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) +SEC("classifier/dst_ingress") +int tc_dst(struct __sk_buff *skb) { - return bpf_redirect_peer(get_dev_ifindex(dev_src), 0); + return bpf_redirect_peer(IFINDEX_SRC, 0); } -SEC("src_ingress") int tc_src(struct __sk_buff *skb) +SEC("classifier/src_ingress") +int tc_src(struct __sk_buff *skb) { - return bpf_redirect_peer(get_dev_ifindex(dev_dst), 0); + return bpf_redirect_peer(IFINDEX_DST, 0); } char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_tc_redirect.sh b/tools/testing/selftests/bpf/test_tc_redirect.sh deleted file mode 100755 index 8868aa1ca902..000000000000 --- a/tools/testing/selftests/bpf/test_tc_redirect.sh +++ /dev/null @@ -1,216 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link -# between src and dst. The netns fwd has veth links to each src and dst. The -# client is in src and server in dst. The test installs a TC BPF program to each -# host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the -# neigh addr population and redirect or ii) bpf_redirect_peer() for namespace -# switch from ingress side; it also installs a checker prog on the egress side -# to drop unexpected traffic. - -if [[ $EUID -ne 0 ]]; then - echo "This script must be run as root" - echo "FAIL" - exit 1 -fi - -# check that needed tools are present -command -v nc >/dev/null 2>&1 || \ - { echo >&2 "nc is not available"; exit 1; } -command -v dd >/dev/null 2>&1 || \ - { echo >&2 "dd is not available"; exit 1; } -command -v timeout >/dev/null 2>&1 || \ - { echo >&2 "timeout is not available"; exit 1; } -command -v ping >/dev/null 2>&1 || \ - { echo >&2 "ping is not available"; exit 1; } -if command -v ping6 >/dev/null 2>&1; then PING6=ping6; else PING6=ping; fi -command -v perl >/dev/null 2>&1 || \ - { echo >&2 "perl is not available"; exit 1; } -command -v jq >/dev/null 2>&1 || \ - { echo >&2 "jq is not available"; exit 1; } -command -v bpftool >/dev/null 2>&1 || \ - { echo >&2 "bpftool is not available"; exit 1; } - -readonly GREEN='\033[0;92m' -readonly RED='\033[0;31m' -readonly NC='\033[0m' # No Color - -readonly PING_ARG="-c 3 -w 10 -q" - -readonly TIMEOUT=10 - -readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)" -readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)" -readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)" - -readonly IP4_SRC="172.16.1.100" -readonly IP4_DST="172.16.2.100" - -readonly IP6_SRC="::1:dead:beef:cafe" -readonly IP6_DST="::2:dead:beef:cafe" - -readonly IP4_SLL="169.254.0.1" -readonly IP4_DLL="169.254.0.2" -readonly IP4_NET="169.254.0.0" - -netns_cleanup() -{ - ip netns del ${NS_SRC} - ip netns del ${NS_FWD} - ip netns del ${NS_DST} -} - -netns_setup() -{ - ip netns add "${NS_SRC}" - ip netns add "${NS_FWD}" - ip netns add "${NS_DST}" - - ip link add veth_src type veth peer name veth_src_fwd - ip link add veth_dst type veth peer name veth_dst_fwd - - ip link set veth_src netns ${NS_SRC} - ip link set veth_src_fwd netns ${NS_FWD} - - ip link set veth_dst netns ${NS_DST} - ip link set veth_dst_fwd netns ${NS_FWD} - - ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src - ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst - - # The fwd netns automatically get a v6 LL address / routes, but also - # needs v4 one in order to start ARP probing. IP4_NET route is added - # to the endpoints so that the ARP processing will reply. - - ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd - ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd - - ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad - ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad - - ip -netns ${NS_SRC} link set dev veth_src up - ip -netns ${NS_FWD} link set dev veth_src_fwd up - - ip -netns ${NS_DST} link set dev veth_dst up - ip -netns ${NS_FWD} link set dev veth_dst_fwd up - - ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global - ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global - ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global - - ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global - ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global - - ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global - ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global - ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global - - ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global - ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global - - fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address) - fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address) - - ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src - ip -netns ${NS_DST} neigh add ${IP4_SRC} dev veth_dst lladdr $fmac_dst - - ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src - ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst -} - -netns_test_connectivity() -{ - set +e - - ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &" - ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &" - - TEST="TCPv4 connectivity test" - ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004" - if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 - fi - echo -e "${TEST}: ${GREEN}PASS${NC}" - - TEST="TCPv6 connectivity test" - ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006" - if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 - fi - echo -e "${TEST}: ${GREEN}PASS${NC}" - - TEST="ICMPv4 connectivity test" - ip netns exec ${NS_SRC} ping $PING_ARG ${IP4_DST} - if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 - fi - echo -e "${TEST}: ${GREEN}PASS${NC}" - - TEST="ICMPv6 connectivity test" - ip netns exec ${NS_SRC} $PING6 $PING_ARG ${IP6_DST} - if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 - fi - echo -e "${TEST}: ${GREEN}PASS${NC}" - - set -e -} - -hex_mem_str() -{ - perl -e 'print join(" ", unpack("(H2)8", pack("L", @ARGV)))' $1 -} - -netns_setup_bpf() -{ - local obj=$1 - local use_forwarding=${2:-0} - - ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact - ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress - ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj $obj sec chk_egress - - ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact - ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress - ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj $obj sec chk_egress - - if [ "$use_forwarding" -eq "1" ]; then - # bpf_fib_lookup() checks if forwarding is enabled - ip netns exec ${NS_FWD} sysctl -w net.ipv4.ip_forward=1 - ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_dst_fwd.forwarding=1 - ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_src_fwd.forwarding=1 - return 0 - fi - - veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex) - veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex) - - progs=$(ip netns exec ${NS_FWD} bpftool net --json | jq -r '.[] | .tc | map(.id) | .[]') - for prog in $progs; do - map=$(bpftool prog show id $prog --json | jq -r '.map_ids | .? | .[]') - if [ ! -z "$map" ]; then - bpftool map update id $map key hex $(hex_mem_str 0) value hex $(hex_mem_str $veth_src) - bpftool map update id $map key hex $(hex_mem_str 1) value hex $(hex_mem_str $veth_dst) - fi - done -} - -trap netns_cleanup EXIT -set -e - -netns_setup -netns_setup_bpf test_tc_neigh.o -netns_test_connectivity -netns_cleanup -netns_setup -netns_setup_bpf test_tc_neigh_fib.o 1 -netns_test_connectivity -netns_cleanup -netns_setup -netns_setup_bpf test_tc_peer.o -netns_test_connectivity From 569c484f9995f489f2b80dd134269fe07d2b900d Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 7 May 2021 17:50:11 -0700 Subject: [PATCH 112/730] bpf: Limit static tcp-cc functions in the .BTF_ids list to x86 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During the discussion in [0]. It was pointed out that static functions in ppc64 is prefixed with ".". For example, the 'readelf -s vmlinux.ppc': 89326: c000000001383280 24 NOTYPE LOCAL DEFAULT 31 cubictcp_init 89327: c000000000c97c50 168 FUNC LOCAL DEFAULT 2 .cubictcp_init The one with FUNC type is ".cubictcp_init" instead of "cubictcp_init". The "." seems to be done by arch/powerpc/include/asm/ppc_asm.h. This caused that pahole cannot generate the BTF for these tcp-cc kernel functions because pahole only captures the FUNC type and "cubictcp_init" is not. It then failed the kernel compilation in ppc64. This behavior is only reported in ppc64 so far. I tried arm64, s390, and sparc64 and did not observe this "." prefix and NOTYPE behavior. Since the kfunc call is only supported in the x86_64 and x86_32 JIT, this patch limits those tcp-cc functions to x86 only to avoid unnecessary compilation issue in other ARCHs. In the future, we can examine if it is better to change all those functions from static to extern. [0] https://lore.kernel.org/bpf/4e051459-8532-7b61-c815-f3435767f8a0@kernel.org/ Fixes: e78aea8b2170 ("bpf: tcp: Put some tcp cong functions in allowlist for bpf-tcp-cc") Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Cc: Michal Suchánek Cc: Jiri Slaby Cc: Jiri Olsa Link: https://lore.kernel.org/bpf/20210508005011.3863757-1-kafai@fb.com --- net/ipv4/bpf_tcp_ca.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index dff4f0eb96b0..9e41eff4a685 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -185,6 +185,7 @@ BTF_ID(func, tcp_reno_cong_avoid) BTF_ID(func, tcp_reno_undo_cwnd) BTF_ID(func, tcp_slow_start) BTF_ID(func, tcp_cong_avoid_ai) +#ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE #if IS_BUILTIN(CONFIG_TCP_CONG_CUBIC) BTF_ID(func, cubictcp_init) @@ -213,6 +214,7 @@ BTF_ID(func, bbr_min_tso_segs) BTF_ID(func, bbr_set_state) #endif #endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_X86 */ BTF_SET_END(bpf_tcp_ca_kfunc_ids) static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id) From 576f9eacc680d2b1f37e8010cff62f7b227ea769 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Mon, 10 May 2021 14:55:09 +0800 Subject: [PATCH 113/730] net: stmmac: Fix MAC WoL not working if PHY does not support WoL Both get and set WoL will check device_can_wakeup(), if MAC supports PMT, it will set device wakeup capability. After commit 1d8e5b0f3f2c ("net: stmmac: Support WOL with phy"), device wakeup capability will be overwrite in stmmac_init_phy() according to phy's Wol feature. If phy doesn't support WoL, then MAC will lose wakeup capability. To fix this issue, only overwrite device wakeup capability when MAC doesn't support PMT. For STMMAC now driver checks MAC's WoL capability if MAC supports PMT, if not support, driver will check PHY's WoL capability. Fixes: 1d8e5b0f3f2c ("net: stmmac: Support WOL with phy") Reviewed-by: Jisheng Zhang Signed-off-by: Joakim Zhang Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 345b4c6d1fd4..fea3bf07ae89 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1196,7 +1196,6 @@ static void stmmac_check_pcs_mode(struct stmmac_priv *priv) */ static int stmmac_init_phy(struct net_device *dev) { - struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; struct stmmac_priv *priv = netdev_priv(dev); struct device_node *node; int ret; @@ -1222,8 +1221,12 @@ static int stmmac_init_phy(struct net_device *dev) ret = phylink_connect_phy(priv->phylink, phydev); } - phylink_ethtool_get_wol(priv->phylink, &wol); - device_set_wakeup_capable(priv->device, !!wol.supported); + if (!priv->plat->pmt) { + struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; + + phylink_ethtool_get_wol(priv->phylink, &wol); + device_set_wakeup_capable(priv->device, !!wol.supported); + } return ret; } From 29249eac5225429b898f278230a6ca2baa1ae154 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 11 May 2021 19:13:51 +0200 Subject: [PATCH 114/730] mptcp: fix data stream corruption Maxim reported several issues when forcing a TCP transparent proxy to use the MPTCP protocol for the inbound connections. He also provided a clean reproducer. The problem boils down to 'mptcp_frag_can_collapse_to()' assuming that only MPTCP will use the given page_frag. If others - e.g. the plain TCP protocol - allocate page fragments, we can end-up re-using already allocated memory for mptcp_data_frag. Fix the issue ensuring that the to-be-expanded data fragment is located at the current page frag end. v1 -> v2: - added missing fixes tag (Mat) Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/178 Reported-and-tested-by: Maxim Galaganov Fixes: 18b683bff89d ("mptcp: queue data for mptcp level retransmission") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 29a2d690d8d5..2d21a4793d9d 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -879,12 +879,18 @@ static bool mptcp_skb_can_collapse_to(u64 write_seq, !mpext->frozen; } +/* we can append data to the given data frag if: + * - there is space available in the backing page_frag + * - the data frag tail matches the current page_frag free offset + * - the data frag end sequence number matches the current write seq + */ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk, const struct page_frag *pfrag, const struct mptcp_data_frag *df) { return df && pfrag->page == df->page && pfrag->size - pfrag->offset > 0 && + pfrag->offset == (df->offset + df->data_len) && df->data_seq + df->data_len == msk->write_seq; } From bcbda3fc616272686208f9c4d5f6dccb65360bd8 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Tue, 11 May 2021 11:11:32 -0700 Subject: [PATCH 115/730] ionic: fix ptp support config breakage When IONIC=y and PTP_1588_CLOCK=m were set in the .config file the driver link failed with undefined references. We add the dependancy depends on PTP_1588_CLOCK || !PTP_1588_CLOCK to clear this up. If PTP_1588_CLOCK=m, the depends limits IONIC to =m (or disabled). If PTP_1588_CLOCK is disabled, IONIC can be any of y/m/n. Fixes: 61db421da31b ("ionic: link in the new hw timestamp code") Reported-by: kernel test robot Cc: Jakub Kicinski Cc: Randy Dunlap Cc: Allen Hubbe Signed-off-by: Shannon Nelson Acked-by: Randy Dunlap Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/pensando/Kconfig b/drivers/net/ethernet/pensando/Kconfig index 5f8b0bb3af6e..202973a82712 100644 --- a/drivers/net/ethernet/pensando/Kconfig +++ b/drivers/net/ethernet/pensando/Kconfig @@ -20,6 +20,7 @@ if NET_VENDOR_PENSANDO config IONIC tristate "Pensando Ethernet IONIC Support" depends on 64BIT && PCI + depends on PTP_1588_CLOCK || !PTP_1588_CLOCK select NET_DEVLINK select DIMLIB help From 440c3247cba3d9433ac435d371dd7927d68772a7 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 11 May 2021 14:42:04 -0500 Subject: [PATCH 116/730] net: ipa: memory region array is variable size IPA configuration data includes an array of memory region descriptors. That was a fixed-size array at one time, but at some point we started defining it such that it was only as big as required for a given platform. The actual number of entries in the array is recorded in the configuration data along with the array. A loop in ipa_mem_config() still assumes the array has entries for all defined memory region IDs. As a result, this loop can go past the end of the actual array and attempt to write "canary" values based on nonsensical data. Fix this, by stashing the number of entries in the array, and using that rather than IPA_MEM_COUNT in the initialization loop found in ipa_mem_config(). The only remaining use of IPA_MEM_COUNT is in a validation check to ensure configuration data doesn't have too many entries. That's fine for now. Fixes: 3128aae8c439a ("net: ipa: redefine struct ipa_mem_data") Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- drivers/net/ipa/ipa.h | 2 ++ drivers/net/ipa/ipa_mem.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ipa/ipa.h b/drivers/net/ipa/ipa.h index e7ff376cb5b7..744406832a77 100644 --- a/drivers/net/ipa/ipa.h +++ b/drivers/net/ipa/ipa.h @@ -58,6 +58,7 @@ enum ipa_flag { * @mem_virt: Virtual address of IPA-local memory space * @mem_offset: Offset from @mem_virt used for access to IPA memory * @mem_size: Total size (bytes) of memory at @mem_virt + * @mem_count: Number of entries in the mem array * @mem: Array of IPA-local memory region descriptors * @imem_iova: I/O virtual address of IPA region in IMEM * @imem_size: Size of IMEM region @@ -103,6 +104,7 @@ struct ipa { void *mem_virt; u32 mem_offset; u32 mem_size; + u32 mem_count; const struct ipa_mem *mem; unsigned long imem_iova; diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c index c5c3b1b7e67d..1624125e7459 100644 --- a/drivers/net/ipa/ipa_mem.c +++ b/drivers/net/ipa/ipa_mem.c @@ -180,7 +180,7 @@ int ipa_mem_config(struct ipa *ipa) * for the region, write "canary" values in the space prior to * the region's base address. */ - for (mem_id = 0; mem_id < IPA_MEM_COUNT; mem_id++) { + for (mem_id = 0; mem_id < ipa->mem_count; mem_id++) { const struct ipa_mem *mem = &ipa->mem[mem_id]; u16 canary_count; __le32 *canary; @@ -487,6 +487,7 @@ int ipa_mem_init(struct ipa *ipa, const struct ipa_mem_data *mem_data) ipa->mem_size = resource_size(res); /* The ipa->mem[] array is indexed by enum ipa_mem_id values */ + ipa->mem_count = mem_data->local_count; ipa->mem = mem_data->local; ret = ipa_imem_init(ipa, mem_data->imem_addr, mem_data->imem_size); From 2b17c400aeb44daf041627722581ade527bb3c1d Mon Sep 17 00:00:00 2001 From: Norbert Slusarek Date: Wed, 12 May 2021 00:43:54 +0200 Subject: [PATCH 117/730] can: isotp: prevent race between isotp_bind() and isotp_setsockopt() A race condition was found in isotp_setsockopt() which allows to change socket options after the socket was bound. For the specific case of SF_BROADCAST support, this might lead to possible use-after-free because can_rx_unregister() is not called. Checking for the flag under the socket lock in isotp_bind() and taking the lock in isotp_setsockopt() fixes the issue. Fixes: 921ca574cd38 ("can: isotp: add SF_BROADCAST support for functional addressing") Link: https://lore.kernel.org/r/trinity-e6ae9efa-9afb-4326-84c0-f3609b9b8168-1620773528307@3c-app-gmx-bs06 Reported-by: Norbert Slusarek Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Norbert Slusarek Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- net/can/isotp.c | 51 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/net/can/isotp.c b/net/can/isotp.c index 9f94ad3caee9..253b24417c8e 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1062,19 +1062,6 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) if (len < ISOTP_MIN_NAMELEN) return -EINVAL; - /* do not register frame reception for functional addressing */ - if (so->opt.flags & CAN_ISOTP_SF_BROADCAST) - do_rx_reg = 0; - - /* do not validate rx address for functional addressing */ - if (do_rx_reg) { - if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id) - return -EADDRNOTAVAIL; - - if (addr->can_addr.tp.rx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG)) - return -EADDRNOTAVAIL; - } - if (addr->can_addr.tp.tx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG)) return -EADDRNOTAVAIL; @@ -1083,6 +1070,23 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) lock_sock(sk); + /* do not register frame reception for functional addressing */ + if (so->opt.flags & CAN_ISOTP_SF_BROADCAST) + do_rx_reg = 0; + + /* do not validate rx address for functional addressing */ + if (do_rx_reg) { + if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id) { + err = -EADDRNOTAVAIL; + goto out; + } + + if (addr->can_addr.tp.rx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG)) { + err = -EADDRNOTAVAIL; + goto out; + } + } + if (so->bound && addr->can_ifindex == so->ifindex && addr->can_addr.tp.rx_id == so->rxid && addr->can_addr.tp.tx_id == so->txid) @@ -1164,16 +1168,13 @@ static int isotp_getname(struct socket *sock, struct sockaddr *uaddr, int peer) return ISOTP_MIN_NAMELEN; } -static int isotp_setsockopt(struct socket *sock, int level, int optname, +static int isotp_setsockopt_locked(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct isotp_sock *so = isotp_sk(sk); int ret = 0; - if (level != SOL_CAN_ISOTP) - return -EINVAL; - if (so->bound) return -EISCONN; @@ -1248,6 +1249,22 @@ static int isotp_setsockopt(struct socket *sock, int level, int optname, return ret; } +static int isotp_setsockopt(struct socket *sock, int level, int optname, + sockptr_t optval, unsigned int optlen) + +{ + struct sock *sk = sock->sk; + int ret; + + if (level != SOL_CAN_ISOTP) + return -EINVAL; + + lock_sock(sk); + ret = isotp_setsockopt_locked(sock, level, optname, optval, optlen); + release_sock(sk); + return ret; +} + static int isotp_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { From c6c82e0cd8125d30f2f1b29205c7e1a2f1a6785b Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Tue, 11 May 2021 21:56:29 +0200 Subject: [PATCH 118/730] vfio-ccw: Check initialized flag in cp_init() We have a really nice flag in the channel_program struct that indicates if it had been initialized by cp_init(), and use it as a guard in the other cp accessor routines, but not for a duplicate call into cp_init(). The possibility of this occurring is low, because that flow is protected by the private->io_mutex and FSM CP_PROCESSING state. But then why bother checking it in (for example) cp_prefetch() then? Let's just be consistent and check for that in cp_init() too. Fixes: 71189f263f8a3 ("vfio-ccw: make it safe to access channel programs") Signed-off-by: Eric Farman Reviewed-by: Cornelia Huck Acked-by: Matthew Rosato Message-Id: <20210511195631.3995081-2-farman@linux.ibm.com> Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_cp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index b9febc581b1f..8d1b2771c1aa 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -638,6 +638,10 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb) static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 1); int ret; + /* this is an error in the caller */ + if (cp->initialized) + return -EBUSY; + /* * We only support prefetching the channel program. We assume all channel * programs executed by supported guests likewise support prefetching. From 6c02ac4c9211edabe17bda437ac97e578756f31b Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Tue, 11 May 2021 21:56:30 +0200 Subject: [PATCH 119/730] vfio-ccw: Reset FSM state to IDLE inside FSM When an I/O request is made, the fsm_io_request() routine moves the FSM state from IDLE to CP_PROCESSING, and then fsm_io_helper() moves it to CP_PENDING if the START SUBCHANNEL received a cc0. Yet, the error case to go from CP_PROCESSING back to IDLE is done after the FSM call returns. Let's move this up into the FSM proper, to provide some better symmetry when unwinding in this case. Signed-off-by: Eric Farman Reviewed-by: Cornelia Huck Acked-by: Matthew Rosato Message-Id: <20210511195631.3995081-3-farman@linux.ibm.com> Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_fsm.c | 1 + drivers/s390/cio/vfio_ccw_ops.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c index 23e61aa638e4..e435a9cd92da 100644 --- a/drivers/s390/cio/vfio_ccw_fsm.c +++ b/drivers/s390/cio/vfio_ccw_fsm.c @@ -318,6 +318,7 @@ static void fsm_io_request(struct vfio_ccw_private *private, } err_out: + private->state = VFIO_CCW_STATE_IDLE; trace_vfio_ccw_fsm_io_request(scsw->cmd.fctl, schid, io_region->ret_code, errstr); } diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index 491a64c61fff..c57d2a7f0919 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -279,8 +279,6 @@ static ssize_t vfio_ccw_mdev_write_io_region(struct vfio_ccw_private *private, } vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_IO_REQ); - if (region->ret_code != 0) - private->state = VFIO_CCW_STATE_IDLE; ret = (region->ret_code != 0) ? region->ret_code : count; out_unlock: From 2af7a834a435460d546f0cf0a8b8e4d259f1d910 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Tue, 11 May 2021 21:56:31 +0200 Subject: [PATCH 120/730] vfio-ccw: Serialize FSM IDLE state with I/O completion Today, the stacked call to vfio_ccw_sch_io_todo() does three things: 1) Update a solicited IRB with CP information, and release the CP if the interrupt was the end of a START operation. 2) Copy the IRB data into the io_region, under the protection of the io_mutex 3) Reset the vfio-ccw FSM state to IDLE to acknowledge that vfio-ccw can accept more work. The trouble is that step 3 is (A) invoked for both solicited and unsolicited interrupts, and (B) sitting after the mutex for step 2. This second piece becomes a problem if it processes an interrupt for a CLEAR SUBCHANNEL while another thread initiates a START, thus allowing the CP and FSM states to get out of sync. That is: CPU 1 CPU 2 fsm_do_clear() fsm_irq() fsm_io_request() vfio_ccw_sch_io_todo() fsm_io_helper() Since the FSM state and CP should be kept in sync, let's make a note when the CP is released, and rely on that as an indication that the FSM should also be reset at the end of this routine and open up the device for more work. Signed-off-by: Eric Farman Acked-by: Matthew Rosato Reviewed-by: Cornelia Huck Message-Id: <20210511195631.3995081-4-farman@linux.ibm.com> Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_drv.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index 8c625b530035..9b61e9b131ad 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -86,6 +86,7 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work) struct vfio_ccw_private *private; struct irb *irb; bool is_final; + bool cp_is_finished = false; private = container_of(work, struct vfio_ccw_private, io_work); irb = &private->irb; @@ -94,14 +95,21 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work) (SCSW_ACTL_DEVACT | SCSW_ACTL_SCHACT)); if (scsw_is_solicited(&irb->scsw)) { cp_update_scsw(&private->cp, &irb->scsw); - if (is_final && private->state == VFIO_CCW_STATE_CP_PENDING) + if (is_final && private->state == VFIO_CCW_STATE_CP_PENDING) { cp_free(&private->cp); + cp_is_finished = true; + } } mutex_lock(&private->io_mutex); memcpy(private->io_region->irb_area, irb, sizeof(*irb)); mutex_unlock(&private->io_mutex); - if (private->mdev && is_final) + /* + * Reset to IDLE only if processing of a channel program + * has finished. Do not overwrite a possible processing + * state if the final interrupt was for HSCH or CSCH. + */ + if (private->mdev && cp_is_finished) private->state = VFIO_CCW_STATE_IDLE; if (private->io_trigger) From 1e948b1752b58c9c570989ab29ceef5b38fdccda Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Wed, 12 May 2021 11:17:47 +0800 Subject: [PATCH 121/730] gpio: cadence: Add missing MODULE_DEVICE_TABLE This patch adds missing MODULE_DEVICE_TABLE definition which generates correct modalias for automatic loading of this driver when it is built as an external module. Reported-by: Hulk Robot Signed-off-by: Zou Wei Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-cadence.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-cadence.c b/drivers/gpio/gpio-cadence.c index a4d3239d2594..4ab3fcd9b9ba 100644 --- a/drivers/gpio/gpio-cadence.c +++ b/drivers/gpio/gpio-cadence.c @@ -278,6 +278,7 @@ static const struct of_device_id cdns_of_ids[] = { { .compatible = "cdns,gpio-r1p02" }, { /* sentinel */ }, }; +MODULE_DEVICE_TABLE(of, cdns_of_ids); static struct platform_driver cdns_gpio_driver = { .driver = { From a0579474effff6a139768b300d8439c2327b3848 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 May 2021 22:46:30 +0300 Subject: [PATCH 122/730] gpio: xilinx: Correct kernel doc for xgpio_probe() Kernel doc validator complains: .../gpio-xilinx.c:556: warning: expecting prototype for xgpio_of_probe(). Prototype was for xgpio_probe() instead Correct as suggested by changing the name of the function in the doc.. Fixes: 749564ffd52d ("gpio/xilinx: Convert the driver to platform device interface") Signed-off-by: Andy Shevchenko Tested-by: Neeli Srinivas Reviewed-by: Michal Simek Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-xilinx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-xilinx.c b/drivers/gpio/gpio-xilinx.c index b411d3156e0b..136557e7dd3c 100644 --- a/drivers/gpio/gpio-xilinx.c +++ b/drivers/gpio/gpio-xilinx.c @@ -542,7 +542,7 @@ static void xgpio_irqhandler(struct irq_desc *desc) } /** - * xgpio_of_probe - Probe method for the GPIO device. + * xgpio_probe - Probe method for the GPIO device. * @pdev: pointer to the platform device * * Return: From bdbe871ef0caa660e16461a2a94579d9f9ef7ba4 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Fri, 7 May 2021 11:34:11 +0100 Subject: [PATCH 123/730] gpio: tegra186: Don't set parent IRQ affinity When hotplugging CPUs on Tegra186 and Tegra194 errors such as the following are seen ... IRQ63: set affinity failed(-22). IRQ65: set affinity failed(-22). IRQ66: set affinity failed(-22). IRQ67: set affinity failed(-22). Looking at the /proc/interrupts the above are all interrupts associated with GPIOs. The reason why these error messages occur is because there is no 'parent_data' associated with any of the GPIO interrupts and so tegra186_irq_set_affinity() simply returns -EINVAL. To understand why there is no 'parent_data' it is first necessary to understand that in addition to the GPIO interrupts being routed to the interrupt controller (GIC), the interrupts for some GPIOs are also routed to the Tegra Power Management Controller (PMC) to wake up the system from low power states. In order to configure GPIO events as wake events in the PMC, the PMC is configured as IRQ parent domain for the GPIO IRQ domain. Originally the GIC was the IRQ parent domain of the PMC and although this was working, this started causing issues once commit 64a267e9a41c ("irqchip/gic: Configure SGIs as standard interrupts") was added, because technically, the GIC is not a parent of the PMC. Commit c351ab7bf2a5 ("soc/tegra: pmc: Don't create fake interrupt hierarchy levels") fixed this by severing the IRQ domain hierarchy for the Tegra GPIOs and hence, there may be no IRQ parent domain for the GPIOs. The GPIO controllers on Tegra186 and Tegra194 have either one or six interrupt lines to the interrupt controller. For GPIO controllers with six interrupts, the mapping of the GPIO interrupt to the controller interrupt is configurable within the GPIO controller. Currently a default mapping is used, however, it could be possible to use the set affinity callback for the Tegra186 GPIO driver to do something a bit more interesting. Currently, because interrupts for all GPIOs are have the same mapping and any attempts to configure the affinity for a given GPIO can conflict with another that shares the same IRQ, for now it is simpler to just remove set affinity support and this avoids the above warnings being seen. Cc: Fixes: c4e1f7d92cd6 ("gpio: tegra186: Set affinity callback to parent") Signed-off-by: Jon Hunter Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-tegra186.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c index 1bd9e44df718..05974b760796 100644 --- a/drivers/gpio/gpio-tegra186.c +++ b/drivers/gpio/gpio-tegra186.c @@ -444,16 +444,6 @@ static int tegra186_irq_set_wake(struct irq_data *data, unsigned int on) return 0; } -static int tegra186_irq_set_affinity(struct irq_data *data, - const struct cpumask *dest, - bool force) -{ - if (data->parent_data) - return irq_chip_set_affinity_parent(data, dest, force); - - return -EINVAL; -} - static void tegra186_gpio_irq(struct irq_desc *desc) { struct tegra_gpio *gpio = irq_desc_get_handler_data(desc); @@ -700,7 +690,6 @@ static int tegra186_gpio_probe(struct platform_device *pdev) gpio->intc.irq_unmask = tegra186_irq_unmask; gpio->intc.irq_set_type = tegra186_irq_set_type; gpio->intc.irq_set_wake = tegra186_irq_set_wake; - gpio->intc.irq_set_affinity = tegra186_irq_set_affinity; irq = &gpio->gpio.irq; irq->chip = &gpio->intc; From 47c1131633ef6210add63b8b5704497023a3462a Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 12 May 2021 08:09:08 +0900 Subject: [PATCH 124/730] ASoC: soc-dai.h: Align the word of comment for SND_SOC_DAIFMT_CBC_CFC Let's use "consumer" instead of "follower". Signed-off-by: Kuninori Morimoto Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/8735usc1gr.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 1358a0ceb4d0..0bc29c4516e7 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -81,7 +81,7 @@ struct snd_compr_stream; #define SND_SOC_DAIFMT_CBP_CFP (1 << 12) /* codec clk provider & frame provider */ #define SND_SOC_DAIFMT_CBC_CFP (2 << 12) /* codec clk consumer & frame provider */ #define SND_SOC_DAIFMT_CBP_CFC (3 << 12) /* codec clk provider & frame consumer */ -#define SND_SOC_DAIFMT_CBC_CFC (4 << 12) /* codec clk consumer & frame follower */ +#define SND_SOC_DAIFMT_CBC_CFC (4 << 12) /* codec clk consumer & frame consumer */ /* previous definitions kept for backwards-compatibility, do not use in new contributions */ #define SND_SOC_DAIFMT_CBM_CFM SND_SOC_DAIFMT_CBP_CFP From e072b2671606c77538d6a4dd5dda80b508cb4816 Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Wed, 12 May 2021 11:12:25 +0800 Subject: [PATCH 125/730] ASoC: sti-sas: add missing MODULE_DEVICE_TABLE This patch adds missing MODULE_DEVICE_TABLE definition which generates correct modalias for automatic loading of this driver when it is built as an external module. Reported-by: Hulk Robot Signed-off-by: Zou Wei Link: https://lore.kernel.org/r/1620789145-14936-1-git-send-email-zou_wei@huawei.com Signed-off-by: Mark Brown --- sound/soc/codecs/sti-sas.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/sti-sas.c b/sound/soc/codecs/sti-sas.c index ffdf7e559515..82a24e330065 100644 --- a/sound/soc/codecs/sti-sas.c +++ b/sound/soc/codecs/sti-sas.c @@ -408,6 +408,7 @@ static const struct of_device_id sti_sas_dev_match[] = { }, {}, }; +MODULE_DEVICE_TABLE(of, sti_sas_dev_match); static int sti_sas_driver_probe(struct platform_device *pdev) { From 96f685974609d4c315669ef33d55dbc43996491e Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 11 May 2021 18:57:14 +0100 Subject: [PATCH 126/730] ASoC: cs53l30: Add missing regmap use_single config This device requires single register transactions, this will definely cause problems with the new device ID parsing which uses regmap_bulk_read but might also show up in the cache sync sometimes. Add the missing flags to the regmap_config. Fixes: 4fc81bc88ad9 ("ASoC: cs53l30: Minor error paths fixups") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20210511175718.15416-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs53l30.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/cs53l30.c b/sound/soc/codecs/cs53l30.c index 3d67cbf9eaaa..abe0cc0bc03a 100644 --- a/sound/soc/codecs/cs53l30.c +++ b/sound/soc/codecs/cs53l30.c @@ -912,6 +912,9 @@ static struct regmap_config cs53l30_regmap = { .writeable_reg = cs53l30_writeable_register, .readable_reg = cs53l30_readable_register, .cache_type = REGCACHE_RBTREE, + + .use_single_read = true, + .use_single_write = true, }; static int cs53l30_i2c_probe(struct i2c_client *client, From 27fb585169024440c1b358da35499fa578d803cd Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 11 May 2021 18:57:15 +0100 Subject: [PATCH 127/730] ASoC: cs42l73: Add missing regmap use_single config This device requires single register transactions, this will definely cause problems with the new device ID parsing which uses regmap_bulk_read but might also show up in the cache sync sometimes. Add the missing flags to the regmap_config. Fixes: 26495252fe0d ("ASoC: cs42l73: Minor error paths fixups") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20210511175718.15416-2-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l73.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/cs42l73.c b/sound/soc/codecs/cs42l73.c index c3f974ec78e5..e92bacaab53f 100644 --- a/sound/soc/codecs/cs42l73.c +++ b/sound/soc/codecs/cs42l73.c @@ -1268,6 +1268,9 @@ static const struct regmap_config cs42l73_regmap = { .volatile_reg = cs42l73_volatile_register, .readable_reg = cs42l73_readable_register, .cache_type = REGCACHE_RBTREE, + + .use_single_read = true, + .use_single_write = true, }; static int cs42l73_i2c_probe(struct i2c_client *i2c_client, From 2a682f821941e28fb9ceaa1dd03ccfaea0448101 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 11 May 2021 18:57:16 +0100 Subject: [PATCH 128/730] ASoC: cs35l34: Add missing regmap use_single config This device requires single register transactions, this will definely cause problems with the new device ID parsing which uses regmap_bulk_read but might also show up in the cache sync sometimes. Add the missing flags to the regmap_config. Fixes: 8cb9b001635c ("ASoC: cs35l34: Minor error paths fixups") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20210511175718.15416-3-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l34.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/cs35l34.c b/sound/soc/codecs/cs35l34.c index 110ee2d06358..3d3c3c34dfe2 100644 --- a/sound/soc/codecs/cs35l34.c +++ b/sound/soc/codecs/cs35l34.c @@ -800,6 +800,9 @@ static struct regmap_config cs35l34_regmap = { .readable_reg = cs35l34_readable_register, .precious_reg = cs35l34_precious_register, .cache_type = REGCACHE_RBTREE, + + .use_single_read = true, + .use_single_write = true, }; static int cs35l34_handle_of_data(struct i2c_client *i2c_client, From b1078e9869531af4f968ba1b9edad51264943bb8 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 11 May 2021 18:57:17 +0100 Subject: [PATCH 129/730] ASoC: cs35l32: Add missing regmap use_single config This device requires single register transactions, this will definely cause problems with the new device ID parsing which uses regmap_bulk_read but might also show up in the cache sync sometimes. Add the missing flags to the regmap_config. Fixes: 283160f1419d ("ASoC: cs35l32: Minor error paths fixups") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20210511175718.15416-4-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l32.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/cs35l32.c b/sound/soc/codecs/cs35l32.c index f4067230ac42..88e79b9f52ed 100644 --- a/sound/soc/codecs/cs35l32.c +++ b/sound/soc/codecs/cs35l32.c @@ -261,6 +261,9 @@ static const struct regmap_config cs35l32_regmap = { .readable_reg = cs35l32_readable_register, .precious_reg = cs35l32_precious_register, .cache_type = REGCACHE_RBTREE, + + .use_single_read = true, + .use_single_write = true, }; static int cs35l32_handle_of_data(struct i2c_client *i2c_client, From 0e49a4de4564b3659a34b0b775d43b6b635b17fa Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 11 May 2021 18:57:18 +0100 Subject: [PATCH 130/730] ASoC: cs42l52: Minor tidy up of error paths Fixup a needlessly initialised variable and an unchecked return value. Reported-by: Pierre-Louis Bossart Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20210511175718.15416-5-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l56.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs42l56.c b/sound/soc/codecs/cs42l56.c index c44a5cdb796e..7cdffdf6b8cf 100644 --- a/sound/soc/codecs/cs42l56.c +++ b/sound/soc/codecs/cs42l56.c @@ -1175,7 +1175,7 @@ static int cs42l56_i2c_probe(struct i2c_client *i2c_client, struct cs42l56_platform_data *pdata = dev_get_platdata(&i2c_client->dev); int ret, i; - unsigned int devid = 0; + unsigned int devid; unsigned int alpha_rev, metal_rev; unsigned int reg; @@ -1245,6 +1245,11 @@ static int cs42l56_i2c_probe(struct i2c_client *i2c_client, } ret = regmap_read(cs42l56->regmap, CS42L56_CHIP_ID_1, ®); + if (ret) { + dev_err(&i2c_client->dev, "Failed to read chip ID: %d\n", ret); + return ret; + } + devid = reg & CS42L56_CHIP_ID_MASK; if (devid != CS42L56_DEVID) { dev_err(&i2c_client->dev, From 7907cad7d07e0055789ec0c534452f19dfe1fc80 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Wed, 12 May 2021 17:35:34 +0800 Subject: [PATCH 131/730] spi: sprd: Add missing MODULE_DEVICE_TABLE MODULE_DEVICE_TABLE is used to extract the device information out of the driver and builds a table when being compiled. If using this macro, kernel can find the driver if available when the device is plugged in, and then loads that driver and initializes the device. Signed-off-by: Chunyan Zhang Link: https://lore.kernel.org/r/20210512093534.243040-1-zhang.lyra@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-sprd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-sprd.c b/drivers/spi/spi-sprd.c index b41a75749b49..28e70db9bbba 100644 --- a/drivers/spi/spi-sprd.c +++ b/drivers/spi/spi-sprd.c @@ -1068,6 +1068,7 @@ static const struct of_device_id sprd_spi_of_match[] = { { .compatible = "sprd,sc9860-spi", }, { /* sentinel */ } }; +MODULE_DEVICE_TABLE(of, sprd_spi_of_match); static struct platform_driver sprd_spi_driver = { .driver = { From 6b69546912a57ff8c31061f98e56383cc0beffd3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 11 May 2021 17:09:12 +0300 Subject: [PATCH 132/730] spi: Assume GPIO CS active high in ACPI case Currently GPIO CS handling, when descriptors are in use, doesn't take into consideration that in ACPI case the default polarity is Active High and can't be altered. Instead we have to use the per-chip definition provided by SPISerialBus() resource. Fixes: 766c6b63aa04 ("spi: fix client driver breakages when using GPIO descriptors") Cc: Liguang Zhang Cc: Jay Fang Cc: Sven Van Asbroeck Signed-off-by: Andy Shevchenko Tested-by: Xin Hao Link: https://lore.kernel.org/r/20210511140912.30757-1-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index a565e7d6bf3b..98048af04abf 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -820,15 +820,29 @@ static void spi_set_cs(struct spi_device *spi, bool enable, bool force) if (spi->cs_gpiod || gpio_is_valid(spi->cs_gpio)) { if (!(spi->mode & SPI_NO_CS)) { - if (spi->cs_gpiod) - /* polarity handled by gpiolib */ - gpiod_set_value_cansleep(spi->cs_gpiod, activate); - else + if (spi->cs_gpiod) { + /* + * Historically ACPI has no means of the GPIO polarity and + * thus the SPISerialBus() resource defines it on the per-chip + * basis. In order to avoid a chain of negations, the GPIO + * polarity is considered being Active High. Even for the cases + * when _DSD() is involved (in the updated versions of ACPI) + * the GPIO CS polarity must be defined Active High to avoid + * ambiguity. That's why we use enable, that takes SPI_CS_HIGH + * into account. + */ + if (has_acpi_companion(&spi->dev)) + gpiod_set_value_cansleep(spi->cs_gpiod, !enable); + else + /* Polarity handled by GPIO library */ + gpiod_set_value_cansleep(spi->cs_gpiod, activate); + } else { /* * invert the enable line, as active low is * default for SPI. */ gpio_set_value_cansleep(spi->cs_gpio, !enable); + } } /* Some SPI masters need both GPIO CS & slave_select */ if ((spi->controller->flags & SPI_MASTER_GPIO_SS) && From 2ca4dcc4909d787ee153272f7efc2bff3b498720 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 11 May 2021 16:30:15 +0200 Subject: [PATCH 133/730] fs/mount_setattr: tighten permission checks We currently don't have any filesystems that support idmapped mounts which are mountable inside a user namespace. That was a deliberate decision for now as a userns root can just mount the filesystem themselves. So enforce this restriction explicitly until there's a real use-case for this. This way we can notice it and will have a chance to adapt and audit our translation helpers and fstests appropriately if we need to support such filesystems. Cc: Christoph Hellwig Cc: Al Viro Cc: stable@vger.kernel.org CC: linux-fsdevel@vger.kernel.org Suggested-by: Seth Forshee Signed-off-by: Christian Brauner --- fs/namespace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index f63337828e1c..c3f1a78ba369 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3855,8 +3855,12 @@ static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP)) return -EINVAL; + /* Don't yet support filesystem mountable in user namespaces. */ + if (m->mnt_sb->s_user_ns != &init_user_ns) + return -EINVAL; + /* We're not controlling the superblock. */ - if (!ns_capable(m->mnt_sb->s_user_ns, CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN)) return -EPERM; /* Mount has already been visible in the filesystem hierarchy. */ From 098116e7e640ba677d9e345cbee83d253c13d556 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 11 May 2021 10:35:21 +0200 Subject: [PATCH 134/730] net: really orphan skbs tied to closing sk If the owing socket is shutting down - e.g. the sock reference count already dropped to 0 and only sk_wmem_alloc is keeping the sock alive, skb_orphan_partial() becomes a no-op. When forwarding packets over veth with GRO enabled, the above causes refcount errors. This change addresses the issue with a plain skb_orphan() call in the critical scenario. Fixes: 9adc89af724f ("net: let skb_orphan_partial wake-up waiters.") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/sock.h | 4 +++- net/core/sock.c | 8 ++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 42bc5e1a627f..0e962d8bc73b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2231,13 +2231,15 @@ static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk) sk_mem_charge(sk, skb->truesize); } -static inline void skb_set_owner_sk_safe(struct sk_buff *skb, struct sock *sk) +static inline __must_check bool skb_set_owner_sk_safe(struct sk_buff *skb, struct sock *sk) { if (sk && refcount_inc_not_zero(&sk->sk_refcnt)) { skb_orphan(skb); skb->destructor = sock_efree; skb->sk = sk; + return true; } + return false; } void sk_reset_timer(struct sock *sk, struct timer_list *timer, diff --git a/net/core/sock.c b/net/core/sock.c index c761c4a0b66b..958614ea16ed 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2132,10 +2132,10 @@ void skb_orphan_partial(struct sk_buff *skb) if (skb_is_tcp_pure_ack(skb)) return; - if (can_skb_orphan_partial(skb)) - skb_set_owner_sk_safe(skb, skb->sk); - else - skb_orphan(skb); + if (can_skb_orphan_partial(skb) && skb_set_owner_sk_safe(skb, skb->sk)) + return; + + skb_orphan(skb); } EXPORT_SYMBOL(skb_orphan_partial); From aa473d6ceb821d7c568c64cca7fff3e86ba9d789 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 11 May 2021 19:10:50 -0400 Subject: [PATCH 135/730] bnxt_en: Fix and improve .ndo_features_check(). Jakub Kicinski pointed out that we need to handle ipv6 extension headers and to explicitly check for supported tunnel types in .ndo_features_check(). For ipv6 extension headers, the hardware supports up to 2 ext. headers and each must be <= 64 bytes. For tunneled packets, the supported packets are UDP with supported VXLAN and Geneve ports, GRE, and IPIP. v3: More improvements based on Alexander Duyck's valuable feedback - Remove the jump lable in bnxt_features_check() and restructure it so that the TCP/UDP is check is consolidated in bnxt_exthdr_check(). v2: Add missing step to check inner ipv6 header for UDP and GRE tunnels. Check TCP/UDP next header after skipping ipv6 ext headers for non-tunneled packets and for inner ipv6. (Both feedback from Alexander Duyck) Reviewed-by: Edwin Peer Reviewed-by: Pavan Chebbi Fixes: 1698d600b361 ("bnxt_en: Implement .ndo_features_check().") Signed-off-by: Michael Chan Reviewed-by: Alexander Duyck Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 126 ++++++++++++++++++---- 1 file changed, 107 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 2985844634c8..46be4046ee51 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10785,37 +10785,125 @@ static int bnxt_set_features(struct net_device *dev, netdev_features_t features) return rc; } +static bool bnxt_exthdr_check(struct bnxt *bp, struct sk_buff *skb, int nw_off, + u8 **nextp) +{ + struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + nw_off); + int hdr_count = 0; + u8 *nexthdr; + int start; + + /* Check that there are at most 2 IPv6 extension headers, no + * fragment header, and each is <= 64 bytes. + */ + start = nw_off + sizeof(*ip6h); + nexthdr = &ip6h->nexthdr; + while (ipv6_ext_hdr(*nexthdr)) { + struct ipv6_opt_hdr *hp; + int hdrlen; + + if (hdr_count >= 3 || *nexthdr == NEXTHDR_NONE || + *nexthdr == NEXTHDR_FRAGMENT) + return false; + hp = __skb_header_pointer(NULL, start, sizeof(*hp), skb->data, + skb_headlen(skb), NULL); + if (!hp) + return false; + if (*nexthdr == NEXTHDR_AUTH) + hdrlen = ipv6_authlen(hp); + else + hdrlen = ipv6_optlen(hp); + + if (hdrlen > 64) + return false; + nexthdr = &hp->nexthdr; + start += hdrlen; + hdr_count++; + } + if (nextp) { + /* Caller will check inner protocol */ + if (skb->encapsulation) { + *nextp = nexthdr; + return true; + } + *nextp = NULL; + } + /* Only support TCP/UDP for non-tunneled ipv6 and inner ipv6 */ + return *nexthdr == IPPROTO_TCP || *nexthdr == IPPROTO_UDP; +} + +/* For UDP, we can only handle 1 Vxlan port and 1 Geneve port. */ +static bool bnxt_udp_tunl_check(struct bnxt *bp, struct sk_buff *skb) +{ + struct udphdr *uh = udp_hdr(skb); + __be16 udp_port = uh->dest; + + if (udp_port != bp->vxlan_port && udp_port != bp->nge_port) + return false; + if (skb->inner_protocol_type == ENCAP_TYPE_ETHER) { + struct ethhdr *eh = inner_eth_hdr(skb); + + switch (eh->h_proto) { + case htons(ETH_P_IP): + return true; + case htons(ETH_P_IPV6): + return bnxt_exthdr_check(bp, skb, + skb_inner_network_offset(skb), + NULL); + } + } + return false; +} + +static bool bnxt_tunl_check(struct bnxt *bp, struct sk_buff *skb, u8 l4_proto) +{ + switch (l4_proto) { + case IPPROTO_UDP: + return bnxt_udp_tunl_check(bp, skb); + case IPPROTO_IPIP: + return true; + case IPPROTO_GRE: { + switch (skb->inner_protocol) { + default: + return false; + case htons(ETH_P_IP): + return true; + case htons(ETH_P_IPV6): + fallthrough; + } + } + case IPPROTO_IPV6: + /* Check ext headers of inner ipv6 */ + return bnxt_exthdr_check(bp, skb, skb_inner_network_offset(skb), + NULL); + } + return false; +} + static netdev_features_t bnxt_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { - struct bnxt *bp; - __be16 udp_port; - u8 l4_proto = 0; + struct bnxt *bp = netdev_priv(dev); + u8 *l4_proto; features = vlan_features_check(skb, features); - if (!skb->encapsulation) - return features; - switch (vlan_get_protocol(skb)) { case htons(ETH_P_IP): - l4_proto = ip_hdr(skb)->protocol; + if (!skb->encapsulation) + return features; + l4_proto = &ip_hdr(skb)->protocol; + if (bnxt_tunl_check(bp, skb, *l4_proto)) + return features; break; case htons(ETH_P_IPV6): - l4_proto = ipv6_hdr(skb)->nexthdr; + if (!bnxt_exthdr_check(bp, skb, skb_network_offset(skb), + &l4_proto)) + break; + if (!l4_proto || bnxt_tunl_check(bp, skb, *l4_proto)) + return features; break; - default: - return features; } - - if (l4_proto != IPPROTO_UDP) - return features; - - bp = netdev_priv(dev); - /* For UDP, we can only handle 1 Vxlan port and 1 Geneve port. */ - udp_port = udp_hdr(skb)->dest; - if (udp_port == bp->vxlan_port || udp_port == bp->nge_port) - return features; return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } From 171c3b151118a2fe0fc1e2a9d1b5a1570cfe82d2 Mon Sep 17 00:00:00 2001 From: Richard Sanger Date: Wed, 12 May 2021 13:31:22 +1200 Subject: [PATCH 136/730] net: packetmmap: fix only tx timestamp on request The packetmmap tx ring should only return timestamps if requested via setsockopt PACKET_TIMESTAMP, as documented. This allows compatibility with non-timestamp aware user-space code which checks tp_status == TP_STATUS_AVAILABLE; not expecting additional timestamp flags to be set in tp_status. Fixes: b9c32fb27170 ("packet: if hw/sw ts enabled in rx/tx ring, report which ts we got") Cc: Daniel Borkmann Cc: Willem de Bruijn Signed-off-by: Richard Sanger Signed-off-by: David S. Miller --- net/packet/af_packet.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ba96db1880ea..ae906eb4b269 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -422,7 +422,8 @@ static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts, ktime_to_timespec64_cond(shhwtstamps->hwtstamp, ts)) return TP_STATUS_TS_RAW_HARDWARE; - if (ktime_to_timespec64_cond(skb->tstamp, ts)) + if ((flags & SOF_TIMESTAMPING_SOFTWARE) && + ktime_to_timespec64_cond(skb->tstamp, ts)) return TP_STATUS_TS_SOFTWARE; return 0; @@ -2340,7 +2341,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, skb_copy_bits(skb, 0, h.raw + macoff, snaplen); - if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) + /* Always timestamp; prefer an existing software timestamp taken + * closer to the time of capture. + */ + ts_status = tpacket_get_timestamp(skb, &ts, + po->tp_tstamp | SOF_TIMESTAMPING_SOFTWARE); + if (!ts_status) ktime_get_real_ts64(&ts); status |= ts_status; From 619fee9eb13b5d29e4267cb394645608088c28a8 Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Wed, 12 May 2021 10:43:59 +0800 Subject: [PATCH 137/730] net: fec: fix the potential memory leak in fec_enet_init() If the memory allocated for cbd_base is failed, it should free the memory allocated for the queues, otherwise it causes memory leak. And if the memory allocated for the queues is failed, it can return error directly. Fixes: 59d0f7465644 ("net: fec: init multi queue date structure") Signed-off-by: Fugang Duan Signed-off-by: Joakim Zhang Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index f2065f9d02e6..a2ada39c22d7 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3290,7 +3290,9 @@ static int fec_enet_init(struct net_device *ndev) return ret; } - fec_enet_alloc_queue(ndev); + ret = fec_enet_alloc_queue(ndev); + if (ret) + return ret; bd_size = (fep->total_tx_ring_size + fep->total_rx_ring_size) * dsize; @@ -3298,7 +3300,8 @@ static int fec_enet_init(struct net_device *ndev) cbd_base = dmam_alloc_coherent(&fep->pdev->dev, bd_size, &bd_dma, GFP_KERNEL); if (!cbd_base) { - return -ENOMEM; + ret = -ENOMEM; + goto free_queue_mem; } /* Get the Ethernet address */ @@ -3376,6 +3379,10 @@ static int fec_enet_init(struct net_device *ndev) fec_enet_update_ethtool_stats(ndev); return 0; + +free_queue_mem: + fec_enet_free_queue(ndev); + return ret; } #ifdef CONFIG_OF From 052fcc4531824c38f8e0ad88213c1be102a0b124 Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Wed, 12 May 2021 10:44:00 +0800 Subject: [PATCH 138/730] net: fec: add defer probe for of_get_mac_address If MAC address read from nvmem efuse by calling .of_get_mac_address(), but nvmem efuse is registered later than the driver, then it return -EPROBE_DEFER value. So modify the driver to support defer probe when read MAC address from nvmem efuse. Signed-off-by: Fugang Duan Signed-off-by: Joakim Zhang Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index a2ada39c22d7..ad82cffc6f3f 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1662,7 +1662,7 @@ static int fec_enet_rx_napi(struct napi_struct *napi, int budget) } /* ------------------------------------------------------------------------- */ -static void fec_get_mac(struct net_device *ndev) +static int fec_get_mac(struct net_device *ndev) { struct fec_enet_private *fep = netdev_priv(ndev); unsigned char *iap, tmpaddr[ETH_ALEN]; @@ -1685,6 +1685,8 @@ static void fec_get_mac(struct net_device *ndev) ret = of_get_mac_address(np, tmpaddr); if (!ret) iap = tmpaddr; + else if (ret == -EPROBE_DEFER) + return ret; } } @@ -1723,7 +1725,7 @@ static void fec_get_mac(struct net_device *ndev) eth_hw_addr_random(ndev); dev_info(&fep->pdev->dev, "Using random MAC address: %pM\n", ndev->dev_addr); - return; + return 0; } memcpy(ndev->dev_addr, iap, ETH_ALEN); @@ -1731,6 +1733,8 @@ static void fec_get_mac(struct net_device *ndev) /* Adjust MAC if using macaddr */ if (iap == macaddr) ndev->dev_addr[ETH_ALEN-1] = macaddr[ETH_ALEN-1] + fep->dev_id; + + return 0; } /* ------------------------------------------------------------------------- */ @@ -3305,7 +3309,10 @@ static int fec_enet_init(struct net_device *ndev) } /* Get the Ethernet address */ - fec_get_mac(ndev); + ret = fec_get_mac(ndev); + if (ret) + goto free_queue_mem; + /* make sure MAC we just acquired is programmed into the hw */ fec_set_mac_address(ndev, NULL); From e5cc361e21648b75f935f9571d4003aaee480214 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 12 May 2021 13:11:43 +0300 Subject: [PATCH 139/730] octeontx2-pf: fix a buffer overflow in otx2_set_rxfh_context() This function is called from ethtool_set_rxfh() and "*rss_context" comes from the user. Add some bounds checking to prevent memory corruption. Fixes: 81a4362016e7 ("octeontx2-pf: Add RSS multi group support") Signed-off-by: Dan Carpenter Acked-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index f4962a97a075..9d9a2e438acf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -786,6 +786,10 @@ static int otx2_set_rxfh_context(struct net_device *dev, const u32 *indir, if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; + if (*rss_context != ETH_RXFH_CONTEXT_ALLOC && + *rss_context >= MAX_RSS_GROUPS) + return -EINVAL; + rss = &pfvf->hw.rss_info; if (!rss->enable) { From 9c1bb37f8cad5e2ee1933fa1da9a6baa7876a8e4 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 12 May 2021 13:15:29 +0200 Subject: [PATCH 140/730] ptp: ocp: Fix a resource leak in an error handling path If an error occurs after a successful 'pci_ioremap_bar()' call, it must be undone by a corresponding 'pci_iounmap()' call, as already done in the remove function. Fixes: a7e1abad13f3 ("ptp: Add clock driver for the OpenCompute TimeCard.") Signed-off-by: Christophe JAILLET Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/ptp/ptp_ocp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 530e5f90095e..0d1034e3ed0f 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -324,7 +324,7 @@ ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (!bp->base) { dev_err(&pdev->dev, "io_remap bar0\n"); err = -ENOMEM; - goto out; + goto out_release_regions; } bp->reg = bp->base + OCP_REGISTER_OFFSET; bp->tod = bp->base + TOD_REGISTER_OFFSET; @@ -347,6 +347,8 @@ ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; out: + pci_iounmap(pdev, bp->base); +out_release_regions: pci_release_regions(pdev); out_disable: pci_disable_device(pdev); From ca14f9597f4fdb3679453aec7bb2807f0b8b7363 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 12 May 2021 10:00:46 -0400 Subject: [PATCH 141/730] =?UTF-8?q?MAINTAINERS:=20nfc:=20drop=20Cl=C3=A9me?= =?UTF-8?q?nt=20Perrochaud=20from=20NXP-NCI?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Emails to Clément Perrochaud bounce with permanent error "user does not exist", so remove Clément Perrochaud from NXP-NCI driver maintainers entry. Signed-off-by: Krzysztof Kozlowski Acked-by: Mark Greer Signed-off-by: David S. Miller --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index bd7aff0c120f..ec723b48769e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13205,7 +13205,6 @@ F: Documentation/devicetree/bindings/sound/tfa9879.txt F: sound/soc/codecs/tfa9879* NXP-NCI NFC DRIVER -M: Clément Perrochaud R: Charles Gorand L: linux-nfc@lists.01.org (moderated for non-subscribers) S: Supported From 8aa5713d8b2ce1ea67bdf212eb61bfcff3c52202 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 12 May 2021 10:43:18 -0400 Subject: [PATCH 142/730] MAINTAINERS: nfc: add Krzysztof Kozlowski as maintainer The NFC subsystem is orphaned. I am happy to spend some cycles to review the patches, send pull requests and in general keep the NFC subsystem running. Signed-off-by: Krzysztof Kozlowski Acked-by: Mark Greer Signed-off-by: David S. Miller --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index ec723b48769e..7020293a1347 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12896,8 +12896,9 @@ F: include/uapi/linux/nexthop.h F: net/ipv4/nexthop.c NFC SUBSYSTEM +M: Krzysztof Kozlowski L: netdev@vger.kernel.org -S: Orphan +S: Maintained F: Documentation/devicetree/bindings/net/nfc/ F: drivers/nfc/ F: include/linux/platform_data/nfcmrvl.h From 4a64541f2cebef54ea8d9f53ac5067328b8e02d8 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 12 May 2021 10:43:19 -0400 Subject: [PATCH 143/730] MAINTAINERS: nfc: include linux-nfc mailing list Keep all NFC related patches in existing linux-nfc@lists.01.org mailing list. Signed-off-by: Krzysztof Kozlowski Acked-by: Mark Greer Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 7020293a1347..1d834bebf469 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12897,6 +12897,7 @@ F: net/ipv4/nexthop.c NFC SUBSYSTEM M: Krzysztof Kozlowski +L: linux-nfc@lists.01.org (moderated for non-subscribers) L: netdev@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/net/nfc/ From 832ce924b1a14e139e184a6da9f5a69a5e47b256 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 12 May 2021 13:02:48 +0300 Subject: [PATCH 144/730] chelsio/chtls: unlock on error in chtls_pt_recvmsg() This error path needs to release some memory and call release_sock(sk); before returning. Fixes: 6919a8264a32 ("Crypto/chtls: add/delete TLS header in driver") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c index 188d871f6b8c..c320cc8ca68d 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c @@ -1564,8 +1564,10 @@ found_ok_skb: cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE, sizeof(thdr->type), &thdr->type); - if (cerr && thdr->type != TLS_RECORD_TYPE_DATA) - return -EIO; + if (cerr && thdr->type != TLS_RECORD_TYPE_DATA) { + copied = -EIO; + break; + } /* don't send tls header, skip copy */ goto skip_copy; } From 9e9da02a68d4b7feaa10022fd1135d9b3f2f72d7 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 11 May 2021 16:16:33 +0300 Subject: [PATCH 145/730] percpu_ref: Don't opencode percpu_ref_is_dying Signed-off-by: Nikolay Borisov Signed-off-by: Dennis Zhou --- lib/percpu-refcount.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index a1071cdefb5a..af9302141bcf 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -275,7 +275,7 @@ static void __percpu_ref_switch_mode(struct percpu_ref *ref, wait_event_lock_irq(percpu_ref_switch_waitq, !data->confirm_switch, percpu_ref_switch_lock); - if (data->force_atomic || (ref->percpu_count_ptr & __PERCPU_REF_DEAD)) + if (data->force_atomic || percpu_ref_is_dying(ref)) __percpu_ref_switch_to_atomic(ref, confirm_switch); else __percpu_ref_switch_to_percpu(ref); @@ -385,7 +385,7 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref, spin_lock_irqsave(&percpu_ref_switch_lock, flags); - WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD, + WARN_ONCE(percpu_ref_is_dying(ref), "%s called more than once on %ps!", __func__, ref->data->release); @@ -465,7 +465,7 @@ void percpu_ref_resurrect(struct percpu_ref *ref) spin_lock_irqsave(&percpu_ref_switch_lock, flags); - WARN_ON_ONCE(!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)); + WARN_ON_ONCE(!percpu_ref_is_dying(ref)); WARN_ON_ONCE(__ref_is_percpu(ref, &percpu_count)); ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD; From c547addba7096debac4f99cdfe869a32a81081e2 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 11 May 2021 16:17:37 +0300 Subject: [PATCH 146/730] MAINTAINERS: Add lib/percpu* as part of percpu entry Without this patch get_maintainers.pl on a patch which modified lib/percpu_refcount.c produces: Jens Axboe (commit_signer:2/5=40%) Ming Lei (commit_signer:2/5=40%,authored:2/5=40%,added_lines:99/114=87%,removed_lines:34/43=79%) "Paul E. McKenney" (commit_signer:1/5=20%,authored:1/5=20%,added_lines:9/114=8%,removed_lines:3/43=7%) Tejun Heo (commit_signer:1/5=20%) Andrew Morton (commit_signer:1/5=20%) Nikolay Borisov (authored:1/5=20%,removed_lines:3/43=7%) Joe Perches (authored:1/5=20%,removed_lines:3/43=7%) linux-kernel@vger.kernel.org (open list) Whereas with the patch applied it now (properly) prints: Dennis Zhou (maintainer:PER-CPU MEMORY ALLOCATOR) Tejun Heo (maintainer:PER-CPU MEMORY ALLOCATOR) Christoph Lameter (maintainer:PER-CPU MEMORY ALLOCATOR) linux-kernel@vger.kernel.org (open list) Signed-off-by: Nikolay Borisov [Dennis: updated list to linux-mm@kvack.org] Signed-off-by: Dennis Zhou --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index bd7aff0c120f..9599e313d7f7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14317,10 +14317,12 @@ PER-CPU MEMORY ALLOCATOR M: Dennis Zhou M: Tejun Heo M: Christoph Lameter +L: linux-mm@kvack.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu.git F: arch/*/include/asm/percpu.h F: include/linux/percpu*.h +F: lib/percpu*.c F: mm/percpu*.c PER-TASK DELAY ACCOUNTING From da096fbccd52803db3edd9dd0c5ae4079d31c456 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 4 May 2021 13:59:09 +0100 Subject: [PATCH 147/730] soundwire: qcom: fix handling of qcom,ports-block-pack-mode Support to "qcom,ports-block-pack-mode" was added at later stages to support a variant of Qualcomm SoundWire controllers available on Apps processor. However the older versions of the SoundWire controller which are embedded in WCD Codecs do not need this property. So returning on error for those cases will break boards like DragonBoard DB845c and Lenovo Yoga C630. This patch fixes error handling on this property considering older usecases. Fixes: a5943e4fb14e ("soundwire: qcom: check of_property_read status") Reported-by: Amit Pundir Signed-off-by: Srinivas Kandagatla Tested-by: Amit Pundir Link: https://lore.kernel.org/r/20210504125909.16108-1-srinivas.kandagatla@linaro.org Signed-off-by: Vinod Koul --- drivers/soundwire/qcom.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c index 2827085a323b..0ef79d60e88e 100644 --- a/drivers/soundwire/qcom.c +++ b/drivers/soundwire/qcom.c @@ -1150,8 +1150,16 @@ static int qcom_swrm_get_port_config(struct qcom_swrm_ctrl *ctrl) ret = of_property_read_u8_array(np, "qcom,ports-block-pack-mode", bp_mode, nports); - if (ret) - return ret; + if (ret) { + u32 version; + + ctrl->reg_read(ctrl, SWRM_COMP_HW_VERSION, &version); + + if (version <= 0x01030000) + memset(bp_mode, SWR_INVALID_PARAM, QCOM_SDW_MAX_PORTS); + else + return ret; + } memset(hstart, SWR_INVALID_PARAM, QCOM_SDW_MAX_PORTS); of_property_read_u8_array(np, "qcom,ports-hstart", hstart, nports); From 3ddb4ce1e6e3bd112778ab93bbd9092f23a878ec Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 26 Apr 2021 11:55:14 +0100 Subject: [PATCH 148/730] serial: tegra: Fix a mask operation that is always true Currently the expression lsr | UART_LSR_TEMT is always true and this seems suspect. I believe the intent was to mask lsr with UART_LSR_TEMT to check that bit, so the expression should be using the & operator instead. Fix this. Fixes: b9c2470fb150 ("serial: tegra: flush the RX fifo on frame error") Signed-off-by: Colin Ian King Cc: stable Link: https://lore.kernel.org/r/20210426105514.23268-1-colin.king@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial-tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/serial-tegra.c b/drivers/tty/serial/serial-tegra.c index bbae072a125d..222032792d6c 100644 --- a/drivers/tty/serial/serial-tegra.c +++ b/drivers/tty/serial/serial-tegra.c @@ -338,7 +338,7 @@ static void tegra_uart_fifo_reset(struct tegra_uart_port *tup, u8 fcr_bits) do { lsr = tegra_uart_read(tup, UART_LSR); - if ((lsr | UART_LSR_TEMT) && !(lsr & UART_LSR_DR)) + if ((lsr & UART_LSR_TEMT) && !(lsr & UART_LSR_DR)) break; udelay(1); } while (--tmout); From 5e722b217ad3cf41f5504db80a68062df82b5242 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Fri, 7 May 2021 13:57:19 +0200 Subject: [PATCH 149/730] serial: core: fix suspicious security_locked_down() call The commit that added this check did so in a very strange way - first security_locked_down() is called, its value stored into retval, and if it's nonzero, then an additional check is made for (change_irq || change_port), and if this is true, the function returns. However, if the goto exit branch is not taken, the code keeps the retval value and continues executing the function. Then, depending on whether uport->ops->verify_port is set, the retval value may or may not be reset to zero and eventually the error value from security_locked_down() may abort the function a few lines below. I will go out on a limb and assume that this isn't the intended behavior and that an error value from security_locked_down() was supposed to abort the function only in case (change_irq || change_port) is true. Note that security_locked_down() should be called last in any series of checks, since the SELinux implementation of this hook will do a check against the policy and generate an audit record in case of denial. If the operation was to carry on after calling security_locked_down(), then the SELinux denial record would be bogus. See commit 59438b46471a ("security,lockdown,selinux: implement SELinux lockdown") for how SELinux implements this hook. Fixes: 794edf30ee6c ("lockdown: Lock down TIOCSSERIAL") Acked-by: Kees Cook Signed-off-by: Ondrej Mosnacek Cc: stable Link: https://lore.kernel.org/r/20210507115719.140799-1-omosnace@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 87f7127b57e6..18ff85a83f80 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -863,9 +863,11 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port, goto check_and_exit; } - retval = security_locked_down(LOCKDOWN_TIOCSSERIAL); - if (retval && (change_irq || change_port)) - goto exit; + if (change_irq || change_port) { + retval = security_locked_down(LOCKDOWN_TIOCSSERIAL); + if (retval) + goto exit; + } /* * Ask the low level driver to verify the settings. From 2ea2e019c190ee3973ef7bcaf829d8762e56e635 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 10 May 2021 14:07:55 +0200 Subject: [PATCH 150/730] serial: sh-sci: Fix off-by-one error in FIFO threshold register setting The Receive FIFO Data Count Trigger field (RTRG[6:0]) in the Receive FIFO Data Count Trigger Register (HSRTRGR) of HSCIF can only hold values ranging from 0-127. As the FIFO size is equal to 128 on HSCIF, the user can write an out-of-range value, touching reserved bits. Fix this by limiting the trigger value to the FIFO size minus one. Reverse the order of the checks, to avoid rx_trig becoming zero if the FIFO size is one. Note that this change has no impact on other SCIF variants, as their maximum supported trigger value is lower than the FIFO size anyway, and the code below takes care of enforcing these limits. Fixes: a380ed461f66d1b8 ("serial: sh-sci: implement FIFO threshold register setting") Reported-by: Linh Phung Reviewed-by: Wolfram Sang Reviewed-by: Ulrich Hecht Signed-off-by: Geert Uytterhoeven Cc: stable Link: https://lore.kernel.org/r/5eff320aef92ffb33d00e57979fd3603bbb4a70f.1620648218.git.geert+renesas@glider.be Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index ef37fdf37612..4baf1316ea72 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -1023,10 +1023,10 @@ static int scif_set_rtrg(struct uart_port *port, int rx_trig) { unsigned int bits; + if (rx_trig >= port->fifosize) + rx_trig = port->fifosize - 1; if (rx_trig < 1) rx_trig = 1; - if (rx_trig >= port->fifosize) - rx_trig = port->fifosize; /* HSCIF can be set to an arbitrary level. */ if (sci_getreg(port, HSRTRGR)->size) { From 3c35d2a960c0077a4cb09bf4989f45d289332ea0 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Wed, 12 May 2021 23:04:13 +0200 Subject: [PATCH 151/730] serial: 8250_dw: Add device HID for new AMD UART controller Add device HID AMDI0022 to the AMD UART controller driver match table and create a platform device for it. This controller can be found on Microsoft Surface Laptop 4 devices and seems similar enough that we can just copy the existing AMDI0020 entries. Cc: # 5.10+ Tested-by: Sachi King Acked-by: Andy Shevchenko # for 8250_dw part Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20210512210413.1982933-1-luzmaximilian@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_apd.c | 1 + drivers/tty/serial/8250/8250_dw.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c index 0ec5b3f69112..6e02448d15d9 100644 --- a/drivers/acpi/acpi_apd.c +++ b/drivers/acpi/acpi_apd.c @@ -226,6 +226,7 @@ static const struct acpi_device_id acpi_apd_device_ids[] = { { "AMDI0010", APD_ADDR(wt_i2c_desc) }, { "AMD0020", APD_ADDR(cz_uart_desc) }, { "AMDI0020", APD_ADDR(cz_uart_desc) }, + { "AMDI0022", APD_ADDR(cz_uart_desc) }, { "AMD0030", }, { "AMD0040", APD_ADDR(fch_misc_desc)}, { "HYGO0010", APD_ADDR(wt_i2c_desc) }, diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index 9e204f9b799a..a3a0154da567 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -714,6 +714,7 @@ static const struct acpi_device_id dw8250_acpi_match[] = { { "APMC0D08", 0}, { "AMD0020", 0 }, { "AMDI0020", 0 }, + { "AMDI0022", 0 }, { "BRCM2032", 0 }, { "HISI0031", 0 }, { }, From a73b6a3b4109ce2ed01dbc51a6c1551a6431b53c Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 28 Apr 2021 15:25:34 -0700 Subject: [PATCH 152/730] ics932s401: fix broken handling of errors when word reading fails In commit b05ae01fdb89, someone tried to make the driver handle i2c read errors by simply zeroing out the register contents, but for some reason left unaltered the code that sets the cached register value the function call return value. The original patch was authored by a member of the Underhanded Mangle-happy Nerds, I'm not terribly surprised. I don't have the hardware anymore so I can't test this, but it seems like a pretty obvious API usage fix to me... Fixes: b05ae01fdb89 ("misc/ics932s401: Add a missing check to i2c_smbus_read_word_data") Signed-off-by: Darrick J. Wong Link: https://lore.kernel.org/r/20210428222534.GJ3122264@magnolia Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ics932s401.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/ics932s401.c b/drivers/misc/ics932s401.c index 2bdf560ee681..0f9ea75b0b18 100644 --- a/drivers/misc/ics932s401.c +++ b/drivers/misc/ics932s401.c @@ -134,7 +134,7 @@ static struct ics932s401_data *ics932s401_update_device(struct device *dev) for (i = 0; i < NUM_MIRRORED_REGS; i++) { temp = i2c_smbus_read_word_data(client, regs_to_copy[i]); if (temp < 0) - data->regs[regs_to_copy[i]] = 0; + temp = 0; data->regs[regs_to_copy[i]] = temp >> 8; } From 6a3239a738d86c5e9b5aad17fefe2c2bfd6ced83 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 28 Apr 2021 09:49:31 +0200 Subject: [PATCH 153/730] Revert "crypto: cavium/nitrox - add an error message to explain the failure of pci_request_mem_regions" This reverts commit 9fcddaf2e28d779cb946d23838ba6d50f299aa80 as it was submitted under a fake name and we can not knowingly accept anonymous contributions to the repository. This commit was part of a submission "test" to the Linux kernel community by some "researchers" at umn.edu. As outlined at: https://www-users.cs.umn.edu/%7Ekjlu/papers/full-disclosure.pdf it was done so as an attempt to submit a known-buggy patch to see if it could get by our review. However, the submission turned out to actually be correct, and not have a bug in it as the author did not understand how the PCI driver model works at all, and so the submission was accepted. As this change is of useless consequence, there is no loss of functionality in reverting it. Cc: "David S. Miller" Cc: Christophe JAILLET Cc: linux-crypto@vger.kernel.org Acked-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman Email: Herbert Xu Link: https://lore.kernel.org/r/YIkTi9a3nnL50wMq@kroah.com Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/cavium/nitrox/nitrox_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c index facc8e6bc580..d385daf2c71c 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_main.c +++ b/drivers/crypto/cavium/nitrox/nitrox_main.c @@ -442,7 +442,6 @@ static int nitrox_probe(struct pci_dev *pdev, err = pci_request_mem_regions(pdev, nitrox_driver_name); if (err) { pci_disable_device(pdev); - dev_err(&pdev->dev, "Failed to request mem regions!\n"); return err; } pci_set_master(pdev); From 3e465fc3846734e9489273d889f19cc17b4cf4bd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:56:30 +0200 Subject: [PATCH 154/730] Revert "media: rcar_drif: fix a memory disclosure" This reverts commit d39083234c60519724c6ed59509a2129fd2aed41. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, it was determined that this commit is not needed at all as the media core already prevents memory disclosure on this codepath, so just drop the extra memset happening here. Cc: Kangjie Lu Cc: Geert Uytterhoeven Cc: Hans Verkuil Cc: Mauro Carvalho Chehab Fixes: d39083234c60 ("media: rcar_drif: fix a memory disclosure") Cc: stable Reviewed-by: Mauro Carvalho Chehab Reviewed-by: Fabrizio Castro Link: https://lore.kernel.org/r/20210503115736.2104747-4-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/rcar_drif.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/media/platform/rcar_drif.c b/drivers/media/platform/rcar_drif.c index 83bd9a412a56..1e3b68a8743a 100644 --- a/drivers/media/platform/rcar_drif.c +++ b/drivers/media/platform/rcar_drif.c @@ -915,7 +915,6 @@ static int rcar_drif_g_fmt_sdr_cap(struct file *file, void *priv, { struct rcar_drif_sdr *sdr = video_drvdata(file); - memset(f->fmt.sdr.reserved, 0, sizeof(f->fmt.sdr.reserved)); f->fmt.sdr.pixelformat = sdr->fmt->pixelformat; f->fmt.sdr.buffersize = sdr->fmt->buffersize; From 99ae3417672a6d4a3bf68d4fc43d7c6ca074d477 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:56:31 +0200 Subject: [PATCH 155/730] Revert "hwmon: (lm80) fix a missing check of bus read in lm80 probe" This reverts commit 9aa3aa15f4c2f74f47afd6c5db4b420fadf3f315. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, it was determined that this commit is not needed at all so just revert it. Also, the call to lm80_init_client() was not properly handled, so if error handling is needed in the lm80_probe() function, then it should be done properly, not half-baked like the commit being reverted here did. Cc: Kangjie Lu Fixes: 9aa3aa15f4c2 ("hwmon: (lm80) fix a missing check of bus read in lm80 probe") Cc: stable Acked-by: Guenter Roeck Link: https://lore.kernel.org/r/20210503115736.2104747-5-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/lm80.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/hwmon/lm80.c b/drivers/hwmon/lm80.c index ac4adb44b224..97ab491d2922 100644 --- a/drivers/hwmon/lm80.c +++ b/drivers/hwmon/lm80.c @@ -596,7 +596,6 @@ static int lm80_probe(struct i2c_client *client) struct device *dev = &client->dev; struct device *hwmon_dev; struct lm80_data *data; - int rv; data = devm_kzalloc(dev, sizeof(struct lm80_data), GFP_KERNEL); if (!data) @@ -609,14 +608,8 @@ static int lm80_probe(struct i2c_client *client) lm80_init_client(client); /* A few vars need to be filled upon startup */ - rv = lm80_read_value(client, LM80_REG_FAN_MIN(1)); - if (rv < 0) - return rv; - data->fan[f_min][0] = rv; - rv = lm80_read_value(client, LM80_REG_FAN_MIN(2)); - if (rv < 0) - return rv; - data->fan[f_min][1] = rv; + data->fan[f_min][0] = lm80_read_value(client, LM80_REG_FAN_MIN(1)); + data->fan[f_min][1] = lm80_read_value(client, LM80_REG_FAN_MIN(2)); hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name, data, lm80_groups); From 754f39158441f4c0d7a8255209dd9a939f08ce80 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:56:32 +0200 Subject: [PATCH 156/730] Revert "serial: mvebu-uart: Fix to avoid a potential NULL pointer dereference" This reverts commit 32f47179833b63de72427131169809065db6745e. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be not be needed at all as the change was useless because this function can only be called when of_match_device matched on something. So it should be reverted. Cc: Aditya Pakki Cc: stable Fixes: 32f47179833b ("serial: mvebu-uart: Fix to avoid a potential NULL pointer dereference") Acked-by: Jiri Slaby Link: https://lore.kernel.org/r/20210503115736.2104747-6-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/mvebu-uart.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c index e0c00a1b0763..51b0ecabf2ec 100644 --- a/drivers/tty/serial/mvebu-uart.c +++ b/drivers/tty/serial/mvebu-uart.c @@ -818,9 +818,6 @@ static int mvebu_uart_probe(struct platform_device *pdev) return -EINVAL; } - if (!match) - return -ENODEV; - /* Assume that all UART ports have a DT alias or none has */ id = of_alias_get_id(pdev->dev.of_node, "serial"); if (!pdev->dev.of_node || id < 0) From fd013265e5b5576a74a033920d6c571e08d7c423 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:56:33 +0200 Subject: [PATCH 157/730] Revert "media: usb: gspca: add a missed check for goto_low_power" This reverts commit 5b711870bec4dc9a6d705d41e127e73944fa3650. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to do does nothing useful as a user can do nothing with this information and if an error did happen, the code would continue on as before. Because of this, just revert it. Cc: Kangjie Lu Cc: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/20210503115736.2104747-7-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/gspca/cpia1.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/media/usb/gspca/cpia1.c b/drivers/media/usb/gspca/cpia1.c index a4f7431486f3..d93d384286c1 100644 --- a/drivers/media/usb/gspca/cpia1.c +++ b/drivers/media/usb/gspca/cpia1.c @@ -1424,7 +1424,6 @@ static int sd_config(struct gspca_dev *gspca_dev, { struct sd *sd = (struct sd *) gspca_dev; struct cam *cam; - int ret; sd->mainsFreq = FREQ_DEF == V4L2_CID_POWER_LINE_FREQUENCY_60HZ; reset_camera_params(gspca_dev); @@ -1436,10 +1435,7 @@ static int sd_config(struct gspca_dev *gspca_dev, cam->cam_mode = mode; cam->nmodes = ARRAY_SIZE(mode); - ret = goto_low_power(gspca_dev); - if (ret) - gspca_err(gspca_dev, "Cannot go to low power mode: %d\n", - ret); + goto_low_power(gspca_dev); /* Check the firmware version. */ sd->params.version.firmwareVersion = 0; get_version_information(gspca_dev); From 4b059ce1f4b368208c2310925f49be77f15e527b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:56:34 +0200 Subject: [PATCH 158/730] Revert "ALSA: sb: fix a missing check of snd_ctl_add" This reverts commit beae77170c60aa786f3e4599c18ead2854d8694d. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It is safe to ignore this error as the mixer element is optional, and the driver is very legacy. Cc: Aditya Pakki Reviewed-by: Takashi Iwai Link: https://lore.kernel.org/r/20210503115736.2104747-8-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- sound/isa/sb/sb16_main.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/sound/isa/sb/sb16_main.c b/sound/isa/sb/sb16_main.c index 38dc1fde25f3..aa4870531023 100644 --- a/sound/isa/sb/sb16_main.c +++ b/sound/isa/sb/sb16_main.c @@ -846,14 +846,10 @@ int snd_sb16dsp_pcm(struct snd_sb *chip, int device) snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &snd_sb16_playback_ops); snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &snd_sb16_capture_ops); - if (chip->dma16 >= 0 && chip->dma8 != chip->dma16) { - err = snd_ctl_add(card, snd_ctl_new1( - &snd_sb16_dma_control, chip)); - if (err) - return err; - } else { + if (chip->dma16 >= 0 && chip->dma8 != chip->dma16) + snd_ctl_add(card, snd_ctl_new1(&snd_sb16_dma_control, chip)); + else pcm->info_flags = SNDRV_PCM_INFO_HALF_DUPLEX; - } snd_pcm_set_managed_buffer_all(pcm, SNDRV_DMA_TYPE_DEV, card->dev, 64*1024, 128*1024); From 8d1beda5f11953ffe135a5213287f0b25b4da41b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:56:35 +0200 Subject: [PATCH 159/730] Revert "leds: lp5523: fix a missing check of return value of lp55xx_read" This reverts commit 248b57015f35c94d4eae2fdd8c6febf5cd703900. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. The original commit does not properly unwind if there is an error condition so it needs to be reverted at this point in time. Cc: Kangjie Lu Cc: Jacek Anaszewski Cc: stable Fixes: 248b57015f35 ("leds: lp5523: fix a missing check of return value of lp55xx_read") Link: https://lore.kernel.org/r/20210503115736.2104747-9-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/leds/leds-lp5523.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/leds/leds-lp5523.c b/drivers/leds/leds-lp5523.c index fc433e63b1dc..5036d7d5f3d4 100644 --- a/drivers/leds/leds-lp5523.c +++ b/drivers/leds/leds-lp5523.c @@ -305,9 +305,7 @@ static int lp5523_init_program_engine(struct lp55xx_chip *chip) /* Let the programs run for couple of ms and check the engine status */ usleep_range(3000, 6000); - ret = lp55xx_read(chip, LP5523_REG_STATUS, &status); - if (ret) - return ret; + lp55xx_read(chip, LP5523_REG_STATUS, &status); status &= LP5523_ENG_STATUS_MASK; if (status != LP5523_ENG_STATUS_MASK) { From 6647f7a06eb030a2384ec71f0bb2e78854afabfe Mon Sep 17 00:00:00 2001 From: Phillip Potter Date: Mon, 3 May 2021 13:56:36 +0200 Subject: [PATCH 160/730] leds: lp5523: check return value of lp5xx_read and jump to cleanup code Check return value of lp5xx_read and if non-zero, jump to code at end of the function, causing lp5523_stop_all_engines to be executed before returning the error value up the call chain. This fixes the original commit (248b57015f35) which was reverted due to the University of Minnesota problems. Cc: stable Acked-by: Jacek Anaszewski Signed-off-by: Phillip Potter Link: https://lore.kernel.org/r/20210503115736.2104747-10-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/leds/leds-lp5523.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/leds/leds-lp5523.c b/drivers/leds/leds-lp5523.c index 5036d7d5f3d4..b1590cb4a188 100644 --- a/drivers/leds/leds-lp5523.c +++ b/drivers/leds/leds-lp5523.c @@ -305,7 +305,9 @@ static int lp5523_init_program_engine(struct lp55xx_chip *chip) /* Let the programs run for couple of ms and check the engine status */ usleep_range(3000, 6000); - lp55xx_read(chip, LP5523_REG_STATUS, &status); + ret = lp55xx_read(chip, LP5523_REG_STATUS, &status); + if (ret) + goto out; status &= LP5523_ENG_STATUS_MASK; if (status != LP5523_ENG_STATUS_MASK) { From b0a85abbe92e1a6f3e8580a4590fa7245de7090b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:56:37 +0200 Subject: [PATCH 161/730] Revert "serial: max310x: pass return value of spi_register_driver" This reverts commit 51f689cc11333944c7a457f25ec75fcb41e99410. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. This change did not properly unwind from the error condition, so it was not correct. Cc: Kangjie Lu Acked-by: Jiri Slaby Link: https://lore.kernel.org/r/20210503115736.2104747-11-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max310x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 8534d6e45a1d..a3ba0e6520a1 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -1518,10 +1518,10 @@ static int __init max310x_uart_init(void) return ret; #ifdef CONFIG_SPI_MASTER - ret = spi_register_driver(&max310x_spi_driver); + spi_register_driver(&max310x_spi_driver); #endif - return ret; + return 0; } module_init(max310x_uart_init); From 3890e3dea315f1a257d1b940a2a4e2fa16a7b095 Mon Sep 17 00:00:00 2001 From: Atul Gopinathan Date: Mon, 3 May 2021 13:56:38 +0200 Subject: [PATCH 162/730] serial: max310x: unregister uart driver in case of failure and abort The macro "spi_register_driver" invokes the function "__spi_register_driver()" which has a return type of int and can fail, returning a negative value in such a case. This is currently ignored and the init() function yields success even if the spi driver failed to register. Fix this by collecting the return value of "__spi_register_driver()" and also unregister the uart driver in case of failure. Cc: Jiri Slaby Signed-off-by: Atul Gopinathan Link: https://lore.kernel.org/r/20210503115736.2104747-12-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max310x.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index a3ba0e6520a1..3cbc757d7be7 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -1518,10 +1518,12 @@ static int __init max310x_uart_init(void) return ret; #ifdef CONFIG_SPI_MASTER - spi_register_driver(&max310x_spi_driver); + ret = spi_register_driver(&max310x_spi_driver); + if (ret) + uart_unregister_driver(&max310x_uart); #endif - return 0; + return ret; } module_init(max310x_uart_init); From 68c5634c4a7278672a3bed00eb5646884257c413 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:56:39 +0200 Subject: [PATCH 163/730] Revert "rtlwifi: fix a potential NULL pointer dereference" This reverts commit 765976285a8c8db3f0eb7f033829a899d0c2786e. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. This commit is not correct, it should not have used unlikely() and is not propagating the error properly to the calling function, so it should be reverted at this point in time. Also, if the check failed, the work queue was still assumed to be allocated, so further accesses would have continued to fail, meaning this patch does nothing to solve the root issues at all. Cc: Kangjie Lu Cc: Kalle Valo Cc: Bryan Brattlof Fixes: 765976285a8c ("rtlwifi: fix a potential NULL pointer dereference") Cc: stable Link: https://lore.kernel.org/r/20210503115736.2104747-13-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/base.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index 2a7ee90a3f54..4136d7c63254 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -452,11 +452,6 @@ static void _rtl_init_deferred_work(struct ieee80211_hw *hw) /* <2> work queue */ rtlpriv->works.hw = hw; rtlpriv->works.rtl_wq = alloc_workqueue("%s", 0, 0, rtlpriv->cfg->name); - if (unlikely(!rtlpriv->works.rtl_wq)) { - pr_err("Failed to allocate work queue\n"); - return; - } - INIT_DELAYED_WORK(&rtlpriv->works.watchdog_wq, rtl_watchdog_wq_callback); INIT_DELAYED_WORK(&rtlpriv->works.ips_nic_off_wq, From 30b0e0ee9d02b97b68705c46b41444786effc40c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:56:40 +0200 Subject: [PATCH 164/730] net: rtlwifi: properly check for alloc_workqueue() failure If alloc_workqueue() fails, properly catch this and propagate the error to the calling functions, so that the devuce initialization will properly error out. Cc: Kalle Valo Cc: Bryan Brattlof Cc: stable Link: https://lore.kernel.org/r/20210503115736.2104747-14-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/base.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index 4136d7c63254..ffd150ec181f 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -440,9 +440,14 @@ static void rtl_watchdog_wq_callback(struct work_struct *work); static void rtl_fwevt_wq_callback(struct work_struct *work); static void rtl_c2hcmd_wq_callback(struct work_struct *work); -static void _rtl_init_deferred_work(struct ieee80211_hw *hw) +static int _rtl_init_deferred_work(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); + struct workqueue_struct *wq; + + wq = alloc_workqueue("%s", 0, 0, rtlpriv->cfg->name); + if (!wq) + return -ENOMEM; /* <1> timer */ timer_setup(&rtlpriv->works.watchdog_timer, @@ -451,7 +456,8 @@ static void _rtl_init_deferred_work(struct ieee80211_hw *hw) rtl_easy_concurrent_retrytimer_callback, 0); /* <2> work queue */ rtlpriv->works.hw = hw; - rtlpriv->works.rtl_wq = alloc_workqueue("%s", 0, 0, rtlpriv->cfg->name); + rtlpriv->works.rtl_wq = wq; + INIT_DELAYED_WORK(&rtlpriv->works.watchdog_wq, rtl_watchdog_wq_callback); INIT_DELAYED_WORK(&rtlpriv->works.ips_nic_off_wq, @@ -461,6 +467,7 @@ static void _rtl_init_deferred_work(struct ieee80211_hw *hw) rtl_swlps_rfon_wq_callback); INIT_DELAYED_WORK(&rtlpriv->works.fwevt_wq, rtl_fwevt_wq_callback); INIT_DELAYED_WORK(&rtlpriv->works.c2hcmd_wq, rtl_c2hcmd_wq_callback); + return 0; } void rtl_deinit_deferred_work(struct ieee80211_hw *hw, bool ips_wq) @@ -559,9 +566,7 @@ int rtl_init_core(struct ieee80211_hw *hw) rtlmac->link_state = MAC80211_NOLINK; /* <6> init deferred work */ - _rtl_init_deferred_work(hw); - - return 0; + return _rtl_init_deferred_work(hw); } EXPORT_SYMBOL_GPL(rtl_init_core); From 5f94eaa4ee23e80841fa359a372f84cfe25daee1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:56:41 +0200 Subject: [PATCH 165/730] Revert "net: fujitsu: fix a potential NULL pointer dereference" This reverts commit 9f4d6358e11bbc7b839f9419636188e4151fb6e4. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. The original change does not change any behavior as the caller of this function onlyu checks for "== -1" as an error condition so this error is not handled properly. Remove this change and it will be fixed up properly in a later commit. Cc: Kangjie Lu Cc: David S. Miller Reviewed-by: Dominik Brodowski Link: https://lore.kernel.org/r/20210503115736.2104747-15-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/fujitsu/fmvj18x_cs.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c index a7b7a4aace79..dc90c61fc827 100644 --- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c +++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c @@ -547,11 +547,6 @@ static int fmvj18x_get_hwinfo(struct pcmcia_device *link, u_char *node_id) return -1; base = ioremap(link->resource[2]->start, resource_size(link->resource[2])); - if (!base) { - pcmcia_release_window(link, link->resource[2]); - return -ENOMEM; - } - pcmcia_map_mem_page(link, link->resource[2], 0); /* From 52202be1cd996cde6e8969a128dc27ee45a7cb5e Mon Sep 17 00:00:00 2001 From: Anirudh Rayabharam Date: Mon, 3 May 2021 13:56:42 +0200 Subject: [PATCH 166/730] net: fujitsu: fix potential null-ptr-deref In fmvj18x_get_hwinfo(), if ioremap fails there will be NULL pointer deref. To fix this, check the return value of ioremap and return -1 to the caller in case of failure. Cc: "David S. Miller" Acked-by: Dominik Brodowski Signed-off-by: Anirudh Rayabharam Link: https://lore.kernel.org/r/20210503115736.2104747-16-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/fujitsu/fmvj18x_cs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c index dc90c61fc827..b0c0504950d8 100644 --- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c +++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c @@ -547,6 +547,11 @@ static int fmvj18x_get_hwinfo(struct pcmcia_device *link, u_char *node_id) return -1; base = ioremap(link->resource[2]->start, resource_size(link->resource[2])); + if (!base) { + pcmcia_release_window(link, link->resource[2]); + return -1; + } + pcmcia_map_mem_page(link, link->resource[2], 0); /* From 5369ead83f5aff223b6418c99cb1fe9a8f007363 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:56:43 +0200 Subject: [PATCH 167/730] Revert "net/smc: fix a NULL pointer dereference" This reverts commit e183d4e414b64711baf7a04e214b61969ca08dfa. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. The original commit causes a memory leak and does not properly fix the issue it claims to fix. I will send a follow-on patch to resolve this properly. Cc: Kangjie Lu Cc: Ursula Braun Cc: David S. Miller Link: https://lore.kernel.org/r/20210503115736.2104747-17-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- net/smc/smc_ism.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 9c6e95882553..6558cf7643a7 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -417,11 +417,6 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, init_waitqueue_head(&smcd->lgrs_deleted); smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)", WQ_MEM_RECLAIM, name); - if (!smcd->event_wq) { - kfree(smcd->conn); - kfree(smcd); - return NULL; - } return smcd; } EXPORT_SYMBOL_GPL(smcd_alloc_dev); From bbeb18f27a44ce6adb00d2316968bc59dc640b9b Mon Sep 17 00:00:00 2001 From: Anirudh Rayabharam Date: Mon, 3 May 2021 13:56:44 +0200 Subject: [PATCH 168/730] net/smc: properly handle workqueue allocation failure In smcd_alloc_dev(), if alloc_ordered_workqueue() fails, properly catch it, clean up and return NULL to let the caller know there was a failure. Move the call to alloc_ordered_workqueue higher in the function in order to abort earlier without needing to unwind the call to device_initialize(). Cc: Ursula Braun Cc: David S. Miller Signed-off-by: Anirudh Rayabharam Link: https://lore.kernel.org/r/20210503115736.2104747-18-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- net/smc/smc_ism.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 6558cf7643a7..94b31f2551bc 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -402,6 +402,14 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, return NULL; } + smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)", + WQ_MEM_RECLAIM, name); + if (!smcd->event_wq) { + kfree(smcd->conn); + kfree(smcd); + return NULL; + } + smcd->dev.parent = parent; smcd->dev.release = smcd_release; device_initialize(&smcd->dev); @@ -415,8 +423,6 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, INIT_LIST_HEAD(&smcd->vlan); INIT_LIST_HEAD(&smcd->lgr_list); init_waitqueue_head(&smcd->lgrs_deleted); - smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)", - WQ_MEM_RECLAIM, name); return smcd; } EXPORT_SYMBOL_GPL(smcd_alloc_dev); From 4df07045fcfd684379a394d0f2aa0cc4067bda2a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:56:45 +0200 Subject: [PATCH 169/730] Revert "net: caif: replace BUG_ON with recovery code" This reverts commit c5dea815834c7d2e9fc633785455bc428b7a1956. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. The original change here was pointless as dev can never be NULL in this function so the claim in the changelog that this "fixes" anything is incorrect (also the developer forgot about panic_on_warn). A follow-up change will resolve this issue properly. Cc: Aditya Pakki Cc: David S. Miller Link: https://lore.kernel.org/r/20210503115736.2104747-19-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/caif/caif_serial.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c index da6fffb4d5a8..a7f51eb58915 100644 --- a/drivers/net/caif/caif_serial.c +++ b/drivers/net/caif/caif_serial.c @@ -269,9 +269,7 @@ static netdev_tx_t caif_xmit(struct sk_buff *skb, struct net_device *dev) { struct ser_device *ser; - if (WARN_ON(!dev)) - return -EINVAL; - + BUG_ON(dev == NULL); ser = netdev_priv(dev); /* Send flow off once, on high water mark */ From 65a67792e3416f7c5d7daa47d99334cbb19a7449 Mon Sep 17 00:00:00 2001 From: Du Cheng Date: Mon, 3 May 2021 13:56:46 +0200 Subject: [PATCH 170/730] net: caif: remove BUG_ON(dev == NULL) in caif_xmit The condition of dev == NULL is impossible in caif_xmit(), hence it is for the removal. Explanation: The static caif_xmit() is only called upon via a function pointer `ndo_start_xmit` defined in include/linux/netdevice.h: ``` struct net_device_ops { ... netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb, struct net_device *dev); ... } ``` The exhausive list of call points are: ``` drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c dev->netdev_ops->ndo_start_xmit(skb, dev); ^ ^ drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev); ^ ^ return adapter->rn_ops->ndo_start_xmit(skb, netdev); // adapter would crash first ^ ^ drivers/usb/gadget/function/f_ncm.c ncm->netdev->netdev_ops->ndo_start_xmit(NULL, ncm->netdev); ^ ^ include/linux/netdevice.h static inline netdev_tx_t __netdev_start_xmit(... { return ops->ndo_start_xmit(skb, dev); ^ } const struct net_device_ops *ops = dev->netdev_ops; ^ rc = __netdev_start_xmit(ops, skb, dev, more); ^ ``` In each of the enumerated scenarios, it is impossible for the NULL-valued dev to reach the caif_xmit() without crashing the kernel earlier, therefore `BUG_ON(dev == NULL)` is rather useless, hence the removal. Cc: David S. Miller Signed-off-by: Du Cheng Link: https://lore.kernel.org/r/20210503115736.2104747-20-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/caif/caif_serial.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c index a7f51eb58915..d17482395a4d 100644 --- a/drivers/net/caif/caif_serial.c +++ b/drivers/net/caif/caif_serial.c @@ -269,7 +269,6 @@ static netdev_tx_t caif_xmit(struct sk_buff *skb, struct net_device *dev) { struct ser_device *ser; - BUG_ON(dev == NULL); ser = netdev_priv(dev); /* Send flow off once, on high water mark */ From bee1b0511844c8c79fccf1f2b13472393b6b91f7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:56:47 +0200 Subject: [PATCH 171/730] Revert "net: stmicro: fix a missing check of clk_prepare" This reverts commit f86a3b83833e7cfe558ca4d70b64ebc48903efec. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. The original commit causes a memory leak when it is trying to claim it is properly handling errors. Revert this change and fix it up properly in a follow-on commit. Cc: Kangjie Lu Cc: David S. Miller Fixes: f86a3b83833e ("net: stmicro: fix a missing check of clk_prepare") Cc: stable Link: https://lore.kernel.org/r/20210503115736.2104747-21-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c index 527077c98ebc..fc68e90acbea 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c @@ -50,9 +50,7 @@ static int sun7i_gmac_init(struct platform_device *pdev, void *priv) gmac->clk_enabled = 1; } else { clk_set_rate(gmac->tx_clk, SUN7I_GMAC_MII_RATE); - ret = clk_prepare(gmac->tx_clk); - if (ret) - return ret; + clk_prepare(gmac->tx_clk); } return 0; From 4573472315f0fa461330545ff2aa2f6da0b1ae76 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Thu, 13 May 2021 15:07:41 +0300 Subject: [PATCH 172/730] iio: adc: ad7124: Fix missbalanced regulator enable / disable on error. If the devm_regulator_get() call succeeded but not the regulator_enable() then regulator_disable() would be called on a regulator that was not enabled. Fix this by moving regulator enabling / disabling over to devm_ management via devm_add_action_or_reset. Alexandru's sign-off here because he pulled Jonathan's patch into a larger set which Jonathan then applied. Fixes: b3af341bbd96 ("iio: adc: Add ad7124 support") Reviewed-by: Alexandru Ardelean Signed-off-by: Jonathan Cameron Signed-off-by: Alexandru Ardelean Cc: --- drivers/iio/adc/ad7124.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c index 9d3952b4674f..437116a07cf1 100644 --- a/drivers/iio/adc/ad7124.c +++ b/drivers/iio/adc/ad7124.c @@ -850,6 +850,11 @@ static int ad7124_setup(struct ad7124_state *st) return ret; } +static void ad7124_reg_disable(void *r) +{ + regulator_disable(r); +} + static int ad7124_probe(struct spi_device *spi) { const struct ad7124_chip_info *info; @@ -895,17 +900,20 @@ static int ad7124_probe(struct spi_device *spi) ret = regulator_enable(st->vref[i]); if (ret) return ret; + + ret = devm_add_action_or_reset(&spi->dev, ad7124_reg_disable, + st->vref[i]); + if (ret) + return ret; } st->mclk = devm_clk_get(&spi->dev, "mclk"); - if (IS_ERR(st->mclk)) { - ret = PTR_ERR(st->mclk); - goto error_regulator_disable; - } + if (IS_ERR(st->mclk)) + return PTR_ERR(st->mclk); ret = clk_prepare_enable(st->mclk); if (ret < 0) - goto error_regulator_disable; + return ret; ret = ad7124_soft_reset(st); if (ret < 0) @@ -935,11 +943,6 @@ error_remove_trigger: ad_sd_cleanup_buffer_and_trigger(indio_dev); error_clk_disable_unprepare: clk_disable_unprepare(st->mclk); -error_regulator_disable: - for (i = ARRAY_SIZE(st->vref) - 1; i >= 0; i--) { - if (!IS_ERR_OR_NULL(st->vref[i])) - regulator_disable(st->vref[i]); - } return ret; } @@ -948,17 +951,11 @@ static int ad7124_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); struct ad7124_state *st = iio_priv(indio_dev); - int i; iio_device_unregister(indio_dev); ad_sd_cleanup_buffer_and_trigger(indio_dev); clk_disable_unprepare(st->mclk); - for (i = ARRAY_SIZE(st->vref) - 1; i >= 0; i--) { - if (!IS_ERR_OR_NULL(st->vref[i])) - regulator_disable(st->vref[i]); - } - return 0; } From f2a772c51206b0c3f262e4f6a3812c89a650191b Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Thu, 13 May 2021 15:07:42 +0300 Subject: [PATCH 173/730] iio: adc: ad7124: Fix potential overflow due to non sequential channel numbers Channel numbering must start at 0 and then not have any holes, or it is possible to overflow the available storage. Note this bug was introduced as part of a fix to ensure we didn't rely on the ordering of child nodes. So we need to support arbitrary ordering but they all need to be there somewhere. Note I hit this when using qemu to test the rest of this series. Arguably this isn't the best fix, but it is probably the most minimal option for backporting etc. Alexandru's sign-off is here because he carried this patch in a larger set that Jonathan then applied. Fixes: d7857e4ee1ba6 ("iio: adc: ad7124: Fix DT channel configuration") Reviewed-by: Alexandru Ardelean Signed-off-by: Jonathan Cameron Signed-off-by: Alexandru Ardelean Cc: --- drivers/iio/adc/ad7124.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c index 437116a07cf1..a27db78ea13e 100644 --- a/drivers/iio/adc/ad7124.c +++ b/drivers/iio/adc/ad7124.c @@ -771,6 +771,13 @@ static int ad7124_of_parse_channel_config(struct iio_dev *indio_dev, if (ret) goto err; + if (channel >= indio_dev->num_channels) { + dev_err(indio_dev->dev.parent, + "Channel index >= number of channels\n"); + ret = -EINVAL; + goto err; + } + ret = of_property_read_u32_array(child, "diff-channels", ain, 2); if (ret) From 0c32a96d000f260b5ebfabb4145a86ae1cd71847 Mon Sep 17 00:00:00 2001 From: Anirudh Rayabharam Date: Mon, 3 May 2021 13:56:48 +0200 Subject: [PATCH 174/730] net: stmicro: handle clk_prepare() failure during init In case clk_prepare() fails, capture and propagate the error code up the stack. If regulator_enable() was called earlier, properly unwind it by calling regulator_disable(). Signed-off-by: Anirudh Rayabharam Cc: David S. Miller Cc: stable Link: https://lore.kernel.org/r/20210503115736.2104747-22-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c index fc68e90acbea..fc3b0acc8f99 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c @@ -30,7 +30,7 @@ struct sunxi_priv_data { static int sun7i_gmac_init(struct platform_device *pdev, void *priv) { struct sunxi_priv_data *gmac = priv; - int ret; + int ret = 0; if (gmac->regulator) { ret = regulator_enable(gmac->regulator); @@ -50,10 +50,12 @@ static int sun7i_gmac_init(struct platform_device *pdev, void *priv) gmac->clk_enabled = 1; } else { clk_set_rate(gmac->tx_clk, SUN7I_GMAC_MII_RATE); - clk_prepare(gmac->tx_clk); + ret = clk_prepare(gmac->tx_clk); + if (ret && gmac->regulator) + regulator_disable(gmac->regulator); } - return 0; + return ret; } static void sun7i_gmac_exit(struct platform_device *pdev, void *priv) From 7930742d6a0ff091c85b92ef4e076432d8d8cb79 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:56:49 +0200 Subject: [PATCH 175/730] Revert "niu: fix missing checks of niu_pci_eeprom_read" This reverts commit 26fd962bde0b15e54234fe762d86bc0349df1de4. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. The change here was incorrect. While it is nice to check if niu_pci_eeprom_read() succeeded or not when using the data, any error that might have happened was not propagated upwards properly, causing the kernel to assume that these reads were successful, which results in invalid data in the buffer that was to contain the successfully read data. Cc: Kangjie Lu Cc: Shannon Nelson Cc: David S. Miller Fixes: 26fd962bde0b ("niu: fix missing checks of niu_pci_eeprom_read") Cc: stable Link: https://lore.kernel.org/r/20210503115736.2104747-23-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/sun/niu.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 707ccdd03b19..d70cdea756d1 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -8097,8 +8097,6 @@ static int niu_pci_vpd_scan_props(struct niu *np, u32 start, u32 end) start += 3; prop_len = niu_pci_eeprom_read(np, start + 4); - if (prop_len < 0) - return prop_len; err = niu_pci_vpd_get_propname(np, start + 5, namebuf, 64); if (err < 0) return err; @@ -8143,12 +8141,8 @@ static int niu_pci_vpd_scan_props(struct niu *np, u32 start, u32 end) netif_printk(np, probe, KERN_DEBUG, np->dev, "VPD_SCAN: Reading in property [%s] len[%d]\n", namebuf, prop_len); - for (i = 0; i < prop_len; i++) { - err = niu_pci_eeprom_read(np, off + i); - if (err >= 0) - *prop_buf = err; - ++prop_buf; - } + for (i = 0; i < prop_len; i++) + *prop_buf++ = niu_pci_eeprom_read(np, off + i); } start += len; From e6e337708c22f80824b82d4af645f20715730ad0 Mon Sep 17 00:00:00 2001 From: Du Cheng Date: Mon, 3 May 2021 13:56:50 +0200 Subject: [PATCH 176/730] ethernet: sun: niu: fix missing checks of niu_pci_eeprom_read() niu_pci_eeprom_read() may fail, so add checks to its return value and propagate the error up the callstack. An examination of the callstack up to niu_pci_eeprom_read shows that: niu_pci_eeprom_read() // returns int niu_pci_vpd_scan_props() // returns int niu_pci_vpd_fetch() // returns *void* niu_get_invariants() // returns int since niu_pci_vpd_fetch() returns void which breaks the bubbling up, change its return type to int so that error is propagated upwards. Signed-off-by: Du Cheng Cc: Shannon Nelson Cc: David S. Miller Cc: stable Link: https://lore.kernel.org/r/20210503115736.2104747-24-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/sun/niu.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index d70cdea756d1..74e748662ec0 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -8097,6 +8097,8 @@ static int niu_pci_vpd_scan_props(struct niu *np, u32 start, u32 end) start += 3; prop_len = niu_pci_eeprom_read(np, start + 4); + if (prop_len < 0) + return prop_len; err = niu_pci_vpd_get_propname(np, start + 5, namebuf, 64); if (err < 0) return err; @@ -8141,8 +8143,12 @@ static int niu_pci_vpd_scan_props(struct niu *np, u32 start, u32 end) netif_printk(np, probe, KERN_DEBUG, np->dev, "VPD_SCAN: Reading in property [%s] len[%d]\n", namebuf, prop_len); - for (i = 0; i < prop_len; i++) - *prop_buf++ = niu_pci_eeprom_read(np, off + i); + for (i = 0; i < prop_len; i++) { + err = niu_pci_eeprom_read(np, off + i); + if (err < 0) + return err; + *prop_buf++ = err; + } } start += len; @@ -8152,14 +8158,14 @@ static int niu_pci_vpd_scan_props(struct niu *np, u32 start, u32 end) } /* ESPC_PIO_EN_ENABLE must be set */ -static void niu_pci_vpd_fetch(struct niu *np, u32 start) +static int niu_pci_vpd_fetch(struct niu *np, u32 start) { u32 offset; int err; err = niu_pci_eeprom_read16_swp(np, start + 1); if (err < 0) - return; + return err; offset = err + 3; @@ -8168,12 +8174,14 @@ static void niu_pci_vpd_fetch(struct niu *np, u32 start) u32 end; err = niu_pci_eeprom_read(np, here); + if (err < 0) + return err; if (err != 0x90) - return; + return -EINVAL; err = niu_pci_eeprom_read16_swp(np, here + 1); if (err < 0) - return; + return err; here = start + offset + 3; end = start + offset + err; @@ -8181,9 +8189,12 @@ static void niu_pci_vpd_fetch(struct niu *np, u32 start) offset += err; err = niu_pci_vpd_scan_props(np, here, end); - if (err < 0 || err == 1) - return; + if (err < 0) + return err; + if (err == 1) + return -EINVAL; } + return 0; } /* ESPC_PIO_EN_ENABLE must be set */ @@ -9274,8 +9285,11 @@ static int niu_get_invariants(struct niu *np) offset = niu_pci_vpd_offset(np); netif_printk(np, probe, KERN_DEBUG, np->dev, "%s() VPD offset [%08x]\n", __func__, offset); - if (offset) - niu_pci_vpd_fetch(np, offset); + if (offset) { + err = niu_pci_vpd_fetch(np, offset); + if (err < 0) + return err; + } nw64(ESPC_PIO_EN, 0); if (np->flags & NIU_FLAGS_VPD_VALID) { From b95b57dfe7a142bf2446548eb7f49340fd73e78b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:56:51 +0200 Subject: [PATCH 177/730] Revert "qlcnic: Avoid potential NULL pointer dereference" This reverts commit 5bf7295fe34a5251b1d241b9736af4697b590670. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. This commit does not properly detect if an error happens because the logic after this loop will not detect that there was a failed allocation. Cc: Aditya Pakki Cc: David S. Miller Fixes: 5bf7295fe34a ("qlcnic: Avoid potential NULL pointer dereference") Cc: stable Link: https://lore.kernel.org/r/20210503115736.2104747-25-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c index d8a3ecaed3fc..985cf8cb2ec0 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c @@ -1047,8 +1047,6 @@ int qlcnic_do_lb_test(struct qlcnic_adapter *adapter, u8 mode) for (i = 0; i < QLCNIC_NUM_ILB_PKT; i++) { skb = netdev_alloc_skb(adapter->netdev, QLCNIC_ILB_PKT_SIZE); - if (!skb) - break; qlcnic_create_loopback_buff(skb->data, adapter->mac_addr); skb_put(skb, QLCNIC_ILB_PKT_SIZE); adapter->ahw->diag_cnt = 0; From 84460f01cba382553199bc1361f69a872d5abed4 Mon Sep 17 00:00:00 2001 From: Tom Seewald Date: Mon, 3 May 2021 13:56:52 +0200 Subject: [PATCH 178/730] qlcnic: Add null check after calling netdev_alloc_skb The function qlcnic_dl_lb_test() currently calls netdev_alloc_skb() without checking afterwards that the allocation succeeded. Fix this by checking if the skb is NULL and returning an error in such a case. Breaking out of the loop if the skb is NULL is not correct as no error would be reported to the caller and no message would be printed for the user. Cc: David S. Miller Cc: stable Signed-off-by: Tom Seewald Link: https://lore.kernel.org/r/20210503115736.2104747-26-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c index 985cf8cb2ec0..d8f0863b3934 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c @@ -1047,6 +1047,8 @@ int qlcnic_do_lb_test(struct qlcnic_adapter *adapter, u8 mode) for (i = 0; i < QLCNIC_NUM_ILB_PKT; i++) { skb = netdev_alloc_skb(adapter->netdev, QLCNIC_ILB_PKT_SIZE); + if (!skb) + goto error; qlcnic_create_loopback_buff(skb->data, adapter->mac_addr); skb_put(skb, QLCNIC_ILB_PKT_SIZE); adapter->ahw->diag_cnt = 0; @@ -1070,6 +1072,7 @@ int qlcnic_do_lb_test(struct qlcnic_adapter *adapter, u8 mode) cnt++; } if (cnt != i) { +error: dev_err(&adapter->pdev->dev, "LB Test: failed, TX[%d], RX[%d]\n", i, cnt); if (mode != QLCNIC_ILB_MODE) From 257343d3ed557f11d580d0b7c515dc154f64a42b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:56:53 +0200 Subject: [PATCH 179/730] Revert "gdrom: fix a memory leak bug" This reverts commit 093c48213ee37c3c3ff1cf5ac1aa2a9d8bc66017. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. Because of this, all submissions from this group must be reverted from the kernel tree and will need to be re-reviewed again to determine if they actually are a valid fix. Until that work is complete, remove this change to ensure that no problems are being introduced into the codebase. Cc: Wenwen Wang Cc: Peter Rosin Cc: Jens Axboe Fixes: 093c48213ee3 ("gdrom: fix a memory leak bug") Cc: stable Link: https://lore.kernel.org/r/20210503115736.2104747-27-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/cdrom/gdrom.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 742b4a0932e3..7f681320c7d3 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -862,7 +862,6 @@ static void __exit exit_gdrom(void) platform_device_unregister(pd); platform_driver_unregister(&gdrom_driver); kfree(gd.toc); - kfree(gd.cd_info); } module_init(init_gdrom); From d03d1021da6fe7f46efe9f2a7335564e7c9db5ab Mon Sep 17 00:00:00 2001 From: Atul Gopinathan Date: Mon, 3 May 2021 13:56:54 +0200 Subject: [PATCH 180/730] cdrom: gdrom: deallocate struct gdrom_unit fields in remove_gdrom The fields, "toc" and "cd_info", of "struct gdrom_unit gd" are allocated in "probe_gdrom()". Prevent a memory leak by making sure "gd.cd_info" is deallocated in the "remove_gdrom()" function. Also prevent double free of the field "gd.toc" by moving it from the module's exit function to "remove_gdrom()". This is because, in "probe_gdrom()", the function makes sure to deallocate "gd.toc" in case of any errors, so the exit function invoked later would again free "gd.toc". The patch also maintains consistency by deallocating the above mentioned fields in "remove_gdrom()" along with another memory allocated field "gd.disk". Suggested-by: Jens Axboe Cc: Peter Rosin Cc: stable Signed-off-by: Atul Gopinathan Link: https://lore.kernel.org/r/20210503115736.2104747-28-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/cdrom/gdrom.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 7f681320c7d3..6c4f6139f853 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -830,6 +830,8 @@ static int remove_gdrom(struct platform_device *devptr) if (gdrom_major) unregister_blkdev(gdrom_major, GDROM_DEV_NAME); unregister_cdrom(gd.cd_info); + kfree(gd.cd_info); + kfree(gd.toc); return 0; } @@ -861,7 +863,6 @@ static void __exit exit_gdrom(void) { platform_device_unregister(pd); platform_driver_unregister(&gdrom_driver); - kfree(gd.toc); } module_init(init_gdrom); From 566f53238da74801b48e985788e5f7c9159e5940 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:56:55 +0200 Subject: [PATCH 181/730] Revert "char: hpet: fix a missing check of ioremap" This reverts commit 13bd14a41ce3105d5b1f3cd8b4d1e249d17b6d9b. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. While this is technically correct, it is only fixing ONE of these errors in this function, so the patch is not fully correct. I'll leave this revert and provide a fix for this later that resolves this same "problem" everywhere in this function. Cc: Kangjie Lu Link: https://lore.kernel.org/r/20210503115736.2104747-29-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/char/hpet.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index ed3b7dab678d..6f13def6c172 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -969,8 +969,6 @@ static acpi_status hpet_resources(struct acpi_resource *res, void *data) if (ACPI_SUCCESS(status)) { hdp->hd_phys_address = addr.address.minimum; hdp->hd_address = ioremap(addr.address.minimum, addr.address.address_length); - if (!hdp->hd_address) - return AE_ERROR; if (hpet_is_known(hdp)) { iounmap(hdp->hd_address); From b11701c933112d49b808dee01cb7ff854ba6a77a Mon Sep 17 00:00:00 2001 From: Tom Seewald Date: Mon, 3 May 2021 13:56:56 +0200 Subject: [PATCH 182/730] char: hpet: add checks after calling ioremap The function hpet_resources() calls ioremap() two times, but in both cases it does not check if ioremap() returned a null pointer. Fix this by adding null pointer checks and returning an appropriate error. Signed-off-by: Tom Seewald Link: https://lore.kernel.org/r/20210503115736.2104747-30-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/char/hpet.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 6f13def6c172..8b55085650ad 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -969,6 +969,8 @@ static acpi_status hpet_resources(struct acpi_resource *res, void *data) if (ACPI_SUCCESS(status)) { hdp->hd_phys_address = addr.address.minimum; hdp->hd_address = ioremap(addr.address.minimum, addr.address.address_length); + if (!hdp->hd_address) + return AE_ERROR; if (hpet_is_known(hdp)) { iounmap(hdp->hd_address); @@ -982,6 +984,8 @@ static acpi_status hpet_resources(struct acpi_resource *res, void *data) hdp->hd_phys_address = fixmem32->address; hdp->hd_address = ioremap(fixmem32->address, HPET_RANGE_SIZE); + if (!hdp->hd_address) + return AE_ERROR; if (hpet_is_known(hdp)) { iounmap(hdp->hd_address); From 4d427b408c4c2ff1676966c72119a3a559f8e39b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:56:57 +0200 Subject: [PATCH 183/730] Revert "scsi: ufs: fix a missing check of devm_reset_control_get" This reverts commit 63a06181d7ce169d09843645c50fea1901bc9f0a. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. The original commit is incorrect, it does not properly clean up on the error path, so I'll keep the revert and fix it up properly with a follow-on patch. Cc: Kangjie Lu Cc: Avri Altman Cc: Martin K. Petersen Fixes: 63a06181d7ce ("scsi: ufs: fix a missing check of devm_reset_control_get") Cc: stable Link: https://lore.kernel.org/r/20210503115736.2104747-31-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ufs/ufs-hisi.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/scsi/ufs/ufs-hisi.c b/drivers/scsi/ufs/ufs-hisi.c index 0aa58131e791..7d1e07a9d9dd 100644 --- a/drivers/scsi/ufs/ufs-hisi.c +++ b/drivers/scsi/ufs/ufs-hisi.c @@ -468,10 +468,6 @@ static int ufs_hisi_init_common(struct ufs_hba *hba) ufshcd_set_variant(hba, host); host->rst = devm_reset_control_get(dev, "rst"); - if (IS_ERR(host->rst)) { - dev_err(dev, "%s: failed to get reset control\n", __func__); - return PTR_ERR(host->rst); - } ufs_hisi_set_pm_lvl(hba); From 2f4a784f40f8d337d6590e2e93f46429052e15ac Mon Sep 17 00:00:00 2001 From: Phillip Potter Date: Mon, 3 May 2021 13:56:58 +0200 Subject: [PATCH 184/730] scsi: ufs: handle cleanup correctly on devm_reset_control_get error Move ufshcd_set_variant call in ufs_hisi_init_common to common error section at end of the function, and then jump to this from the error checking statements for both devm_reset_control_get and ufs_hisi_get_resource. This fixes the original commit (63a06181d7ce) which was reverted due to the University of Minnesota problems. Suggested-by: Greg Kroah-Hartman Cc: Avri Altman Cc: Martin K. Petersen Cc: stable Signed-off-by: Phillip Potter Link: https://lore.kernel.org/r/20210503115736.2104747-32-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ufs/ufs-hisi.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/ufs/ufs-hisi.c b/drivers/scsi/ufs/ufs-hisi.c index 7d1e07a9d9dd..d0626773eb38 100644 --- a/drivers/scsi/ufs/ufs-hisi.c +++ b/drivers/scsi/ufs/ufs-hisi.c @@ -467,17 +467,24 @@ static int ufs_hisi_init_common(struct ufs_hba *hba) host->hba = hba; ufshcd_set_variant(hba, host); - host->rst = devm_reset_control_get(dev, "rst"); + host->rst = devm_reset_control_get(dev, "rst"); + if (IS_ERR(host->rst)) { + dev_err(dev, "%s: failed to get reset control\n", __func__); + err = PTR_ERR(host->rst); + goto error; + } ufs_hisi_set_pm_lvl(hba); err = ufs_hisi_get_resource(host); - if (err) { - ufshcd_set_variant(hba, NULL); - return err; - } + if (err) + goto error; return 0; + +error: + ufshcd_set_variant(hba, NULL); + return err; } static int ufs_hi3660_init(struct ufs_hba *hba) From 1dacca7fa1ebea47d38d20cd2df37094805d2649 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:56:59 +0200 Subject: [PATCH 185/730] Revert "ALSA: gus: add a check of the status of snd_ctl_add" This reverts commit 0f25e000cb4398081748e54f62a902098aa79ec1. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. The original commit did nothing if there was an error, except to print out a message, which is pointless. So remove the commit as it gives a "false sense of doing something". Cc: Kangjie Lu Reviewed-by: Takashi Iwai Link: https://lore.kernel.org/r/20210503115736.2104747-33-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- sound/isa/gus/gus_main.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/sound/isa/gus/gus_main.c b/sound/isa/gus/gus_main.c index afc088f0377c..b7518122a10d 100644 --- a/sound/isa/gus/gus_main.c +++ b/sound/isa/gus/gus_main.c @@ -77,17 +77,8 @@ static const struct snd_kcontrol_new snd_gus_joystick_control = { static void snd_gus_init_control(struct snd_gus_card *gus) { - int ret; - - if (!gus->ace_flag) { - ret = - snd_ctl_add(gus->card, - snd_ctl_new1(&snd_gus_joystick_control, - gus)); - if (ret) - snd_printk(KERN_ERR "gus: snd_ctl_add failed: %d\n", - ret); - } + if (!gus->ace_flag) + snd_ctl_add(gus->card, snd_ctl_new1(&snd_gus_joystick_control, gus)); } /* From 94f88309f201821073f57ae6005caefa61bf7b7e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:57:01 +0200 Subject: [PATCH 186/730] Revert "ALSA: sb8: add a check for request_region" This reverts commit dcd0feac9bab901d5739de51b3f69840851f8919. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. The original commit message for this change was incorrect as the code path can never result in a NULL dereference, alluding to the fact that whatever tool was used to "find this" is broken. It's just an optional resource reservation, so removing this check is fine. Cc: Kangjie Lu Acked-by: Takashi Iwai Fixes: dcd0feac9bab ("ALSA: sb8: add a check for request_region") Cc: stable Link: https://lore.kernel.org/r/20210503115736.2104747-35-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- sound/isa/sb/sb8.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sound/isa/sb/sb8.c b/sound/isa/sb/sb8.c index 6c9d534ce8b6..95290ffe5c6e 100644 --- a/sound/isa/sb/sb8.c +++ b/sound/isa/sb/sb8.c @@ -95,10 +95,6 @@ static int snd_sb8_probe(struct device *pdev, unsigned int dev) /* block the 0x388 port to avoid PnP conflicts */ acard->fm_res = request_region(0x388, 4, "SoundBlaster FM"); - if (!acard->fm_res) { - err = -EBUSY; - goto _err; - } if (port[dev] != SNDRV_AUTO_PORT) { if ((err = snd_sbdsp_create(card, port[dev], irq[dev], From a28591f61b60fac820c6de59826ffa710e5e314e Mon Sep 17 00:00:00 2001 From: Atul Gopinathan Date: Mon, 3 May 2021 13:57:02 +0200 Subject: [PATCH 187/730] ALSA: sb8: Add a comment note regarding an unused pointer The field "fm_res" of "struct snd_sb8" is never used/dereferenced throughout the sb8.c code. Therefore there is no need for any null value check after the "request_region()". Add a comment note to make developers know about this and prevent any "NULL check" patches on this part of code. Cc: Takashi Iwai Signed-off-by: Atul Gopinathan Link: https://lore.kernel.org/r/20210503115736.2104747-36-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- sound/isa/sb/sb8.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/isa/sb/sb8.c b/sound/isa/sb/sb8.c index 95290ffe5c6e..ed3a87ebe3f4 100644 --- a/sound/isa/sb/sb8.c +++ b/sound/isa/sb/sb8.c @@ -93,7 +93,11 @@ static int snd_sb8_probe(struct device *pdev, unsigned int dev) acard = card->private_data; card->private_free = snd_sb8_free; - /* block the 0x388 port to avoid PnP conflicts */ + /* + * Block the 0x388 port to avoid PnP conflicts. + * No need to check this value after request_region, + * as we never do anything with it. + */ acard->fm_res = request_region(0x388, 4, "SoundBlaster FM"); if (port[dev] != SNDRV_AUTO_PORT) { From 4667a6fc1777ce071504bab570d3599107f4790f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:57:03 +0200 Subject: [PATCH 188/730] Revert "ALSA: usx2y: Fix potential NULL pointer dereference" This reverts commit a2c6433ee5a35a8de6d563f6512a26f87835ea0f. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. The original patch was incorrect, and would leak memory if the error path the patch added was hit. Cc: Aditya Pakki Reviewed-by: Takashi Iwai Link: https://lore.kernel.org/r/20210503115736.2104747-37-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- sound/usb/usx2y/usb_stream.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sound/usb/usx2y/usb_stream.c b/sound/usb/usx2y/usb_stream.c index 091c071b270a..6bba17bf689a 100644 --- a/sound/usb/usx2y/usb_stream.c +++ b/sound/usb/usx2y/usb_stream.c @@ -91,12 +91,7 @@ static int init_urbs(struct usb_stream_kernel *sk, unsigned use_packsize, for (u = 0; u < USB_STREAM_NURBS; ++u) { sk->inurb[u] = usb_alloc_urb(sk->n_o_ps, GFP_KERNEL); - if (!sk->inurb[u]) - return -ENOMEM; - sk->outurb[u] = usb_alloc_urb(sk->n_o_ps, GFP_KERNEL); - if (!sk->outurb[u]) - return -ENOMEM; } if (init_pipe_urbs(sk, use_packsize, sk->inurb, indata, dev, in_pipe) || From 58c0cc2d90f1e37c4eb63ae7f164c83830833f78 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:57:05 +0200 Subject: [PATCH 189/730] Revert "video: hgafb: fix potential NULL pointer dereference" This reverts commit ec7f6aad57ad29e4e66cc2e18e1e1599ddb02542. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. This patch "looks" correct, but the driver keeps on running and will fail horribly right afterward if this error condition ever trips. So points for trying to resolve an issue, but a huge NEGATIVE value for providing a "fake" fix for the problem as nothing actually got resolved at all. I'll go fix this up properly... Cc: Kangjie Lu Cc: Aditya Pakki Cc: Ferenc Bakonyi Cc: Bartlomiej Zolnierkiewicz Fixes: ec7f6aad57ad ("video: hgafb: fix potential NULL pointer dereference") Cc: stable Link: https://lore.kernel.org/r/20210503115736.2104747-39-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/hgafb.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/video/fbdev/hgafb.c b/drivers/video/fbdev/hgafb.c index 8bbac7182ad3..fca29f219f8b 100644 --- a/drivers/video/fbdev/hgafb.c +++ b/drivers/video/fbdev/hgafb.c @@ -285,8 +285,6 @@ static int hga_card_detect(void) hga_vram_len = 0x08000; hga_vram = ioremap(0xb0000, hga_vram_len); - if (!hga_vram) - goto error; if (request_region(0x3b0, 12, "hgafb")) release_io_ports = 1; From dc13cac4862cc68ec74348a80b6942532b7735fa Mon Sep 17 00:00:00 2001 From: Igor Matheus Andrade Torrente Date: Mon, 3 May 2021 13:57:06 +0200 Subject: [PATCH 190/730] video: hgafb: fix potential NULL pointer dereference The return of ioremap if not checked, and can lead to a NULL to be assigned to hga_vram. Potentially leading to a NULL pointer dereference. The fix adds code to deal with this case in the error label and changes how the hgafb_probe handles the return of hga_card_detect. Cc: Ferenc Bakonyi Cc: Bartlomiej Zolnierkiewicz Cc: stable Signed-off-by: Igor Matheus Andrade Torrente Link: https://lore.kernel.org/r/20210503115736.2104747-40-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/hgafb.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/video/fbdev/hgafb.c b/drivers/video/fbdev/hgafb.c index fca29f219f8b..cc8e62ae93f6 100644 --- a/drivers/video/fbdev/hgafb.c +++ b/drivers/video/fbdev/hgafb.c @@ -285,6 +285,8 @@ static int hga_card_detect(void) hga_vram_len = 0x08000; hga_vram = ioremap(0xb0000, hga_vram_len); + if (!hga_vram) + return -ENOMEM; if (request_region(0x3b0, 12, "hgafb")) release_io_ports = 1; @@ -344,13 +346,18 @@ static int hga_card_detect(void) hga_type_name = "Hercules"; break; } - return 1; + return 0; error: if (release_io_ports) release_region(0x3b0, 12); if (release_io_port) release_region(0x3bf, 1); - return 0; + + iounmap(hga_vram); + + pr_err("hgafb: HGA card not detected.\n"); + + return -EINVAL; } /** @@ -548,13 +555,11 @@ static const struct fb_ops hgafb_ops = { static int hgafb_probe(struct platform_device *pdev) { struct fb_info *info; + int ret; - if (! hga_card_detect()) { - printk(KERN_INFO "hgafb: HGA card not detected.\n"); - if (hga_vram) - iounmap(hga_vram); - return -EINVAL; - } + ret = hga_card_detect(); + if (!ret) + return ret; printk(KERN_INFO "hgafb: %s with %ldK of memory detected.\n", hga_type_name, hga_vram_len/1024); From abd7bca23bd4247124265152d00ffd4b2b0d6877 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:57:07 +0200 Subject: [PATCH 191/730] Revert "isdn: mISDNinfineon: fix potential NULL pointer dereference" This reverts commit d721fe99f6ada070ae8fc0ec3e01ce5a42def0d9. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. The original commit was incorrect, it should have never have used "unlikely()" and if it ever does trigger, resources are left grabbed. Given there are no users for this code around, I'll just revert this and leave it "as is" as the odds that ioremap() will ever fail here is horrendiously low. Cc: Kangjie Lu Cc: David S. Miller Link: https://lore.kernel.org/r/20210503115736.2104747-41-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/isdn/hardware/mISDN/mISDNinfineon.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/isdn/hardware/mISDN/mISDNinfineon.c b/drivers/isdn/hardware/mISDN/mISDNinfineon.c index a16c7a2a7f3d..fa9c491f9c38 100644 --- a/drivers/isdn/hardware/mISDN/mISDNinfineon.c +++ b/drivers/isdn/hardware/mISDN/mISDNinfineon.c @@ -697,11 +697,8 @@ setup_io(struct inf_hw *hw) (ulong)hw->addr.start, (ulong)hw->addr.size); return err; } - if (hw->ci->addr_mode == AM_MEMIO) { + if (hw->ci->addr_mode == AM_MEMIO) hw->addr.p = ioremap(hw->addr.start, hw->addr.size); - if (unlikely(!hw->addr.p)) - return -ENOMEM; - } hw->addr.mode = hw->ci->addr_mode; if (debug & DEBUG_HW) pr_notice("%s: IO addr %lx (%lu bytes) mode%d\n", From e32fe6d90f44922ccbb94016cfc3c238359e3e39 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Thu, 13 May 2021 15:07:43 +0300 Subject: [PATCH 192/730] iio: adc: ad7192: Avoid disabling a clock that was never enabled. Found by inspection. If the internal clock source is being used, the driver doesn't call clk_prepare_enable() and as such we should not call clk_disable_unprepare() Use the same condition to protect the disable path as is used on the enable one. Note this will all get simplified when the driver moves over to a full devm_ flow, but that would make backporting the fix harder. Fix obviously predates move out of staging, but backporting will become more complex (and is unlikely to happen), hence that patch is given in the fixes tag. Alexandru's sign off is here because he added this patch into a larger series that Jonathan then applied. Fixes: b581f748cce0 ("staging: iio: adc: ad7192: move out of staging") Cc: Alexandru Tachici Reviewed-by: Alexandru Ardelean Signed-off-by: Jonathan Cameron Signed-off-by: Alexandru Ardelean Cc: --- drivers/iio/adc/ad7192.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c index 2ed580521d81..d3be67aa0522 100644 --- a/drivers/iio/adc/ad7192.c +++ b/drivers/iio/adc/ad7192.c @@ -1014,7 +1014,9 @@ static int ad7192_probe(struct spi_device *spi) return 0; error_disable_clk: - clk_disable_unprepare(st->mclk); + if (st->clock_sel == AD7192_CLK_EXT_MCLK1_2 || + st->clock_sel == AD7192_CLK_EXT_MCLK2) + clk_disable_unprepare(st->mclk); error_remove_trigger: ad_sd_cleanup_buffer_and_trigger(indio_dev); error_disable_dvdd: @@ -1031,7 +1033,9 @@ static int ad7192_remove(struct spi_device *spi) struct ad7192_state *st = iio_priv(indio_dev); iio_device_unregister(indio_dev); - clk_disable_unprepare(st->mclk); + if (st->clock_sel == AD7192_CLK_EXT_MCLK1_2 || + st->clock_sel == AD7192_CLK_EXT_MCLK2) + clk_disable_unprepare(st->mclk); ad_sd_cleanup_buffer_and_trigger(indio_dev); regulator_disable(st->dvdd); From b0f27fca5a6c7652e265aae6a4452ce2f2ed64da Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Thu, 13 May 2021 15:07:44 +0300 Subject: [PATCH 193/730] iio: adc: ad7192: handle regulator voltage error first This change fixes a corner-case, where for a zero regulator value, the driver would exit early, initializing the driver only partially. The driver would be in an unknown state. This change reworks the code to check regulator_voltage() return value for negative (error) first, and return early. This is the more common idiom. Also, this change is removing the 'voltage_uv' variable and using the 'ret' value directly. The only place where 'voltage_uv' is being used is to compute the internal reference voltage, and the type of this variable is 'int' (same are for 'ret'). Using only 'ret' avoids having to assign it on the error path. Fixes: ab0afa65bbc7 ("staging: iio: adc: ad7192: fail probe on get_voltage") Cc: Alexandru Tachici Signed-off-by: Alexandru Ardelean Signed-off-by: Jonathan Cameron Cc: --- drivers/iio/adc/ad7192.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c index d3be67aa0522..1141cc13a124 100644 --- a/drivers/iio/adc/ad7192.c +++ b/drivers/iio/adc/ad7192.c @@ -912,7 +912,7 @@ static int ad7192_probe(struct spi_device *spi) { struct ad7192_state *st; struct iio_dev *indio_dev; - int ret, voltage_uv = 0; + int ret; if (!spi->irq) { dev_err(&spi->dev, "no IRQ?\n"); @@ -949,15 +949,12 @@ static int ad7192_probe(struct spi_device *spi) goto error_disable_avdd; } - voltage_uv = regulator_get_voltage(st->avdd); - - if (voltage_uv > 0) { - st->int_vref_mv = voltage_uv / 1000; - } else { - ret = voltage_uv; + ret = regulator_get_voltage(st->avdd); + if (ret < 0) { dev_err(&spi->dev, "Device tree error, reference voltage undefined\n"); goto error_disable_avdd; } + st->int_vref_mv = ret / 1000; spi_set_drvdata(spi, indio_dev); st->chip_info = of_device_get_match_data(&spi->dev); From 04f5b9f539ce314f758d919a14dc7a669f3b7838 Mon Sep 17 00:00:00 2001 From: Lucas Stankus Date: Tue, 11 May 2021 17:54:18 -0300 Subject: [PATCH 194/730] staging: iio: cdc: ad7746: avoid overwrite of num_channels AD7745 devices don't have the CIN2 pins and therefore can't handle related channels. Forcing the number of AD7746 channels may lead to enabling more channels than what the hardware actually supports. Avoid num_channels being overwritten after first assignment. Signed-off-by: Lucas Stankus Fixes: 83e416f458d53 ("staging: iio: adc: Replace, rewrite ad7745 from scratch.") Signed-off-by: Jonathan Cameron Cc: --- drivers/staging/iio/cdc/ad7746.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/staging/iio/cdc/ad7746.c b/drivers/staging/iio/cdc/ad7746.c index dfd71e99e872..eab534dc4bcc 100644 --- a/drivers/staging/iio/cdc/ad7746.c +++ b/drivers/staging/iio/cdc/ad7746.c @@ -700,7 +700,6 @@ static int ad7746_probe(struct i2c_client *client, indio_dev->num_channels = ARRAY_SIZE(ad7746_channels); else indio_dev->num_channels = ARRAY_SIZE(ad7746_channels) - 2; - indio_dev->num_channels = ARRAY_SIZE(ad7746_channels); indio_dev->modes = INDIO_DIRECT_MODE; if (pdata) { From c446f0d4702d316e1c6bf621f70e79678d28830a Mon Sep 17 00:00:00 2001 From: Phillip Potter Date: Mon, 3 May 2021 13:57:08 +0200 Subject: [PATCH 195/730] isdn: mISDNinfineon: check/cleanup ioremap failure correctly in setup_io Move hw->cfg.mode and hw->addr.mode assignments from hw->ci->cfg_mode and hw->ci->addr_mode respectively, to be before the subsequent checks for memory IO mode (and possible ioremap calls in this case). Also introduce ioremap error checks at both locations. This allows resources to be properly freed on ioremap failure, as when the caller of setup_io then subsequently calls release_io via its error path, release_io can now correctly determine the mode as it has been set before the ioremap call. Finally, refactor release_io function so that it will call release_mem_region in the memory IO case, regardless of whether or not hw->cfg.p/hw->addr.p are NULL. This means resources are then properly released on failure. This properly implements the original reverted commit (d721fe99f6ad) from the University of Minnesota, whilst also implementing the ioremap check for the hw->ci->cfg_mode if block as well. Cc: David S. Miller Signed-off-by: Phillip Potter Link: https://lore.kernel.org/r/20210503115736.2104747-42-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/isdn/hardware/mISDN/mISDNinfineon.c | 24 ++++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/isdn/hardware/mISDN/mISDNinfineon.c b/drivers/isdn/hardware/mISDN/mISDNinfineon.c index fa9c491f9c38..88d592bafdb0 100644 --- a/drivers/isdn/hardware/mISDN/mISDNinfineon.c +++ b/drivers/isdn/hardware/mISDN/mISDNinfineon.c @@ -630,17 +630,19 @@ static void release_io(struct inf_hw *hw) { if (hw->cfg.mode) { - if (hw->cfg.p) { + if (hw->cfg.mode == AM_MEMIO) { release_mem_region(hw->cfg.start, hw->cfg.size); - iounmap(hw->cfg.p); + if (hw->cfg.p) + iounmap(hw->cfg.p); } else release_region(hw->cfg.start, hw->cfg.size); hw->cfg.mode = AM_NONE; } if (hw->addr.mode) { - if (hw->addr.p) { + if (hw->addr.mode == AM_MEMIO) { release_mem_region(hw->addr.start, hw->addr.size); - iounmap(hw->addr.p); + if (hw->addr.p) + iounmap(hw->addr.p); } else release_region(hw->addr.start, hw->addr.size); hw->addr.mode = AM_NONE; @@ -670,9 +672,12 @@ setup_io(struct inf_hw *hw) (ulong)hw->cfg.start, (ulong)hw->cfg.size); return err; } - if (hw->ci->cfg_mode == AM_MEMIO) - hw->cfg.p = ioremap(hw->cfg.start, hw->cfg.size); hw->cfg.mode = hw->ci->cfg_mode; + if (hw->ci->cfg_mode == AM_MEMIO) { + hw->cfg.p = ioremap(hw->cfg.start, hw->cfg.size); + if (!hw->cfg.p) + return -ENOMEM; + } if (debug & DEBUG_HW) pr_notice("%s: IO cfg %lx (%lu bytes) mode%d\n", hw->name, (ulong)hw->cfg.start, @@ -697,9 +702,12 @@ setup_io(struct inf_hw *hw) (ulong)hw->addr.start, (ulong)hw->addr.size); return err; } - if (hw->ci->addr_mode == AM_MEMIO) - hw->addr.p = ioremap(hw->addr.start, hw->addr.size); hw->addr.mode = hw->ci->addr_mode; + if (hw->ci->addr_mode == AM_MEMIO) { + hw->addr.p = ioremap(hw->addr.start, hw->addr.size); + if (!hw->addr.p) + return -ENOMEM; + } if (debug & DEBUG_HW) pr_notice("%s: IO addr %lx (%lu bytes) mode%d\n", hw->name, (ulong)hw->addr.start, From efba106f89fc6848726716c101f4c84e88720a9c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:57:09 +0200 Subject: [PATCH 196/730] Revert "ath6kl: return error code in ath6kl_wmi_set_roam_lrssi_cmd()" This reverts commit fc6a6521556c8250e356ddc6a3f2391aa62dc976. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. The change being reverted does NOTHING as the caller to this function does not even look at the return value of the call. So the "claim" that this fixed an an issue is not true. It will be fixed up properly in a future patch by propagating the error up the stack correctly. Cc: Kangjie Lu Cc: Kalle Valo Link: https://lore.kernel.org/r/20210503115736.2104747-43-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath6kl/wmi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath6kl/wmi.c b/drivers/net/wireless/ath/ath6kl/wmi.c index b137e7f34397..aca9732ec1ee 100644 --- a/drivers/net/wireless/ath/ath6kl/wmi.c +++ b/drivers/net/wireless/ath/ath6kl/wmi.c @@ -776,8 +776,10 @@ int ath6kl_wmi_set_roam_lrssi_cmd(struct wmi *wmi, u8 lrssi) cmd->info.params.roam_rssi_floor = DEF_LRSSI_ROAM_FLOOR; cmd->roam_ctrl = WMI_SET_LRSSI_SCAN_PARAMS; - return ath6kl_wmi_cmd_send(wmi, 0, skb, WMI_SET_ROAM_CTRL_CMDID, + ath6kl_wmi_cmd_send(wmi, 0, skb, WMI_SET_ROAM_CTRL_CMDID, NO_SYNC_WMIFLAG); + + return 0; } int ath6kl_wmi_force_roam_cmd(struct wmi *wmi, const u8 *bssid) From 54433367840b46a1555c8ed36c4c0cfc5dbf1358 Mon Sep 17 00:00:00 2001 From: Anirudh Rayabharam Date: Mon, 3 May 2021 13:57:10 +0200 Subject: [PATCH 197/730] ath6kl: return error code in ath6kl_wmi_set_roam_lrssi_cmd() Propagate error code from failure of ath6kl_wmi_cmd_send() to the caller. Signed-off-by: Anirudh Rayabharam Cc: Kalle Valo Link: https://lore.kernel.org/r/20210503115736.2104747-44-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath6kl/debug.c | 5 ++++- drivers/net/wireless/ath/ath6kl/wmi.c | 4 +--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath6kl/debug.c b/drivers/net/wireless/ath/ath6kl/debug.c index 7506cea46f58..433a047f3747 100644 --- a/drivers/net/wireless/ath/ath6kl/debug.c +++ b/drivers/net/wireless/ath/ath6kl/debug.c @@ -1027,14 +1027,17 @@ static ssize_t ath6kl_lrssi_roam_write(struct file *file, { struct ath6kl *ar = file->private_data; unsigned long lrssi_roam_threshold; + int ret; if (kstrtoul_from_user(user_buf, count, 0, &lrssi_roam_threshold)) return -EINVAL; ar->lrssi_roam_threshold = lrssi_roam_threshold; - ath6kl_wmi_set_roam_lrssi_cmd(ar->wmi, ar->lrssi_roam_threshold); + ret = ath6kl_wmi_set_roam_lrssi_cmd(ar->wmi, ar->lrssi_roam_threshold); + if (ret) + return ret; return count; } diff --git a/drivers/net/wireless/ath/ath6kl/wmi.c b/drivers/net/wireless/ath/ath6kl/wmi.c index aca9732ec1ee..b137e7f34397 100644 --- a/drivers/net/wireless/ath/ath6kl/wmi.c +++ b/drivers/net/wireless/ath/ath6kl/wmi.c @@ -776,10 +776,8 @@ int ath6kl_wmi_set_roam_lrssi_cmd(struct wmi *wmi, u8 lrssi) cmd->info.params.roam_rssi_floor = DEF_LRSSI_ROAM_FLOOR; cmd->roam_ctrl = WMI_SET_LRSSI_SCAN_PARAMS; - ath6kl_wmi_cmd_send(wmi, 0, skb, WMI_SET_ROAM_CTRL_CMDID, + return ath6kl_wmi_cmd_send(wmi, 0, skb, WMI_SET_ROAM_CTRL_CMDID, NO_SYNC_WMIFLAG); - - return 0; } int ath6kl_wmi_force_roam_cmd(struct wmi *wmi, const u8 *bssid) From 5e68b86c7b7c059c0f0ec4bf8adabe63f84a61eb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:57:11 +0200 Subject: [PATCH 198/730] Revert "rapidio: fix a NULL pointer dereference when create_workqueue() fails" This reverts commit 23015b22e47c5409620b1726a677d69e5cd032ba. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. The original commit has a memory leak on the error path here, it does not clean up everything properly. Cc: Kangjie Lu Cc: Alexandre Bounine Cc: Matt Porter Cc: Andrew Morton Cc: Linus Torvalds Fixes: 23015b22e47c ("rapidio: fix a NULL pointer dereference when create_workqueue() fails") Cc: stable Link: https://lore.kernel.org/r/20210503115736.2104747-45-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/rapidio/rio_cm.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c index 50ec53d67a4c..e6c16f04f2b4 100644 --- a/drivers/rapidio/rio_cm.c +++ b/drivers/rapidio/rio_cm.c @@ -2138,14 +2138,6 @@ static int riocm_add_mport(struct device *dev, mutex_init(&cm->rx_lock); riocm_rx_fill(cm, RIOCM_RX_RING_SIZE); cm->rx_wq = create_workqueue(DRV_NAME "/rxq"); - if (!cm->rx_wq) { - riocm_error("failed to allocate IBMBOX_%d on %s", - cmbox, mport->name); - rio_release_outb_mbox(mport, cmbox); - kfree(cm); - return -ENOMEM; - } - INIT_WORK(&cm->rx_work, rio_ibmsg_handler); cm->tx_slot = 0; From 69ce3ae36dcb03cdf416b0862a45369ddbf50fdf Mon Sep 17 00:00:00 2001 From: Anirudh Rayabharam Date: Mon, 3 May 2021 13:57:12 +0200 Subject: [PATCH 199/730] rapidio: handle create_workqueue() failure In case create_workqueue() fails, release all resources and return -ENOMEM to caller to avoid potential NULL pointer deref later. Move up the create_workequeue() call to return early and avoid unwinding the call to riocm_rx_fill(). Cc: Alexandre Bounine Cc: Matt Porter Cc: Andrew Morton Cc: Linus Torvalds Cc: stable Signed-off-by: Anirudh Rayabharam Link: https://lore.kernel.org/r/20210503115736.2104747-46-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/rapidio/rio_cm.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c index e6c16f04f2b4..db4c265287ae 100644 --- a/drivers/rapidio/rio_cm.c +++ b/drivers/rapidio/rio_cm.c @@ -2127,6 +2127,14 @@ static int riocm_add_mport(struct device *dev, return -ENODEV; } + cm->rx_wq = create_workqueue(DRV_NAME "/rxq"); + if (!cm->rx_wq) { + rio_release_inb_mbox(mport, cmbox); + rio_release_outb_mbox(mport, cmbox); + kfree(cm); + return -ENOMEM; + } + /* * Allocate and register inbound messaging buffers to be ready * to receive channel and system management requests @@ -2137,7 +2145,6 @@ static int riocm_add_mport(struct device *dev, cm->rx_slots = RIOCM_RX_RING_SIZE; mutex_init(&cm->rx_lock); riocm_rx_fill(cm, RIOCM_RX_RING_SIZE); - cm->rx_wq = create_workqueue(DRV_NAME "/rxq"); INIT_WORK(&cm->rx_work, rio_ibmsg_handler); cm->tx_slot = 0; From 36a2c87f7ed9e305d05b9a5c044cc6c494771504 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:57:13 +0200 Subject: [PATCH 200/730] Revert "isdn: mISDN: Fix potential NULL pointer dereference of kzalloc" This reverts commit 38d22659803a033b1b66cd2624c33570c0dde77d. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. While it looks like the original change is correct, it is not, as none of the setup actually happens, and the error value is not propagated upwards. Cc: Aditya Pakki Cc: David S. Miller Link: https://lore.kernel.org/r/20210503115736.2104747-47-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/isdn/hardware/mISDN/hfcsusb.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c index 70061991915a..4bb470d3963d 100644 --- a/drivers/isdn/hardware/mISDN/hfcsusb.c +++ b/drivers/isdn/hardware/mISDN/hfcsusb.c @@ -249,9 +249,6 @@ hfcsusb_ph_info(struct hfcsusb *hw) int i; phi = kzalloc(struct_size(phi, bch, dch->dev.nrbchan), GFP_ATOMIC); - if (!phi) - return; - phi->dch.ch.protocol = hw->protocol; phi->dch.ch.Flags = dch->Flags; phi->dch.state = dch->state; From 5265db2ccc735e2783b790d6c19fb5cee8c025ed Mon Sep 17 00:00:00 2001 From: Phillip Potter Date: Mon, 3 May 2021 13:57:14 +0200 Subject: [PATCH 201/730] isdn: mISDN: correctly handle ph_info allocation failure in hfcsusb_ph_info Modify return type of hfcusb_ph_info to int, so that we can pass error value up the call stack when allocation of ph_info fails. Also change three of four call sites to actually account for the memory failure. The fourth, in ph_state_nt, is infeasible to change as it is in turn called by ph_state which is used as a function pointer argument to mISDN_initdchannel, which would necessitate changing its signature and updating all the places where it is used (too many). Fixes original flawed commit (38d22659803a) from the University of Minnesota. Cc: David S. Miller Signed-off-by: Phillip Potter Link: https://lore.kernel.org/r/20210503115736.2104747-48-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/isdn/hardware/mISDN/hfcsusb.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c index 4bb470d3963d..cd5642cef01f 100644 --- a/drivers/isdn/hardware/mISDN/hfcsusb.c +++ b/drivers/isdn/hardware/mISDN/hfcsusb.c @@ -46,7 +46,7 @@ static void hfcsusb_start_endpoint(struct hfcsusb *hw, int channel); static void hfcsusb_stop_endpoint(struct hfcsusb *hw, int channel); static int hfcsusb_setup_bch(struct bchannel *bch, int protocol); static void deactivate_bchannel(struct bchannel *bch); -static void hfcsusb_ph_info(struct hfcsusb *hw); +static int hfcsusb_ph_info(struct hfcsusb *hw); /* start next background transfer for control channel */ static void @@ -241,7 +241,7 @@ hfcusb_l2l1B(struct mISDNchannel *ch, struct sk_buff *skb) * send full D/B channel status information * as MPH_INFORMATION_IND */ -static void +static int hfcsusb_ph_info(struct hfcsusb *hw) { struct ph_info *phi; @@ -249,6 +249,9 @@ hfcsusb_ph_info(struct hfcsusb *hw) int i; phi = kzalloc(struct_size(phi, bch, dch->dev.nrbchan), GFP_ATOMIC); + if (!phi) + return -ENOMEM; + phi->dch.ch.protocol = hw->protocol; phi->dch.ch.Flags = dch->Flags; phi->dch.state = dch->state; @@ -260,6 +263,8 @@ hfcsusb_ph_info(struct hfcsusb *hw) _queue_data(&dch->dev.D, MPH_INFORMATION_IND, MISDN_ID_ANY, struct_size(phi, bch, dch->dev.nrbchan), phi, GFP_ATOMIC); kfree(phi); + + return 0; } /* @@ -344,8 +349,7 @@ hfcusb_l2l1D(struct mISDNchannel *ch, struct sk_buff *skb) ret = l1_event(dch->l1, hh->prim); break; case MPH_INFORMATION_REQ: - hfcsusb_ph_info(hw); - ret = 0; + ret = hfcsusb_ph_info(hw); break; } @@ -400,8 +404,7 @@ hfc_l1callback(struct dchannel *dch, u_int cmd) hw->name, __func__, cmd); return -1; } - hfcsusb_ph_info(hw); - return 0; + return hfcsusb_ph_info(hw); } static int @@ -743,8 +746,7 @@ hfcsusb_setup_bch(struct bchannel *bch, int protocol) handle_led(hw, (bch->nr == 1) ? LED_B1_OFF : LED_B2_OFF); } - hfcsusb_ph_info(hw); - return 0; + return hfcsusb_ph_info(hw); } static void From e1436df2f2550bc89d832ffd456373fdf5d5b5d7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:57:15 +0200 Subject: [PATCH 202/730] Revert "ecryptfs: replace BUG_ON with error handling code" This reverts commit 2c2a7552dd6465e8fde6bc9cccf8d66ed1c1eb72. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. The original commit log for this change was incorrect, no "error handling code" was added, things will blow up just as badly as before if any of these cases ever were true. As this BUG_ON() never fired, and most of these checks are "obviously" never going to be true, let's just revert to the original code for now until this gets unwound to be done correctly in the future. Cc: Aditya Pakki Fixes: 2c2a7552dd64 ("ecryptfs: replace BUG_ON with error handling code") Cc: stable Acked-by: Tyler Hicks Link: https://lore.kernel.org/r/20210503115736.2104747-49-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- fs/ecryptfs/crypto.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 345f8061e3b4..b1aa993784f7 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -296,10 +296,8 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, struct extent_crypt_result ecr; int rc = 0; - if (!crypt_stat || !crypt_stat->tfm - || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)) - return -EINVAL; - + BUG_ON(!crypt_stat || !crypt_stat->tfm + || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)); if (unlikely(ecryptfs_verbosity > 0)) { ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n", crypt_stat->key_size); From c6052f09c14bf0ecdd582662e022eb716f9b8022 Mon Sep 17 00:00:00 2001 From: Phillip Potter Date: Mon, 3 May 2021 13:57:16 +0200 Subject: [PATCH 203/730] fs: ecryptfs: remove BUG_ON from crypt_scatterlist crypt_stat memory itself is allocated when inode is created, in ecryptfs_alloc_inode, which returns NULL on failure and is handled by callers, which would prevent us getting to this point. It then calls ecryptfs_init_crypt_stat which allocates crypt_stat->tfm checking for and likewise handling allocation failure. Finally, crypt_stat->flags has ECRYPTFS_STRUCT_INITIALIZED merged into it in ecryptfs_init_crypt_stat as well. Simply put, the conditions that the BUG_ON checks for will never be triggered, as to even get to this function, the relevant conditions will have already been fulfilled (or the inode allocation would fail in the first place and thus no call to this function or those above it). Cc: Tyler Hicks Signed-off-by: Phillip Potter Link: https://lore.kernel.org/r/20210503115736.2104747-50-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- fs/ecryptfs/crypto.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index b1aa993784f7..e3f5d7f3c8a0 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -296,8 +296,6 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, struct extent_crypt_result ecr; int rc = 0; - BUG_ON(!crypt_stat || !crypt_stat->tfm - || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)); if (unlikely(ecryptfs_verbosity > 0)) { ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n", crypt_stat->key_size); From 43ed0fcf613a87dd0221ec72d1ade4d6544f2ffc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:57:17 +0200 Subject: [PATCH 204/730] Revert "dmaengine: qcom_hidma: Check for driver register failure" This reverts commit a474b3f0428d6b02a538aa10b3c3b722751cb382. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. The original change is NOT correct, as it does not correctly unwind from the resources that was allocated before the call to platform_driver_register(). Cc: Aditya Pakki Acked-By: Vinod Koul Acked-By: Sinan Kaya Link: https://lore.kernel.org/r/20210503115736.2104747-51-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/dma/qcom/hidma_mgmt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma/qcom/hidma_mgmt.c b/drivers/dma/qcom/hidma_mgmt.c index 806ca02c52d7..fe87b01f7a4e 100644 --- a/drivers/dma/qcom/hidma_mgmt.c +++ b/drivers/dma/qcom/hidma_mgmt.c @@ -418,8 +418,9 @@ static int __init hidma_mgmt_init(void) hidma_mgmt_of_populate_channels(child); } #endif - return platform_driver_register(&hidma_mgmt_driver); + platform_driver_register(&hidma_mgmt_driver); + return 0; } module_init(hidma_mgmt_init); MODULE_LICENSE("GPL v2"); From 4df2a8b0ad634d98a67e540a4e18a60f943e7d9f Mon Sep 17 00:00:00 2001 From: Phillip Potter Date: Mon, 3 May 2021 13:57:18 +0200 Subject: [PATCH 205/730] dmaengine: qcom_hidma: comment platform_driver_register call Place a comment in hidma_mgmt_init explaining why success must currently be assumed, due to the cleanup issue that would need to be considered were this module ever to be unloadable or were this platform_driver_register call ever to fail. Acked-By: Vinod Koul Acked-By: Sinan Kaya Signed-off-by: Phillip Potter Link: https://lore.kernel.org/r/20210503115736.2104747-52-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/dma/qcom/hidma_mgmt.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/dma/qcom/hidma_mgmt.c b/drivers/dma/qcom/hidma_mgmt.c index fe87b01f7a4e..62026607f3f8 100644 --- a/drivers/dma/qcom/hidma_mgmt.c +++ b/drivers/dma/qcom/hidma_mgmt.c @@ -418,6 +418,20 @@ static int __init hidma_mgmt_init(void) hidma_mgmt_of_populate_channels(child); } #endif + /* + * We do not check for return value here, as it is assumed that + * platform_driver_register must not fail. The reason for this is that + * the (potential) hidma_mgmt_of_populate_channels calls above are not + * cleaned up if it does fail, and to do this work is quite + * complicated. In particular, various calls of of_address_to_resource, + * of_irq_to_resource, platform_device_register_full, of_dma_configure, + * and of_msi_configure which then call other functions and so on, must + * be cleaned up - this is not a trivial exercise. + * + * Currently, this module is not intended to be unloaded, and there is + * no module_exit function defined which does the needed cleanup. For + * this reason, we have to assume success here. + */ platform_driver_register(&hidma_mgmt_driver); return 0; From 46651077765c80a0d6f87f3469129a72e49ce91b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:57:19 +0200 Subject: [PATCH 206/730] Revert "libertas: add checks for the return value of sysfs_create_group" This reverts commit 434256833d8eb988cb7f3b8a41699e2fe48d9332. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. The original commit was incorrect, the error needs to be propagated back to the caller AND if the second group call fails, the first needs to be removed. There are much better ways to solve this, the driver should NOT be calling sysfs_create_group() on its own as it is racing userspace and loosing. Cc: Kangjie Lu Cc: Kalle Valo Link: https://lore.kernel.org/r/20210503115736.2104747-53-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/marvell/libertas/mesh.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/wireless/marvell/libertas/mesh.c b/drivers/net/wireless/marvell/libertas/mesh.c index f5b78257d551..c611e6668b21 100644 --- a/drivers/net/wireless/marvell/libertas/mesh.c +++ b/drivers/net/wireless/marvell/libertas/mesh.c @@ -805,12 +805,7 @@ static void lbs_persist_config_init(struct net_device *dev) { int ret; ret = sysfs_create_group(&(dev->dev.kobj), &boot_opts_group); - if (ret) - pr_err("failed to create boot_opts_group.\n"); - ret = sysfs_create_group(&(dev->dev.kobj), &mesh_ie_group); - if (ret) - pr_err("failed to create mesh_ie_group.\n"); } static void lbs_persist_config_remove(struct net_device *dev) From 7e79b38fe9a403b065ac5915465f620a8fb3de84 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:57:20 +0200 Subject: [PATCH 207/730] libertas: register sysfs groups properly The libertas driver was trying to register sysfs groups "by hand" which causes them to be created _after_ the device is initialized and announced to userspace, which causes races and can prevent userspace tools from seeing the sysfs files correctly. Fix this up by using the built-in sysfs_groups pointers in struct net_device which were created for this very reason, fixing the race condition, and properly allowing for any error that might have occured to be handled properly. Cc: Kalle Valo Link: https://lore.kernel.org/r/20210503115736.2104747-54-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/marvell/libertas/mesh.c | 28 +++----------------- 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/drivers/net/wireless/marvell/libertas/mesh.c b/drivers/net/wireless/marvell/libertas/mesh.c index c611e6668b21..c68814841583 100644 --- a/drivers/net/wireless/marvell/libertas/mesh.c +++ b/drivers/net/wireless/marvell/libertas/mesh.c @@ -801,19 +801,6 @@ static const struct attribute_group mesh_ie_group = { .attrs = mesh_ie_attrs, }; -static void lbs_persist_config_init(struct net_device *dev) -{ - int ret; - ret = sysfs_create_group(&(dev->dev.kobj), &boot_opts_group); - ret = sysfs_create_group(&(dev->dev.kobj), &mesh_ie_group); -} - -static void lbs_persist_config_remove(struct net_device *dev) -{ - sysfs_remove_group(&(dev->dev.kobj), &boot_opts_group); - sysfs_remove_group(&(dev->dev.kobj), &mesh_ie_group); -} - /*************************************************************************** * Initializing and starting, stopping mesh @@ -1009,6 +996,10 @@ static int lbs_add_mesh(struct lbs_private *priv) SET_NETDEV_DEV(priv->mesh_dev, priv->dev->dev.parent); mesh_dev->flags |= IFF_BROADCAST | IFF_MULTICAST; + mesh_dev->sysfs_groups[0] = &lbs_mesh_attr_group; + mesh_dev->sysfs_groups[1] = &boot_opts_group; + mesh_dev->sysfs_groups[2] = &mesh_ie_group; + /* Register virtual mesh interface */ ret = register_netdev(mesh_dev); if (ret) { @@ -1016,19 +1007,10 @@ static int lbs_add_mesh(struct lbs_private *priv) goto err_free_netdev; } - ret = sysfs_create_group(&(mesh_dev->dev.kobj), &lbs_mesh_attr_group); - if (ret) - goto err_unregister; - - lbs_persist_config_init(mesh_dev); - /* Everything successful */ ret = 0; goto done; -err_unregister: - unregister_netdev(mesh_dev); - err_free_netdev: free_netdev(mesh_dev); @@ -1049,8 +1031,6 @@ void lbs_remove_mesh(struct lbs_private *priv) netif_stop_queue(mesh_dev); netif_carrier_off(mesh_dev); - sysfs_remove_group(&(mesh_dev->dev.kobj), &lbs_mesh_attr_group); - lbs_persist_config_remove(mesh_dev); unregister_netdev(mesh_dev); priv->mesh_dev = NULL; kfree(mesh_dev->ieee80211_ptr); From 1e0ce84215dbfd6065872e5d3755352da34f198b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:57:21 +0200 Subject: [PATCH 208/730] Revert "ASoC: rt5645: fix a NULL pointer dereference" This reverts commit 51dd97d1df5fb9ac58b9b358e63e67b530f6ae21. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. Lots of things seem to be still allocated here and must be properly cleaned up if an error happens here. Cc: Kangjie Lu Cc: Mark Brown Link: https://lore.kernel.org/r/20210503115736.2104747-55-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/rt5645.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 9408ee63cb26..7cb90975009a 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3431,9 +3431,6 @@ static int rt5645_probe(struct snd_soc_component *component) RT5645_HWEQ_NUM, sizeof(struct rt5645_eq_param_s), GFP_KERNEL); - if (!rt5645->eq_param) - return -ENOMEM; - return 0; } From 5e70b8e22b64eed13d5bbebcb5911dae65bf8c6b Mon Sep 17 00:00:00 2001 From: Phillip Potter Date: Mon, 3 May 2021 13:57:22 +0200 Subject: [PATCH 209/730] ASoC: rt5645: add error checking to rt5645_probe function Check for return value from various snd_soc_dapm_* calls, as many of them can return errors and this should be handled. Also, reintroduce the allocation failure check for rt5645->eq_param as well. Make all areas where return values are checked lead to the end of the function in the case of an error. Finally, introduce a comment explaining how resources here are actually eventually cleaned up by the caller. Cc: Mark Brown Signed-off-by: Phillip Potter Link: https://lore.kernel.org/r/20210503115736.2104747-56-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/rt5645.c | 48 +++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 7cb90975009a..438fa18bcb55 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3388,30 +3388,44 @@ static int rt5645_probe(struct snd_soc_component *component) { struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(component); struct rt5645_priv *rt5645 = snd_soc_component_get_drvdata(component); + int ret = 0; rt5645->component = component; switch (rt5645->codec_type) { case CODEC_TYPE_RT5645: - snd_soc_dapm_new_controls(dapm, + ret = snd_soc_dapm_new_controls(dapm, rt5645_specific_dapm_widgets, ARRAY_SIZE(rt5645_specific_dapm_widgets)); - snd_soc_dapm_add_routes(dapm, + if (ret < 0) + goto exit; + + ret = snd_soc_dapm_add_routes(dapm, rt5645_specific_dapm_routes, ARRAY_SIZE(rt5645_specific_dapm_routes)); + if (ret < 0) + goto exit; + if (rt5645->v_id < 3) { - snd_soc_dapm_add_routes(dapm, + ret = snd_soc_dapm_add_routes(dapm, rt5645_old_dapm_routes, ARRAY_SIZE(rt5645_old_dapm_routes)); + if (ret < 0) + goto exit; } break; case CODEC_TYPE_RT5650: - snd_soc_dapm_new_controls(dapm, + ret = snd_soc_dapm_new_controls(dapm, rt5650_specific_dapm_widgets, ARRAY_SIZE(rt5650_specific_dapm_widgets)); - snd_soc_dapm_add_routes(dapm, + if (ret < 0) + goto exit; + + ret = snd_soc_dapm_add_routes(dapm, rt5650_specific_dapm_routes, ARRAY_SIZE(rt5650_specific_dapm_routes)); + if (ret < 0) + goto exit; break; } @@ -3419,9 +3433,17 @@ static int rt5645_probe(struct snd_soc_component *component) /* for JD function */ if (rt5645->pdata.jd_mode) { - snd_soc_dapm_force_enable_pin(dapm, "JD Power"); - snd_soc_dapm_force_enable_pin(dapm, "LDO2"); - snd_soc_dapm_sync(dapm); + ret = snd_soc_dapm_force_enable_pin(dapm, "JD Power"); + if (ret < 0) + goto exit; + + ret = snd_soc_dapm_force_enable_pin(dapm, "LDO2"); + if (ret < 0) + goto exit; + + ret = snd_soc_dapm_sync(dapm); + if (ret < 0) + goto exit; } if (rt5645->pdata.long_name) @@ -3431,7 +3453,15 @@ static int rt5645_probe(struct snd_soc_component *component) RT5645_HWEQ_NUM, sizeof(struct rt5645_eq_param_s), GFP_KERNEL); - return 0; + if (!rt5645->eq_param) + ret = -ENOMEM; +exit: + /* + * If there was an error above, everything will be cleaned up by the + * caller if we return an error here. This will be done with a later + * call to rt5645_remove(). + */ + return ret; } static void rt5645_remove(struct snd_soc_component *component) From fdda0dd2686ecd1f2e616c9e0366ea71b40c485d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:57:23 +0200 Subject: [PATCH 210/730] Revert "ASoC: cs43130: fix a NULL pointer dereference" This reverts commit a2be42f18d409213bb7e7a736e3ef6ba005115bb. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. The original patch here is not correct, sysfs files that were created are not unwound. Cc: Kangjie Lu Cc: Mark Brown Link: https://lore.kernel.org/r/20210503115736.2104747-57-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/cs43130.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/soc/codecs/cs43130.c b/sound/soc/codecs/cs43130.c index 80bc7c10ed75..c2b6f0ae6d57 100644 --- a/sound/soc/codecs/cs43130.c +++ b/sound/soc/codecs/cs43130.c @@ -2319,8 +2319,6 @@ static int cs43130_probe(struct snd_soc_component *component) return ret; cs43130->wq = create_singlethread_workqueue("cs43130_hp"); - if (!cs43130->wq) - return -ENOMEM; INIT_WORK(&cs43130->work, cs43130_imp_meas); } From 2da441a6491d93eff8ffff523837fd621dc80389 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:57:24 +0200 Subject: [PATCH 211/730] ASoC: cs43130: handle errors in cs43130_probe() properly cs43130_probe() does not do any valid error checking of things it initializes, OR what it does, it does not unwind properly if there are errors. Fix this up by moving the sysfs files to an attribute group so the driver core will correctly add/remove them all at once and handle errors with them, and correctly check for creating a new workqueue and unwinding if that fails. Cc: Mark Brown Link: https://lore.kernel.org/r/20210503115736.2104747-58-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/cs43130.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/sound/soc/codecs/cs43130.c b/sound/soc/codecs/cs43130.c index c2b6f0ae6d57..80cd3ea0c157 100644 --- a/sound/soc/codecs/cs43130.c +++ b/sound/soc/codecs/cs43130.c @@ -1735,6 +1735,14 @@ static DEVICE_ATTR(hpload_dc_r, 0444, cs43130_show_dc_r, NULL); static DEVICE_ATTR(hpload_ac_l, 0444, cs43130_show_ac_l, NULL); static DEVICE_ATTR(hpload_ac_r, 0444, cs43130_show_ac_r, NULL); +static struct attribute *hpload_attrs[] = { + &dev_attr_hpload_dc_l.attr, + &dev_attr_hpload_dc_r.attr, + &dev_attr_hpload_ac_l.attr, + &dev_attr_hpload_ac_r.attr, +}; +ATTRIBUTE_GROUPS(hpload); + static struct reg_sequence hp_en_cal_seq[] = { {CS43130_INT_MASK_4, CS43130_INT_MASK_ALL}, {CS43130_HP_MEAS_LOAD_1, 0}, @@ -2302,23 +2310,15 @@ static int cs43130_probe(struct snd_soc_component *component) cs43130->hpload_done = false; if (cs43130->dc_meas) { - ret = device_create_file(component->dev, &dev_attr_hpload_dc_l); - if (ret < 0) - return ret; - - ret = device_create_file(component->dev, &dev_attr_hpload_dc_r); - if (ret < 0) - return ret; - - ret = device_create_file(component->dev, &dev_attr_hpload_ac_l); - if (ret < 0) - return ret; - - ret = device_create_file(component->dev, &dev_attr_hpload_ac_r); - if (ret < 0) + ret = sysfs_create_groups(&component->dev->kobj, hpload_groups); + if (ret) return ret; cs43130->wq = create_singlethread_workqueue("cs43130_hp"); + if (!cs43130->wq) { + sysfs_remove_groups(&component->dev->kobj, hpload_groups); + return -ENOMEM; + } INIT_WORK(&cs43130->work, cs43130_imp_meas); } From 47e4ff06fa7f5ba4860543a2913bbd0c164640aa Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:57:25 +0200 Subject: [PATCH 212/730] Revert "media: dvb: Add check on sp8870_readreg" This reverts commit 467a37fba93f2b4fe3ab597ff6a517b22b566882. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. This commit is not properly checking for an error at all, so if a read succeeds from this device, it will error out. Cc: Aditya Pakki Cc: Sean Young Cc: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/20210503115736.2104747-59-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-frontends/sp8870.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/media/dvb-frontends/sp8870.c b/drivers/media/dvb-frontends/sp8870.c index 655db8272268..ee893a2f2261 100644 --- a/drivers/media/dvb-frontends/sp8870.c +++ b/drivers/media/dvb-frontends/sp8870.c @@ -280,9 +280,7 @@ static int sp8870_set_frontend_parameters(struct dvb_frontend *fe) sp8870_writereg(state, 0xc05, reg0xc05); // read status reg in order to clear pending irqs - err = sp8870_readreg(state, 0x200); - if (err) - return err; + sp8870_readreg(state, 0x200); // system controller start sp8870_microcontroller_start(state); From c6d822c56e7fd29e6fa1b1bb91b98f6a1e942b3c Mon Sep 17 00:00:00 2001 From: Alaa Emad Date: Mon, 3 May 2021 13:57:26 +0200 Subject: [PATCH 213/730] media: dvb: Add check on sp8870_readreg return The function sp8870_readreg returns a negative value when i2c_transfer fails so properly check for this and return the error if it happens. Cc: Sean Young Cc: Mauro Carvalho Chehab Signed-off-by: Alaa Emad Link: https://lore.kernel.org/r/20210503115736.2104747-60-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-frontends/sp8870.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/sp8870.c b/drivers/media/dvb-frontends/sp8870.c index ee893a2f2261..9767159aeb9b 100644 --- a/drivers/media/dvb-frontends/sp8870.c +++ b/drivers/media/dvb-frontends/sp8870.c @@ -280,7 +280,9 @@ static int sp8870_set_frontend_parameters(struct dvb_frontend *fe) sp8870_writereg(state, 0xc05, reg0xc05); // read status reg in order to clear pending irqs - sp8870_readreg(state, 0x200); + err = sp8870_readreg(state, 0x200); + if (err < 0) + return err; // system controller start sp8870_microcontroller_start(state); From d8c3be2fb2079d0cb4cd29d6aba58dbe54771e42 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:57:27 +0200 Subject: [PATCH 214/730] Revert "media: gspca: mt9m111: Check write_bridge for timeout" This reverts commit 656025850074f5c1ba2e05be37bda57ba2b8d491. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. Different error values should never be "OR" together and expect anything sane to come out of the result. Cc: Aditya Pakki Cc: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/20210503115736.2104747-61-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/gspca/m5602/m5602_mt9m111.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/media/usb/gspca/m5602/m5602_mt9m111.c b/drivers/media/usb/gspca/m5602/m5602_mt9m111.c index bfa3b381d8a2..50481dc928d0 100644 --- a/drivers/media/usb/gspca/m5602/m5602_mt9m111.c +++ b/drivers/media/usb/gspca/m5602/m5602_mt9m111.c @@ -195,7 +195,7 @@ static const struct v4l2_ctrl_config mt9m111_greenbal_cfg = { int mt9m111_probe(struct sd *sd) { u8 data[2] = {0x00, 0x00}; - int i, rc = 0; + int i; struct gspca_dev *gspca_dev = (struct gspca_dev *)sd; if (force_sensor) { @@ -213,18 +213,16 @@ int mt9m111_probe(struct sd *sd) /* Do the preinit */ for (i = 0; i < ARRAY_SIZE(preinit_mt9m111); i++) { if (preinit_mt9m111[i][0] == BRIDGE) { - rc |= m5602_write_bridge(sd, + m5602_write_bridge(sd, preinit_mt9m111[i][1], preinit_mt9m111[i][2]); } else { data[0] = preinit_mt9m111[i][2]; data[1] = preinit_mt9m111[i][3]; - rc |= m5602_write_sensor(sd, + m5602_write_sensor(sd, preinit_mt9m111[i][1], data, 2); } } - if (rc < 0) - return rc; if (m5602_read_sensor(sd, MT9M111_SC_CHIPVER, data, 2)) return -ENODEV; From e932f5b458eee63d013578ea128b9ff8ef5f5496 Mon Sep 17 00:00:00 2001 From: Alaa Emad Date: Mon, 3 May 2021 13:57:28 +0200 Subject: [PATCH 215/730] media: gspca: mt9m111: Check write_bridge for timeout If m5602_write_bridge times out, it will return a negative error value. So properly check for this and handle the error correctly instead of just ignoring it. Cc: Mauro Carvalho Chehab Signed-off-by: Alaa Emad Link: https://lore.kernel.org/r/20210503115736.2104747-62-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/gspca/m5602/m5602_mt9m111.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/media/usb/gspca/m5602/m5602_mt9m111.c b/drivers/media/usb/gspca/m5602/m5602_mt9m111.c index 50481dc928d0..bf1af6ed9131 100644 --- a/drivers/media/usb/gspca/m5602/m5602_mt9m111.c +++ b/drivers/media/usb/gspca/m5602/m5602_mt9m111.c @@ -195,7 +195,7 @@ static const struct v4l2_ctrl_config mt9m111_greenbal_cfg = { int mt9m111_probe(struct sd *sd) { u8 data[2] = {0x00, 0x00}; - int i; + int i, err; struct gspca_dev *gspca_dev = (struct gspca_dev *)sd; if (force_sensor) { @@ -213,15 +213,17 @@ int mt9m111_probe(struct sd *sd) /* Do the preinit */ for (i = 0; i < ARRAY_SIZE(preinit_mt9m111); i++) { if (preinit_mt9m111[i][0] == BRIDGE) { - m5602_write_bridge(sd, - preinit_mt9m111[i][1], - preinit_mt9m111[i][2]); + err = m5602_write_bridge(sd, + preinit_mt9m111[i][1], + preinit_mt9m111[i][2]); } else { data[0] = preinit_mt9m111[i][2]; data[1] = preinit_mt9m111[i][3]; - m5602_write_sensor(sd, - preinit_mt9m111[i][1], data, 2); + err = m5602_write_sensor(sd, + preinit_mt9m111[i][1], data, 2); } + if (err < 0) + return err; } if (m5602_read_sensor(sd, MT9M111_SC_CHIPVER, data, 2)) From 8e23e83c752b54e98102627a1cc09281ad71a299 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:57:29 +0200 Subject: [PATCH 216/730] Revert "media: gspca: Check the return value of write_bridge for timeout" This reverts commit a21a0eb56b4e8fe4a330243af8030f890cde2283. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. Different error values should never be "OR" together and expect anything sane to come out of the result. Cc: Aditya Pakki Cc: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/20210503115736.2104747-63-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/gspca/m5602/m5602_po1030.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/media/usb/gspca/m5602/m5602_po1030.c b/drivers/media/usb/gspca/m5602/m5602_po1030.c index d680b777f097..7bdbb8065146 100644 --- a/drivers/media/usb/gspca/m5602/m5602_po1030.c +++ b/drivers/media/usb/gspca/m5602/m5602_po1030.c @@ -154,7 +154,6 @@ static const struct v4l2_ctrl_config po1030_greenbal_cfg = { int po1030_probe(struct sd *sd) { - int rc = 0; u8 dev_id_h = 0, i; struct gspca_dev *gspca_dev = (struct gspca_dev *)sd; @@ -174,14 +173,11 @@ int po1030_probe(struct sd *sd) for (i = 0; i < ARRAY_SIZE(preinit_po1030); i++) { u8 data = preinit_po1030[i][2]; if (preinit_po1030[i][0] == SENSOR) - rc |= m5602_write_sensor(sd, + m5602_write_sensor(sd, preinit_po1030[i][1], &data, 1); else - rc |= m5602_write_bridge(sd, preinit_po1030[i][1], - data); + m5602_write_bridge(sd, preinit_po1030[i][1], data); } - if (rc < 0) - return rc; if (m5602_read_sensor(sd, PO1030_DEVID_H, &dev_id_h, 1)) return -ENODEV; From dacb408ca6f0e34df22b40d8dd5fae7f8e777d84 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:57:30 +0200 Subject: [PATCH 217/730] media: gspca: properly check for errors in po1030_probe() If m5602_write_sensor() or m5602_write_bridge() fail, do not continue to initialize the device but return the error to the calling funtion. Cc: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/20210503115736.2104747-64-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/gspca/m5602/m5602_po1030.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/media/usb/gspca/m5602/m5602_po1030.c b/drivers/media/usb/gspca/m5602/m5602_po1030.c index 7bdbb8065146..8fd99ceee4b6 100644 --- a/drivers/media/usb/gspca/m5602/m5602_po1030.c +++ b/drivers/media/usb/gspca/m5602/m5602_po1030.c @@ -155,6 +155,7 @@ static const struct v4l2_ctrl_config po1030_greenbal_cfg = { int po1030_probe(struct sd *sd) { u8 dev_id_h = 0, i; + int err; struct gspca_dev *gspca_dev = (struct gspca_dev *)sd; if (force_sensor) { @@ -173,10 +174,13 @@ int po1030_probe(struct sd *sd) for (i = 0; i < ARRAY_SIZE(preinit_po1030); i++) { u8 data = preinit_po1030[i][2]; if (preinit_po1030[i][0] == SENSOR) - m5602_write_sensor(sd, - preinit_po1030[i][1], &data, 1); + err = m5602_write_sensor(sd, preinit_po1030[i][1], + &data, 1); else - m5602_write_bridge(sd, preinit_po1030[i][1], data); + err = m5602_write_bridge(sd, preinit_po1030[i][1], + data); + if (err < 0) + return err; } if (m5602_read_sensor(sd, PO1030_DEVID_H, &dev_id_h, 1)) From 4fd798a5a89114c1892574c50f2aebd49bc5b4f5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:57:31 +0200 Subject: [PATCH 218/730] Revert "net: liquidio: fix a NULL pointer dereference" This reverts commit fe543b2f174f34a7a751aa08b334fe6b105c4569. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. While the original commit does keep the immediate "NULL dereference" from happening, it does not properly propagate the error back to the callers, AND it does not fix this same identical issue in the drivers/net/ethernet/cavium/liquidio/lio_vf_main.c for some reason. Cc: Kangjie Lu Cc: David S. Miller Link: https://lore.kernel.org/r/20210503115736.2104747-65-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cavium/liquidio/lio_main.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 7c5af4beedc6..6fa570068648 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1166,11 +1166,6 @@ static void send_rx_ctrl_cmd(struct lio *lio, int start_stop) sc = (struct octeon_soft_command *) octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE, 16, 0); - if (!sc) { - netif_info(lio, rx_err, lio->netdev, - "Failed to allocate octeon_soft_command\n"); - return; - } ncmd = (union octnet_cmd *)sc->virtdptr; From dbc97bfd3918ed9268bfc174cae8a7d6b3d51aad Mon Sep 17 00:00:00 2001 From: Tom Seewald Date: Mon, 3 May 2021 13:57:32 +0200 Subject: [PATCH 219/730] net: liquidio: Add missing null pointer checks The functions send_rx_ctrl_cmd() in both liquidio/lio_main.c and liquidio/lio_vf_main.c do not check if the call to octeon_alloc_soft_command() fails and returns a null pointer. Both functions also return void so errors are not propagated back to the caller. Fix these issues by updating both instances of send_rx_ctrl_cmd() to return an integer rather than void, and have them return -ENOMEM if an allocation failure occurs. Also update all callers of send_rx_ctrl_cmd() so that they now check the return value. Cc: David S. Miller Signed-off-by: Tom Seewald Link: https://lore.kernel.org/r/20210503115736.2104747-66-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/cavium/liquidio/lio_main.c | 28 +++++++++++++------ .../ethernet/cavium/liquidio/lio_vf_main.c | 27 +++++++++++++----- 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 6fa570068648..591229b96257 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1153,7 +1153,7 @@ static void octeon_destroy_resources(struct octeon_device *oct) * @lio: per-network private data * @start_stop: whether to start or stop */ -static void send_rx_ctrl_cmd(struct lio *lio, int start_stop) +static int send_rx_ctrl_cmd(struct lio *lio, int start_stop) { struct octeon_soft_command *sc; union octnet_cmd *ncmd; @@ -1161,11 +1161,16 @@ static void send_rx_ctrl_cmd(struct lio *lio, int start_stop) int retval; if (oct->props[lio->ifidx].rx_on == start_stop) - return; + return 0; sc = (struct octeon_soft_command *) octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE, 16, 0); + if (!sc) { + netif_info(lio, rx_err, lio->netdev, + "Failed to allocate octeon_soft_command struct\n"); + return -ENOMEM; + } ncmd = (union octnet_cmd *)sc->virtdptr; @@ -1187,18 +1192,19 @@ static void send_rx_ctrl_cmd(struct lio *lio, int start_stop) if (retval == IQ_SEND_FAILED) { netif_info(lio, rx_err, lio->netdev, "Failed to send RX Control message\n"); octeon_free_soft_command(oct, sc); - return; } else { /* Sleep on a wait queue till the cond flag indicates that the * response arrived or timed-out. */ retval = wait_for_sc_completion_timeout(oct, sc, 0); if (retval) - return; + return retval; oct->props[lio->ifidx].rx_on = start_stop; WRITE_ONCE(sc->caller_is_done, true); } + + return retval; } /** @@ -1773,6 +1779,7 @@ static int liquidio_open(struct net_device *netdev) struct octeon_device_priv *oct_priv = (struct octeon_device_priv *)oct->priv; struct napi_struct *napi, *n; + int ret = 0; if (oct->props[lio->ifidx].napi_enabled == 0) { tasklet_disable(&oct_priv->droq_tasklet); @@ -1808,7 +1815,9 @@ static int liquidio_open(struct net_device *netdev) netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n"); /* tell Octeon to start forwarding packets to host */ - send_rx_ctrl_cmd(lio, 1); + ret = send_rx_ctrl_cmd(lio, 1); + if (ret) + return ret; /* start periodical statistics fetch */ INIT_DELAYED_WORK(&lio->stats_wk.work, lio_fetch_stats); @@ -1819,7 +1828,7 @@ static int liquidio_open(struct net_device *netdev) dev_info(&oct->pci_dev->dev, "%s interface is opened\n", netdev->name); - return 0; + return ret; } /** @@ -1833,6 +1842,7 @@ static int liquidio_stop(struct net_device *netdev) struct octeon_device_priv *oct_priv = (struct octeon_device_priv *)oct->priv; struct napi_struct *napi, *n; + int ret = 0; ifstate_reset(lio, LIO_IFSTATE_RUNNING); @@ -1849,7 +1859,9 @@ static int liquidio_stop(struct net_device *netdev) lio->link_changes++; /* Tell Octeon that nic interface is down. */ - send_rx_ctrl_cmd(lio, 0); + ret = send_rx_ctrl_cmd(lio, 0); + if (ret) + return ret; if (OCTEON_CN23XX_PF(oct)) { if (!oct->msix_on) @@ -1884,7 +1896,7 @@ static int liquidio_stop(struct net_device *netdev) dev_info(&oct->pci_dev->dev, "%s interface is stopped\n", netdev->name); - return 0; + return ret; } /** diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index 516f166ceff8..ffddb3126a32 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -595,7 +595,7 @@ static void octeon_destroy_resources(struct octeon_device *oct) * @lio: per-network private data * @start_stop: whether to start or stop */ -static void send_rx_ctrl_cmd(struct lio *lio, int start_stop) +static int send_rx_ctrl_cmd(struct lio *lio, int start_stop) { struct octeon_device *oct = (struct octeon_device *)lio->oct_dev; struct octeon_soft_command *sc; @@ -603,11 +603,16 @@ static void send_rx_ctrl_cmd(struct lio *lio, int start_stop) int retval; if (oct->props[lio->ifidx].rx_on == start_stop) - return; + return 0; sc = (struct octeon_soft_command *) octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE, 16, 0); + if (!sc) { + netif_info(lio, rx_err, lio->netdev, + "Failed to allocate octeon_soft_command struct\n"); + return -ENOMEM; + } ncmd = (union octnet_cmd *)sc->virtdptr; @@ -635,11 +640,13 @@ static void send_rx_ctrl_cmd(struct lio *lio, int start_stop) */ retval = wait_for_sc_completion_timeout(oct, sc, 0); if (retval) - return; + return retval; oct->props[lio->ifidx].rx_on = start_stop; WRITE_ONCE(sc->caller_is_done, true); } + + return retval; } /** @@ -906,6 +913,7 @@ static int liquidio_open(struct net_device *netdev) struct octeon_device_priv *oct_priv = (struct octeon_device_priv *)oct->priv; struct napi_struct *napi, *n; + int ret = 0; if (!oct->props[lio->ifidx].napi_enabled) { tasklet_disable(&oct_priv->droq_tasklet); @@ -932,11 +940,13 @@ static int liquidio_open(struct net_device *netdev) (LIQUIDIO_NDEV_STATS_POLL_TIME_MS)); /* tell Octeon to start forwarding packets to host */ - send_rx_ctrl_cmd(lio, 1); + ret = send_rx_ctrl_cmd(lio, 1); + if (ret) + return ret; dev_info(&oct->pci_dev->dev, "%s interface is opened\n", netdev->name); - return 0; + return ret; } /** @@ -950,9 +960,12 @@ static int liquidio_stop(struct net_device *netdev) struct octeon_device_priv *oct_priv = (struct octeon_device_priv *)oct->priv; struct napi_struct *napi, *n; + int ret = 0; /* tell Octeon to stop forwarding packets to host */ - send_rx_ctrl_cmd(lio, 0); + ret = send_rx_ctrl_cmd(lio, 0); + if (ret) + return ret; netif_info(lio, ifdown, lio->netdev, "Stopping interface!\n"); /* Inform that netif carrier is down */ @@ -986,7 +999,7 @@ static int liquidio_stop(struct net_device *netdev) dev_info(&oct->pci_dev->dev, "%s interface is stopped\n", netdev->name); - return 0; + return ret; } /** From ed04fe8a0e87d7b5ea17d47f4ac9ec962b24814a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:57:33 +0200 Subject: [PATCH 220/730] Revert "video: imsttfb: fix potential NULL pointer dereferences" This reverts commit 1d84353d205a953e2381044953b7fa31c8c9702d. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. The original commit here, while technically correct, did not fully handle all of the reported issues that the commit stated it was fixing, so revert it until it can be "fixed" fully. Note, ioremap() probably will never fail for old hardware like this, and if anyone actually used this hardware (a PowerMac era PCI display card), they would not be using fbdev anymore. Cc: Kangjie Lu Cc: Aditya Pakki Cc: Finn Thain Cc: Bartlomiej Zolnierkiewicz Reviewed-by: Rob Herring Fixes: 1d84353d205a ("video: imsttfb: fix potential NULL pointer dereferences") Cc: stable Link: https://lore.kernel.org/r/20210503115736.2104747-67-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/imsttfb.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/video/fbdev/imsttfb.c b/drivers/video/fbdev/imsttfb.c index 3ac053b88495..e04411701ec8 100644 --- a/drivers/video/fbdev/imsttfb.c +++ b/drivers/video/fbdev/imsttfb.c @@ -1512,11 +1512,6 @@ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) info->fix.smem_start = addr; info->screen_base = (__u8 *)ioremap(addr, par->ramdac == IBM ? 0x400000 : 0x800000); - if (!info->screen_base) { - release_mem_region(addr, size); - framebuffer_release(info); - return -ENOMEM; - } info->fix.mmio_start = addr + 0x800000; par->dc_regs = ioremap(addr + 0x800000, 0x1000); par->cmap_regs_phys = addr + 0x840000; From 13b7c0390a5d3840e1e2cda8f44a310fdbb982de Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:57:34 +0200 Subject: [PATCH 221/730] video: imsttfb: check for ioremap() failures We should check if ioremap() were to somehow fail in imsttfb_probe() and handle the unwinding of the resources allocated here properly. Ideally if anyone cares about this driver (it's for a PowerMac era PCI display card), they wouldn't even be using fbdev anymore. Or the devm_* apis could be used, but that's just extra work for diminishing returns... Cc: Finn Thain Cc: Bartlomiej Zolnierkiewicz Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20210503115736.2104747-68-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/imsttfb.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/imsttfb.c b/drivers/video/fbdev/imsttfb.c index e04411701ec8..16f272a50811 100644 --- a/drivers/video/fbdev/imsttfb.c +++ b/drivers/video/fbdev/imsttfb.c @@ -1469,6 +1469,7 @@ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct imstt_par *par; struct fb_info *info; struct device_node *dp; + int ret = -ENOMEM; dp = pci_device_to_OF_node(pdev); if(dp) @@ -1504,23 +1505,37 @@ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) default: printk(KERN_INFO "imsttfb: Device 0x%x unknown, " "contact maintainer.\n", pdev->device); - release_mem_region(addr, size); - framebuffer_release(info); - return -ENODEV; + ret = -ENODEV; + goto error; } info->fix.smem_start = addr; info->screen_base = (__u8 *)ioremap(addr, par->ramdac == IBM ? 0x400000 : 0x800000); + if (!info->screen_base) + goto error; info->fix.mmio_start = addr + 0x800000; par->dc_regs = ioremap(addr + 0x800000, 0x1000); + if (!par->dc_regs) + goto error; par->cmap_regs_phys = addr + 0x840000; par->cmap_regs = (__u8 *)ioremap(addr + 0x840000, 0x1000); + if (!par->cmap_regs) + goto error; info->pseudo_palette = par->palette; init_imstt(info); pci_set_drvdata(pdev, info); return 0; + +error: + if (par->dc_regs) + iounmap(par->dc_regs); + if (info->screen_base) + iounmap(info->screen_base); + release_mem_region(addr, size); + framebuffer_release(info); + return ret; } static void imsttfb_remove(struct pci_dev *pdev) From 30a350947692f794796f563029d29764497f2887 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:57:35 +0200 Subject: [PATCH 222/730] Revert "brcmfmac: add a check for the status of usb_register" This reverts commit 42daad3343be4a4e1ee03e30a5f5cc731dadfef5. Because of recent interactions with developers from @umn.edu, all commits from them have been recently re-reviewed to ensure if they were correct or not. Upon review, this commit was found to be incorrect for the reasons below, so it must be reverted. It will be fixed up "correctly" in a later kernel change. The original commit here did nothing to actually help if usb_register() failed, so it gives a "false sense of security" when there is none. The correct solution is to correctly unwind from this error. Cc: Kangjie Lu Cc: Kalle Valo Link: https://lore.kernel.org/r/20210503115736.2104747-69-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c index 586f4dfc638b..d2a803fc8ac6 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c @@ -1586,10 +1586,6 @@ void brcmf_usb_exit(void) void brcmf_usb_register(void) { - int ret; - brcmf_dbg(USB, "Enter\n"); - ret = usb_register(&brcmf_usbdrvr); - if (ret) - brcmf_err("usb_register failed %d\n", ret); + usb_register(&brcmf_usbdrvr); } From 419b4a142a7ece36cebcd434f8ce2af59ef94b85 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:57:36 +0200 Subject: [PATCH 223/730] brcmfmac: properly check for bus register errors The brcmfmac driver ignores any errors on initialization with the different busses by deferring the initialization to a workqueue and ignoring all possible errors that might happen. Fix up all of this by only allowing the module to load if all bus registering worked properly. Cc: Kalle Valo Link: https://lore.kernel.org/r/20210503115736.2104747-70-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- .../broadcom/brcm80211/brcmfmac/bcmsdh.c | 8 +--- .../broadcom/brcm80211/brcmfmac/bus.h | 19 ++++++++- .../broadcom/brcm80211/brcmfmac/core.c | 42 ++++++++----------- .../broadcom/brcm80211/brcmfmac/pcie.c | 9 +--- .../broadcom/brcm80211/brcmfmac/pcie.h | 5 --- .../broadcom/brcm80211/brcmfmac/usb.c | 4 +- 6 files changed, 41 insertions(+), 46 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c index ce8c102df7b3..633d0ab19031 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c @@ -1217,13 +1217,9 @@ static struct sdio_driver brcmf_sdmmc_driver = { }, }; -void brcmf_sdio_register(void) +int brcmf_sdio_register(void) { - int ret; - - ret = sdio_register_driver(&brcmf_sdmmc_driver); - if (ret) - brcmf_err("sdio_register_driver failed: %d\n", ret); + return sdio_register_driver(&brcmf_sdmmc_driver); } void brcmf_sdio_exit(void) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h index 08f9d47f2e5c..3f5da3bb6aa5 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h @@ -275,11 +275,26 @@ void brcmf_bus_add_txhdrlen(struct device *dev, uint len); #ifdef CONFIG_BRCMFMAC_SDIO void brcmf_sdio_exit(void); -void brcmf_sdio_register(void); +int brcmf_sdio_register(void); +#else +static inline void brcmf_sdio_exit(void) { } +static inline int brcmf_sdio_register(void) { return 0; } #endif + #ifdef CONFIG_BRCMFMAC_USB void brcmf_usb_exit(void); -void brcmf_usb_register(void); +int brcmf_usb_register(void); +#else +static inline void brcmf_usb_exit(void) { } +static inline int brcmf_usb_register(void) { return 0; } +#endif + +#ifdef CONFIG_BRCMFMAC_PCIE +void brcmf_pcie_exit(void); +int brcmf_pcie_register(void); +#else +static inline void brcmf_pcie_exit(void) { } +static inline int brcmf_pcie_register(void) { return 0; } #endif #endif /* BRCMFMAC_BUS_H */ diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index 838b09b23abf..cee1682d2333 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -1518,40 +1518,34 @@ void brcmf_bus_change_state(struct brcmf_bus *bus, enum brcmf_bus_state state) } } -static void brcmf_driver_register(struct work_struct *work) -{ -#ifdef CONFIG_BRCMFMAC_SDIO - brcmf_sdio_register(); -#endif -#ifdef CONFIG_BRCMFMAC_USB - brcmf_usb_register(); -#endif -#ifdef CONFIG_BRCMFMAC_PCIE - brcmf_pcie_register(); -#endif -} -static DECLARE_WORK(brcmf_driver_work, brcmf_driver_register); - int __init brcmf_core_init(void) { - if (!schedule_work(&brcmf_driver_work)) - return -EBUSY; + int err; + err = brcmf_sdio_register(); + if (err) + return err; + + err = brcmf_usb_register(); + if (err) + goto error_usb_register; + + err = brcmf_pcie_register(); + if (err) + goto error_pcie_register; return 0; + +error_pcie_register: + brcmf_usb_exit(); +error_usb_register: + brcmf_sdio_exit(); + return err; } void __exit brcmf_core_exit(void) { - cancel_work_sync(&brcmf_driver_work); - -#ifdef CONFIG_BRCMFMAC_SDIO brcmf_sdio_exit(); -#endif -#ifdef CONFIG_BRCMFMAC_USB brcmf_usb_exit(); -#endif -#ifdef CONFIG_BRCMFMAC_PCIE brcmf_pcie_exit(); -#endif } diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c index ad79e3b7e74a..143a705b5cb3 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c @@ -2140,15 +2140,10 @@ static struct pci_driver brcmf_pciedrvr = { }; -void brcmf_pcie_register(void) +int brcmf_pcie_register(void) { - int err; - brcmf_dbg(PCIE, "Enter\n"); - err = pci_register_driver(&brcmf_pciedrvr); - if (err) - brcmf_err(NULL, "PCIE driver registration failed, err=%d\n", - err); + return pci_register_driver(&brcmf_pciedrvr); } diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.h index d026401d2001..8e6c227e8315 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.h @@ -11,9 +11,4 @@ struct brcmf_pciedev { struct brcmf_pciedev_info *devinfo; }; - -void brcmf_pcie_exit(void); -void brcmf_pcie_register(void); - - #endif /* BRCMFMAC_PCIE_H */ diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c index d2a803fc8ac6..9fb68c2dc7e3 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c @@ -1584,8 +1584,8 @@ void brcmf_usb_exit(void) usb_deregister(&brcmf_usbdrvr); } -void brcmf_usb_register(void) +int brcmf_usb_register(void) { brcmf_dbg(USB, "Enter\n"); - usb_register(&brcmf_usbdrvr); + return usb_register(&brcmf_usbdrvr); } From 9183f01b5e6e32eb3f17b5f3f8d5ad5ac9786c49 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 6 May 2021 16:00:47 +0200 Subject: [PATCH 224/730] cdrom: gdrom: initialize global variable at init time As Peter points out, if we were to disconnect and then reconnect this driver from a device, the "global" state of the device would contain odd values and could cause problems. Fix this up by just initializing the whole thing to 0 at probe() time. Ideally this would be a per-device variable, but given the age and the total lack of users of it, that would require a lot of s/./->/g changes for really no good reason. Reported-by: Peter Rosin Cc: Jens Axboe Reviewed-by: Peter Rosin Link: https://lore.kernel.org/r/YJP2j6AU82MqEY2M@kroah.com Signed-off-by: Greg Kroah-Hartman --- drivers/cdrom/gdrom.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 6c4f6139f853..c6d8c0f59722 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -744,6 +744,13 @@ static const struct blk_mq_ops gdrom_mq_ops = { static int probe_gdrom(struct platform_device *devptr) { int err; + + /* + * Ensure our "one" device is initialized properly in case of previous + * usages of it + */ + memset(&gd, 0, sizeof(gd)); + /* Start the device */ if (gdrom_execute_diagnostic() != 1) { pr_warn("ATA Probe for GDROM failed\n"); @@ -847,7 +854,7 @@ static struct platform_driver gdrom_driver = { static int __init init_gdrom(void) { int rc; - gd.toc = NULL; + rc = platform_driver_register(&gdrom_driver); if (rc) return rc; From ced081a436d21a7d34d4d42acb85058f9cf423f2 Mon Sep 17 00:00:00 2001 From: Luca Stefani Date: Thu, 6 May 2021 21:37:25 +0200 Subject: [PATCH 225/730] binder: Return EFAULT if we fail BINDER_ENABLE_ONEWAY_SPAM_DETECTION All the other ioctl paths return EFAULT in case the copy_from_user/copy_to_user call fails, make oneway spam detection follow the same paradigm. Fixes: a7dc1e6f99df ("binder: tell userspace to dump current backtrace when detected oneway spamming") Acked-by: Todd Kjos Acked-by: Christian Brauner Signed-off-by: Luca Stefani Link: https://lore.kernel.org/r/20210506193726.45118-1-luca.stefani.ge1@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 61d34e1dc59c..bcec598b89f2 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -4918,7 +4918,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) uint32_t enable; if (copy_from_user(&enable, ubuf, sizeof(enable))) { - ret = -EINVAL; + ret = -EFAULT; goto err; } binder_inner_proc_lock(proc); From c699a0db2d62e3bbb7f0bf35c87edbc8d23e3062 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 10 May 2021 14:49:05 -0400 Subject: [PATCH 226/730] dm snapshot: fix crash with transient storage and zero chunk size The following commands will crash the kernel: modprobe brd rd_size=1048576 dmsetup create o --table "0 `blockdev --getsize /dev/ram0` snapshot-origin /dev/ram0" dmsetup create s --table "0 `blockdev --getsize /dev/ram0` snapshot /dev/ram0 /dev/ram1 N 0" The reason is that when we test for zero chunk size, we jump to the label bad_read_metadata without setting the "r" variable. The function snapshot_ctr destroys all the structures and then exits with "r == 0". The kernel then crashes because it falsely believes that snapshot_ctr succeeded. In order to fix the bug, we set the variable "r" to -EINVAL. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer --- drivers/md/dm-snap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 2a51ddd840b4..b8e4d31124ea 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1408,6 +1408,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (!s->store->chunk_size) { ti->error = "Chunk size not set"; + r = -EINVAL; goto bad_read_metadata; } From 640d1eaff2c09e382a23bd831094ebbfaa16fef5 Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Tue, 4 May 2021 16:22:34 -0600 Subject: [PATCH 227/730] dyndbg: avoid calling dyndbg_emit_prefix when it has no work Wrap function in a static-inline one, which checks flags to avoid calling the function unnecessarily. And hoist its output-buffer initialization to the grand-caller, which is already allocating the buffer on the stack, and can trivially initialize it too. Signed-off-by: Jim Cromie Link: https://lore.kernel.org/r/20210504222235.1033685-2-jim.cromie@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/dynamic_debug.h | 5 +++++ lib/dynamic_debug.c | 19 ++++++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index a57ee75342cf..dce631e678dd 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -32,6 +32,11 @@ struct _ddebug { #define _DPRINTK_FLAGS_INCL_FUNCNAME (1<<2) #define _DPRINTK_FLAGS_INCL_LINENO (1<<3) #define _DPRINTK_FLAGS_INCL_TID (1<<4) + +#define _DPRINTK_FLAGS_INCL_ANY \ + (_DPRINTK_FLAGS_INCL_MODNAME | _DPRINTK_FLAGS_INCL_FUNCNAME |\ + _DPRINTK_FLAGS_INCL_LINENO | _DPRINTK_FLAGS_INCL_TID) + #if defined DEBUG #define _DPRINTK_FLAGS_DEFAULT _DPRINTK_FLAGS_PRINT #else diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 921d0a654243..398920403321 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -586,13 +586,11 @@ static int remaining(int wrote) return 0; } -static char *dynamic_emit_prefix(const struct _ddebug *desc, char *buf) +static char *__dynamic_emit_prefix(const struct _ddebug *desc, char *buf) { int pos_after_tid; int pos = 0; - *buf = '\0'; - if (desc->flags & _DPRINTK_FLAGS_INCL_TID) { if (in_interrupt()) pos += snprintf(buf + pos, remaining(pos), " "); @@ -618,11 +616,18 @@ static char *dynamic_emit_prefix(const struct _ddebug *desc, char *buf) return buf; } +static inline char *dynamic_emit_prefix(struct _ddebug *desc, char *buf) +{ + if (unlikely(desc->flags & _DPRINTK_FLAGS_INCL_ANY)) + return __dynamic_emit_prefix(desc, buf); + return buf; +} + void __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...) { va_list args; struct va_format vaf; - char buf[PREFIX_SIZE]; + char buf[PREFIX_SIZE] = ""; BUG_ON(!descriptor); BUG_ON(!fmt); @@ -655,7 +660,7 @@ void __dynamic_dev_dbg(struct _ddebug *descriptor, if (!dev) { printk(KERN_DEBUG "(NULL device *): %pV", &vaf); } else { - char buf[PREFIX_SIZE]; + char buf[PREFIX_SIZE] = ""; dev_printk_emit(LOGLEVEL_DEBUG, dev, "%s%s %s: %pV", dynamic_emit_prefix(descriptor, buf), @@ -684,7 +689,7 @@ void __dynamic_netdev_dbg(struct _ddebug *descriptor, vaf.va = &args; if (dev && dev->dev.parent) { - char buf[PREFIX_SIZE]; + char buf[PREFIX_SIZE] = ""; dev_printk_emit(LOGLEVEL_DEBUG, dev->dev.parent, "%s%s %s %s%s: %pV", @@ -720,7 +725,7 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor, vaf.va = &args; if (ibdev && ibdev->dev.parent) { - char buf[PREFIX_SIZE]; + char buf[PREFIX_SIZE] = ""; dev_printk_emit(LOGLEVEL_DEBUG, ibdev->dev.parent, "%s%s %s %s: %pV", From a3626bcf5fafad0ded410b269e21f37bdaf2baf4 Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Tue, 4 May 2021 16:22:35 -0600 Subject: [PATCH 228/730] dyndbg: drop uninformative vpr_info Remove a vpr_info which I added in 2012, when I knew even less than now. In 2020, a simpler pr_fmt stripped it of context, and any remaining value. no functional change. Signed-off-by: Jim Cromie Link: https://lore.kernel.org/r/20210504222235.1033685-3-jim.cromie@gmail.com Signed-off-by: Greg Kroah-Hartman --- lib/dynamic_debug.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 398920403321..641767b0dce2 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -920,7 +920,6 @@ static const struct seq_operations ddebug_proc_seqops = { static int ddebug_proc_open(struct inode *inode, struct file *file) { - vpr_info("called\n"); return seq_open_private(file, &ddebug_proc_seqops, sizeof(struct ddebug_iter)); } From dbae70d452a0858d62915166d93650c98fe6639c Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 12 May 2021 08:28:43 -0400 Subject: [PATCH 229/730] dm integrity: revert to not using discard filler when recalulating Revert the commit 7a5b96b4784454ba258e83dc7469ddbacd3aaac3 ("dm integrity: use discard support when recalculating"). There's a bug that when we write some data beyond the current recalculate boundary, the checksum will be rewritten with the discard filler later. And the data will no longer have integrity protection. There's no easy fix for this case. Also, another problematic case is if dm-integrity is used to detect bitrot (random device errors, bit flips, etc); dm-integrity should detect that even for unused sectors. With commit 7a5b96b4784 it can happen that such change is undetected (because discard filler is not a valid checksum). Signed-off-by: Mikulas Patocka Acked-by: Milan Broz Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 57 +++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 33 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 781942aeddd1..6d00e619a141 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -2689,30 +2689,26 @@ next_chunk: if (unlikely(dm_integrity_failed(ic))) goto err; - if (!ic->discard) { - io_req.bi_op = REQ_OP_READ; - io_req.bi_op_flags = 0; - io_req.mem.type = DM_IO_VMA; - io_req.mem.ptr.addr = ic->recalc_buffer; - io_req.notify.fn = NULL; - io_req.client = ic->io; - io_loc.bdev = ic->dev->bdev; - io_loc.sector = get_data_sector(ic, area, offset); - io_loc.count = n_sectors; + io_req.bi_op = REQ_OP_READ; + io_req.bi_op_flags = 0; + io_req.mem.type = DM_IO_VMA; + io_req.mem.ptr.addr = ic->recalc_buffer; + io_req.notify.fn = NULL; + io_req.client = ic->io; + io_loc.bdev = ic->dev->bdev; + io_loc.sector = get_data_sector(ic, area, offset); + io_loc.count = n_sectors; - r = dm_io(&io_req, 1, &io_loc, NULL); - if (unlikely(r)) { - dm_integrity_io_error(ic, "reading data", r); - goto err; - } + r = dm_io(&io_req, 1, &io_loc, NULL); + if (unlikely(r)) { + dm_integrity_io_error(ic, "reading data", r); + goto err; + } - t = ic->recalc_tags; - for (i = 0; i < n_sectors; i += ic->sectors_per_block) { - integrity_sector_checksum(ic, logical_sector + i, ic->recalc_buffer + (i << SECTOR_SHIFT), t); - t += ic->tag_size; - } - } else { - t = ic->recalc_tags + (n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size; + t = ic->recalc_tags; + for (i = 0; i < n_sectors; i += ic->sectors_per_block) { + integrity_sector_checksum(ic, logical_sector + i, ic->recalc_buffer + (i << SECTOR_SHIFT), t); + t += ic->tag_size; } metadata_block = get_metadata_sector_and_offset(ic, area, offset, &metadata_offset); @@ -4368,13 +4364,11 @@ try_smaller_buffer: goto bad; } INIT_WORK(&ic->recalc_work, integrity_recalc); - if (!ic->discard) { - ic->recalc_buffer = vmalloc(RECALC_SECTORS << SECTOR_SHIFT); - if (!ic->recalc_buffer) { - ti->error = "Cannot allocate buffer for recalculating"; - r = -ENOMEM; - goto bad; - } + ic->recalc_buffer = vmalloc(RECALC_SECTORS << SECTOR_SHIFT); + if (!ic->recalc_buffer) { + ti->error = "Cannot allocate buffer for recalculating"; + r = -ENOMEM; + goto bad; } ic->recalc_tags = kvmalloc_array(RECALC_SECTORS >> ic->sb->log2_sectors_per_block, ic->tag_size, GFP_KERNEL); @@ -4383,9 +4377,6 @@ try_smaller_buffer: r = -ENOMEM; goto bad; } - if (ic->discard) - memset(ic->recalc_tags, DISCARD_FILLER, - (RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size); } else { if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { ti->error = "Recalculate can only be specified with internal_hash"; @@ -4579,7 +4570,7 @@ static void dm_integrity_dtr(struct dm_target *ti) static struct target_type integrity_target = { .name = "integrity", - .version = {1, 9, 0}, + .version = {1, 10, 0}, .module = THIS_MODULE, .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY, .ctr = dm_integrity_ctr, From bc8f3d4647a99468d7733039b6bc9234b6e91df4 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 11 May 2021 11:41:00 -0400 Subject: [PATCH 230/730] dm integrity: fix sparse warnings Use the types __le* instead of __u* to fix sparse warnings. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 6d00e619a141..20f2510db1f6 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -66,14 +66,14 @@ struct superblock { __u8 magic[8]; __u8 version; __u8 log2_interleave_sectors; - __u16 integrity_tag_size; - __u32 journal_sections; - __u64 provided_data_sectors; /* userspace uses this value */ - __u32 flags; + __le16 integrity_tag_size; + __le32 journal_sections; + __le64 provided_data_sectors; /* userspace uses this value */ + __le32 flags; __u8 log2_sectors_per_block; __u8 log2_blocks_per_bitmap_bit; __u8 pad[2]; - __u64 recalc_sector; + __le64 recalc_sector; __u8 pad2[8]; __u8 salt[SALT_SIZE]; }; @@ -86,16 +86,16 @@ struct superblock { #define JOURNAL_ENTRY_ROUNDUP 8 -typedef __u64 commit_id_t; +typedef __le64 commit_id_t; #define JOURNAL_MAC_PER_SECTOR 8 struct journal_entry { union { struct { - __u32 sector_lo; - __u32 sector_hi; + __le32 sector_lo; + __le32 sector_hi; } s; - __u64 sector; + __le64 sector; } u; commit_id_t last_bytes[]; /* __u8 tag[0]; */ @@ -806,7 +806,7 @@ static void section_mac(struct dm_integrity_c *ic, unsigned section, __u8 result } if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) { - uint64_t section_le; + __le64 section_le; r = crypto_shash_update(desc, (__u8 *)&ic->sb->salt, SALT_SIZE); if (unlikely(r < 0)) { @@ -1640,7 +1640,7 @@ static void integrity_end_io(struct bio *bio) static void integrity_sector_checksum(struct dm_integrity_c *ic, sector_t sector, const char *data, char *result) { - __u64 sector_le = cpu_to_le64(sector); + __le64 sector_le = cpu_to_le64(sector); SHASH_DESC_ON_STACK(req, ic->internal_hash); int r; unsigned digest_size; @@ -3822,7 +3822,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error) for (i = 0; i < ic->journal_sections; i++) { struct scatterlist sg; struct skcipher_request *section_req; - __u32 section_le = cpu_to_le32(i); + __le32 section_le = cpu_to_le32(i); memset(crypt_iv, 0x00, ivsize); memset(crypt_data, 0x00, crypt_len); From 27b57bb76a897be80494ee11ee4e85326d19383d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 13 May 2021 21:40:38 +0200 Subject: [PATCH 231/730] Revert "Revert "ALSA: usx2y: Fix potential NULL pointer dereference"" This reverts commit 4667a6fc1777ce071504bab570d3599107f4790f. Takashi writes: I have already started working on the bigger cleanup of this driver code based on 5.13-rc1, so could you drop this revert? I missed our previous discussion about this, my fault for applying it. Reported-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/usx2y/usb_stream.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/usb/usx2y/usb_stream.c b/sound/usb/usx2y/usb_stream.c index 6bba17bf689a..091c071b270a 100644 --- a/sound/usb/usx2y/usb_stream.c +++ b/sound/usb/usx2y/usb_stream.c @@ -91,7 +91,12 @@ static int init_urbs(struct usb_stream_kernel *sk, unsigned use_packsize, for (u = 0; u < USB_STREAM_NURBS; ++u) { sk->inurb[u] = usb_alloc_urb(sk->n_o_ps, GFP_KERNEL); + if (!sk->inurb[u]) + return -ENOMEM; + sk->outurb[u] = usb_alloc_urb(sk->n_o_ps, GFP_KERNEL); + if (!sk->outurb[u]) + return -ENOMEM; } if (init_pipe_urbs(sk, use_packsize, sk->inurb, indata, dev, in_pipe) || From a93a0a15876d2a077a3bc260b387d2457a051f24 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 13 May 2021 09:44:49 +0200 Subject: [PATCH 232/730] net: mdio: thunder: Fix a double free issue in the .remove function 'bus->mii_bus' have been allocated with 'devm_mdiobus_alloc_size()' in the probe function. So it must not be freed explicitly or there will be a double free. Remove the incorrect 'mdiobus_free' in the remove function. Fixes: 379d7ac7ca31 ("phy: mdio-thunder: Add driver for Cavium Thunder SoC MDIO buses.") Signed-off-by: Christophe JAILLET Reviewed-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/mdio/mdio-thunder.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/mdio/mdio-thunder.c b/drivers/net/mdio/mdio-thunder.c index cb1761693b69..822d2cdd2f35 100644 --- a/drivers/net/mdio/mdio-thunder.c +++ b/drivers/net/mdio/mdio-thunder.c @@ -126,7 +126,6 @@ static void thunder_mdiobus_pci_remove(struct pci_dev *pdev) continue; mdiobus_unregister(bus->mii_bus); - mdiobus_free(bus->mii_bus); oct_mdio_writeq(0, bus->register_base + SMI_EN); } pci_release_regions(pdev); From e1d027dd97e1e750669cdc0d3b016a4f54e473eb Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 13 May 2021 09:24:55 +0200 Subject: [PATCH 233/730] net: mdio: octeon: Fix some double free issues 'bus->mii_bus' has been allocated with 'devm_mdiobus_alloc_size()' in the probe function. So it must not be freed explicitly or there will be a double free. Remove the incorrect 'mdiobus_free' in the error handling path of the probe function and in remove function. Suggested-By: Andrew Lunn Fixes: 35d2aeac9810 ("phy: mdio-octeon: Use devm_mdiobus_alloc_size()") Signed-off-by: Christophe JAILLET Reviewed-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/mdio/mdio-octeon.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/mdio/mdio-octeon.c b/drivers/net/mdio/mdio-octeon.c index 8ce99c4888e1..e096e68ac667 100644 --- a/drivers/net/mdio/mdio-octeon.c +++ b/drivers/net/mdio/mdio-octeon.c @@ -71,7 +71,6 @@ static int octeon_mdiobus_probe(struct platform_device *pdev) return 0; fail_register: - mdiobus_free(bus->mii_bus); smi_en.u64 = 0; oct_mdio_writeq(smi_en.u64, bus->register_base + SMI_EN); return err; @@ -85,7 +84,6 @@ static int octeon_mdiobus_remove(struct platform_device *pdev) bus = platform_get_drvdata(pdev); mdiobus_unregister(bus->mii_bus); - mdiobus_free(bus->mii_bus); smi_en.u64 = 0; oct_mdio_writeq(smi_en.u64, bus->register_base + SMI_EN); return 0; From 65e302a9bd57b62872040d57eea1201562a7cbb2 Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Thu, 13 May 2021 15:11:51 +0530 Subject: [PATCH 234/730] cxgb4/ch_ktls: Clear resources when pf4 device is removed This patch maintain the list of active tids and clear all the active connection resources when DETACH notification comes. Fixes: a8c16e8ed624f ("crypto/chcr: move nic TLS functionality to drivers/net") Signed-off-by: Ayush Sawal Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 80 ++++++++++++++++++- .../chelsio/inline_crypto/ch_ktls/chcr_ktls.h | 2 + 3 files changed, 82 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 6264bc66a4fc..421bd9b88028 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -6480,9 +6480,9 @@ static void cxgb4_ktls_dev_del(struct net_device *netdev, adap->uld[CXGB4_ULD_KTLS].tlsdev_ops->tls_dev_del(netdev, tls_ctx, direction); - cxgb4_set_ktls_feature(adap, FW_PARAMS_PARAM_DEV_KTLS_HW_DISABLE); out_unlock: + cxgb4_set_ktls_feature(adap, FW_PARAMS_PARAM_DEV_KTLS_HW_DISABLE); mutex_unlock(&uld_mutex); } diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index ef3f1e92632f..59683f79959c 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -59,6 +59,7 @@ static int chcr_get_nfrags_to_send(struct sk_buff *skb, u32 start, u32 len) } static int chcr_init_tcb_fields(struct chcr_ktls_info *tx_info); +static void clear_conn_resources(struct chcr_ktls_info *tx_info); /* * chcr_ktls_save_keys: calculate and save crypto keys. * @tx_info - driver specific tls info. @@ -364,10 +365,14 @@ static void chcr_ktls_dev_del(struct net_device *netdev, chcr_get_ktls_tx_context(tls_ctx); struct chcr_ktls_info *tx_info = tx_ctx->chcr_info; struct ch_ktls_port_stats_debug *port_stats; + struct chcr_ktls_uld_ctx *u_ctx; if (!tx_info) return; + u_ctx = tx_info->adap->uld[CXGB4_ULD_KTLS].handle; + if (u_ctx && u_ctx->detach) + return; /* clear l2t entry */ if (tx_info->l2te) cxgb4_l2t_release(tx_info->l2te); @@ -384,6 +389,8 @@ static void chcr_ktls_dev_del(struct net_device *netdev, if (tx_info->tid != -1) { cxgb4_remove_tid(&tx_info->adap->tids, tx_info->tx_chan, tx_info->tid, tx_info->ip_family); + + xa_erase(&u_ctx->tid_list, tx_info->tid); } port_stats = &tx_info->adap->ch_ktls_stats.ktls_port[tx_info->port_id]; @@ -411,6 +418,7 @@ static int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk, struct tls_context *tls_ctx = tls_get_ctx(sk); struct ch_ktls_port_stats_debug *port_stats; struct chcr_ktls_ofld_ctx_tx *tx_ctx; + struct chcr_ktls_uld_ctx *u_ctx; struct chcr_ktls_info *tx_info; struct dst_entry *dst; struct adapter *adap; @@ -425,6 +433,7 @@ static int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk, adap = pi->adapter; port_stats = &adap->ch_ktls_stats.ktls_port[pi->port_id]; atomic64_inc(&port_stats->ktls_tx_connection_open); + u_ctx = adap->uld[CXGB4_ULD_KTLS].handle; if (direction == TLS_OFFLOAD_CTX_DIR_RX) { pr_err("not expecting for RX direction\n"); @@ -434,6 +443,9 @@ static int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk, if (tx_ctx->chcr_info) goto out; + if (u_ctx && u_ctx->detach) + goto out; + tx_info = kvzalloc(sizeof(*tx_info), GFP_KERNEL); if (!tx_info) goto out; @@ -569,6 +581,8 @@ free_tid: cxgb4_remove_tid(&tx_info->adap->tids, tx_info->tx_chan, tx_info->tid, tx_info->ip_family); + xa_erase(&u_ctx->tid_list, tx_info->tid); + put_module: /* release module refcount */ module_put(THIS_MODULE); @@ -633,8 +647,12 @@ static int chcr_ktls_cpl_act_open_rpl(struct adapter *adap, { const struct cpl_act_open_rpl *p = (void *)input; struct chcr_ktls_info *tx_info = NULL; + struct chcr_ktls_ofld_ctx_tx *tx_ctx; + struct chcr_ktls_uld_ctx *u_ctx; unsigned int atid, tid, status; + struct tls_context *tls_ctx; struct tid_info *t; + int ret = 0; tid = GET_TID(p); status = AOPEN_STATUS_G(ntohl(p->atid_status)); @@ -666,14 +684,29 @@ static int chcr_ktls_cpl_act_open_rpl(struct adapter *adap, if (!status) { tx_info->tid = tid; cxgb4_insert_tid(t, tx_info, tx_info->tid, tx_info->ip_family); + /* Adding tid */ + tls_ctx = tls_get_ctx(tx_info->sk); + tx_ctx = chcr_get_ktls_tx_context(tls_ctx); + u_ctx = adap->uld[CXGB4_ULD_KTLS].handle; + if (u_ctx) { + ret = xa_insert_bh(&u_ctx->tid_list, tid, tx_ctx, + GFP_NOWAIT); + if (ret < 0) { + pr_err("%s: Failed to allocate tid XA entry = %d\n", + __func__, tx_info->tid); + tx_info->open_state = CH_KTLS_OPEN_FAILURE; + goto out; + } + } tx_info->open_state = CH_KTLS_OPEN_SUCCESS; } else { tx_info->open_state = CH_KTLS_OPEN_FAILURE; } +out: spin_unlock(&tx_info->lock); complete(&tx_info->completion); - return 0; + return ret; } /* @@ -2090,6 +2123,8 @@ static void *chcr_ktls_uld_add(const struct cxgb4_lld_info *lldi) goto out; } u_ctx->lldi = *lldi; + u_ctx->detach = false; + xa_init_flags(&u_ctx->tid_list, XA_FLAGS_LOCK_BH); out: return u_ctx; } @@ -2123,6 +2158,45 @@ static int chcr_ktls_uld_rx_handler(void *handle, const __be64 *rsp, return 0; } +static void clear_conn_resources(struct chcr_ktls_info *tx_info) +{ + /* clear l2t entry */ + if (tx_info->l2te) + cxgb4_l2t_release(tx_info->l2te); + +#if IS_ENABLED(CONFIG_IPV6) + /* clear clip entry */ + if (tx_info->ip_family == AF_INET6) + cxgb4_clip_release(tx_info->netdev, (const u32 *) + &tx_info->sk->sk_v6_rcv_saddr, + 1); +#endif + + /* clear tid */ + if (tx_info->tid != -1) + cxgb4_remove_tid(&tx_info->adap->tids, tx_info->tx_chan, + tx_info->tid, tx_info->ip_family); +} + +static void ch_ktls_reset_all_conn(struct chcr_ktls_uld_ctx *u_ctx) +{ + struct ch_ktls_port_stats_debug *port_stats; + struct chcr_ktls_ofld_ctx_tx *tx_ctx; + struct chcr_ktls_info *tx_info; + unsigned long index; + + xa_for_each(&u_ctx->tid_list, index, tx_ctx) { + tx_info = tx_ctx->chcr_info; + clear_conn_resources(tx_info); + port_stats = &tx_info->adap->ch_ktls_stats.ktls_port[tx_info->port_id]; + atomic64_inc(&port_stats->ktls_tx_connection_close); + kvfree(tx_info); + tx_ctx->chcr_info = NULL; + /* release module refcount */ + module_put(THIS_MODULE); + } +} + static int chcr_ktls_uld_state_change(void *handle, enum cxgb4_state new_state) { struct chcr_ktls_uld_ctx *u_ctx = handle; @@ -2139,7 +2213,10 @@ static int chcr_ktls_uld_state_change(void *handle, enum cxgb4_state new_state) case CXGB4_STATE_DETACH: pr_info("%s: Down\n", pci_name(u_ctx->lldi.pdev)); mutex_lock(&dev_mutex); + u_ctx->detach = true; list_del(&u_ctx->entry); + ch_ktls_reset_all_conn(u_ctx); + xa_destroy(&u_ctx->tid_list); mutex_unlock(&dev_mutex); break; default: @@ -2178,6 +2255,7 @@ static void __exit chcr_ktls_exit(void) adap = pci_get_drvdata(u_ctx->lldi.pdev); memset(&adap->ch_ktls_stats, 0, sizeof(adap->ch_ktls_stats)); list_del(&u_ctx->entry); + xa_destroy(&u_ctx->tid_list); kfree(u_ctx); } mutex_unlock(&dev_mutex); diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h index 18b3b1f02415..10572dc55365 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h @@ -75,6 +75,8 @@ struct chcr_ktls_ofld_ctx_tx { struct chcr_ktls_uld_ctx { struct list_head entry; struct cxgb4_lld_info lldi; + struct xarray tid_list; + bool detach; }; static inline struct chcr_ktls_ofld_ctx_tx * From c7d8302478ae645c2e9b59f2cf125641875b7dc2 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 13 May 2021 12:46:21 +0000 Subject: [PATCH 235/730] net: korina: Fix return value check in korina_probe() In case of error, the function devm_platform_ioremap_resource_byname() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Fixes: b4cd249a8cc0 ("net: korina: Use devres functions") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/korina.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index 6f987a7ffcb3..b30a45725374 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -1315,23 +1315,23 @@ static int korina_probe(struct platform_device *pdev) lp->tx_irq = platform_get_irq_byname(pdev, "tx"); p = devm_platform_ioremap_resource_byname(pdev, "emac"); - if (!p) { + if (IS_ERR(p)) { printk(KERN_ERR DRV_NAME ": cannot remap registers\n"); - return -ENOMEM; + return PTR_ERR(p); } lp->eth_regs = p; p = devm_platform_ioremap_resource_byname(pdev, "dma_rx"); - if (!p) { + if (IS_ERR(p)) { printk(KERN_ERR DRV_NAME ": cannot remap Rx DMA registers\n"); - return -ENOMEM; + return PTR_ERR(p); } lp->rx_dma_regs = p; p = devm_platform_ioremap_resource_byname(pdev, "dma_tx"); - if (!p) { + if (IS_ERR(p)) { printk(KERN_ERR DRV_NAME ": cannot remap Tx DMA registers\n"); - return -ENOMEM; + return PTR_ERR(p); } lp->tx_dma_regs = p; From e4df1b0c24350a0f00229ff895a91f1072bd850d Mon Sep 17 00:00:00 2001 From: Tao Liu Date: Thu, 13 May 2021 21:08:00 +0800 Subject: [PATCH 236/730] openvswitch: meter: fix race when getting now_ms. We have observed meters working unexpected if traffic is 3+Gbit/s with multiple connections. now_ms is not pretected by meter->lock, we may get a negative long_delta_ms when another cpu updated meter->used, then: delta_ms = (u32)long_delta_ms; which will be a large value. band->bucket += delta_ms * band->rate; then we get a wrong band->bucket. OpenVswitch userspace datapath has fixed the same issue[1] some time ago, and we port the implementation to kernel datapath. [1] https://patchwork.ozlabs.org/project/openvswitch/patch/20191025114436.9746-1-i.maximets@ovn.org/ Fixes: 96fbc13d7e77 ("openvswitch: Add meter infrastructure") Signed-off-by: Tao Liu Suggested-by: Ilya Maximets Reviewed-by: Ilya Maximets Signed-off-by: David S. Miller --- net/openvswitch/meter.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index 96b524ceabca..896b8f5bc885 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -611,6 +611,14 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb, spin_lock(&meter->lock); long_delta_ms = (now_ms - meter->used); /* ms */ + if (long_delta_ms < 0) { + /* This condition means that we have several threads fighting + * for a meter lock, and the one who received the packets a + * bit later wins. Assuming that all racing threads received + * packets at the same time to avoid overflow. + */ + long_delta_ms = 0; + } /* Make sure delta_ms will not be too large, so that bucket will not * wrap around below. From d6f67afbdf9df5301641b2ef7ac4030abab3e067 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 10 May 2021 22:39:38 +0900 Subject: [PATCH 237/730] btrfs: return 0 for dev_extent_hole_check_zoned hole_start in case of error Commit 7000babddac6 ("btrfs: assign proper values to a bool variable in dev_extent_hole_check_zoned") assigned false to the hole_start parameter of dev_extent_hole_check_zoned(). The hole_start parameter is not boolean and returns the start location of the found hole. Fixes: 7000babddac6 ("btrfs: assign proper values to a bool variable in dev_extent_hole_check_zoned") Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 77cdb75acc15..bc53939fef48 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1458,7 +1458,7 @@ static bool dev_extent_hole_check_zoned(struct btrfs_device *device, /* Given hole range was invalid (outside of device) */ if (ret == -ERANGE) { *hole_start += *hole_size; - *hole_size = false; + *hole_size = 0; return true; } From 71795ee590111e3636cc3c148289dfa9fa0a5fc3 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 29 Apr 2021 10:51:34 -0400 Subject: [PATCH 238/730] btrfs: avoid RCU stalls while running delayed iputs Generally a delayed iput is added when we might do the final iput, so usually we'll end up sleeping while processing the delayed iputs naturally. However there's no guarantee of this, especially for small files. In production we noticed 5 instances of RCU stalls while testing a kernel release overnight across 1000 machines, so this is relatively common: host count: 5 rcu: INFO: rcu_sched self-detected stall on CPU rcu: ....: (20998 ticks this GP) idle=59e/1/0x4000000000000002 softirq=12333372/12333372 fqs=3208 (t=21031 jiffies g=27810193 q=41075) NMI backtrace for cpu 1 CPU: 1 PID: 1713 Comm: btrfs-cleaner Kdump: loaded Not tainted 5.6.13-0_fbk12_rc1_5520_gec92bffc1ec9 #1 Call Trace: dump_stack+0x50/0x70 nmi_cpu_backtrace.cold.6+0x30/0x65 ? lapic_can_unplug_cpu.cold.30+0x40/0x40 nmi_trigger_cpumask_backtrace+0xba/0xca rcu_dump_cpu_stacks+0x99/0xc7 rcu_sched_clock_irq.cold.90+0x1b2/0x3a3 ? trigger_load_balance+0x5c/0x200 ? tick_sched_do_timer+0x60/0x60 ? tick_sched_do_timer+0x60/0x60 update_process_times+0x24/0x50 tick_sched_timer+0x37/0x70 __hrtimer_run_queues+0xfe/0x270 hrtimer_interrupt+0xf4/0x210 smp_apic_timer_interrupt+0x5e/0x120 apic_timer_interrupt+0xf/0x20 RIP: 0010:queued_spin_lock_slowpath+0x17d/0x1b0 RSP: 0018:ffffc9000da5fe48 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 RAX: 0000000000000000 RBX: ffff889fa81d0cd8 RCX: 0000000000000029 RDX: ffff889fff86c0c0 RSI: 0000000000080000 RDI: ffff88bfc2da7200 RBP: ffff888f2dcdd768 R08: 0000000001040000 R09: 0000000000000000 R10: 0000000000000001 R11: ffffffff82a55560 R12: ffff88bfc2da7200 R13: 0000000000000000 R14: ffff88bff6c2a360 R15: ffffffff814bd870 ? kzalloc.constprop.57+0x30/0x30 list_lru_add+0x5a/0x100 inode_lru_list_add+0x20/0x40 iput+0x1c1/0x1f0 run_delayed_iput_locked+0x46/0x90 btrfs_run_delayed_iputs+0x3f/0x60 cleaner_kthread+0xf2/0x120 kthread+0x10b/0x130 Fix this by adding a cond_resched_lock() to the loop processing delayed iputs so we can avoid these sort of stalls. CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Rik van Riel Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 69fcdf8f0b1c..095e452f59f0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3246,6 +3246,7 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info) inode = list_first_entry(&fs_info->delayed_iputs, struct btrfs_inode, delayed_iput); run_delayed_iput_locked(fs_info, inode); + cond_resched_lock(&fs_info->delayed_iput_lock); } spin_unlock(&fs_info->delayed_iput_lock); } From 15c7745c9a0078edad1f7df5a6bb7b80bc8cca23 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Tue, 6 Apr 2021 15:31:18 -0700 Subject: [PATCH 239/730] btrfs: return whole extents in fiemap `xfs_io -c 'fiemap ' ` can give surprising results on btrfs that differ from xfs. btrfs prints out extents trimmed to fit the user input. If the user's fiemap request has an offset, then rather than returning each whole extent which intersects that range, we also trim the start extent to not have start < off. Documentation in filesystems/fiemap.txt and the xfs_io man page suggests that returning the whole extent is expected. Some cases which all yield the same fiemap in xfs, but not btrfs: dd if=/dev/zero of=$f bs=4k count=1 sudo xfs_io -c 'fiemap 0 1024' $f 0: [0..7]: 26624..26631 sudo xfs_io -c 'fiemap 2048 1024' $f 0: [4..7]: 26628..26631 sudo xfs_io -c 'fiemap 2048 4096' $f 0: [4..7]: 26628..26631 sudo xfs_io -c 'fiemap 3584 512' $f 0: [7..7]: 26631..26631 sudo xfs_io -c 'fiemap 4091 5' $f 0: [7..6]: 26631..26630 I believe this is a consequence of the logic for merging contiguous extents represented by separate extent items. That logic needs to track the last offset as it loops through the extent items, which happens to pick up the start offset on the first iteration, and trim off the beginning of the full extent. To fix it, start `off` at 0 rather than `start` so that we keep the iteration/merging intact without cutting off the start of the extent. after the fix, all the above commands give: 0: [0..7]: 26624..26631 The merging logic is exercised by fstest generic/483, and I have written a new fstest for checking we don't have backwards or zero-length fiemaps for cases like those above. Reviewed-by: Josef Bacik Signed-off-by: Boris Burkov Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f2d1bb234377..360d997c7226 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -5210,7 +5210,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { int ret = 0; - u64 off = start; + u64 off; u64 max = start + len; u32 flags = 0; u32 found_type; @@ -5245,6 +5245,11 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, goto out_free_ulist; } + /* + * We can't initialize that to 'start' as this could miss extents due + * to extent item merging + */ + off = 0; start = round_down(start, btrfs_inode_sectorsize(inode)); len = round_up(max, btrfs_inode_sectorsize(inode)) - start; From 54a40fc3a1da21b52dbf19f72fdc27a2ec740760 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 12 May 2021 16:27:16 +0100 Subject: [PATCH 240/730] btrfs: fix removed dentries still existing after log is synced When we move one inode from one directory to another and both the inode and its previous parent directory were logged before, we are not supposed to have the dentry for the old parent if we have a power failure after the log is synced. Only the new dentry is supposed to exist. Generally this works correctly, however there is a scenario where this is not currently working, because the old parent of the file/directory that was moved is not authoritative for a range that includes the dir index and dir item keys of the old dentry. This case is better explained with the following example and reproducer: # The test requires a very specific layout of keys and items in the # fs/subvolume btree to trigger the bug. So we want to make sure that # on whatever platform we are, we have the same leaf/node size. # # Currently in btrfs the node/leaf size can not be smaller than the page # size (but it can be greater than the page size). So use the largest # supported node/leaf size (64K). $ mkfs.btrfs -f -n 65536 /dev/sdc $ mount /dev/sdc /mnt # "testdir" is inode 257. $ mkdir /mnt/testdir $ chmod 755 /mnt/testdir # Create several empty files to have the directory "testdir" with its # items spread over several leaves (7 in this case). $ for ((i = 1; i <= 1200; i++)); do echo -n > /mnt/testdir/file$i done # Create our test directory "dira", inode number 1458, which gets all # its items in leaf 7. # # The BTRFS_DIR_ITEM_KEY item for inode 257 ("testdir") that points to # the entry named "dira" is in leaf 2, while the BTRFS_DIR_INDEX_KEY # item that points to that entry is in leaf 3. # # For this particular filesystem node size (64K), file count and file # names, we endup with the directory entry items from inode 257 in # leaves 2 and 3, as previously mentioned - what matters for triggering # the bug exercised by this test case is that those items are not placed # in leaf 1, they must be placed in a leaf different from the one # containing the inode item for inode 257. # # The corresponding BTRFS_DIR_ITEM_KEY and BTRFS_DIR_INDEX_KEY items for # the parent inode (257) are the following: # # item 460 key (257 DIR_ITEM 3724298081) itemoff 48344 itemsize 34 # location key (1458 INODE_ITEM 0) type DIR # transid 6 data_len 0 name_len 4 # name: dira # # and: # # item 771 key (257 DIR_INDEX 1202) itemoff 36673 itemsize 34 # location key (1458 INODE_ITEM 0) type DIR # transid 6 data_len 0 name_len 4 # name: dira $ mkdir /mnt/testdir/dira # Make sure everything done so far is durably persisted. $ sync # Now do a change to inode 257 ("testdir") that does not result in # COWing leaves 2 and 3 - the leaves that contain the directory items # pointing to inode 1458 (directory "dira"). # # Changing permissions, the owner/group, updating or adding a xattr, # etc, will not change (COW) leaves 2 and 3. So for the sake of # simplicity change the permissions of inode 257, which results in # updating its inode item and therefore change (COW) only leaf 1. $ chmod 700 /mnt/testdir # Now fsync directory inode 257. # # Since only the first leaf was changed/COWed, we log the inode item of # inode 257 and only the dentries found in the first leaf, all have a # key type of BTRFS_DIR_ITEM_KEY, and no keys of type # BTRFS_DIR_INDEX_KEY, because they sort after the former type and none # exist in the first leaf. # # We also log 3 items that represent ranges for dir items and dir # indexes for which the log is authoritative: # # 1) a key of type BTRFS_DIR_LOG_ITEM_KEY, which indicates the log is # authoritative for all BTRFS_DIR_ITEM_KEY keys that have an offset # in the range [0, 2285968570] (the offset here is the crc32c of the # dentry's name). The value 2285968570 corresponds to the offset of # the first key of leaf 2 (which is of type BTRFS_DIR_ITEM_KEY); # # 2) a key of type BTRFS_DIR_LOG_ITEM_KEY, which indicates the log is # authoritative for all BTRFS_DIR_ITEM_KEY keys that have an offset # in the range [4293818216, (u64)-1] (the offset here is the crc32c # of the dentry's name). The value 4293818216 corresponds to the # offset of the highest key of type BTRFS_DIR_ITEM_KEY plus 1 # (4293818215 + 1), which is located in leaf 2; # # 3) a key of type BTRFS_DIR_LOG_INDEX_KEY, with an offset of 1203, # which indicates the log is authoritative for all keys of type # BTRFS_DIR_INDEX_KEY that have an offset in the range # [1203, (u64)-1]. The value 1203 corresponds to the offset of the # last key of type BTRFS_DIR_INDEX_KEY plus 1 (1202 + 1), which is # located in leaf 3; # # Also, because "testdir" is a directory and inode 1458 ("dira") is a # child directory, we log inode 1458 too. $ xfs_io -c "fsync" /mnt/testdir # Now move "dira", inode 1458, to be a child of the root directory # (inode 256). # # Because this inode was previously logged, when "testdir" was fsynced, # the log is updated so that the old inode reference, referring to inode # 257 as the parent, is deleted and the new inode reference, referring # to inode 256 as the parent, is added to the log. $ mv /mnt/testdir/dira /mnt # Now change some file and fsync it. This guarantees the log changes # made by the previous move/rename operation are persisted. We do not # need to do any special modification to the file, just any change to # any file and sync the log. $ xfs_io -c "pwrite -S 0xab 0 64K" -c "fsync" /mnt/testdir/file1 # Simulate a power failure and then mount again the filesystem to # replay the log tree. We want to verify that we are able to mount the # filesystem, meaning log replay was successful, and that directory # inode 1458 ("dira") only has inode 256 (the filesystem's root) as # its parent (and no longer a child of inode 257). # # It used to happen that during log replay we would end up having # inode 1458 (directory "dira") with 2 hard links, being a child of # inode 257 ("testdir") and inode 256 (the filesystem's root). This # resulted in the tree checker detecting the issue and causing the # mount operation to fail (with -EIO). # # This happened because in the log we have the new name/parent for # inode 1458, which results in adding the new dentry with inode 256 # as the parent, but the previous dentry, under inode 257 was never # removed - this is because the ranges for dir items and dir indexes # of inode 257 for which the log is authoritative do not include the # old dir item and dir index for the dentry of inode 257 referring to # inode 1458: # # - for dir items, the log is authoritative for the ranges # [0, 2285968570] and [4293818216, (u64)-1]. The dir item at inode 257 # pointing to inode 1458 has a key of (257 DIR_ITEM 3724298081), as # previously mentioned, so the dir item is not deleted when the log # replay procedure processes the authoritative ranges, as 3724298081 # is outside both ranges; # # - for dir indexes, the log is authoritative for the range # [1203, (u64)-1], and the dir index item of inode 257 pointing to # inode 1458 has a key of (257 DIR_INDEX 1202), as previously # mentioned, so the dir index item is not deleted when the log # replay procedure processes the authoritative range. $ mount /dev/sdc /mnt mount: /mnt: can't read superblock on /dev/sdc. $ dmesg (...) [87849.840509] BTRFS info (device sdc): start tree-log replay [87849.875719] BTRFS critical (device sdc): corrupt leaf: root=5 block=30539776 slot=554 ino=1458, invalid nlink: has 2 expect no more than 1 for dir [87849.878084] BTRFS info (device sdc): leaf 30539776 gen 7 total ptrs 557 free space 2092 owner 5 [87849.879516] BTRFS info (device sdc): refs 1 lock_owner 0 current 2099108 [87849.880613] item 0 key (1181 1 0) itemoff 65275 itemsize 160 [87849.881544] inode generation 6 size 0 mode 100644 [87849.882692] item 1 key (1181 12 257) itemoff 65258 itemsize 17 (...) [87850.562549] item 556 key (1458 12 257) itemoff 16017 itemsize 14 [87850.563349] BTRFS error (device dm-0): block=30539776 write time tree block corruption detected [87850.564386] ------------[ cut here ]------------ [87850.564920] WARNING: CPU: 3 PID: 2099108 at fs/btrfs/disk-io.c:465 csum_one_extent_buffer+0xed/0x100 [btrfs] [87850.566129] Modules linked in: btrfs dm_zero dm_snapshot (...) [87850.573789] CPU: 3 PID: 2099108 Comm: mount Not tainted 5.12.0-rc8-btrfs-next-86 #1 (...) [87850.587481] Call Trace: [87850.587768] btree_csum_one_bio+0x244/0x2b0 [btrfs] [87850.588354] ? btrfs_bio_fits_in_stripe+0xd8/0x110 [btrfs] [87850.589003] btrfs_submit_metadata_bio+0xb7/0x100 [btrfs] [87850.589654] submit_one_bio+0x61/0x70 [btrfs] [87850.590248] submit_extent_page+0x91/0x2f0 [btrfs] [87850.590842] write_one_eb+0x175/0x440 [btrfs] [87850.591370] ? find_extent_buffer_nolock+0x1c0/0x1c0 [btrfs] [87850.592036] btree_write_cache_pages+0x1e6/0x610 [btrfs] [87850.592665] ? free_debug_processing+0x1d5/0x240 [87850.593209] do_writepages+0x43/0xf0 [87850.593798] ? __filemap_fdatawrite_range+0xa4/0x100 [87850.594391] __filemap_fdatawrite_range+0xc5/0x100 [87850.595196] btrfs_write_marked_extents+0x68/0x160 [btrfs] [87850.596202] btrfs_write_and_wait_transaction.isra.0+0x4d/0xd0 [btrfs] [87850.597377] btrfs_commit_transaction+0x794/0xca0 [btrfs] [87850.598455] ? _raw_spin_unlock_irqrestore+0x32/0x60 [87850.599305] ? kmem_cache_free+0x15a/0x3d0 [87850.600029] btrfs_recover_log_trees+0x346/0x380 [btrfs] [87850.601021] ? replay_one_extent+0x7d0/0x7d0 [btrfs] [87850.601988] open_ctree+0x13c9/0x1698 [btrfs] [87850.602846] btrfs_mount_root.cold+0x13/0xed [btrfs] [87850.603771] ? kmem_cache_alloc_trace+0x7c9/0x930 [87850.604576] ? vfs_parse_fs_string+0x5d/0xb0 [87850.605293] ? kfree+0x276/0x3f0 [87850.605857] legacy_get_tree+0x30/0x50 [87850.606540] vfs_get_tree+0x28/0xc0 [87850.607163] fc_mount+0xe/0x40 [87850.607695] vfs_kern_mount.part.0+0x71/0x90 [87850.608440] btrfs_mount+0x13b/0x3e0 [btrfs] (...) [87850.629477] ---[ end trace 68802022b99a1ea0 ]--- [87850.630849] BTRFS: error (device sdc) in btrfs_commit_transaction:2381: errno=-5 IO failure (Error while writing out transaction) [87850.632422] BTRFS warning (device sdc): Skipping commit of aborted transaction. [87850.633416] BTRFS: error (device sdc) in cleanup_transaction:1978: errno=-5 IO failure [87850.634553] BTRFS: error (device sdc) in btrfs_replay_log:2431: errno=-5 IO failure (Failed to recover log tree) [87850.637529] BTRFS error (device sdc): open_ctree failed In this example the inode we moved was a directory, so it was easy to detect the problem because directories can only have one hard link and the tree checker immediately detects that. If the moved inode was a file, then the log replay would succeed and we would end up having both the new hard link (/mnt/foo) and the old hard link (/mnt/testdir/foo) present, but only the new one should be present. Fix this by forcing re-logging of the old parent directory when logging the new name during a rename operation. This ensures we end up with a log that is authoritative for a range covering the keys for the old dentry, therefore causing the old dentry do be deleted when replaying the log. A test case for fstests will follow up soon. Fixes: 64d6b281ba4db0 ("btrfs: remove unnecessary check_parent_dirs_for_sync()") CC: stable@vger.kernel.org # 5.12+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index a0fc3a1390ab..fd6b1f13112e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -6462,6 +6462,24 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, (!old_dir || old_dir->logged_trans < trans->transid)) return; + /* + * If we are doing a rename (old_dir is not NULL) from a directory that + * was previously logged, make sure the next log attempt on the directory + * is not skipped and logs the inode again. This is because the log may + * not currently be authoritative for a range including the old + * BTRFS_DIR_ITEM_KEY and BTRFS_DIR_INDEX_KEY keys, so we want to make + * sure after a log replay we do not end up with both the new and old + * dentries around (in case the inode is a directory we would have a + * directory with two hard links and 2 inode references for different + * parents). The next log attempt of old_dir will happen at + * btrfs_log_all_parents(), called through btrfs_log_inode_parent() + * below, because we have previously set inode->last_unlink_trans to the + * current transaction ID, either here or at btrfs_record_unlink_dir() in + * case inode is a directory. + */ + if (old_dir) + old_dir->logged_trans = 0; + btrfs_init_log_ctx(&ctx, &inode->vfs_inode); ctx.logging_new_name = true; /* From c07531c01d8284aedaf95708ea90e76d11af0e21 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 10 May 2021 14:50:24 +0300 Subject: [PATCH 241/730] netfilter: flowtable: Remove redundant hw refresh bit Offloading conns could fail for multiple reasons and a hw refresh bit is set to try to reoffload it in next sw packet. But it could be in some cases and future points that the hw refresh bit is not set but a refresh could succeed. Remove the hw refresh bit and do offload refresh if requested. There won't be a new work entry if a work is already pending anyway as there is the hw pending bit. Fixes: 8b3646d6e0c4 ("net/sched: act_ct: Support refreshing the flow table entries") Signed-off-by: Roi Dayan Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 1 - net/netfilter/nf_flow_table_core.c | 3 +-- net/netfilter/nf_flow_table_offload.c | 7 ++++--- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 51d8eb99764d..48ef7460ff30 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -157,7 +157,6 @@ enum nf_flow_flags { NF_FLOW_HW, NF_FLOW_HW_DYING, NF_FLOW_HW_DEAD, - NF_FLOW_HW_REFRESH, NF_FLOW_HW_PENDING, }; diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 39c02d1aeedf..1d02650dd715 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -306,8 +306,7 @@ void flow_offload_refresh(struct nf_flowtable *flow_table, { flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT; - if (likely(!nf_flowtable_hw_offload(flow_table) || - !test_and_clear_bit(NF_FLOW_HW_REFRESH, &flow->flags))) + if (likely(!nf_flowtable_hw_offload(flow_table))) return; nf_flow_offload_add(flow_table, flow); diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 2af7bdb38407..528b2f172684 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -902,10 +902,11 @@ static void flow_offload_work_add(struct flow_offload_work *offload) err = flow_offload_rule_add(offload, flow_rule); if (err < 0) - set_bit(NF_FLOW_HW_REFRESH, &offload->flow->flags); - else - set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status); + goto out; + set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status); + +out: nf_flow_offload_destroy(flow_rule); } From f0b3d338064e1fe7531f0d2977e35f3b334abfb4 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Mon, 10 May 2021 07:58:22 +0200 Subject: [PATCH 242/730] netfilter: nft_set_pipapo_avx2: Add irq_fpu_usable() check, fallback to non-AVX2 version Arturo reported this backtrace: [709732.358791] WARNING: CPU: 3 PID: 456 at arch/x86/kernel/fpu/core.c:128 kernel_fpu_begin_mask+0xae/0xe0 [709732.358793] Modules linked in: binfmt_misc nft_nat nft_chain_nat nf_nat nft_counter nft_ct nf_tables nf_conntrack_netlink nfnetlink 8021q garp stp mrp llc vrf intel_rapl_msr intel_rapl_common skx_edac nfit libnvdimm ipmi_ssif x86_pkg_temp_thermal intel_powerclamp coretemp crc32_pclmul mgag200 ghash_clmulni_intel drm_kms_helper cec aesni_intel drm libaes crypto_simd cryptd glue_helper mei_me dell_smbios iTCO_wdt evdev intel_pmc_bxt iTCO_vendor_support dcdbas pcspkr rapl dell_wmi_descriptor wmi_bmof sg i2c_algo_bit watchdog mei acpi_ipmi ipmi_si button nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ipmi_devintf ipmi_msghandler ip_tables x_tables autofs4 ext4 crc16 mbcache jbd2 dm_mod raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor sd_mod t10_pi crc_t10dif crct10dif_generic raid6_pq libcrc32c crc32c_generic raid1 raid0 multipath linear md_mod ahci libahci tg3 libata xhci_pci libphy xhci_hcd ptp usbcore crct10dif_pclmul crct10dif_common bnxt_en crc32c_intel scsi_mod [709732.358941] pps_core i2c_i801 lpc_ich i2c_smbus wmi usb_common [709732.358957] CPU: 3 PID: 456 Comm: jbd2/dm-0-8 Not tainted 5.10.0-0.bpo.5-amd64 #1 Debian 5.10.24-1~bpo10+1 [709732.358959] Hardware name: Dell Inc. PowerEdge R440/04JN2K, BIOS 2.9.3 09/23/2020 [709732.358964] RIP: 0010:kernel_fpu_begin_mask+0xae/0xe0 [709732.358969] Code: ae 54 24 04 83 e3 01 75 38 48 8b 44 24 08 65 48 33 04 25 28 00 00 00 75 33 48 83 c4 10 5b c3 65 8a 05 5e 21 5e 76 84 c0 74 92 <0f> 0b eb 8e f0 80 4f 01 40 48 81 c7 00 14 00 00 e8 dd fb ff ff eb [709732.358972] RSP: 0018:ffffbb9700304740 EFLAGS: 00010202 [709732.358976] RAX: 0000000000000001 RBX: 0000000000000003 RCX: 0000000000000001 [709732.358979] RDX: ffffbb9700304970 RSI: ffff922fe1952e00 RDI: 0000000000000003 [709732.358981] RBP: ffffbb9700304970 R08: ffff922fc868a600 R09: ffff922fc711e462 [709732.358984] R10: 000000000000005f R11: ffff922ff0b27180 R12: ffffbb9700304960 [709732.358987] R13: ffffbb9700304b08 R14: ffff922fc664b6c8 R15: ffff922fc664b660 [709732.358990] FS: 0000000000000000(0000) GS:ffff92371fec0000(0000) knlGS:0000000000000000 [709732.358993] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [709732.358996] CR2: 0000557a6655bdd0 CR3: 000000026020a001 CR4: 00000000007706e0 [709732.358999] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [709732.359001] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [709732.359003] PKRU: 55555554 [709732.359005] Call Trace: [709732.359009] [709732.359035] nft_pipapo_avx2_lookup+0x4c/0x1cba [nf_tables] [709732.359046] ? sched_clock+0x5/0x10 [709732.359054] ? sched_clock_cpu+0xc/0xb0 [709732.359061] ? record_times+0x16/0x80 [709732.359068] ? plist_add+0xc1/0x100 [709732.359073] ? psi_group_change+0x47/0x230 [709732.359079] ? skb_clone+0x4d/0xb0 [709732.359085] ? enqueue_task_rt+0x22b/0x310 [709732.359098] ? bnxt_start_xmit+0x1e8/0xaf0 [bnxt_en] [709732.359102] ? packet_rcv+0x40/0x4a0 [709732.359121] nft_lookup_eval+0x59/0x160 [nf_tables] [709732.359133] nft_do_chain+0x350/0x500 [nf_tables] [709732.359152] ? nft_lookup_eval+0x59/0x160 [nf_tables] [709732.359163] ? nft_do_chain+0x364/0x500 [nf_tables] [709732.359172] ? fib4_rule_action+0x6d/0x80 [709732.359178] ? fib_rules_lookup+0x107/0x250 [709732.359184] nft_nat_do_chain+0x8a/0xf2 [nft_chain_nat] [709732.359193] nf_nat_inet_fn+0xea/0x210 [nf_nat] [709732.359202] nf_nat_ipv4_out+0x14/0xa0 [nf_nat] [709732.359207] nf_hook_slow+0x44/0xc0 [709732.359214] ip_output+0xd2/0x100 [709732.359221] ? __ip_finish_output+0x210/0x210 [709732.359226] ip_forward+0x37d/0x4a0 [709732.359232] ? ip4_key_hashfn+0xb0/0xb0 [709732.359238] ip_sublist_rcv_finish+0x4f/0x60 [709732.359243] ip_sublist_rcv+0x196/0x220 [709732.359250] ? ip_rcv_finish_core.isra.22+0x400/0x400 [709732.359255] ip_list_rcv+0x137/0x160 [709732.359264] __netif_receive_skb_list_core+0x29b/0x2c0 [709732.359272] netif_receive_skb_list_internal+0x1a6/0x2d0 [709732.359280] gro_normal_list.part.156+0x19/0x40 [709732.359286] napi_complete_done+0x67/0x170 [709732.359298] bnxt_poll+0x105/0x190 [bnxt_en] [709732.359304] ? irqentry_exit+0x29/0x30 [709732.359309] ? asm_common_interrupt+0x1e/0x40 [709732.359315] net_rx_action+0x144/0x3c0 [709732.359322] __do_softirq+0xd5/0x29c [709732.359329] asm_call_irq_on_stack+0xf/0x20 [709732.359332] [709732.359339] do_softirq_own_stack+0x37/0x40 [709732.359346] irq_exit_rcu+0x9d/0xa0 [709732.359353] common_interrupt+0x78/0x130 [709732.359358] asm_common_interrupt+0x1e/0x40 [709732.359366] RIP: 0010:crc_41+0x0/0x1e [crc32c_intel] [709732.359370] Code: ff ff f2 4d 0f 38 f1 93 a8 fe ff ff f2 4c 0f 38 f1 81 b0 fe ff ff f2 4c 0f 38 f1 8a b0 fe ff ff f2 4d 0f 38 f1 93 b0 fe ff ff 4c 0f 38 f1 81 b8 fe ff ff f2 4c 0f 38 f1 8a b8 fe ff ff f2 4d [709732.359373] RSP: 0018:ffffbb97008dfcd0 EFLAGS: 00000246 [709732.359377] RAX: 000000000000002a RBX: 0000000000000400 RCX: ffff922fc591dd50 [709732.359379] RDX: ffff922fc591dea0 RSI: 0000000000000a14 RDI: ffffffffc00dddc0 [709732.359382] RBP: 0000000000001000 R08: 000000000342d8c3 R09: 0000000000000000 [709732.359384] R10: 0000000000000000 R11: ffff922fc591dff0 R12: ffffbb97008dfe58 [709732.359386] R13: 000000000000000a R14: ffff922fd2b91e80 R15: ffff922fef83fe38 [709732.359395] ? crc_43+0x1e/0x1e [crc32c_intel] [709732.359403] ? crc32c_pcl_intel_update+0x97/0xb0 [crc32c_intel] [709732.359419] ? jbd2_journal_commit_transaction+0xaec/0x1a30 [jbd2] [709732.359425] ? irq_exit_rcu+0x3e/0xa0 [709732.359447] ? kjournald2+0xbd/0x270 [jbd2] [709732.359454] ? finish_wait+0x80/0x80 [709732.359470] ? commit_timeout+0x10/0x10 [jbd2] [709732.359476] ? kthread+0x116/0x130 [709732.359481] ? kthread_park+0x80/0x80 [709732.359488] ? ret_from_fork+0x1f/0x30 [709732.359494] ---[ end trace 081a19978e5f09f5 ]--- that is, nft_pipapo_avx2_lookup() uses the FPU running from a softirq that interrupted a kthread, also using the FPU. That's exactly the reason why irq_fpu_usable() is there: use it, and if we can't use the FPU, fall back to the non-AVX2 version of the lookup operation, i.e. nft_pipapo_lookup(). Reported-by: Arturo Borrero Gonzalez Cc: # 5.6.x Fixes: 7400b063969b ("nft_set_pipapo: Introduce AVX2-based lookup implementation") Signed-off-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_pipapo.c | 4 ++-- net/netfilter/nft_set_pipapo.h | 2 ++ net/netfilter/nft_set_pipapo_avx2.c | 3 +++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 528a2d7ca991..dce866d93fee 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -408,8 +408,8 @@ int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, * * Return: true on match, false otherwise. */ -static bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext) +bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext) { struct nft_pipapo *priv = nft_set_priv(set); unsigned long *res_map, *fill_map; diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index 25a75591583e..d84afb8fa79a 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -178,6 +178,8 @@ struct nft_pipapo_elem { int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, union nft_pipapo_map_bucket *mt, bool match_only); +bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext); /** * pipapo_and_field_buckets_4bit() - Intersect 4-bit buckets diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index d65ae0e23028..eabdb8d552ee 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -1131,6 +1131,9 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, bool map_index; int i, ret = 0; + if (unlikely(!irq_fpu_usable())) + return nft_pipapo_lookup(net, set, key, ext); + m = rcu_dereference(priv->match); /* This also protects access to all data related to scratch maps */ From eb8500b874cf295971a6a2a04e14eb0854197a3c Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 30 Apr 2021 05:23:43 -0700 Subject: [PATCH 243/730] thermal/drivers/intel: Initialize RW trip to THERMAL_TEMP_INVALID After commit 81ad4276b505 ("Thermal: Ignore invalid trip points") all user_space governor notifications via RW trip point is broken in intel thermal drivers. This commits marks trip_points with value of 0 during call to thermal_zone_device_register() as invalid. RW trip points can be 0 as user space will set the correct trip temperature later. During driver init, x86_package_temp and all int340x drivers sets RW trip temperature as 0. This results in all these trips marked as invalid by the thermal core. To fix this initialize RW trips to THERMAL_TEMP_INVALID instead of 0. Cc: Signed-off-by: Srinivas Pandruvada Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20210430122343.1789899-1-srinivas.pandruvada@linux.intel.com --- drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c | 4 ++++ drivers/thermal/intel/x86_pkg_temp_thermal.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c index d1248ba943a4..62c0aa5d0783 100644 --- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c +++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c @@ -237,6 +237,8 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev, if (ACPI_FAILURE(status)) trip_cnt = 0; else { + int i; + int34x_thermal_zone->aux_trips = kcalloc(trip_cnt, sizeof(*int34x_thermal_zone->aux_trips), @@ -247,6 +249,8 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev, } trip_mask = BIT(trip_cnt) - 1; int34x_thermal_zone->aux_trip_nr = trip_cnt; + for (i = 0; i < trip_cnt; ++i) + int34x_thermal_zone->aux_trips[i] = THERMAL_TEMP_INVALID; } trip_cnt = int340x_thermal_read_trips(int34x_thermal_zone); diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c index 295742e83960..4d8edc61a78b 100644 --- a/drivers/thermal/intel/x86_pkg_temp_thermal.c +++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c @@ -166,7 +166,7 @@ static int sys_get_trip_temp(struct thermal_zone_device *tzd, if (thres_reg_value) *temp = zonedev->tj_max - thres_reg_value * 1000; else - *temp = 0; + *temp = THERMAL_TEMP_INVALID; pr_debug("sys_get_trip_temp %d\n", *temp); return 0; From 1b6604896e78969baffc1b6cc6bc175f95929ac4 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 May 2021 21:56:48 +0900 Subject: [PATCH 244/730] ALSA: dice: fix stream format at middle sampling rate for Alesis iO 26 Alesis iO 26 FireWire has two pairs of digital optical interface. It delivers PCM frames from the interfaces by second isochronous packet streaming. Although both of the interfaces are available at 44.1/48.0 kHz, first one of them is only available at 88.2/96.0 kHz. It reduces the number of PCM samples to 4 in Multi Bit Linear Audio data channel of data blocks on the second isochronous packet streaming. This commit fixes hardcoded stream formats. Cc: Fixes: 28b208f600a3 ("ALSA: dice: add parameters of stream formats for models produced by Alesis") Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20210513125652.110249-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- sound/firewire/dice/dice-alesis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/dice/dice-alesis.c b/sound/firewire/dice/dice-alesis.c index 0916864511d5..27c13b9cc9ef 100644 --- a/sound/firewire/dice/dice-alesis.c +++ b/sound/firewire/dice/dice-alesis.c @@ -16,7 +16,7 @@ alesis_io14_tx_pcm_chs[MAX_STREAMS][SND_DICE_RATE_MODE_COUNT] = { static const unsigned int alesis_io26_tx_pcm_chs[MAX_STREAMS][SND_DICE_RATE_MODE_COUNT] = { {10, 10, 4}, /* Tx0 = Analog + S/PDIF. */ - {16, 8, 0}, /* Tx1 = ADAT1 + ADAT2. */ + {16, 4, 0}, /* Tx1 = ADAT1 + ADAT2 (available at low rate). */ }; int snd_dice_detect_alesis_formats(struct snd_dice *dice) From 0edabdfe89581669609eaac5f6a8d0ae6fe95e7f Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 May 2021 21:56:49 +0900 Subject: [PATCH 245/730] ALSA: bebob/oxfw: fix Kconfig entry for Mackie d.2 Pro Mackie d.2 has an extension card for IEEE 1394 communication, which uses BridgeCo DM1000 ASIC. On the other hand, Mackie d.4 Pro has built-in function for IEEE 1394 communication by Oxford Semiconductor OXFW971, according to schematic diagram available in Mackie website. Although I misunderstood that Mackie d.2 Pro would be also a model with OXFW971, it's wrong. Mackie d.2 Pro is a model which includes the extension card as factory settings. This commit fixes entries in Kconfig and comment in ALSA OXFW driver. Cc: Fixes: fd6f4b0dc167 ("ALSA: bebob: Add skelton for BeBoB based devices") Fixes: ec4dba5053e1 ("ALSA: oxfw: Add support for Behringer/Mackie devices") Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20210513125652.110249-3-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- sound/firewire/Kconfig | 4 ++-- sound/firewire/bebob/bebob.c | 2 +- sound/firewire/oxfw/oxfw.c | 1 - 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/sound/firewire/Kconfig b/sound/firewire/Kconfig index 25778765cbfe..9897bd26a438 100644 --- a/sound/firewire/Kconfig +++ b/sound/firewire/Kconfig @@ -38,7 +38,7 @@ config SND_OXFW * Mackie(Loud) Onyx 1640i (former model) * Mackie(Loud) Onyx Satellite * Mackie(Loud) Tapco Link.Firewire - * Mackie(Loud) d.2 pro/d.4 pro + * Mackie(Loud) d.4 pro * Mackie(Loud) U.420/U.420d * TASCAM FireOne * Stanton Controllers & Systems 1 Deck/Mixer @@ -84,7 +84,7 @@ config SND_BEBOB * PreSonus FIREBOX/FIREPOD/FP10/Inspire1394 * BridgeCo RDAudio1/Audio5 * Mackie Onyx 1220/1620/1640 (FireWire I/O Card) - * Mackie d.2 (FireWire Option) + * Mackie d.2 (FireWire Option) and d.2 Pro * Stanton FinalScratch 2 (ScratchAmp) * Tascam IF-FW/DM * Behringer XENIX UFX 1204/1604 diff --git a/sound/firewire/bebob/bebob.c b/sound/firewire/bebob/bebob.c index 2c8e3392a490..daeecfa8b9aa 100644 --- a/sound/firewire/bebob/bebob.c +++ b/sound/firewire/bebob/bebob.c @@ -387,7 +387,7 @@ static const struct ieee1394_device_id bebob_id_table[] = { SND_BEBOB_DEV_ENTRY(VEN_BRIDGECO, 0x00010049, &spec_normal), /* Mackie, Onyx 1220/1620/1640 (Firewire I/O Card) */ SND_BEBOB_DEV_ENTRY(VEN_MACKIE2, 0x00010065, &spec_normal), - /* Mackie, d.2 (Firewire Option) */ + // Mackie, d.2 (Firewire option card) and d.2 Pro (the card is built-in). SND_BEBOB_DEV_ENTRY(VEN_MACKIE1, 0x00010067, &spec_normal), /* Stanton, ScratchAmp */ SND_BEBOB_DEV_ENTRY(VEN_STANTON, 0x00000001, &spec_normal), diff --git a/sound/firewire/oxfw/oxfw.c b/sound/firewire/oxfw/oxfw.c index 1f1e3236efb8..9eea25c46dc7 100644 --- a/sound/firewire/oxfw/oxfw.c +++ b/sound/firewire/oxfw/oxfw.c @@ -355,7 +355,6 @@ static const struct ieee1394_device_id oxfw_id_table[] = { * Onyx-i series (former models): 0x081216 * Mackie Onyx Satellite: 0x00200f * Tapco LINK.firewire 4x6: 0x000460 - * d.2 pro: Unknown * d.4 pro: Unknown * U.420: Unknown * U.420d: Unknown From 395f41e2cdac63e7581fb9574e5ac0f02556e34a Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 May 2021 21:56:50 +0900 Subject: [PATCH 246/730] ALSA: firewire-lib: fix check for the size of isochronous packet payload The check for size of isochronous packet payload just cares of the size of IR context payload without the size of CIP header. Cc: Fixes: f11453c7cc01 ("ALSA: firewire-lib: use 16 bytes IR context header to separate CIP header") Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20210513125652.110249-4-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- sound/firewire/amdtp-stream.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index 4e2f2bb7879f..b53971bf4b90 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -633,18 +633,24 @@ static int parse_ir_ctx_header(struct amdtp_stream *s, unsigned int cycle, unsigned int *syt, unsigned int index) { const __be32 *cip_header; + unsigned int cip_header_size; int err; *payload_length = be32_to_cpu(ctx_header[0]) >> ISO_DATA_LENGTH_SHIFT; - if (*payload_length > s->ctx_data.tx.ctx_header_size + - s->ctx_data.tx.max_ctx_payload_length) { + + if (!(s->flags & CIP_NO_HEADER)) + cip_header_size = 8; + else + cip_header_size = 0; + + if (*payload_length > cip_header_size + s->ctx_data.tx.max_ctx_payload_length) { dev_err(&s->unit->device, "Detect jumbo payload: %04x %04x\n", - *payload_length, s->ctx_data.tx.max_ctx_payload_length); + *payload_length, cip_header_size + s->ctx_data.tx.max_ctx_payload_length); return -EIO; } - if (!(s->flags & CIP_NO_HEADER)) { + if (cip_header_size > 0) { cip_header = ctx_header + 2; err = check_cip_header(s, cip_header, *payload_length, data_blocks, data_block_counter, syt); From 1be4f21d9984fa9835fae5411a29465dc5aece6f Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 May 2021 21:56:51 +0900 Subject: [PATCH 247/730] ALSA: firewire-lib: fix calculation for size of IR context payload The quadlets for CIP header is handled as a part of IR context header, thus it doesn't join in IR context payload. However current calculation includes the quadlets in IR context payload. Cc: Fixes: f11453c7cc01 ("ALSA: firewire-lib: use 16 bytes IR context header to separate CIP header") Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20210513125652.110249-5-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- sound/firewire/amdtp-stream.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index b53971bf4b90..73aff017dc9a 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -1071,23 +1071,22 @@ static int amdtp_stream_start(struct amdtp_stream *s, int channel, int speed, s->data_block_counter = 0; } - /* initialize packet buffer */ + // initialize packet buffer. + max_ctx_payload_size = amdtp_stream_get_max_payload(s); if (s->direction == AMDTP_IN_STREAM) { dir = DMA_FROM_DEVICE; type = FW_ISO_CONTEXT_RECEIVE; - if (!(s->flags & CIP_NO_HEADER)) + if (!(s->flags & CIP_NO_HEADER)) { + max_ctx_payload_size -= 8; ctx_header_size = IR_CTX_HEADER_SIZE_CIP; - else + } else { ctx_header_size = IR_CTX_HEADER_SIZE_NO_CIP; - - max_ctx_payload_size = amdtp_stream_get_max_payload(s) - - ctx_header_size; + } } else { dir = DMA_TO_DEVICE; type = FW_ISO_CONTEXT_TRANSMIT; ctx_header_size = 0; // No effect for IT context. - max_ctx_payload_size = amdtp_stream_get_max_payload(s); if (!(s->flags & CIP_NO_HEADER)) max_ctx_payload_size -= IT_PKT_HEADER_SIZE_CIP; } From 814b43127f4ac69332e809152e30773941438aff Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 May 2021 21:56:52 +0900 Subject: [PATCH 248/730] ALSA: firewire-lib: fix amdtp_packet tracepoints event for packet_index field The snd_firewire_lib:amdtp_packet tracepoints event includes index of packet processed in a context handling. However in IR context, it is not calculated as expected. Cc: Fixes: 753e717986c2 ("ALSA: firewire-lib: use packet descriptor for IR context") Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20210513125652.110249-6-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- sound/firewire/amdtp-stream-trace.h | 6 +++--- sound/firewire/amdtp-stream.c | 15 +++++++++------ 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/sound/firewire/amdtp-stream-trace.h b/sound/firewire/amdtp-stream-trace.h index 26e7cb555d3c..aa53c13b89d3 100644 --- a/sound/firewire/amdtp-stream-trace.h +++ b/sound/firewire/amdtp-stream-trace.h @@ -14,8 +14,8 @@ #include TRACE_EVENT(amdtp_packet, - TP_PROTO(const struct amdtp_stream *s, u32 cycles, const __be32 *cip_header, unsigned int payload_length, unsigned int data_blocks, unsigned int data_block_counter, unsigned int index), - TP_ARGS(s, cycles, cip_header, payload_length, data_blocks, data_block_counter, index), + TP_PROTO(const struct amdtp_stream *s, u32 cycles, const __be32 *cip_header, unsigned int payload_length, unsigned int data_blocks, unsigned int data_block_counter, unsigned int packet_index, unsigned int index), + TP_ARGS(s, cycles, cip_header, payload_length, data_blocks, data_block_counter, packet_index, index), TP_STRUCT__entry( __field(unsigned int, second) __field(unsigned int, cycle) @@ -48,7 +48,7 @@ TRACE_EVENT(amdtp_packet, __entry->payload_quadlets = payload_length / sizeof(__be32); __entry->data_blocks = data_blocks; __entry->data_block_counter = data_block_counter, - __entry->packet_index = s->packet_index; + __entry->packet_index = packet_index; __entry->irq = !!in_interrupt(); __entry->index = index; ), diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index 73aff017dc9a..e0faa6601966 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -526,7 +526,7 @@ static void build_it_pkt_header(struct amdtp_stream *s, unsigned int cycle, } trace_amdtp_packet(s, cycle, cip_header, payload_length, data_blocks, - data_block_counter, index); + data_block_counter, s->packet_index, index); } static int check_cip_header(struct amdtp_stream *s, const __be32 *buf, @@ -630,7 +630,7 @@ static int parse_ir_ctx_header(struct amdtp_stream *s, unsigned int cycle, unsigned int *payload_length, unsigned int *data_blocks, unsigned int *data_block_counter, - unsigned int *syt, unsigned int index) + unsigned int *syt, unsigned int packet_index, unsigned int index) { const __be32 *cip_header; unsigned int cip_header_size; @@ -668,7 +668,7 @@ static int parse_ir_ctx_header(struct amdtp_stream *s, unsigned int cycle, } trace_amdtp_packet(s, cycle, cip_header, *payload_length, *data_blocks, - *data_block_counter, index); + *data_block_counter, packet_index, index); return err; } @@ -707,12 +707,13 @@ static int generate_device_pkt_descs(struct amdtp_stream *s, unsigned int packets) { unsigned int dbc = s->data_block_counter; + unsigned int packet_index = s->packet_index; + unsigned int queue_size = s->queue_size; int i; int err; for (i = 0; i < packets; ++i) { struct pkt_desc *desc = descs + i; - unsigned int index = (s->packet_index + i) % s->queue_size; unsigned int cycle; unsigned int payload_length; unsigned int data_blocks; @@ -721,7 +722,7 @@ static int generate_device_pkt_descs(struct amdtp_stream *s, cycle = compute_cycle_count(ctx_header[1]); err = parse_ir_ctx_header(s, cycle, ctx_header, &payload_length, - &data_blocks, &dbc, &syt, i); + &data_blocks, &dbc, &syt, packet_index, i); if (err < 0) return err; @@ -729,13 +730,15 @@ static int generate_device_pkt_descs(struct amdtp_stream *s, desc->syt = syt; desc->data_blocks = data_blocks; desc->data_block_counter = dbc; - desc->ctx_payload = s->buffer.packets[index].buffer; + desc->ctx_payload = s->buffer.packets[packet_index].buffer; if (!(s->flags & CIP_DBC_IS_END_EVENT)) dbc = (dbc + desc->data_blocks) & 0xff; ctx_header += s->ctx_data.tx.ctx_header_size / sizeof(*ctx_header); + + packet_index = (packet_index + 1) % queue_size; } s->data_block_counter = dbc; From 1d5cfca286178ce81fb0c8a5f5777ef123cd69e4 Mon Sep 17 00:00:00 2001 From: PeiSen Hou Date: Fri, 14 May 2021 12:50:48 +0200 Subject: [PATCH 249/730] ALSA: hda/realtek: Add some CLOVE SSIDs of ALC293 Fix "use as headset mic, without its own jack detect" problen. Signed-off-by: PeiSen Hou Cc: Link: https://lore.kernel.org/r/d0746eaf29f248a5acc30313e3ba4f99@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3e269de84079..552e2cb73291 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8385,12 +8385,19 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x50b8, "Clevo NK50SZ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x50d5, "Clevo NP50D5", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x50f0, "Clevo NH50A[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50f2, "Clevo NH50E[PR]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x50f3, "Clevo NH58DPQ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50f5, "Clevo NH55EPY", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50f6, "Clevo NH55DPQ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x5101, "Clevo S510WU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x5157, "Clevo W517GU1", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x51a1, "Clevo NS50MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x70a1, "Clevo NB70T[HJK]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x70b3, "Clevo NK70SB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x70f2, "Clevo NH79EPY", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x70f3, "Clevo NH77DPQ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x70f4, "Clevo NH77EPY", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x70f6, "Clevo NH77DPQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8228, "Clevo NR40BU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8520, "Clevo NH50D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8521, "Clevo NH77D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -8408,9 +8415,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x8a51, "Clevo NH70RCQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8d50, "Clevo NH55RCQ-M", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x951d, "Clevo N950T[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x9600, "Clevo N960K[PR]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x961d, "Clevo N960S[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x971d, "Clevo N970T[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa500, "Clevo NL53RU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0xa600, "Clevo NL5XNU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0xb018, "Clevo NP50D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0xb019, "Clevo NH77D[BE]Q", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0xb022, "Clevo NH77D[DC][QW]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0xc018, "Clevo NP50D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0xc019, "Clevo NH77D[BE]Q", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0xc022, "Clevo NH77[DC][QW]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS), SND_PCI_QUIRK(0x17aa, 0x1048, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE), From 156ed0215ef365604f2382d5164c36d3a1cfd98f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20=C3=85gren?= Date: Thu, 22 Apr 2021 21:22:40 +0200 Subject: [PATCH 250/730] uio/uio_pci_generic: fix return value changed in refactoring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit ef84928cff58 ("uio/uio_pci_generic: use device-managed function equivalents") was able to simplify various error paths thanks to no longer having to clean up on the way out. Some error paths were dropped, others were simplified. In one of those simplifications, the return value was accidentally changed from -ENODEV to -ENOMEM. Restore the old return value. Fixes: ef84928cff58 ("uio/uio_pci_generic: use device-managed function equivalents") Cc: stable Acked-by: Michael S. Tsirkin Signed-off-by: Martin Ågren Link: https://lore.kernel.org/r/20210422192240.1136373-1-martin.agren@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio_pci_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/uio/uio_pci_generic.c b/drivers/uio/uio_pci_generic.c index c7d681fef198..3bb0b0075467 100644 --- a/drivers/uio/uio_pci_generic.c +++ b/drivers/uio/uio_pci_generic.c @@ -82,7 +82,7 @@ static int probe(struct pci_dev *pdev, } if (pdev->irq && !pci_intx_mask_supported(pdev)) - return -ENOMEM; + return -ENODEV; gdev = devm_kzalloc(&pdev->dev, sizeof(struct uio_pci_generic_dev), GFP_KERNEL); if (!gdev) From 3ee098f96b8b6c1a98f7f97915f8873164e6af9d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 9 May 2021 09:13:03 +0200 Subject: [PATCH 251/730] uio_hv_generic: Fix a memory leak in error handling paths If 'vmbus_establish_gpadl()' fails, the (recv|send)_gpadl will not be updated and 'hv_uio_cleanup()' in the error handling path will not be able to free the corresponding buffer. In such a case, we need to free the buffer explicitly. Fixes: cdfa835c6e5e ("uio_hv_generic: defer opening vmbus until first use") Cc: stable Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/4fdaff557deef6f0475d02ba7922ddbaa1ab08a6.1620544055.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio_hv_generic.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index 0330ba99730e..eebc399f2cc7 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -296,8 +296,10 @@ hv_uio_probe(struct hv_device *dev, ret = vmbus_establish_gpadl(channel, pdata->recv_buf, RECV_BUFFER_SIZE, &pdata->recv_gpadl); - if (ret) + if (ret) { + vfree(pdata->recv_buf); goto fail_close; + } /* put Global Physical Address Label in name */ snprintf(pdata->recv_name, sizeof(pdata->recv_name), @@ -316,8 +318,10 @@ hv_uio_probe(struct hv_device *dev, ret = vmbus_establish_gpadl(channel, pdata->send_buf, SEND_BUFFER_SIZE, &pdata->send_gpadl); - if (ret) + if (ret) { + vfree(pdata->send_buf); goto fail_close; + } snprintf(pdata->send_name, sizeof(pdata->send_name), "send:%u", pdata->send_gpadl); From 0b0226be3a52dadd965644bc52a807961c2c26df Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 9 May 2021 09:13:12 +0200 Subject: [PATCH 252/730] uio_hv_generic: Fix another memory leak in error handling paths Memory allocated by 'vmbus_alloc_ring()' at the beginning of the probe function is never freed in the error handling path. Add the missing 'vmbus_free_ring()' call. Note that it is already freed in the .remove function. Fixes: cdfa835c6e5e ("uio_hv_generic: defer opening vmbus until first use") Cc: stable Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/0d86027b8eeed8e6360bc3d52bcdb328ff9bdca1.1620544055.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio_hv_generic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index eebc399f2cc7..652fe2547587 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -291,7 +291,7 @@ hv_uio_probe(struct hv_device *dev, pdata->recv_buf = vzalloc(RECV_BUFFER_SIZE); if (pdata->recv_buf == NULL) { ret = -ENOMEM; - goto fail_close; + goto fail_free_ring; } ret = vmbus_establish_gpadl(channel, pdata->recv_buf, @@ -351,6 +351,8 @@ hv_uio_probe(struct hv_device *dev, fail_close: hv_uio_cleanup(dev, pdata); +fail_free_ring: + vmbus_free_ring(dev->channel); return ret; } From 2962484dfef8dbb7f9059822bc26ce8a04d0e47c Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Tue, 20 Apr 2021 21:30:50 +0800 Subject: [PATCH 253/730] misc: eeprom: at24: check suspend status before disable regulator cd5676db0574 ("misc: eeprom: at24: support pm_runtime control") disables regulator in runtime suspend. If runtime suspend is called before regulator disable, it will results in regulator unbalanced disabling. Fixes: cd5676db0574 ("misc: eeprom: at24: support pm_runtime control") Cc: stable Acked-by: Bartosz Golaszewski Signed-off-by: Hsin-Yi Wang Link: https://lore.kernel.org/r/20210420133050.377209-1-hsinyi@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/eeprom/at24.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 926408b41270..7a6f01ace78a 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -763,7 +763,8 @@ static int at24_probe(struct i2c_client *client) at24->nvmem = devm_nvmem_register(dev, &nvmem_config); if (IS_ERR(at24->nvmem)) { pm_runtime_disable(dev); - regulator_disable(at24->vcc_reg); + if (!pm_runtime_status_suspended(dev)) + regulator_disable(at24->vcc_reg); return PTR_ERR(at24->nvmem); } @@ -774,7 +775,8 @@ static int at24_probe(struct i2c_client *client) err = at24_read(at24, 0, &test_byte, 1); if (err) { pm_runtime_disable(dev); - regulator_disable(at24->vcc_reg); + if (!pm_runtime_status_suspended(dev)) + regulator_disable(at24->vcc_reg); return -ENODEV; } From 27e7db56cf3dffd302bd7ddfacb1d405cf671a2a Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Wed, 5 May 2021 09:47:34 -0700 Subject: [PATCH 254/730] spi: Don't have controller clean up spi device before driver unbind When a spi device is unregistered and triggers a driver unbind, the driver might need to access the spi device. So, don't have the controller clean up the spi device before the driver is unbound. Clean up the spi device after the driver is unbound. Fixes: c7299fea6769 ("spi: Fix spi device unregister flow") Reported-by: Lukas Wunner Signed-off-by: Saravana Kannan Tested-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210505164734.175546-1-saravanak@google.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 98048af04abf..e353b7a9e54e 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -714,8 +714,6 @@ void spi_unregister_device(struct spi_device *spi) if (!spi) return; - spi_cleanup(spi); - if (spi->dev.of_node) { of_node_clear_flag(spi->dev.of_node, OF_POPULATED); of_node_put(spi->dev.of_node); @@ -723,7 +721,9 @@ void spi_unregister_device(struct spi_device *spi) if (ACPI_COMPANION(&spi->dev)) acpi_device_clear_enumerated(ACPI_COMPANION(&spi->dev)); device_remove_software_node(&spi->dev); - device_unregister(&spi->dev); + device_del(&spi->dev); + spi_cleanup(spi); + put_device(&spi->dev); } EXPORT_SYMBOL_GPL(spi_unregister_device); From 75016891357a628d2b8acc09e2b9b2576c18d318 Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Fri, 14 May 2021 08:23:03 +0700 Subject: [PATCH 255/730] Revert "net:tipc: Fix a double free in tipc_sk_mcast_rcv" This reverts commit 6bf24dc0cc0cc43b29ba344b66d78590e687e046. Above fix is not correct and caused memory leak issue. Fixes: 6bf24dc0cc0c ("net:tipc: Fix a double free in tipc_sk_mcast_rcv") Acked-by: Jon Maloy Acked-by: Tung Nguyen Signed-off-by: Hoang Le Signed-off-by: David S. Miller --- net/tipc/socket.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 58935cd0d068..53af72824c9c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1262,7 +1262,10 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, spin_lock_bh(&inputq->lock); if (skb_peek(arrvq) == skb) { skb_queue_splice_tail_init(&tmpq, inputq); - __skb_dequeue(arrvq); + /* Decrease the skb's refcnt as increasing in the + * function tipc_skb_peek + */ + kfree_skb(__skb_dequeue(arrvq)); } spin_unlock_bh(&inputq->lock); __skb_queue_purge(&tmpq); From 974271e5ed45cfe4daddbeb16224a2156918530e Mon Sep 17 00:00:00 2001 From: Jim Ma Date: Fri, 14 May 2021 11:11:02 +0800 Subject: [PATCH 256/730] tls splice: check SPLICE_F_NONBLOCK instead of MSG_DONTWAIT In tls_sw_splice_read, checkout MSG_* is inappropriate, should use SPLICE_*, update tls_wait_data to accept nonblock arguments instead of flags for recvmsg and splice. Fixes: c46234ebb4d1 ("tls: RX path for ktls") Signed-off-by: Jim Ma Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 1dcb34dfd56b..694de024d0ee 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -37,6 +37,7 @@ #include #include +#include #include #include @@ -1281,7 +1282,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page, } static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock, - int flags, long timeo, int *err) + bool nonblock, long timeo, int *err) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); @@ -1306,7 +1307,7 @@ static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock, if (sock_flag(sk, SOCK_DONE)) return NULL; - if ((flags & MSG_DONTWAIT) || !timeo) { + if (nonblock || !timeo) { *err = -EAGAIN; return NULL; } @@ -1786,7 +1787,7 @@ int tls_sw_recvmsg(struct sock *sk, bool async_capable; bool async = false; - skb = tls_wait_data(sk, psock, flags, timeo, &err); + skb = tls_wait_data(sk, psock, flags & MSG_DONTWAIT, timeo, &err); if (!skb) { if (psock) { int ret = sk_msg_recvmsg(sk, psock, msg, len, @@ -1990,9 +1991,9 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, lock_sock(sk); - timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + timeo = sock_rcvtimeo(sk, flags & SPLICE_F_NONBLOCK); - skb = tls_wait_data(sk, NULL, flags, timeo, &err); + skb = tls_wait_data(sk, NULL, flags & SPLICE_F_NONBLOCK, timeo, &err); if (!skb) goto splice_read_end; From a90c57f2cedd52a511f739fb55e6244e22e1a2fb Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Fri, 14 May 2021 11:16:59 +0800 Subject: [PATCH 257/730] net: sched: fix packet stuck problem for lockless qdisc Lockless qdisc has below concurrent problem: cpu0 cpu1 . . q->enqueue . . . qdisc_run_begin() . . . dequeue_skb() . . . sch_direct_xmit() . . . . q->enqueue . qdisc_run_begin() . return and do nothing . . qdisc_run_end() . cpu1 enqueue a skb without calling __qdisc_run() because cpu0 has not released the lock yet and spin_trylock() return false for cpu1 in qdisc_run_begin(), and cpu0 do not see the skb enqueued by cpu1 when calling dequeue_skb() because cpu1 may enqueue the skb after cpu0 calling dequeue_skb() and before cpu0 calling qdisc_run_end(). Lockless qdisc has below another concurrent problem when tx_action is involved: cpu0(serving tx_action) cpu1 cpu2 . . . . q->enqueue . . qdisc_run_begin() . . dequeue_skb() . . . q->enqueue . . . . sch_direct_xmit() . . . qdisc_run_begin() . . return and do nothing . . . clear __QDISC_STATE_SCHED . . qdisc_run_begin() . . return and do nothing . . . . . . qdisc_run_end() . This patch fixes the above data race by: 1. If the first spin_trylock() return false and STATE_MISSED is not set, set STATE_MISSED and retry another spin_trylock() in case other CPU may not see STATE_MISSED after it releases the lock. 2. reschedule if STATE_MISSED is set after the lock is released at the end of qdisc_run_end(). For tx_action case, STATE_MISSED is also set when cpu1 is at the end if qdisc_run_end(), so tx_action will be rescheduled again to dequeue the skb enqueued by cpu2. Clear STATE_MISSED before retrying a dequeuing when dequeuing returns NULL in order to reduce the overhead of the second spin_trylock() and __netif_schedule() calling. Also clear the STATE_MISSED before calling __netif_schedule() at the end of qdisc_run_end() to avoid doing another round of dequeuing in the pfifo_fast_dequeue(). The performance impact of this patch, tested using pktgen and dummy netdev with pfifo_fast qdisc attached: threads without+this_patch with+this_patch delta 1 2.61Mpps 2.60Mpps -0.3% 2 3.97Mpps 3.82Mpps -3.7% 4 5.62Mpps 5.59Mpps -0.5% 8 2.78Mpps 2.77Mpps -0.3% 16 2.22Mpps 2.22Mpps -0.0% Fixes: 6b3ba9146fe6 ("net: sched: allow qdiscs to handle locking") Acked-by: Jakub Kicinski Tested-by: Juergen Gross Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- include/net/sch_generic.h | 35 ++++++++++++++++++++++++++++++++++- net/sched/sch_generic.c | 19 +++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index f7a6e14491fb..1e625519ae96 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -36,6 +36,7 @@ struct qdisc_rate_table { enum qdisc_state_t { __QDISC_STATE_SCHED, __QDISC_STATE_DEACTIVATED, + __QDISC_STATE_MISSED, }; struct qdisc_size_table { @@ -159,8 +160,33 @@ static inline bool qdisc_is_empty(const struct Qdisc *qdisc) static inline bool qdisc_run_begin(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) { + if (spin_trylock(&qdisc->seqlock)) + goto nolock_empty; + + /* If the MISSED flag is set, it means other thread has + * set the MISSED flag before second spin_trylock(), so + * we can return false here to avoid multi cpus doing + * the set_bit() and second spin_trylock() concurrently. + */ + if (test_bit(__QDISC_STATE_MISSED, &qdisc->state)) + return false; + + /* Set the MISSED flag before the second spin_trylock(), + * if the second spin_trylock() return false, it means + * other cpu holding the lock will do dequeuing for us + * or it will see the MISSED flag set after releasing + * lock and reschedule the net_tx_action() to do the + * dequeuing. + */ + set_bit(__QDISC_STATE_MISSED, &qdisc->state); + + /* Retry again in case other CPU may not see the new flag + * after it releases the lock at the end of qdisc_run_end(). + */ if (!spin_trylock(&qdisc->seqlock)) return false; + +nolock_empty: WRITE_ONCE(qdisc->empty, false); } else if (qdisc_is_running(qdisc)) { return false; @@ -176,8 +202,15 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) static inline void qdisc_run_end(struct Qdisc *qdisc) { write_seqcount_end(&qdisc->running); - if (qdisc->flags & TCQ_F_NOLOCK) + if (qdisc->flags & TCQ_F_NOLOCK) { spin_unlock(&qdisc->seqlock); + + if (unlikely(test_bit(__QDISC_STATE_MISSED, + &qdisc->state))) { + clear_bit(__QDISC_STATE_MISSED, &qdisc->state); + __netif_schedule(qdisc); + } + } } static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 44991ea726fc..795d986e7030 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -640,8 +640,10 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) { struct pfifo_fast_priv *priv = qdisc_priv(qdisc); struct sk_buff *skb = NULL; + bool need_retry = true; int band; +retry: for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) { struct skb_array *q = band2list(priv, band); @@ -652,6 +654,23 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) } if (likely(skb)) { qdisc_update_stats_at_dequeue(qdisc, skb); + } else if (need_retry && + test_bit(__QDISC_STATE_MISSED, &qdisc->state)) { + /* Delay clearing the STATE_MISSED here to reduce + * the overhead of the second spin_trylock() in + * qdisc_run_begin() and __netif_schedule() calling + * in qdisc_run_end(). + */ + clear_bit(__QDISC_STATE_MISSED, &qdisc->state); + + /* Make sure dequeuing happens after clearing + * STATE_MISSED. + */ + smp_mb__after_atomic(); + + need_retry = false; + + goto retry; } else { WRITE_ONCE(qdisc->empty, true); } From 102b55ee92f9fda4dde7a45d2b20538e6e3e3d1e Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Fri, 14 May 2021 11:17:00 +0800 Subject: [PATCH 258/730] net: sched: fix tx action rescheduling issue during deactivation Currently qdisc_run() checks the STATE_DEACTIVATED of lockless qdisc before calling __qdisc_run(), which ultimately clear the STATE_MISSED when all the skb is dequeued. If STATE_DEACTIVATED is set before clearing STATE_MISSED, there may be rescheduling of net_tx_action() at the end of qdisc_run_end(), see below: CPU0(net_tx_atcion) CPU1(__dev_xmit_skb) CPU2(dev_deactivate) . . . . set STATE_MISSED . . __netif_schedule() . . . set STATE_DEACTIVATED . . qdisc_reset() . . . .<--------------- . synchronize_net() clear __QDISC_STATE_SCHED | . . . | . . . | . some_qdisc_is_busy() . | . return *false* . | . . test STATE_DEACTIVATED | . . __qdisc_run() *not* called | . . . | . . test STATE_MISS | . . __netif_schedule()--------| . . . . . . . . __qdisc_run() is not called by net_tx_atcion() in CPU0 because CPU2 has set STATE_DEACTIVATED flag during dev_deactivate(), and STATE_MISSED is only cleared in __qdisc_run(), __netif_schedule is called at the end of qdisc_run_end(), causing tx action rescheduling problem. qdisc_run() called by net_tx_action() runs in the softirq context, which should has the same semantic as the qdisc_run() called by __dev_xmit_skb() protected by rcu_read_lock_bh(). And there is a synchronize_net() between STATE_DEACTIVATED flag being set and qdisc_reset()/some_qdisc_is_busy in dev_deactivate(), we can safely bail out for the deactived lockless qdisc in net_tx_action(), and qdisc_reset() will reset all skb not dequeued yet. So add the rcu_read_lock() explicitly to protect the qdisc_run() and do the STATE_DEACTIVATED checking in net_tx_action() before calling qdisc_run_begin(). Another option is to do the checking in the qdisc_run_end(), but it will add unnecessary overhead for non-tx_action case, because __dev_queue_xmit() will not see qdisc with STATE_DEACTIVATED after synchronize_net(), the qdisc with STATE_DEACTIVATED can only be seen by net_tx_action() because of __netif_schedule(). The STATE_DEACTIVATED checking in qdisc_run() is to avoid race between net_tx_action() and qdisc_reset(), see: commit d518d2ed8640 ("net/sched: fix race between deactivation and dequeue for NOLOCK qdisc"). As the bailout added above for deactived lockless qdisc in net_tx_action() provides better protection for the race without calling qdisc_run() at all, so remove the STATE_DEACTIVATED checking in qdisc_run(). After qdisc_reset(), there is no skb in qdisc to be dequeued, so clear the STATE_MISSED in dev_reset_queue() too. Fixes: 6b3ba9146fe6 ("net: sched: allow qdiscs to handle locking") Acked-by: Jakub Kicinski Signed-off-by: Yunsheng Lin V8: Clearing STATE_MISSED before calling __netif_schedule() has avoid the endless rescheduling problem, but there may still be a unnecessary rescheduling, so adjust the commit log. Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 7 +------ net/core/dev.c | 26 ++++++++++++++++++++++---- net/sched/sch_generic.c | 4 +++- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index f5c1bee0cd6a..6d7b12cba015 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -128,12 +128,7 @@ void __qdisc_run(struct Qdisc *q); static inline void qdisc_run(struct Qdisc *q) { if (qdisc_run_begin(q)) { - /* NOLOCK qdisc must check 'state' under the qdisc seqlock - * to avoid racing with dev_qdisc_reset() - */ - if (!(q->flags & TCQ_F_NOLOCK) || - likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) - __qdisc_run(q); + __qdisc_run(q); qdisc_run_end(q); } } diff --git a/net/core/dev.c b/net/core/dev.c index 222b1d322c96..d596cd746353 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5025,25 +5025,43 @@ static __latent_entropy void net_tx_action(struct softirq_action *h) sd->output_queue_tailp = &sd->output_queue; local_irq_enable(); + rcu_read_lock(); + while (head) { struct Qdisc *q = head; spinlock_t *root_lock = NULL; head = head->next_sched; - if (!(q->flags & TCQ_F_NOLOCK)) { - root_lock = qdisc_lock(q); - spin_lock(root_lock); - } /* We need to make sure head->next_sched is read * before clearing __QDISC_STATE_SCHED */ smp_mb__before_atomic(); + + if (!(q->flags & TCQ_F_NOLOCK)) { + root_lock = qdisc_lock(q); + spin_lock(root_lock); + } else if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, + &q->state))) { + /* There is a synchronize_net() between + * STATE_DEACTIVATED flag being set and + * qdisc_reset()/some_qdisc_is_busy() in + * dev_deactivate(), so we can safely bail out + * early here to avoid data race between + * qdisc_deactivate() and some_qdisc_is_busy() + * for lockless qdisc. + */ + clear_bit(__QDISC_STATE_SCHED, &q->state); + continue; + } + clear_bit(__QDISC_STATE_SCHED, &q->state); qdisc_run(q); if (root_lock) spin_unlock(root_lock); } + + rcu_read_unlock(); } xfrm_dev_backlog(sd); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 795d986e7030..d86c4cca2cab 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1177,8 +1177,10 @@ static void dev_reset_queue(struct net_device *dev, qdisc_reset(qdisc); spin_unlock_bh(qdisc_lock(qdisc)); - if (nolock) + if (nolock) { + clear_bit(__QDISC_STATE_MISSED, &qdisc->state); spin_unlock_bh(&qdisc->seqlock); + } } static bool some_qdisc_is_busy(struct net_device *dev) From dcad9ee9e0663d74a89b25b987f9c7be86432812 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Fri, 14 May 2021 11:17:01 +0800 Subject: [PATCH 259/730] net: sched: fix tx action reschedule issue with stopped queue The netdev qeueue might be stopped when byte queue limit has reached or tx hw ring is full, net_tx_action() may still be rescheduled if STATE_MISSED is set, which consumes unnecessary cpu without dequeuing and transmiting any skb because the netdev queue is stopped, see qdisc_run_end(). This patch fixes it by checking the netdev queue state before calling qdisc_run() and clearing STATE_MISSED if netdev queue is stopped during qdisc_run(), the net_tx_action() is rescheduled again when netdev qeueue is restarted, see netif_tx_wake_queue(). As there is time window between netif_xmit_frozen_or_stopped() checking and STATE_MISSED clearing, between which STATE_MISSED may set by net_tx_action() scheduled by netif_tx_wake_queue(), so set the STATE_MISSED again if netdev queue is restarted. Fixes: 6b3ba9146fe6 ("net: sched: allow qdiscs to handle locking") Reported-by: Michal Kubecek Acked-by: Jakub Kicinski Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- net/sched/sch_generic.c | 27 ++++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index d596cd746353..ef8cf7619baf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3853,7 +3853,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, if (q->flags & TCQ_F_NOLOCK) { rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; - qdisc_run(q); + if (likely(!netif_xmit_frozen_or_stopped(txq))) + qdisc_run(q); if (unlikely(to_free)) kfree_skb_list(to_free); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index d86c4cca2cab..fc8b56bcabf3 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -35,6 +35,25 @@ const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops; EXPORT_SYMBOL(default_qdisc_ops); +static void qdisc_maybe_clear_missed(struct Qdisc *q, + const struct netdev_queue *txq) +{ + clear_bit(__QDISC_STATE_MISSED, &q->state); + + /* Make sure the below netif_xmit_frozen_or_stopped() + * checking happens after clearing STATE_MISSED. + */ + smp_mb__after_atomic(); + + /* Checking netif_xmit_frozen_or_stopped() again to + * make sure STATE_MISSED is set if the STATE_MISSED + * set by netif_tx_wake_queue()'s rescheduling of + * net_tx_action() is cleared by the above clear_bit(). + */ + if (!netif_xmit_frozen_or_stopped(txq)) + set_bit(__QDISC_STATE_MISSED, &q->state); +} + /* Main transmission queue. */ /* Modifications to data participating in scheduling must be protected with @@ -74,6 +93,7 @@ static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q) } } else { skb = SKB_XOFF_MAGIC; + qdisc_maybe_clear_missed(q, txq); } } @@ -242,6 +262,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, } } else { skb = NULL; + qdisc_maybe_clear_missed(q, txq); } if (lock) spin_unlock(lock); @@ -251,8 +272,10 @@ validate: *validate = true; if ((q->flags & TCQ_F_ONETXQUEUE) && - netif_xmit_frozen_or_stopped(txq)) + netif_xmit_frozen_or_stopped(txq)) { + qdisc_maybe_clear_missed(q, txq); return skb; + } skb = qdisc_dequeue_skb_bad_txq(q); if (unlikely(skb)) { @@ -311,6 +334,8 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_stopped(txq)) skb = dev_hard_start_xmit(skb, dev, txq, &ret); + else + qdisc_maybe_clear_missed(q, txq); HARD_TX_UNLOCK(dev, txq); } else { From b81ac7841d511d68989534eff5550269e1bf896d Mon Sep 17 00:00:00 2001 From: Jonathan Davies Date: Fri, 14 May 2021 14:41:01 +0000 Subject: [PATCH 260/730] net: cdc_eem: fix URL to CDC EEM 1.0 spec The old URL is no longer accessible. Signed-off-by: Jonathan Davies Signed-off-by: David S. Miller --- drivers/net/usb/cdc_eem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/cdc_eem.c b/drivers/net/usb/cdc_eem.c index 0eeec80bec31..2e60bc1b9a6b 100644 --- a/drivers/net/usb/cdc_eem.c +++ b/drivers/net/usb/cdc_eem.c @@ -26,7 +26,7 @@ * for transport over USB using a simpler USB device model than the * previous CDC "Ethernet Control Model" (ECM, or "CDC Ethernet"). * - * For details, see www.usb.org/developers/devclass_docs/CDC_EEM10.pdf + * For details, see https://usb.org/sites/default/files/CDC_EEM10.pdf * * This version has been tested with GIGAntIC WuaoW SIM Smart Card on 2.6.24, * 2.6.27 and 2.6.30rc2 kernel. From c625b80b9d00f3546722cd77527f9697c8c4c911 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Wed, 12 May 2021 18:01:45 +0800 Subject: [PATCH 261/730] scsi: ufs: ufs-mediatek: Fix power down spec violation As per spec, e.g. JESD220E chapter 7.2, while powering off the UFS device, RST_N signal should be between VSS(Ground) and VCCQ/VCCQ2. The power down sequence after fixing: Power down: 1. Assert RST_N low 2. Turn-off VCC 3. Turn-off VCCQ/VCCQ2 Link: https://lore.kernel.org/r/1620813706-25331-1-git-send-email-peter.wang@mediatek.com Reviewed-by: Stanley Chu Signed-off-by: Peter Wang Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs-mediatek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c index a981f261b304..aee3cfc7142a 100644 --- a/drivers/scsi/ufs/ufs-mediatek.c +++ b/drivers/scsi/ufs/ufs-mediatek.c @@ -922,6 +922,7 @@ static void ufs_mtk_vreg_set_lpm(struct ufs_hba *hba, bool lpm) static int ufs_mtk_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) { int err; + struct arm_smccc_res res; if (ufshcd_is_link_hibern8(hba)) { err = ufs_mtk_link_set_lpm(hba); @@ -941,6 +942,9 @@ static int ufs_mtk_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) goto fail; } + if (ufshcd_is_link_off(hba)) + ufs_mtk_device_reset_ctrl(0, res); + return 0; fail: /* From 56f396146af278135c0ff958c79b5ee1bd22453d Mon Sep 17 00:00:00 2001 From: Matt Wang Date: Tue, 11 May 2021 03:04:37 +0000 Subject: [PATCH 262/730] scsi: BusLogic: Fix 64-bit system enumeration error for Buslogic Commit 391e2f25601e ("[SCSI] BusLogic: Port driver to 64-bit") introduced a serious issue for 64-bit systems. With this commit, 64-bit kernel will enumerate 8*15 non-existing disks. This is caused by the broken CCB structure. The change from u32 data to void *data increased CCB length on 64-bit system, which introduced an extra 4 byte offset of the CDB. This leads to incorrect response to INQUIRY commands during enumeration. Fix disk enumeration failure by reverting the portion of the commit above which switched the data pointer from u32 to void. Link: https://lore.kernel.org/r/C325637F-1166-4340-8F0F-3BCCD59D4D54@vmware.com Acked-by: Khalid Aziz Signed-off-by: Matt Wang Signed-off-by: Martin K. Petersen --- drivers/scsi/BusLogic.c | 6 +++--- drivers/scsi/BusLogic.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c index 3ee46a843cb5..adddcd589941 100644 --- a/drivers/scsi/BusLogic.c +++ b/drivers/scsi/BusLogic.c @@ -2926,11 +2926,11 @@ static int blogic_qcmd_lck(struct scsi_cmnd *command, ccb->opcode = BLOGIC_INITIATOR_CCB_SG; ccb->datalen = count * sizeof(struct blogic_sg_seg); if (blogic_multimaster_type(adapter)) - ccb->data = (void *)((unsigned int) ccb->dma_handle + + ccb->data = (unsigned int) ccb->dma_handle + ((unsigned long) &ccb->sglist - - (unsigned long) ccb)); + (unsigned long) ccb); else - ccb->data = ccb->sglist; + ccb->data = virt_to_32bit_virt(ccb->sglist); scsi_for_each_sg(command, sg, count, i) { ccb->sglist[i].segbytes = sg_dma_len(sg); diff --git a/drivers/scsi/BusLogic.h b/drivers/scsi/BusLogic.h index a8e4a19788a7..7d1ec10f2430 100644 --- a/drivers/scsi/BusLogic.h +++ b/drivers/scsi/BusLogic.h @@ -806,7 +806,7 @@ struct blogic_ccb { unsigned char cdblen; /* Byte 2 */ unsigned char sense_datalen; /* Byte 3 */ u32 datalen; /* Bytes 4-7 */ - void *data; /* Bytes 8-11 */ + u32 data; /* Bytes 8-11 */ unsigned char:8; /* Byte 12 */ unsigned char:8; /* Byte 13 */ enum blogic_adapter_status adapter_status; /* Byte 14 */ From d0b2b70eb12e9ffaf95e11b16b230a4e015a536c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 13 May 2021 09:49:12 -0700 Subject: [PATCH 263/730] scsi: ufs: core: Increase the usable queue depth With the current implementation of the UFS driver active_queues is 1 instead of 0 if all UFS request queues are idle. That causes hctx_may_queue() to divide the queue depth by 2 when queueing a request and hence reduces the usable queue depth. The shared tag set code in the block layer keeps track of the number of active request queues. blk_mq_tag_busy() is called before a request is queued onto a hwq and blk_mq_tag_idle() is called some time after the hwq became idle. blk_mq_tag_idle() is called from inside blk_mq_timeout_work(). Hence, blk_mq_tag_idle() is only called if a timer is associated with each request that is submitted to a request queue that shares a tag set with another request queue. Adds a blk_mq_start_request() call in ufshcd_exec_dev_cmd(). This doubles the queue depth on my test setup from 16 to 32. In addition to increasing the usable queue depth, also fix the documentation of the 'timeout' parameter in the header above ufshcd_exec_dev_cmd(). Link: https://lore.kernel.org/r/20210513164912.5683-1-bvanassche@acm.org Fixes: 7252a3603015 ("scsi: ufs: Avoid busy-waiting by eliminating tag conflicts") Cc: Can Guo Cc: Alim Akhtar Cc: Avri Altman Cc: Stanley Chu Cc: Bean Huo Cc: Adrian Hunter Reviewed-by: Can Guo Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 3eb54937f1d8..72fd41bfbd54 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -2842,7 +2842,7 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, * ufshcd_exec_dev_cmd - API for sending device management requests * @hba: UFS hba * @cmd_type: specifies the type (NOP, Query...) - * @timeout: time in seconds + * @timeout: timeout in milliseconds * * NOTE: Since there is only one available tag for device management commands, * it is expected you hold the hba->dev_cmd.lock mutex. @@ -2872,6 +2872,9 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba, } tag = req->tag; WARN_ON_ONCE(!ufshcd_valid_tag(hba, tag)); + /* Set the timeout such that the SCSI error handler is not activated. */ + req->timeout = msecs_to_jiffies(2 * timeout); + blk_mq_start_request(req); init_completion(&wait); lrbp = &hba->lrb[tag]; From 73578af92a0fae6609b955fcc9113e50e413c80f Mon Sep 17 00:00:00 2001 From: Javed Hasan Date: Wed, 12 May 2021 00:25:33 -0700 Subject: [PATCH 264/730] scsi: qedf: Add pointer checks in qedf_update_link_speed() The following trace was observed: [ 14.042059] Call Trace: [ 14.042061] [ 14.042068] qedf_link_update+0x144/0x1f0 [qedf] [ 14.042117] qed_link_update+0x5c/0x80 [qed] [ 14.042135] qed_mcp_handle_link_change+0x2d2/0x410 [qed] [ 14.042155] ? qed_set_ptt+0x70/0x80 [qed] [ 14.042170] ? qed_set_ptt+0x70/0x80 [qed] [ 14.042186] ? qed_rd+0x13/0x40 [qed] [ 14.042205] qed_mcp_handle_events+0x437/0x690 [qed] [ 14.042221] ? qed_set_ptt+0x70/0x80 [qed] [ 14.042239] qed_int_sp_dpc+0x3a6/0x3e0 [qed] [ 14.042245] tasklet_action_common.isra.14+0x5a/0x100 [ 14.042250] __do_softirq+0xe4/0x2f8 [ 14.042253] irq_exit+0xf7/0x100 [ 14.042255] do_IRQ+0x7f/0xd0 [ 14.042257] common_interrupt+0xf/0xf [ 14.042259] API qedf_link_update() is getting called from QED but by that time shost_data is not initialised. This results in a NULL pointer dereference when we try to dereference shost_data while updating supported_speeds. Add a NULL pointer check before dereferencing shost_data. Link: https://lore.kernel.org/r/20210512072533.23618-1-jhasan@marvell.com Fixes: 61d8658b4a43 ("scsi: qedf: Add QLogic FastLinQ offload FCoE driver framework.") Reviewed-by: Himanshu Madhani Signed-off-by: Javed Hasan Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 69f7784233f9..756231151882 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -536,7 +536,9 @@ static void qedf_update_link_speed(struct qedf_ctx *qedf, if (linkmode_intersects(link->supported_caps, sup_caps)) lport->link_supported_speeds |= FC_PORTSPEED_20GBIT; - fc_host_supported_speeds(lport->host) = lport->link_supported_speeds; + if (lport->host && lport->host->shost_data) + fc_host_supported_speeds(lport->host) = + lport->link_supported_speeds; } static void qedf_bw_update(void *dev) From 8b549c18ae81dbc36fb11e4aa08b8378c599ca95 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 14 Apr 2021 14:45:43 +0200 Subject: [PATCH 265/730] openrisc: Define memory barrier mb This came up in the discussion of the requirements of qspinlock on an architecture. OpenRISC uses qspinlock, but it was noticed that the memmory barrier was not defined. Peter defined it in the mail thread writing: As near as I can tell this should do. The arch spec only lists this one instruction and the text makes it sound like a completion barrier. This is correct so applying this patch. Signed-off-by: Peter Zijlstra [shorne@gmail.com:Turned the mail into a patch] Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/barrier.h | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 arch/openrisc/include/asm/barrier.h diff --git a/arch/openrisc/include/asm/barrier.h b/arch/openrisc/include/asm/barrier.h new file mode 100644 index 000000000000..7538294721be --- /dev/null +++ b/arch/openrisc/include/asm/barrier.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_BARRIER_H +#define __ASM_BARRIER_H + +#define mb() asm volatile ("l.msync" ::: "memory") + +#include + +#endif /* __ASM_BARRIER_H */ From 5cb289bf2d7c34ca1abd794ce116c4f19185a1d4 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Fri, 14 May 2021 17:09:52 +0800 Subject: [PATCH 266/730] scsi: qla2xxx: Fix error return code in qla82xx_write_flash_dword() Fix to return a negative error code from the error handling case instead of 0 as done elsewhere in this function. Link: https://lore.kernel.org/r/20210514090952.6715-1-thunder.leizhen@huawei.com Fixes: a9083016a531 ("[SCSI] qla2xxx: Add ISP82XX support.") Reported-by: Hulk Robot Reviewed-by: Himanshu Madhani Signed-off-by: Zhen Lei Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_nx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c index 0677295957bc..615e44af1ca6 100644 --- a/drivers/scsi/qla2xxx/qla_nx.c +++ b/drivers/scsi/qla2xxx/qla_nx.c @@ -1063,7 +1063,8 @@ qla82xx_write_flash_dword(struct qla_hw_data *ha, uint32_t flashaddr, return ret; } - if (qla82xx_flash_set_write_enable(ha)) + ret = qla82xx_flash_set_write_enable(ha); + if (ret < 0) goto done_write; qla82xx_wr_32(ha, QLA82XX_ROMUSB_ROM_WDATA, data); From d1acd81bd6eb685aa9fef25624fb36d297f6404e Mon Sep 17 00:00:00 2001 From: Ajish Koshy Date: Wed, 5 May 2021 17:31:03 +0530 Subject: [PATCH 267/730] scsi: pm80xx: Fix drives missing during rmmod/insmod loop When driver is loaded after rmmod some drives are not showing up during discovery. SATA drives are directly attached to the controller connected phys. During device discovery, the IDENTIFY command (qc timeout (cmd 0xec)) is timing out during revalidation. This will trigger abort from host side and controller successfully aborts the command and returns success. Post this successful abort response ATA library decides to mark the disk as NODEV. To overcome this, inside pm8001_scan_start() after phy_start() call, add get start response and wait for few milliseconds to trigger next phy start. This millisecond delay will give sufficient time for the controller state machine to accept next phy start. Link: https://lore.kernel.org/r/20210505120103.24497-1-ajish.koshy@microchip.com Signed-off-by: Ajish Koshy Signed-off-by: Viswas G Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm8001_hwi.c | 10 ++++++---- drivers/scsi/pm8001/pm8001_init.c | 2 +- drivers/scsi/pm8001/pm8001_sas.c | 7 ++++++- drivers/scsi/pm8001/pm80xx_hwi.c | 12 ++++++------ 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c index ecd06d2d7e81..71aa6af08340 100644 --- a/drivers/scsi/pm8001/pm8001_hwi.c +++ b/drivers/scsi/pm8001/pm8001_hwi.c @@ -3765,11 +3765,13 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb) case HW_EVENT_PHY_START_STATUS: pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_PHY_START_STATUS status = %x\n", status); - if (status == 0) { + if (status == 0) phy->phy_state = 1; - if (pm8001_ha->flags == PM8001F_RUN_TIME && - phy->enable_completion != NULL) - complete(phy->enable_completion); + + if (pm8001_ha->flags == PM8001F_RUN_TIME && + phy->enable_completion != NULL) { + complete(phy->enable_completion); + phy->enable_completion = NULL; } break; case HW_EVENT_SAS_PHY_UP: diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c index 390c33df0357..af09bd282cb9 100644 --- a/drivers/scsi/pm8001/pm8001_init.c +++ b/drivers/scsi/pm8001/pm8001_init.c @@ -1151,8 +1151,8 @@ static int pm8001_pci_probe(struct pci_dev *pdev, goto err_out_shost; } list_add_tail(&pm8001_ha->list, &hba_list); - scsi_scan_host(pm8001_ha->shost); pm8001_ha->flags = PM8001F_RUN_TIME; + scsi_scan_host(pm8001_ha->shost); return 0; err_out_shost: diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index d28af413b93a..335cf37e6cb9 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -264,12 +264,17 @@ void pm8001_scan_start(struct Scsi_Host *shost) int i; struct pm8001_hba_info *pm8001_ha; struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost); + DECLARE_COMPLETION_ONSTACK(completion); pm8001_ha = sha->lldd_ha; /* SAS_RE_INITIALIZATION not available in SPCv/ve */ if (pm8001_ha->chip_id == chip_8001) PM8001_CHIP_DISP->sas_re_init_req(pm8001_ha); - for (i = 0; i < pm8001_ha->chip->n_phy; ++i) + for (i = 0; i < pm8001_ha->chip->n_phy; ++i) { + pm8001_ha->phy[i].enable_completion = &completion; PM8001_CHIP_DISP->phy_start_req(pm8001_ha, i); + wait_for_completion(&completion); + msleep(300); + } } int pm8001_scan_finished(struct Scsi_Host *shost, unsigned long time) diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index 4e980830f9f5..700530e969ac 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -3487,13 +3487,13 @@ static int mpi_phy_start_resp(struct pm8001_hba_info *pm8001_ha, void *piomb) pm8001_dbg(pm8001_ha, INIT, "phy start resp status:0x%x, phyid:0x%x\n", status, phy_id); - if (status == 0) { + if (status == 0) phy->phy_state = PHY_LINK_DOWN; - if (pm8001_ha->flags == PM8001F_RUN_TIME && - phy->enable_completion != NULL) { - complete(phy->enable_completion); - phy->enable_completion = NULL; - } + + if (pm8001_ha->flags == PM8001F_RUN_TIME && + phy->enable_completion != NULL) { + complete(phy->enable_completion); + phy->enable_completion = NULL; } return 0; From fbb80d5ad400a12ec67214a0e7e9f9497dc9e615 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Tue, 11 May 2021 20:54:28 +0800 Subject: [PATCH 268/730] irqchip: Remove redundant error printing When devm_ioremap_resource() fails, a clear enough error message will be printed by its subfunction __devm_ioremap_resource(). The error information contains the device name, failure cause, and possibly resource information. Therefore, remove the error printing here to simplify code and reduce the binary size. Reported-by: Hulk Robot Signed-off-by: Zhen Lei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210511125428.6108-2-thunder.leizhen@huawei.com --- drivers/irqchip/irq-mvebu-icu.c | 4 +--- drivers/irqchip/irq-mvebu-sei.c | 4 +--- drivers/irqchip/irq-stm32-exti.c | 4 +--- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/irqchip/irq-mvebu-icu.c b/drivers/irqchip/irq-mvebu-icu.c index 91adf771f185..090bc3f4f7d8 100644 --- a/drivers/irqchip/irq-mvebu-icu.c +++ b/drivers/irqchip/irq-mvebu-icu.c @@ -359,10 +359,8 @@ static int mvebu_icu_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); icu->base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(icu->base)) { - dev_err(&pdev->dev, "Failed to map icu base address.\n"); + if (IS_ERR(icu->base)) return PTR_ERR(icu->base); - } /* * Legacy bindings: ICU is one node with one MSI parent: force manually diff --git a/drivers/irqchip/irq-mvebu-sei.c b/drivers/irqchip/irq-mvebu-sei.c index 18832ccc8ff8..3a7b7a7f20ca 100644 --- a/drivers/irqchip/irq-mvebu-sei.c +++ b/drivers/irqchip/irq-mvebu-sei.c @@ -384,10 +384,8 @@ static int mvebu_sei_probe(struct platform_device *pdev) sei->res = platform_get_resource(pdev, IORESOURCE_MEM, 0); sei->base = devm_ioremap_resource(sei->dev, sei->res); - if (IS_ERR(sei->base)) { - dev_err(sei->dev, "Failed to remap SEI resource\n"); + if (IS_ERR(sei->base)) return PTR_ERR(sei->base); - } /* Retrieve the SEI capabilities with the interrupt ranges */ sei->caps = of_device_get_match_data(&pdev->dev); diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c index b9db90c4aa56..4704f2ee5797 100644 --- a/drivers/irqchip/irq-stm32-exti.c +++ b/drivers/irqchip/irq-stm32-exti.c @@ -892,10 +892,8 @@ static int stm32_exti_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); host_data->base = devm_ioremap_resource(dev, res); - if (IS_ERR(host_data->base)) { - dev_err(dev, "Unable to map registers\n"); + if (IS_ERR(host_data->base)) return PTR_ERR(host_data->base); - } for (i = 0; i < drv_data->bank_nr; i++) stm32_exti_chip_init(host_data, i, np); From c1f0616124c455c5c762b6f123e40bba5df759e6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 16 May 2021 18:17:55 +0200 Subject: [PATCH 269/730] ALSA: intel8x0: Don't update period unless prepared The interrupt handler of intel8x0 calls snd_intel8x0_update() whenever the hardware sets the corresponding status bit for each stream. This works fine for most cases as long as the hardware behaves properly. But when the hardware gives a wrong bit set, this leads to a zero- division Oops, and reportedly, this seems what happened on a VM. For fixing the crash, this patch adds a internal flag indicating that the stream is ready to be updated, and check it (as well as the flag being in suspended) to ignore such spurious update. Cc: Reported-and-tested-by: Sergey Senozhatsky Link: https://lore.kernel.org/r/s5h5yzi7uh0.wl-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/intel8x0.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c index 35903d1a1cbd..5b124c4ad572 100644 --- a/sound/pci/intel8x0.c +++ b/sound/pci/intel8x0.c @@ -331,6 +331,7 @@ struct ichdev { unsigned int ali_slot; /* ALI DMA slot */ struct ac97_pcm *pcm; int pcm_open_flag; + unsigned int prepared:1; unsigned int suspended: 1; }; @@ -691,6 +692,9 @@ static inline void snd_intel8x0_update(struct intel8x0 *chip, struct ichdev *ich int status, civ, i, step; int ack = 0; + if (!ichdev->prepared || ichdev->suspended) + return; + spin_lock_irqsave(&chip->reg_lock, flags); status = igetbyte(chip, port + ichdev->roff_sr); civ = igetbyte(chip, port + ICH_REG_OFF_CIV); @@ -881,6 +885,7 @@ static int snd_intel8x0_hw_params(struct snd_pcm_substream *substream, if (ichdev->pcm_open_flag) { snd_ac97_pcm_close(ichdev->pcm); ichdev->pcm_open_flag = 0; + ichdev->prepared = 0; } err = snd_ac97_pcm_open(ichdev->pcm, params_rate(hw_params), params_channels(hw_params), @@ -902,6 +907,7 @@ static int snd_intel8x0_hw_free(struct snd_pcm_substream *substream) if (ichdev->pcm_open_flag) { snd_ac97_pcm_close(ichdev->pcm); ichdev->pcm_open_flag = 0; + ichdev->prepared = 0; } return 0; } @@ -976,6 +982,7 @@ static int snd_intel8x0_pcm_prepare(struct snd_pcm_substream *substream) ichdev->pos_shift = (runtime->sample_bits > 16) ? 2 : 1; } snd_intel8x0_setup_periods(chip, ichdev); + ichdev->prepared = 1; return 0; } From 9d5e8492eee017ffdaa9f0957e91d39d83163197 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 9 May 2021 16:22:54 -0700 Subject: [PATCH 270/730] xfs: adjust rt allocation minlen when extszhint > rtextsize xfs_bmap_rtalloc doesn't handle realtime extent files with extent size hints larger than the rt volume's extent size properly, because xfs_bmap_extsize_align can adjust the offset/length parameters to try to fit the extent size hint. Under these conditions, minlen has to be large enough so that any allocation returned by xfs_rtallocate_extent will be large enough to cover at least one of the blocks that the caller asked for. If the allocation is too short, bmapi_write will return no mapping for the requested range, which causes ENOSPC errors in other parts of the filesystem. Therefore, adjust minlen upwards to fix this. This can be found by running generic/263 (g/127 or g/522) with a realtime extent size hint that's larger than the rt volume extent size. Signed-off-by: Darrick J. Wong Reviewed-by: Allison Henderson --- fs/xfs/xfs_bmap_util.c | 81 +++++++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 25 deletions(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index a5e9d7d34023..c9381bf4f04b 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -71,18 +71,23 @@ xfs_zero_extent( #ifdef CONFIG_XFS_RT int xfs_bmap_rtalloc( - struct xfs_bmalloca *ap) /* bmap alloc argument struct */ + struct xfs_bmalloca *ap) { - int error; /* error return value */ - xfs_mount_t *mp; /* mount point structure */ - xfs_extlen_t prod = 0; /* product factor for allocators */ - xfs_extlen_t mod = 0; /* product factor for allocators */ - xfs_extlen_t ralen = 0; /* realtime allocation length */ - xfs_extlen_t align; /* minimum allocation alignment */ - xfs_rtblock_t rtb; + struct xfs_mount *mp = ap->ip->i_mount; + xfs_fileoff_t orig_offset = ap->offset; + xfs_rtblock_t rtb; + xfs_extlen_t prod = 0; /* product factor for allocators */ + xfs_extlen_t mod = 0; /* product factor for allocators */ + xfs_extlen_t ralen = 0; /* realtime allocation length */ + xfs_extlen_t align; /* minimum allocation alignment */ + xfs_extlen_t orig_length = ap->length; + xfs_extlen_t minlen = mp->m_sb.sb_rextsize; + xfs_extlen_t raminlen; + bool rtlocked = false; + int error; - mp = ap->ip->i_mount; align = xfs_get_extsz_hint(ap->ip); +retry: prod = align / mp->m_sb.sb_rextsize; error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 1, ap->eof, 0, @@ -92,6 +97,15 @@ xfs_bmap_rtalloc( ASSERT(ap->length); ASSERT(ap->length % mp->m_sb.sb_rextsize == 0); + /* + * If we shifted the file offset downward to satisfy an extent size + * hint, increase minlen by that amount so that the allocator won't + * give us an allocation that's too short to cover at least one of the + * blocks that the caller asked for. + */ + if (ap->offset != orig_offset) + minlen += orig_offset - ap->offset; + /* * If the offset & length are not perfectly aligned * then kill prod, it will just get us in trouble. @@ -116,10 +130,13 @@ xfs_bmap_rtalloc( /* * Lock out modifications to both the RT bitmap and summary inodes */ - xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP); - xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL); - xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM); - xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL); + if (!rtlocked) { + xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP); + xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL); + xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM); + xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL); + rtlocked = true; + } /* * If it's an allocation to an empty file at offset 0, @@ -144,30 +161,44 @@ xfs_bmap_rtalloc( do_div(ap->blkno, mp->m_sb.sb_rextsize); rtb = ap->blkno; ap->length = ralen; - error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length, - &ralen, ap->wasdel, prod, &rtb); + raminlen = max_t(xfs_extlen_t, 1, minlen / mp->m_sb.sb_rextsize); + error = xfs_rtallocate_extent(ap->tp, ap->blkno, raminlen, ap->length, + &ralen, ap->wasdel, prod, &rtb); if (error) return error; - ap->blkno = rtb; - if (ap->blkno != NULLFSBLOCK) { - ap->blkno *= mp->m_sb.sb_rextsize; - ralen *= mp->m_sb.sb_rextsize; - ap->length = ralen; - ap->ip->i_nblocks += ralen; + if (rtb != NULLRTBLOCK) { + ap->blkno = rtb * mp->m_sb.sb_rextsize; + ap->length = ralen * mp->m_sb.sb_rextsize; + ap->ip->i_nblocks += ap->length; xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); if (ap->wasdel) - ap->ip->i_delayed_blks -= ralen; + ap->ip->i_delayed_blks -= ap->length; /* * Adjust the disk quota also. This was reserved * earlier. */ xfs_trans_mod_dquot_byino(ap->tp, ap->ip, ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT : - XFS_TRANS_DQ_RTBCOUNT, (long) ralen); - } else { - ap->length = 0; + XFS_TRANS_DQ_RTBCOUNT, ap->length); + return 0; } + + if (align > mp->m_sb.sb_rextsize) { + /* + * We previously enlarged the request length to try to satisfy + * an extent size hint. The allocator didn't return anything, + * so reset the parameters to the original values and try again + * without alignment criteria. + */ + ap->offset = orig_offset; + ap->length = orig_length; + minlen = align = mp->m_sb.sb_rextsize; + goto retry; + } + + ap->blkno = NULLFSBLOCK; + ap->length = 0; return 0; } #endif /* CONFIG_XFS_RT */ From 9e3927f6373da54cb17e17f4bd700907e1123d2f Mon Sep 17 00:00:00 2001 From: Li Jun Date: Fri, 14 May 2021 18:59:44 +0800 Subject: [PATCH 271/730] usb: chipidea: udc: assign interrupt number to USB gadget structure Chipidea also need sync interrupt before unbind the udc while gadget remove driver, otherwise setup irq handling may happen while unbind, see below dump generated from android function switch stress test: [ 4703.503056] android_work: sent uevent USB_STATE=CONNECTED [ 4703.514642] android_work: sent uevent USB_STATE=DISCONNECTED [ 4703.651339] android_work: sent uevent USB_STATE=CONNECTED [ 4703.661806] init: Control message: Processed ctl.stop for 'adbd' from pid: 561 (system_server) [ 4703.673469] init: processing action (init.svc.adbd=stopped) from (/system/etc/init/hw/init.usb.configfs.rc:14) [ 4703.676451] Unable to handle kernel read from unreadable memory at virtual address 0000000000000090 [ 4703.676454] Mem abort info: [ 4703.676458] ESR = 0x96000004 [ 4703.676461] EC = 0x25: DABT (current EL), IL = 32 bits [ 4703.676464] SET = 0, FnV = 0 [ 4703.676466] EA = 0, S1PTW = 0 [ 4703.676468] Data abort info: [ 4703.676471] ISV = 0, ISS = 0x00000004 [ 4703.676473] CM = 0, WnR = 0 [ 4703.676478] user pgtable: 4k pages, 48-bit VAs, pgdp=000000004a867000 [ 4703.676481] [0000000000000090] pgd=0000000000000000, p4d=0000000000000000 [ 4703.676503] Internal error: Oops: 96000004 [#1] PREEMPT SMP [ 4703.758297] Modules linked in: synaptics_dsx_i2c moal(O) mlan(O) [ 4703.764327] CPU: 0 PID: 235 Comm: lmkd Tainted: G W O 5.10.9-00001-g3f5fd8487c38-dirty #63 [ 4703.773720] Hardware name: NXP i.MX8MNano EVK board (DT) [ 4703.779033] pstate: 60400085 (nZCv daIf +PAN -UAO -TCO BTYPE=--) [ 4703.785046] pc : _raw_write_unlock_bh+0xc0/0x2c8 [ 4703.789667] lr : android_setup+0x4c/0x168 [ 4703.793676] sp : ffff80001256bd80 [ 4703.796989] x29: ffff80001256bd80 x28: 00000000000000a8 [ 4703.802304] x27: ffff800012470000 x26: ffff80006d923000 [ 4703.807616] x25: ffff800012471000 x24: ffff00000b091140 [ 4703.812929] x23: ffff0000077dbd38 x22: ffff0000077da490 [ 4703.818242] x21: ffff80001256be30 x20: 0000000000000000 [ 4703.823554] x19: 0000000000000080 x18: ffff800012561048 [ 4703.828867] x17: 0000000000000000 x16: 0000000000000039 [ 4703.834180] x15: ffff8000106ad258 x14: ffff80001194c277 [ 4703.839493] x13: 0000000000003934 x12: 0000000000000000 [ 4703.844805] x11: 0000000000000000 x10: 0000000000000001 [ 4703.850117] x9 : 0000000000000000 x8 : 0000000000000090 [ 4703.855429] x7 : 6f72646e61203a70 x6 : ffff8000124f2450 [ 4703.860742] x5 : ffffffffffffffff x4 : 0000000000000009 [ 4703.866054] x3 : ffff8000108a290c x2 : ffff00007fb3a9c8 [ 4703.871367] x1 : 0000000000000000 x0 : 0000000000000090 [ 4703.876681] Call trace: [ 4703.879129] _raw_write_unlock_bh+0xc0/0x2c8 [ 4703.883397] android_setup+0x4c/0x168 [ 4703.887059] udc_irq+0x824/0xa9c [ 4703.890287] ci_irq+0x124/0x148 [ 4703.893429] __handle_irq_event_percpu+0x84/0x268 [ 4703.898131] handle_irq_event+0x64/0x14c [ 4703.902054] handle_fasteoi_irq+0x110/0x210 [ 4703.906236] __handle_domain_irq+0x8c/0xd4 [ 4703.910332] gic_handle_irq+0x6c/0x124 [ 4703.914081] el1_irq+0xdc/0x1c0 [ 4703.917221] _raw_spin_unlock_irq+0x20/0x54 [ 4703.921405] finish_task_switch+0x84/0x224 [ 4703.925502] __schedule+0x4a4/0x734 [ 4703.928990] schedule+0xa0/0xe8 [ 4703.932132] do_notify_resume+0x150/0x184 [ 4703.936140] work_pending+0xc/0x40c [ 4703.939633] Code: d5384613 521b0a69 d5184609 f9800111 (885ffd01) [ 4703.945732] ---[ end trace ba5c1875ae49d53c ]--- [ 4703.950350] Kernel panic - not syncing: Oops: Fatal exception in interrupt [ 4703.957223] SMP: stopping secondary CPUs [ 4703.961151] Kernel Offset: disabled [ 4703.964638] CPU features: 0x0240002,2000200c [ 4703.968905] Memory Limit: none [ 4703.971963] Rebooting in 5 seconds.. Tested-by: faqiang.zhu Signed-off-by: Li Jun Link: https://lore.kernel.org/r/1620989984-7653-1-git-send-email-jun.li@nxp.com Signed-off-by: Peter Chen --- drivers/usb/chipidea/udc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index c16d900cdaee..393f216b9161 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -2061,6 +2061,7 @@ static int udc_start(struct ci_hdrc *ci) ci->gadget.name = ci->platdata->name; ci->gadget.otg_caps = otg_caps; ci->gadget.sg_supported = 1; + ci->gadget.irq = ci->irq; if (ci->platdata->flags & CI_HDRC_REQUIRES_ALIGNED_DMA) ci->gadget.quirk_avoids_skb_reserve = 1; From c6de37dd5e48b883db032aa4dc0547a4858b9f20 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 21 Apr 2021 11:58:48 -0700 Subject: [PATCH 272/730] tools build: Fix quiet cmd indentation The tools quiet cmd output has mismatched indentation (and extra space character between cmd name and target name) compared to the rest of kbuild out: HOSTCC scripts/insert-sys-cert LD /srv/code/tools/objtool/arch/x86/objtool-in.o LD /srv/code/tools/objtool/libsubcmd-in.o AR /srv/code/tools/objtool/libsubcmd.a HOSTLD scripts/genksyms/genksyms CC scripts/mod/empty.o HOSTCC scripts/mod/mk_elfconfig CC scripts/mod/devicetable-offsets.s MKELF scripts/mod/elfconfig.h HOSTCC scripts/mod/modpost.o HOSTCC scripts/mod/file2alias.o HOSTCC scripts/mod/sumversion.o LD /srv/code/tools/objtool/objtool-in.o LINK /srv/code/tools/objtool/objtool HOSTLD scripts/mod/modpost CC kernel/bounds.s Adjust to match the rest of kbuild. Signed-off-by: Kees Cook Signed-off-by: Masahiro Yamada --- tools/build/Makefile.build | 22 +++++++++++----------- tools/scripts/Makefile.include | 30 +++++++++++++++--------------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index cd72016c3cfa..715092fc6a23 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -51,39 +51,39 @@ subdir-obj-y := build-file := $(dir)/Build -include $(build-file) -quiet_cmd_flex = FLEX $@ -quiet_cmd_bison = BISON $@ +quiet_cmd_flex = FLEX $@ +quiet_cmd_bison = BISON $@ # Create directory unless it exists -quiet_cmd_mkdir = MKDIR $(dir $@) +quiet_cmd_mkdir = MKDIR $(dir $@) cmd_mkdir = mkdir -p $(dir $@) rule_mkdir = $(if $(wildcard $(dir $@)),,@$(call echo-cmd,mkdir) $(cmd_mkdir)) # Compile command -quiet_cmd_cc_o_c = CC $@ +quiet_cmd_cc_o_c = CC $@ cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< -quiet_cmd_host_cc_o_c = HOSTCC $@ +quiet_cmd_host_cc_o_c = HOSTCC $@ cmd_host_cc_o_c = $(HOSTCC) $(host_c_flags) -c -o $@ $< -quiet_cmd_cxx_o_c = CXX $@ +quiet_cmd_cxx_o_c = CXX $@ cmd_cxx_o_c = $(CXX) $(cxx_flags) -c -o $@ $< -quiet_cmd_cpp_i_c = CPP $@ +quiet_cmd_cpp_i_c = CPP $@ cmd_cpp_i_c = $(CC) $(c_flags) -E -o $@ $< -quiet_cmd_cc_s_c = AS $@ +quiet_cmd_cc_s_c = AS $@ cmd_cc_s_c = $(CC) $(c_flags) -S -o $@ $< -quiet_cmd_gen = GEN $@ +quiet_cmd_gen = GEN $@ # Link agregate command # If there's nothing to link, create empty $@ object. -quiet_cmd_ld_multi = LD $@ +quiet_cmd_ld_multi = LD $@ cmd_ld_multi = $(if $(strip $(obj-y)),\ $(LD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(AR) rcs $@) -quiet_cmd_host_ld_multi = HOSTLD $@ +quiet_cmd_host_ld_multi = HOSTLD $@ cmd_host_ld_multi = $(if $(strip $(obj-y)),\ $(HOSTLD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(HOSTAR) rcs $@) diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index f9271f3ea912..071312f5eb92 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -131,29 +131,29 @@ QUIET_SUBDIR1 = ifneq ($(silent),1) ifneq ($(V),1) - QUIET_CC = @echo ' CC '$@; - QUIET_CC_FPIC = @echo ' CC FPIC '$@; - QUIET_CLANG = @echo ' CLANG '$@; - QUIET_AR = @echo ' AR '$@; - QUIET_LINK = @echo ' LINK '$@; - QUIET_MKDIR = @echo ' MKDIR '$@; - QUIET_GEN = @echo ' GEN '$@; + QUIET_CC = @echo ' CC '$@; + QUIET_CC_FPIC = @echo ' CC FPIC '$@; + QUIET_CLANG = @echo ' CLANG '$@; + QUIET_AR = @echo ' AR '$@; + QUIET_LINK = @echo ' LINK '$@; + QUIET_MKDIR = @echo ' MKDIR '$@; + QUIET_GEN = @echo ' GEN '$@; QUIET_SUBDIR0 = +@subdir= QUIET_SUBDIR1 = ;$(NO_SUBDIR) \ - echo ' SUBDIR '$$subdir; \ + echo ' SUBDIR '$$subdir; \ $(MAKE) $(PRINT_DIR) -C $$subdir - QUIET_FLEX = @echo ' FLEX '$@; - QUIET_BISON = @echo ' BISON '$@; - QUIET_GENSKEL = @echo ' GEN-SKEL '$@; + QUIET_FLEX = @echo ' FLEX '$@; + QUIET_BISON = @echo ' BISON '$@; + QUIET_GENSKEL = @echo ' GENSKEL '$@; descend = \ - +@echo ' DESCEND '$(1); \ + +@echo ' DESCEND '$(1); \ mkdir -p $(OUTPUT)$(1) && \ $(MAKE) $(COMMAND_O) subdir=$(if $(subdir),$(subdir)/$(1),$(1)) $(PRINT_DIR) -C $(1) $(2) - QUIET_CLEAN = @printf ' CLEAN %s\n' $1; - QUIET_INSTALL = @printf ' INSTALL %s\n' $1; - QUIET_UNINST = @printf ' UNINST %s\n' $1; + QUIET_CLEAN = @printf ' CLEAN %s\n' $1; + QUIET_INSTALL = @printf ' INSTALL %s\n' $1; + QUIET_UNINST = @printf ' UNINST %s\n' $1; endif endif From 98a499a11ecdd8cb91d03dd5c034aaf7422f2deb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Thu, 13 May 2021 18:24:02 +0200 Subject: [PATCH 273/730] scripts/jobserver-exec: Fix a typo ("envirnoment") MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jonathan Neuschäfer Signed-off-by: Masahiro Yamada --- scripts/jobserver-exec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/jobserver-exec b/scripts/jobserver-exec index 48d141e3ec56..8762887a970c 100755 --- a/scripts/jobserver-exec +++ b/scripts/jobserver-exec @@ -10,7 +10,7 @@ from __future__ import print_function import os, sys, errno import subprocess -# Extract and prepare jobserver file descriptors from envirnoment. +# Extract and prepare jobserver file descriptors from environment. claim = 0 jobs = b"" try: From c93db682cfb213501881072a9200a48ce1dc3c3f Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Sat, 15 May 2021 12:11:13 +0200 Subject: [PATCH 274/730] kbuild: dummy-tools: adjust to stricter stackprotector check Commit 3fb0fdb3bbe7 ("x86/stackprotector/32: Make the canary into a regular percpu variable") modified the stackprotector check on 32-bit x86 to check if gcc supports using %fs as canary. Adjust dummy-tools gcc script to pass this new test by returning "%fs" rather than "%gs" if it detects -mstack-protector-guard-reg=fs on command line. Fixes: 3fb0fdb3bbe7 ("x86/stackprotector/32: Make the canary into a regular percpu variable") Signed-off-by: Michal Kubecek Signed-off-by: Masahiro Yamada --- scripts/dummy-tools/gcc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/dummy-tools/gcc b/scripts/dummy-tools/gcc index f6d543725f1e..b2483149bbe5 100755 --- a/scripts/dummy-tools/gcc +++ b/scripts/dummy-tools/gcc @@ -76,7 +76,11 @@ fi if arg_contain -S "$@"; then # For scripts/gcc-x86-*-has-stack-protector.sh if arg_contain -fstack-protector "$@"; then - echo "%gs" + if arg_contain -mstack-protector-guard-reg=fs "$@"; then + echo "%fs" + else + echo "%gs" + fi exit 0 fi From 4236a26a6b998c8c4fdc0117b8848a38789c48ae Mon Sep 17 00:00:00 2001 From: wenhuizhang Date: Thu, 13 May 2021 12:55:16 -0400 Subject: [PATCH 275/730] cifs: remove deadstore in cifs_close_all_deferred_files() Deadstore detected by Lukas Bulwahn's CodeChecker Tool (ELISA group). line 741 struct cifsInodeInfo *cinode; line 747 cinode = CIFS_I(d_inode(cfile->dentry)); could be deleted. cinode on filesystem should not be deleted when files are closed, they are representations of some data fields on a physical disk, thus no further action is required. The virtual inode on vfs will be handled by vfs automatically, and the denotation is inode, which is different from the cinode. Signed-off-by: wenhuizhang Reviewed-by: Aurelien Aptel Signed-off-by: Steve French --- fs/cifs/misc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 524dbdfb7184..801a5300f765 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -738,13 +738,11 @@ void cifs_close_all_deferred_files(struct cifs_tcon *tcon) { struct cifsFileInfo *cfile; - struct cifsInodeInfo *cinode; struct list_head *tmp; spin_lock(&tcon->open_file_lock); list_for_each(tmp, &tcon->openFileList) { cfile = list_entry(tmp, struct cifsFileInfo, tlist); - cinode = CIFS_I(d_inode(cfile->dentry)); if (delayed_work_pending(&cfile->deferred)) mod_delayed_work(deferredclose_wq, &cfile->deferred, 0); } From 055f23b74b20f2824ce33047b4cf2e2aa856bf3b Mon Sep 17 00:00:00 2001 From: Jessica Yu Date: Wed, 12 May 2021 15:45:46 +0200 Subject: [PATCH 276/730] module: check for exit sections in layout_sections() instead of module_init_section() Previously, when CONFIG_MODULE_UNLOAD=n, the module loader just does not attempt to load exit sections since it never expects that any code in those sections will ever execute. However, dynamic code patching (alternatives, jump_label and static_call) can have sites in __exit code, even if __exit is never executed. Therefore __exit must be present at runtime, at least for as long as __init code is. Commit 33121347fb1c ("module: treat exit sections the same as init sections when !CONFIG_MODULE_UNLOAD") solves the requirements of jump_labels and static_calls by putting the exit sections in the init region of the module so that they are at least present at init, and discarded afterwards. It does this by including a check for exit sections in module_init_section(), so that it also returns true for exit sections, and the module loader will automatically sort them in the init region of the module. However, the solution there was not completely arch-independent. ARM is a special case where it supplies its own module_{init, exit}_section() functions. Instead of pushing the exit section checks into module_init_section(), just implement the exit section check in layout_sections(), so that we don't have to touch arch-dependent code. Fixes: 33121347fb1c ("module: treat exit sections the same as init sections when !CONFIG_MODULE_UNLOAD") Reviewed-by: Russell King (Oracle) Signed-off-by: Jessica Yu --- kernel/module.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/kernel/module.c b/kernel/module.c index b5dd92e35b02..7e78dfabca97 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2401,6 +2401,15 @@ static long get_offset(struct module *mod, unsigned int *size, return ret; } +static bool module_init_layout_section(const char *sname) +{ +#ifndef CONFIG_MODULE_UNLOAD + if (module_exit_section(sname)) + return true; +#endif + return module_init_section(sname); +} + /* * Lay out the SHF_ALLOC sections in a way not dissimilar to how ld * might -- code, read-only data, read-write data, small data. Tally @@ -2435,7 +2444,7 @@ static void layout_sections(struct module *mod, struct load_info *info) if ((s->sh_flags & masks[m][0]) != masks[m][0] || (s->sh_flags & masks[m][1]) || s->sh_entsize != ~0UL - || module_init_section(sname)) + || module_init_layout_section(sname)) continue; s->sh_entsize = get_offset(mod, &mod->core_layout.size, s, i); pr_debug("\t%s\n", sname); @@ -2468,7 +2477,7 @@ static void layout_sections(struct module *mod, struct load_info *info) if ((s->sh_flags & masks[m][0]) != masks[m][0] || (s->sh_flags & masks[m][1]) || s->sh_entsize != ~0UL - || !module_init_section(sname)) + || !module_init_layout_section(sname)) continue; s->sh_entsize = (get_offset(mod, &mod->init_layout.size, s, i) | INIT_OFFSET_MASK); @@ -2807,11 +2816,7 @@ void * __weak module_alloc(unsigned long size) bool __weak module_init_section(const char *name) { -#ifndef CONFIG_MODULE_UNLOAD - return strstarts(name, ".init") || module_exit_section(name); -#else return strstarts(name, ".init"); -#endif } bool __weak module_exit_section(const char *name) From 145e06b58f8625becc61792a0554726314297a85 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 13 May 2021 16:39:02 +0800 Subject: [PATCH 277/730] drm/i915/gvt: Move mdev attribute groups into kvmgt module As kvmgt module contains all handling for VFIO/mdev, leaving mdev attribute groups in gvt module caused dependency issue. Although it was there for possible other hypervisor usage, that turns out never to be true. So this moves all mdev handling into kvmgt module completely to resolve dependency issue. With this fix, no config workaround is required. So revert previous workaround commits: adaeb718d46f ("vfio/gvt: fix DRM_I915_GVT dependency on VFIO_MDEV") and 07e543f4f9d1 ("vfio/gvt: Make DRM_I915_GVT depend on VFIO_MDEV"). Reviewed-by: Colin Xu Cc: Arnd Bergmann Cc: Jason Gunthorpe Cc: Alex Williamson Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20210513083902.2822350-1-zhenyuw@linux.intel.com --- drivers/gpu/drm/i915/Kconfig | 1 - drivers/gpu/drm/i915/gvt/gvt.c | 124 +-------------------------- drivers/gpu/drm/i915/gvt/gvt.h | 3 - drivers/gpu/drm/i915/gvt/hypercall.h | 2 +- drivers/gpu/drm/i915/gvt/kvmgt.c | 122 ++++++++++++++++++++++++-- drivers/gpu/drm/i915/gvt/mpt.h | 4 +- 6 files changed, 118 insertions(+), 138 deletions(-) diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 69f57ca9c68d..93f4d059fc89 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -102,7 +102,6 @@ config DRM_I915_GVT bool "Enable Intel GVT-g graphics virtualization host support" depends on DRM_I915 depends on 64BIT - depends on VFIO_MDEV=y || VFIO_MDEV=DRM_I915 default n help Choose this option if you want to enable Intel GVT-g graphics diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index e7c2babcee8b..cbac409f6c8a 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -46,118 +46,6 @@ static const char * const supported_hypervisors[] = { [INTEL_GVT_HYPERVISOR_KVM] = "KVM", }; -static struct intel_vgpu_type * -intel_gvt_find_vgpu_type(struct intel_gvt *gvt, unsigned int type_group_id) -{ - if (WARN_ON(type_group_id >= gvt->num_types)) - return NULL; - return &gvt->types[type_group_id]; -} - -static ssize_t available_instances_show(struct mdev_type *mtype, - struct mdev_type_attribute *attr, - char *buf) -{ - struct intel_vgpu_type *type; - unsigned int num = 0; - void *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt; - - type = intel_gvt_find_vgpu_type(gvt, mtype_get_type_group_id(mtype)); - if (!type) - num = 0; - else - num = type->avail_instance; - - return sprintf(buf, "%u\n", num); -} - -static ssize_t device_api_show(struct mdev_type *mtype, - struct mdev_type_attribute *attr, char *buf) -{ - return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING); -} - -static ssize_t description_show(struct mdev_type *mtype, - struct mdev_type_attribute *attr, char *buf) -{ - struct intel_vgpu_type *type; - void *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt; - - type = intel_gvt_find_vgpu_type(gvt, mtype_get_type_group_id(mtype)); - if (!type) - return 0; - - return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n" - "fence: %d\nresolution: %s\n" - "weight: %d\n", - BYTES_TO_MB(type->low_gm_size), - BYTES_TO_MB(type->high_gm_size), - type->fence, vgpu_edid_str(type->resolution), - type->weight); -} - -static MDEV_TYPE_ATTR_RO(available_instances); -static MDEV_TYPE_ATTR_RO(device_api); -static MDEV_TYPE_ATTR_RO(description); - -static struct attribute *gvt_type_attrs[] = { - &mdev_type_attr_available_instances.attr, - &mdev_type_attr_device_api.attr, - &mdev_type_attr_description.attr, - NULL, -}; - -static struct attribute_group *gvt_vgpu_type_groups[] = { - [0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL, -}; - -static bool intel_get_gvt_attrs(struct attribute_group ***intel_vgpu_type_groups) -{ - *intel_vgpu_type_groups = gvt_vgpu_type_groups; - return true; -} - -static int intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt) -{ - int i, j; - struct intel_vgpu_type *type; - struct attribute_group *group; - - for (i = 0; i < gvt->num_types; i++) { - type = &gvt->types[i]; - - group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL); - if (WARN_ON(!group)) - goto unwind; - - group->name = type->name; - group->attrs = gvt_type_attrs; - gvt_vgpu_type_groups[i] = group; - } - - return 0; - -unwind: - for (j = 0; j < i; j++) { - group = gvt_vgpu_type_groups[j]; - kfree(group); - } - - return -ENOMEM; -} - -static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt) -{ - int i; - struct attribute_group *group; - - for (i = 0; i < gvt->num_types; i++) { - group = gvt_vgpu_type_groups[i]; - gvt_vgpu_type_groups[i] = NULL; - kfree(group); - } -} - static const struct intel_gvt_ops intel_gvt_ops = { .emulate_cfg_read = intel_vgpu_emulate_cfg_read, .emulate_cfg_write = intel_vgpu_emulate_cfg_write, @@ -169,8 +57,6 @@ static const struct intel_gvt_ops intel_gvt_ops = { .vgpu_reset = intel_gvt_reset_vgpu, .vgpu_activate = intel_gvt_activate_vgpu, .vgpu_deactivate = intel_gvt_deactivate_vgpu, - .gvt_find_vgpu_type = intel_gvt_find_vgpu_type, - .get_gvt_attrs = intel_get_gvt_attrs, .vgpu_query_plane = intel_vgpu_query_plane, .vgpu_get_dmabuf = intel_vgpu_get_dmabuf, .write_protect_handler = intel_vgpu_page_track_handler, @@ -274,7 +160,6 @@ void intel_gvt_clean_device(struct drm_i915_private *i915) return; intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu); - intel_gvt_cleanup_vgpu_type_groups(gvt); intel_gvt_clean_vgpu_types(gvt); intel_gvt_debugfs_clean(gvt); @@ -363,12 +248,6 @@ int intel_gvt_init_device(struct drm_i915_private *i915) if (ret) goto out_clean_thread; - ret = intel_gvt_init_vgpu_type_groups(gvt); - if (ret) { - gvt_err("failed to init vgpu type groups: %d\n", ret); - goto out_clean_types; - } - vgpu = intel_gvt_create_idle_vgpu(gvt); if (IS_ERR(vgpu)) { ret = PTR_ERR(vgpu); @@ -454,7 +333,8 @@ EXPORT_SYMBOL_GPL(intel_gvt_register_hypervisor); void intel_gvt_unregister_hypervisor(void) { - intel_gvt_hypervisor_host_exit(intel_gvt_host.dev); + void *gvt = (void *)kdev_to_i915(intel_gvt_host.dev)->gvt; + intel_gvt_hypervisor_host_exit(intel_gvt_host.dev, gvt); module_put(THIS_MODULE); } EXPORT_SYMBOL_GPL(intel_gvt_unregister_hypervisor); diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 88ab360fcb31..0c0615602343 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -574,9 +574,6 @@ struct intel_gvt_ops { void (*vgpu_reset)(struct intel_vgpu *); void (*vgpu_activate)(struct intel_vgpu *); void (*vgpu_deactivate)(struct intel_vgpu *); - struct intel_vgpu_type *(*gvt_find_vgpu_type)( - struct intel_gvt *gvt, unsigned int type_group_id); - bool (*get_gvt_attrs)(struct attribute_group ***intel_vgpu_type_groups); int (*vgpu_query_plane)(struct intel_vgpu *vgpu, void *); int (*vgpu_get_dmabuf)(struct intel_vgpu *vgpu, unsigned int); int (*write_protect_handler)(struct intel_vgpu *, u64, void *, diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index b79da5124f83..f33e3cbd0439 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -49,7 +49,7 @@ enum hypervisor_type { struct intel_gvt_mpt { enum hypervisor_type type; int (*host_init)(struct device *dev, void *gvt, const void *ops); - void (*host_exit)(struct device *dev); + void (*host_exit)(struct device *dev, void *gvt); int (*attach_vgpu)(void *vgpu, unsigned long *handle); void (*detach_vgpu)(void *vgpu); int (*inject_msi)(unsigned long handle, u32 addr, u16 data); diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 65ff43cfc0f7..48b4d4cf805d 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -144,6 +144,104 @@ static inline bool handle_valid(unsigned long handle) return !!(handle & ~0xff); } +static ssize_t available_instances_show(struct mdev_type *mtype, + struct mdev_type_attribute *attr, + char *buf) +{ + struct intel_vgpu_type *type; + unsigned int num = 0; + struct intel_gvt *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt; + + type = &gvt->types[mtype_get_type_group_id(mtype)]; + if (!type) + num = 0; + else + num = type->avail_instance; + + return sprintf(buf, "%u\n", num); +} + +static ssize_t device_api_show(struct mdev_type *mtype, + struct mdev_type_attribute *attr, char *buf) +{ + return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING); +} + +static ssize_t description_show(struct mdev_type *mtype, + struct mdev_type_attribute *attr, char *buf) +{ + struct intel_vgpu_type *type; + struct intel_gvt *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt; + + type = &gvt->types[mtype_get_type_group_id(mtype)]; + if (!type) + return 0; + + return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n" + "fence: %d\nresolution: %s\n" + "weight: %d\n", + BYTES_TO_MB(type->low_gm_size), + BYTES_TO_MB(type->high_gm_size), + type->fence, vgpu_edid_str(type->resolution), + type->weight); +} + +static MDEV_TYPE_ATTR_RO(available_instances); +static MDEV_TYPE_ATTR_RO(device_api); +static MDEV_TYPE_ATTR_RO(description); + +static struct attribute *gvt_type_attrs[] = { + &mdev_type_attr_available_instances.attr, + &mdev_type_attr_device_api.attr, + &mdev_type_attr_description.attr, + NULL, +}; + +static struct attribute_group *gvt_vgpu_type_groups[] = { + [0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL, +}; + +static int intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt) +{ + int i, j; + struct intel_vgpu_type *type; + struct attribute_group *group; + + for (i = 0; i < gvt->num_types; i++) { + type = &gvt->types[i]; + + group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL); + if (!group) + goto unwind; + + group->name = type->name; + group->attrs = gvt_type_attrs; + gvt_vgpu_type_groups[i] = group; + } + + return 0; + +unwind: + for (j = 0; j < i; j++) { + group = gvt_vgpu_type_groups[j]; + kfree(group); + } + + return -ENOMEM; +} + +static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt) +{ + int i; + struct attribute_group *group; + + for (i = 0; i < gvt->num_types; i++) { + group = gvt_vgpu_type_groups[i]; + gvt_vgpu_type_groups[i] = NULL; + kfree(group); + } +} + static int kvmgt_guest_init(struct mdev_device *mdev); static void intel_vgpu_release_work(struct work_struct *work); static bool kvmgt_guest_exit(struct kvmgt_guest_info *info); @@ -694,14 +792,13 @@ static int intel_vgpu_create(struct mdev_device *mdev) struct intel_vgpu *vgpu = NULL; struct intel_vgpu_type *type; struct device *pdev; - void *gvt; + struct intel_gvt *gvt; int ret; pdev = mdev_parent_dev(mdev); gvt = kdev_to_i915(pdev)->gvt; - type = intel_gvt_ops->gvt_find_vgpu_type(gvt, - mdev_get_type_group_id(mdev)); + type = &gvt->types[mdev_get_type_group_id(mdev)]; if (!type) { ret = -EINVAL; goto out; @@ -1667,19 +1764,26 @@ static struct mdev_parent_ops intel_vgpu_ops = { static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops) { - struct attribute_group **kvm_vgpu_type_groups; + int ret; + + ret = intel_gvt_init_vgpu_type_groups((struct intel_gvt *)gvt); + if (ret) + return ret; intel_gvt_ops = ops; - if (!intel_gvt_ops->get_gvt_attrs(&kvm_vgpu_type_groups)) - return -EFAULT; - intel_vgpu_ops.supported_type_groups = kvm_vgpu_type_groups; + intel_vgpu_ops.supported_type_groups = gvt_vgpu_type_groups; - return mdev_register_device(dev, &intel_vgpu_ops); + ret = mdev_register_device(dev, &intel_vgpu_ops); + if (ret) + intel_gvt_cleanup_vgpu_type_groups((struct intel_gvt *)gvt); + + return ret; } -static void kvmgt_host_exit(struct device *dev) +static void kvmgt_host_exit(struct device *dev, void *gvt) { mdev_unregister_device(dev); + intel_gvt_cleanup_vgpu_type_groups((struct intel_gvt *)gvt); } static int kvmgt_page_track_add(unsigned long handle, u64 gfn) diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 550a456e936f..e6c5a792a49a 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -63,13 +63,13 @@ static inline int intel_gvt_hypervisor_host_init(struct device *dev, /** * intel_gvt_hypervisor_host_exit - exit GVT-g host side */ -static inline void intel_gvt_hypervisor_host_exit(struct device *dev) +static inline void intel_gvt_hypervisor_host_exit(struct device *dev, void *gvt) { /* optional to provide */ if (!intel_gvt_host.mpt->host_exit) return; - intel_gvt_host.mpt->host_exit(dev); + intel_gvt_host.mpt->host_exit(dev, gvt); } /** From b80bfc59c60d8a006fdd7a33352732911ee51397 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 5 Apr 2021 20:15:24 +0200 Subject: [PATCH 278/730] drm/exynos: correct exynos_drm_fimd kerneldoc Correct the kerneldoc of fimd_shadow_protect_win() to fix W=1 warnings: drivers/gpu/drm/exynos/exynos_drm_fimd.c:734: warning: expecting prototype for shadow_protect_win(). Prototype was for fimd_shadow_protect_win() instead Signed-off-by: Krzysztof Kozlowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 49a2e0c53918..ae576122873e 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -723,7 +723,7 @@ static void fimd_win_set_colkey(struct fimd_context *ctx, unsigned int win) } /** - * shadow_protect_win() - disable updating values from shadow registers at vsync + * fimd_shadow_protect_win() - disable updating values from shadow registers at vsync * * @ctx: local driver data * @win: window to protect registers for From 04562956fd41fb22645e47a00cd5cbd601ce4bdd Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Tue, 11 May 2021 17:40:04 +0800 Subject: [PATCH 279/730] drm/exynos: Remove redundant error printing in exynos_dsi_probe() When devm_ioremap_resource() fails, a clear enough error message will be printed by its subfunction __devm_ioremap_resource(). The error information contains the device name, failure cause, and possibly resource information. Therefore, remove the error printing here to simplify code and reduce the binary size. Reported-by: Hulk Robot Signed-off-by: Zhen Lei Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_dsi.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index 44e402b7cdfb..2d2fe5ab26e7 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -1786,10 +1786,8 @@ static int exynos_dsi_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); dsi->reg_base = devm_ioremap_resource(dev, res); - if (IS_ERR(dsi->reg_base)) { - dev_err(dev, "failed to remap io region\n"); + if (IS_ERR(dsi->reg_base)) return PTR_ERR(dsi->reg_base); - } dsi->phy = devm_phy_get(dev, "dsim"); if (IS_ERR(dsi->phy)) { From a470c5665b3b918c31bcc912234862803b10ba00 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Tue, 11 May 2021 19:27:33 +0800 Subject: [PATCH 280/730] drm/exynos/decon5433: Remove redundant error printing in exynos5433_decon_probe() When devm_ioremap_resource() fails, a clear enough error message will be printed by its subfunction __devm_ioremap_resource(). The error information contains the device name, failure cause, and possibly resource information. Therefore, remove the error printing here to simplify code and reduce the binary size. Reported-by: Hulk Robot Signed-off-by: Zhen Lei Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index b9a4b7670a89..197b97341cad 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -815,10 +815,8 @@ static int exynos5433_decon_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ctx->addr = devm_ioremap_resource(dev, res); - if (IS_ERR(ctx->addr)) { - dev_err(dev, "ioremap failed\n"); + if (IS_ERR(ctx->addr)) return PTR_ERR(ctx->addr); - } ret = decon_conf_irq(ctx, "vsync", decon_irq_handler, 0); if (ret < 0) From 5b9fedb31e476693c90d8ee040e7d4c51b3e7cc4 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 17 May 2021 14:39:56 +0200 Subject: [PATCH 281/730] quota: Disable quotactl_path syscall In commit fa8b90070a80 ("quota: wire up quotactl_path") we have wired up new quotactl_path syscall. However some people in LWN discussion have objected that the path based syscall is missing dirfd and flags argument which is mostly standard for contemporary path based syscalls. Indeed they have a point and after a discussion with Christian Brauner and Sascha Hauer I've decided to disable the syscall for now and update its API. Since there is no userspace currently using that syscall and it hasn't been released in any major release, we should be fine. CC: Christian Brauner CC: Sascha Hauer Link: https://lore.kernel.org/lkml/20210512153621.n5u43jsytbik4yze@wittgenstein Signed-off-by: Jan Kara --- arch/alpha/kernel/syscalls/syscall.tbl | 2 +- arch/arm/tools/syscall.tbl | 2 +- arch/arm64/include/asm/unistd32.h | 3 +-- arch/ia64/kernel/syscalls/syscall.tbl | 2 +- arch/m68k/kernel/syscalls/syscall.tbl | 2 +- arch/microblaze/kernel/syscalls/syscall.tbl | 2 +- arch/mips/kernel/syscalls/syscall_n32.tbl | 2 +- arch/mips/kernel/syscalls/syscall_n64.tbl | 2 +- arch/mips/kernel/syscalls/syscall_o32.tbl | 2 +- arch/parisc/kernel/syscalls/syscall.tbl | 2 +- arch/powerpc/kernel/syscalls/syscall.tbl | 2 +- arch/s390/kernel/syscalls/syscall.tbl | 2 +- arch/sh/kernel/syscalls/syscall.tbl | 2 +- arch/sparc/kernel/syscalls/syscall.tbl | 2 +- arch/x86/entry/syscalls/syscall_32.tbl | 2 +- arch/x86/entry/syscalls/syscall_64.tbl | 2 +- arch/xtensa/kernel/syscalls/syscall.tbl | 2 +- 17 files changed, 17 insertions(+), 18 deletions(-) diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 5622578742fd..3000a2e8ee21 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -482,7 +482,7 @@ 550 common process_madvise sys_process_madvise 551 common epoll_pwait2 sys_epoll_pwait2 552 common mount_setattr sys_mount_setattr -553 common quotactl_path sys_quotactl_path +# 553 reserved for quotactl_path 554 common landlock_create_ruleset sys_landlock_create_ruleset 555 common landlock_add_rule sys_landlock_add_rule 556 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index c7679d7db98b..28e03b5fec00 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -456,7 +456,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 7859749d6628..5dab69d2c22b 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -893,8 +893,7 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise) __SYSCALL(__NR_epoll_pwait2, compat_sys_epoll_pwait2) #define __NR_mount_setattr 442 __SYSCALL(__NR_mount_setattr, sys_mount_setattr) -#define __NR_quotactl_path 443 -__SYSCALL(__NR_quotactl_path, sys_quotactl_path) +/* 443 is reserved for quotactl_path */ #define __NR_landlock_create_ruleset 444 __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) #define __NR_landlock_add_rule 445 diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 1ee8e736a48e..bb11fe4c875a 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -363,7 +363,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 0dd019dc2136..79c2d24c89dd 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -442,7 +442,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 2ac716984ca2..b11395a20c20 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -448,7 +448,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 5e0096657251..9220909526f9 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -381,7 +381,7 @@ 440 n32 process_madvise sys_process_madvise 441 n32 epoll_pwait2 compat_sys_epoll_pwait2 442 n32 mount_setattr sys_mount_setattr -443 n32 quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 n32 landlock_create_ruleset sys_landlock_create_ruleset 445 n32 landlock_add_rule sys_landlock_add_rule 446 n32 landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 9974f5f8e49b..9cd1c34f31b5 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -357,7 +357,7 @@ 440 n64 process_madvise sys_process_madvise 441 n64 epoll_pwait2 sys_epoll_pwait2 442 n64 mount_setattr sys_mount_setattr -443 n64 quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 n64 landlock_create_ruleset sys_landlock_create_ruleset 445 n64 landlock_add_rule sys_landlock_add_rule 446 n64 landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 39d6e71e57b6..d560c467a8c6 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -430,7 +430,7 @@ 440 o32 process_madvise sys_process_madvise 441 o32 epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 o32 mount_setattr sys_mount_setattr -443 o32 quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 o32 landlock_create_ruleset sys_landlock_create_ruleset 445 o32 landlock_add_rule sys_landlock_add_rule 446 o32 landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 5ac80b83d745..aabc37f8cae3 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -440,7 +440,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 2e68fbb57cc6..8f052ff4058c 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -522,7 +522,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 7e4a2aba366d..0690263df1dd 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -445,7 +445,7 @@ 440 common process_madvise sys_process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index f47a0dc55445..0b91499ebdcf 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -445,7 +445,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index b9e1c0e735b7..e34cc30ef22c 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -488,7 +488,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 28a1423ce32e..4bbc267fb36b 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -447,7 +447,7 @@ 440 i386 process_madvise sys_process_madvise 441 i386 epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 i386 mount_setattr sys_mount_setattr -443 i386 quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 i386 landlock_create_ruleset sys_landlock_create_ruleset 445 i386 landlock_add_rule sys_landlock_add_rule 446 i386 landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index ecd551b08d05..ce18119ea0d0 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -364,7 +364,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 9d76d433d3d6..fd2f30227d96 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -413,7 +413,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self From 7a274727702cc07d27cdebd36d1d5132abeea12f Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 17 May 2021 12:43:34 +0100 Subject: [PATCH 282/730] io_uring: don't modify req->poll for rw __io_queue_proc() is used by both poll and apoll, so we should not access req->poll directly but selecting right struct io_poll_iocb depending on use case. Reported-and-tested-by: syzbot+a84b8783366ecb1c65d0@syzkaller.appspotmail.com Fixes: ea6a693d862d ("io_uring: disable multishot poll for double poll add cases") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/4a6a1de31142d8e0250fe2dfd4c8923d82a5bbfc.1621251795.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index e481ac8a757a..89ec10471b30 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5019,10 +5019,10 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, * Can't handle multishot for double wait for now, turn it * into one-shot mode. */ - if (!(req->poll.events & EPOLLONESHOT)) - req->poll.events |= EPOLLONESHOT; + if (!(poll_one->events & EPOLLONESHOT)) + poll_one->events |= EPOLLONESHOT; /* double add on the same waitqueue head, ignore */ - if (poll->head == head) + if (poll_one->head == head) return; poll = kmalloc(sizeof(*poll), GFP_ATOMIC); if (!poll) { From 6416954ca75baed71640bf3828625bf165fb9b5e Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 14 May 2021 10:03:40 +0100 Subject: [PATCH 283/730] btrfs: release path before starting transaction when cloning inline extent When cloning an inline extent there are a few cases, such as when we have an implicit hole at file offset 0, where we start a transaction while holding a read lock on a leaf. Starting the transaction results in a call to sb_start_intwrite(), which results in doing a read lock on a percpu semaphore. Lockdep doesn't like this and complains about it: [46.580704] ====================================================== [46.580752] WARNING: possible circular locking dependency detected [46.580799] 5.13.0-rc1 #28 Not tainted [46.580832] ------------------------------------------------------ [46.580877] cloner/3835 is trying to acquire lock: [46.580918] c00000001301d638 (sb_internal#2){.+.+}-{0:0}, at: clone_copy_inline_extent+0xe4/0x5a0 [46.581167] [46.581167] but task is already holding lock: [46.581217] c000000007fa2550 (btrfs-tree-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x70/0x1d0 [46.581293] [46.581293] which lock already depends on the new lock. [46.581293] [46.581351] [46.581351] the existing dependency chain (in reverse order) is: [46.581410] [46.581410] -> #1 (btrfs-tree-00){++++}-{3:3}: [46.581464] down_read_nested+0x68/0x200 [46.581536] __btrfs_tree_read_lock+0x70/0x1d0 [46.581577] btrfs_read_lock_root_node+0x88/0x200 [46.581623] btrfs_search_slot+0x298/0xb70 [46.581665] btrfs_set_inode_index+0xfc/0x260 [46.581708] btrfs_new_inode+0x26c/0x950 [46.581749] btrfs_create+0xf4/0x2b0 [46.581782] lookup_open.isra.57+0x55c/0x6a0 [46.581855] path_openat+0x418/0xd20 [46.581888] do_filp_open+0x9c/0x130 [46.581920] do_sys_openat2+0x2ec/0x430 [46.581961] do_sys_open+0x90/0xc0 [46.581993] system_call_exception+0x3d4/0x410 [46.582037] system_call_common+0xec/0x278 [46.582078] [46.582078] -> #0 (sb_internal#2){.+.+}-{0:0}: [46.582135] __lock_acquire+0x1e90/0x2c50 [46.582176] lock_acquire+0x2b4/0x5b0 [46.582263] start_transaction+0x3cc/0x950 [46.582308] clone_copy_inline_extent+0xe4/0x5a0 [46.582353] btrfs_clone+0x5fc/0x880 [46.582388] btrfs_clone_files+0xd8/0x1c0 [46.582434] btrfs_remap_file_range+0x3d8/0x590 [46.582481] do_clone_file_range+0x10c/0x270 [46.582558] vfs_clone_file_range+0x1b0/0x310 [46.582605] ioctl_file_clone+0x90/0x130 [46.582651] do_vfs_ioctl+0x874/0x1ac0 [46.582697] sys_ioctl+0x6c/0x120 [46.582733] system_call_exception+0x3d4/0x410 [46.582777] system_call_common+0xec/0x278 [46.582822] [46.582822] other info that might help us debug this: [46.582822] [46.582888] Possible unsafe locking scenario: [46.582888] [46.582942] CPU0 CPU1 [46.582984] ---- ---- [46.583028] lock(btrfs-tree-00); [46.583062] lock(sb_internal#2); [46.583119] lock(btrfs-tree-00); [46.583174] lock(sb_internal#2); [46.583212] [46.583212] *** DEADLOCK *** [46.583212] [46.583266] 6 locks held by cloner/3835: [46.583299] #0: c00000001301d448 (sb_writers#12){.+.+}-{0:0}, at: ioctl_file_clone+0x90/0x130 [46.583382] #1: c00000000f6d3768 (&sb->s_type->i_mutex_key#15){+.+.}-{3:3}, at: lock_two_nondirectories+0x58/0xc0 [46.583477] #2: c00000000f6d72a8 (&sb->s_type->i_mutex_key#15/4){+.+.}-{3:3}, at: lock_two_nondirectories+0x9c/0xc0 [46.583574] #3: c00000000f6d7138 (&ei->i_mmap_lock){+.+.}-{3:3}, at: btrfs_remap_file_range+0xd0/0x590 [46.583657] #4: c00000000f6d35f8 (&ei->i_mmap_lock/1){+.+.}-{3:3}, at: btrfs_remap_file_range+0xe0/0x590 [46.583743] #5: c000000007fa2550 (btrfs-tree-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x70/0x1d0 [46.583828] [46.583828] stack backtrace: [46.583872] CPU: 1 PID: 3835 Comm: cloner Not tainted 5.13.0-rc1 #28 [46.583931] Call Trace: [46.583955] [c0000000167c7200] [c000000000c1ee78] dump_stack+0xec/0x144 (unreliable) [46.584052] [c0000000167c7240] [c000000000274058] print_circular_bug.isra.32+0x3a8/0x400 [46.584123] [c0000000167c72e0] [c0000000002741f4] check_noncircular+0x144/0x190 [46.584191] [c0000000167c73b0] [c000000000278fc0] __lock_acquire+0x1e90/0x2c50 [46.584259] [c0000000167c74f0] [c00000000027aa94] lock_acquire+0x2b4/0x5b0 [46.584317] [c0000000167c75e0] [c000000000a0d6cc] start_transaction+0x3cc/0x950 [46.584388] [c0000000167c7690] [c000000000af47a4] clone_copy_inline_extent+0xe4/0x5a0 [46.584457] [c0000000167c77c0] [c000000000af525c] btrfs_clone+0x5fc/0x880 [46.584514] [c0000000167c7990] [c000000000af5698] btrfs_clone_files+0xd8/0x1c0 [46.584583] [c0000000167c7a00] [c000000000af5b58] btrfs_remap_file_range+0x3d8/0x590 [46.584652] [c0000000167c7ae0] [c0000000005d81dc] do_clone_file_range+0x10c/0x270 [46.584722] [c0000000167c7b40] [c0000000005d84f0] vfs_clone_file_range+0x1b0/0x310 [46.584793] [c0000000167c7bb0] [c00000000058bf80] ioctl_file_clone+0x90/0x130 [46.584861] [c0000000167c7c10] [c00000000058c894] do_vfs_ioctl+0x874/0x1ac0 [46.584922] [c0000000167c7d10] [c00000000058db4c] sys_ioctl+0x6c/0x120 [46.584978] [c0000000167c7d60] [c0000000000364a4] system_call_exception+0x3d4/0x410 [46.585046] [c0000000167c7e10] [c00000000000d45c] system_call_common+0xec/0x278 [46.585114] --- interrupt: c00 at 0x7ffff7e22990 [46.585160] NIP: 00007ffff7e22990 LR: 00000001000010ec CTR: 0000000000000000 [46.585224] REGS: c0000000167c7e80 TRAP: 0c00 Not tainted (5.13.0-rc1) [46.585280] MSR: 800000000280f033 CR: 28000244 XER: 00000000 [46.585374] IRQMASK: 0 [46.585374] GPR00: 0000000000000036 00007fffffffdec0 00007ffff7f17100 0000000000000004 [46.585374] GPR04: 000000008020940d 00007fffffffdf40 0000000000000000 0000000000000000 [46.585374] GPR08: 0000000000000004 0000000000000000 0000000000000000 0000000000000000 [46.585374] GPR12: 0000000000000000 00007ffff7ffa940 0000000000000000 0000000000000000 [46.585374] GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [46.585374] GPR20: 0000000000000000 000000009123683e 00007fffffffdf40 0000000000000000 [46.585374] GPR24: 0000000000000000 0000000000000000 0000000000000000 0000000000000004 [46.585374] GPR28: 0000000100030260 0000000100030280 0000000000000003 000000000000005f [46.585919] NIP [00007ffff7e22990] 0x7ffff7e22990 [46.585964] LR [00000001000010ec] 0x1000010ec [46.586010] --- interrupt: c00 This should be a false positive, as both locks are acquired in read mode. Nevertheless, we don't need to hold a leaf locked when we start the transaction, so just release the leaf (path) before starting it. Reported-by: Ritesh Harjani Link: https://lore.kernel.org/linux-btrfs/20210513214404.xks77p566fglzgum@riteshh-domain/ Reviewed-by: Anand Jain Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/reflink.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c index f4ec06b53aa0..06682128d8fa 100644 --- a/fs/btrfs/reflink.c +++ b/fs/btrfs/reflink.c @@ -285,6 +285,11 @@ copy_inline_extent: ret = btrfs_inode_set_file_extent_range(BTRFS_I(dst), 0, aligned_end); out: if (!ret && !trans) { + /* + * Release path before starting a new transaction so we don't + * hold locks that would confuse lockdep. + */ + btrfs_release_path(path); /* * No transaction here means we copied the inline extent into a * page of the destination inode. From 91df99a6eb50d5a1bc70fff4a09a0b7ae6aab96d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 14 May 2021 10:56:16 -0400 Subject: [PATCH 284/730] btrfs: do not BUG_ON in link_to_fixup_dir While doing error injection testing I got the following panic kernel BUG at fs/btrfs/tree-log.c:1862! invalid opcode: 0000 [#1] SMP NOPTI CPU: 1 PID: 7836 Comm: mount Not tainted 5.13.0-rc1+ #305 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 RIP: 0010:link_to_fixup_dir+0xd5/0xe0 RSP: 0018:ffffb5800180fa30 EFLAGS: 00010216 RAX: fffffffffffffffb RBX: 00000000fffffffb RCX: ffff8f595287faf0 RDX: ffffb5800180fa37 RSI: ffff8f5954978800 RDI: 0000000000000000 RBP: ffff8f5953af9450 R08: 0000000000000019 R09: 0000000000000001 R10: 000151f408682970 R11: 0000000120021001 R12: ffff8f5954978800 R13: ffff8f595287faf0 R14: ffff8f5953c77dd0 R15: 0000000000000065 FS: 00007fc5284c8c40(0000) GS:ffff8f59bbd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fc5287f47c0 CR3: 000000011275e002 CR4: 0000000000370ee0 Call Trace: replay_one_buffer+0x409/0x470 ? btree_read_extent_buffer_pages+0xd0/0x110 walk_up_log_tree+0x157/0x1e0 walk_log_tree+0xa6/0x1d0 btrfs_recover_log_trees+0x1da/0x360 ? replay_one_extent+0x7b0/0x7b0 open_ctree+0x1486/0x1720 btrfs_mount_root.cold+0x12/0xea ? __kmalloc_track_caller+0x12f/0x240 legacy_get_tree+0x24/0x40 vfs_get_tree+0x22/0xb0 vfs_kern_mount.part.0+0x71/0xb0 btrfs_mount+0x10d/0x380 ? vfs_parse_fs_string+0x4d/0x90 legacy_get_tree+0x24/0x40 vfs_get_tree+0x22/0xb0 path_mount+0x433/0xa10 __x64_sys_mount+0xe3/0x120 do_syscall_64+0x3d/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae We can get -EIO or any number of legitimate errors from btrfs_search_slot(), panicing here is not the appropriate response. The error path for this code handles errors properly, simply return the error. Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index fd6b1f13112e..c17d6b827b42 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1858,8 +1858,6 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); } else if (ret == -EEXIST) { ret = 0; - } else { - BUG(); /* Logic Error */ } iput(inode); From b433d090ac63eae4d3182cfc274dbacb0c4ee0ec Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Tue, 11 May 2021 12:00:54 -0700 Subject: [PATCH 285/730] MAINTAINERS: ARM/Amlogic SoCs: add Neil as primary maintainer Add Neil as primary maintainer for the Amlogic family of Arm SoCs. I will now act as co-maintainer. Neil is already doing lots of the reviewing, testing and behind the scenes support for users of the upstream kernel on these SoCs, so this is just to formalize the current state of affairs. Thanks Neil for all of your efforts, and keep up the great work! Signed-off-by: Kevin Hilman Acked-by: Neil Armstrong Acked-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20210511190054.26300-1-khilman@baylibre.com' Signed-off-by: Arnd Bergmann --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1162b0917630..a75eb1514957 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1622,8 +1622,8 @@ F: Documentation/devicetree/bindings/sound/amlogic* F: sound/soc/meson/ ARM/Amlogic Meson SoC support +M: Neil Armstrong M: Kevin Hilman -R: Neil Armstrong R: Jerome Brunet R: Martin Blumenstingl L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) From 6863b4d7bf19a54e23fc5838b7e66d954444289d Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 11 May 2021 08:48:27 +0300 Subject: [PATCH 286/730] RDMA/mlx5: Verify that DM operation is reasonable Fix the complaint from smatch by verifing that the user requested DM operation is not greater than 31. divers/infiniband/hw/mlx5/dm.c:220 mlx5_ib_handler_MLX5_IB_METHOD_DM_MAP_OP_ADDR() error: undefined (user controlled) shift '(((1))) << op' Fixes: cea85fa5dbc2 ("RDMA/mlx5: Add support in MEMIC operations") Link: https://lore.kernel.org/r/458b1d7710c3cf01360c8771893f483665569786.1620711734.git.leonro@nvidia.com Reported-by: Dan Carpenter Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/dm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/dm.c b/drivers/infiniband/hw/mlx5/dm.c index 094bf85589db..001d766cf291 100644 --- a/drivers/infiniband/hw/mlx5/dm.c +++ b/drivers/infiniband/hw/mlx5/dm.c @@ -217,6 +217,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DM_MAP_OP_ADDR)( if (err) return err; + if (op >= BITS_PER_TYPE(u32)) + return -EOPNOTSUPP; + if (!(MLX5_CAP_DEV_MEM(dev->mdev, memic_operations) & BIT(op))) return -EOPNOTSUPP; From 97f30d324ce6645a4de4ffb71e4ae9b8ca36ff04 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 11 May 2021 08:48:29 +0300 Subject: [PATCH 287/730] RDMA/mlx5: Recover from fatal event in dual port mode When there is fatal event on the slave port, the device is marked as not active. We need to mark it as active again when the slave is recovered to regain full functionality. Fixes: d69a24e03659 ("IB/mlx5: Move IB event processing onto a workqueue") Link: https://lore.kernel.org/r/8906754455bb23019ef223c725d2c0d38acfb80b.1620711734.git.leonro@nvidia.com Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 6d1dd09a4388..644d5d0ac544 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4419,6 +4419,7 @@ static int mlx5r_mp_probe(struct auxiliary_device *adev, if (bound) { rdma_roce_rescan_device(&dev->ib_dev); + mpi->ibdev->ib_active = true; break; } } From dc07628bd2bbc1da768e265192c28ebd301f509d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 11 May 2021 08:48:31 +0300 Subject: [PATCH 288/730] RDMA/rxe: Return CQE error if invalid lkey was supplied RXE is missing update of WQE status in LOCAL_WRITE failures. This caused the following kernel panic if someone sent an atomic operation with an explicitly wrong lkey. [leonro@vm ~]$ mkt test test_atomic_invalid_lkey (tests.test_atomic.AtomicTest) ... WARNING: CPU: 5 PID: 263 at drivers/infiniband/sw/rxe/rxe_comp.c:740 rxe_completer+0x1a6d/0x2e30 [rdma_rxe] Modules linked in: crc32_generic rdma_rxe ip6_udp_tunnel udp_tunnel rdma_ucm rdma_cm ib_umad ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core mlx5_core ptp pps_core CPU: 5 PID: 263 Comm: python3 Not tainted 5.13.0-rc1+ #2936 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:rxe_completer+0x1a6d/0x2e30 [rdma_rxe] Code: 03 0f 8e 65 0e 00 00 3b 93 10 06 00 00 0f 84 82 0a 00 00 4c 89 ff 4c 89 44 24 38 e8 2d 74 a9 e1 4c 8b 44 24 38 e9 1c f5 ff ff <0f> 0b e9 0c e8 ff ff b8 05 00 00 00 41 bf 05 00 00 00 e9 ab e7 ff RSP: 0018:ffff8880158af090 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff888016a78000 RCX: ffffffffa0cf1652 RDX: 1ffff9200004b442 RSI: 0000000000000004 RDI: ffffc9000025a210 RBP: dffffc0000000000 R08: 00000000ffffffea R09: ffff88801617740b R10: ffffed1002c2ee81 R11: 0000000000000007 R12: ffff88800f3b63e8 R13: ffff888016a78008 R14: ffffc9000025a180 R15: 000000000000000c FS: 00007f88b622a740(0000) GS:ffff88806d540000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f88b5a1fa10 CR3: 000000000d848004 CR4: 0000000000370ea0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: rxe_do_task+0x130/0x230 [rdma_rxe] rxe_rcv+0xb11/0x1df0 [rdma_rxe] rxe_loopback+0x157/0x1e0 [rdma_rxe] rxe_responder+0x5532/0x7620 [rdma_rxe] rxe_do_task+0x130/0x230 [rdma_rxe] rxe_rcv+0x9c8/0x1df0 [rdma_rxe] rxe_loopback+0x157/0x1e0 [rdma_rxe] rxe_requester+0x1efd/0x58c0 [rdma_rxe] rxe_do_task+0x130/0x230 [rdma_rxe] rxe_post_send+0x998/0x1860 [rdma_rxe] ib_uverbs_post_send+0xd5f/0x1220 [ib_uverbs] ib_uverbs_write+0x847/0xc80 [ib_uverbs] vfs_write+0x1c5/0x840 ksys_write+0x176/0x1d0 do_syscall_64+0x3f/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/11e7b553f3a6f5371c6bb3f57c494bb52b88af99.1620711734.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky Acked-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_comp.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 2af26737d32d..a6712e373eed 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -346,13 +346,15 @@ static inline enum comp_state do_read(struct rxe_qp *qp, ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &wqe->dma, payload_addr(pkt), payload_size(pkt), to_mr_obj, NULL); - if (ret) + if (ret) { + wqe->status = IB_WC_LOC_PROT_ERR; return COMPST_ERROR; + } if (wqe->dma.resid == 0 && (pkt->mask & RXE_END_MASK)) return COMPST_COMP_ACK; - else - return COMPST_UPDATE_COMP; + + return COMPST_UPDATE_COMP; } static inline enum comp_state do_atomic(struct rxe_qp *qp, @@ -366,10 +368,12 @@ static inline enum comp_state do_atomic(struct rxe_qp *qp, ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &wqe->dma, &atomic_orig, sizeof(u64), to_mr_obj, NULL); - if (ret) + if (ret) { + wqe->status = IB_WC_LOC_PROT_ERR; return COMPST_ERROR; - else - return COMPST_COMP_ACK; + } + + return COMPST_COMP_ACK; } static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe, From b7df21cf1b79ab7026f545e7bf837bd5750ac026 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 8 May 2021 03:57:03 +0800 Subject: [PATCH 289/730] tipc: skb_linearize the head skb when reassembling msgs It's not a good idea to append the frag skb to a skb's frag_list if the frag_list already has skbs from elsewhere, such as this skb was created by pskb_copy() where the frag_list was cloned (all the skbs in it were skb_get'ed) and shared by multiple skbs. However, the new appended frag skb should have been only seen by the current skb. Otherwise, it will cause use after free crashes as this appended frag skb are seen by multiple skbs but it only got skb_get called once. The same thing happens with a skb updated by pskb_may_pull() with a skb_cloned skb. Li Shuang has reported quite a few crashes caused by this when doing testing over macvlan devices: [] kernel BUG at net/core/skbuff.c:1970! [] Call Trace: [] skb_clone+0x4d/0xb0 [] macvlan_broadcast+0xd8/0x160 [macvlan] [] macvlan_process_broadcast+0x148/0x150 [macvlan] [] process_one_work+0x1a7/0x360 [] worker_thread+0x30/0x390 [] kernel BUG at mm/usercopy.c:102! [] Call Trace: [] __check_heap_object+0xd3/0x100 [] __check_object_size+0xff/0x16b [] simple_copy_to_iter+0x1c/0x30 [] __skb_datagram_iter+0x7d/0x310 [] __skb_datagram_iter+0x2a5/0x310 [] skb_copy_datagram_iter+0x3b/0x90 [] tipc_recvmsg+0x14a/0x3a0 [tipc] [] ____sys_recvmsg+0x91/0x150 [] ___sys_recvmsg+0x7b/0xc0 [] kernel BUG at mm/slub.c:305! [] Call Trace: [] [] kmem_cache_free+0x3ff/0x400 [] __netif_receive_skb_core+0x12c/0xc40 [] ? kmem_cache_alloc+0x12e/0x270 [] netif_receive_skb_internal+0x3d/0xb0 [] ? get_rx_page_info+0x8e/0xa0 [be2net] [] be_poll+0x6ef/0xd00 [be2net] [] ? irq_exit+0x4f/0x100 [] net_rx_action+0x149/0x3b0 ... This patch is to fix it by linearizing the head skb if it has frag_list set in tipc_buf_append(). Note that we choose to do this before calling skb_unshare(), as __skb_linearize() will avoid skb_copy(). Also, we can not just drop the frag_list either as the early time. Fixes: 45c8b7b175ce ("tipc: allow non-linear first fragment buffer") Reported-by: Li Shuang Signed-off-by: Xin Long Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/msg.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 3f0a25345a7c..ce6ab54822d8 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -149,18 +149,13 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (unlikely(head)) goto err; *buf = NULL; + if (skb_has_frag_list(frag) && __skb_linearize(frag)) + goto err; frag = skb_unshare(frag, GFP_ATOMIC); if (unlikely(!frag)) goto err; head = *headbuf = frag; TIPC_SKB_CB(head)->tail = NULL; - if (skb_is_nonlinear(head)) { - skb_walk_frags(head, tail) { - TIPC_SKB_CB(head)->tail = tail; - } - } else { - skb_frag_list_init(head); - } return 0; } From 31db0dbd72444abe645d90c20ecb84d668f5af5e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 14 May 2021 17:24:48 +0300 Subject: [PATCH 290/730] net: hso: check for allocation failure in hso_create_bulk_serial_device() In current kernels, small allocations never actually fail so this patch shouldn't affect runtime. Originally this error handling code written with the idea that if the "serial->tiocmget" allocation failed, then we would continue operating instead of bailing out early. But in later years we added an unchecked dereference on the next line. serial->tiocmget->serial_state_notification = kzalloc(); ^^^^^^^^^^^^^^^^^^ Since these allocations are never going fail in real life, this is mostly a philosophical debate, but I think bailing out early is the correct behavior that the user would want. And generally it's safer to bail as soon an error happens. Fixes: af0de1303c4e ("usb: hso: obey DMA rules in tiocmget") Signed-off-by: Dan Carpenter Reviewed-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/usb/hso.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 3ef4b2841402..260f850d69eb 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2618,29 +2618,28 @@ static struct hso_device *hso_create_bulk_serial_device( num_urbs = 2; serial->tiocmget = kzalloc(sizeof(struct hso_tiocmget), GFP_KERNEL); + if (!serial->tiocmget) + goto exit; serial->tiocmget->serial_state_notification = kzalloc(sizeof(struct hso_serial_state_notification), GFP_KERNEL); - /* it isn't going to break our heart if serial->tiocmget - * allocation fails don't bother checking this. - */ - if (serial->tiocmget && serial->tiocmget->serial_state_notification) { - tiocmget = serial->tiocmget; - tiocmget->endp = hso_get_ep(interface, - USB_ENDPOINT_XFER_INT, - USB_DIR_IN); - if (!tiocmget->endp) { - dev_err(&interface->dev, "Failed to find INT IN ep\n"); - goto exit; - } - - tiocmget->urb = usb_alloc_urb(0, GFP_KERNEL); - if (tiocmget->urb) { - mutex_init(&tiocmget->mutex); - init_waitqueue_head(&tiocmget->waitq); - } else - hso_free_tiomget(serial); + if (!serial->tiocmget->serial_state_notification) + goto exit; + tiocmget = serial->tiocmget; + tiocmget->endp = hso_get_ep(interface, + USB_ENDPOINT_XFER_INT, + USB_DIR_IN); + if (!tiocmget->endp) { + dev_err(&interface->dev, "Failed to find INT IN ep\n"); + goto exit; } + + tiocmget->urb = usb_alloc_urb(0, GFP_KERNEL); + if (tiocmget->urb) { + mutex_init(&tiocmget->mutex); + init_waitqueue_head(&tiocmget->waitq); + } else + hso_free_tiomget(serial); } else num_urbs = 1; From e0652f8bb44d6294eeeac06d703185357f25d50b Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Sat, 15 May 2021 07:29:06 +0800 Subject: [PATCH 291/730] NFC: nci: fix memory leak in nci_allocate_device nfcmrvl_disconnect fails to free the hci_dev field in struct nci_dev. Fix this by freeing hci_dev in nci_free_device. BUG: memory leak unreferenced object 0xffff888111ea6800 (size 1024): comm "kworker/1:0", pid 19, jiffies 4294942308 (age 13.580s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 60 fd 0c 81 88 ff ff .........`...... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<000000004bc25d43>] kmalloc include/linux/slab.h:552 [inline] [<000000004bc25d43>] kzalloc include/linux/slab.h:682 [inline] [<000000004bc25d43>] nci_hci_allocate+0x21/0xd0 net/nfc/nci/hci.c:784 [<00000000c59cff92>] nci_allocate_device net/nfc/nci/core.c:1170 [inline] [<00000000c59cff92>] nci_allocate_device+0x10b/0x160 net/nfc/nci/core.c:1132 [<00000000006e0a8e>] nfcmrvl_nci_register_dev+0x10a/0x1c0 drivers/nfc/nfcmrvl/main.c:153 [<000000004da1b57e>] nfcmrvl_probe+0x223/0x290 drivers/nfc/nfcmrvl/usb.c:345 [<00000000d506aed9>] usb_probe_interface+0x177/0x370 drivers/usb/core/driver.c:396 [<00000000bc632c92>] really_probe+0x159/0x4a0 drivers/base/dd.c:554 [<00000000f5009125>] driver_probe_device+0x84/0x100 drivers/base/dd.c:740 [<000000000ce658ca>] __device_attach_driver+0xee/0x110 drivers/base/dd.c:846 [<000000007067d05f>] bus_for_each_drv+0xb7/0x100 drivers/base/bus.c:431 [<00000000f8e13372>] __device_attach+0x122/0x250 drivers/base/dd.c:914 [<000000009cf68860>] bus_probe_device+0xc6/0xe0 drivers/base/bus.c:491 [<00000000359c965a>] device_add+0x5be/0xc30 drivers/base/core.c:3109 [<00000000086e4bd3>] usb_set_configuration+0x9d9/0xb90 drivers/usb/core/message.c:2164 [<00000000ca036872>] usb_generic_driver_probe+0x8c/0xc0 drivers/usb/core/generic.c:238 [<00000000d40d36f6>] usb_probe_device+0x5c/0x140 drivers/usb/core/driver.c:293 [<00000000bc632c92>] really_probe+0x159/0x4a0 drivers/base/dd.c:554 Reported-by: syzbot+19bcfc64a8df1318d1c3@syzkaller.appspotmail.com Fixes: 11f54f228643 ("NFC: nci: Add HCI over NCI protocol support") Signed-off-by: Dongliang Mu Signed-off-by: David S. Miller --- include/net/nfc/nci_core.h | 1 + net/nfc/nci/core.c | 1 + net/nfc/nci/hci.c | 5 +++++ 3 files changed, 7 insertions(+) diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index bd76e8e082c0..1df0f8074c9d 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -298,6 +298,7 @@ int nci_nfcc_loopback(struct nci_dev *ndev, void *data, size_t data_len, struct sk_buff **resp); struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev); +void nci_hci_deallocate(struct nci_dev *ndev); int nci_hci_send_event(struct nci_dev *ndev, u8 gate, u8 event, const u8 *param, size_t param_len); int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 9a585332ea84..da7fe9db1b00 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1191,6 +1191,7 @@ EXPORT_SYMBOL(nci_allocate_device); void nci_free_device(struct nci_dev *ndev) { nfc_free_device(ndev->nfc_dev); + nci_hci_deallocate(ndev); kfree(ndev); } EXPORT_SYMBOL(nci_free_device); diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index 6b275a387a92..96865142104f 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -792,3 +792,8 @@ struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev) return hdev; } + +void nci_hci_deallocate(struct nci_dev *ndev) +{ + kfree(ndev->hci_dev); +} From 28c66b6da4087b8cfe81c2ec0a46eb6116dafda9 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 15 May 2021 15:16:05 +0800 Subject: [PATCH 292/730] net: bnx2: Fix error return code in bnx2_init_board() Fix to return -EPERM from the error handling case instead of 0, as done elsewhere in this function. Fixes: b6016b767397 ("[BNX2]: New Broadcom gigabit network driver.") Reported-by: Hulk Robot Signed-off-by: Zhen Lei Reviewed-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index c0986096c701..5bace8a93d73 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -8247,9 +8247,9 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev) BNX2_WR(bp, PCI_COMMAND, reg); } else if ((BNX2_CHIP_ID(bp) == BNX2_CHIP_ID_5706_A1) && !(bp->flags & BNX2_FLAG_PCIX)) { - dev_err(&pdev->dev, "5706 A1 can only be used in a PCIX bus, aborting\n"); + rc = -EPERM; goto err_out_unmap; } From ab21494be9dc7d62736c5fcd06be65d49df713ee Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Sat, 15 May 2021 03:25:18 -0400 Subject: [PATCH 293/730] bnxt_en: Include new P5 HV definition in VF check. Otherwise, some of the recently added HyperV VF IDs would not be recognized as VF devices and they would not initialize properly. Fixes: 7fbf359bb2c1 ("bnxt_en: Add PCI IDs for Hyper-V VF devices.") Reviewed-by: Edwin Peer Signed-off-by: Andy Gospodarek Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 46be4046ee51..4e57041b4775 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -282,7 +282,8 @@ static bool bnxt_vf_pciid(enum board_idx idx) { return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF || idx == NETXTREME_S_VF || idx == NETXTREME_C_VF_HV || - idx == NETXTREME_E_VF_HV || idx == NETXTREME_E_P5_VF); + idx == NETXTREME_E_VF_HV || idx == NETXTREME_E_P5_VF || + idx == NETXTREME_E_P5_VF_HV); } #define DB_CP_REARM_FLAGS (DB_KEY_CP | DB_IDX_VALID) From 702279d2ce4650000bb6302013630304e359dc13 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sat, 15 May 2021 03:25:19 -0400 Subject: [PATCH 294/730] bnxt_en: Fix context memory setup for 64K page size. There was a typo in the code that checks for 64K BNXT_PAGE_SHIFT in bnxt_hwrm_set_pg_attr(). Fix it and make the code more understandable with a new macro BNXT_SET_CTX_PAGE_ATTR(). Fixes: 1b9394e5a2ad ("bnxt_en: Configure context memory on new devices.") Reviewed-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 9 +-------- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 10 ++++++++++ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4e57041b4775..fcc729d52b17 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6933,17 +6933,10 @@ ctx_err: static void bnxt_hwrm_set_pg_attr(struct bnxt_ring_mem_info *rmem, u8 *pg_attr, __le64 *pg_dir) { - u8 pg_size = 0; - if (!rmem->nr_pages) return; - if (BNXT_PAGE_SHIFT == 13) - pg_size = 1 << 4; - else if (BNXT_PAGE_SIZE == 16) - pg_size = 2 << 4; - - *pg_attr = pg_size; + BNXT_SET_CTX_PAGE_ATTR(*pg_attr); if (rmem->depth >= 1) { if (rmem->depth == 2) *pg_attr |= 2; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 98e0cef4532c..30e47ea343f9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1457,6 +1457,16 @@ struct bnxt_ctx_pg_info { #define BNXT_BACKING_STORE_CFG_LEGACY_LEN 256 +#define BNXT_SET_CTX_PAGE_ATTR(attr) \ +do { \ + if (BNXT_PAGE_SIZE == 0x2000) \ + attr = FUNC_BACKING_STORE_CFG_REQ_SRQ_PG_SIZE_PG_8K; \ + else if (BNXT_PAGE_SIZE == 0x10000) \ + attr = FUNC_BACKING_STORE_CFG_REQ_QPC_PG_SIZE_PG_64K; \ + else \ + attr = FUNC_BACKING_STORE_CFG_REQ_QPC_PG_SIZE_PG_4K; \ +} while (0) + struct bnxt_ctx_mem_info { u32 qp_max_entries; u16 qp_min_qp1_entries; From 9f6f852550d0e1b7735651228116ae9d300f69b3 Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Sun, 16 May 2021 07:11:40 +0000 Subject: [PATCH 295/730] isdn: mISDN: netjet: Fix crash in nj_probe: 'nj_setup' in netjet.c might fail with -EIO and in this case 'card->irq' is initialized and is bigger than zero. A subsequent call to 'nj_release' will free the irq that has not been requested. Fix this bug by deleting the previous assignment to 'card->irq' and just keep the assignment before 'request_irq'. The KASAN's log reveals it: [ 3.354615 ] WARNING: CPU: 0 PID: 1 at kernel/irq/manage.c:1826 free_irq+0x100/0x480 [ 3.355112 ] Modules linked in: [ 3.355310 ] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.13.0-rc1-00144-g25a1298726e #13 [ 3.355816 ] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014 [ 3.356552 ] RIP: 0010:free_irq+0x100/0x480 [ 3.356820 ] Code: 6e 08 74 6f 4d 89 f4 e8 5e ac 09 00 4d 8b 74 24 18 4d 85 f6 75 e3 e8 4f ac 09 00 8b 75 c8 48 c7 c7 78 c1 2e 85 e8 e0 cf f5 ff <0f> 0b 48 8b 75 c0 4c 89 ff e8 72 33 0b 03 48 8b 43 40 4c 8b a0 80 [ 3.358012 ] RSP: 0000:ffffc90000017b48 EFLAGS: 00010082 [ 3.358357 ] RAX: 0000000000000000 RBX: ffff888104dc8000 RCX: 0000000000000000 [ 3.358814 ] RDX: ffff8881003c8000 RSI: ffffffff8124a9e6 RDI: 00000000ffffffff [ 3.359272 ] RBP: ffffc90000017b88 R08: 0000000000000000 R09: 0000000000000000 [ 3.359732 ] R10: ffffc900000179f0 R11: 0000000000001d04 R12: 0000000000000000 [ 3.360195 ] R13: ffff888107dc6000 R14: ffff888107dc6928 R15: ffff888104dc80a8 [ 3.360652 ] FS: 0000000000000000(0000) GS:ffff88817bc00000(0000) knlGS:0000000000000000 [ 3.361170 ] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3.361538 ] CR2: 0000000000000000 CR3: 000000000582e000 CR4: 00000000000006f0 [ 3.362003 ] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 3.362175 ] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 3.362175 ] Call Trace: [ 3.362175 ] nj_release+0x51/0x1e0 [ 3.362175 ] nj_probe+0x450/0x950 [ 3.362175 ] ? pci_device_remove+0x110/0x110 [ 3.362175 ] local_pci_probe+0x45/0xa0 [ 3.362175 ] pci_device_probe+0x12b/0x1d0 [ 3.362175 ] really_probe+0x2a9/0x610 [ 3.362175 ] driver_probe_device+0x90/0x1d0 [ 3.362175 ] ? mutex_lock_nested+0x1b/0x20 [ 3.362175 ] device_driver_attach+0x68/0x70 [ 3.362175 ] __driver_attach+0x124/0x1b0 [ 3.362175 ] ? device_driver_attach+0x70/0x70 [ 3.362175 ] bus_for_each_dev+0xbb/0x110 [ 3.362175 ] ? rdinit_setup+0x45/0x45 [ 3.362175 ] driver_attach+0x27/0x30 [ 3.362175 ] bus_add_driver+0x1eb/0x2a0 [ 3.362175 ] driver_register+0xa9/0x180 [ 3.362175 ] __pci_register_driver+0x82/0x90 [ 3.362175 ] ? w6692_init+0x38/0x38 [ 3.362175 ] nj_init+0x36/0x38 [ 3.362175 ] do_one_initcall+0x7f/0x3d0 [ 3.362175 ] ? rdinit_setup+0x45/0x45 [ 3.362175 ] ? rcu_read_lock_sched_held+0x4f/0x80 [ 3.362175 ] kernel_init_freeable+0x2aa/0x301 [ 3.362175 ] ? rest_init+0x2c0/0x2c0 [ 3.362175 ] kernel_init+0x18/0x190 [ 3.362175 ] ? rest_init+0x2c0/0x2c0 [ 3.362175 ] ? rest_init+0x2c0/0x2c0 [ 3.362175 ] ret_from_fork+0x1f/0x30 [ 3.362175 ] Kernel panic - not syncing: panic_on_warn set ... [ 3.362175 ] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.13.0-rc1-00144-g25a1298726e #13 [ 3.362175 ] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014 [ 3.362175 ] Call Trace: [ 3.362175 ] dump_stack+0xba/0xf5 [ 3.362175 ] ? free_irq+0x100/0x480 [ 3.362175 ] panic+0x15a/0x3f2 [ 3.362175 ] ? __warn+0xf2/0x150 [ 3.362175 ] ? free_irq+0x100/0x480 [ 3.362175 ] __warn+0x108/0x150 [ 3.362175 ] ? free_irq+0x100/0x480 [ 3.362175 ] report_bug+0x119/0x1c0 [ 3.362175 ] handle_bug+0x3b/0x80 [ 3.362175 ] exc_invalid_op+0x18/0x70 [ 3.362175 ] asm_exc_invalid_op+0x12/0x20 [ 3.362175 ] RIP: 0010:free_irq+0x100/0x480 [ 3.362175 ] Code: 6e 08 74 6f 4d 89 f4 e8 5e ac 09 00 4d 8b 74 24 18 4d 85 f6 75 e3 e8 4f ac 09 00 8b 75 c8 48 c7 c7 78 c1 2e 85 e8 e0 cf f5 ff <0f> 0b 48 8b 75 c0 4c 89 ff e8 72 33 0b 03 48 8b 43 40 4c 8b a0 80 [ 3.362175 ] RSP: 0000:ffffc90000017b48 EFLAGS: 00010082 [ 3.362175 ] RAX: 0000000000000000 RBX: ffff888104dc8000 RCX: 0000000000000000 [ 3.362175 ] RDX: ffff8881003c8000 RSI: ffffffff8124a9e6 RDI: 00000000ffffffff [ 3.362175 ] RBP: ffffc90000017b88 R08: 0000000000000000 R09: 0000000000000000 [ 3.362175 ] R10: ffffc900000179f0 R11: 0000000000001d04 R12: 0000000000000000 [ 3.362175 ] R13: ffff888107dc6000 R14: ffff888107dc6928 R15: ffff888104dc80a8 [ 3.362175 ] ? vprintk+0x76/0x150 [ 3.362175 ] ? free_irq+0x100/0x480 [ 3.362175 ] nj_release+0x51/0x1e0 [ 3.362175 ] nj_probe+0x450/0x950 [ 3.362175 ] ? pci_device_remove+0x110/0x110 [ 3.362175 ] local_pci_probe+0x45/0xa0 [ 3.362175 ] pci_device_probe+0x12b/0x1d0 [ 3.362175 ] really_probe+0x2a9/0x610 [ 3.362175 ] driver_probe_device+0x90/0x1d0 [ 3.362175 ] ? mutex_lock_nested+0x1b/0x20 [ 3.362175 ] device_driver_attach+0x68/0x70 [ 3.362175 ] __driver_attach+0x124/0x1b0 [ 3.362175 ] ? device_driver_attach+0x70/0x70 [ 3.362175 ] bus_for_each_dev+0xbb/0x110 [ 3.362175 ] ? rdinit_setup+0x45/0x45 [ 3.362175 ] driver_attach+0x27/0x30 [ 3.362175 ] bus_add_driver+0x1eb/0x2a0 [ 3.362175 ] driver_register+0xa9/0x180 [ 3.362175 ] __pci_register_driver+0x82/0x90 [ 3.362175 ] ? w6692_init+0x38/0x38 [ 3.362175 ] nj_init+0x36/0x38 [ 3.362175 ] do_one_initcall+0x7f/0x3d0 [ 3.362175 ] ? rdinit_setup+0x45/0x45 [ 3.362175 ] ? rcu_read_lock_sched_held+0x4f/0x80 [ 3.362175 ] kernel_init_freeable+0x2aa/0x301 [ 3.362175 ] ? rest_init+0x2c0/0x2c0 [ 3.362175 ] kernel_init+0x18/0x190 [ 3.362175 ] ? rest_init+0x2c0/0x2c0 [ 3.362175 ] ? rest_init+0x2c0/0x2c0 [ 3.362175 ] ret_from_fork+0x1f/0x30 [ 3.362175 ] Dumping ftrace buffer: [ 3.362175 ] (ftrace buffer empty) [ 3.362175 ] Kernel Offset: disabled [ 3.362175 ] Rebooting in 1 seconds.. Reported-by: Zheyu Ma Signed-off-by: Zheyu Ma Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/netjet.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/isdn/hardware/mISDN/netjet.c b/drivers/isdn/hardware/mISDN/netjet.c index ee925b58bbce..2a1ddd47a096 100644 --- a/drivers/isdn/hardware/mISDN/netjet.c +++ b/drivers/isdn/hardware/mISDN/netjet.c @@ -1100,7 +1100,6 @@ nj_probe(struct pci_dev *pdev, const struct pci_device_id *ent) card->typ = NETJET_S_TJ300; card->base = pci_resource_start(pdev, 0); - card->irq = pdev->irq; pci_set_drvdata(pdev, card); err = setup_instance(card); if (err) From 020ef930b826d21c5446fdc9db80fd72a791bc21 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 16 May 2021 14:44:42 +0000 Subject: [PATCH 296/730] mld: fix panic in mld_newpack() mld_newpack() doesn't allow to allocate high order page, only order-0 allocation is allowed. If headroom size is too large, a kernel panic could occur in skb_put(). Test commands: ip netns del A ip netns del B ip netns add A ip netns add B ip link add veth0 type veth peer name veth1 ip link set veth0 netns A ip link set veth1 netns B ip netns exec A ip link set lo up ip netns exec A ip link set veth0 up ip netns exec A ip -6 a a 2001:db8:0::1/64 dev veth0 ip netns exec B ip link set lo up ip netns exec B ip link set veth1 up ip netns exec B ip -6 a a 2001:db8:0::2/64 dev veth1 for i in {1..99} do let A=$i-1 ip netns exec A ip link add ip6gre$i type ip6gre \ local 2001:db8:$A::1 remote 2001:db8:$A::2 encaplimit 100 ip netns exec A ip -6 a a 2001:db8:$i::1/64 dev ip6gre$i ip netns exec A ip link set ip6gre$i up ip netns exec B ip link add ip6gre$i type ip6gre \ local 2001:db8:$A::2 remote 2001:db8:$A::1 encaplimit 100 ip netns exec B ip -6 a a 2001:db8:$i::2/64 dev ip6gre$i ip netns exec B ip link set ip6gre$i up done Splat looks like: kernel BUG at net/core/skbuff.c:110! invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI CPU: 0 PID: 7 Comm: kworker/0:1 Not tainted 5.12.0+ #891 Workqueue: ipv6_addrconf addrconf_dad_work RIP: 0010:skb_panic+0x15d/0x15f Code: 92 fe 4c 8b 4c 24 10 53 8b 4d 70 45 89 e0 48 c7 c7 00 ae 79 83 41 57 41 56 41 55 48 8b 54 24 a6 26 f9 ff <0f> 0b 48 8b 6c 24 20 89 34 24 e8 4a 4e 92 fe 8b 34 24 48 c7 c1 20 RSP: 0018:ffff88810091f820 EFLAGS: 00010282 RAX: 0000000000000089 RBX: ffff8881086e9000 RCX: 0000000000000000 RDX: 0000000000000089 RSI: 0000000000000008 RDI: ffffed1020123efb RBP: ffff888005f6eac0 R08: ffffed1022fc0031 R09: ffffed1022fc0031 R10: ffff888117e00187 R11: ffffed1022fc0030 R12: 0000000000000028 R13: ffff888008284eb0 R14: 0000000000000ed8 R15: 0000000000000ec0 FS: 0000000000000000(0000) GS:ffff888117c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f8b801c5640 CR3: 0000000033c2c006 CR4: 00000000003706f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? ip6_mc_hdr.isra.26.constprop.46+0x12a/0x600 ? ip6_mc_hdr.isra.26.constprop.46+0x12a/0x600 skb_put.cold.104+0x22/0x22 ip6_mc_hdr.isra.26.constprop.46+0x12a/0x600 ? rcu_read_lock_sched_held+0x91/0xc0 mld_newpack+0x398/0x8f0 ? ip6_mc_hdr.isra.26.constprop.46+0x600/0x600 ? lock_contended+0xc40/0xc40 add_grhead.isra.33+0x280/0x380 add_grec+0x5ca/0xff0 ? mld_sendpack+0xf40/0xf40 ? lock_downgrade+0x690/0x690 mld_send_initial_cr.part.34+0xb9/0x180 ipv6_mc_dad_complete+0x15d/0x1b0 addrconf_dad_completed+0x8d2/0xbb0 ? lock_downgrade+0x690/0x690 ? addrconf_rs_timer+0x660/0x660 ? addrconf_dad_work+0x73c/0x10e0 addrconf_dad_work+0x73c/0x10e0 Allowing high order page allocation could fix this problem. Fixes: 72e09ad107e7 ("ipv6: avoid high order allocations") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- net/ipv6/mcast.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 0d59efb6b49e..d36ef9d25e73 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1745,10 +1745,7 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu) IPV6_TLV_PADN, 0 }; /* we assume size > sizeof(ra) here */ - /* limit our allocations to order-0 page */ - size = min_t(int, size, SKB_MAX_ORDER(0, 0)); skb = sock_alloc_send_skb(sk, size, 1, &err); - if (!skb) return NULL; From 04c26faa51d1e2fe71cf13c45791f5174c37f986 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 May 2021 02:28:58 +0800 Subject: [PATCH 297/730] tipc: wait and exit until all work queues are done On some host, a crash could be triggered simply by repeating these commands several times: # modprobe tipc # tipc bearer enable media udp name UDP1 localip 127.0.0.1 # rmmod tipc [] BUG: unable to handle kernel paging request at ffffffffc096bb00 [] Workqueue: events 0xffffffffc096bb00 [] Call Trace: [] ? process_one_work+0x1a7/0x360 [] ? worker_thread+0x30/0x390 [] ? create_worker+0x1a0/0x1a0 [] ? kthread+0x116/0x130 [] ? kthread_flush_work_fn+0x10/0x10 [] ? ret_from_fork+0x35/0x40 When removing the TIPC module, the UDP tunnel sock will be delayed to release in a work queue as sock_release() can't be done in rtnl_lock(). If the work queue is schedule to run after the TIPC module is removed, kernel will crash as the work queue function cleanup_beareri() code no longer exists when trying to invoke it. To fix it, this patch introduce a member wq_count in tipc_net to track the numbers of work queues in schedule, and wait and exit until all work queues are done in tipc_exit_net(). Fixes: d0f91938bede ("tipc: add ip/udp media type") Reported-by: Shuang Li Signed-off-by: Xin Long Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/core.c | 2 ++ net/tipc/core.h | 2 ++ net/tipc/udp_media.c | 2 ++ 3 files changed, 6 insertions(+) diff --git a/net/tipc/core.c b/net/tipc/core.c index 5cc1f0307215..72f3ac73779b 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -119,6 +119,8 @@ static void __net_exit tipc_exit_net(struct net *net) #ifdef CONFIG_TIPC_CRYPTO tipc_crypto_stop(&tipc_net(net)->crypto_tx); #endif + while (atomic_read(&tn->wq_count)) + cond_resched(); } static void __net_exit tipc_pernet_pre_exit(struct net *net) diff --git a/net/tipc/core.h b/net/tipc/core.h index 03de7b213f55..5741ae488bb5 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -149,6 +149,8 @@ struct tipc_net { #endif /* Work item for net finalize */ struct tipc_net_work final_work; + /* The numbers of work queues in schedule */ + atomic_t wq_count; }; static inline struct tipc_net *tipc_net(struct net *net) diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index e556d2cdc064..c2bb818704c8 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -814,6 +814,7 @@ static void cleanup_bearer(struct work_struct *work) kfree_rcu(rcast, rcu); } + atomic_dec(&tipc_net(sock_net(ub->ubsock->sk))->wq_count); dst_cache_destroy(&ub->rcast.dst_cache); udp_tunnel_sock_release(ub->ubsock); synchronize_net(); @@ -834,6 +835,7 @@ static void tipc_udp_disable(struct tipc_bearer *b) RCU_INIT_POINTER(ub->bearer, NULL); /* sock_release need to be done outside of rtnl lock */ + atomic_inc(&tipc_net(sock_net(ub->ubsock->sk))->wq_count); INIT_WORK(&ub->work, cleanup_bearer); schedule_work(&ub->work); } From 4710ccc52e8e504a5617a889843a18cd06f1ab72 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 10 May 2021 15:35:14 -0500 Subject: [PATCH 298/730] dt-bindings: media: renesas,drif: Use graph schema Convert the renesas,drif binding schema to use the graph schema. The binding referred to video-interfaces.txt, but it doesn't actually use any properties from it as 'sync-active' is a custom property. As 'sync-active' is custom, it needs a type definition. Cc: Mauro Carvalho Chehab Cc: Ramesh Shanmugasundaram Cc: linux-media@vger.kernel.org Cc: linux-renesas-soc@vger.kernel.org Signed-off-by: Rob Herring Reviewed-by: Fabrizio Castro Link: https://lore.kernel.org/r/20210510203514.603471-1-robh@kernel.org --- .../bindings/media/renesas,drif.yaml | 20 +++++++------------ 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/Documentation/devicetree/bindings/media/renesas,drif.yaml b/Documentation/devicetree/bindings/media/renesas,drif.yaml index f1bdaeab4053..ce505a7c006a 100644 --- a/Documentation/devicetree/bindings/media/renesas,drif.yaml +++ b/Documentation/devicetree/bindings/media/renesas,drif.yaml @@ -99,32 +99,26 @@ properties: Indicates that the channel acts as primary among the bonded channels. port: - type: object + $ref: /schemas/graph.yaml#/properties/port + unevaluatedProperties: false description: - Child port node corresponding to the data input, in accordance with the - video interface bindings defined in - Documentation/devicetree/bindings/media/video-interfaces.txt. - The port node must contain at least one endpoint. + Child port node corresponding to the data input. The port node must + contain at least one endpoint. properties: endpoint: - type: object + $ref: /schemas/graph.yaml#/$defs/endpoint-base + unevaluatedProperties: false properties: - remote-endpoint: - description: - A phandle to the remote tuner endpoint subnode in remote node - port. - sync-active: + $ref: /schemas/types.yaml#/definitions/uint32 enum: [0, 1] description: Indicates sync signal polarity, 0/1 for low/high respectively. This property maps to SYNCAC bit in the hardware manual. The default is 1 (active high). - additionalProperties: false - required: - compatible - reg From c17611592d9635c443bedc9be901f4463f45c6d5 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 10 May 2021 15:45:24 -0500 Subject: [PATCH 299/730] dt-bindings: More removals of type references on common properties Users of common properties shouldn't have a type definition as the common schemas already have one. A few new ones slipped in and *-names was missed in the last clean-up pass. Drop all the unnecessary type references in the tree. A meta-schema update to catch these is pending. Cc: Stephen Boyd Cc: Olivier Moysan Cc: Arnaud Pouliquen Cc: Lars-Peter Clausen Cc: Dmitry Torokhov Cc: Bjorn Andersson Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Orson Zhai Cc: Baolin Wang Cc: Chunyan Zhang Cc: Liam Girdwood Cc: Fabrice Gasnier Cc: Odelu Kukatla Cc: Alex Elder Cc: Shengjiu Wang Cc: linux-clk@vger.kernel.org Cc: alsa-devel@alsa-project.org Cc: linux-iio@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-input@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Rob Herring Acked-by: Mark Brown Acked-by: Georgi Djakov Reviewed-by: Luca Ceresoli Acked-by: Jonathan Cameron Acked-by: Sebastian Reichel Link: https://lore.kernel.org/r/20210510204524.617390-1-robh@kernel.org --- Documentation/devicetree/bindings/clock/idt,versaclock5.yaml | 2 -- .../devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml | 1 - Documentation/devicetree/bindings/input/input.yaml | 1 - Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml | 1 - Documentation/devicetree/bindings/net/qcom,ipa.yaml | 1 - .../devicetree/bindings/power/supply/sc2731-charger.yaml | 2 +- Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml | 2 +- 7 files changed, 2 insertions(+), 8 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml b/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml index c268debe5b8d..28675b0b80f1 100644 --- a/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml +++ b/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml @@ -60,7 +60,6 @@ properties: maxItems: 2 idt,xtal-load-femtofarads: - $ref: /schemas/types.yaml#/definitions/uint32 minimum: 9000 maximum: 22760 description: Optional load capacitor for XTAL1 and XTAL2 @@ -84,7 +83,6 @@ patternProperties: enum: [ 1800000, 2500000, 3300000 ] idt,slew-percent: description: The Slew rate control for CMOS single-ended. - $ref: /schemas/types.yaml#/definitions/uint32 enum: [ 80, 85, 90, 100 ] required: diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml b/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml index 6f2398cdc82d..1e7894e524f9 100644 --- a/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml @@ -102,7 +102,6 @@ patternProperties: st,adc-channel-names: description: List of single-ended channel names. - $ref: /schemas/types.yaml#/definitions/string-array st,filter-order: description: | diff --git a/Documentation/devicetree/bindings/input/input.yaml b/Documentation/devicetree/bindings/input/input.yaml index 74244d21d2b3..d41d8743aad4 100644 --- a/Documentation/devicetree/bindings/input/input.yaml +++ b/Documentation/devicetree/bindings/input/input.yaml @@ -38,6 +38,5 @@ properties: Duration in seconds which the key should be kept pressed for device to reset automatically. Device with key pressed reset feature can specify this property. - $ref: /schemas/types.yaml#/definitions/uint32 additionalProperties: true diff --git a/Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml index cb6498108b78..36c955965d90 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml @@ -92,7 +92,6 @@ properties: this interconnect to send RPMh commands. qcom,bcm-voter-names: - $ref: /schemas/types.yaml#/definitions/string-array description: | Names for each of the qcom,bcm-voters specified. diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml index 7443490d4cc6..5fe6d3dceb08 100644 --- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml @@ -105,7 +105,6 @@ properties: - description: Whether the IPA clock is enabled (if valid) qcom,smem-state-names: - $ref: /schemas/types.yaml#/definitions/string-array description: The names of the state bits used for SMP2P output items: - const: ipa-clock-enabled-valid diff --git a/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml b/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml index db1aa238cda5..b62c2431f94e 100644 --- a/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml +++ b/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml @@ -20,7 +20,7 @@ properties: maxItems: 1 phys: - $ref: /schemas/types.yaml#/definitions/phandle + maxItems: 1 description: phandle to the USB phy monitored-battery: diff --git a/Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml b/Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml index b4c190bddd84..61802a11baf4 100644 --- a/Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml +++ b/Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml @@ -49,7 +49,7 @@ properties: maxItems: 1 memory-region: - $ref: /schemas/types.yaml#/definitions/phandle + maxItems: 1 description: phandle to a node describing reserved memory (System RAM memory) The M core can't access all the DDR memory space on some platform, From 3c814519743a919f8b3c236c0565e24709806d66 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 17 May 2021 10:19:54 -0400 Subject: [PATCH 300/730] MAINTAINERS: net: remove stale website link The http://www.linuxfoundation.org/en/Net does not contain networking subsystem description ("Nothing found"). Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- MAINTAINERS | 2 -- 1 file changed, 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1d834bebf469..c1cb2e38ae2e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12709,7 +12709,6 @@ M: "David S. Miller" M: Jakub Kicinski L: netdev@vger.kernel.org S: Maintained -W: http://www.linuxfoundation.org/en/Net Q: https://patchwork.kernel.org/project/netdevbpf/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git @@ -12754,7 +12753,6 @@ M: "David S. Miller" M: Jakub Kicinski L: netdev@vger.kernel.org S: Maintained -W: http://www.linuxfoundation.org/en/Net Q: https://patchwork.kernel.org/project/netdevbpf/list/ B: mailto:netdev@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git From 35d96e631860226d5dc4de0fad0a415362ec2457 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 17 May 2021 16:13:35 +0200 Subject: [PATCH 301/730] bonding: init notify_work earlier to avoid uninitialized use If bond_kobj_init() or later kzalloc() in bond_alloc_slave() fail, then we call kobject_put() on the slave->kobj. This in turn calls the release function slave_kobj_release() which will always try to cancel_delayed_work_sync(&slave->notify_work), which shouldn't be done on an uninitialized work struct. Always initialize the work struct earlier to avoid problems here. Syzbot bisected this down to a completely pointless commit, some fault injection may have been at work here that caused the alloc failure in the first place, which may interact badly with bisect. Reported-by: syzbot+bfda097c12a00c8cae67@syzkaller.appspotmail.com Signed-off-by: Johannes Berg Acked-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 20bbda1b36e1..c5a646d06102 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1526,6 +1526,7 @@ static struct slave *bond_alloc_slave(struct bonding *bond, slave->bond = bond; slave->dev = slave_dev; + INIT_DELAYED_WORK(&slave->notify_work, bond_netdev_notify_work); if (bond_kobj_init(slave)) return NULL; @@ -1538,7 +1539,6 @@ static struct slave *bond_alloc_slave(struct bonding *bond, return NULL; } } - INIT_DELAYED_WORK(&slave->notify_work, bond_netdev_notify_work); return slave; } From 444d7be9532dcfda8e0385226c862fd7e986f607 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 17 May 2021 10:47:06 +0200 Subject: [PATCH 302/730] net/smc: remove device from smcd_dev_list after failed device_add() If the device_add() for a smcd_dev fails, there's no cleanup step that rolls back the earlier list_add(). The device subsequently gets freed, and we end up with a corrupted list. Add some error handling that removes the device from the list. Fixes: c6ba7c9ba43d ("net/smc: add base infrastructure for SMC-D and ISM") Signed-off-by: Julian Wiedmann Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_ism.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 9c6e95882553..d24b96ea0eb5 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -428,6 +428,8 @@ EXPORT_SYMBOL_GPL(smcd_alloc_dev); int smcd_register_dev(struct smcd_dev *smcd) { + int rc; + mutex_lock(&smcd_dev_list.mutex); if (list_empty(&smcd_dev_list.list)) { u8 *system_eid = NULL; @@ -447,7 +449,14 @@ int smcd_register_dev(struct smcd_dev *smcd) dev_name(&smcd->dev), smcd->pnetid, smcd->pnetid_by_user ? " (user defined)" : ""); - return device_add(&smcd->dev); + rc = device_add(&smcd->dev); + if (rc) { + mutex_lock(&smcd_dev_list.mutex); + list_del(&smcd->list); + mutex_unlock(&smcd_dev_list.mutex); + } + + return rc; } EXPORT_SYMBOL_GPL(smcd_register_dev); From 1d482e666b8e74c7555dbdfbfb77205eeed3ff2d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 17 May 2021 16:38:09 +0200 Subject: [PATCH 303/730] netlink: disable IRQs for netlink_lock_table() Syzbot reports that in mac80211 we have a potential deadlock between our "local->stop_queue_reasons_lock" (spinlock) and netlink's nl_table_lock (rwlock). This is because there's at least one situation in which we might try to send a netlink message with this spinlock held while it is also possible to take the spinlock from a hardirq context, resulting in the following deadlock scenario reported by lockdep: CPU0 CPU1 ---- ---- lock(nl_table_lock); local_irq_disable(); lock(&local->queue_stop_reason_lock); lock(nl_table_lock); lock(&local->queue_stop_reason_lock); This seems valid, we can take the queue_stop_reason_lock in any kind of context ("CPU0"), and call ieee80211_report_ack_skb() with the spinlock held and IRQs disabled ("CPU1") in some code path (ieee80211_do_stop() via ieee80211_free_txskb()). Short of disallowing netlink use in scenarios like these (which would be rather complex in mac80211's case due to the deep callchain), it seems the only fix for this is to disable IRQs while nl_table_lock is held to avoid hitting this scenario, this disallows the "CPU0" portion of the reported deadlock. Note that the writer side (netlink_table_grab()) already disables IRQs for this lock. Unfortunately though, this seems like a huge hammer, and maybe the whole netlink table locking should be reworked. Reported-by: syzbot+69ff9dff50dcfe14ddd4@syzkaller.appspotmail.com Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 3a62f97acf39..6133e412b948 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -461,11 +461,13 @@ void netlink_table_ungrab(void) static inline void netlink_lock_table(void) { + unsigned long flags; + /* read_lock() synchronizes us to netlink_table_grab */ - read_lock(&nl_table_lock); + read_lock_irqsave(&nl_table_lock, flags); atomic_inc(&nl_table_users); - read_unlock(&nl_table_lock); + read_unlock_irqrestore(&nl_table_lock, flags); } static inline void From 5aec55b46c6238506cdf0c60cd0e42ab77a1e5e0 Mon Sep 17 00:00:00 2001 From: Catherine Sullivan Date: Mon, 17 May 2021 14:08:11 -0700 Subject: [PATCH 304/730] gve: Check TX QPL was actually assigned Correctly check the TX QPL was assigned and unassigned if other steps in the allocation fail. Fixes: f5cedc84a30d (gve: Add transmit and receive support) Signed-off-by: Catherine Sullivan Signed-off-by: David Awogbemila Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_tx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index 6938f3a939d6..bb57c42872b4 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -212,10 +212,11 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx) tx->dev = &priv->pdev->dev; if (!tx->raw_addressing) { tx->tx_fifo.qpl = gve_assign_tx_qpl(priv); - + if (!tx->tx_fifo.qpl) + goto abort_with_desc; /* map Tx FIFO */ if (gve_tx_fifo_init(priv, &tx->tx_fifo)) - goto abort_with_desc; + goto abort_with_qpl; } tx->q_resources = @@ -236,6 +237,9 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx) abort_with_fifo: if (!tx->raw_addressing) gve_tx_fifo_release(priv, &tx->tx_fifo); +abort_with_qpl: + if (!tx->raw_addressing) + gve_unassign_qpl(priv, tx->tx_fifo.qpl->id); abort_with_desc: dma_free_coherent(hdev, bytes, tx->desc, tx->bus); tx->desc = NULL; From e96b491a0ffa35a8a9607c193fa4d894ca9fb32f Mon Sep 17 00:00:00 2001 From: David Awogbemila Date: Mon, 17 May 2021 14:08:12 -0700 Subject: [PATCH 305/730] gve: Update mgmt_msix_idx if num_ntfy changes If we do not get the expected number of vectors from pci_enable_msix_range, we update priv->num_ntfy_blks but not priv->mgmt_msix_idx. This patch fixes this so that priv->mgmt_msix_idx is updated accordingly. Fixes: f5cedc84a30d ("gve: Add transmit and receive support") Signed-off-by: David Awogbemila Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 7302498c6df3..64192942ca53 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -220,6 +220,7 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv) int vecs_left = new_num_ntfy_blks % 2; priv->num_ntfy_blks = new_num_ntfy_blks; + priv->mgmt_msix_idx = priv->num_ntfy_blks; priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, vecs_per_type); priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues, From 5218e919c8d06279884aa0baf76778a6817d5b93 Mon Sep 17 00:00:00 2001 From: David Awogbemila Date: Mon, 17 May 2021 14:08:13 -0700 Subject: [PATCH 306/730] gve: Add NULL pointer checks when freeing irqs. When freeing notification blocks, we index priv->msix_vectors. If we failed to allocate priv->msix_vectors (see abort_with_msix_vectors) this could lead to a NULL pointer dereference if the driver is unloaded. Fixes: 893ce44df565 ("gve: Add basic driver framework for Compute Engine Virtual NIC") Signed-off-by: David Awogbemila Acked-by: Willem de Brujin Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_main.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 64192942ca53..21a5d058dab4 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -301,20 +301,22 @@ static void gve_free_notify_blocks(struct gve_priv *priv) { int i; - /* Free the irqs */ - for (i = 0; i < priv->num_ntfy_blks; i++) { - struct gve_notify_block *block = &priv->ntfy_blocks[i]; - int msix_idx = i; + if (priv->msix_vectors) { + /* Free the irqs */ + for (i = 0; i < priv->num_ntfy_blks; i++) { + struct gve_notify_block *block = &priv->ntfy_blocks[i]; + int msix_idx = i; - irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, - NULL); - free_irq(priv->msix_vectors[msix_idx].vector, block); + irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, + NULL); + free_irq(priv->msix_vectors[msix_idx].vector, block); + } + free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); } dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks), priv->ntfy_blocks, priv->ntfy_block_bus); priv->ntfy_blocks = NULL; - free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); pci_disable_msix(priv->pdev); kvfree(priv->msix_vectors); priv->msix_vectors = NULL; From f81781835f0adfae8d701545386030d223efcd6f Mon Sep 17 00:00:00 2001 From: Catherine Sullivan Date: Mon, 17 May 2021 14:08:14 -0700 Subject: [PATCH 307/730] gve: Upgrade memory barrier in poll routine As currently written, if the driver checks for more work (via gve_tx_poll or gve_rx_poll) before the device posts work and the irq doorbell is not unmasked (via iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, ...)) before the device attempts to raise an interrupt, an interrupt is lost and this could potentially lead to the traffic being completely halted. For example, if a tx queue has already been stopped, the driver won't get the chance to complete work and egress will be halted. We need a full memory barrier in the poll routine to ensure that the irq doorbell is unmasked before the driver checks for more work. Fixes: f5cedc84a30d ("gve: Add transmit and receive support") Signed-off-by: Catherine Sullivan Signed-off-by: David Awogbemila Acked-by: Willem de Brujin Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 21a5d058dab4..bbc423e93122 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -180,7 +180,7 @@ static int gve_napi_poll(struct napi_struct *napi, int budget) /* Double check we have no extra work. * Ensure unmask synchronizes with checking for work. */ - dma_rmb(); + mb(); if (block->tx) reschedule |= gve_tx_poll(block, -1); if (block->rx) From fbd4a28b4fa66faaa7f510c0adc531d37e0a7848 Mon Sep 17 00:00:00 2001 From: David Awogbemila Date: Mon, 17 May 2021 14:08:15 -0700 Subject: [PATCH 308/730] gve: Correct SKB queue index validation. SKBs with skb_get_queue_mapping(skb) == tx_cfg.num_queues should also be considered invalid. Fixes: f5cedc84a30d ("gve: Add transmit and receive support") Signed-off-by: David Awogbemila Acked-by: Willem de Brujin Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index bb57c42872b4..3e04a3973d68 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -593,7 +593,7 @@ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev) struct gve_tx_ring *tx; int nsegs; - WARN(skb_get_queue_mapping(skb) > priv->tx_cfg.num_queues, + WARN(skb_get_queue_mapping(skb) >= priv->tx_cfg.num_queues, "skb queue index out of range"); tx = &priv->tx[skb_get_queue_mapping(skb)]; if (unlikely(gve_maybe_stop_tx(tx, skb))) { From 1dde47a66d4fb181830d6fa000e5ea86907b639e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 17 May 2021 12:04:13 +0300 Subject: [PATCH 309/730] net: mdiobus: get rid of a BUG_ON() We spotted a bug recently during a review where a driver was unregistering a bus that wasn't registered, which would trigger this BUG_ON(). Let's handle that situation more gracefully, and just print a warning and return. Reported-by: Russell King (Oracle) Signed-off-by: Dan Carpenter Reviewed-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index dadf75ff3ab9..6045ad3def12 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -607,7 +607,8 @@ void mdiobus_unregister(struct mii_bus *bus) struct mdio_device *mdiodev; int i; - BUG_ON(bus->state != MDIOBUS_REGISTERED); + if (WARN_ON_ONCE(bus->state != MDIOBUS_REGISTERED)) + return; bus->state = MDIOBUS_UNREGISTERED; for (i = 0; i < PHY_MAX_ADDR; i++) { From c67d734975a25ba7b6e8f820c13e0d8eb4a2a77c Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Thu, 29 Apr 2021 20:57:59 +0200 Subject: [PATCH 310/730] perf buildid-list: Initialize zstd_data Fixes segmentation fault when trying to obtain buildid list (e.g. via perf-archive) from a zstd-compressed `perf.data` file: ``` $ perf record -z ls ... [ perf record: Captured and wrote 0,010 MB perf.data, compressed (original 0,001 MB, ratio is 2,190) ] $ memcheck perf buildid-list ... ==57268== Invalid read of size 4 ==57268== at 0x5260D88: ZSTD_decompressStream (in /usr/lib/libzstd.so.1.4.9) ==57268== by 0x4BB51B: zstd_decompress_stream (zstd.c:100) ==57268== by 0x425C6C: perf_session__process_compressed_event (session.c:73) ==57268== by 0x427450: perf_session__process_user_event (session.c:1631) ==57268== by 0x42A609: reader__process_events (session.c:2207) ==57268== by 0x42A609: __perf_session__process_events (session.c:2264) ==57268== by 0x42A609: perf_session__process_events (session.c:2297) ==57268== by 0x343A62: perf_session__list_build_ids (builtin-buildid-list.c:88) ==57268== by 0x343A62: cmd_buildid_list (builtin-buildid-list.c:120) ==57268== by 0x3C7732: run_builtin (perf.c:313) ==57268== by 0x331157: handle_internal_command (perf.c:365) ==57268== by 0x331157: run_argv (perf.c:409) ==57268== by 0x331157: main (perf.c:539) ==57268== Address 0x7470 is not stack'd, malloc'd or (recently) free'd ``` Signed-off-by: Milian Wolff Cc: Alexey Budankov Link: http://lore.kernel.org/lkml/20210429185759.59870-1-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-buildid-list.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 87f5b1a4a7fa..833405c27dae 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -80,6 +80,9 @@ static int perf_session__list_build_ids(bool force, bool with_hits) if (!perf_header__has_feat(&session->header, HEADER_BUILD_ID)) with_hits = true; + if (zstd_init(&(session->zstd_data), 0) < 0) + pr_warning("Decompression initialization failed. Reported data may be incomplete.\n"); + /* * in pipe-mode, the only way to get the buildids is to parse * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID From 3c91e8efaf4838e4c8e465656e9707b5de26f3db Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 12 May 2021 12:35:07 -0300 Subject: [PATCH 311/730] tools arch kvm: Sync kvm headers with the kernel sources To pick up the changes from: 70f094f4f01dc4d6 ("KVM: nVMX: Properly pad 'struct kvm_vmx_nested_state_hdr'") That don't entail changes in tooling. This silences these tools/perf build warnings: Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/kvm.h' differs from latest version at 'arch/x86/include/uapi/asm/kvm.h' diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h Cc: Paolo Bonzini Cc: Vitaly Kuznetsov Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/kvm.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 5a3022c8af82..0662f644aad9 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -437,6 +437,8 @@ struct kvm_vmx_nested_state_hdr { __u16 flags; } smm; + __u16 pad; + __u32 flags; __u64 preemption_timer_deadline; }; From fea63d54f7a3e74f8ab489a8b82413a29849a594 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 17 May 2021 12:42:32 -0500 Subject: [PATCH 312/730] x86/sev-es: Move sev_es_put_ghcb() in prep for follow on patch Move the location of sev_es_put_ghcb() in preparation for an update to it in a follow-on patch. This will better highlight the changes being made to the function. No functional change. Fixes: 0786138c78e79 ("x86/sev-es: Add a Runtime #VC Exception Handler") Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/8c07662ec17d3d82e5c53841a1d9e766d3bdbab6.1621273353.git.thomas.lendacky@amd.com --- arch/x86/kernel/sev.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 9578c82832aa..45e212675811 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -221,24 +221,6 @@ static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state) return ghcb; } -static __always_inline void sev_es_put_ghcb(struct ghcb_state *state) -{ - struct sev_es_runtime_data *data; - struct ghcb *ghcb; - - data = this_cpu_read(runtime_data); - ghcb = &data->ghcb_page; - - if (state->ghcb) { - /* Restore GHCB from Backup */ - *ghcb = *state->ghcb; - data->backup_ghcb_active = false; - state->ghcb = NULL; - } else { - data->ghcb_active = false; - } -} - /* Needed in vc_early_forward_exception */ void do_early_exception(struct pt_regs *regs, int trapnr); @@ -461,6 +443,24 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt /* Include code shared with pre-decompression boot stage */ #include "sev-shared.c" +static __always_inline void sev_es_put_ghcb(struct ghcb_state *state) +{ + struct sev_es_runtime_data *data; + struct ghcb *ghcb; + + data = this_cpu_read(runtime_data); + ghcb = &data->ghcb_page; + + if (state->ghcb) { + /* Restore GHCB from Backup */ + *ghcb = *state->ghcb; + data->backup_ghcb_active = false; + state->ghcb = NULL; + } else { + data->ghcb_active = false; + } +} + void noinstr __sev_es_nmi_complete(void) { struct ghcb_state state; From a50c5bebc99c525e7fbc059988c6a5ab8680cb76 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 17 May 2021 12:42:33 -0500 Subject: [PATCH 313/730] x86/sev-es: Invalidate the GHCB after completing VMGEXIT Since the VMGEXIT instruction can be issued from userspace, invalidate the GHCB after performing VMGEXIT processing in the kernel. Invalidation is only required after userspace is available, so call vc_ghcb_invalidate() from sev_es_put_ghcb(). Update vc_ghcb_invalidate() to additionally clear the GHCB exit code so that it is always presented as 0 when VMGEXIT has been issued by anything else besides the kernel. Fixes: 0786138c78e79 ("x86/sev-es: Add a Runtime #VC Exception Handler") Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/5a8130462e4f0057ee1184509cd056eedd78742b.1621273353.git.thomas.lendacky@amd.com --- arch/x86/kernel/sev-shared.c | 1 + arch/x86/kernel/sev.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 6ec8b3bfd76e..9f90f460a28c 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -63,6 +63,7 @@ static bool sev_es_negotiate_protocol(void) static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb) { + ghcb->save.sw_exit_code = 0; memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); } diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 45e212675811..4fa111becc93 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -457,6 +457,11 @@ static __always_inline void sev_es_put_ghcb(struct ghcb_state *state) data->backup_ghcb_active = false; state->ghcb = NULL; } else { + /* + * Invalidate the GHCB so a VMGEXIT instruction issued + * from userspace won't appear to be valid. + */ + vc_ghcb_invalidate(ghcb); data->ghcb_active = false; } } From 9f079c1bdc9087842dc5ac9d81b1d7f2578e81ce Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Tue, 18 May 2021 10:25:10 +0900 Subject: [PATCH 314/730] ALSA: dice: disable double_pcm_frames mode for M-Audio Profire 610, 2626 and Avid M-Box 3 Pro ALSA dice driver detects jumbo payload at high sampling transfer frequency for below models: * Avid M-Box 3 Pro * M-Audio Profire 610 * M-Audio Profire 2626 Although many DICE-based devices have a quirk at high sampling transfer frequency to multiplex double number of PCM frames into data block than the number in IEC 61883-1/6, the above devices are just compliant to IEC 61883-1/6. This commit disables the mode of double_pcm_frames for the models. Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20210518012510.37126-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- sound/firewire/dice/dice-pcm.c | 4 ++-- sound/firewire/dice/dice-stream.c | 2 +- sound/firewire/dice/dice.c | 24 ++++++++++++++++++++++++ sound/firewire/dice/dice.h | 3 ++- 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/sound/firewire/dice/dice-pcm.c b/sound/firewire/dice/dice-pcm.c index af8a90ee40f3..a69ca1111b03 100644 --- a/sound/firewire/dice/dice-pcm.c +++ b/sound/firewire/dice/dice-pcm.c @@ -218,7 +218,7 @@ static int pcm_open(struct snd_pcm_substream *substream) if (frames_per_period > 0) { // For double_pcm_frame quirk. - if (rate > 96000) { + if (rate > 96000 && !dice->disable_double_pcm_frames) { frames_per_period *= 2; frames_per_buffer *= 2; } @@ -273,7 +273,7 @@ static int pcm_hw_params(struct snd_pcm_substream *substream, mutex_lock(&dice->mutex); // For double_pcm_frame quirk. - if (rate > 96000) { + if (rate > 96000 && !dice->disable_double_pcm_frames) { events_per_period /= 2; events_per_buffer /= 2; } diff --git a/sound/firewire/dice/dice-stream.c b/sound/firewire/dice/dice-stream.c index 1a14c083e8ce..c4dfe76500c2 100644 --- a/sound/firewire/dice/dice-stream.c +++ b/sound/firewire/dice/dice-stream.c @@ -181,7 +181,7 @@ static int keep_resources(struct snd_dice *dice, struct amdtp_stream *stream, // as 'Dual Wire'. // For this quirk, blocking mode is required and PCM buffer size should // be aligned to SYT_INTERVAL. - double_pcm_frames = rate > 96000; + double_pcm_frames = (rate > 96000 && !dice->disable_double_pcm_frames); if (double_pcm_frames) { rate /= 2; pcm_chs *= 2; diff --git a/sound/firewire/dice/dice.c b/sound/firewire/dice/dice.c index 107a81691f0e..239d164b0eea 100644 --- a/sound/firewire/dice/dice.c +++ b/sound/firewire/dice/dice.c @@ -21,6 +21,7 @@ MODULE_LICENSE("GPL v2"); #define OUI_SSL 0x0050c2 // Actually ID reserved by IEEE. #define OUI_PRESONUS 0x000a92 #define OUI_HARMAN 0x000fd7 +#define OUI_AVID 0x00a07e #define DICE_CATEGORY_ID 0x04 #define WEISS_CATEGORY_ID 0x00 @@ -222,6 +223,14 @@ static int dice_probe(struct fw_unit *unit, (snd_dice_detect_formats_t)entry->driver_data; } + // Below models are compliant to IEC 61883-1/6 and have no quirk at high sampling transfer + // frequency. + // * Avid M-Box 3 Pro + // * M-Audio Profire 610 + // * M-Audio Profire 2626 + if (entry->vendor_id == OUI_MAUDIO || entry->vendor_id == OUI_AVID) + dice->disable_double_pcm_frames = true; + spin_lock_init(&dice->lock); mutex_init(&dice->mutex); init_completion(&dice->clock_accepted); @@ -278,7 +287,22 @@ static void dice_bus_reset(struct fw_unit *unit) #define DICE_INTERFACE 0x000001 +#define DICE_DEV_ENTRY_TYPICAL(vendor, model, data) \ + { \ + .match_flags = IEEE1394_MATCH_VENDOR_ID | \ + IEEE1394_MATCH_MODEL_ID | \ + IEEE1394_MATCH_SPECIFIER_ID | \ + IEEE1394_MATCH_VERSION, \ + .vendor_id = (vendor), \ + .model_id = (model), \ + .specifier_id = (vendor), \ + .version = DICE_INTERFACE, \ + .driver_data = (kernel_ulong_t)(data), \ + } + static const struct ieee1394_device_id dice_id_table[] = { + // Avid M-Box 3 Pro. To match in probe function. + DICE_DEV_ENTRY_TYPICAL(OUI_AVID, 0x000004, snd_dice_detect_extension_formats), /* M-Audio Profire 2626 has a different value in version field. */ { .match_flags = IEEE1394_MATCH_VENDOR_ID | diff --git a/sound/firewire/dice/dice.h b/sound/firewire/dice/dice.h index adc6f7c84460..3c967d1b3605 100644 --- a/sound/firewire/dice/dice.h +++ b/sound/firewire/dice/dice.h @@ -109,7 +109,8 @@ struct snd_dice { struct fw_iso_resources rx_resources[MAX_STREAMS]; struct amdtp_stream tx_stream[MAX_STREAMS]; struct amdtp_stream rx_stream[MAX_STREAMS]; - bool global_enabled; + bool global_enabled:1; + bool disable_double_pcm_frames:1; struct completion clock_accepted; unsigned int substreams_counter; From 4c6fe8c547e3c9e8c15dabdd23c569ee0df3adb1 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Tue, 18 May 2021 10:26:12 +0900 Subject: [PATCH 315/730] ALSA: dice: fix stream format for TC Electronic Konnekt Live at high sampling transfer frequency At high sampling transfer frequency, TC Electronic Konnekt Live transfers/receives 6 audio data frames in multi bit linear audio data channel of data block in CIP payload. Current hard-coded stream format is wrong. Cc: Fixes: f1f0f330b1d0 ("ALSA: dice: add parameters of stream formats for models produced by TC Electronic") Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20210518012612.37268-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- sound/firewire/dice/dice-tcelectronic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/firewire/dice/dice-tcelectronic.c b/sound/firewire/dice/dice-tcelectronic.c index a8875d24ba2a..43a3bcb15b3d 100644 --- a/sound/firewire/dice/dice-tcelectronic.c +++ b/sound/firewire/dice/dice-tcelectronic.c @@ -38,8 +38,8 @@ static const struct dice_tc_spec konnekt_24d = { }; static const struct dice_tc_spec konnekt_live = { - .tx_pcm_chs = {{16, 16, 16}, {0, 0, 0} }, - .rx_pcm_chs = {{16, 16, 16}, {0, 0, 0} }, + .tx_pcm_chs = {{16, 16, 6}, {0, 0, 0} }, + .rx_pcm_chs = {{16, 16, 6}, {0, 0, 0} }, .has_midi = true, }; From d6177a6556f853785867e2ec6d5b7f4906f0d809 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 22 Apr 2021 11:42:19 +0200 Subject: [PATCH 316/730] iommu/amd: Clear DMA ops when switching domain Since commit 08a27c1c3ecf ("iommu: Add support to change default domain of an iommu group") a user can switch a device between IOMMU and direct DMA through sysfs. This doesn't work for AMD IOMMU at the moment because dev->dma_ops is not cleared when switching from a DMA to an identity IOMMU domain. The DMA layer thus attempts to use the dma-iommu ops on an identity domain, causing an oops: # echo 0000:00:05.0 > /sys/sys/bus/pci/drivers/e1000e/unbind # echo identity > /sys/bus/pci/devices/0000:00:05.0/iommu_group/type # echo 0000:00:05.0 > /sys/sys/bus/pci/drivers/e1000e/bind ... BUG: kernel NULL pointer dereference, address: 0000000000000028 ... Call Trace: iommu_dma_alloc e1000e_setup_tx_resources e1000e_open Since iommu_change_dev_def_domain() calls probe_finalize() again, clear the dma_ops there like Vt-d does. Fixes: 08a27c1c3ecf ("iommu: Add support to change default domain of an iommu group") Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20210422094216.2282097-1-jean-philippe@linaro.org Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 80e8e1916dd1..67da96d5b3c2 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1714,6 +1714,8 @@ static void amd_iommu_probe_finalize(struct device *dev) domain = iommu_get_domain_for_dev(dev); if (domain->type == IOMMU_DOMAIN_DMA) iommu_setup_dma_ops(dev, IOVA_START_PFN << PAGE_SHIFT, 0); + else + set_dma_ops(dev, NULL); } static void amd_iommu_release_device(struct device *dev) From a017c567915fd7a017006f8c210e2c6b30ab6fad Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sat, 1 May 2021 23:59:56 -0700 Subject: [PATCH 317/730] iommu/amd: Fix wrong parentheses on page-specific invalidations The logic to determine the mask of page-specific invalidations was tested in userspace. As the code was copied into the kernel, the parentheses were mistakenly set in the wrong place, resulting in the wrong mask. Fix it. Cc: Joerg Roedel Cc: Will Deacon Cc: Jiajun Cao Cc: iommu@lists.linux-foundation.org Cc: linux-kernel@vger.kernel.org Fixes: 268aa4548277 ("iommu/amd: Page-specific invalidations for more than one page") Signed-off-by: Nadav Amit Link: https://lore.kernel.org/r/20210502070001.1559127-2-namit@vmware.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 67da96d5b3c2..3ac42bbdefc6 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -884,7 +884,7 @@ static inline u64 build_inv_address(u64 address, size_t size) * The msb-bit must be clear on the address. Just set all the * lower bits. */ - address |= 1ull << (msb_diff - 1); + address |= (1ull << msb_diff) - 1; } /* Clear bits 11:0 */ From 382d91fc0f4f1b13f8a0dcbf7145f4f175b71a18 Mon Sep 17 00:00:00 2001 From: Bixuan Cui Date: Sat, 8 May 2021 11:14:51 +0800 Subject: [PATCH 318/730] iommu/virtio: Add missing MODULE_DEVICE_TABLE This patch adds missing MODULE_DEVICE_TABLE definition which generates correct modalias for automatic loading of this driver when it is built as an external module. Reported-by: Hulk Robot Signed-off-by: Bixuan Cui Fixes: fa4afd78ea12 ("iommu/virtio: Build virtio-iommu as module") Reviewed-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20210508031451.53493-1-cuibixuan@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/virtio-iommu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 7c02481a81b4..c6e5ee4d9cef 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -1136,6 +1136,7 @@ static struct virtio_device_id id_table[] = { { VIRTIO_ID_IOMMU, VIRTIO_DEV_ANY_ID }, { 0 }, }; +MODULE_DEVICE_TABLE(virtio, id_table); static struct virtio_driver virtio_iommu_drv = { .driver.name = KBUILD_MODNAME, From 76d0fc5e9bc650766a90cc3ffd2a29248df0f020 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 18 May 2021 10:08:37 +0100 Subject: [PATCH 319/730] arm64: Fix stale link in the arch_counter_enforce_ordering() comment With infradead.org archives gone, update the link to lore.kernel.org as these links are deemed stable. Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/barrier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 2175ec0004ed..451e11e5fd23 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -74,7 +74,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long idx, * This insanity brought to you by speculative system register reads, * out-of-order memory accesses, sequence locks and Thomas Gleixner. * - * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html + * https://lore.kernel.org/r/alpine.DEB.2.21.1902081950260.1662@nanos.tec.linutronix.de/ */ #define arch_counter_enforce_ordering(val) do { \ u64 tmp, _val = (val); \ From 3317c26a4b413b41364f2c4b83c778c6aba1576d Mon Sep 17 00:00:00 2001 From: Like Xu Date: Fri, 30 Apr 2021 13:22:46 +0800 Subject: [PATCH 320/730] perf/x86: Avoid touching LBR_TOS MSR for Arch LBR The Architecture LBR does not have MSR_LBR_TOS (0x000001c9). In a guest that should support Architecture LBR, check_msr() will be a non-related check for the architecture MSR 0x0 (IA32_P5_MC_ADDR) that is also not supported by KVM. The failure will cause x86_pmu.lbr_nr = 0, thereby preventing the initialization of the guest Arch LBR. Fix it by avoiding this extraneous check in intel_pmu_init() for Arch LBR. Fixes: 47125db27e47 ("perf/x86/intel/lbr: Support Architectural LBR") Signed-off-by: Like Xu [peterz: simpler still] Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210430052247.3079672-1-like.xu@linux.intel.com --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 2521d03de5e0..e28892270c58 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -6253,7 +6253,7 @@ __init int intel_pmu_init(void) * Check all LBT MSR here. * Disable LBR access if any LBR MSRs can not be accessed. */ - if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL)) + if (x86_pmu.lbr_tos && !check_msr(x86_pmu.lbr_tos, 0x3UL)) x86_pmu.lbr_nr = 0; for (i = 0; i < x86_pmu.lbr_nr; i++) { if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) && From 488e13a489e9707a7e81e1991fdd1f20c0f04689 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Fri, 30 Apr 2021 13:22:47 +0800 Subject: [PATCH 321/730] perf/x86/lbr: Remove cpuc->lbr_xsave allocation from atomic context If the kernel is compiled with the CONFIG_LOCKDEP option, the conditional might_sleep_if() deep in kmem_cache_alloc() will generate the following trace, and potentially cause a deadlock when another LBR event is added: [] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:196 [] Call Trace: [] kmem_cache_alloc+0x36/0x250 [] intel_pmu_lbr_add+0x152/0x170 [] x86_pmu_add+0x83/0xd0 Make it symmetric with the release_lbr_buffers() call and mirror the existing DS buffers. Fixes: c085fb8774 ("perf/x86/intel/lbr: Support XSAVES for arch LBR read") Signed-off-by: Like Xu [peterz: simplified] Signed-off-by: Peter Zijlstra (Intel) Tested-by: Kan Liang Link: https://lkml.kernel.org/r/20210430052247.3079672-2-like.xu@linux.intel.com --- arch/x86/events/core.c | 6 ++++-- arch/x86/events/intel/lbr.c | 26 ++++++++++++++++++++------ arch/x86/events/perf_event.h | 6 ++++++ 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 8e509325c2c3..8f71dd72ef95 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -396,10 +396,12 @@ int x86_reserve_hardware(void) if (!atomic_inc_not_zero(&pmc_refcount)) { mutex_lock(&pmc_reserve_mutex); if (atomic_read(&pmc_refcount) == 0) { - if (!reserve_pmc_hardware()) + if (!reserve_pmc_hardware()) { err = -EBUSY; - else + } else { reserve_ds_buffers(); + reserve_lbr_buffers(); + } } if (!err) atomic_inc(&pmc_refcount); diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 76dbab6ac9fb..4409d2cccfda 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -658,7 +658,6 @@ static inline bool branch_user_callstack(unsigned br_sel) void intel_pmu_lbr_add(struct perf_event *event) { - struct kmem_cache *kmem_cache = event->pmu->task_ctx_cache; struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (!x86_pmu.lbr_nr) @@ -696,11 +695,6 @@ void intel_pmu_lbr_add(struct perf_event *event) perf_sched_cb_inc(event->ctx->pmu); if (!cpuc->lbr_users++ && !event->total_time_running) intel_pmu_lbr_reset(); - - if (static_cpu_has(X86_FEATURE_ARCH_LBR) && - kmem_cache && !cpuc->lbr_xsave && - (cpuc->lbr_users != cpuc->lbr_pebs_users)) - cpuc->lbr_xsave = kmem_cache_alloc(kmem_cache, GFP_KERNEL); } void release_lbr_buffers(void) @@ -722,6 +716,26 @@ void release_lbr_buffers(void) } } +void reserve_lbr_buffers(void) +{ + struct kmem_cache *kmem_cache; + struct cpu_hw_events *cpuc; + int cpu; + + if (!static_cpu_has(X86_FEATURE_ARCH_LBR)) + return; + + for_each_possible_cpu(cpu) { + cpuc = per_cpu_ptr(&cpu_hw_events, cpu); + kmem_cache = x86_get_pmu(cpu)->task_ctx_cache; + if (!kmem_cache || cpuc->lbr_xsave) + continue; + + cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache, GFP_KERNEL, + cpu_to_node(cpu)); + } +} + void intel_pmu_lbr_del(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 27fa85e7d4fd..ad87cb36f7c8 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1244,6 +1244,8 @@ void reserve_ds_buffers(void); void release_lbr_buffers(void); +void reserve_lbr_buffers(void); + extern struct event_constraint bts_constraint; extern struct event_constraint vlbr_constraint; @@ -1393,6 +1395,10 @@ static inline void release_lbr_buffers(void) { } +static inline void reserve_lbr_buffers(void) +{ +} + static inline int intel_pmu_init(void) { return 0; From 89e70d5c583c55088faa2201d397ee30a15704aa Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 12 May 2021 20:09:37 +0800 Subject: [PATCH 322/730] locking/lockdep: Correct calling tracepoints The commit eb1f00237aca ("lockdep,trace: Expose tracepoints") reverses tracepoints for lock_contended() and lock_acquired(), thus the ftrace log shows the wrong locking sequence that "acquired" event is prior to "contended" event: -0 [001] d.s3 20803.501685: lock_acquire: 0000000008b91ab4 &sg_policy->update_lock -0 [001] d.s3 20803.501686: lock_acquired: 0000000008b91ab4 &sg_policy->update_lock -0 [001] d.s3 20803.501689: lock_contended: 0000000008b91ab4 &sg_policy->update_lock -0 [001] d.s3 20803.501690: lock_release: 0000000008b91ab4 &sg_policy->update_lock This patch fixes calling tracepoints for lock_contended() and lock_acquired(). Fixes: eb1f00237aca ("lockdep,trace: Expose tracepoints") Signed-off-by: Leo Yan Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210512120937.90211-1-leo.yan@linaro.org --- kernel/locking/lockdep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 48d736aa03b2..7641bd407239 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -5736,7 +5736,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; - trace_lock_acquired(lock, ip); + trace_lock_contended(lock, ip); if (unlikely(!lock_stat || !lockdep_enabled())) return; @@ -5754,7 +5754,7 @@ void lock_acquired(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; - trace_lock_contended(lock, ip); + trace_lock_acquired(lock, ip); if (unlikely(!lock_stat || !lockdep_enabled())) return; From 3a010c493271f04578b133de977e0e5dd2848cea Mon Sep 17 00:00:00 2001 From: Zqiang Date: Mon, 17 May 2021 11:40:05 +0800 Subject: [PATCH 323/730] locking/mutex: clear MUTEX_FLAGS if wait_list is empty due to signal When a interruptible mutex locker is interrupted by a signal without acquiring this lock and removed from the wait queue. if the mutex isn't contended enough to have a waiter put into the wait queue again, the setting of the WAITER bit will force mutex locker to go into the slowpath to acquire the lock every time, so if the wait queue is empty, the WAITER bit need to be clear. Fixes: 040a0a371005 ("mutex: Add support for wound/wait style locks") Suggested-by: Peter Zijlstra Signed-off-by: Zqiang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210517034005.30828-1-qiang.zhang@windriver.com --- kernel/locking/mutex-debug.c | 4 ++-- kernel/locking/mutex-debug.h | 2 +- kernel/locking/mutex.c | 18 +++++++++++++----- kernel/locking/mutex.h | 4 +--- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index a7276aaf2abc..db9301591e3f 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -57,7 +57,7 @@ void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, task->blocked_on = waiter; } -void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, +void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, struct task_struct *task) { DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); @@ -65,7 +65,7 @@ void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, DEBUG_LOCKS_WARN_ON(task->blocked_on != waiter); task->blocked_on = NULL; - list_del_init(&waiter->list); + INIT_LIST_HEAD(&waiter->list); waiter->task = NULL; } diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h index 1edd3f45a4ec..53e631e1d76d 100644 --- a/kernel/locking/mutex-debug.h +++ b/kernel/locking/mutex-debug.h @@ -22,7 +22,7 @@ extern void debug_mutex_free_waiter(struct mutex_waiter *waiter); extern void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, struct task_struct *task); -extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, +extern void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, struct task_struct *task); extern void debug_mutex_unlock(struct mutex *lock); extern void debug_mutex_init(struct mutex *lock, const char *name, diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index cb6b112ce155..013e1b08a1bf 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -194,7 +194,7 @@ static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_wait * Add @waiter to a given location in the lock wait_list and set the * FLAG_WAITERS flag if it's the first waiter. */ -static void __sched +static void __mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, struct list_head *list) { @@ -205,6 +205,16 @@ __mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, __mutex_set_flag(lock, MUTEX_FLAG_WAITERS); } +static void +__mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter) +{ + list_del(&waiter->list); + if (likely(list_empty(&lock->wait_list))) + __mutex_clear_flag(lock, MUTEX_FLAGS); + + debug_mutex_remove_waiter(lock, waiter, current); +} + /* * Give up ownership to a specific task, when @task = NULL, this is equivalent * to a regular unlock. Sets PICKUP on a handoff, clears HANDOFF, preserves @@ -1061,9 +1071,7 @@ acquired: __ww_mutex_check_waiters(lock, ww_ctx); } - mutex_remove_waiter(lock, &waiter, current); - if (likely(list_empty(&lock->wait_list))) - __mutex_clear_flag(lock, MUTEX_FLAGS); + __mutex_remove_waiter(lock, &waiter); debug_mutex_free_waiter(&waiter); @@ -1080,7 +1088,7 @@ skip_wait: err: __set_current_state(TASK_RUNNING); - mutex_remove_waiter(lock, &waiter, current); + __mutex_remove_waiter(lock, &waiter); err_early_kill: spin_unlock(&lock->wait_lock); debug_mutex_free_waiter(&waiter); diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h index 1c2287d3fa71..f0c710b1d192 100644 --- a/kernel/locking/mutex.h +++ b/kernel/locking/mutex.h @@ -10,12 +10,10 @@ * !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs: */ -#define mutex_remove_waiter(lock, waiter, task) \ - __list_del((waiter)->list.prev, (waiter)->list.next) - #define debug_mutex_wake_waiter(lock, waiter) do { } while (0) #define debug_mutex_free_waiter(waiter) do { } while (0) #define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0) +#define debug_mutex_remove_waiter(lock, waiter, ti) do { } while (0) #define debug_mutex_unlock(lock) do { } while (0) #define debug_mutex_init(lock, name, key) do { } while (0) From cdf112d4c65f83065793b73b49363123517fdb71 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 May 2021 23:31:14 +0200 Subject: [PATCH 324/730] ASoC: fsl: fix SND_SOC_IMX_RPMSG dependency Kconfig produces a warning with SND_SOC_FSL_RPMSG=y and SND_IMX_SOC=m: WARNING: unmet direct dependencies detected for SND_SOC_IMX_RPMSG Depends on [m]: SOUND [=y] && !UML && SND [=y] && SND_SOC [=y] && SND_IMX_SOC [=m] && RPMSG [=y] Selected by [y]: - SND_SOC_FSL_RPMSG [=y] && SOUND [=y] && !UML && SND [=y] && SND_SOC [=y] && COMMON_CLK [=y] && RPMSG [=y] && SND_IMX_SOC [=m]!=n Add a dependency to prevent this configuration. Signed-off-by: Arnd Bergmann Acked-by: Shengjiu Wang Link: https://lore.kernel.org/r/20210514213118.630427-1-arnd@kernel.org Signed-off-by: Mark Brown --- sound/soc/fsl/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig index 0917d65d6921..556c284f49dd 100644 --- a/sound/soc/fsl/Kconfig +++ b/sound/soc/fsl/Kconfig @@ -119,6 +119,7 @@ config SND_SOC_FSL_RPMSG tristate "NXP Audio Base On RPMSG support" depends on COMMON_CLK depends on RPMSG + depends on SND_IMX_SOC || SND_IMX_SOC = n select SND_SOC_IMX_RPMSG if SND_IMX_SOC != n help Say Y if you want to add rpmsg audio support for the Freescale CPUs. From 0b07154f066ab2c087c342b372be5771145bdc60 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 17 May 2021 17:39:46 +0200 Subject: [PATCH 325/730] dt-bindings: spi: spi-mux: rename flash node The recent conversion of the common MTD properties to YAML now mandates a particular node name for SPI flash devices. Reported-by: Rob Herring Signed-off-by: Michael Walle Acked-by: Rob Herring Link: https://lore.kernel.org/r/20210517153946.9502-1-michael@walle.cc Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/spi-mux.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/spi/spi-mux.yaml b/Documentation/devicetree/bindings/spi/spi-mux.yaml index 6c21a132b51f..c2c007260582 100644 --- a/Documentation/devicetree/bindings/spi/spi-mux.yaml +++ b/Documentation/devicetree/bindings/spi/spi-mux.yaml @@ -72,7 +72,7 @@ examples: mux-controls = <&mux>; - spi-flash@0 { + flash@0 { compatible = "jedec,spi-nor"; reg = <0>; #address-cells = <1>; From d7aed20d446d8c87f5e13adf73281056b0064a45 Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Wed, 12 May 2021 07:20:42 +0200 Subject: [PATCH 326/730] MAINTAINERS: Add Alain Volmat as STM32 SPI maintainer Add Alain Volmat as STM32 SPI maintainer. Signed-off-by: Alain Volmat Reviewed-by: Amelie Delaunay Link: https://lore.kernel.org/r/1620796842-23546-1-git-send-email-alain.volmat@foss.st.com Signed-off-by: Mark Brown --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 1c9827207949..528068e31845 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16954,6 +16954,12 @@ L: linux-i2c@vger.kernel.org S: Maintained F: drivers/i2c/busses/i2c-stm32* +ST STM32 SPI DRIVER +M: Alain Volmat +L: linux-spi@vger.kernel.org +S: Maintained +F: drivers/spi/spi-stm32.c + ST VL53L0X ToF RANGER(I2C) IIO DRIVER M: Song Qiang L: linux-iio@vger.kernel.org From d37316b72e8bf95a52d1c3e93c823d128c09b521 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Tue, 18 May 2021 16:45:14 +0930 Subject: [PATCH 327/730] ARM: npcm: wpcm450: select interrupt controller driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The interrupt controller driver is necessary in order to have a functioning Linux system on WPCM450. Select it in mach-npcm/Kconfig. Fixes: ece3fe93e8f4 ("ARM: npcm: Introduce Nuvoton WPCM450 SoC") Signed-off-by: Jonathan Neuschäfer Signed-off-by: Joel Stanley Reviewed-by: Joel Stanley Link: https://lore.kernel.org/r/20210513165627.1767093-1-j.neuschaefer@gmx.net Link: https://lore.kernel.org/r/20210518071514.604492-1-joel@jms.id.au' Signed-off-by: Arnd Bergmann --- arch/arm/mach-npcm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-npcm/Kconfig b/arch/arm/mach-npcm/Kconfig index 658c8efb4ca1..a71cf1d189ae 100644 --- a/arch/arm/mach-npcm/Kconfig +++ b/arch/arm/mach-npcm/Kconfig @@ -10,6 +10,7 @@ config ARCH_WPCM450 bool "Support for WPCM450 BMC (Hermon)" depends on ARCH_MULTI_V5 select CPU_ARM926T + select WPCM450_AIC select NPCM7XX_TIMER help General support for WPCM450 BMC (Hermon). From 5881fa8dc2de9697a89451f6518e8b3a796c09c6 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Fri, 7 May 2021 14:53:04 +0200 Subject: [PATCH 328/730] debugfs: fix security_locked_down() call for SELinux When (ia->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) is zero, then the SELinux implementation of the locked_down hook might report a denial even though the operation would actually be allowed. To fix this, make sure that security_locked_down() is called only when the return value will be taken into account (i.e. when changing one of the problematic attributes). Note: this was introduced by commit 5496197f9b08 ("debugfs: Restrict debugfs when the kernel is locked down"), but it didn't matter at that time, as the SELinux support came in later. Fixes: 59438b46471a ("security,lockdown,selinux: implement SELinux lockdown") Cc: stable Signed-off-by: Ondrej Mosnacek Link: https://lore.kernel.org/r/20210507125304.144394-1-omosnace@redhat.com Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/inode.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 1d252164d97b..8129a430d789 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -45,10 +45,13 @@ static unsigned int debugfs_allow __ro_after_init = DEFAULT_DEBUGFS_ALLOW_BITS; static int debugfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, struct iattr *ia) { - int ret = security_locked_down(LOCKDOWN_DEBUGFS); + int ret; - if (ret && (ia->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))) - return ret; + if (ia->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) { + ret = security_locked_down(LOCKDOWN_DEBUGFS); + if (ret) + return ret; + } return simple_setattr(&init_user_ns, dentry, ia); } From 889d916b6f8a48b8c9489fffcad3b78eedd01a51 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 11 May 2021 08:48:28 +0300 Subject: [PATCH 329/730] RDMA/core: Don't access cm_id after its destruction restrack should only be attached to a cm_id while the ID has a valid device pointer. It is set up when the device is first loaded, but not cleared when the device is removed. There is also two copies of the device pointer, one private and one in the public API, and these were left out of sync. Make everything go to NULL together and manipulate restrack right around the device assignments. Found by syzcaller: BUG: KASAN: wild-memory-access in __list_del include/linux/list.h:112 [inline] BUG: KASAN: wild-memory-access in __list_del_entry include/linux/list.h:135 [inline] BUG: KASAN: wild-memory-access in list_del include/linux/list.h:146 [inline] BUG: KASAN: wild-memory-access in cma_cancel_listens drivers/infiniband/core/cma.c:1767 [inline] BUG: KASAN: wild-memory-access in cma_cancel_operation drivers/infiniband/core/cma.c:1795 [inline] BUG: KASAN: wild-memory-access in cma_cancel_operation+0x1f4/0x4b0 drivers/infiniband/core/cma.c:1783 Write of size 8 at addr dead000000000108 by task syz-executor716/334 CPU: 0 PID: 334 Comm: syz-executor716 Not tainted 5.11.0+ #271 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0xbe/0xf9 lib/dump_stack.c:120 __kasan_report mm/kasan/report.c:400 [inline] kasan_report.cold+0x5f/0xd5 mm/kasan/report.c:413 __list_del include/linux/list.h:112 [inline] __list_del_entry include/linux/list.h:135 [inline] list_del include/linux/list.h:146 [inline] cma_cancel_listens drivers/infiniband/core/cma.c:1767 [inline] cma_cancel_operation drivers/infiniband/core/cma.c:1795 [inline] cma_cancel_operation+0x1f4/0x4b0 drivers/infiniband/core/cma.c:1783 _destroy_id+0x29/0x460 drivers/infiniband/core/cma.c:1862 ucma_close_id+0x36/0x50 drivers/infiniband/core/ucma.c:185 ucma_destroy_private_ctx+0x58d/0x5b0 drivers/infiniband/core/ucma.c:576 ucma_close+0x91/0xd0 drivers/infiniband/core/ucma.c:1797 __fput+0x169/0x540 fs/file_table.c:280 task_work_run+0xb7/0x100 kernel/task_work.c:140 exit_task_work include/linux/task_work.h:30 [inline] do_exit+0x7da/0x17f0 kernel/exit.c:825 do_group_exit+0x9e/0x190 kernel/exit.c:922 __do_sys_exit_group kernel/exit.c:933 [inline] __se_sys_exit_group kernel/exit.c:931 [inline] __x64_sys_exit_group+0x2d/0x30 kernel/exit.c:931 do_syscall_64+0x2d/0x40 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 255d0c14b375 ("RDMA/cma: rdma_bind_addr() leaks a cma_dev reference count") Link: https://lore.kernel.org/r/3352ee288fe34f2b44220457a29bfc0548686363.1620711734.git.leonro@nvidia.com Signed-off-by: Shay Drory Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 2b9ffc21cbc4..ab148a696c0c 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -473,6 +473,7 @@ static void cma_release_dev(struct rdma_id_private *id_priv) list_del(&id_priv->list); cma_dev_put(id_priv->cma_dev); id_priv->cma_dev = NULL; + id_priv->id.device = NULL; if (id_priv->id.route.addr.dev_addr.sgid_attr) { rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr); id_priv->id.route.addr.dev_addr.sgid_attr = NULL; @@ -1860,6 +1861,7 @@ static void _destroy_id(struct rdma_id_private *id_priv, iw_destroy_cm_id(id_priv->cm_id.iw); } cma_leave_mc_groups(id_priv); + rdma_restrack_del(&id_priv->res); cma_release_dev(id_priv); } @@ -1873,7 +1875,6 @@ static void _destroy_id(struct rdma_id_private *id_priv, kfree(id_priv->id.route.path_rec); put_net(id_priv->id.route.addr.dev_addr.net); - rdma_restrack_del(&id_priv->res); kfree(id_priv); } @@ -3774,7 +3775,7 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) } id_priv->backlog = backlog; - if (id->device) { + if (id_priv->cma_dev) { if (rdma_cap_ib_cm(id->device, 1)) { ret = cma_ib_listen(id_priv); if (ret) From 976aac5f882989e4f6c1b3a7224819bf0e801c6a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 May 2021 16:00:08 +0200 Subject: [PATCH 330/730] kcsan: Fix debugfs initcall return type clang with CONFIG_LTO_CLANG points out that an initcall function should return an 'int' due to the changes made to the initcall macros in commit 3578ad11f3fb ("init: lto: fix PREL32 relocations"): kernel/kcsan/debugfs.c:274:15: error: returning 'void' from a function with incompatible result type 'int' late_initcall(kcsan_debugfs_init); ~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~ include/linux/init.h:292:46: note: expanded from macro 'late_initcall' #define late_initcall(fn) __define_initcall(fn, 7) Fixes: e36299efe7d7 ("kcsan, debugfs: Move debugfs file creation out of early init") Cc: stable Reviewed-by: Greg Kroah-Hartman Reviewed-by: Marco Elver Reviewed-by: Nathan Chancellor Reviewed-by: Miguel Ojeda Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney --- kernel/kcsan/debugfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c index c1dd02f3be8b..e65de172ccf7 100644 --- a/kernel/kcsan/debugfs.c +++ b/kernel/kcsan/debugfs.c @@ -266,9 +266,10 @@ static const struct file_operations debugfs_ops = .release = single_release }; -static void __init kcsan_debugfs_init(void) +static int __init kcsan_debugfs_init(void) { debugfs_create_file("kcsan", 0644, NULL, NULL, &debugfs_ops); + return 0; } late_initcall(kcsan_debugfs_init); From be07f056396d6bb40963c45a02951c566ddeef8e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 18 May 2021 10:09:08 +0800 Subject: [PATCH 331/730] tipc: simplify the finalize work queue This patch is to use "struct work_struct" for the finalize work queue instead of "struct tipc_net_work", as it can get the "net" and "addr" from tipc_net's other members and there is no need to add extra net and addr in tipc_net by defining "struct tipc_net_work". Note that it's safe to get net from tn->bcl as bcl is always released after the finalize work queue is done. Signed-off-by: Xin Long Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/core.c | 4 ++-- net/tipc/core.h | 8 +------- net/tipc/discover.c | 4 ++-- net/tipc/link.c | 5 +++++ net/tipc/link.h | 1 + net/tipc/net.c | 15 +++------------ 6 files changed, 14 insertions(+), 23 deletions(-) diff --git a/net/tipc/core.c b/net/tipc/core.c index 72f3ac73779b..3f4542e0f065 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -60,7 +60,7 @@ static int __net_init tipc_init_net(struct net *net) tn->trial_addr = 0; tn->addr_trial_end = 0; tn->capabilities = TIPC_NODE_CAPABILITIES; - INIT_WORK(&tn->final_work.work, tipc_net_finalize_work); + INIT_WORK(&tn->work, tipc_net_finalize_work); memset(tn->node_id, 0, sizeof(tn->node_id)); memset(tn->node_id_string, 0, sizeof(tn->node_id_string)); tn->mon_threshold = TIPC_DEF_MON_THRESHOLD; @@ -110,7 +110,7 @@ static void __net_exit tipc_exit_net(struct net *net) tipc_detach_loopback(net); /* Make sure the tipc_net_finalize_work() finished */ - cancel_work_sync(&tn->final_work.work); + cancel_work_sync(&tn->work); tipc_net_stop(net); tipc_bcast_stop(net); diff --git a/net/tipc/core.h b/net/tipc/core.h index 5741ae488bb5..0a3f7a70a50a 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -91,12 +91,6 @@ extern unsigned int tipc_net_id __read_mostly; extern int sysctl_tipc_rmem[3] __read_mostly; extern int sysctl_tipc_named_timeout __read_mostly; -struct tipc_net_work { - struct work_struct work; - struct net *net; - u32 addr; -}; - struct tipc_net { u8 node_id[NODE_ID_LEN]; u32 node_addr; @@ -148,7 +142,7 @@ struct tipc_net { struct tipc_crypto *crypto_tx; #endif /* Work item for net finalize */ - struct tipc_net_work final_work; + struct work_struct work; /* The numbers of work queues in schedule */ atomic_t wq_count; }; diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 5380f605b851..da69e1abf68f 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -168,7 +168,7 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d, /* Apply trial address if we just left trial period */ if (!trial && !self) { - tipc_sched_net_finalize(net, tn->trial_addr); + schedule_work(&tn->work); msg_set_prevnode(buf_msg(d->skb), tn->trial_addr); msg_set_type(buf_msg(d->skb), DSC_REQ_MSG); } @@ -308,7 +308,7 @@ static void tipc_disc_timeout(struct timer_list *t) if (!time_before(jiffies, tn->addr_trial_end) && !tipc_own_addr(net)) { mod_timer(&d->timer, jiffies + TIPC_DISC_INIT); spin_unlock_bh(&d->lock); - tipc_sched_net_finalize(net, tn->trial_addr); + schedule_work(&tn->work); return; } diff --git a/net/tipc/link.c b/net/tipc/link.c index 115109259430..c44b4bfaaee6 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -372,6 +372,11 @@ char tipc_link_plane(struct tipc_link *l) return l->net_plane; } +struct net *tipc_link_net(struct tipc_link *l) +{ + return l->net; +} + void tipc_link_update_caps(struct tipc_link *l, u16 capabilities) { l->peer_caps = capabilities; diff --git a/net/tipc/link.h b/net/tipc/link.h index fc07232c9a12..a16f401fdabd 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -156,4 +156,5 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq); bool tipc_link_too_silent(struct tipc_link *l); +struct net *tipc_link_net(struct tipc_link *l); #endif diff --git a/net/tipc/net.c b/net/tipc/net.c index a130195af188..0e95572e56b4 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -41,6 +41,7 @@ #include "socket.h" #include "node.h" #include "bcast.h" +#include "link.h" #include "netlink.h" #include "monitor.h" @@ -142,19 +143,9 @@ static void tipc_net_finalize(struct net *net, u32 addr) void tipc_net_finalize_work(struct work_struct *work) { - struct tipc_net_work *fwork; + struct tipc_net *tn = container_of(work, struct tipc_net, work); - fwork = container_of(work, struct tipc_net_work, work); - tipc_net_finalize(fwork->net, fwork->addr); -} - -void tipc_sched_net_finalize(struct net *net, u32 addr) -{ - struct tipc_net *tn = tipc_net(net); - - tn->final_work.net = net; - tn->final_work.addr = addr; - schedule_work(&tn->final_work.work); + tipc_net_finalize(tipc_link_net(tn->bcl), tn->trial_addr); } void tipc_net_stop(struct net *net) From 33e6b1674f339c5d3be56ec9b4921d1ddd14327d Mon Sep 17 00:00:00 2001 From: Markus Bloechl Date: Tue, 18 May 2021 11:54:11 +0200 Subject: [PATCH 332/730] net: lan78xx: advertise tx software timestamping support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lan78xx already calls skb_tx_timestamp() in its lan78xx_start_xmit(). Override .get_ts_info to also advertise this capability (SOF_TIMESTAMPING_TX_SOFTWARE) via ethtool. Signed-off-by: Markus Blöchl Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 6acc5e904518..02bce40a67e5 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1645,6 +1645,7 @@ static const struct ethtool_ops lan78xx_ethtool_ops = { .get_strings = lan78xx_get_strings, .get_wol = lan78xx_get_wol, .set_wol = lan78xx_set_wol, + .get_ts_info = ethtool_op_get_ts_info, .get_eee = lan78xx_get_eee, .set_eee = lan78xx_set_eee, .get_pauseparam = lan78xx_get_pause, From a710b9ffbebaf713f7dbd4dbd9524907e5d66f33 Mon Sep 17 00:00:00 2001 From: Jiaran Zhang Date: Tue, 18 May 2021 19:36:00 +0800 Subject: [PATCH 333/730] net: hns3: fix incorrect resp_msg issue In hclge_mbx_handler(), if there are two consecutive mailbox messages that requires resp_msg, the resp_msg is not cleared after processing the first message, which will cause the resp_msg data of second message incorrect. Fix it by clearing the resp_msg before processing every mailbox message. Fixes: bb5790b71bad ("net: hns3: refactor mailbox response scheme between PF and VF") Signed-off-by: Jiaran Zhang Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index 8e5f9dc8791d..f1c9f4ada348 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -710,7 +710,6 @@ void hclge_mbx_handler(struct hclge_dev *hdev) unsigned int flag; int ret = 0; - memset(&resp_msg, 0, sizeof(resp_msg)); /* handle all the mailbox requests in the queue */ while (!hclge_cmd_crq_empty(&hdev->hw)) { if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state)) { @@ -738,6 +737,9 @@ void hclge_mbx_handler(struct hclge_dev *hdev) trace_hclge_pf_mbx_get(hdev, req); + /* clear the resp_msg before processing every mailbox message */ + memset(&resp_msg, 0, sizeof(resp_msg)); + switch (req->msg.code) { case HCLGE_MBX_MAP_RING_TO_VECTOR: ret = hclge_map_unmap_ring_to_vf_vector(vport, true, From a289a7e5c1d49b7d47df9913c1cc81fb48fab613 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Tue, 18 May 2021 19:36:01 +0800 Subject: [PATCH 334/730] net: hns3: put off calling register_netdev() until client initialize complete Currently, the netdevice is registered before client initializing complete. So there is a timewindow between netdevice available and usable. In this case, if user try to change the channel number or ring param, it may cause the hns3_set_rx_cpu_rmap() being called twice, and report bug. [47199.416502] hns3 0000:35:00.0 eth1: set channels: tqp_num=1, rxfh=0 [47199.430340] hns3 0000:35:00.0 eth1: already uninitialized [47199.438554] hns3 0000:35:00.0: rss changes from 4 to 1 [47199.511854] hns3 0000:35:00.0: Channels changed, rss_size from 4 to 1, tqps from 4 to 1 [47200.163524] ------------[ cut here ]------------ [47200.171674] kernel BUG at lib/cpu_rmap.c:142! [47200.177847] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [47200.185259] Modules linked in: hclge(+) hns3(-) hns3_cae(O) hns_roce_hw_v2 hnae3 vfio_iommu_type1 vfio_pci vfio_virqfd vfio pv680_mii(O) [last unloaded: hclge] [47200.205912] CPU: 1 PID: 8260 Comm: ethtool Tainted: G O 5.11.0-rc3+ #1 [47200.215601] Hardware name: , xxxxxx 02/04/2021 [47200.223052] pstate: 60400009 (nZCv daif +PAN -UAO -TCO BTYPE=--) [47200.230188] pc : cpu_rmap_add+0x38/0x40 [47200.237472] lr : irq_cpu_rmap_add+0x84/0x140 [47200.243291] sp : ffff800010e93a30 [47200.247295] x29: ffff800010e93a30 x28: ffff082100584880 [47200.254155] x27: 0000000000000000 x26: 0000000000000000 [47200.260712] x25: 0000000000000000 x24: 0000000000000004 [47200.267241] x23: ffff08209ba03000 x22: ffff08209ba038c0 [47200.273789] x21: 000000000000003f x20: ffff0820e2bc1680 [47200.280400] x19: ffff0820c970ec80 x18: 00000000000000c0 [47200.286944] x17: 0000000000000000 x16: ffffb43debe4a0d0 [47200.293456] x15: fffffc2082990600 x14: dead000000000122 [47200.300059] x13: ffffffffffffffff x12: 000000000000003e [47200.306606] x11: ffff0820815b8080 x10: ffff53e411988000 [47200.313171] x9 : 0000000000000000 x8 : ffff0820e2bc1700 [47200.319682] x7 : 0000000000000000 x6 : 000000000000003f [47200.326170] x5 : 0000000000000040 x4 : ffff800010e93a20 [47200.332656] x3 : 0000000000000004 x2 : ffff0820c970ec80 [47200.339168] x1 : ffff0820e2bc1680 x0 : 0000000000000004 [47200.346058] Call trace: [47200.349324] cpu_rmap_add+0x38/0x40 [47200.354300] hns3_set_rx_cpu_rmap+0x6c/0xe0 [hns3] [47200.362294] hns3_reset_notify_init_enet+0x1cc/0x340 [hns3] [47200.370049] hns3_change_channels+0x40/0xb0 [hns3] [47200.376770] hns3_set_channels+0x12c/0x2a0 [hns3] [47200.383353] ethtool_set_channels+0x140/0x250 [47200.389772] dev_ethtool+0x714/0x23d0 [47200.394440] dev_ioctl+0x4cc/0x640 [47200.399277] sock_do_ioctl+0x100/0x2a0 [47200.404574] sock_ioctl+0x28c/0x470 [47200.409079] __arm64_sys_ioctl+0xb4/0x100 [47200.415217] el0_svc_common.constprop.0+0x84/0x210 [47200.422088] do_el0_svc+0x28/0x34 [47200.426387] el0_svc+0x28/0x70 [47200.431308] el0_sync_handler+0x1a4/0x1b0 [47200.436477] el0_sync+0x174/0x180 [47200.441562] Code: 11000405 79000c45 f8247861 d65f03c0 (d4210000) [47200.448869] ---[ end trace a01efe4ce42e5f34 ]--- The process is like below: excuting hns3_client_init | register_netdev() | hns3_set_channels() | | hns3_set_rx_cpu_rmap() hns3_reset_notify_uninit_enet() | | | quit without calling function | hns3_free_rx_cpu_rmap for flag | HNS3_NIC_STATE_INITED is unset. | | | hns3_reset_notify_init_enet() | | set HNS3_NIC_STATE_INITED call hns3_set_rx_cpu_rmap()-- crash Fix it by calling register_netdev() at the end of function hns3_client_init(). Fixes: 08a100689d4b ("net: hns3: re-organize vector handle") Signed-off-by: Jian Shen Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 783fdaf8f8d6..c64d18878f64 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -4317,12 +4317,6 @@ static int hns3_client_init(struct hnae3_handle *handle) if (ret) goto out_init_phy; - ret = register_netdev(netdev); - if (ret) { - dev_err(priv->dev, "probe register netdev fail!\n"); - goto out_reg_netdev_fail; - } - /* the device can work without cpu rmap, only aRFS needs it */ ret = hns3_set_rx_cpu_rmap(netdev); if (ret) @@ -4355,17 +4349,23 @@ static int hns3_client_init(struct hnae3_handle *handle) if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3) set_bit(HNAE3_PFLAG_LIMIT_PROMISC, &handle->supported_pflags); + ret = register_netdev(netdev); + if (ret) { + dev_err(priv->dev, "probe register netdev fail!\n"); + goto out_reg_netdev_fail; + } + if (netif_msg_drv(handle)) hns3_info_show(priv); return ret; +out_reg_netdev_fail: + hns3_dbg_uninit(handle); out_client_start: hns3_free_rx_cpu_rmap(netdev); hns3_nic_uninit_irq(priv); out_init_irq_fail: - unregister_netdev(netdev); -out_reg_netdev_fail: hns3_uninit_phy(netdev); out_init_phy: hns3_uninit_all_ring(priv); From 73a13d8dbe33e53a12400f2be0f5af169816c67f Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Tue, 18 May 2021 19:36:02 +0800 Subject: [PATCH 335/730] net: hns3: fix user's coalesce configuration lost issue Currently, when adaptive is on, the user's coalesce configuration may be overwritten by the dynamic one. The reason is that user's configurations are saved in struct hns3_enet_tqp_vector whose value maybe changed by the dynamic algorithm. To fix it, use struct hns3_nic_priv instead of struct hns3_enet_tqp_vector to save and get the user's configuration. BTW, operations of storing and restoring coalesce info in the reset process are unnecessary now, so remove them as well. Fixes: 434776a5fae2 ("net: hns3: add ethtool_ops.set_coalesce support to PF") Fixes: 7e96adc46633 ("net: hns3: add ethtool_ops.get_coalesce support to PF") Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns3/hns3_enet.c | 84 +++++++++---------- .../ethernet/hisilicon/hns3/hns3_ethtool.c | 64 +++++--------- 2 files changed, 63 insertions(+), 85 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index c64d18878f64..6d6c0ac65bb4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -264,22 +264,17 @@ static void hns3_vector_coalesce_init(struct hns3_enet_tqp_vector *tqp_vector, struct hnae3_ae_dev *ae_dev = pci_get_drvdata(priv->ae_handle->pdev); struct hns3_enet_coalesce *tx_coal = &tqp_vector->tx_group.coal; struct hns3_enet_coalesce *rx_coal = &tqp_vector->rx_group.coal; + struct hns3_enet_coalesce *ptx_coal = &priv->tx_coal; + struct hns3_enet_coalesce *prx_coal = &priv->rx_coal; - /* initialize the configuration for interrupt coalescing. - * 1. GL (Interrupt Gap Limiter) - * 2. RL (Interrupt Rate Limiter) - * 3. QL (Interrupt Quantity Limiter) - * - * Default: enable interrupt coalescing self-adaptive and GL - */ - tx_coal->adapt_enable = 1; - rx_coal->adapt_enable = 1; + tx_coal->adapt_enable = ptx_coal->adapt_enable; + rx_coal->adapt_enable = prx_coal->adapt_enable; - tx_coal->int_gl = HNS3_INT_GL_50K; - rx_coal->int_gl = HNS3_INT_GL_50K; + tx_coal->int_gl = ptx_coal->int_gl; + rx_coal->int_gl = prx_coal->int_gl; - rx_coal->flow_level = HNS3_FLOW_LOW; - tx_coal->flow_level = HNS3_FLOW_LOW; + rx_coal->flow_level = prx_coal->flow_level; + tx_coal->flow_level = ptx_coal->flow_level; /* device version above V3(include V3), GL can configure 1us * unit, so uses 1us unit. @@ -294,8 +289,8 @@ static void hns3_vector_coalesce_init(struct hns3_enet_tqp_vector *tqp_vector, rx_coal->ql_enable = 1; tx_coal->int_ql_max = ae_dev->dev_specs.int_ql_max; rx_coal->int_ql_max = ae_dev->dev_specs.int_ql_max; - tx_coal->int_ql = HNS3_INT_QL_DEFAULT_CFG; - rx_coal->int_ql = HNS3_INT_QL_DEFAULT_CFG; + tx_coal->int_ql = ptx_coal->int_ql; + rx_coal->int_ql = prx_coal->int_ql; } } @@ -3844,6 +3839,34 @@ map_ring_fail: return ret; } +static void hns3_nic_init_coal_cfg(struct hns3_nic_priv *priv) +{ + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(priv->ae_handle->pdev); + struct hns3_enet_coalesce *tx_coal = &priv->tx_coal; + struct hns3_enet_coalesce *rx_coal = &priv->rx_coal; + + /* initialize the configuration for interrupt coalescing. + * 1. GL (Interrupt Gap Limiter) + * 2. RL (Interrupt Rate Limiter) + * 3. QL (Interrupt Quantity Limiter) + * + * Default: enable interrupt coalescing self-adaptive and GL + */ + tx_coal->adapt_enable = 1; + rx_coal->adapt_enable = 1; + + tx_coal->int_gl = HNS3_INT_GL_50K; + rx_coal->int_gl = HNS3_INT_GL_50K; + + rx_coal->flow_level = HNS3_FLOW_LOW; + tx_coal->flow_level = HNS3_FLOW_LOW; + + if (ae_dev->dev_specs.int_ql_max) { + tx_coal->int_ql = HNS3_INT_QL_DEFAULT_CFG; + rx_coal->int_ql = HNS3_INT_QL_DEFAULT_CFG; + } +} + static int hns3_nic_alloc_vector_data(struct hns3_nic_priv *priv) { struct hnae3_handle *h = priv->ae_handle; @@ -4295,6 +4318,8 @@ static int hns3_client_init(struct hnae3_handle *handle) goto out_get_ring_cfg; } + hns3_nic_init_coal_cfg(priv); + ret = hns3_nic_alloc_vector_data(priv); if (ret) { ret = -ENOMEM; @@ -4571,31 +4596,6 @@ int hns3_nic_reset_all_ring(struct hnae3_handle *h) return 0; } -static void hns3_store_coal(struct hns3_nic_priv *priv) -{ - /* ethtool only support setting and querying one coal - * configuration for now, so save the vector 0' coal - * configuration here in order to restore it. - */ - memcpy(&priv->tx_coal, &priv->tqp_vector[0].tx_group.coal, - sizeof(struct hns3_enet_coalesce)); - memcpy(&priv->rx_coal, &priv->tqp_vector[0].rx_group.coal, - sizeof(struct hns3_enet_coalesce)); -} - -static void hns3_restore_coal(struct hns3_nic_priv *priv) -{ - u16 vector_num = priv->vector_num; - int i; - - for (i = 0; i < vector_num; i++) { - memcpy(&priv->tqp_vector[i].tx_group.coal, &priv->tx_coal, - sizeof(struct hns3_enet_coalesce)); - memcpy(&priv->tqp_vector[i].rx_group.coal, &priv->rx_coal, - sizeof(struct hns3_enet_coalesce)); - } -} - static int hns3_reset_notify_down_enet(struct hnae3_handle *handle) { struct hnae3_knic_private_info *kinfo = &handle->kinfo; @@ -4654,8 +4654,6 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle) if (ret) goto err_put_ring; - hns3_restore_coal(priv); - ret = hns3_nic_init_vector_data(priv); if (ret) goto err_dealloc_vector; @@ -4721,8 +4719,6 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle) hns3_nic_uninit_vector_data(priv); - hns3_store_coal(priv); - hns3_nic_dealloc_vector_data(priv); hns3_uninit_all_ring(priv); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index b48faf769b1c..c1ea403d2b56 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -1134,50 +1134,32 @@ static void hns3_get_channels(struct net_device *netdev, h->ae_algo->ops->get_channels(h, ch); } -static int hns3_get_coalesce_per_queue(struct net_device *netdev, u32 queue, - struct ethtool_coalesce *cmd) +static int hns3_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *cmd) { - struct hns3_enet_tqp_vector *tx_vector, *rx_vector; struct hns3_nic_priv *priv = netdev_priv(netdev); + struct hns3_enet_coalesce *tx_coal = &priv->tx_coal; + struct hns3_enet_coalesce *rx_coal = &priv->rx_coal; struct hnae3_handle *h = priv->ae_handle; - u16 queue_num = h->kinfo.num_tqps; if (hns3_nic_resetting(netdev)) return -EBUSY; - if (queue >= queue_num) { - netdev_err(netdev, - "Invalid queue value %u! Queue max id=%u\n", - queue, queue_num - 1); - return -EINVAL; - } + cmd->use_adaptive_tx_coalesce = tx_coal->adapt_enable; + cmd->use_adaptive_rx_coalesce = rx_coal->adapt_enable; - tx_vector = priv->ring[queue].tqp_vector; - rx_vector = priv->ring[queue_num + queue].tqp_vector; - - cmd->use_adaptive_tx_coalesce = - tx_vector->tx_group.coal.adapt_enable; - cmd->use_adaptive_rx_coalesce = - rx_vector->rx_group.coal.adapt_enable; - - cmd->tx_coalesce_usecs = tx_vector->tx_group.coal.int_gl; - cmd->rx_coalesce_usecs = rx_vector->rx_group.coal.int_gl; + cmd->tx_coalesce_usecs = tx_coal->int_gl; + cmd->rx_coalesce_usecs = rx_coal->int_gl; cmd->tx_coalesce_usecs_high = h->kinfo.int_rl_setting; cmd->rx_coalesce_usecs_high = h->kinfo.int_rl_setting; - cmd->tx_max_coalesced_frames = tx_vector->tx_group.coal.int_ql; - cmd->rx_max_coalesced_frames = rx_vector->rx_group.coal.int_ql; + cmd->tx_max_coalesced_frames = tx_coal->int_ql; + cmd->rx_max_coalesced_frames = rx_coal->int_ql; return 0; } -static int hns3_get_coalesce(struct net_device *netdev, - struct ethtool_coalesce *cmd) -{ - return hns3_get_coalesce_per_queue(netdev, 0, cmd); -} - static int hns3_check_gl_coalesce_para(struct net_device *netdev, struct ethtool_coalesce *cmd) { @@ -1292,19 +1274,7 @@ static int hns3_check_coalesce_para(struct net_device *netdev, return ret; } - ret = hns3_check_ql_coalesce_param(netdev, cmd); - if (ret) - return ret; - - if (cmd->use_adaptive_tx_coalesce == 1 || - cmd->use_adaptive_rx_coalesce == 1) { - netdev_info(netdev, - "adaptive-tx=%u and adaptive-rx=%u, tx_usecs or rx_usecs will changed dynamically.\n", - cmd->use_adaptive_tx_coalesce, - cmd->use_adaptive_rx_coalesce); - } - - return 0; + return hns3_check_ql_coalesce_param(netdev, cmd); } static void hns3_set_coalesce_per_queue(struct net_device *netdev, @@ -1350,6 +1320,9 @@ static int hns3_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *cmd) { struct hnae3_handle *h = hns3_get_handle(netdev); + struct hns3_nic_priv *priv = netdev_priv(netdev); + struct hns3_enet_coalesce *tx_coal = &priv->tx_coal; + struct hns3_enet_coalesce *rx_coal = &priv->rx_coal; u16 queue_num = h->kinfo.num_tqps; int ret; int i; @@ -1364,6 +1337,15 @@ static int hns3_set_coalesce(struct net_device *netdev, h->kinfo.int_rl_setting = hns3_rl_round_down(cmd->rx_coalesce_usecs_high); + tx_coal->adapt_enable = cmd->use_adaptive_tx_coalesce; + rx_coal->adapt_enable = cmd->use_adaptive_rx_coalesce; + + tx_coal->int_gl = cmd->tx_coalesce_usecs; + rx_coal->int_gl = cmd->rx_coalesce_usecs; + + tx_coal->int_ql = cmd->tx_max_coalesced_frames; + rx_coal->int_ql = cmd->rx_max_coalesced_frames; + for (i = 0; i < queue_num; i++) hns3_set_coalesce_per_queue(netdev, cmd, i); From 9bb5a495424fd4bfa672eb1f31481248562fa156 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Tue, 18 May 2021 19:36:03 +0800 Subject: [PATCH 336/730] net: hns3: check the return of skb_checksum_help() Currently skb_checksum_help()'s return is ignored, but it may return error when it fails to allocate memory when linearizing. So adds checking for the return of skb_checksum_help(). Fixes: 76ad4f0ee747("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC") Fixes: 3db084d28dc0("net: hns3: Fix for vxlan tx checksum bug") Signed-off-by: Yunsheng Lin Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 6d6c0ac65bb4..026558f8e04b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -841,8 +841,6 @@ static bool hns3_tunnel_csum_bug(struct sk_buff *skb) l4.udp->dest == htons(4790)))) return false; - skb_checksum_help(skb); - return true; } @@ -919,8 +917,7 @@ static int hns3_set_l2l3l4(struct sk_buff *skb, u8 ol4_proto, /* the stack computes the IP header already, * driver calculate l4 checksum when not TSO. */ - skb_checksum_help(skb); - return 0; + return skb_checksum_help(skb); } hns3_set_outer_l2l3l4(skb, ol4_proto, ol_type_vlan_len_msec); @@ -965,7 +962,7 @@ static int hns3_set_l2l3l4(struct sk_buff *skb, u8 ol4_proto, break; case IPPROTO_UDP: if (hns3_tunnel_csum_bug(skb)) - break; + return skb_checksum_help(skb); hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1); hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4T_S, @@ -990,8 +987,7 @@ static int hns3_set_l2l3l4(struct sk_buff *skb, u8 ol4_proto, /* the stack computes the IP header already, * driver calculate l4 checksum when not TSO. */ - skb_checksum_help(skb); - return 0; + return skb_checksum_help(skb); } return 0; From add0b32ef9146a8559a60aed54c37692a5f9d34f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 30 Apr 2021 17:06:01 -0500 Subject: [PATCH 337/730] siginfo: Move si_trapno inside the union inside _si_fault It turns out that linux uses si_trapno very sparingly, and as such it can be considered extra information for a very narrow selection of signals, rather than information that is present with every fault reported in siginfo. As such move si_trapno inside the union inside of _si_fault. This results in no change in placement, and makes it eaiser to extend _si_fault in the future as this reduces the number of special cases. In particular with si_trapno included in the union it is no longer a concern that the union must be pointer aligned on most architectures because the union follows immediately after si_addr which is a pointer. This change results in a difference in siginfo field placement on sparc and alpha for the fields si_addr_lsb, si_lower, si_upper, si_pkey, and si_perf. These architectures do not implement the signals that would use si_addr_lsb, si_lower, si_upper, si_pkey, and si_perf. Further these architecture have not yet implemented the userspace that would use si_perf. The point of this change is in fact to correct these placement issues before sparc or alpha grow userspace that cares. This change was discussed[1] and the agreement is that this change is currently safe. [1]: https://lkml.kernel.org/r/CAK8P3a0+uKYwL1NhY6Hvtieghba2hKYGD6hcKx5n8=4Gtt+pHA@mail.gmail.com Acked-by: Marco Elver v1: https://lkml.kernel.org/r/m1tunns7yf.fsf_-_@fess.ebiederm.org v2: https://lkml.kernel.org/r/20210505141101.11519-5-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20210517195748.8880-1-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- arch/x86/kernel/signal_compat.c | 3 +++ include/linux/compat.h | 5 ++--- include/uapi/asm-generic/siginfo.h | 7 ++----- kernel/signal.c | 1 + 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index 0e5d0a7e203b..a9fcabd8a5e5 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -127,6 +127,9 @@ static inline void signal_compat_build_tests(void) BUILD_BUG_ON(offsetof(siginfo_t, si_addr) != 0x10); BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr) != 0x0C); + BUILD_BUG_ON(offsetof(siginfo_t, si_trapno) != 0x18); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_trapno) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_addr_lsb) != 0x18); BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr_lsb) != 0x10); diff --git a/include/linux/compat.h b/include/linux/compat.h index f0d2dd35d408..6af7bef15e94 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -214,12 +214,11 @@ typedef struct compat_siginfo { /* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGTRAP, SIGEMT */ struct { compat_uptr_t _addr; /* faulting insn/memory ref. */ -#ifdef __ARCH_SI_TRAPNO - int _trapno; /* TRAP # which caused the signal */ -#endif #define __COMPAT_ADDR_BND_PKEY_PAD (__alignof__(compat_uptr_t) < sizeof(short) ? \ sizeof(short) : __alignof__(compat_uptr_t)) union { + /* used on alpha and sparc */ + int _trapno; /* TRAP # which caused the signal */ /* * used when si_code=BUS_MCEERR_AR or * used when si_code=BUS_MCEERR_AO diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h index 03d6f6d2c1fe..e663bf117b46 100644 --- a/include/uapi/asm-generic/siginfo.h +++ b/include/uapi/asm-generic/siginfo.h @@ -63,9 +63,6 @@ union __sifields { /* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGTRAP, SIGEMT */ struct { void __user *_addr; /* faulting insn/memory ref. */ -#ifdef __ARCH_SI_TRAPNO - int _trapno; /* TRAP # which caused the signal */ -#endif #ifdef __ia64__ int _imm; /* immediate value for "break" */ unsigned int _flags; /* see ia64 si_flags */ @@ -75,6 +72,8 @@ union __sifields { #define __ADDR_BND_PKEY_PAD (__alignof__(void *) < sizeof(short) ? \ sizeof(short) : __alignof__(void *)) union { + /* used on alpha and sparc */ + int _trapno; /* TRAP # which caused the signal */ /* * used when si_code=BUS_MCEERR_AR or * used when si_code=BUS_MCEERR_AO @@ -150,9 +149,7 @@ typedef struct siginfo { #define si_int _sifields._rt._sigval.sival_int #define si_ptr _sifields._rt._sigval.sival_ptr #define si_addr _sifields._sigfault._addr -#ifdef __ARCH_SI_TRAPNO #define si_trapno _sifields._sigfault._trapno -#endif #define si_addr_lsb _sifields._sigfault._addr_lsb #define si_lower _sifields._sigfault._addr_bnd._lower #define si_upper _sifields._sigfault._addr_bnd._upper diff --git a/kernel/signal.c b/kernel/signal.c index c3017aa8024a..65888aec65a0 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -4607,6 +4607,7 @@ static inline void siginfo_buildtime_checks(void) /* sigfault */ CHECK_OFFSET(si_addr); + CHECK_OFFSET(si_trapno); CHECK_OFFSET(si_addr_lsb); CHECK_OFFSET(si_lower); CHECK_OFFSET(si_upper); From 9abcabe3111811aeae0f3a14e159b14248631875 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 30 Apr 2021 17:29:36 -0500 Subject: [PATCH 338/730] signal: Implement SIL_FAULT_TRAPNO Now that si_trapno is part of the union in _si_fault and available on all architectures, add SIL_FAULT_TRAPNO and update siginfo_layout to return SIL_FAULT_TRAPNO when the code assumes si_trapno is valid. There is room for future changes to reduce when si_trapno is valid but this is all that is needed to make si_trapno and the other members of the the union in _sigfault mutually exclusive. Update the code that uses siginfo_layout to deal with SIL_FAULT_TRAPNO and have the same code ignore si_trapno in in all other cases. v1: https://lkml.kernel.org/r/m1o8dvs7s7.fsf_-_@fess.ebiederm.org v2: https://lkml.kernel.org/r/20210505141101.11519-6-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20210517195748.8880-2-ebiederm@xmission.com Reviewed-by: Marco Elver Signed-off-by: "Eric W. Biederman" --- fs/signalfd.c | 8 +++----- include/linux/signal.h | 1 + kernel/signal.c | 34 ++++++++++++---------------------- 3 files changed, 16 insertions(+), 27 deletions(-) diff --git a/fs/signalfd.c b/fs/signalfd.c index 040a1142915f..e87e59581653 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -123,15 +123,13 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, */ case SIL_FAULT: new.ssi_addr = (long) kinfo->si_addr; -#ifdef __ARCH_SI_TRAPNO + break; + case SIL_FAULT_TRAPNO: + new.ssi_addr = (long) kinfo->si_addr; new.ssi_trapno = kinfo->si_trapno; -#endif break; case SIL_FAULT_MCEERR: new.ssi_addr = (long) kinfo->si_addr; -#ifdef __ARCH_SI_TRAPNO - new.ssi_trapno = kinfo->si_trapno; -#endif new.ssi_addr_lsb = (short) kinfo->si_addr_lsb; break; case SIL_PERF_EVENT: diff --git a/include/linux/signal.h b/include/linux/signal.h index 1e98548d7cf6..5160fd45e5ca 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -40,6 +40,7 @@ enum siginfo_layout { SIL_TIMER, SIL_POLL, SIL_FAULT, + SIL_FAULT_TRAPNO, SIL_FAULT_MCEERR, SIL_FAULT_BNDERR, SIL_FAULT_PKUERR, diff --git a/kernel/signal.c b/kernel/signal.c index 65888aec65a0..597594ee72de 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1194,6 +1194,7 @@ static inline bool has_si_pid_and_uid(struct kernel_siginfo *info) case SIL_TIMER: case SIL_POLL: case SIL_FAULT: + case SIL_FAULT_TRAPNO: case SIL_FAULT_MCEERR: case SIL_FAULT_BNDERR: case SIL_FAULT_PKUERR: @@ -2527,6 +2528,7 @@ static void hide_si_addr_tag_bits(struct ksignal *ksig) { switch (siginfo_layout(ksig->sig, ksig->info.si_code)) { case SIL_FAULT: + case SIL_FAULT_TRAPNO: case SIL_FAULT_MCEERR: case SIL_FAULT_BNDERR: case SIL_FAULT_PKUERR: @@ -3214,6 +3216,10 @@ enum siginfo_layout siginfo_layout(unsigned sig, int si_code) #endif else if ((sig == SIGTRAP) && (si_code == TRAP_PERF)) layout = SIL_PERF_EVENT; +#ifdef __ARCH_SI_TRAPNO + else if (layout == SIL_FAULT) + layout = SIL_FAULT_TRAPNO; +#endif } else if (si_code <= NSIGPOLL) layout = SIL_POLL; @@ -3317,30 +3323,22 @@ void copy_siginfo_to_external32(struct compat_siginfo *to, break; case SIL_FAULT: to->si_addr = ptr_to_compat(from->si_addr); -#ifdef __ARCH_SI_TRAPNO + break; + case SIL_FAULT_TRAPNO: + to->si_addr = ptr_to_compat(from->si_addr); to->si_trapno = from->si_trapno; -#endif break; case SIL_FAULT_MCEERR: to->si_addr = ptr_to_compat(from->si_addr); -#ifdef __ARCH_SI_TRAPNO - to->si_trapno = from->si_trapno; -#endif to->si_addr_lsb = from->si_addr_lsb; break; case SIL_FAULT_BNDERR: to->si_addr = ptr_to_compat(from->si_addr); -#ifdef __ARCH_SI_TRAPNO - to->si_trapno = from->si_trapno; -#endif to->si_lower = ptr_to_compat(from->si_lower); to->si_upper = ptr_to_compat(from->si_upper); break; case SIL_FAULT_PKUERR: to->si_addr = ptr_to_compat(from->si_addr); -#ifdef __ARCH_SI_TRAPNO - to->si_trapno = from->si_trapno; -#endif to->si_pkey = from->si_pkey; break; case SIL_PERF_EVENT: @@ -3401,30 +3399,22 @@ static int post_copy_siginfo_from_user32(kernel_siginfo_t *to, break; case SIL_FAULT: to->si_addr = compat_ptr(from->si_addr); -#ifdef __ARCH_SI_TRAPNO + break; + case SIL_FAULT_TRAPNO: + to->si_addr = compat_ptr(from->si_addr); to->si_trapno = from->si_trapno; -#endif break; case SIL_FAULT_MCEERR: to->si_addr = compat_ptr(from->si_addr); -#ifdef __ARCH_SI_TRAPNO - to->si_trapno = from->si_trapno; -#endif to->si_addr_lsb = from->si_addr_lsb; break; case SIL_FAULT_BNDERR: to->si_addr = compat_ptr(from->si_addr); -#ifdef __ARCH_SI_TRAPNO - to->si_trapno = from->si_trapno; -#endif to->si_lower = compat_ptr(from->si_lower); to->si_upper = compat_ptr(from->si_upper); break; case SIL_FAULT_PKUERR: to->si_addr = compat_ptr(from->si_addr); -#ifdef __ARCH_SI_TRAPNO - to->si_trapno = from->si_trapno; -#endif to->si_pkey = from->si_pkey; break; case SIL_PERF_EVENT: From af5eeab7e8e8c2f0fad10e4ab8cc8092012a2d5b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 2 May 2021 14:27:24 -0500 Subject: [PATCH 339/730] signal: Factor force_sig_perf out of perf_sigtrap Separate filling in siginfo for TRAP_PERF from deciding that siginal needs to be sent. There are enough little details that need to be correct when properly filling in siginfo_t that it is easy to make mistakes if filling in the siginfo_t is in the same function with other logic. So factor out force_sig_perf to reduce the cognative load of on reviewers, maintainers and implementors. v1: https://lkml.kernel.org/r/m17dkjqqxz.fsf_-_@fess.ebiederm.org v2: https://lkml.kernel.org/r/20210505141101.11519-10-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20210517195748.8880-3-ebiederm@xmission.com Reviewed-by: Marco Elver Acked-by: Peter Zijlstra (Intel) Signed-off-by: "Eric W. Biederman" --- include/linux/sched/signal.h | 1 + kernel/events/core.c | 11 ++--------- kernel/signal.c | 13 +++++++++++++ 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 3f6a0fcaa10c..7f4278fa21fe 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -326,6 +326,7 @@ int send_sig_mceerr(int code, void __user *, short, struct task_struct *); int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper); int force_sig_pkuerr(void __user *addr, u32 pkey); +int force_sig_perf(void __user *addr, u32 type, u64 sig_data); int force_sig_ptrace_errno_trap(int errno, void __user *addr); diff --git a/kernel/events/core.c b/kernel/events/core.c index 928b166d888e..48ea8863183b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6394,8 +6394,6 @@ void perf_event_wakeup(struct perf_event *event) static void perf_sigtrap(struct perf_event *event) { - struct kernel_siginfo info; - /* * We'd expect this to only occur if the irq_work is delayed and either * ctx->task or current has changed in the meantime. This can be the @@ -6410,13 +6408,8 @@ static void perf_sigtrap(struct perf_event *event) if (current->flags & PF_EXITING) return; - clear_siginfo(&info); - info.si_signo = SIGTRAP; - info.si_code = TRAP_PERF; - info.si_errno = event->attr.type; - info.si_perf = event->attr.sig_data; - info.si_addr = (void __user *)event->pending_addr; - force_sig_info(&info); + force_sig_perf((void __user *)event->pending_addr, + event->attr.type, event->attr.sig_data); } static void perf_pending_event_disable(struct perf_event *event) diff --git a/kernel/signal.c b/kernel/signal.c index 597594ee72de..3a18d13c39b2 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1763,6 +1763,19 @@ int force_sig_pkuerr(void __user *addr, u32 pkey) } #endif +int force_sig_perf(void __user *addr, u32 type, u64 sig_data) +{ + struct kernel_siginfo info; + + clear_siginfo(&info); + info.si_signo = SIGTRAP; + info.si_errno = type; + info.si_code = TRAP_PERF; + info.si_addr = addr; + info.si_perf = sig_data; + return force_sig_info(&info); +} + /* For the crazy architectures that include trap information in * the errno field, instead of an actual errno value. */ From 0683b53197b55343a166f1507086823030809a19 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 2 May 2021 17:28:31 -0500 Subject: [PATCH 340/730] signal: Deliver all of the siginfo perf data in _perf Don't abuse si_errno and deliver all of the perf data in _perf member of siginfo_t. Note: The data field in the perf data structures in a u64 to allow a pointer to be encoded without needed to implement a 32bit and 64bit version of the same structure. There already exists a 32bit and 64bit versions siginfo_t, and the 32bit version can not include a 64bit member as it only has 32bit alignment. So unsigned long is used in siginfo_t instead of a u64 as unsigned long can encode a pointer on all architectures linux supports. v1: https://lkml.kernel.org/r/m11rarqqx2.fsf_-_@fess.ebiederm.org v2: https://lkml.kernel.org/r/20210503203814.25487-10-ebiederm@xmission.com v3: https://lkml.kernel.org/r/20210505141101.11519-11-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20210517195748.8880-4-ebiederm@xmission.com Reviewed-by: Marco Elver Signed-off-by: "Eric W. Biederman" --- arch/m68k/kernel/signal.c | 3 ++- arch/x86/kernel/signal_compat.c | 6 ++++-- fs/signalfd.c | 3 ++- include/linux/compat.h | 5 ++++- include/uapi/asm-generic/siginfo.h | 8 +++++-- include/uapi/linux/perf_event.h | 2 +- include/uapi/linux/signalfd.h | 4 ++-- kernel/signal.c | 21 ++++++++++++------- .../selftests/perf_events/sigtrap_threads.c | 14 ++++++------- 9 files changed, 41 insertions(+), 25 deletions(-) diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index a4b7ee1df211..8f215e79e70e 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -623,7 +623,8 @@ static inline void siginfo_build_tests(void) BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x12); /* _sigfault._perf */ - BUILD_BUG_ON(offsetof(siginfo_t, si_perf) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_perf_data) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_perf_type) != 0x14); /* _sigpoll */ BUILD_BUG_ON(offsetof(siginfo_t, si_band) != 0x0c); diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index a9fcabd8a5e5..06743ec054d2 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -141,8 +141,10 @@ static inline void signal_compat_build_tests(void) BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x20); BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pkey) != 0x14); - BUILD_BUG_ON(offsetof(siginfo_t, si_perf) != 0x18); - BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_perf_data) != 0x18); + BUILD_BUG_ON(offsetof(siginfo_t, si_perf_type) != 0x20); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_data) != 0x10); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_type) != 0x14); CHECK_CSI_OFFSET(_sigpoll); CHECK_CSI_SIZE (_sigpoll, 2*sizeof(int)); diff --git a/fs/signalfd.c b/fs/signalfd.c index e87e59581653..373df2f12415 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -134,7 +134,8 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, break; case SIL_PERF_EVENT: new.ssi_addr = (long) kinfo->si_addr; - new.ssi_perf = kinfo->si_perf; + new.ssi_perf_type = kinfo->si_perf_type; + new.ssi_perf_data = kinfo->si_perf_data; break; case SIL_CHLD: new.ssi_pid = kinfo->si_pid; diff --git a/include/linux/compat.h b/include/linux/compat.h index 6af7bef15e94..a27fffaae121 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -236,7 +236,10 @@ typedef struct compat_siginfo { u32 _pkey; } _addr_pkey; /* used when si_code=TRAP_PERF */ - compat_ulong_t _perf; + struct { + compat_ulong_t _data; + u32 _type; + } _perf; }; } _sigfault; diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h index e663bf117b46..5a3c221f4c9d 100644 --- a/include/uapi/asm-generic/siginfo.h +++ b/include/uapi/asm-generic/siginfo.h @@ -91,7 +91,10 @@ union __sifields { __u32 _pkey; } _addr_pkey; /* used when si_code=TRAP_PERF */ - unsigned long _perf; + struct { + unsigned long _data; + __u32 _type; + } _perf; }; } _sigfault; @@ -154,7 +157,8 @@ typedef struct siginfo { #define si_lower _sifields._sigfault._addr_bnd._lower #define si_upper _sifields._sigfault._addr_bnd._upper #define si_pkey _sifields._sigfault._addr_pkey._pkey -#define si_perf _sifields._sigfault._perf +#define si_perf_data _sifields._sigfault._perf._data +#define si_perf_type _sifields._sigfault._perf._type #define si_band _sifields._sigpoll._band #define si_fd _sifields._sigpoll._fd #define si_call_addr _sifields._sigsys._call_addr diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index e54e639248c8..7b14753b3d38 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -464,7 +464,7 @@ struct perf_event_attr { /* * User provided data if sigtrap=1, passed back to user via - * siginfo_t::si_perf, e.g. to permit user to identify the event. + * siginfo_t::si_perf_data, e.g. to permit user to identify the event. */ __u64 sig_data; }; diff --git a/include/uapi/linux/signalfd.h b/include/uapi/linux/signalfd.h index 7e333042c7e3..e78dddf433fc 100644 --- a/include/uapi/linux/signalfd.h +++ b/include/uapi/linux/signalfd.h @@ -39,8 +39,8 @@ struct signalfd_siginfo { __s32 ssi_syscall; __u64 ssi_call_addr; __u32 ssi_arch; - __u32 __pad3; - __u64 ssi_perf; + __u32 ssi_perf_type; + __u64 ssi_perf_data; /* * Pad strcture to 128 bytes. Remember to update the diff --git a/kernel/signal.c b/kernel/signal.c index 3a18d13c39b2..dca53515ae3f 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1768,11 +1768,13 @@ int force_sig_perf(void __user *addr, u32 type, u64 sig_data) struct kernel_siginfo info; clear_siginfo(&info); - info.si_signo = SIGTRAP; - info.si_errno = type; - info.si_code = TRAP_PERF; - info.si_addr = addr; - info.si_perf = sig_data; + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_PERF; + info.si_addr = addr; + info.si_perf_data = sig_data; + info.si_perf_type = type; + return force_sig_info(&info); } @@ -3356,7 +3358,8 @@ void copy_siginfo_to_external32(struct compat_siginfo *to, break; case SIL_PERF_EVENT: to->si_addr = ptr_to_compat(from->si_addr); - to->si_perf = from->si_perf; + to->si_perf_data = from->si_perf_data; + to->si_perf_type = from->si_perf_type; break; case SIL_CHLD: to->si_pid = from->si_pid; @@ -3432,7 +3435,8 @@ static int post_copy_siginfo_from_user32(kernel_siginfo_t *to, break; case SIL_PERF_EVENT: to->si_addr = compat_ptr(from->si_addr); - to->si_perf = from->si_perf; + to->si_perf_data = from->si_perf_data; + to->si_perf_type = from->si_perf_type; break; case SIL_CHLD: to->si_pid = from->si_pid; @@ -4615,7 +4619,8 @@ static inline void siginfo_buildtime_checks(void) CHECK_OFFSET(si_lower); CHECK_OFFSET(si_upper); CHECK_OFFSET(si_pkey); - CHECK_OFFSET(si_perf); + CHECK_OFFSET(si_perf_data); + CHECK_OFFSET(si_perf_type); /* sigpoll */ CHECK_OFFSET(si_band); diff --git a/tools/testing/selftests/perf_events/sigtrap_threads.c b/tools/testing/selftests/perf_events/sigtrap_threads.c index 78ddf5e11625..8e83cf91513a 100644 --- a/tools/testing/selftests/perf_events/sigtrap_threads.c +++ b/tools/testing/selftests/perf_events/sigtrap_threads.c @@ -43,7 +43,7 @@ static struct { siginfo_t first_siginfo; /* First observed siginfo_t. */ } ctx; -/* Unique value to check si_perf is correctly set from perf_event_attr::sig_data. */ +/* Unique value to check si_perf_data is correctly set from perf_event_attr::sig_data. */ #define TEST_SIG_DATA(addr) (~(unsigned long)(addr)) static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr) @@ -164,8 +164,8 @@ TEST_F(sigtrap_threads, enable_event) EXPECT_EQ(ctx.signal_count, NUM_THREADS); EXPECT_EQ(ctx.tids_want_signal, 0); EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); - EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT); - EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on)); + EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT); + EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on)); /* Check enabled for parent. */ ctx.iterate_on = 0; @@ -183,8 +183,8 @@ TEST_F(sigtrap_threads, modify_and_enable_event) EXPECT_EQ(ctx.signal_count, NUM_THREADS); EXPECT_EQ(ctx.tids_want_signal, 0); EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); - EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT); - EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on)); + EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT); + EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on)); /* Check enabled for parent. */ ctx.iterate_on = 0; @@ -203,8 +203,8 @@ TEST_F(sigtrap_threads, signal_stress) EXPECT_EQ(ctx.signal_count, NUM_THREADS * ctx.iterate_on); EXPECT_EQ(ctx.tids_want_signal, 0); EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); - EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT); - EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on)); + EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT); + EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on)); } TEST_HARNESS_MAIN From 922e3013046b79b444c87eda5baf43afae1326a8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 3 May 2021 12:52:43 -0500 Subject: [PATCH 341/730] signalfd: Remove SIL_PERF_EVENT fields from signalfd_siginfo With the addition of ssi_perf_data and ssi_perf_type struct signalfd_siginfo is dangerously close to running out of space. All that remains is just enough space for two additional 64bit fields. A practice of adding all possible siginfo_t fields into struct singalfd_siginfo can not be supported as adding the missing fields ssi_lower, ssi_upper, and ssi_pkey would require two 64bit fields and one 32bit fields. In practice the fields ssi_perf_data and ssi_perf_type can never be used by signalfd as the signal that generates them always delivers them synchronously to the thread that triggers them. Therefore until someone actually needs the fields ssi_perf_data and ssi_perf_type in signalfd_siginfo remove them. This leaves a bit more room for future expansion. v1: https://lkml.kernel.org/r/20210503203814.25487-12-ebiederm@xmission.com v2: https://lkml.kernel.org/r/20210505141101.11519-12-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20210517195748.8880-5-ebiederm@xmission.com Reviewed-by: Marco Elver Signed-off-by: "Eric W. Biederman" --- fs/signalfd.c | 16 ++++++---------- include/uapi/linux/signalfd.h | 4 +--- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/fs/signalfd.c b/fs/signalfd.c index 373df2f12415..167b5889db4b 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -114,12 +114,13 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, break; case SIL_FAULT_BNDERR: case SIL_FAULT_PKUERR: + case SIL_PERF_EVENT: /* - * Fall through to the SIL_FAULT case. Both SIL_FAULT_BNDERR - * and SIL_FAULT_PKUERR are only generated by faults that - * deliver them synchronously to userspace. In case someone - * injects one of these signals and signalfd catches it treat - * it as SIL_FAULT. + * Fall through to the SIL_FAULT case. SIL_FAULT_BNDERR, + * SIL_FAULT_PKUERR, and SIL_PERF_EVENT are only + * generated by faults that deliver them synchronously to + * userspace. In case someone injects one of these signals + * and signalfd catches it treat it as SIL_FAULT. */ case SIL_FAULT: new.ssi_addr = (long) kinfo->si_addr; @@ -132,11 +133,6 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, new.ssi_addr = (long) kinfo->si_addr; new.ssi_addr_lsb = (short) kinfo->si_addr_lsb; break; - case SIL_PERF_EVENT: - new.ssi_addr = (long) kinfo->si_addr; - new.ssi_perf_type = kinfo->si_perf_type; - new.ssi_perf_data = kinfo->si_perf_data; - break; case SIL_CHLD: new.ssi_pid = kinfo->si_pid; new.ssi_uid = kinfo->si_uid; diff --git a/include/uapi/linux/signalfd.h b/include/uapi/linux/signalfd.h index e78dddf433fc..83429a05b698 100644 --- a/include/uapi/linux/signalfd.h +++ b/include/uapi/linux/signalfd.h @@ -39,8 +39,6 @@ struct signalfd_siginfo { __s32 ssi_syscall; __u64 ssi_call_addr; __u32 ssi_arch; - __u32 ssi_perf_type; - __u64 ssi_perf_data; /* * Pad strcture to 128 bytes. Remember to update the @@ -51,7 +49,7 @@ struct signalfd_siginfo { * comes out of a read(2) and we really don't want to have * a compat on read(2). */ - __u8 __pad[16]; + __u8 __pad[28]; }; From 3410fbcd47dc6479af4309febf760ccaa5efb472 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 12 May 2021 13:52:27 +0300 Subject: [PATCH 342/730] {net, RDMA}/mlx5: Fix override of log_max_qp by other device mlx5_core_dev holds pointer to static profile, hence when the log_max_qp of the profile is override by some device, then it effect all other mlx5 devices that share the same profile. Fix it by having a profile instance for every mlx5 device. Fixes: 883371c453b9 ("net/mlx5: Check FW limitations on log_max_qp before setting it") Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/mr.c | 4 +- .../net/ethernet/mellanox/mlx5/core/main.c | 11 +++-- include/linux/mlx5/driver.h | 44 +++++++++---------- 3 files changed, 29 insertions(+), 30 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 4388afeff251..9662cd39c7ff 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -743,10 +743,10 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) / MLX5_IB_UMR_OCTOWORD; ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; - if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) && + if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) && !dev->is_rep && mlx5_core_is_pf(dev->mdev) && mlx5_ib_can_load_pas_with_umr(dev, 0)) - ent->limit = dev->mdev->profile->mr_cache[i].limit; + ent->limit = dev->mdev->profile.mr_cache[i].limit; else ent->limit = 0; spin_lock_irq(&ent->lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c114365eb126..a1d67bd7fb43 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -503,7 +503,7 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx) static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) { - struct mlx5_profile *prof = dev->profile; + struct mlx5_profile *prof = &dev->profile; void *set_hca_cap; int err; @@ -524,11 +524,11 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) to_fw_pkey_sz(dev, 128)); /* Check log_max_qp from HCA caps to set in current profile */ - if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < profile[prof_sel].log_max_qp) { + if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) { mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n", - profile[prof_sel].log_max_qp, + prof->log_max_qp, MLX5_CAP_GEN_MAX(dev, log_max_qp)); - profile[prof_sel].log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp); + prof->log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp); } if (prof->mask & MLX5_PROF_MASK_QP_SIZE) MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp, @@ -1381,8 +1381,7 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) struct mlx5_priv *priv = &dev->priv; int err; - dev->profile = &profile[profile_idx]; - + memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile)); INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); mutex_init(&dev->intf_state_mutex); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f8e8d7e90616..020a8f7fdbdd 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -703,6 +703,27 @@ struct mlx5_hv_vhca; #define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity)) #define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)) +enum { + MLX5_PROF_MASK_QP_SIZE = (u64)1 << 0, + MLX5_PROF_MASK_MR_CACHE = (u64)1 << 1, +}; + +enum { + MR_CACHE_LAST_STD_ENTRY = 20, + MLX5_IMR_MTT_CACHE_ENTRY, + MLX5_IMR_KSM_CACHE_ENTRY, + MAX_MR_CACHE_ENTRIES +}; + +struct mlx5_profile { + u64 mask; + u8 log_max_qp; + struct { + int size; + int limit; + } mr_cache[MAX_MR_CACHE_ENTRIES]; +}; + struct mlx5_core_dev { struct device *device; enum mlx5_coredev_type coredev_type; @@ -731,7 +752,7 @@ struct mlx5_core_dev { struct mutex intf_state_mutex; unsigned long intf_state; struct mlx5_priv priv; - struct mlx5_profile *profile; + struct mlx5_profile profile; u32 issi; struct mlx5e_resources mlx5e_res; struct mlx5_dm *dm; @@ -1083,18 +1104,6 @@ static inline u8 mlx5_mkey_variant(u32 mkey) return mkey & 0xff; } -enum { - MLX5_PROF_MASK_QP_SIZE = (u64)1 << 0, - MLX5_PROF_MASK_MR_CACHE = (u64)1 << 1, -}; - -enum { - MR_CACHE_LAST_STD_ENTRY = 20, - MLX5_IMR_MTT_CACHE_ENTRY, - MLX5_IMR_KSM_CACHE_ENTRY, - MAX_MR_CACHE_ENTRIES -}; - /* Async-atomic event notifier used by mlx5 core to forward FW * evetns recived from event queue to mlx5 consumers. * Optimise event queue dipatching. @@ -1148,15 +1157,6 @@ int mlx5_rdma_rn_get_params(struct mlx5_core_dev *mdev, struct ib_device *device, struct rdma_netdev_alloc_params *params); -struct mlx5_profile { - u64 mask; - u8 log_max_qp; - struct { - int size; - int limit; - } mr_cache[MAX_MR_CACHE_ENTRIES]; -}; - enum { MLX5_PCI_DEV_IS_VF = 1 << 0, }; From dca59f4a791960ec73fa15803faa0abe0f92ece2 Mon Sep 17 00:00:00 2001 From: Dima Chumak Date: Mon, 26 Apr 2021 15:16:26 +0300 Subject: [PATCH 343/730] net/mlx5e: Fix nullptr in add_vlan_push_action() The result of dev_get_by_index_rcu() is not checked for NULL and then gets dereferenced immediately. Also, the RCU lock must be held by the caller of dev_get_by_index_rcu(), which isn't satisfied by the call stack. Fix by handling nullptr return value when iflink device is not found. Add RCU locking around dev_get_by_index_rcu() to avoid possible adverse effects while iterating over the net_device's hlist. It is safe not to increment reference count of the net_device pointer in case of a successful lookup, because it's already handled by VLAN code during VLAN device registration (see register_vlan_dev and netdev_upper_dev_link). Fixes: 278748a95aa3 ("net/mlx5e: Offload TC e-switch rules with egress VLAN device") Addresses-Coverity: ("Dereference null return value") Signed-off-by: Dima Chumak Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 47a9c49b25fd..46945d04b5b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3526,8 +3526,12 @@ static int add_vlan_push_action(struct mlx5e_priv *priv, if (err) return err; - *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), - dev_get_iflink(vlan_dev)); + rcu_read_lock(); + *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), dev_get_iflink(vlan_dev)); + rcu_read_unlock(); + if (!*out_dev) + return -ENODEV; + if (is_vlan_dev(*out_dev)) err = add_vlan_push_action(priv, attr, out_dev, action); From 442b3d7b671bcb779ebdad46edd08051eb8b28d9 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Fri, 30 Apr 2021 06:58:29 +0000 Subject: [PATCH 344/730] net/mlx5: Set reformat action when needed for termination rules For remote mirroring, after the tunnel packets are received, they are decapsulated and sent to representor, then re-encapsulated and sent out over another tunnel. So reformat action is set only when the destination is required to do encapsulation. Fixes: 249ccc3c95bd ("net/mlx5e: Add support for offloading traffic from uplink to uplink") Signed-off-by: Jianbo Liu Reviewed-by: Ariel Levkovich Signed-off-by: Saeed Mahameed --- .../mlx5/core/eswitch_offloads_termtbl.c | 31 ++++++------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index a81ece94f599..e3e7fdd396ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -172,19 +172,6 @@ mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw, } } -static bool mlx5_eswitch_termtbl_is_encap_reformat(struct mlx5_pkt_reformat *rt) -{ - switch (rt->reformat_type) { - case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: - case MLX5_REFORMAT_TYPE_L2_TO_NVGRE: - case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL: - case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL: - return true; - default: - return false; - } -} - static void mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src, struct mlx5_flow_act *dst) @@ -202,14 +189,6 @@ mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src, memset(&src->vlan[1], 0, sizeof(src->vlan[1])); } } - - if (src->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT && - mlx5_eswitch_termtbl_is_encap_reformat(src->pkt_reformat)) { - src->action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - dst->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - dst->pkt_reformat = src->pkt_reformat; - src->pkt_reformat = NULL; - } } static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw, @@ -279,6 +258,14 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, if (dest[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT) continue; + if (attr->dests[num_vport_dests].flags & MLX5_ESW_DEST_ENCAP) { + term_tbl_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + term_tbl_act.pkt_reformat = attr->dests[num_vport_dests].pkt_reformat; + } else { + term_tbl_act.action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + term_tbl_act.pkt_reformat = NULL; + } + /* get the terminating table for the action list */ tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act, &dest[i], attr); @@ -301,6 +288,8 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, goto revert_changes; /* create the FTE */ + flow_act->action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act->pkt_reformat = NULL; rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest); if (IS_ERR(rule)) goto revert_changes; From fca086617af864efd20289774901221b2df06b39 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Thu, 13 May 2021 15:00:53 +0300 Subject: [PATCH 345/730] net/mlx5: Fix err prints and return when creating termination table Fix print to print correct error code and not using IS_ERR() which will just result in always printing 1. Also return real err instead of always -EOPNOTSUPP. Fixes: 10caabdaad5a ("net/mlx5e: Use termination table for VLAN push actions") Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- .../mlx5/core/eswitch_offloads_termtbl.c | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index e3e7fdd396ad..d61bee2d35fe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -65,7 +65,7 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, { struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_namespace *root_ns; - int err; + int err, err2; root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); if (!root_ns) { @@ -83,26 +83,26 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, ft_attr.autogroup.max_num_groups = 1; tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); if (IS_ERR(tt->termtbl)) { - esw_warn(dev, "Failed to create termination table (error %d)\n", - IS_ERR(tt->termtbl)); - return -EOPNOTSUPP; + err = PTR_ERR(tt->termtbl); + esw_warn(dev, "Failed to create termination table, err %pe\n", tt->termtbl); + return err; } tt->rule = mlx5_add_flow_rules(tt->termtbl, NULL, flow_act, &tt->dest, 1); if (IS_ERR(tt->rule)) { - esw_warn(dev, "Failed to create termination table rule (error %d)\n", - IS_ERR(tt->rule)); + err = PTR_ERR(tt->rule); + esw_warn(dev, "Failed to create termination table rule, err %pe\n", tt->rule); goto add_flow_err; } return 0; add_flow_err: - err = mlx5_destroy_flow_table(tt->termtbl); - if (err) - esw_warn(dev, "Failed to destroy termination table\n"); + err2 = mlx5_destroy_flow_table(tt->termtbl); + if (err2) + esw_warn(dev, "Failed to destroy termination table, err %d\n", err2); - return -EOPNOTSUPP; + return err; } static struct mlx5_termtbl_handle * @@ -270,8 +270,7 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act, &dest[i], attr); if (IS_ERR(tt)) { - esw_warn(esw->dev, "Failed to get termination table (error %d)\n", - IS_ERR(tt)); + esw_warn(esw->dev, "Failed to get termination table, err %pe\n", tt); goto revert_changes; } attr->dests[num_vport_dests].termtbl = tt; From 82041634d96e87b41c600a673f10150d9f21f742 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 7 May 2021 10:08:47 +0300 Subject: [PATCH 346/730] net/mlx5: SF, Fix show state inactive when its inactivated When a SF is inactivated and when it is in a TEARDOWN_REQUEST state, driver still returns its state as active. This is incorrect. Fix it by treating TEARDOWN_REQEUST as inactive state. When a SF is still attached to the driver, on user request to reactivate EINVAL error is returned. Inform user about it with better code EBUSY and informative error message. Fixes: 6a3273217469 ("net/mlx5: SF, Port function state change support") Signed-off-by: Parav Pandit Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/sf/devlink.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index a8e73c9ed1ea..1be048769309 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -136,10 +136,10 @@ static enum devlink_port_fn_state mlx5_sf_to_devlink_state(u8 hw_state) switch (hw_state) { case MLX5_VHCA_STATE_ACTIVE: case MLX5_VHCA_STATE_IN_USE: - case MLX5_VHCA_STATE_TEARDOWN_REQUEST: return DEVLINK_PORT_FN_STATE_ACTIVE; case MLX5_VHCA_STATE_INVALID: case MLX5_VHCA_STATE_ALLOCATED: + case MLX5_VHCA_STATE_TEARDOWN_REQUEST: default: return DEVLINK_PORT_FN_STATE_INACTIVE; } @@ -192,14 +192,17 @@ sf_err: return err; } -static int mlx5_sf_activate(struct mlx5_core_dev *dev, struct mlx5_sf *sf) +static int mlx5_sf_activate(struct mlx5_core_dev *dev, struct mlx5_sf *sf, + struct netlink_ext_ack *extack) { int err; if (mlx5_sf_is_active(sf)) return 0; - if (sf->hw_state != MLX5_VHCA_STATE_ALLOCATED) - return -EINVAL; + if (sf->hw_state != MLX5_VHCA_STATE_ALLOCATED) { + NL_SET_ERR_MSG_MOD(extack, "SF is inactivated but it is still attached"); + return -EBUSY; + } err = mlx5_cmd_sf_enable_hca(dev, sf->hw_fn_id); if (err) @@ -226,7 +229,8 @@ static int mlx5_sf_deactivate(struct mlx5_core_dev *dev, struct mlx5_sf *sf) static int mlx5_sf_state_set(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, struct mlx5_sf *sf, - enum devlink_port_fn_state state) + enum devlink_port_fn_state state, + struct netlink_ext_ack *extack) { int err = 0; @@ -234,7 +238,7 @@ static int mlx5_sf_state_set(struct mlx5_core_dev *dev, struct mlx5_sf_table *ta if (state == mlx5_sf_to_devlink_state(sf->hw_state)) goto out; if (state == DEVLINK_PORT_FN_STATE_ACTIVE) - err = mlx5_sf_activate(dev, sf); + err = mlx5_sf_activate(dev, sf, extack); else if (state == DEVLINK_PORT_FN_STATE_INACTIVE) err = mlx5_sf_deactivate(dev, sf); else @@ -265,7 +269,7 @@ int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_po goto out; } - err = mlx5_sf_state_set(dev, table, sf, state); + err = mlx5_sf_state_set(dev, table, sf, state, extack); out: mlx5_sf_table_put(table); return err; From fe7738eb3ca3631a75844e790f6cb576c0fe7b00 Mon Sep 17 00:00:00 2001 From: Dima Chumak Date: Mon, 26 Apr 2021 15:16:26 +0300 Subject: [PATCH 347/730] net/mlx5e: Fix nullptr in mlx5e_tc_add_fdb_flow() The result of __dev_get_by_index() is not checked for NULL, which then passed to mlx5e_attach_encap() and gets dereferenced. Also, in case of a successful lookup, the net_device reference count is not incremented, which may result in net_device pointer becoming invalid at any time during mlx5e_attach_encap() execution. Fix by using dev_get_by_index(), which does proper reference counting on the net_device pointer. Also, handle nullptr return value when mirred device is not found. It's safe to call dev_put() on the mirred net_device pointer, right after mlx5e_attach_encap() call, because it's not being saved/copied down the call chain. Fixes: 3c37745ec614 ("net/mlx5e: Properly deal with encap flows add/del under neigh update") Addresses-Coverity: ("Dereference null return value") Signed-off-by: Dima Chumak Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 46945d04b5b8..882bafba43f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1322,10 +1322,10 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct net_device *out_dev, *encap_dev = NULL; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; bool vf_tun = false, encap_valid = true; + struct net_device *encap_dev = NULL; struct mlx5_esw_flow_attr *esw_attr; struct mlx5_fc *counter = NULL; struct mlx5e_rep_priv *rpriv; @@ -1371,16 +1371,22 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, esw_attr = attr->esw_attr; for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { + struct net_device *out_dev; int mirred_ifindex; if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) continue; mirred_ifindex = parse_attr->mirred_ifindex[out_index]; - out_dev = __dev_get_by_index(dev_net(priv->netdev), - mirred_ifindex); + out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex); + if (!out_dev) { + NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found"); + err = -ENODEV; + goto err_out; + } err = mlx5e_attach_encap(priv, flow, out_dev, out_index, extack, &encap_dev, &encap_valid); + dev_put(out_dev); if (err) goto err_out; From 83026d83186bc48bb41ee4872f339b83f31dfc55 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 3 May 2021 18:01:02 +0300 Subject: [PATCH 348/730] net/mlx5e: Fix null deref accessing lag dev It could be the lag dev is null so stop processing the event. In bond_enslave() the active/backup slave being set before setting the upper dev so first event is without an upper dev. After setting the upper dev with bond_master_upper_dev_link() there is a second event and in that event we have an upper dev. Fixes: 7e51891a237f ("net/mlx5e: Use netdev events to set/del egress acl forward-to-vport rule") Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c index 95f2b26a3ee3..9c076aa20306 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c @@ -223,6 +223,8 @@ static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *pt rpriv = priv->ppriv; fwd_vport_num = rpriv->rep->vport; lag_dev = netdev_master_upper_dev_get(netdev); + if (!lag_dev) + return; netdev_dbg(netdev, "lag_dev(%s)'s slave vport(%d) is txable(%d)\n", lag_dev->name, fwd_vport_num, net_lag_port_dev_txable(netdev)); From eb96cc15926f4ddde3a28c42feeffdf002451c24 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 2 May 2021 10:25:50 +0300 Subject: [PATCH 349/730] net/mlx5e: Make sure fib dev exists in fib event For unreachable route entry the fib dev does not exists. Fixes: 8914add2c9e5 ("net/mlx5e: Handle FIB events to update tunnel endpoint device") Reported-by: Dennis Afanasev Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 593503bc4d07..f1fb11680d20 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -1505,7 +1505,7 @@ mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv, fen_info = container_of(info, struct fib_entry_notifier_info, info); fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; - if (fib_dev->netdev_ops != &mlx5e_netdev_ops || + if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops || fen_info->dst_len != 32) return NULL; From 77ecd10d0a8aaa6e4871d8c63626e4c9fc5e47db Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 25 Feb 2021 11:20:00 -0800 Subject: [PATCH 350/730] net/mlx5e: reset XPS on error flow if netdev isn't registered yet mlx5e_attach_netdev can be called prior to registering the netdevice: Example stack: ipoib_new_child_link -> ipoib_intf_init-> rdma_init_netdev-> mlx5_rdma_setup_rn-> mlx5e_attach_netdev-> mlx5e_num_channels_changed -> mlx5e_set_default_xps_cpumasks -> netif_set_xps_queue -> __netif_set_xps_queue -> kmalloc If any later stage fails at any point after mlx5e_num_channels_changed() returns, XPS allocated maps will never be freed as they are only freed during netdev unregistration, which will never happen for yet to be registered netdevs. Fixes: 3909a12e7913 ("net/mlx5e: Fix configuration of XPS cpumasks and netdev queues in corner cases") Signed-off-by: Saeed Mahameed Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index bca832cdc4cb..89937b055070 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5229,6 +5229,11 @@ static void mlx5e_update_features(struct net_device *netdev) rtnl_unlock(); } +static void mlx5e_reset_channels(struct net_device *netdev) +{ + netdev_reset_tc(netdev); +} + int mlx5e_attach_netdev(struct mlx5e_priv *priv) { const bool take_rtnl = priv->netdev->reg_state == NETREG_REGISTERED; @@ -5283,6 +5288,7 @@ err_cleanup_tx: profile->cleanup_tx(priv); out: + mlx5e_reset_channels(priv->netdev); set_bit(MLX5E_STATE_DESTROYING, &priv->state); cancel_work_sync(&priv->update_stats_work); return err; @@ -5300,6 +5306,7 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv) profile->cleanup_rx(priv); profile->cleanup_tx(priv); + mlx5e_reset_channels(priv->netdev); cancel_work_sync(&priv->update_stats_work); } From 97817fcc684ed01497bd19d0cd4dea699665b9cf Mon Sep 17 00:00:00 2001 From: Dima Chumak Date: Tue, 13 Apr 2021 22:43:08 +0300 Subject: [PATCH 351/730] net/mlx5e: Fix multipath lag activation When handling FIB_EVENT_ENTRY_REPLACE event for a new multipath route, lag activation can be missed if a stale (struct lag_mp)->mfi pointer exists, which was associated with an older multipath route that had been removed. Normally, when a route is removed, it triggers mlx5_lag_fib_event(), which handles FIB_EVENT_ENTRY_DEL and clears mfi pointer. But, if mlx5_lag_check_prereq() condition isn't met, for example when eswitch is in legacy mode, the fib event is skipped and mfi pointer becomes stale. Fix by resetting mfi pointer to NULL every time mlx5_lag_mp_init() is called. Fixes: 544fe7c2e654 ("net/mlx5e: Activate HW multipath and handle port affinity based on FIB events") Signed-off-by: Dima Chumak Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index 2c41a6920264..fd6196b5e163 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -307,6 +307,11 @@ int mlx5_lag_mp_init(struct mlx5_lag *ldev) struct lag_mp *mp = &ldev->lag_mp; int err; + /* always clear mfi, as it might become stale when a route delete event + * has been missed + */ + mp->mfi = NULL; + if (mp->fib_nb.notifier_call) return 0; @@ -335,4 +340,5 @@ void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) unregister_fib_notifier(&init_net, &mp->fib_nb); destroy_workqueue(mp->wq); mp->fib_nb.notifier_call = NULL; + mp->mfi = NULL; } From 7d1a3d08c8a6398e7497a98cf3f7b73ea13d9939 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 20 Apr 2021 15:16:16 +0300 Subject: [PATCH 352/730] net/mlx5e: Reject mirroring on source port change encap rules Rules with MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE dest flag are translated to destination FT in eswitch. Currently it is not possible to mirror such rules because firmware doesn't support mixing FT and Vport destinations in single rule when one of them adds encapsulation. Since the only use case for MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE destination is support for tunnel endpoints on VF and trying to offload such rule with mirror action causes either crash in fs_core or firmware error with syndrome 0xff6a1d, reject all such rules in mlx5 TC layer. Fixes: 10742efc20a4 ("net/mlx5e: VF tunnel TX traffic offloading") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 882bafba43f2..bccdb43a880b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1399,6 +1399,12 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, esw_attr->dests[out_index].mdev = out_priv->mdev; } + if (vf_tun && esw_attr->out_count > 1) { + NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported"); + err = -EOPNOTSUPP; + goto err_out; + } + err = mlx5_eswitch_add_vlan_action(esw, attr); if (err) goto err_out; From 5e7923acbd86d0ff29269688d8a9c47ad091dd46 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 21 Apr 2021 14:26:31 +0300 Subject: [PATCH 353/730] net/mlx5e: Fix error path of updating netdev queues Avoid division by zero in the error flow. In the driver TC number can be either 1 or 8. When TC count is set to 1, driver zero netdev->num_tc. Hence, need to convert it back from 0 to 1 in the error flow. Fixes: fa3748775b92 ("net/mlx5e: Handle errors from netif_set_real_num_{tx,rx}_queues") Signed-off-by: Aya Levin Reviewed-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 89937b055070..d1b9a4040d60 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2697,7 +2697,7 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv) int err; old_num_txqs = netdev->real_num_tx_queues; - old_ntc = netdev->num_tc; + old_ntc = netdev->num_tc ? : 1; nch = priv->channels.params.num_channels; ntc = priv->channels.params.num_tc; From 7c9f131f366ab414691907fa0407124ea2b2f3bc Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 22 Apr 2021 15:48:10 +0300 Subject: [PATCH 354/730] {net,vdpa}/mlx5: Configure interface MAC into mpfs L2 table net/mlx5: Expose MPFS configuration API MPFS is the multi physical function switch that bridges traffic between the physical port and any physical functions associated with it. The driver is required to add or remove MAC entries to properly forward incoming traffic to the correct physical function. We export the API to control MPFS so that other drivers, such as mlx5_vdpa are able to add MAC addresses of their network interfaces. The MAC address of the vdpa interface must be configured into the MPFS L2 address. Failing to do so could cause, in some NIC configurations, failure to forward packets to the vdpa network device instance. Fix this by adding calls to update the MPFS table. CC: CC: CC: Fixes: 1a86b377aa21 ("vdpa/mlx5: Add VDPA driver for supported mlx5 devices") Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_fs.c | 1 + .../net/ethernet/mellanox/mlx5/core/eswitch.c | 1 + .../ethernet/mellanox/mlx5/core/lib/mpfs.c | 3 +++ .../ethernet/mellanox/mlx5/core/lib/mpfs.h | 5 +---- drivers/vdpa/mlx5/net/mlx5_vnet.c | 19 ++++++++++++++++++- include/linux/mlx5/mpfs.h | 18 ++++++++++++++++++ 6 files changed, 42 insertions(+), 5 deletions(-) create mode 100644 include/linux/mlx5/mpfs.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 0d571a0c76d9..0b75fab41ae8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "en.h" #include "en_rep.h" #include "lib/mpfs.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 570f2280823c..b88705a3a1a8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "esw/acl/lgcy.h" #include "esw/legacy.h" #include "mlx5_core.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c index fd8449ff9e17..839a01da110f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include "mlx5_core.h" #include "lib/mpfs.h" @@ -175,6 +176,7 @@ out: mutex_unlock(&mpfs->lock); return err; } +EXPORT_SYMBOL(mlx5_mpfs_add_mac); int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) { @@ -206,3 +208,4 @@ unlock: mutex_unlock(&mpfs->lock); return err; } +EXPORT_SYMBOL(mlx5_mpfs_del_mac); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h index 4a7b2c3203a7..4a293542a7aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h @@ -84,12 +84,9 @@ struct l2addr_node { #ifdef CONFIG_MLX5_MPFS int mlx5_mpfs_init(struct mlx5_core_dev *dev); void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev); -int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac); -int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac); #else /* #ifndef CONFIG_MLX5_MPFS */ static inline int mlx5_mpfs_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) {} -static inline int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; } -static inline int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; } #endif + #endif diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 189e4385df40..dda5dc6f7737 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "mlx5_vdpa.h" MODULE_AUTHOR("Eli Cohen "); @@ -1859,11 +1860,16 @@ static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb static void mlx5_vdpa_free(struct vdpa_device *vdev) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_core_dev *pfmdev; struct mlx5_vdpa_net *ndev; ndev = to_mlx5_vdpa_ndev(mvdev); free_resources(ndev); + if (!is_zero_ether_addr(ndev->config.mac)) { + pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); + mlx5_mpfs_del_mac(pfmdev, ndev->config.mac); + } mlx5_vdpa_free_resources(&ndev->mvdev); mutex_destroy(&ndev->reslock); } @@ -1990,6 +1996,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) { struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); struct virtio_net_config *config; + struct mlx5_core_dev *pfmdev; struct mlx5_vdpa_dev *mvdev; struct mlx5_vdpa_net *ndev; struct mlx5_core_dev *mdev; @@ -2023,10 +2030,17 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) if (err) goto err_mtu; + if (!is_zero_ether_addr(config->mac)) { + pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev)); + err = mlx5_mpfs_add_mac(pfmdev, config->mac); + if (err) + goto err_mtu; + } + mvdev->vdev.dma_dev = mdev->device; err = mlx5_vdpa_alloc_resources(&ndev->mvdev); if (err) - goto err_mtu; + goto err_mpfs; err = alloc_resources(ndev); if (err) @@ -2044,6 +2058,9 @@ err_reg: free_resources(ndev); err_res: mlx5_vdpa_free_resources(&ndev->mvdev); +err_mpfs: + if (!is_zero_ether_addr(config->mac)) + mlx5_mpfs_del_mac(pfmdev, config->mac); err_mtu: mutex_destroy(&ndev->reslock); put_device(&mvdev->vdev.dev); diff --git a/include/linux/mlx5/mpfs.h b/include/linux/mlx5/mpfs.h new file mode 100644 index 000000000000..bf700c8d5516 --- /dev/null +++ b/include/linux/mlx5/mpfs.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB + * Copyright (c) 2021 Mellanox Technologies Ltd. + */ + +#ifndef _MLX5_MPFS_ +#define _MLX5_MPFS_ + +struct mlx5_core_dev; + +#ifdef CONFIG_MLX5_MPFS +int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac); +int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac); +#else /* #ifndef CONFIG_MLX5_MPFS */ +static inline int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; } +static inline int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; } +#endif + +#endif From 75e8564e919f369cafb3d2b8fd11ec5af7b37416 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 25 Apr 2021 13:28:10 +0300 Subject: [PATCH 355/730] net/mlx5: Don't overwrite HCA capabilities when setting MSI-X count During driver probe of device that has dynamic MSI-X feature enabled, the following error is printed in some FW flavour (not released yet). mlx5_core 0000:06:00.0: firmware version: 4.7.4387 mlx5_core 0000:06:00.0: 126.016 Gb/s available PCIe bandwidth (8.0 GT/s PCIe x16 link) mlx5_core 0000:06:00.0: mlx5_cmd_check:777:(pid 70599): SET_HCA_CAP(0x109) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x0) mlx5_core 0000:06:00.0: set_hca_cap:622:(pid 70599): handle_hca_cap failed mlx5_core 0000:06:00.0: mlx5_function_setup:1045:(pid 70599): set_hca_cap failed mlx5_core 0000:06:00.0: probe_one:1465:(pid 70599): mlx5_init_one failed with error code -22 mlx5_core: probe of 0000:06:00.0 failed with error -22 In order to make the setting capability of MSI-X future proof, let's query the current capabilities first. Fixes: 604774add516 ("net/mlx5: Dynamically assign MSI-X vectors count") Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/pci_irq.c | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 1f907df5b3a2..c3373fb1cd7f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -95,9 +95,10 @@ int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs) int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id, int msix_vec_count) { - int sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + void *hca_cap = NULL, *query_cap = NULL, *cap; int num_vf_msix, min_msix, max_msix; - void *hca_cap, *cap; int ret; num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix); @@ -116,11 +117,20 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id, if (msix_vec_count > max_msix) return -EOVERFLOW; - hca_cap = kzalloc(sz, GFP_KERNEL); - if (!hca_cap) - return -ENOMEM; + query_cap = kzalloc(query_sz, GFP_KERNEL); + hca_cap = kzalloc(set_sz, GFP_KERNEL); + if (!hca_cap || !query_cap) { + ret = -ENOMEM; + goto out; + } + + ret = mlx5_vport_get_other_func_cap(dev, function_id, query_cap); + if (ret) + goto out; cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability); + memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability), + MLX5_UN_SZ_BYTES(hca_cap_union)); MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count); MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP); @@ -130,7 +140,9 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id, MLX5_SET(set_hca_cap_in, hca_cap, op_mod, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1); ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap); +out: kfree(hca_cap); + kfree(query_cap); return ret; } From 6ff51ab8aa8fcbcddeeefce8ca705b575805d12b Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Wed, 31 Mar 2021 10:09:02 +0300 Subject: [PATCH 356/730] net/mlx5: Set term table as an unmanaged flow table Termination tables are restricted to have the default miss action and cannot be set to forward to another table in case of a miss. If the fs prio of the termination table is not the last one in the list, fs_core will attempt to attach it to another table. Set the unmanaged ft flag when creating the termination table ft and select the tc offload prio for it to prevent fs_core from selecting the forwarding to next ft miss action and use the default one. In addition, set the flow that forwards to the termination table to ignore ft level restrictions since the ft level is not set by fs_core for unamanged fts. Fixes: 249ccc3c95bd ("net/mlx5e: Add support for offloading traffic from uplink to uplink") Signed-off-by: Ariel Levkovich --- .../ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index d61bee2d35fe..b45954905845 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -76,10 +76,11 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, /* As this is the terminating action then the termination table is the * same prio as the slow path */ - ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION | + ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION | MLX5_FLOW_TABLE_UNMANAGED | MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - ft_attr.prio = FDB_SLOW_PATH; + ft_attr.prio = FDB_TC_OFFLOAD; ft_attr.max_fte = 1; + ft_attr.level = 1; ft_attr.autogroup.max_num_groups = 1; tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); if (IS_ERR(tt->termtbl)) { @@ -217,6 +218,7 @@ mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, int i; if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table) || + !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level) || attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH || !mlx5_eswitch_offload_is_uplink_port(esw, spec)) return false; @@ -289,6 +291,7 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, /* create the FTE */ flow_act->action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; flow_act->pkt_reformat = NULL; + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest); if (IS_ERR(rule)) goto revert_changes; From e63052a5dd3ce7979bff727a8f4bb6d6b3d1317b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 5 May 2021 13:20:26 -0700 Subject: [PATCH 357/730] mlx5e: add add missing BH locking around napi_schdule() It's not correct to call napi_schedule() in pure process context. Because we use __raise_softirq_irqoff() we require callers to be in a context which will eventually lead to softirq handling (hardirq, bh disabled, etc.). With code as is users will see: NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #08!!! Fixes: a8dd7ac12fc3 ("net/mlx5e: Generalize RQ activation") Signed-off-by: Jakub Kicinski Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d1b9a4040d60..ad0f69480b9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -889,10 +889,13 @@ err_free_rq: void mlx5e_activate_rq(struct mlx5e_rq *rq) { set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); - if (rq->icosq) + if (rq->icosq) { mlx5e_trigger_irq(rq->icosq); - else + } else { + local_bh_disable(); napi_schedule(rq->cq.napi); + local_bh_enable(); + } } void mlx5e_deactivate_rq(struct mlx5e_rq *rq) From fec356a61aa3d3a66416b4321f1279e09e0f256f Mon Sep 17 00:00:00 2001 From: Wu Bo Date: Wed, 19 May 2021 13:01:09 +0800 Subject: [PATCH 358/730] nvmet: fix memory leak in nvmet_alloc_ctrl() When creating ctrl in nvmet_alloc_ctrl(), if the cntlid_min is larger than cntlid_max of the subsystem, and jumps to the "out_free_changed_ns_list" label, but the ctrl->sqs lack of be freed. Fix this by jumping to the "out_free_sqs" label. Fixes: 94a39d61f80f ("nvmet: make ctrl-id configurable") Signed-off-by: Wu Bo Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 25cc2ee8de3f..1853db38b682 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1372,7 +1372,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, goto out_free_changed_ns_list; if (subsys->cntlid_min > subsys->cntlid_max) - goto out_free_changed_ns_list; + goto out_free_sqs; ret = ida_simple_get(&cntlid_ida, subsys->cntlid_min, subsys->cntlid_max, From 03504e3b54cc8118cc26c064e60a0b00c2308708 Mon Sep 17 00:00:00 2001 From: Wu Bo Date: Wed, 19 May 2021 13:01:10 +0800 Subject: [PATCH 359/730] nvme-loop: fix memory leak in nvme_loop_create_ctrl() When creating loop ctrl in nvme_loop_create_ctrl(), if nvme_init_ctrl() fails, the loop ctrl should be freed before jumping to the "out" label. Fixes: 3a85a5de29ea ("nvme-loop: add a NVMe loopback host driver") Signed-off-by: Wu Bo Signed-off-by: Christoph Hellwig --- drivers/nvme/target/loop.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 74b3b150e1a5..cb30cb942e1d 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -590,8 +590,10 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_loop_ctrl_ops, 0 /* no quirks, we're perfect! */); - if (ret) + if (ret) { + kfree(ctrl); goto out; + } if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) WARN_ON_ONCE(1); From 825619b09ad351894d2c6fb6705f5b3711d145c7 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 17 May 2021 14:07:45 -0700 Subject: [PATCH 360/730] nvme-tcp: fix possible use-after-completion Commit db5ad6b7f8cd ("nvme-tcp: try to send request in queue_rq context") added a second context that may perform a network send. This means that now RX and TX are not serialized in nvme_tcp_io_work and can run concurrently. While there is correct mutual exclusion in the TX path (where the send_mutex protect the queue socket send activity) RX activity, and more specifically request completion may run concurrently. This means we must guarantee that any mutation of the request state related to its lifetime, bytes sent must not be accessed when a completion may have possibly arrived back (and processed). The race may trigger when a request completion arrives, processed _and_ reused as a fresh new request, exactly in the (relatively short) window between the last data payload sent and before the request iov_iter is advanced. Consider the following race: 1. 16K write request is queued 2. The nvme command and the data is sent to the controller (in-capsule or solicited by r2t) 3. After the last payload is sent but before the req.iter is advanced, the controller sends back a completion. 4. The completion is processed, the request is completed, and reused to transfer a new request (write or read) 5. The new request is queued, and the driver reset the request parameters (nvme_tcp_setup_cmd_pdu). 6. Now context in (2) resumes execution and advances the req.iter ==> use-after-completion as this is already a new request. Fix this by making sure the request is not advanced after the last data payload send, knowing that a completion may have arrived already. An alternative solution would have been to delay the request completion or state change waiting for reference counting on the TX path, but besides adding atomic operations to the hot-path, it may present challenges in multi-stage R2T scenarios where a r2t handler needs to be deferred to an async execution. Reported-by: Narayan Ayalasomayajula Tested-by: Anil Mishra Reviewed-by: Keith Busch Cc: stable@vger.kernel.org # v5.8+ Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 0222e23f5936..b97d2732a80f 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -943,7 +943,6 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) if (ret <= 0) return ret; - nvme_tcp_advance_req(req, ret); if (queue->data_digest) nvme_tcp_ddgst_update(queue->snd_hash, page, offset, ret); @@ -960,6 +959,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) } return 1; } + nvme_tcp_advance_req(req, ret); } return -EAGAIN; } From a0fdd1418007f83565d3f2e04b47923ba93a9b8c Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 17 May 2021 15:36:43 -0700 Subject: [PATCH 361/730] nvme-tcp: rerun io_work if req_list is not empty A possible race condition exists where the request to send data is enqueued from nvme_tcp_handle_r2t()'s will not be observed by nvme_tcp_send_all() if it happens to be running. The driver relies on io_work to send the enqueued request when it is runs again, but the concurrently running nvme_tcp_send_all() may not have released the send_mutex at that time. If no future commands are enqueued to re-kick the io_work, the request will timeout in the SEND_H2C state, resulting in a timeout error like: nvme nvme0: queue 1: timeout request 0x3 type 6 Ensure the io_work continues to run as long as the req_list is not empty. Fixes: db5ad6b7f8cdd ("nvme-tcp: try to send request in queue_rq context") Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index b97d2732a80f..34f4b3402f7c 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1140,7 +1140,8 @@ static void nvme_tcp_io_work(struct work_struct *w) pending = true; else if (unlikely(result < 0)) break; - } + } else + pending = !llist_empty(&queue->req_list); result = nvme_tcp_try_recv(queue); if (result > 0) From a7d139145a6640172516b193abf6d2398620aa14 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 10 May 2021 21:56:35 -0700 Subject: [PATCH 362/730] nvme-fc: clear q_live at beginning of association teardown The __nvmf_check_ready() routine used to bounce all filesystem io if the controller state isn't LIVE. However, a later patch changed the logic so that it rejection ends up being based on the Q live check. The FC transport has a slightly different sequence from rdma and tcp for shutting down queues/marking them non-live. FC marks its queue non-live after aborting all ios and waiting for their termination, leaving a rather large window for filesystem io to continue to hit the transport. Unfortunately this resulted in filesystem I/O or applications seeing I/O errors. Change the FC transport to mark the queues non-live at the first sign of teardown for the association (when I/O is initially terminated). Fixes: 73a5379937ec ("nvme-fabrics: allow to queue requests for live queues") Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Reviewed-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index d9ab9e7871d0..256e87721a01 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2461,6 +2461,18 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) static void __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) { + int q; + + /* + * if aborting io, the queues are no longer good, mark them + * all as not live. + */ + if (ctrl->ctrl.queue_count > 1) { + for (q = 1; q < ctrl->ctrl.queue_count; q++) + clear_bit(NVME_FC_Q_LIVE, &ctrl->queues[q].flags); + } + clear_bit(NVME_FC_Q_LIVE, &ctrl->queues[0].flags); + /* * If io queues are present, stop them and terminate all outstanding * ios on them. As FC allocates FC exchange for each io, the From 036867e93ebf4d7e70eba6a8c72db74ee3760bc3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 17 May 2021 09:46:40 +0100 Subject: [PATCH 363/730] drm/i915/gem: Pin the L-shape quirked object as unshrinkable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When instantiating a tiled object on an L-shaped memory machine, we mark the object as unshrinkable to prevent the shrinker from trying to swap out the pages. We have to do this as we do not know the swizzling on the individual pages, and so the data will be scrambled across swap out/in. Not only do we need to move the object off the shrinker list, we need to mark the object with shrink_pin so that the counter is consistent across calls to madvise. v2: in the madvise ioctl we need to check if the object is currently shrinkable/purgeable, not if the object type supports shrinking Fixes: 0175969e489a ("drm/i915/gem: Use shrinkable status for unknown swizzle quirks") References: https://gitlab.freedesktop.org/drm/intel/-/issues/3293 References: https://gitlab.freedesktop.org/drm/intel/-/issues/3450 Reported-by: Ville Syrjälä Tested-by: Ville Syrjälä Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Cc: # v5.12+ Link: https://patchwork.freedesktop.org/patch/msgid/20210517084640.18862-1-matthew.auld@intel.com (cherry picked from commit 8777d17b68dcfbfbd4d524f444adefae56f41225) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_pages.c | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 11 +++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index aed8a37ccdc9..7361971c177d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -63,6 +63,8 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { GEM_BUG_ON(i915_gem_object_has_tiling_quirk(obj)); i915_gem_object_set_tiling_quirk(obj); + GEM_BUG_ON(!list_empty(&obj->mm.link)); + atomic_inc(&obj->mm.shrink_pin); shrinkable = false; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b23f58e94cfb..b3cedd20f365 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -999,12 +999,11 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, obj->mm.madv = args->madv; if (i915_gem_object_has_pages(obj)) { - struct list_head *list; + unsigned long flags; - if (i915_gem_object_is_shrinkable(obj)) { - unsigned long flags; - - spin_lock_irqsave(&i915->mm.obj_lock, flags); + spin_lock_irqsave(&i915->mm.obj_lock, flags); + if (!list_empty(&obj->mm.link)) { + struct list_head *list; if (obj->mm.madv != I915_MADV_WILLNEED) list = &i915->mm.purge_list; @@ -1012,8 +1011,8 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, list = &i915->mm.shrink_list; list_move_tail(&obj->mm.link, list); - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } + spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } /* if the object is no longer attached, discard its backing storage */ From 1a590a1c8bf46bf80ea12b657ca44c345531ac80 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 19 May 2021 09:50:26 +0800 Subject: [PATCH 364/730] iommu/vt-d: Check for allocation failure in aux_detach_device() In current kernels small allocations never fail, but checking for allocation failure is the correct thing to do. Fixes: 18abda7a2d55 ("iommu/vt-d: Fix general protection fault in aux_detach_device()") Signed-off-by: Dan Carpenter Acked-by: Lu Baolu Link: https://lore.kernel.org/r/YJuobKuSn81dOPLd@mwanda Link: https://lore.kernel.org/r/20210519015027.108468-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 708f430af1c4..9a7b79b5af18 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4606,6 +4606,8 @@ static int auxiliary_link_device(struct dmar_domain *domain, if (!sinfo) { sinfo = kzalloc(sizeof(*sinfo), GFP_ATOMIC); + if (!sinfo) + return -ENOMEM; sinfo->domain = domain; sinfo->pdev = dev; list_add(&sinfo->link_phys, &info->subdevices); From 54c80d907400189b09548039be8f3b6e297e8ae3 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 19 May 2021 09:50:27 +0800 Subject: [PATCH 365/730] iommu/vt-d: Use user privilege for RID2PASID translation When first-level page tables are used for IOVA translation, we use user privilege by setting U/S bit in the page table entry. This is to make it consistent with the second level translation, where the U/S enforcement is not available. Clear the SRE (Supervisor Request Enable) field in the pasid table entry of RID2PASID so that requests requesting the supervisor privilege are blocked and treated as DMA remapping faults. Fixes: b802d070a52a1 ("iommu/vt-d: Use iova over first level") Suggested-by: Jacob Pan Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210512064426.3440915-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210519015027.108468-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 7 +++++-- drivers/iommu/intel/pasid.c | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 9a7b79b5af18..be35284a2016 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2525,9 +2525,9 @@ static int domain_setup_first_level(struct intel_iommu *iommu, struct device *dev, u32 pasid) { - int flags = PASID_FLAG_SUPERVISOR_MODE; struct dma_pte *pgd = domain->pgd; int agaw, level; + int flags = 0; /* * Skip top levels of page tables for iommu which has @@ -2543,7 +2543,10 @@ static int domain_setup_first_level(struct intel_iommu *iommu, if (level != 4 && level != 5) return -EINVAL; - flags |= (level == 5) ? PASID_FLAG_FL5LP : 0; + if (pasid != PASID_RID2PASID) + flags |= PASID_FLAG_SUPERVISOR_MODE; + if (level == 5) + flags |= PASID_FLAG_FL5LP; if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED) flags |= PASID_FLAG_PAGE_SNOOP; diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 72646bafc52f..72dc84821dad 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -699,7 +699,8 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, * Since it is a second level only translation setup, we should * set SRE bit as well (addresses are expected to be GPAs). */ - pasid_set_sre(pte); + if (pasid != PASID_RID2PASID) + pasid_set_sre(pte); pasid_set_present(pte); pasid_flush_caches(iommu, pte, pasid, did); From 023dfa9602f561952c0e19d74f66614a56d7e57a Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Mon, 26 Apr 2021 16:11:24 +0200 Subject: [PATCH 366/730] drm/i915/gt: Disable HiZ Raw Stall Optimization on broken gen7 When resetting CACHE_MODE registers, don't enable HiZ Raw Stall Optimization on Ivybridge GT1 and Baytrail, as it causes severe glitches when rendering any kind of 3D accelerated content. This optimization is disabled on these platforms by default according to official documentation from 01.org. Fixes: ef99a60ffd9b ("drm/i915/gt: Clear CACHE_MODE prior to clearing residuals") BugLink: https://gitlab.freedesktop.org/drm/intel/-/issues/3081 BugLink: https://gitlab.freedesktop.org/drm/intel/-/issues/3404 BugLink: https://gitlab.freedesktop.org/drm/intel/-/issues/3071 Reviewed-by: Manuel Bentele Signed-off-by: Simon Rettberg Reviewed-by: Dave Airlie Signed-off-by: Rodrigo Vivi [Rodrigo removed invalid Fixes line] Link: https://patchwork.freedesktop.org/patch/msgid/20210426161124.2b7fd708@dellnichtsogutkiste (cherry picked from commit 929b734ad34b717d6a1b8de97f53bb5616040147) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/gen7_renderclear.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c index de575fdb033f..21f08e53889c 100644 --- a/drivers/gpu/drm/i915/gt/gen7_renderclear.c +++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c @@ -397,7 +397,10 @@ static void emit_batch(struct i915_vma * const vma, gen7_emit_pipeline_invalidate(&cmds); batch_add(&cmds, MI_LOAD_REGISTER_IMM(2)); batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_0_GEN7)); - batch_add(&cmds, 0xffff0000); + batch_add(&cmds, 0xffff0000 | + ((IS_IVB_GT1(i915) || IS_VALLEYVIEW(i915)) ? + HIZ_RAW_STALL_OPT_DISABLE : + 0)); batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_1)); batch_add(&cmds, 0xffff0000 | PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); gen7_emit_pipeline_invalidate(&cmds); From 0024430e920f2900654ad83cd081cf52e02a3ef5 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 18 May 2021 12:01:06 -0700 Subject: [PATCH 367/730] x86/build: Fix location of '-plugin-opt=' flags Commit b33fff07e3e3 ("x86, build: allow LTO to be selected") added a couple of '-plugin-opt=' flags to KBUILD_LDFLAGS because the code model and stack alignment are not stored in LLVM bitcode. However, these flags were added to KBUILD_LDFLAGS prior to the emulation flag assignment, which uses ':=', so they were overwritten and never added to $(LD) invocations. The absence of these flags caused misalignment issues in the AMDGPU driver when compiling with CONFIG_LTO_CLANG, resulting in general protection faults. Shuffle the assignment below the initial one so that the flags are properly passed along and all of the linker flags stay together. At the same time, avoid any future issues with clobbering flags by changing the emulation flag assignment to '+=' since KBUILD_LDFLAGS is already defined with ':=' in the main Makefile before being exported for modification here as a result of commit: ce99d0bf312d ("kbuild: clear LDFLAGS in the top Makefile") Fixes: b33fff07e3e3 ("x86, build: allow LTO to be selected") Reported-by: Anthony Ruhier Signed-off-by: Nathan Chancellor Signed-off-by: Ingo Molnar Tested-by: Anthony Ruhier Cc: stable@vger.kernel.org Link: https://github.com/ClangBuiltLinux/linux/issues/1374 Link: https://lore.kernel.org/r/20210518190106.60935-1-nathan@kernel.org --- arch/x86/Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index c77c5d8a7b3e..307529417021 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -178,11 +178,6 @@ ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1) KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,) endif -ifdef CONFIG_LTO_CLANG -KBUILD_LDFLAGS += -plugin-opt=-code-model=kernel \ - -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8) -endif - # Workaround for a gcc prelease that unfortunately was shipped in a suse release KBUILD_CFLAGS += -Wno-sign-compare # @@ -202,7 +197,12 @@ ifdef CONFIG_RETPOLINE endif endif -KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE) +KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE) + +ifdef CONFIG_LTO_CLANG +KBUILD_LDFLAGS += -plugin-opt=-code-model=kernel \ + -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8) +endif ifdef CONFIG_X86_NEED_RELOCS LDFLAGS_vmlinux := --emit-relocs --discard-none From 8c08652614cb7468620a6328b37ca2965cd48283 Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Tue, 18 May 2021 10:41:21 -0700 Subject: [PATCH 368/730] ASoC: SOF: Intel: hda: don't send DAI_CONFIG IPC for older firmware BE hw_params op was recently added for SSP type DAIs. But sending the DAI_CONFIG IPC during hw_params is not supported with older firmware. So add an ABI check to avoid sending the IPC if the firmware ABI is older than 3.18. Fixes: e12be9fbfb91 ('ASoC: SOF: Intel: HDA: add hw params callback for SSP DAIs') Tested-by: Yong Zhi Reviewed-by: Kai Vehmanen Signed-off-by: Ranjani Sridharan Link: https://lore.kernel.org/r/20210518174121.151601-1-ranjani.sridharan@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda-dai.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/soc/sof/intel/hda-dai.c b/sound/soc/sof/intel/hda-dai.c index 8d7bab433fb3..c1f9f0f58464 100644 --- a/sound/soc/sof/intel/hda-dai.c +++ b/sound/soc/sof/intel/hda-dai.c @@ -421,11 +421,16 @@ static int ssp_dai_hw_params(struct snd_pcm_substream *substream, struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, SOF_AUDIO_PCM_DRV_NAME); struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(component); + struct sof_ipc_fw_version *v = &sdev->fw_ready.version; struct sof_ipc_dai_config *config; struct snd_sof_dai *sof_dai; struct sof_ipc_reply reply; int ret; + /* DAI_CONFIG IPC during hw_params is not supported in older firmware */ + if (v->abi_version < SOF_ABI_VER(3, 18, 0)) + return 0; + list_for_each_entry(sof_dai, &sdev->dai_list, list) { if (!sof_dai->cpu_dai_name || !sof_dai->dai_config) continue; From 647e6cc979b0675499347ddbac55c83876a20cf9 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Wed, 5 May 2021 15:36:35 +0200 Subject: [PATCH 369/730] platform/surface: aggregator: Do not mark interrupt as shared Having both IRQF_NO_AUTOEN and IRQF_SHARED set causes request_threaded_irq() to return with -EINVAL (see comment in flag validation in that function). As the interrupt is currently not shared between multiple devices, drop the IRQF_SHARED flag. Fixes: 507cf5a2f1e2 ("platform/surface: aggregator: move to use request_irq by IRQF_NO_AUTOEN flag") Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20210505133635.1499703-1-luzmaximilian@gmail.com Signed-off-by: Hans de Goede --- drivers/platform/surface/aggregator/controller.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c index 69e86cd599d3..8a70df60142c 100644 --- a/drivers/platform/surface/aggregator/controller.c +++ b/drivers/platform/surface/aggregator/controller.c @@ -2483,8 +2483,7 @@ int ssam_irq_setup(struct ssam_controller *ctrl) * interrupt, and let the SAM resume callback during the controller * resume process clear it. */ - const int irqf = IRQF_SHARED | IRQF_ONESHOT | - IRQF_TRIGGER_RISING | IRQF_NO_AUTOEN; + const int irqf = IRQF_ONESHOT | IRQF_TRIGGER_RISING | IRQF_NO_AUTOEN; gpiod = gpiod_get(dev, "ssam_wakeup-int", GPIOD_ASIS); if (IS_ERR(gpiod)) From ba6e1d8422bd476ad79da409639a773c02f0cbad Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 May 2021 22:04:36 +0200 Subject: [PATCH 370/730] platform/surface: aggregator: avoid clang -Wconstant-conversion warning Clang complains about the assignment of SSAM_ANY_IID to ssam_device_uid->instance: drivers/platform/surface/surface_aggregator_registry.c:478:25: error: implicit conversion from 'int' to '__u8' (aka 'unsigned char') changes value from 65535 to 255 [-Werror,-Wconstant-conversion] { SSAM_VDEV(HUB, 0x02, SSAM_ANY_IID, 0x00) }, ~ ^~~~~~~~~~~~ include/linux/surface_aggregator/device.h:71:23: note: expanded from macro 'SSAM_ANY_IID' #define SSAM_ANY_IID 0xffff ^~~~~~ include/linux/surface_aggregator/device.h:126:63: note: expanded from macro 'SSAM_VDEV' SSAM_DEVICE(SSAM_DOMAIN_VIRTUAL, SSAM_VIRTUAL_TC_##cat, tid, iid, fun) ^~~ include/linux/surface_aggregator/device.h:102:41: note: expanded from macro 'SSAM_DEVICE' .instance = ((iid) != SSAM_ANY_IID) ? (iid) : 0, \ ^~~ The assignment doesn't actually happen, but clang checks the type limits before checking whether this assignment is reached. Replace the ?: operator with a __builtin_choose_expr() invocation that avoids the warning for the untaken part. Fixes: eb0e90a82098 ("platform/surface: aggregator: Add dedicated bus and device type") Cc: platform-driver-x86@vger.kernel.org Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Reviewed-by: Maximilian Luz Link: https://lore.kernel.org/r/20210514200453.1542978-1-arnd@kernel.org Signed-off-by: Hans de Goede --- include/linux/surface_aggregator/device.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/surface_aggregator/device.h b/include/linux/surface_aggregator/device.h index 4441ad667c3f..6ff9c58b3e17 100644 --- a/include/linux/surface_aggregator/device.h +++ b/include/linux/surface_aggregator/device.h @@ -98,9 +98,9 @@ struct ssam_device_uid { | (((fun) != SSAM_ANY_FUN) ? SSAM_MATCH_FUNCTION : 0), \ .domain = d, \ .category = cat, \ - .target = ((tid) != SSAM_ANY_TID) ? (tid) : 0, \ - .instance = ((iid) != SSAM_ANY_IID) ? (iid) : 0, \ - .function = ((fun) != SSAM_ANY_FUN) ? (fun) : 0 \ + .target = __builtin_choose_expr((tid) != SSAM_ANY_TID, (tid), 0), \ + .instance = __builtin_choose_expr((iid) != SSAM_ANY_IID, (iid), 0), \ + .function = __builtin_choose_expr((fun) != SSAM_ANY_FUN, (fun), 0) /** * SSAM_VDEV() - Initialize a &struct ssam_device_id as virtual device with From 773fe1d74404fcb6f0e7e69c3420cf04a6bb56b0 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sat, 15 May 2021 00:19:54 +0200 Subject: [PATCH 371/730] platform/surface: aggregator: Add platform-drivers-x86 list to MAINTAINERS entry The Surface System Aggregator Module driver entry is currently missing a mailing list. Surface platform drivers are discussed on the platform-driver-x86 list and all other Surface platform drivers have a reference to that list in their entries. So let's add one here as well. Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20210514221954.5976-1-luzmaximilian@gmail.com Signed-off-by: Hans de Goede --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index bd7aff0c120f..57467b6046f1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12180,6 +12180,7 @@ F: drivers/platform/surface/surfacepro3_button.c MICROSOFT SURFACE SYSTEM AGGREGATOR SUBSYSTEM M: Maximilian Luz +L: platform-driver-x86@vger.kernel.org S: Maintained W: https://github.com/linux-surface/surface-aggregator-module C: irc://chat.freenode.net/##linux-surface From 9795d8232a24be9e1e1cc408a6bdc01c40e2cedc Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Thu, 13 May 2021 15:44:37 +0200 Subject: [PATCH 372/730] platform/surface: dtx: Fix poll function The poll function should not return -ERESTARTSYS. Furthermore, locking in this function is completely unnecessary. The ddev->lock protects access to the main device and controller (ddev->dev and ddev->ctrl), ensuring that both are and remain valid while being accessed by clients. Both are, however, never accessed in the poll function. The shutdown test (via atomic bit flags) be safely done without locking, so drop locking here entirely. Reported-by: kernel test robot Fixes: 1d609992832e ("platform/surface: Add DTX driver) Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20210513134437.2431022-1-luzmaximilian@gmail.com Signed-off-by: Hans de Goede --- drivers/platform/surface/surface_dtx.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/platform/surface/surface_dtx.c b/drivers/platform/surface/surface_dtx.c index 63ce587e79e3..5d9b758a99bb 100644 --- a/drivers/platform/surface/surface_dtx.c +++ b/drivers/platform/surface/surface_dtx.c @@ -527,20 +527,14 @@ static __poll_t surface_dtx_poll(struct file *file, struct poll_table_struct *pt struct sdtx_client *client = file->private_data; __poll_t events = 0; - if (down_read_killable(&client->ddev->lock)) - return -ERESTARTSYS; - - if (test_bit(SDTX_DEVICE_SHUTDOWN_BIT, &client->ddev->flags)) { - up_read(&client->ddev->lock); + if (test_bit(SDTX_DEVICE_SHUTDOWN_BIT, &client->ddev->flags)) return EPOLLHUP | EPOLLERR; - } poll_wait(file, &client->ddev->waitq, pt); if (!kfifo_is_empty(&client->buffer)) events |= EPOLLIN | EPOLLRDNORM; - up_read(&client->ddev->lock); return events; } From 1c0e5701c5e792c090aef0e5b9b8923c334d9324 Mon Sep 17 00:00:00 2001 From: Liming Sun Date: Fri, 7 May 2021 20:30:12 -0400 Subject: [PATCH 373/730] platform/mellanox: mlxbf-tmfifo: Fix a memory barrier issue The virtio framework uses wmb() when updating avail->idx. It guarantees the write order, but not necessarily loading order for the code accessing the memory. This commit adds a load barrier after reading the avail->idx to make sure all the data in the descriptor is visible. It also adds a barrier when returning the packet to virtio framework to make sure read/writes are visible to the virtio code. Fixes: 1357dfd7261f ("platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc") Signed-off-by: Liming Sun Reviewed-by: Vadim Pasternak Link: https://lore.kernel.org/r/1620433812-17911-1-git-send-email-limings@nvidia.com Signed-off-by: Hans de Goede --- drivers/platform/mellanox/mlxbf-tmfifo.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c index bbc4e71a16ff..38800e86ed8a 100644 --- a/drivers/platform/mellanox/mlxbf-tmfifo.c +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c @@ -294,6 +294,9 @@ mlxbf_tmfifo_get_next_desc(struct mlxbf_tmfifo_vring *vring) if (vring->next_avail == virtio16_to_cpu(vdev, vr->avail->idx)) return NULL; + /* Make sure 'avail->idx' is visible already. */ + virtio_rmb(false); + idx = vring->next_avail % vr->num; head = virtio16_to_cpu(vdev, vr->avail->ring[idx]); if (WARN_ON(head >= vr->num)) @@ -322,7 +325,7 @@ static void mlxbf_tmfifo_release_desc(struct mlxbf_tmfifo_vring *vring, * done or not. Add a memory barrier here to make sure the update above * completes before updating the idx. */ - mb(); + virtio_mb(false); vr->used->idx = cpu_to_virtio16(vdev, vr_idx + 1); } @@ -733,6 +736,12 @@ static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring, desc = NULL; fifo->vring[is_rx] = NULL; + /* + * Make sure the load/store are in order before + * returning back to virtio. + */ + virtio_mb(false); + /* Notify upper layer that packet is done. */ spin_lock_irqsave(&fifo->spin_lock[is_rx], flags); vring_interrupt(0, vring->vq); From 316a76a58c3f30735e5e416a6dc304d6bb86312d Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 17 May 2021 16:09:31 +0200 Subject: [PATCH 374/730] perf test: Fix libpfm4 support (63) test error for nested event groups Compiling perf with make LIBPFM4=1 includes libpfm support and enables test case 63 'Test libpfm4 support'. This test reports an error on all platforms for subtest 63.2 'test groups of --pfm-events'. The reported error message is 'nested event groups not supported' # ./perf test -F 63 63: Test libpfm4 support : 63.1: test of individual --pfm-events : Error: failed to parse event stereolab : event not found Error: failed to parse event stereolab,instructions : event not found Error: failed to parse event instructions,stereolab : event not found Ok 63.2: test groups of --pfm-events : Error: nested event groups not supported <------ Error message here Error: failed to parse event {stereolab} : event not found Error: failed to parse event {instructions,cycles},{instructions,stereolab} :\ event not found Ok # This patch addresses the error message 'nested event groups not supported'. The root cause is function parse_libpfm_events_option() which parses the event string '{},{instructions}' and can not handle a leading empty group notation '{},...'. The code detects the first (empty) group indicator '{' but does not terminate group processing on the following group closing character '}'. So when the second group indicator '{' is detected, the code assumes a nested group and returns an error. With the error message fixed, also change the expected event number to one for the test case to succeed. While at it also fix a memory leak. In good case the function does not free the duplicated string given as first parameter. Output after: # ./perf test -F 63 63: Test libpfm4 support : 63.1: test of individual --pfm-events : Error: failed to parse event stereolab : event not found Error: failed to parse event stereolab,instructions : event not found Error: failed to parse event instructions,stereolab : event not found Ok 63.2: test groups of --pfm-events : Error: failed to parse event {stereolab} : event not found Error: failed to parse event {instructions,cycles},{instructions,stereolab} : \ event not found Ok # Error message 'nested event groups not supported' is gone. Signed-off-by: Thomas Richter Acked-By: Ian Rogers Acked-by: Sumanth Korikkar Cc: Heiko Carstens Cc: Stephane Eranian Cc: Sven Schnelle Cc: Vasily Gorbik Link: http://lore.kernel.org/lkml/20210517140931.2559364-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/pfm.c | 4 ++-- tools/perf/util/pfm.c | 11 ++++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/pfm.c b/tools/perf/tests/pfm.c index 76a53126efdf..d4b0ef74defc 100644 --- a/tools/perf/tests/pfm.c +++ b/tools/perf/tests/pfm.c @@ -131,8 +131,8 @@ static int test__pfm_group(void) }, { .events = "{},{instructions}", - .nr_events = 0, - .nr_groups = 0, + .nr_events = 1, + .nr_groups = 1, }, { .events = "{instructions},{instructions}", diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c index d735acb6c29c..6eef6dfeaa57 100644 --- a/tools/perf/util/pfm.c +++ b/tools/perf/util/pfm.c @@ -62,8 +62,16 @@ int parse_libpfm_events_option(const struct option *opt, const char *str, } /* no event */ - if (*q == '\0') + if (*q == '\0') { + if (*sep == '}') { + if (grp_evt < 0) { + ui__error("cannot close a non-existing event group\n"); + goto error; + } + grp_evt--; + } continue; + } memset(&attr, 0, sizeof(attr)); event_attr_init(&attr); @@ -107,6 +115,7 @@ int parse_libpfm_events_option(const struct option *opt, const char *str, grp_evt = -1; } } + free(p_orig); return 0; error: free(p_orig); From cb7987837c31b217b28089bbc78922d5c9187869 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 19 May 2021 10:45:13 +0300 Subject: [PATCH 375/730] perf intel-pt: Fix transaction abort handling When adding support for power events, some handling of FUP packets was unified. That resulted in breaking reporting of TSX aborts, by not considering the associated TIP packet. Fix that. Example: A machine that supports TSX is required. It will have flag "rtm". Kernel parameter tsx=on may be required. # for w in `cat /proc/cpuinfo | grep -m1 flags `;do echo $w | grep rtm ; done rtm Test program: #include #include int main() { int x = 0; if (_xbegin() == _XBEGIN_STARTED) { x = 1; _xabort(1); } else { printf("x = %d\n", x); } return 0; } Compile with -mrtm i.e. gcc -Wall -Wextra -mrtm xabort.c -o xabort Record: perf record -e intel_pt/cyc/u --filter 'filter main @ ./xabort' ./xabort Before: # perf script --itrace=be -F+flags,+addr,-period,-event --ns xabort 1478 [007] 92161.431348552: tr strt 0 [unknown] ([unknown]) => 400b6d main+0x0 (/root/xabort) xabort 1478 [007] 92161.431348624: jmp 400b96 main+0x29 (/root/xabort) => 400bae main+0x41 (/root/xabort) xabort 1478 [007] 92161.431348624: return 400bb4 main+0x47 (/root/xabort) => 400b87 main+0x1a (/root/xabort) xabort 1478 [007] 92161.431348637: jcc 400b8a main+0x1d (/root/xabort) => 400b98 main+0x2b (/root/xabort) xabort 1478 [007] 92161.431348644: tr end call 400ba9 main+0x3c (/root/xabort) => 40f690 printf+0x0 (/root/xabort) xabort 1478 [007] 92161.431360859: tr strt 0 [unknown] ([unknown]) => 400bae main+0x41 (/root/xabort) xabort 1478 [007] 92161.431360882: tr end return 400bb4 main+0x47 (/root/xabort) => 401139 __libc_start_main+0x309 (/root/xabort) After: # perf script --itrace=be -F+flags,+addr,-period,-event --ns xabort 1478 [007] 92161.431348552: tr strt 0 [unknown] ([unknown]) => 400b6d main+0x0 (/root/xabort) xabort 1478 [007] 92161.431348624: tx abrt 400b93 main+0x26 (/root/xabort) => 400b87 main+0x1a (/root/xabort) xabort 1478 [007] 92161.431348637: jcc 400b8a main+0x1d (/root/xabort) => 400b98 main+0x2b (/root/xabort) xabort 1478 [007] 92161.431348644: tr end call 400ba9 main+0x3c (/root/xabort) => 40f690 printf+0x0 (/root/xabort) xabort 1478 [007] 92161.431360859: tr strt 0 [unknown] ([unknown]) => 400bae main+0x41 (/root/xabort) xabort 1478 [007] 92161.431360882: tr end return 400bb4 main+0x47 (/root/xabort) => 401139 __libc_start_main+0x309 (/root/xabort) Fixes: a472e65fc490a ("perf intel-pt: Add decoder support for ptwrite and power event packets") Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20210519074515.9262-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 8c59677bee13..20ad663978cc 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1146,6 +1146,8 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder) decoder->set_fup_tx_flags = false; decoder->tx_flags = decoder->fup_tx_flags; decoder->state.type = INTEL_PT_TRANSACTION; + if (decoder->fup_tx_flags & INTEL_PT_ABORT_TX) + decoder->state.type |= INTEL_PT_BRANCH; decoder->state.from_ip = decoder->ip; decoder->state.to_ip = 0; decoder->state.flags = decoder->fup_tx_flags; @@ -1220,8 +1222,10 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder) return 0; if (err == -EAGAIN || intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) { + bool no_tip = decoder->pkt_state != INTEL_PT_STATE_FUP; + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; - if (intel_pt_fup_event(decoder)) + if (intel_pt_fup_event(decoder) && no_tip) return 0; return -EAGAIN; } From c954eb72b31a9dc56c99b450253ec5b121add320 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 19 May 2021 10:45:14 +0300 Subject: [PATCH 376/730] perf intel-pt: Fix sample instruction bytes The decoder reports the current instruction if it was decoded. In some cases the current instruction is not decoded, in which case the instruction bytes length must be set to zero. Ensure that is always done. Note perf script can anyway get the instruction bytes for any samples where they are not present. Also note, that there is a redundant "ptq->insn_len = 0" statement which is not removed until a subsequent patch in order to make this patch apply cleanly to stable branches. Example: A machne that supports TSX is required. It will have flag "rtm". Kernel parameter tsx=on may be required. # for w in `cat /proc/cpuinfo | grep -m1 flags `;do echo $w | grep rtm ; done rtm Test program: #include #include int main() { int x = 0; if (_xbegin() == _XBEGIN_STARTED) { x = 1; _xabort(1); } else { printf("x = %d\n", x); } return 0; } Compile with -mrtm i.e. gcc -Wall -Wextra -mrtm xabort.c -o xabort Record: perf record -e intel_pt/cyc/u --filter 'filter main @ ./xabort' ./xabort Before: # perf script --itrace=xe -F+flags,+insn,-period --xed --ns xabort 1478 [007] 92161.431348581: transactions: x 400b81 main+0x14 (/root/xabort) mov $0xffffffff, %eax xabort 1478 [007] 92161.431348624: transactions: tx abrt 400b93 main+0x26 (/root/xabort) mov $0xffffffff, %eax After: # perf script --itrace=xe -F+flags,+insn,-period --xed --ns xabort 1478 [007] 92161.431348581: transactions: x 400b81 main+0x14 (/root/xabort) xbegin 0x6 xabort 1478 [007] 92161.431348624: transactions: tx abrt 400b93 main+0x26 (/root/xabort) xabort $0x1 Fixes: faaa87680b25d ("perf intel-pt/bts: Report instruction bytes and length in sample") Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20210519074515.9262-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 8658d42ce57a..beae5cbe9cc2 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -707,8 +707,10 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, *ip += intel_pt_insn->length; - if (to_ip && *ip == to_ip) + if (to_ip && *ip == to_ip) { + intel_pt_insn->length = 0; goto out_no_cache; + } if (*ip >= al.map->end) break; @@ -1198,6 +1200,7 @@ static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt, static void intel_pt_sample_flags(struct intel_pt_queue *ptq) { + ptq->insn_len = 0; if (ptq->state->flags & INTEL_PT_ABORT_TX) { ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT; } else if (ptq->state->flags & INTEL_PT_ASYNC) { From 0a0c59724516fabf9705c0d9927fa12319908852 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 19 May 2021 10:45:15 +0300 Subject: [PATCH 377/730] perf intel-pt: Remove redundant setting of ptq->insn_len Remove redundant "ptq->insn_len = 0" statement. Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20210519074515.9262-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index beae5cbe9cc2..0dfec8761b9a 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1214,7 +1214,6 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq) ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | PERF_IP_FLAG_INTERRUPT; - ptq->insn_len = 0; } else { if (ptq->state->from_ip) ptq->flags = intel_pt_insn_type(ptq->state->insn_op); From 86bf2b8ffec40eb4c278ce393e2b0bf48d335e59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 11 May 2021 00:15:43 +0200 Subject: [PATCH 378/730] platform/x86: gigabyte-wmi: streamline dmi matching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Streamline dmi matching. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20210510221545.412522-1-linux@weissschuh.net Signed-off-by: Hans de Goede --- drivers/platform/x86/gigabyte-wmi.c | 36 ++++++++++------------------- 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c index 13d57434e60f..b95a94ed40b8 100644 --- a/drivers/platform/x86/gigabyte-wmi.c +++ b/drivers/platform/x86/gigabyte-wmi.c @@ -133,31 +133,19 @@ static u8 gigabyte_wmi_detect_sensor_usability(struct wmi_device *wdev) return r; } +#define DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME(name) \ + { .matches = { \ + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."), \ + DMI_EXACT_MATCH(DMI_BOARD_NAME, name), \ + }} + static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = { - { .matches = { - DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."), - DMI_EXACT_MATCH(DMI_BOARD_NAME, "B550 GAMING X V2"), - }}, - { .matches = { - DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."), - DMI_EXACT_MATCH(DMI_BOARD_NAME, "B550M AORUS PRO-P"), - }}, - { .matches = { - DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."), - DMI_EXACT_MATCH(DMI_BOARD_NAME, "B550M DS3H"), - }}, - { .matches = { - DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."), - DMI_EXACT_MATCH(DMI_BOARD_NAME, "Z390 I AORUS PRO WIFI-CF"), - }}, - { .matches = { - DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."), - DMI_EXACT_MATCH(DMI_BOARD_NAME, "X570 AORUS ELITE"), - }}, - { .matches = { - DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."), - DMI_EXACT_MATCH(DMI_BOARD_NAME, "X570 I AORUS PRO WIFI"), - }}, + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 GAMING X V2"), + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M AORUS PRO-P"), + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"), + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z390 I AORUS PRO WIFI-CF"), + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 AORUS ELITE"), + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 I AORUS PRO WIFI"), { } }; From 8605d64f485fbdb71cb4d55a53085feb000e426e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 11 May 2021 00:15:44 +0200 Subject: [PATCH 379/730] platform/x86: gigabyte-wmi: add support for X570 UD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported as working here: https://github.com/t-8ch/linux-gigabyte-wmi-driver/issues/4 Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20210510221545.412522-2-linux@weissschuh.net Signed-off-by: Hans de Goede --- drivers/platform/x86/gigabyte-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c index b95a94ed40b8..7af6c24151e2 100644 --- a/drivers/platform/x86/gigabyte-wmi.c +++ b/drivers/platform/x86/gigabyte-wmi.c @@ -146,6 +146,7 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = { DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z390 I AORUS PRO WIFI-CF"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 AORUS ELITE"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 I AORUS PRO WIFI"), + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 UD"), { } }; From dac282def6f57d251234e7bbb87d21d7a57b26fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 11 May 2021 00:15:45 +0200 Subject: [PATCH 380/730] platform/x86: gigabyte-wmi: add support for B550 Aorus Elite MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported as working here: https://github.com/t-8ch/linux-gigabyte-wmi-driver/issues/1#issuecomment-837210304 Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20210510221545.412522-3-linux@weissschuh.net Signed-off-by: Hans de Goede --- drivers/platform/x86/gigabyte-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c index 7af6c24151e2..5529d7b0abea 100644 --- a/drivers/platform/x86/gigabyte-wmi.c +++ b/drivers/platform/x86/gigabyte-wmi.c @@ -140,6 +140,7 @@ static u8 gigabyte_wmi_detect_sensor_usability(struct wmi_device *wdev) }} static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = { + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 GAMING X V2"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M AORUS PRO-P"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"), From b68e182a3062e326b891f47152a3a1b84abccf0f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 12 May 2021 14:55:23 +0200 Subject: [PATCH 381/730] platform/x86: intel_int0002_vgpio: Only call enable_irq_wake() when using s2idle Commit 871f1f2bcb01 ("platform/x86: intel_int0002_vgpio: Only implement irq_set_wake on Bay Trail") stopped passing irq_set_wake requests on to the parents IRQ because this was breaking suspend (causing immediate wakeups) on an Asus E202SA. This workaround for the Asus E202SA is causing wakeup by USB keyboard to not work on other devices with Airmont CPU cores such as the Medion Akoya E1239T. In hindsight the problem with the Asus E202SA has nothing to do with Silvermont vs Airmont CPU cores, so the differentiation between the 2 types of CPU cores introduced by the previous fix is wrong. The real issue at hand is s2idle vs S3 suspend where the suspend is mostly handled by firmware. The parent IRQ for the INT0002 device is shared with the ACPI SCI and the real problem is that the INT0002 code should not be messing with the wakeup settings of that IRQ when suspend/resume is being handled by the firmware. Note that on systems which support both s2idle and S3 suspend, which suspend method to use can be changed at runtime. This patch fixes both the Asus E202SA spurious wakeups issue as well as the wakeup by USB keyboard not working on the Medion Akoya E1239T issue. These are both fixed by replacing the old workaround with delaying the enable_irq_wake(parent_irq) call till system-suspend time and protecting it with a !pm_suspend_via_firmware() check so that we still do not call it on devices using firmware-based (S3) suspend such as the Asus E202SA. Note rather then adding #ifdef CONFIG_PM_SLEEP, this commit simply adds a "depends on PM_SLEEP" to the Kconfig since this drivers whole purpose is to deal with wakeup events, so using it without CONFIG_PM_SLEEP makes no sense. Cc: Maxim Mikityanskiy Fixes: 871f1f2bcb01 ("platform/x86: intel_int0002_vgpio: Only implement irq_set_wake on Bay Trail") Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20210512125523.55215-2-hdegoede@redhat.com --- drivers/platform/x86/Kconfig | 2 +- drivers/platform/x86/intel_int0002_vgpio.c | 80 +++++++++++++++------- 2 files changed, 57 insertions(+), 25 deletions(-) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 2714f7c3843e..60592fb88e7a 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -711,7 +711,7 @@ config INTEL_HID_EVENT config INTEL_INT0002_VGPIO tristate "Intel ACPI INT0002 Virtual GPIO driver" - depends on GPIOLIB && ACPI + depends on GPIOLIB && ACPI && PM_SLEEP select GPIOLIB_IRQCHIP help Some peripherals on Bay Trail and Cherry Trail platforms signal a diff --git a/drivers/platform/x86/intel_int0002_vgpio.c b/drivers/platform/x86/intel_int0002_vgpio.c index 289c6655d425..569342aa8926 100644 --- a/drivers/platform/x86/intel_int0002_vgpio.c +++ b/drivers/platform/x86/intel_int0002_vgpio.c @@ -51,6 +51,12 @@ #define GPE0A_STS_PORT 0x420 #define GPE0A_EN_PORT 0x428 +struct int0002_data { + struct gpio_chip chip; + int parent_irq; + int wake_enable_count; +}; + /* * As this is not a real GPIO at all, but just a hack to model an event in * ACPI the get / set functions are dummy functions. @@ -98,14 +104,16 @@ static void int0002_irq_mask(struct irq_data *data) static int int0002_irq_set_wake(struct irq_data *data, unsigned int on) { struct gpio_chip *chip = irq_data_get_irq_chip_data(data); - struct platform_device *pdev = to_platform_device(chip->parent); - int irq = platform_get_irq(pdev, 0); + struct int0002_data *int0002 = container_of(chip, struct int0002_data, chip); - /* Propagate to parent irq */ + /* + * Applying of the wakeup flag to our parent IRQ is delayed till system + * suspend, because we only want to do this when using s2idle. + */ if (on) - enable_irq_wake(irq); + int0002->wake_enable_count++; else - disable_irq_wake(irq); + int0002->wake_enable_count--; return 0; } @@ -135,7 +143,7 @@ static bool int0002_check_wake(void *data) return (gpe_sts_reg & GPE0A_PME_B0_STS_BIT); } -static struct irq_chip int0002_byt_irqchip = { +static struct irq_chip int0002_irqchip = { .name = DRV_NAME, .irq_ack = int0002_irq_ack, .irq_mask = int0002_irq_mask, @@ -143,21 +151,9 @@ static struct irq_chip int0002_byt_irqchip = { .irq_set_wake = int0002_irq_set_wake, }; -static struct irq_chip int0002_cht_irqchip = { - .name = DRV_NAME, - .irq_ack = int0002_irq_ack, - .irq_mask = int0002_irq_mask, - .irq_unmask = int0002_irq_unmask, - /* - * No set_wake, on CHT the IRQ is typically shared with the ACPI SCI - * and we don't want to mess with the ACPI SCI irq settings. - */ - .flags = IRQCHIP_SKIP_SET_WAKE, -}; - static const struct x86_cpu_id int0002_cpu_ids[] = { - X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &int0002_byt_irqchip), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &int0002_cht_irqchip), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, NULL), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, NULL), {} }; @@ -172,8 +168,9 @@ static int int0002_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; const struct x86_cpu_id *cpu_id; - struct gpio_chip *chip; + struct int0002_data *int0002; struct gpio_irq_chip *girq; + struct gpio_chip *chip; int irq, ret; /* Menlow has a different INT0002 device? */ @@ -185,10 +182,13 @@ static int int0002_probe(struct platform_device *pdev) if (irq < 0) return irq; - chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL); - if (!chip) + int0002 = devm_kzalloc(dev, sizeof(*int0002), GFP_KERNEL); + if (!int0002) return -ENOMEM; + int0002->parent_irq = irq; + + chip = &int0002->chip; chip->label = DRV_NAME; chip->parent = dev; chip->owner = THIS_MODULE; @@ -214,7 +214,7 @@ static int int0002_probe(struct platform_device *pdev) } girq = &chip->irq; - girq->chip = (struct irq_chip *)cpu_id->driver_data; + girq->chip = &int0002_irqchip; /* This let us handle the parent IRQ in the driver */ girq->parent_handler = NULL; girq->num_parents = 0; @@ -230,6 +230,7 @@ static int int0002_probe(struct platform_device *pdev) acpi_register_wakeup_handler(irq, int0002_check_wake, NULL); device_init_wakeup(dev, true); + dev_set_drvdata(dev, int0002); return 0; } @@ -240,6 +241,36 @@ static int int0002_remove(struct platform_device *pdev) return 0; } +static int int0002_suspend(struct device *dev) +{ + struct int0002_data *int0002 = dev_get_drvdata(dev); + + /* + * The INT0002 parent IRQ is often shared with the ACPI GPE IRQ, don't + * muck with it when firmware based suspend is used, otherwise we may + * cause spurious wakeups from firmware managed suspend. + */ + if (!pm_suspend_via_firmware() && int0002->wake_enable_count) + enable_irq_wake(int0002->parent_irq); + + return 0; +} + +static int int0002_resume(struct device *dev) +{ + struct int0002_data *int0002 = dev_get_drvdata(dev); + + if (!pm_suspend_via_firmware() && int0002->wake_enable_count) + disable_irq_wake(int0002->parent_irq); + + return 0; +} + +static const struct dev_pm_ops int0002_pm_ops = { + .suspend = int0002_suspend, + .resume = int0002_resume, +}; + static const struct acpi_device_id int0002_acpi_ids[] = { { "INT0002", 0 }, { }, @@ -250,6 +281,7 @@ static struct platform_driver int0002_driver = { .driver = { .name = DRV_NAME, .acpi_match_table = int0002_acpi_ids, + .pm = &int0002_pm_ops, }, .probe = int0002_probe, .remove = int0002_remove, From f048630bdd55eb5379ef35f971639fe52fabe499 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Fri, 14 May 2021 23:30:47 +0530 Subject: [PATCH 382/730] platform/x86: hp-wireless: add AMD's hardware id to the supported list Newer AMD based laptops uses AMDI0051 as the hardware id to support the airplane mode button. Adding this to the supported list. Signed-off-by: Shyam Sundar S K Link: https://lore.kernel.org/r/20210514180047.1697543-1-Shyam-sundar.S-k@amd.com Signed-off-by: Hans de Goede --- drivers/platform/x86/hp-wireless.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/x86/hp-wireless.c b/drivers/platform/x86/hp-wireless.c index 12c31fd5d5ae..0753ef18e721 100644 --- a/drivers/platform/x86/hp-wireless.c +++ b/drivers/platform/x86/hp-wireless.c @@ -17,12 +17,14 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Alex Hung"); MODULE_ALIAS("acpi*:HPQ6001:*"); MODULE_ALIAS("acpi*:WSTADEF:*"); +MODULE_ALIAS("acpi*:AMDI0051:*"); static struct input_dev *hpwl_input_dev; static const struct acpi_device_id hpwl_ids[] = { {"HPQ6001", 0}, {"WSTADEF", 0}, + {"AMDI0051", 0}, {"", 0}, }; From 3a53587423d25c87af4b4126a806a0575104b45e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 18 May 2021 14:50:27 +0200 Subject: [PATCH 383/730] platform/x86: dell-smbios-wmi: Fix oops on rmmod dell_smbios init_dell_smbios_wmi() only registers the dell_smbios_wmi_driver on systems where the Dell WMI interface is supported. While exit_dell_smbios_wmi() unregisters it unconditionally, this leads to the following oops: [ 175.722921] ------------[ cut here ]------------ [ 175.722925] Unexpected driver unregister! [ 175.722939] WARNING: CPU: 1 PID: 3630 at drivers/base/driver.c:194 driver_unregister+0x38/0x40 ... [ 175.723089] Call Trace: [ 175.723094] cleanup_module+0x5/0xedd [dell_smbios] ... [ 175.723148] ---[ end trace 064c34e1ad49509d ]--- Make the unregister happen on the same condition the register happens to fix this. Cc: Mario Limonciello Fixes: 1a258e670434 ("platform/x86: dell-smbios-wmi: Add new WMI dispatcher driver") Signed-off-by: Hans de Goede Reviewed-by: Mario Limonciello Reviewed-by: Mark Gross Link: https://lore.kernel.org/r/20210518125027.21824-1-hdegoede@redhat.com --- drivers/platform/x86/dell/dell-smbios-wmi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/dell/dell-smbios-wmi.c b/drivers/platform/x86/dell/dell-smbios-wmi.c index a1753485159c..33f823772733 100644 --- a/drivers/platform/x86/dell/dell-smbios-wmi.c +++ b/drivers/platform/x86/dell/dell-smbios-wmi.c @@ -270,7 +270,8 @@ int init_dell_smbios_wmi(void) void exit_dell_smbios_wmi(void) { - wmi_driver_unregister(&dell_smbios_wmi_driver); + if (wmi_supported) + wmi_driver_unregister(&dell_smbios_wmi_driver); } MODULE_DEVICE_TABLE(wmi, dell_smbios_wmi_id_table); From bc1eca606d8084465e6f89fd646cc71defbad490 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 19 May 2021 13:15:21 +0300 Subject: [PATCH 384/730] platform/x86: intel_punit_ipc: Append MODULE_DEVICE_TABLE for ACPI The intel_punit_ipc driver might be compiled as a module. When udev handles the event of the devices appearing the intel_punit_ipc module is missing. Append MODULE_DEVICE_TABLE for ACPI case to fix the loading issue. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210519101521.79338-1-andriy.shevchenko@linux.intel.com Signed-off-by: Hans de Goede --- drivers/platform/x86/intel_punit_ipc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/intel_punit_ipc.c b/drivers/platform/x86/intel_punit_ipc.c index 05cced59e251..f58b8543f6ac 100644 --- a/drivers/platform/x86/intel_punit_ipc.c +++ b/drivers/platform/x86/intel_punit_ipc.c @@ -312,6 +312,7 @@ static const struct acpi_device_id punit_ipc_acpi_ids[] = { { "INT34D4", 0 }, { } }; +MODULE_DEVICE_TABLE(acpi, punit_ipc_acpi_ids); static struct platform_driver intel_punit_ipc_driver = { .probe = intel_punit_ipc_probe, From 39a6172ea88b3117353ae16cbb0a53cd80a9340a Mon Sep 17 00:00:00 2001 From: Teava Radu Date: Tue, 4 May 2021 20:57:46 +0200 Subject: [PATCH 385/730] platform/x86: touchscreen_dmi: Add info for the Mediacom Winpad 7.0 W700 tablet Add touchscreen info for the Mediacom Winpad 7.0 W700 tablet. Tested on 5.11 hirsute. Note: it's hw clone to Wintron surftab 7. Signed-off-by: Teava Radu Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210504185746.175461-6-hdegoede@redhat.com --- drivers/platform/x86/touchscreen_dmi.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 90fe4f8f3c2c..875519c6c206 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -1096,6 +1096,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_MATCH(DMI_BIOS_VERSION, "jumperx.T87.KFBNEEA"), }, }, + { + /* Mediacom WinPad 7.0 W700 (same hw as Wintron surftab 7") */ + .driver_data = (void *)&trekstor_surftab_wintron70_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MEDIACOM"), + DMI_MATCH(DMI_PRODUCT_NAME, "WinPad 7 W10 - WPW700"), + }, + }, { /* Mediacom Flexbook Edge 11 (same hw as TS Primebook C11) */ .driver_data = (void *)&trekstor_primebook_c11_data, From 05ca447630334c323c9e2b788b61133ab75d60d3 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 18 May 2021 10:39:39 +0200 Subject: [PATCH 386/730] ALSA: line6: Fix racy initialization of LINE6 MIDI The initialization of MIDI devices that are found on some LINE6 drivers are currently done in a racy way; namely, the MIDI buffer instance is allocated and initialized in each private_init callback while the communication with the interface is already started via line6_init_cap_control() call before that point. This may lead to Oops in line6_data_received() when a spurious event is received, as reported by syzkaller. This patch moves the MIDI initialization to line6_init_cap_control() as well instead of the too-lately-called private_init for avoiding the race. Also this reduces slightly more lines, so it's a win-win change. Reported-by: syzbot+0d2b3feb0a2887862e06@syzkallerlkml..appspotmail.com Link: https://lore.kernel.org/r/000000000000a4be9405c28520de@google.com Link: https://lore.kernel.org/r/20210517132725.GA50495@hyeyoo Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Link: https://lore.kernel.org/r/20210518083939.1927-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/line6/driver.c | 4 ++++ sound/usb/line6/pod.c | 5 ----- sound/usb/line6/variax.c | 6 ------ 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c index a030dd65eb28..9602929b7de9 100644 --- a/sound/usb/line6/driver.c +++ b/sound/usb/line6/driver.c @@ -699,6 +699,10 @@ static int line6_init_cap_control(struct usb_line6 *line6) line6->buffer_message = kmalloc(LINE6_MIDI_MESSAGE_MAXLEN, GFP_KERNEL); if (!line6->buffer_message) return -ENOMEM; + + ret = line6_init_midi(line6); + if (ret < 0) + return ret; } else { ret = line6_hwdep_init(line6); if (ret < 0) diff --git a/sound/usb/line6/pod.c b/sound/usb/line6/pod.c index cd44cb5f1310..16e644330c4d 100644 --- a/sound/usb/line6/pod.c +++ b/sound/usb/line6/pod.c @@ -376,11 +376,6 @@ static int pod_init(struct usb_line6 *line6, if (err < 0) return err; - /* initialize MIDI subsystem: */ - err = line6_init_midi(line6); - if (err < 0) - return err; - /* initialize PCM subsystem: */ err = line6_init_pcm(line6, &pod_pcm_properties); if (err < 0) diff --git a/sound/usb/line6/variax.c b/sound/usb/line6/variax.c index ed158f04de80..c2245aa93b08 100644 --- a/sound/usb/line6/variax.c +++ b/sound/usb/line6/variax.c @@ -159,7 +159,6 @@ static int variax_init(struct usb_line6 *line6, const struct usb_device_id *id) { struct usb_line6_variax *variax = line6_to_variax(line6); - int err; line6->process_message = line6_variax_process_message; line6->disconnect = line6_variax_disconnect; @@ -172,11 +171,6 @@ static int variax_init(struct usb_line6 *line6, if (variax->buffer_activate == NULL) return -ENOMEM; - /* initialize MIDI subsystem: */ - err = line6_init_midi(&variax->line6); - if (err < 0) - return err; - /* initiate startup procedure: */ schedule_delayed_work(&line6->startup_work, msecs_to_jiffies(VARIAX_STARTUP_DELAY1)); From b250f2f7792d15bcde98e0456781e2835556d5fa Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 19 May 2021 15:52:44 +0200 Subject: [PATCH 387/730] x86/sev-es: Don't return NULL from sev_es_get_ghcb() sev_es_get_ghcb() is called from several places but only one of them checks the return value. The reaction to returning NULL is always the same: calling panic() and kill the machine. Instead of adding checks to all call sites, move the panic() into the function itself so that it will no longer return NULL. Fixes: 0786138c78e7 ("x86/sev-es: Add a Runtime #VC Exception Handler") Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org # v5.10+ Link: https://lkml.kernel.org/r/20210519135251.30093-2-joro@8bytes.org --- arch/x86/kernel/sev.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 4fa111becc93..82bced88153b 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -203,8 +203,18 @@ static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state) if (unlikely(data->ghcb_active)) { /* GHCB is already in use - save its contents */ - if (unlikely(data->backup_ghcb_active)) - return NULL; + if (unlikely(data->backup_ghcb_active)) { + /* + * Backup-GHCB is also already in use. There is no way + * to continue here so just kill the machine. To make + * panic() work, mark GHCBs inactive so that messages + * can be printed out. + */ + data->ghcb_active = false; + data->backup_ghcb_active = false; + + panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use"); + } /* Mark backup_ghcb active before writing to it */ data->backup_ghcb_active = true; @@ -1289,7 +1299,6 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs) */ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) { - struct sev_es_runtime_data *data = this_cpu_read(runtime_data); irqentry_state_t irq_state; struct ghcb_state state; struct es_em_ctxt ctxt; @@ -1315,16 +1324,6 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) */ ghcb = sev_es_get_ghcb(&state); - if (!ghcb) { - /* - * Mark GHCBs inactive so that panic() is able to print the - * message. - */ - data->ghcb_active = false; - data->backup_ghcb_active = false; - - panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use"); - } vc_ghcb_invalidate(ghcb); result = vc_init_em_ctxt(&ctxt, regs, error_code); From c0d46717b95735b0eacfddbcca9df37a49de9c7a Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 15 May 2021 09:52:22 -0500 Subject: [PATCH 388/730] SMB3: incorrect file id in requests compounded with open See MS-SMB2 3.2.4.1.4, file ids in compounded requests should be set to 0xFFFFFFFFFFFFFFFF (we were treating it as u32 not u64 and setting it incorrectly). Signed-off-by: Steve French Reported-by: Stefan Metzmacher Reviewed-by: Shyam Prasad N --- fs/cifs/smb2pdu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index a8bf43184773..9f24eb88297a 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3900,10 +3900,10 @@ smb2_new_read_req(void **buf, unsigned int *total_len, * Related requests use info from previous read request * in chain. */ - shdr->SessionId = 0xFFFFFFFF; + shdr->SessionId = 0xFFFFFFFFFFFFFFFF; shdr->TreeId = 0xFFFFFFFF; - req->PersistentFileId = 0xFFFFFFFF; - req->VolatileFileId = 0xFFFFFFFF; + req->PersistentFileId = 0xFFFFFFFFFFFFFFFF; + req->VolatileFileId = 0xFFFFFFFFFFFFFFFF; } } if (remaining_bytes > io_parms->length) From c25bbdb564060adaad5c3a8a10765c13487ba6a3 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 19 May 2021 15:52:45 +0200 Subject: [PATCH 389/730] x86/sev-es: Forward page-faults which happen during emulation When emulating guest instructions for MMIO or IOIO accesses, the #VC handler might get a page-fault and will not be able to complete. Forward the page-fault in this case to the correct handler instead of killing the machine. Fixes: 0786138c78e7 ("x86/sev-es: Add a Runtime #VC Exception Handler") Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org # v5.10+ Link: https://lkml.kernel.org/r/20210519135251.30093-3-joro@8bytes.org --- arch/x86/kernel/sev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 82bced88153b..1f428f401bed 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -1270,6 +1270,10 @@ static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt) case X86_TRAP_UD: exc_invalid_op(ctxt->regs); break; + case X86_TRAP_PF: + write_cr2(ctxt->fi.cr2); + exc_page_fault(ctxt->regs, error_code); + break; case X86_TRAP_AC: exc_alignment_check(ctxt->regs, error_code); break; From 293837b9ac8d3021657f44c9d7a14948ec01c5d0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 19 May 2021 05:55:57 -1000 Subject: [PATCH 390/730] Revert "i915: fix remap_io_sg to verify the pgprot" This reverts commit b12d691ea5e01db42ccf3b4207e57cb3ce7cfe91. It turns out this is not ready for primetime yet. The intentions are good, but using remap_pfn_range() requires that there is nothing already mapped in the area, and the i915 code seems to very much intentionally remap the same area multiple times. That will then just trigger the BUG_ON(!pte_none(*pte)); in mm/memory.c: remap_pte_range(). There are also reports of mapping type inconsistencies, resulting in warnings and in screen corruption. Link: https://lore.kernel.org/lkml/20210519024322.GA29704@xsang-OptiPlex-9020/ Link: https://lore.kernel.org/lkml/YKUjvoaKKggAmpIR@sf/ Link: https://lore.kernel.org/lkml/b6b61cf0-5874-f4c0-1fcc-4b3848451c31@redhat.com/ Reported-by: kernel test robot Reported-by: Kalle Valo Reported-by: Hans de Goede Reported-by: Sergei Trofimovich Acked-by: Christoph Hellwig Cc: Chris Wilson Cc: Daniel Vetter Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Peter Zijlstra Cc: Rodrigo Vivi Cc: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpu/drm/i915/i915_mm.c | 71 +++++++++++++++++++++++----------- 1 file changed, 49 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c index 4c8cd08c672d..9a777b0ff59b 100644 --- a/drivers/gpu/drm/i915/i915_mm.c +++ b/drivers/gpu/drm/i915/i915_mm.c @@ -28,10 +28,46 @@ #include "i915_drv.h" -#define EXPECTED_FLAGS (VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP) +struct remap_pfn { + struct mm_struct *mm; + unsigned long pfn; + pgprot_t prot; + + struct sgt_iter sgt; + resource_size_t iobase; +}; #define use_dma(io) ((io) != -1) +static inline unsigned long sgt_pfn(const struct remap_pfn *r) +{ + if (use_dma(r->iobase)) + return (r->sgt.dma + r->sgt.curr + r->iobase) >> PAGE_SHIFT; + else + return r->sgt.pfn + (r->sgt.curr >> PAGE_SHIFT); +} + +static int remap_sg(pte_t *pte, unsigned long addr, void *data) +{ + struct remap_pfn *r = data; + + if (GEM_WARN_ON(!r->sgt.sgp)) + return -EINVAL; + + /* Special PTE are not associated with any struct page */ + set_pte_at(r->mm, addr, pte, + pte_mkspecial(pfn_pte(sgt_pfn(r), r->prot))); + r->pfn++; /* track insertions in case we need to unwind later */ + + r->sgt.curr += PAGE_SIZE; + if (r->sgt.curr >= r->sgt.max) + r->sgt = __sgt_iter(__sg_next(r->sgt.sgp), use_dma(r->iobase)); + + return 0; +} + +#define EXPECTED_FLAGS (VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP) + /** * remap_io_sg - remap an IO mapping to userspace * @vma: user vma to map to @@ -46,7 +82,12 @@ int remap_io_sg(struct vm_area_struct *vma, unsigned long addr, unsigned long size, struct scatterlist *sgl, resource_size_t iobase) { - unsigned long pfn, len, remapped = 0; + struct remap_pfn r = { + .mm = vma->vm_mm, + .prot = vma->vm_page_prot, + .sgt = __sgt_iter(sgl, use_dma(iobase)), + .iobase = iobase, + }; int err; /* We rely on prevalidation of the io-mapping to skip track_pfn(). */ @@ -55,25 +96,11 @@ int remap_io_sg(struct vm_area_struct *vma, if (!use_dma(iobase)) flush_cache_range(vma, addr, size); - do { - if (use_dma(iobase)) { - if (!sg_dma_len(sgl)) - break; - pfn = (sg_dma_address(sgl) + iobase) >> PAGE_SHIFT; - len = sg_dma_len(sgl); - } else { - pfn = page_to_pfn(sg_page(sgl)); - len = sgl->length; - } + err = apply_to_page_range(r.mm, addr, size, remap_sg, &r); + if (unlikely(err)) { + zap_vma_ptes(vma, addr, r.pfn << PAGE_SHIFT); + return err; + } - err = remap_pfn_range(vma, addr + remapped, pfn, len, - vma->vm_page_prot); - if (err) - break; - remapped += len; - } while ((sgl = __sg_next(sgl))); - - if (err) - zap_vma_ptes(vma, addr, remapped); - return err; + return 0; } From cfa3b797118eda7d68f9ede9b1a0279192aca653 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 19 May 2021 11:41:32 +0300 Subject: [PATCH 391/730] RDMA/mlx5: Fix query DCT via DEVX When executing DEVX command to query QP object, we need to take the QP type from the mlx5_ib_qp struct which hold the driver specific QP types as well, such as DC. Fixes: 34613eb1d2ad ("IB/mlx5: Enable modify and query verbs objects via DEVX") Link: https://lore.kernel.org/r/6eee15d63f09bb70787488e0cf96216e2957f5aa.1621413654.git.leonro@nvidia.com Reviewed-by: Yishai Hadas Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index a0b677accd96..eb9b0a2707f8 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -630,9 +630,8 @@ static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs, case UVERBS_OBJECT_QP: { struct mlx5_ib_qp *qp = to_mqp(uobj->object); - enum ib_qp_type qp_type = qp->ibqp.qp_type; - if (qp_type == IB_QPT_RAW_PACKET || + if (qp->type == IB_QPT_RAW_PACKET || (qp->flags & IB_QP_CREATE_SOURCE_QPN)) { struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; @@ -649,10 +648,9 @@ static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs, sq->tisn) == obj_id); } - if (qp_type == MLX5_IB_QPT_DCT) + if (qp->type == MLX5_IB_QPT_DCT) return get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT, qp->dct.mdct.mqp.qpn) == obj_id; - return get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, qp->ibqp.qp_num) == obj_id; } From 4954f5b8ef0baf70fe978d1a99a5f70e4dd5c877 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 19 May 2021 15:52:46 +0200 Subject: [PATCH 392/730] x86/sev-es: Use __put_user()/__get_user() for data accesses The put_user() and get_user() functions do checks on the address which is passed to them. They check whether the address is actually a user-space address and whether its fine to access it. They also call might_fault() to indicate that they could fault and possibly sleep. All of these checks are neither wanted nor needed in the #VC exception handler, which can be invoked from almost any context and also for MMIO instructions from kernel space on kernel memory. All the #VC handler wants to know is whether a fault happened when the access was tried. This is provided by __put_user()/__get_user(), which just do the access no matter what. Also add comments explaining why __get_user() and __put_user() are the best choice here and why it is safe to use them in this context. Also explain why copy_to/from_user can't be used. In addition, also revert commit 7024f60d6552 ("x86/sev-es: Handle string port IO to kernel memory properly") because using __get_user()/__put_user() fixes the same problem while the above commit introduced several problems: 1) It uses access_ok() which is only allowed in task context. 2) It uses memcpy() which has no fault handling at all and is thus unsafe to use here. [ bp: Fix up commit ID of the reverted commit above. ] Fixes: f980f9c31a92 ("x86/sev-es: Compile early handler code into kernel image") Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org # v5.10+ Link: https://lkml.kernel.org/r/20210519135251.30093-4-joro@8bytes.org --- arch/x86/kernel/sev.c | 66 ++++++++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 20 deletions(-) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 1f428f401bed..651b81cd648e 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -315,31 +315,44 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, u16 d2; u8 d1; - /* If instruction ran in kernel mode and the I/O buffer is in kernel space */ - if (!user_mode(ctxt->regs) && !access_ok(target, size)) { - memcpy(dst, buf, size); - return ES_OK; - } - + /* + * This function uses __put_user() independent of whether kernel or user + * memory is accessed. This works fine because __put_user() does no + * sanity checks of the pointer being accessed. All that it does is + * to report when the access failed. + * + * Also, this function runs in atomic context, so __put_user() is not + * allowed to sleep. The page-fault handler detects that it is running + * in atomic context and will not try to take mmap_sem and handle the + * fault, so additional pagefault_enable()/disable() calls are not + * needed. + * + * The access can't be done via copy_to_user() here because + * vc_write_mem() must not use string instructions to access unsafe + * memory. The reason is that MOVS is emulated by the #VC handler by + * splitting the move up into a read and a write and taking a nested #VC + * exception on whatever of them is the MMIO access. Using string + * instructions here would cause infinite nesting. + */ switch (size) { case 1: memcpy(&d1, buf, 1); - if (put_user(d1, target)) + if (__put_user(d1, target)) goto fault; break; case 2: memcpy(&d2, buf, 2); - if (put_user(d2, target)) + if (__put_user(d2, target)) goto fault; break; case 4: memcpy(&d4, buf, 4); - if (put_user(d4, target)) + if (__put_user(d4, target)) goto fault; break; case 8: memcpy(&d8, buf, 8); - if (put_user(d8, target)) + if (__put_user(d8, target)) goto fault; break; default: @@ -370,30 +383,43 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, u16 d2; u8 d1; - /* If instruction ran in kernel mode and the I/O buffer is in kernel space */ - if (!user_mode(ctxt->regs) && !access_ok(s, size)) { - memcpy(buf, src, size); - return ES_OK; - } - + /* + * This function uses __get_user() independent of whether kernel or user + * memory is accessed. This works fine because __get_user() does no + * sanity checks of the pointer being accessed. All that it does is + * to report when the access failed. + * + * Also, this function runs in atomic context, so __get_user() is not + * allowed to sleep. The page-fault handler detects that it is running + * in atomic context and will not try to take mmap_sem and handle the + * fault, so additional pagefault_enable()/disable() calls are not + * needed. + * + * The access can't be done via copy_from_user() here because + * vc_read_mem() must not use string instructions to access unsafe + * memory. The reason is that MOVS is emulated by the #VC handler by + * splitting the move up into a read and a write and taking a nested #VC + * exception on whatever of them is the MMIO access. Using string + * instructions here would cause infinite nesting. + */ switch (size) { case 1: - if (get_user(d1, s)) + if (__get_user(d1, s)) goto fault; memcpy(buf, &d1, 1); break; case 2: - if (get_user(d2, s)) + if (__get_user(d2, s)) goto fault; memcpy(buf, &d2, 2); break; case 4: - if (get_user(d4, s)) + if (__get_user(d4, s)) goto fault; memcpy(buf, &d4, 4); break; case 8: - if (get_user(d8, s)) + if (__get_user(d8, s)) goto fault; memcpy(buf, &d8, 8); break; From fb6c79d7261afb7e942251254ea47951c2a9a706 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 10 Feb 2021 17:33:27 +0900 Subject: [PATCH 393/730] perf tools: Add 'cgroup-switches' software event It counts how often cgroups are changed actually during the context switches. # perf stat -a -e context-switches,cgroup-switches -a sleep 1 Performance counter stats for 'system wide': 11,267 context-switches 10,950 cgroup-switches 1.015634369 seconds time elapsed Committer notes: The kernel patches landed in v5.13, but this entry wasn't filled in perf's parse-events tables, which was leading to a segfault when running 'perf list' on a kernel with that feature, as reported by Thomas Richter. Also removed the part touching tools/include/uapi/linux/perf_event.h as it was updated in the usual sync with the kernel UAPI headers, in a previous, already upstream, patch. Signed-off-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andi Kleen Cc: Heiko Carstens Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Richter Link: http://lore.kernel.org/lkml/20210210083327.22726-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 4 ++++ tools/perf/util/parse-events.l | 1 + 2 files changed, 5 insertions(+) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 4dad14265b81..269997066f6e 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -150,6 +150,10 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { .symbol = "bpf-output", .alias = "", }, + [PERF_COUNT_SW_CGROUP_SWITCHES] = { + .symbol = "cgroup-switches", + .alias = "", + }, }; #define __PERF_EVENT_FIELD(config, name) \ diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index fb8646cc3e83..923849024b15 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -347,6 +347,7 @@ emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EM dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } duration_time { return tool(yyscanner, PERF_TOOL_DURATION_TIME); } bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); } +cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); } /* * We have to handle the kernel PMU event cycles-ct/cycles-t/mem-loads/mem-stores separately. From 463a3f66473b58d71428a1c3ce69ea52c05440e5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 14 May 2021 17:18:10 +0300 Subject: [PATCH 394/730] RDMA/uverbs: Fix a NULL vs IS_ERR() bug The uapi_get_object() function returns error pointers, it never returns NULL. Fixes: 149d3845f4a5 ("RDMA/uverbs: Add a method to introspect handles in a context") Link: https://lore.kernel.org/r/YJ6Got+U7lz+3n9a@mwanda Signed-off-by: Dan Carpenter Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_std_types_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c index a03021d94e11..049684880ae0 100644 --- a/drivers/infiniband/core/uverbs_std_types_device.c +++ b/drivers/infiniband/core/uverbs_std_types_device.c @@ -117,8 +117,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_INFO_HANDLES)( return ret; uapi_object = uapi_get_object(attrs->ufile->device->uapi, object_id); - if (!uapi_object) - return -EINVAL; + if (IS_ERR(uapi_object)) + return PTR_ERR(uapi_object); handles = gather_objects_handle(attrs->ufile, uapi_object, attrs, out_len, &total); From c71b99640d2d350ee3146452c1057bd59cb2c5e0 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 19 May 2021 10:10:38 +0800 Subject: [PATCH 395/730] ethtool: stats: Fix a copy-paste error data->ctrl_stats should be memset with correct size. Fixes: bfad2b979ddc ("ethtool: add interface to read standard MAC Ctrl stats") Signed-off-by: YueHaibing Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/ethtool/stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c index b7642dc96d50..ec07f5765e03 100644 --- a/net/ethtool/stats.c +++ b/net/ethtool/stats.c @@ -119,7 +119,7 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base, */ memset(&data->phy_stats, 0xff, sizeof(data->phy_stats)); memset(&data->mac_stats, 0xff, sizeof(data->mac_stats)); - memset(&data->ctrl_stats, 0xff, sizeof(data->mac_stats)); + memset(&data->ctrl_stats, 0xff, sizeof(data->ctrl_stats)); memset(&data->rmon_stats, 0xff, sizeof(data->rmon_stats)); if (test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask) && From 88c380df84fbd03f9b137c2b9d0a44b9f2f553b0 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 19 May 2021 16:48:31 +0530 Subject: [PATCH 396/730] cxgb4: avoid accessing registers when clearing filters Hardware register having the server TID base can contain invalid values when adapter is in bad state (for example, due to AER fatal error). Reading these invalid values in the register can lead to out-of-bound memory access. So, fix by using the saved server TID base when clearing filters. Fixes: b1a79360ee86 ("cxgb4: Delete all hash and TCAM filters before resource cleanup") Signed-off-by: Raju Rangoju Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index bc581b149b11..22c9ac922eba 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -1042,7 +1042,7 @@ void clear_all_filters(struct adapter *adapter) cxgb4_del_filter(dev, f->tid, &f->fs); } - sb = t4_read_reg(adapter, LE_DB_SRVR_START_INDEX_A); + sb = adapter->tids.stid_base; for (i = 0; i < sb; i++) { f = (struct filter_entry *)adapter->tids.tid_tab[i]; From d5b3bd6ab5418e34d85f64fba7c6ca02c3cbfb63 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 19 May 2021 15:02:53 +0200 Subject: [PATCH 397/730] dt-bindings: net: renesas,ether: Update Sergei's email address Update Sergei's email address, as per commit 534a8bf0ccdd7b3f ("MAINTAINERS: switch to my private email for Renesas Ethernet drivers"). Signed-off-by: Geert Uytterhoeven Acked-by: Sergei Shtylyov Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/renesas,ether.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/renesas,ether.yaml b/Documentation/devicetree/bindings/net/renesas,ether.yaml index 8ce5ed8a58dd..c101a1ec846e 100644 --- a/Documentation/devicetree/bindings/net/renesas,ether.yaml +++ b/Documentation/devicetree/bindings/net/renesas,ether.yaml @@ -10,7 +10,7 @@ allOf: - $ref: ethernet-controller.yaml# maintainers: - - Sergei Shtylyov + - Sergei Shtylyov properties: compatible: From 4d52ebc7ace491d58f96d1f4a1cb9070c506b2e7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 19 May 2021 14:47:17 +0200 Subject: [PATCH 398/730] net: hso: bail out on interrupt URB allocation failure Commit 31db0dbd7244 ("net: hso: check for allocation failure in hso_create_bulk_serial_device()") recently started returning an error when the driver fails to allocate resources for the interrupt endpoint and tiocmget functionality. For consistency let's bail out from probe also if the URB allocation fails. Signed-off-by: Johan Hovold Reviewed-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/usb/hso.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 260f850d69eb..b48b2a25210c 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2635,14 +2635,14 @@ static struct hso_device *hso_create_bulk_serial_device( } tiocmget->urb = usb_alloc_urb(0, GFP_KERNEL); - if (tiocmget->urb) { - mutex_init(&tiocmget->mutex); - init_waitqueue_head(&tiocmget->waitq); - } else - hso_free_tiomget(serial); - } - else + if (!tiocmget->urb) + goto exit; + + mutex_init(&tiocmget->mutex); + init_waitqueue_head(&tiocmget->waitq); + } else { num_urbs = 1; + } if (hso_serial_common_create(serial, num_urbs, BULK_URB_RX_SIZE, BULK_URB_TX_SIZE)) From 84c63d040938f64a7dc195696301166e75231bf5 Mon Sep 17 00:00:00 2001 From: Nikola Cornij Date: Thu, 6 May 2021 22:46:52 -0400 Subject: [PATCH 399/730] drm/amd/display: Use the correct max downscaling value for DCN3.x family [why] As per spec, DCN3.x can do 6:1 downscaling and DCN2.x can do 4:1. The max downscaling limit value for DCN2.x is 250, which means it's calculated as 1000 / 4 = 250. For DCN3.x this then gives 1000 / 6 = 167. [how] Set maximum downscaling limit to 167 for DCN3.x Signed-off-by: Nikola Cornij Reviewed-by: Charlene Liu Reviewed-by: Harry Wentland Acked-by: Stylon Wang Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c | 7 ++++--- drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c | 7 ++++--- drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c | 7 ++++--- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 4a5fa23d8e7b..5fcc2e64305d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -826,10 +826,11 @@ static const struct dc_plane_cap plane_cap = { .fp16 = 16000 }, + /* 6:1 downscaling ratio: 1000/6 = 166.666 */ .max_downscale_factor = { - .argb8888 = 600, - .nv12 = 600, - .fp16 = 600 + .argb8888 = 167, + .nv12 = 167, + .fp16 = 167 } }; diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index 5b54b7fc5105..472696f949ac 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -843,10 +843,11 @@ static const struct dc_plane_cap plane_cap = { .fp16 = 16000 }, + /* 6:1 downscaling ratio: 1000/6 = 166.666 */ .max_downscale_factor = { - .argb8888 = 600, - .nv12 = 600, - .fp16 = 600 + .argb8888 = 167, + .nv12 = 167, + .fp16 = 167 }, 64, 64 diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c index fc2dea243d1b..a33f0365329b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c @@ -284,10 +284,11 @@ static const struct dc_plane_cap plane_cap = { .nv12 = 16000, .fp16 = 16000 }, + /* 6:1 downscaling ratio: 1000/6 = 166.666 */ .max_downscale_factor = { - .argb8888 = 600, - .nv12 = 600, - .fp16 = 600 + .argb8888 = 167, + .nv12 = 167, + .fp16 = 167 }, 16, 16 From 0c8df343c200529e6b9820bdfed01814140f75e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 12 May 2021 10:36:43 +0200 Subject: [PATCH 400/730] drm/radeon: use the dummy page for GART if needed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Imported BOs don't have a pagelist any more. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Fixes: 0575ff3d33cd ("drm/radeon: stop using pages with drm_prime_sg_to_page_addr_arrays v2") CC: stable@vger.kernel.org # 5.12 --- drivers/gpu/drm/radeon/radeon_gart.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 3808a753127b..04109a2a6fd7 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -301,7 +301,8 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); for (i = 0; i < pages; i++, p++) { - rdev->gart.pages[p] = pagelist[i]; + rdev->gart.pages[p] = pagelist ? pagelist[i] : + rdev->dummy_page.page; page_base = dma_addr[i]; for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) { page_entry = radeon_gart_get_page_entry(page_base, flags); From d53751568359e5b3ffb859b13cbd79dc77a571f1 Mon Sep 17 00:00:00 2001 From: Yi Li Date: Fri, 14 May 2021 14:40:39 +0800 Subject: [PATCH 401/730] drm/amdgpu: Fix GPU TLB update error when PAGE_SIZE > AMDGPU_PAGE_SIZE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When PAGE_SIZE is larger than AMDGPU_PAGE_SIZE, the number of GPU TLB entries which need to update in amdgpu_map_buffer() should be multiplied by AMDGPU_GPU_PAGES_IN_CPU_PAGE (PAGE_SIZE / AMDGPU_PAGE_SIZE). Reviewed-by: Christian König Signed-off-by: Yi Li Signed-off-by: Huacai Chen Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 3bef0432cac2..a376a993e474 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -225,7 +225,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, *addr += mm_cur->start & ~PAGE_MASK; num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); - num_bytes = num_pages * 8; + num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE; r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, AMDGPU_IB_POOL_DELAYED, &job); From dbd1003d1252db5973dddf20b24bb0106ac52aa2 Mon Sep 17 00:00:00 2001 From: Changfeng Date: Fri, 14 May 2021 15:28:25 +0800 Subject: [PATCH 402/730] drm/amdgpu: disable 3DCGCG on picasso/raven1 to avoid compute hang There is problem with 3DCGCG firmware and it will cause compute test hang on picasso/raven1. It needs to disable 3DCGCG in driver to avoid compute hang. Signed-off-by: Changfeng Reviewed-by: Alex Deucher Reviewed-by: Huang Rui Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 10 +++++++--- drivers/gpu/drm/amd/amdgpu/soc15.c | 2 -- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index a078a38c2cee..516467e962b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4943,7 +4943,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, amdgpu_gfx_rlc_enter_safe_mode(adev); /* Enable 3D CGCG/CGLS */ - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { + if (enable) { /* write cmd to clear cgcg/cgls ov */ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); /* unset CGCG override */ @@ -4955,8 +4955,12 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, /* enable 3Dcgcg FSM(0x0000363f) */ def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); - data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | - RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) + data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; + else + data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 8e1b9a40839f..980675052b53 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1392,7 +1392,6 @@ static int soc15_common_early_init(void *handle) adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS | AMD_CG_SUPPORT_GFX_CP_LS | - AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS | AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS | @@ -1412,7 +1411,6 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_GFX_MGLS | AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_CP_LS | - AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS | AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS | From 080039273b126eeb0185a61c045893a25dbc046e Mon Sep 17 00:00:00 2001 From: Chris Park Date: Tue, 4 May 2021 16:20:55 -0400 Subject: [PATCH 403/730] drm/amd/display: Disconnect non-DP with no EDID [Why] Active DP dongles return no EDID when dongle is connected, but VGA display is taken out. Current driver behavior does not remove the active display when this happens, and this is a gap between dongle DTP and dongle behavior. [How] For active DP dongles and non-DP scenario, disconnect sink on detection when no EDID is read due to timeout. Signed-off-by: Chris Park Reviewed-by: Nicholas Kazlauskas Acked-by: Stylon Wang Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index f4374d83662a..c1f5474c205a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1076,6 +1076,24 @@ static bool dc_link_detect_helper(struct dc_link *link, dc_is_dvi_signal(link->connector_signal)) { if (prev_sink) dc_sink_release(prev_sink); + link_disconnect_sink(link); + + return false; + } + /* + * Abort detection for DP connectors if we have + * no EDID and connector is active converter + * as there are no display downstream + * + */ + if (dc_is_dp_sst_signal(link->connector_signal) && + (link->dpcd_caps.dongle_type == + DISPLAY_DONGLE_DP_VGA_CONVERTER || + link->dpcd_caps.dongle_type == + DISPLAY_DONGLE_DP_DVI_CONVERTER)) { + if (prev_sink) + dc_sink_release(prev_sink); + link_disconnect_sink(link); return false; } From fa7e6abc75f3d491bc561734312d065dc9dc2a77 Mon Sep 17 00:00:00 2001 From: Jingwen Chen Date: Mon, 17 May 2021 16:16:10 +0800 Subject: [PATCH 404/730] drm/amd/amdgpu: fix refcount leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] the gem object rfb->base.obj[0] is get according to num_planes in amdgpufb_create, but is not put according to num_planes [How] put rfb->base.obj[0] in amdgpu_fbdev_destroy according to num_planes Signed-off-by: Jingwen Chen Acked-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 4f10c4529840..09b048647523 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -288,10 +288,13 @@ out: static int amdgpu_fbdev_destroy(struct drm_device *dev, struct amdgpu_fbdev *rfbdev) { struct amdgpu_framebuffer *rfb = &rfbdev->rfb; + int i; drm_fb_helper_unregister_fbi(&rfbdev->helper); if (rfb->base.obj[0]) { + for (i = 0; i < rfb->base.format->num_planes; i++) + drm_gem_object_put(rfb->base.obj[0]); amdgpufb_destroy_pinned_object(rfb->base.obj[0]); rfb->base.obj[0] = NULL; drm_framebuffer_unregister_private(&rfb->base); From ab95cb3e1bc44d4376bd8d331b1cff82b99020e3 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Tue, 18 May 2021 08:44:23 -0400 Subject: [PATCH 405/730] drm/amdgpu: add video_codecs query support for aldebaran Add video_codecs query support for aldebaran. Signed-off-by: James Zhu Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 980675052b53..e65c286f93a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -302,6 +302,7 @@ static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode, *codecs = &rv_video_codecs_decode; return 0; case CHIP_ARCTURUS: + case CHIP_ALDEBARAN: case CHIP_RENOIR: if (encode) *codecs = &vega_video_codecs_encode; From 1e5c37385097c35911b0f8a0c67ffd10ee1af9a2 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Tue, 18 May 2021 10:56:07 +0800 Subject: [PATCH 406/730] drm/amdgpu: Fix a use-after-free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit looks like we forget to set ttm->sg to NULL. Hit panic below [ 1235.844104] general protection fault, probably for non-canonical address 0x6b6b6b6b6b6b7b4b: 0000 [#1] SMP DEBUG_PAGEALLOC NOPTI [ 1235.989074] Call Trace: [ 1235.991751] sg_free_table+0x17/0x20 [ 1235.995667] amdgpu_ttm_backend_unbind.cold+0x4d/0xf7 [amdgpu] [ 1236.002288] amdgpu_ttm_backend_destroy+0x29/0x130 [amdgpu] [ 1236.008464] ttm_tt_destroy+0x1e/0x30 [ttm] [ 1236.013066] ttm_bo_cleanup_memtype_use+0x51/0xa0 [ttm] [ 1236.018783] ttm_bo_release+0x262/0xa50 [ttm] [ 1236.023547] ttm_bo_put+0x82/0xd0 [ttm] [ 1236.027766] amdgpu_bo_unref+0x26/0x50 [amdgpu] [ 1236.032809] amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu+0x7aa/0xd90 [amdgpu] [ 1236.040400] kfd_ioctl_alloc_memory_of_gpu+0xe2/0x330 [amdgpu] [ 1236.046912] kfd_ioctl+0x463/0x690 [amdgpu] Signed-off-by: xinhui pan Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index a376a993e474..d5cbc51c5eaa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1210,6 +1210,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev, if (gtt && gtt->userptr) { amdgpu_ttm_tt_set_user_pages(ttm, NULL); kfree(ttm->sg); + ttm->sg = NULL; ttm->page_flags &= ~TTM_PAGE_FLAG_SG; return; } From 99c45ba5799d6b938bd9bd20edfeb6f3e3e039b9 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Mon, 17 May 2021 16:35:40 +0800 Subject: [PATCH 407/730] drm/amdgpu: update gc golden setting for Navi12 Current golden setting is out of date. Signed-off-by: Guchun Chen Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 2408ed4c7d84..7ce76a6b3a35 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -1395,9 +1395,10 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04900000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1800ff, 0x00000044), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x00007fff, 0x000001fe), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), @@ -1415,12 +1416,13 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000820, 0x00000820), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070104), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0xffdf80ff, 0x479c0010), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00800000) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00c00000) }; static bool gfx_v10_is_rlcg_rw(struct amdgpu_device *adev, u32 offset, uint32_t *flag, bool write) From 77194d8642dd4cb7ea8ced77bfaea55610574c38 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Mon, 17 May 2021 16:38:00 +0800 Subject: [PATCH 408/730] drm/amdgpu: update sdma golden setting for Navi12 Current golden setting is out of date. Signed-off-by: Guchun Chen Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 920fc6d4a127..8859133ce37e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -123,6 +123,10 @@ static const struct soc15_reg_golden golden_settings_sdma_nv14[] = { static const struct soc15_reg_golden golden_settings_sdma_nv12[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG, 0x001877ff, 0x00000044), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG, 0x001877ff, 0x00000044), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), }; From 9c2876d56f1ce9b6b2072f1446fb1e8d1532cb3d Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Mon, 17 May 2021 12:47:20 +0800 Subject: [PATCH 409/730] drm/amd/amdgpu: fix a potential deadlock in gpu reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When amdgpu_ib_ring_tests failed, the reset logic called amdgpu_device_ip_suspend twice, then deadlock occurred. Deadlock log: [ 805.655192] amdgpu 0000:04:00.0: amdgpu: ib ring test failed (-110). [ 806.290952] [drm] free PSP TMR buffer [ 806.319406] ============================================ [ 806.320315] WARNING: possible recursive locking detected [ 806.321225] 5.11.0-custom #1 Tainted: G W OEL [ 806.322135] -------------------------------------------- [ 806.323043] cat/2593 is trying to acquire lock: [ 806.323825] ffff888136b1cdc8 (&adev->dm.dc_lock){+.+.}-{3:3}, at: dm_suspend+0xb8/0x1d0 [amdgpu] [ 806.325668] but task is already holding lock: [ 806.326664] ffff888136b1cdc8 (&adev->dm.dc_lock){+.+.}-{3:3}, at: dm_suspend+0xb8/0x1d0 [amdgpu] [ 806.328430] other info that might help us debug this: [ 806.329539] Possible unsafe locking scenario: [ 806.330549] CPU0 [ 806.330983] ---- [ 806.331416] lock(&adev->dm.dc_lock); [ 806.332086] lock(&adev->dm.dc_lock); [ 806.332738] *** DEADLOCK *** [ 806.333747] May be due to missing lock nesting notation [ 806.334899] 3 locks held by cat/2593: [ 806.335537] #0: ffff888100d3f1b8 (&attr->mutex){+.+.}-{3:3}, at: simple_attr_read+0x4e/0x110 [ 806.337009] #1: ffff888136b1fd78 (&adev->reset_sem){++++}-{3:3}, at: amdgpu_device_lock_adev+0x42/0x94 [amdgpu] [ 806.339018] #2: ffff888136b1cdc8 (&adev->dm.dc_lock){+.+.}-{3:3}, at: dm_suspend+0xb8/0x1d0 [amdgpu] [ 806.340869] stack backtrace: [ 806.341621] CPU: 6 PID: 2593 Comm: cat Tainted: G W OEL 5.11.0-custom #1 [ 806.342921] Hardware name: AMD Celadon-CZN/Celadon-CZN, BIOS WLD0C23N_Weekly_20_12_2 12/23/2020 [ 806.344413] Call Trace: [ 806.344849] dump_stack+0x93/0xbd [ 806.345435] __lock_acquire.cold+0x18a/0x2cf [ 806.346179] lock_acquire+0xca/0x390 [ 806.346807] ? dm_suspend+0xb8/0x1d0 [amdgpu] [ 806.347813] __mutex_lock+0x9b/0x930 [ 806.348454] ? dm_suspend+0xb8/0x1d0 [amdgpu] [ 806.349434] ? amdgpu_device_indirect_rreg+0x58/0x70 [amdgpu] [ 806.350581] ? _raw_spin_unlock_irqrestore+0x47/0x50 [ 806.351437] ? dm_suspend+0xb8/0x1d0 [amdgpu] [ 806.352437] ? rcu_read_lock_sched_held+0x4f/0x80 [ 806.353252] ? rcu_read_lock_sched_held+0x4f/0x80 [ 806.354064] mutex_lock_nested+0x1b/0x20 [ 806.354747] ? mutex_lock_nested+0x1b/0x20 [ 806.355457] dm_suspend+0xb8/0x1d0 [amdgpu] [ 806.356427] ? soc15_common_set_clockgating_state+0x17d/0x19 [amdgpu] [ 806.357736] amdgpu_device_ip_suspend_phase1+0x78/0xd0 [amdgpu] [ 806.360394] amdgpu_device_ip_suspend+0x21/0x70 [amdgpu] [ 806.362926] amdgpu_device_pre_asic_reset+0xb3/0x270 [amdgpu] [ 806.365560] amdgpu_device_gpu_recover.cold+0x679/0x8eb [amdgpu] Signed-off-by: Lang Yu Acked-by: Christian KÃnig Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 8b2a37bf2adf..66ddfe4f58c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4479,7 +4479,6 @@ out: r = amdgpu_ib_ring_tests(tmp_adev); if (r) { dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r); - r = amdgpu_device_ip_suspend(tmp_adev); need_full_reset = true; r = -EAGAIN; goto end; From a2b4785f01280a4291edb9fda69032fc2e4bfd3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 18 May 2021 17:48:02 +0200 Subject: [PATCH 410/730] drm/amdgpu: stop touching sched.ready in the backend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This unfortunately comes up in regular intervals and breaks GPU reset for the engine in question. The sched.ready flag controls if an engine can't get working during hw_init, but should never be set to false during hw_fini. v2: squash in unused variable fix (Alex) Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 2 -- drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c | 2 -- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 5 ----- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 8 +------- 4 files changed, 1 insertion(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 83531997aeba..938ef4ce5b76 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -198,8 +198,6 @@ static int jpeg_v2_5_hw_fini(void *handle) if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS)) jpeg_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE); - - ring->sched.ready = false; } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index de5dfcfb3859..94be35357f7d 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -166,8 +166,6 @@ static int jpeg_v3_0_hw_fini(void *handle) RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS)) jpeg_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE); - ring->sched.ready = false; - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index b1ad9e52b234..240596b25fe4 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -497,11 +497,6 @@ static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev) ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); } - - sdma0->sched.ready = false; - sdma1->sched.ready = false; - sdma2->sched.ready = false; - sdma3->sched.ready = false; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index cf165ab5dd26..14470da52113 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -373,7 +373,7 @@ static int vcn_v3_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; - int i, j; + int i; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) @@ -388,12 +388,6 @@ static int vcn_v3_0_hw_fini(void *handle) vcn_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE); } } - ring->sched.ready = false; - - for (j = 0; j < adev->vcn.num_enc_rings; ++j) { - ring = &adev->vcn.inst[i].ring_enc[j]; - ring->sched.ready = false; - } } return 0; From d201d7631ca170b038e7f8921120d05eec70d7c5 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 19 May 2021 08:40:11 +1000 Subject: [PATCH 411/730] cifs: fix memory leak in smb2_copychunk_range When using smb2_copychunk_range() for large ranges we will run through several iterations of a loop calling SMB2_ioctl() but never actually free the returned buffer except for the final iteration. This leads to memory leaks everytime a large copychunk is requested. Fixes: 9bf0c9cd4314 ("CIFS: Fix SMB2/SMB3 Copy offload support (refcopy) for large files") Cc: Reviewed-by: Aurelien Aptel Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index dd0eb665b680..c693624a7267 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1861,6 +1861,8 @@ smb2_copychunk_range(const unsigned int xid, cpu_to_le32(min_t(u32, len, tcon->max_bytes_chunk)); /* Request server copy to target from src identified by key */ + kfree(retbuf); + retbuf = NULL; rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid, trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE, true /* is_fsctl */, (char *)pcchunk, From e83aa3528a38bddae182a35d0efb5a6c35143c1c Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 19 May 2021 18:47:07 +0800 Subject: [PATCH 412/730] cifs: Fix inconsistent indenting Eliminate the follow smatch warning: fs/cifs/fs_context.c:1148 smb3_fs_context_parse_param() warn: inconsistent indenting. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Steve French --- fs/cifs/fs_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 5d21cd905315..92d4ab029c91 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -1145,7 +1145,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, /* if iocharset not set then load_nls_default * is used by caller */ - cifs_dbg(FYI, "iocharset set to %s\n", ctx->iocharset); + cifs_dbg(FYI, "iocharset set to %s\n", ctx->iocharset); break; case Opt_netbiosname: memset(ctx->source_rfc1001_name, 0x20, From 860b69a9d77160d21ca00357fd6c5217f9d41fb1 Mon Sep 17 00:00:00 2001 From: Rohith Surabattula Date: Wed, 5 May 2021 10:56:47 +0000 Subject: [PATCH 413/730] Fix kernel oops when CONFIG_DEBUG_ATOMIC_SLEEP is enabled. Removed oplock_break_received flag which was added to achieve synchronization between oplock handler and open handler by earlier commit. It is not needed because there is an existing lock open_file_lock to achieve the same. find_readable_file takes open_file_lock and then traverses the openFileList. Similarly, cifs_oplock_break while closing the deferred handle (i.e cifsFileInfo_put) takes open_file_lock and then sends close to the server. Added comments for better readability. Signed-off-by: Rohith Surabattula Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 2 +- fs/cifs/cifsglob.h | 3 +-- fs/cifs/file.c | 27 ++++++++++++--------------- fs/cifs/misc.c | 9 +++++++++ 4 files changed, 23 insertions(+), 18 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index d7ea9c5fe0f8..2ffcb29d5c8f 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -133,7 +133,7 @@ struct workqueue_struct *cifsiod_wq; struct workqueue_struct *decrypt_wq; struct workqueue_struct *fileinfo_put_wq; struct workqueue_struct *cifsoplockd_wq; -struct workqueue_struct *deferredclose_wq; +struct workqueue_struct *deferredclose_wq; __u32 cifs_lock_secret; /* diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index d88b4b523dcc..ea90c53386b8 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1257,8 +1257,7 @@ struct cifsFileInfo { struct work_struct oplock_break; /* work for oplock breaks */ struct work_struct put; /* work for the final part of _put */ struct delayed_work deferred; - bool oplock_break_received; /* Flag to indicate oplock break */ - bool deferred_scheduled; + bool deferred_close_scheduled; /* Flag to indicate close is scheduled */ }; struct cifs_io_parms { diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 6caad100c3f3..304d9d3783c6 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -323,8 +323,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, cfile->dentry = dget(dentry); cfile->f_flags = file->f_flags; cfile->invalidHandle = false; - cfile->oplock_break_received = false; - cfile->deferred_scheduled = false; + cfile->deferred_close_scheduled = false; cfile->tlink = cifs_get_tlink(tlink); INIT_WORK(&cfile->oplock_break, cifs_oplock_break); INIT_WORK(&cfile->put, cifsFileInfo_put_work); @@ -574,21 +573,18 @@ int cifs_open(struct inode *inode, struct file *file) file->f_op = &cifs_file_direct_ops; } - spin_lock(&CIFS_I(inode)->deferred_lock); /* Get the cached handle as SMB2 close is deferred */ rc = cifs_get_readable_path(tcon, full_path, &cfile); if (rc == 0) { if (file->f_flags == cfile->f_flags) { file->private_data = cfile; + spin_lock(&CIFS_I(inode)->deferred_lock); cifs_del_deferred_close(cfile); spin_unlock(&CIFS_I(inode)->deferred_lock); goto out; } else { - spin_unlock(&CIFS_I(inode)->deferred_lock); _cifsFileInfo_put(cfile, true, false); } - } else { - spin_unlock(&CIFS_I(inode)->deferred_lock); } if (server->oplocks) @@ -878,12 +874,12 @@ void smb2_deferred_work_close(struct work_struct *work) struct cifsFileInfo, deferred.work); spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock); - if (!cfile->deferred_scheduled) { + if (!cfile->deferred_close_scheduled) { spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock); return; } cifs_del_deferred_close(cfile); - cfile->deferred_scheduled = false; + cfile->deferred_close_scheduled = false; spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock); _cifsFileInfo_put(cfile, true, false); } @@ -905,14 +901,15 @@ int cifs_close(struct inode *inode, struct file *file) inode->i_ctime = inode->i_mtime = current_time(inode); spin_lock(&cinode->deferred_lock); cifs_add_deferred_close(cfile, dclose); - if (cfile->deferred_scheduled) { + if (cfile->deferred_close_scheduled && + delayed_work_pending(&cfile->deferred)) { mod_delayed_work(deferredclose_wq, &cfile->deferred, cifs_sb->ctx->acregmax); } else { /* Deferred close for files */ queue_delayed_work(deferredclose_wq, &cfile->deferred, cifs_sb->ctx->acregmax); - cfile->deferred_scheduled = true; + cfile->deferred_close_scheduled = true; spin_unlock(&cinode->deferred_lock); return 0; } @@ -2020,8 +2017,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, if (fsuid_only && !uid_eq(open_file->uid, current_fsuid())) continue; if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) { - if ((!open_file->invalidHandle) && - (!open_file->oplock_break_received)) { + if ((!open_file->invalidHandle)) { /* found a good file */ /* lock it so it will not be closed on us */ cifsFileInfo_get(open_file); @@ -4874,13 +4870,14 @@ oplock_break_ack: } /* * When oplock break is received and there are no active - * file handles but cached, then set the flag oplock_break_received. + * file handles but cached, then schedule deferred close immediately. * So, new open will not use cached handle. */ spin_lock(&CIFS_I(inode)->deferred_lock); is_deferred = cifs_is_deferred_close(cfile, &dclose); - if (is_deferred && cfile->deferred_scheduled) { - cfile->oplock_break_received = true; + if (is_deferred && + cfile->deferred_close_scheduled && + delayed_work_pending(&cfile->deferred)) { mod_delayed_work(deferredclose_wq, &cfile->deferred, 0); } spin_unlock(&CIFS_I(inode)->deferred_lock); diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 801a5300f765..34f2a7e80c58 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -672,6 +672,9 @@ cifs_add_pending_open(struct cifs_fid *fid, struct tcon_link *tlink, spin_unlock(&tlink_tcon(open->tlink)->open_file_lock); } +/* + * Critical section which runs after acquiring deferred_lock. + */ bool cifs_is_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close **pdclose) { @@ -688,6 +691,9 @@ cifs_is_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close ** return false; } +/* + * Critical section which runs after acquiring deferred_lock. + */ void cifs_add_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close *dclose) { @@ -707,6 +713,9 @@ cifs_add_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close * list_add_tail(&dclose->dlist, &CIFS_I(d_inode(cfile->dentry))->deferred_closes); } +/* + * Critical section which runs after acquiring deferred_lock. + */ void cifs_del_deferred_close(struct cifsFileInfo *cfile) { From 0ab95c2510b641fb860a773b3d242ef9768a8f66 Mon Sep 17 00:00:00 2001 From: Rohith Surabattula Date: Mon, 17 May 2021 11:28:34 +0000 Subject: [PATCH 414/730] Defer close only when lease is enabled. When smb2 lease parameter is disabled on server. Server grants batch oplock instead of RHW lease by default on open, inode page cache needs to be zapped immediatley upon close as cache is not valid. Signed-off-by: Rohith Surabattula Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 1 + fs/cifs/file.c | 1 + fs/cifs/smb2ops.c | 2 ++ 3 files changed, 4 insertions(+) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index ea90c53386b8..8488d7024462 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1417,6 +1417,7 @@ struct cifsInodeInfo { struct inode vfs_inode; struct list_head deferred_closes; /* list of deferred closes */ spinlock_t deferred_lock; /* protection on deferred list */ + bool lease_granted; /* Flag to indicate whether lease or oplock is granted. */ }; static inline struct cifsInodeInfo * diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 304d9d3783c6..a1abd3da1d44 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -896,6 +896,7 @@ int cifs_close(struct inode *inode, struct file *file) file->private_data = NULL; dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL); if ((cinode->oplock == CIFS_CACHE_RHW_FLG) && + cinode->lease_granted && dclose) { if (test_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) inode->i_ctime = inode->i_mtime = current_time(inode); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index c693624a7267..21ef51d338e0 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3983,6 +3983,7 @@ smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, unsigned int epoch, bool *purge_cache) { oplock &= 0xFF; + cinode->lease_granted = false; if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE) return; if (oplock == SMB2_OPLOCK_LEVEL_BATCH) { @@ -4009,6 +4010,7 @@ smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, unsigned int new_oplock = 0; oplock &= 0xFF; + cinode->lease_granted = true; if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE) return; From e2f5efd0f0e229bd110eab513e7c0331d61a4649 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 20 May 2021 13:29:19 +1000 Subject: [PATCH 415/730] powerpc: Fix early setup to make early_ioremap() work The immediate problem is that after commit 0bd3f9e953bd ("powerpc/legacy_serial: Use early_ioremap()") the kernel silently reboots on some systems. The reason is that early_ioremap() returns broken addresses as it uses slot_virt[] array which initialized with offsets from FIXADDR_TOP == IOREMAP_END+FIXADDR_SIZE == KERN_IO_END - FIXADDR_SIZ + FIXADDR_SIZE == __kernel_io_end which is 0 when early_ioremap_setup() is called. __kernel_io_end is initialized little bit later in early_init_mmu(). This fixes the initialization by swapping early_ioremap_setup() and early_init_mmu(). Fixes: 265c3491c4bc ("powerpc: Add support for GENERIC_EARLY_IOREMAP") Signed-off-by: Alexey Kardashevskiy Reviewed-by: Christophe Leroy [mpe: Drop unrelated cleanup & cleanup change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210520032919.358935-1-aik@ozlabs.ru --- arch/powerpc/kernel/setup_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index b779d25761cf..e42b85e4f1aa 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -369,11 +369,11 @@ void __init early_setup(unsigned long dt_ptr) apply_feature_fixups(); setup_feature_keys(); - early_ioremap_setup(); - /* Initialize the hash table or TLB handling */ early_init_mmu(); + early_ioremap_setup(); + /* * After firmware and early platform setup code has set things up, * we note the SPR values for configurable control/performance From b106776080a1cf953a1b2fd50cb2a995db4732be Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 27 Apr 2021 15:48:29 +0300 Subject: [PATCH 416/730] thunderbolt: dma_port: Fix NVM read buffer bounds and offset issue Up to 64 bytes of data can be read from NVM in one go. Read address must be dword aligned. Data is read into a local buffer. If caller asks to read data starting at an unaligned address then full dword is anyway read from NVM into a local buffer. Data is then copied from the local buffer starting at the unaligned offset to the caller buffer. In cases where asked data length + unaligned offset is over 64 bytes we need to make sure we don't read past the 64 bytes in the local buffer when copying to caller buffer, and make sure that we don't skip copying unaligned offset bytes from local buffer anymore after the first round of 64 byte NVM data read. Fixes: 3e13676862f9 ("thunderbolt: Add support for DMA configuration based mailbox") Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Signed-off-by: Mika Westerberg --- drivers/thunderbolt/dma_port.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/thunderbolt/dma_port.c b/drivers/thunderbolt/dma_port.c index 7288aaf01ae6..5631319f7b20 100644 --- a/drivers/thunderbolt/dma_port.c +++ b/drivers/thunderbolt/dma_port.c @@ -366,15 +366,15 @@ int dma_port_flash_read(struct tb_dma_port *dma, unsigned int address, void *buf, size_t size) { unsigned int retries = DMA_PORT_RETRIES; - unsigned int offset; - - offset = address & 3; - address = address & ~3; do { - u32 nbytes = min_t(u32, size, MAIL_DATA_DWORDS * 4); + unsigned int offset; + size_t nbytes; int ret; + offset = address & 3; + nbytes = min_t(size_t, size + offset, MAIL_DATA_DWORDS * 4); + ret = dma_port_flash_read_block(dma, address, dma->buf, ALIGN(nbytes, 4)); if (ret) { @@ -386,6 +386,7 @@ int dma_port_flash_read(struct tb_dma_port *dma, unsigned int address, return ret; } + nbytes -= offset; memcpy(buf, dma->buf + offset, nbytes); size -= nbytes; From 22c7a18ed5f007faccb7527bc890463763214081 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 25 Mar 2021 10:32:50 +0200 Subject: [PATCH 417/730] thunderbolt: usb4: Fix NVM read buffer bounds and offset issue Up to 64 bytes of data can be read from NVM in one go. Read address must be dword aligned. Data is read into a local buffer. If caller asks to read data starting at an unaligned address then full dword is anyway read from NVM into a local buffer. Data is then copied from the local buffer starting at the unaligned offset to the caller buffer. In cases where asked data length + unaligned offset is over 64 bytes we need to make sure we don't read past the 64 bytes in the local buffer when copying to caller buffer, and make sure that we don't skip copying unaligned offset bytes from local buffer anymore after the first round of 64 byte NVM data read. Fixes: b04079837b20 ("thunderbolt: Add initial support for USB4") Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Signed-off-by: Mika Westerberg --- drivers/thunderbolt/usb4.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index 680bc738dd66..671d72af8ba1 100644 --- a/drivers/thunderbolt/usb4.c +++ b/drivers/thunderbolt/usb4.c @@ -68,15 +68,15 @@ static int usb4_do_read_data(u16 address, void *buf, size_t size, unsigned int retries = USB4_DATA_RETRIES; unsigned int offset; - offset = address & 3; - address = address & ~3; - do { - size_t nbytes = min_t(size_t, size, USB4_DATA_DWORDS * 4); unsigned int dwaddress, dwords; u8 data[USB4_DATA_DWORDS * 4]; + size_t nbytes; int ret; + offset = address & 3; + nbytes = min_t(size_t, size + offset, USB4_DATA_DWORDS * 4); + dwaddress = address / 4; dwords = ALIGN(nbytes, 4) / 4; @@ -87,6 +87,7 @@ static int usb4_do_read_data(u16 address, void *buf, size_t size, return ret; } + nbytes -= offset; memcpy(buf, data + offset, nbytes); size -= nbytes; From 7e008b02557ccece4d2c31fb0eaf6243cbc87121 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 17 May 2021 13:20:17 +0200 Subject: [PATCH 418/730] dma-buf: fix unintended pin/unpin warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DMA-buf internal users call the pin/unpin functions without having a dynamic attachment. Avoid the warning and backtrace in the logs. Signed-off-by: Christian König Bugs: https://gitlab.freedesktop.org/drm/intel/-/issues/3481 Fixes: c545781e1c55 ("dma-buf: doc polish for pin/unpin") Reviewed-by: Alex Deucher Reviewed-by: Daniel Vetter CC: stable@kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20210517115705.2141-1-christian.koenig@amd.com --- drivers/dma-buf/dma-buf.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index f264b70c383e..eadd1eaa2fb5 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -760,7 +760,7 @@ dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev, if (dma_buf_is_dynamic(attach->dmabuf)) { dma_resv_lock(attach->dmabuf->resv, NULL); - ret = dma_buf_pin(attach); + ret = dmabuf->ops->pin(attach); if (ret) goto err_unlock; } @@ -786,7 +786,7 @@ err_attach: err_unpin: if (dma_buf_is_dynamic(attach->dmabuf)) - dma_buf_unpin(attach); + dmabuf->ops->unpin(attach); err_unlock: if (dma_buf_is_dynamic(attach->dmabuf)) @@ -843,7 +843,7 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach) __unmap_dma_buf(attach, attach->sgt, attach->dir); if (dma_buf_is_dynamic(attach->dmabuf)) { - dma_buf_unpin(attach); + dmabuf->ops->unpin(attach); dma_resv_unlock(attach->dmabuf->resv); } } @@ -956,7 +956,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach, if (dma_buf_is_dynamic(attach->dmabuf)) { dma_resv_assert_held(attach->dmabuf->resv); if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) { - r = dma_buf_pin(attach); + r = attach->dmabuf->ops->pin(attach); if (r) return ERR_PTR(r); } @@ -968,7 +968,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach, if (IS_ERR(sg_table) && dma_buf_is_dynamic(attach->dmabuf) && !IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) - dma_buf_unpin(attach); + attach->dmabuf->ops->unpin(attach); if (!IS_ERR(sg_table) && attach->dmabuf->ops->cache_sgt_mapping) { attach->sgt = sg_table; From e68671e9e1275dfdda333c3e83b6d28963af16b6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 20 May 2021 11:32:28 +0200 Subject: [PATCH 419/730] platform/x86: touchscreen_dmi: Add info for the Chuwi Hi10 Pro (CWI529) tablet Add touchscreen info for the Chuwi Hi10 Pro (CWI529) tablet. This includes info for getting the firmware directly from the UEFI, so that the user does not need to manually install the firmware in /lib/firmware/silead. This change will make the touchscreen on these devices work OOTB, without requiring any manual setup. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210520093228.7439-1-hdegoede@redhat.com --- drivers/platform/x86/touchscreen_dmi.c | 35 ++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 875519c6c206..bde740d6120e 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -115,6 +115,32 @@ static const struct ts_dmi_data chuwi_hi10_plus_data = { .properties = chuwi_hi10_plus_props, }; +static const struct property_entry chuwi_hi10_pro_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-x", 8), + PROPERTY_ENTRY_U32("touchscreen-min-y", 8), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1912), + PROPERTY_ENTRY_U32("touchscreen-size-y", 1272), + PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"), + PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-chuwi-hi10-pro.fw"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + PROPERTY_ENTRY_BOOL("silead,home-button"), + { } +}; + +static const struct ts_dmi_data chuwi_hi10_pro_data = { + .embedded_fw = { + .name = "silead/gsl1680-chuwi-hi10-pro.fw", + .prefix = { 0xf0, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00 }, + .length = 42504, + .sha256 = { 0xdb, 0x92, 0x68, 0xa8, 0xdb, 0x81, 0x31, 0x00, + 0x1f, 0x58, 0x89, 0xdb, 0x19, 0x1b, 0x15, 0x8c, + 0x05, 0x14, 0xf4, 0x95, 0xba, 0x15, 0x45, 0x98, + 0x42, 0xa3, 0xbb, 0x65, 0xe3, 0x30, 0xa5, 0x93 }, + }, + .acpi_name = "MSSL1680:00", + .properties = chuwi_hi10_pro_props, +}; + static const struct property_entry chuwi_vi8_props[] = { PROPERTY_ENTRY_U32("touchscreen-min-x", 4), PROPERTY_ENTRY_U32("touchscreen-min-y", 6), @@ -915,6 +941,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"), }, }, + { + /* Chuwi Hi10 Prus (CWI597) */ + .driver_data = (void *)&chuwi_hi10_pro_data, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Hampoo"), + DMI_MATCH(DMI_PRODUCT_NAME, "Hi10 pro tablet"), + DMI_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"), + }, + }, { /* Chuwi Vi8 (CWI506) */ .driver_data = (void *)&chuwi_vi8_data, From ba5ef6dc8a827a904794210a227cdb94828e8ae7 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 20 May 2021 13:21:20 +0100 Subject: [PATCH 420/730] io_uring: fortify tctx/io_wq cleanup We don't want anyone poking into tctx->io_wq awhile it's being destroyed by io_wq_put_and_exit(), and even though it shouldn't even happen, if buggy would be preferable to get a NULL-deref instead of subtle delayed failure or UAF. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/827b021de17926fd807610b3e53a5a5fa8530856.1621513214.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 89ec10471b30..5f82954004f6 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9035,15 +9035,15 @@ static void io_uring_del_task_file(unsigned long index) static void io_uring_clean_tctx(struct io_uring_task *tctx) { + struct io_wq *wq = tctx->io_wq; struct io_tctx_node *node; unsigned long index; + tctx->io_wq = NULL; xa_for_each(&tctx->xa, index, node) io_uring_del_task_file(index); - if (tctx->io_wq) { - io_wq_put_and_exit(tctx->io_wq); - tctx->io_wq = NULL; - } + if (wq) + io_wq_put_and_exit(wq); } static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked) From e380adfc213a13677993c0e35cb48f5a8e61ebb0 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 19 May 2021 00:40:27 +0900 Subject: [PATCH 421/730] btrfs: zoned: pass start block to btrfs_use_zone_append btrfs_use_zone_append only needs the passed in extent_map's block_start member, so there's no need to pass in the full extent map. This also enables the use of btrfs_use_zone_append in places where we only have a start byte but no extent_map. Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 2 +- fs/btrfs/inode.c | 2 +- fs/btrfs/zoned.c | 4 ++-- fs/btrfs/zoned.h | 5 ++--- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 360d997c7226..d9f20ca3ac7d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3762,7 +3762,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode, /* Note that em_end from extent_map_end() is exclusive */ iosize = min(em_end, end + 1) - cur; - if (btrfs_use_zone_append(inode, em)) + if (btrfs_use_zone_append(inode, em->block_start)) opf = REQ_OP_ZONE_APPEND; free_extent_map(em); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 095e452f59f0..bb4ab408d670 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7796,7 +7796,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start, iomap->bdev = fs_info->fs_devices->latest_bdev; iomap->length = len; - if (write && btrfs_use_zone_append(BTRFS_I(inode), em)) + if (write && btrfs_use_zone_append(BTRFS_I(inode), em->block_start)) iomap->flags |= IOMAP_F_ZONE_APPEND; free_extent_map(em); diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 304ce64c70a4..1bb8ee97aae0 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1278,7 +1278,7 @@ void btrfs_free_redirty_list(struct btrfs_transaction *trans) spin_unlock(&trans->releasing_ebs_lock); } -bool btrfs_use_zone_append(struct btrfs_inode *inode, struct extent_map *em) +bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start) { struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_block_group *cache; @@ -1293,7 +1293,7 @@ bool btrfs_use_zone_append(struct btrfs_inode *inode, struct extent_map *em) if (!is_data_inode(&inode->vfs_inode)) return false; - cache = btrfs_lookup_block_group(fs_info, em->block_start); + cache = btrfs_lookup_block_group(fs_info, start); ASSERT(cache); if (!cache) return false; diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h index 5e41a74a9cb2..e55d32595c2c 100644 --- a/fs/btrfs/zoned.h +++ b/fs/btrfs/zoned.h @@ -53,7 +53,7 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache); void btrfs_redirty_list_add(struct btrfs_transaction *trans, struct extent_buffer *eb); void btrfs_free_redirty_list(struct btrfs_transaction *trans); -bool btrfs_use_zone_append(struct btrfs_inode *inode, struct extent_map *em); +bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start); void btrfs_record_physical_zoned(struct inode *inode, u64 file_offset, struct bio *bio); void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered); @@ -152,8 +152,7 @@ static inline void btrfs_redirty_list_add(struct btrfs_transaction *trans, struct extent_buffer *eb) { } static inline void btrfs_free_redirty_list(struct btrfs_transaction *trans) { } -static inline bool btrfs_use_zone_append(struct btrfs_inode *inode, - struct extent_map *em) +static inline bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start) { return false; } From 764c7c9a464b68f7c6a5a9ec0b923176a05e8e8f Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 19 May 2021 00:40:28 +0900 Subject: [PATCH 422/730] btrfs: zoned: fix parallel compressed writes When multiple processes write data to the same block group on a compressed zoned filesystem, the underlying device could report I/O errors and data corruption is possible. This happens because on a zoned file system, compressed data writes where sent to the device via a REQ_OP_WRITE instead of a REQ_OP_ZONE_APPEND operation. But with REQ_OP_WRITE and parallel submission it cannot be guaranteed that the data is always submitted aligned to the underlying zone's write pointer. The change to using REQ_OP_ZONE_APPEND instead of REQ_OP_WRITE on a zoned filesystem is non intrusive on a regular file system or when submitting to a conventional zone on a zoned filesystem, as it is guarded by btrfs_use_zone_append. Reported-by: David Sterba Fixes: 9d294a685fbc ("btrfs: zoned: enable to mount ZONED incompat flag") CC: stable@vger.kernel.org # 5.12.x: e380adfc213a13: btrfs: zoned: pass start block to btrfs_use_zone_append CC: stable@vger.kernel.org # 5.12.x Signed-off-by: Johannes Thumshirn Signed-off-by: David Sterba --- fs/btrfs/compression.c | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 17f93fd28f7e..91743a0b34c5 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -28,6 +28,7 @@ #include "compression.h" #include "extent_io.h" #include "extent_map.h" +#include "zoned.h" static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" }; @@ -349,6 +350,7 @@ static void end_compressed_bio_write(struct bio *bio) */ inode = cb->inode; cb->compressed_pages[0]->mapping = cb->inode->i_mapping; + btrfs_record_physical_zoned(inode, cb->start, bio); btrfs_writepage_endio_finish_ordered(cb->compressed_pages[0], cb->start, cb->start + cb->len - 1, bio->bi_status == BLK_STS_OK); @@ -401,6 +403,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, u64 first_byte = disk_start; blk_status_t ret; int skip_sum = inode->flags & BTRFS_INODE_NODATASUM; + const bool use_append = btrfs_use_zone_append(inode, disk_start); + const unsigned int bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE; WARN_ON(!PAGE_ALIGNED(start)); cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS); @@ -418,10 +422,31 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, cb->nr_pages = nr_pages; bio = btrfs_bio_alloc(first_byte); - bio->bi_opf = REQ_OP_WRITE | write_flags; + bio->bi_opf = bio_op | write_flags; bio->bi_private = cb; bio->bi_end_io = end_compressed_bio_write; + if (use_append) { + struct extent_map *em; + struct map_lookup *map; + struct block_device *bdev; + + em = btrfs_get_chunk_map(fs_info, disk_start, PAGE_SIZE); + if (IS_ERR(em)) { + kfree(cb); + bio_put(bio); + return BLK_STS_NOTSUPP; + } + + map = em->map_lookup; + /* We only support single profile for now */ + ASSERT(map->num_stripes == 1); + bdev = map->stripes[0].dev->bdev; + + bio_set_dev(bio, bdev); + free_extent_map(em); + } + if (blkcg_css) { bio->bi_opf |= REQ_CGROUP_PUNT; kthread_associate_blkcg(blkcg_css); @@ -432,6 +457,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, bytes_left = compressed_len; for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) { int submit = 0; + int len; page = compressed_pages[pg_index]; page->mapping = inode->vfs_inode.i_mapping; @@ -439,9 +465,13 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio, 0); + if (pg_index == 0 && use_append) + len = bio_add_zone_append_page(bio, page, PAGE_SIZE, 0); + else + len = bio_add_page(bio, page, PAGE_SIZE, 0); + page->mapping = NULL; - if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) < - PAGE_SIZE) { + if (submit || len < PAGE_SIZE) { /* * inc the count before we submit the bio so * we know the end IO handler won't happen before @@ -465,11 +495,15 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, } bio = btrfs_bio_alloc(first_byte); - bio->bi_opf = REQ_OP_WRITE | write_flags; + bio->bi_opf = bio_op | write_flags; bio->bi_private = cb; bio->bi_end_io = end_compressed_bio_write; if (blkcg_css) bio->bi_opf |= REQ_CGROUP_PUNT; + /* + * Use bio_add_page() to ensure the bio has at least one + * page. + */ bio_add_page(bio, page, PAGE_SIZE, 0); } if (bytes_left < PAGE_SIZE) { From 6c60ff048ca1e0739f39aa25996543c6e662a46c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 14 May 2021 15:18:41 +0200 Subject: [PATCH 423/730] block: prevent block device lookups at the beginning of del_gendisk As an artifact of how gendisk lookup used to work in earlier kernels, GENHD_FL_UP is only cleared very late in del_gendisk, and a global lock is used to prevent opens from succeeding while del_gendisk is tearing down the gendisk. Switch to clearing the flag early and under bd_mutex so that callers can use bd_mutex to stabilize the flag, which removes the need for the global mutex. Signed-off-by: Christoph Hellwig Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20210514131842.1600568-2-hch@lst.de Signed-off-by: Jens Axboe --- block/genhd.c | 11 +---------- fs/block_dev.c | 15 +++++---------- include/linux/genhd.h | 2 -- 3 files changed, 6 insertions(+), 22 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index 39ca97b0edc6..9f8cb7beaad1 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -29,8 +29,6 @@ static struct kobject *block_depr; -DECLARE_RWSEM(bdev_lookup_sem); - /* for extended dynamic devt allocation, currently only one major is used */ #define NR_EXT_DEVT (1 << MINORBITS) static DEFINE_IDA(ext_devt_ida); @@ -609,13 +607,8 @@ void del_gendisk(struct gendisk *disk) blk_integrity_del(disk); disk_del_events(disk); - /* - * Block lookups of the disk until all bdevs are unhashed and the - * disk is marked as dead (GENHD_FL_UP cleared). - */ - down_write(&bdev_lookup_sem); - mutex_lock(&disk->part0->bd_mutex); + disk->flags &= ~GENHD_FL_UP; blk_drop_partitions(disk); mutex_unlock(&disk->part0->bd_mutex); @@ -629,8 +622,6 @@ void del_gendisk(struct gendisk *disk) remove_inode_hash(disk->part0->bd_inode); set_capacity(disk, 0); - disk->flags &= ~GENHD_FL_UP; - up_write(&bdev_lookup_sem); if (!(disk->flags & GENHD_FL_HIDDEN)) { sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); diff --git a/fs/block_dev.c b/fs/block_dev.c index eb265d72fce8..580bae995b87 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1298,6 +1298,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode) struct gendisk *disk = bdev->bd_disk; int ret = 0; + if (!(disk->flags & GENHD_FL_UP)) + return -ENXIO; + if (!bdev->bd_openers) { if (!bdev_is_partition(bdev)) { ret = 0; @@ -1332,8 +1335,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode) whole->bd_part_count++; mutex_unlock(&whole->bd_mutex); - if (!(disk->flags & GENHD_FL_UP) || - !bdev_nr_sectors(bdev)) { + if (!bdev_nr_sectors(bdev)) { __blkdev_put(whole, mode, 1); bdput(whole); return -ENXIO; @@ -1364,16 +1366,12 @@ struct block_device *blkdev_get_no_open(dev_t dev) struct block_device *bdev; struct gendisk *disk; - down_read(&bdev_lookup_sem); bdev = bdget(dev); if (!bdev) { - up_read(&bdev_lookup_sem); blk_request_module(dev); - down_read(&bdev_lookup_sem); - bdev = bdget(dev); if (!bdev) - goto unlock; + return NULL; } disk = bdev->bd_disk; @@ -1383,14 +1381,11 @@ struct block_device *blkdev_get_no_open(dev_t dev) goto put_disk; if (!try_module_get(bdev->bd_disk->fops->owner)) goto put_disk; - up_read(&bdev_lookup_sem); return bdev; put_disk: put_disk(disk); bdput: bdput(bdev); -unlock: - up_read(&bdev_lookup_sem); return NULL; } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 7e9660ea967d..6fc26f7bdf71 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -306,8 +306,6 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev, } #endif /* CONFIG_SYSFS */ -extern struct rw_semaphore bdev_lookup_sem; - dev_t blk_lookup_devt(const char *name, int partno); void blk_request_module(dev_t devt); #ifdef CONFIG_BLOCK From bc6a385132601c29a6da1dbf8148c0d3c9ad36dc Mon Sep 17 00:00:00 2001 From: Gulam Mohamed Date: Fri, 14 May 2021 15:18:42 +0200 Subject: [PATCH 424/730] block: fix a race between del_gendisk and BLKRRPART When BLKRRPART is called concurrently with del_gendisk, the partitions rescan can create a stale partition that will never be be cleaned up. Fix this by checking the the disk is up before rescanning partitions while under bd_mutex. Signed-off-by: Gulam Mohamed [hch: split from a larger patch] Signed-off-by: Christoph Hellwig Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20210514131842.1600568-3-hch@lst.de Signed-off-by: Jens Axboe --- fs/block_dev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/block_dev.c b/fs/block_dev.c index 580bae995b87..4494411fa4d3 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1244,6 +1244,9 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate) lockdep_assert_held(&bdev->bd_mutex); + if (!(disk->flags & GENHD_FL_UP)) + return -ENXIO; + rescan: if (bdev->bd_part_count) return -EBUSY; From 5665bc35c1ed917ac8fd06cb651317bb47a65b10 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 20 May 2021 21:19:30 +1000 Subject: [PATCH 425/730] powerpc/64s/syscall: Use pt_regs.trap to distinguish syscall ABI difference between sc and scv syscalls The sc and scv 0 system calls have different ABI conventions, and ptracers need to know which system call type is being used if they want to look at the syscall registers. Document that pt_regs.trap can be used for this, and fix one in-tree user to work with scv 0 syscalls. Fixes: 7fa95f9adaee ("powerpc/64s: system call support for scv/rfscv instructions") Cc: stable@vger.kernel.org # v5.9+ Reported-by: "Dmitry V. Levin" Suggested-by: "Dmitry V. Levin" Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210520111931.2597127-1-npiggin@gmail.com --- Documentation/powerpc/syscall64-abi.rst | 10 +++++++ tools/testing/selftests/seccomp/seccomp_bpf.c | 27 ++++++++++++------- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/Documentation/powerpc/syscall64-abi.rst b/Documentation/powerpc/syscall64-abi.rst index dabee3729e5a..56490c4c0c07 100644 --- a/Documentation/powerpc/syscall64-abi.rst +++ b/Documentation/powerpc/syscall64-abi.rst @@ -109,6 +109,16 @@ auxiliary vector. scv 0 syscalls will always behave as PPC_FEATURE2_HTM_NOSC. +ptrace +------ +When ptracing system calls (PTRACE_SYSCALL), the pt_regs.trap value contains +the system call type that can be used to distinguish between sc and scv 0 +system calls, and the different register conventions can be accounted for. + +If the value of (pt_regs.trap & 0xfff0) is 0xc00 then the system call was +performed with the sc instruction, if it is 0x3000 then the system call was +performed with the scv 0 instruction. + vsyscall ======== diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 98c3b647f54d..e3d5c77a8612 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1753,16 +1753,25 @@ TEST_F(TRACE_poke, getpid_runs_normally) # define SYSCALL_RET_SET(_regs, _val) \ do { \ typeof(_val) _result = (_val); \ - /* \ - * A syscall error is signaled by CR0 SO bit \ - * and the code is stored as a positive value. \ - */ \ - if (_result < 0) { \ - SYSCALL_RET(_regs) = -_result; \ - (_regs).ccr |= 0x10000000; \ - } else { \ + if ((_regs.trap & 0xfff0) == 0x3000) { \ + /* \ + * scv 0 system call uses -ve result \ + * for error, so no need to adjust. \ + */ \ SYSCALL_RET(_regs) = _result; \ - (_regs).ccr &= ~0x10000000; \ + } else { \ + /* \ + * A syscall error is signaled by the \ + * CR0 SO bit and the code is stored as \ + * a positive value. \ + */ \ + if (_result < 0) { \ + SYSCALL_RET(_regs) = -_result; \ + (_regs).ccr |= 0x10000000; \ + } else { \ + SYSCALL_RET(_regs) = _result; \ + (_regs).ccr &= ~0x10000000; \ + } \ } \ } while (0) # define SYSCALL_RET_SET_ON_PTRACE_EXIT From d72500f992849d31ebae8f821a023660ddd0dcc2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 20 May 2021 21:19:31 +1000 Subject: [PATCH 426/730] powerpc/64s/syscall: Fix ptrace syscall info with scv syscalls The scv implementation missed updating syscall return value and error value get/set functions to deal with the changed register ABI. This broke ptrace PTRACE_GET_SYSCALL_INFO as well as some kernel auditing and tracing functions. Fix. tools/testing/selftests/ptrace/get_syscall_info now passes when scv is used. Fixes: 7fa95f9adaee ("powerpc/64s: system call support for scv/rfscv instructions") Cc: stable@vger.kernel.org # v5.9+ Reported-by: "Dmitry V. Levin" Signed-off-by: Nicholas Piggin Reviewed-by: Dmitry V. Levin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210520111931.2597127-2-npiggin@gmail.com --- arch/powerpc/include/asm/ptrace.h | 45 +++++++++++++++++------------- arch/powerpc/include/asm/syscall.h | 42 +++++++++++++++++----------- 2 files changed, 52 insertions(+), 35 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 9c9ab2746168..b476a685f066 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -19,6 +19,7 @@ #ifndef _ASM_POWERPC_PTRACE_H #define _ASM_POWERPC_PTRACE_H +#include #include #include @@ -152,25 +153,6 @@ extern unsigned long profile_pc(struct pt_regs *regs); long do_syscall_trace_enter(struct pt_regs *regs); void do_syscall_trace_leave(struct pt_regs *regs); -#define kernel_stack_pointer(regs) ((regs)->gpr[1]) -static inline int is_syscall_success(struct pt_regs *regs) -{ - return !(regs->ccr & 0x10000000); -} - -static inline long regs_return_value(struct pt_regs *regs) -{ - if (is_syscall_success(regs)) - return regs->gpr[3]; - else - return -regs->gpr[3]; -} - -static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) -{ - regs->gpr[3] = rc; -} - #ifdef __powerpc64__ #define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1) #else @@ -235,6 +217,31 @@ static __always_inline void set_trap_norestart(struct pt_regs *regs) regs->trap |= 0x1; } +#define kernel_stack_pointer(regs) ((regs)->gpr[1]) +static inline int is_syscall_success(struct pt_regs *regs) +{ + if (trap_is_scv(regs)) + return !IS_ERR_VALUE((unsigned long)regs->gpr[3]); + else + return !(regs->ccr & 0x10000000); +} + +static inline long regs_return_value(struct pt_regs *regs) +{ + if (trap_is_scv(regs)) + return regs->gpr[3]; + + if (is_syscall_success(regs)) + return regs->gpr[3]; + else + return -regs->gpr[3]; +} + +static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) +{ + regs->gpr[3] = rc; +} + #define arch_has_single_step() (1) #define arch_has_block_step() (true) #define ARCH_HAS_USER_SINGLE_STEP_REPORT diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index fd1b518eed17..ba0f88f3a30d 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -41,11 +41,17 @@ static inline void syscall_rollback(struct task_struct *task, static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - /* - * If the system call failed, - * regs->gpr[3] contains a positive ERRORCODE. - */ - return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0; + if (trap_is_scv(regs)) { + unsigned long error = regs->gpr[3]; + + return IS_ERR_VALUE(error) ? error : 0; + } else { + /* + * If the system call failed, + * regs->gpr[3] contains a positive ERRORCODE. + */ + return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0; + } } static inline long syscall_get_return_value(struct task_struct *task, @@ -58,18 +64,22 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { - /* - * In the general case it's not obvious that we must deal with CCR - * here, as the syscall exit path will also do that for us. However - * there are some places, eg. the signal code, which check ccr to - * decide if the value in r3 is actually an error. - */ - if (error) { - regs->ccr |= 0x10000000L; - regs->gpr[3] = error; + if (trap_is_scv(regs)) { + regs->gpr[3] = (long) error ?: val; } else { - regs->ccr &= ~0x10000000L; - regs->gpr[3] = val; + /* + * In the general case it's not obvious that we must deal with + * CCR here, as the syscall exit path will also do that for us. + * However there are some places, eg. the signal code, which + * check ccr to decide if the value in r3 is actually an error. + */ + if (error) { + regs->ccr |= 0x10000000L; + regs->gpr[3] = error; + } else { + regs->ccr &= ~0x10000000L; + regs->gpr[3] = val; + } } } From df8f2be2fd0b44b2cb6077068f52e05f0ac40897 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Thu, 20 May 2021 11:43:33 +0930 Subject: [PATCH 427/730] serial: 8250: Add UART_BUG_TXRACE workaround for Aspeed VUART Aspeed Virtual UARTs directly bridge e.g. the system console UART on the LPC bus to the UART interface on the BMC's internal APB. As such there's no RS-232 signalling involved - the UART interfaces on each bus are directly connected as the producers and consumers of the one set of FIFOs. The APB in the AST2600 generally runs at 100MHz while the LPC bus peaks at 33MHz. The difference in clock speeds exposes a race in the VUART design where a Tx data burst on the APB interface can result in a byte lost on the LPC interface. The symptom is LSR[DR] remains clear on the LPC interface despite data being present in its Rx FIFO, while LSR[THRE] remains clear on the APB interface as the host has not consumed the data the BMC has transmitted. In this state, the UART has stalled and no further data can be transmitted without manual intervention (e.g. resetting the FIFOs, resulting in loss of data). The recommended work-around is to insert a read cycle on the APB interface between writes to THR. Cc: ChiaWei Wang Tested-by: ChiaWei Wang Reviewed-by: Jiri Slaby Signed-off-by: Andrew Jeffery Cc: stable Link: https://lore.kernel.org/r/20210520021334.497341-2-andrew@aj.id.au Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250.h | 1 + drivers/tty/serial/8250/8250_aspeed_vuart.c | 1 + drivers/tty/serial/8250/8250_port.c | 12 ++++++++++++ 3 files changed, 14 insertions(+) diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index 52bb21205bb6..34aa2714f3c9 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -88,6 +88,7 @@ struct serial8250_config { #define UART_BUG_NOMSR (1 << 2) /* UART has buggy MSR status bits (Au1x00) */ #define UART_BUG_THRE (1 << 3) /* UART has buggy THRE reassertion */ #define UART_BUG_PARITY (1 << 4) /* UART mishandles parity if FIFO enabled */ +#define UART_BUG_TXRACE (1 << 5) /* UART Tx fails to set remote DR */ #ifdef CONFIG_SERIAL_8250_SHARE_IRQ diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c index 61550f24a2d3..d035d08cb987 100644 --- a/drivers/tty/serial/8250/8250_aspeed_vuart.c +++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c @@ -437,6 +437,7 @@ static int aspeed_vuart_probe(struct platform_device *pdev) port.port.status = UPSTAT_SYNC_FIFO; port.port.dev = &pdev->dev; port.port.has_sysrq = IS_ENABLED(CONFIG_SERIAL_8250_CONSOLE); + port.bugs |= UART_BUG_TXRACE; rc = sysfs_create_group(&vuart->dev->kobj, &aspeed_vuart_attr_group); if (rc < 0) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index d45dab1ab316..fc5ab2032282 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1809,6 +1809,18 @@ void serial8250_tx_chars(struct uart_8250_port *up) count = up->tx_loadsz; do { serial_out(up, UART_TX, xmit->buf[xmit->tail]); + if (up->bugs & UART_BUG_TXRACE) { + /* + * The Aspeed BMC virtual UARTs have a bug where data + * may get stuck in the BMC's Tx FIFO from bursts of + * writes on the APB interface. + * + * Delay back-to-back writes by a read cycle to avoid + * stalling the VUART. Read a register that won't have + * side-effects and discard the result. + */ + serial_in(up, UART_SCR); + } xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); port->icount.tx++; if (uart_circ_empty(xmit)) From 1f06f5713f5278b7768031150ceb43d1127b9ad6 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Thu, 20 May 2021 11:43:34 +0930 Subject: [PATCH 428/730] serial: 8250: Use BIT(x) for UART_{CAP,BUG}_* BIT(x) improves readability and safety with respect to shifts. Reviewed-by: Jiri Slaby Signed-off-by: Andrew Jeffery Link: https://lore.kernel.org/r/20210520021334.497341-3-andrew@aj.id.au Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250.h | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index 34aa2714f3c9..6473361525d1 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -7,6 +7,7 @@ * Copyright (C) 2001 Russell King. */ +#include #include #include #include @@ -70,25 +71,25 @@ struct serial8250_config { unsigned int flags; }; -#define UART_CAP_FIFO (1 << 8) /* UART has FIFO */ -#define UART_CAP_EFR (1 << 9) /* UART has EFR */ -#define UART_CAP_SLEEP (1 << 10) /* UART has IER sleep */ -#define UART_CAP_AFE (1 << 11) /* MCR-based hw flow control */ -#define UART_CAP_UUE (1 << 12) /* UART needs IER bit 6 set (Xscale) */ -#define UART_CAP_RTOIE (1 << 13) /* UART needs IER bit 4 set (Xscale, Tegra) */ -#define UART_CAP_HFIFO (1 << 14) /* UART has a "hidden" FIFO */ -#define UART_CAP_RPM (1 << 15) /* Runtime PM is active while idle */ -#define UART_CAP_IRDA (1 << 16) /* UART supports IrDA line discipline */ -#define UART_CAP_MINI (1 << 17) /* Mini UART on BCM283X family lacks: +#define UART_CAP_FIFO BIT(8) /* UART has FIFO */ +#define UART_CAP_EFR BIT(9) /* UART has EFR */ +#define UART_CAP_SLEEP BIT(10) /* UART has IER sleep */ +#define UART_CAP_AFE BIT(11) /* MCR-based hw flow control */ +#define UART_CAP_UUE BIT(12) /* UART needs IER bit 6 set (Xscale) */ +#define UART_CAP_RTOIE BIT(13) /* UART needs IER bit 4 set (Xscale, Tegra) */ +#define UART_CAP_HFIFO BIT(14) /* UART has a "hidden" FIFO */ +#define UART_CAP_RPM BIT(15) /* Runtime PM is active while idle */ +#define UART_CAP_IRDA BIT(16) /* UART supports IrDA line discipline */ +#define UART_CAP_MINI BIT(17) /* Mini UART on BCM283X family lacks: * STOP PARITY EPAR SPAR WLEN5 WLEN6 */ -#define UART_BUG_QUOT (1 << 0) /* UART has buggy quot LSB */ -#define UART_BUG_TXEN (1 << 1) /* UART has buggy TX IIR status */ -#define UART_BUG_NOMSR (1 << 2) /* UART has buggy MSR status bits (Au1x00) */ -#define UART_BUG_THRE (1 << 3) /* UART has buggy THRE reassertion */ -#define UART_BUG_PARITY (1 << 4) /* UART mishandles parity if FIFO enabled */ -#define UART_BUG_TXRACE (1 << 5) /* UART Tx fails to set remote DR */ +#define UART_BUG_QUOT BIT(0) /* UART has buggy quot LSB */ +#define UART_BUG_TXEN BIT(1) /* UART has buggy TX IIR status */ +#define UART_BUG_NOMSR BIT(2) /* UART has buggy MSR status bits (Au1x00) */ +#define UART_BUG_THRE BIT(3) /* UART has buggy THRE reassertion */ +#define UART_BUG_PARITY BIT(4) /* UART mishandles parity if FIFO enabled */ +#define UART_BUG_TXRACE BIT(5) /* UART Tx fails to set remote DR */ #ifdef CONFIG_SERIAL_8250_SHARE_IRQ From 31fae7c8b18c3f8029a2a5dce97a3182c1a167a0 Mon Sep 17 00:00:00 2001 From: Vignesh Raghavendra Date: Tue, 11 May 2021 20:49:55 +0530 Subject: [PATCH 429/730] serial: 8250: 8250_omap: Fix possible interrupt storm It is possible that RX TIMEOUT is signalled after RX FIFO has been drained, in which case a dummy read of RX FIFO is required to clear RX TIMEOUT condition. Otherwise, RX TIMEOUT condition is not cleared leading to an interrupt storm Cc: stable@vger.kernel.org Reported-by: Jan Kiszka Signed-off-by: Vignesh Raghavendra Link: https://lore.kernel.org/r/20210511151955.28071-1-vigneshr@ti.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_omap.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 8ac11eaeca51..c71bd766fa56 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -104,6 +104,9 @@ #define UART_OMAP_EFR2 0x23 #define UART_OMAP_EFR2_TIMEOUT_BEHAVE BIT(6) +/* RX FIFO occupancy indicator */ +#define UART_OMAP_RX_LVL 0x64 + struct omap8250_priv { int line; u8 habit; @@ -625,6 +628,15 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id) serial8250_rpm_get(up); iir = serial_port_in(port, UART_IIR); ret = serial8250_handle_irq(port, iir); + /* + * It is possible that RX TIMEOUT is signalled after FIFO + * has been drained, in which case a dummy read of RX FIFO is + * required to clear RX TIMEOUT condition. + */ + if ((iir & UART_IIR_RX_TIMEOUT) == UART_IIR_RX_TIMEOUT) { + if (serial_port_in(port, UART_OMAP_RX_LVL) == 0) + serial_port_in(port, UART_RX); + } serial8250_rpm_put(up); return IRQ_RETVAL(ret); From e0e24208792080135248f23fdf6d51aa2e04df05 Mon Sep 17 00:00:00 2001 From: Randy Wright Date: Fri, 14 May 2021 10:26:54 -0600 Subject: [PATCH 430/730] serial: 8250_pci: Add support for new HPE serial device Add support for new HPE serial device. It is MSI enabled, but otherwise similar to legacy HP server serial devices. Tested-by: Jerry Hoemann Signed-off-by: Randy Wright Cc: stable Link: https://lore.kernel.org/r/1621009614-28836-1-git-send-email-rwright@hpe.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 689d8227f95f..04fe42469990 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -56,6 +56,8 @@ struct serial_private { int line[]; }; +#define PCI_DEVICE_ID_HPE_PCI_SERIAL 0x37e + static const struct pci_device_id pci_use_msi[] = { { PCI_DEVICE_SUB(PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9900, 0xA000, 0x1000) }, @@ -63,6 +65,8 @@ static const struct pci_device_id pci_use_msi[] = { 0xA000, 0x1000) }, { PCI_DEVICE_SUB(PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9922, 0xA000, 0x1000) }, + { PCI_DEVICE_SUB(PCI_VENDOR_ID_HP_3PAR, PCI_DEVICE_ID_HPE_PCI_SERIAL, + PCI_ANY_ID, PCI_ANY_ID) }, { } }; @@ -1997,6 +2001,16 @@ static struct pci_serial_quirk pci_serial_quirks[] = { .init = pci_hp_diva_init, .setup = pci_hp_diva_setup, }, + /* + * HPE PCI serial device + */ + { + .vendor = PCI_VENDOR_ID_HP_3PAR, + .device = PCI_DEVICE_ID_HPE_PCI_SERIAL, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_hp_diva_setup, + }, /* * Intel */ @@ -4973,6 +4987,10 @@ static const struct pci_device_id serial_pci_tbl[] = { { PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_DIVA_AUX, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_b2_1_115200 }, + /* HPE PCI serial device */ + { PCI_VENDOR_ID_HP_3PAR, PCI_DEVICE_ID_HPE_PCI_SERIAL, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_b1_1_115200 }, { PCI_VENDOR_ID_DCI, PCI_DEVICE_ID_DCI_PCCOM2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, From e0112a7c9e847ada15a631b88e279d547e8f26a7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 19 May 2021 17:16:50 +0300 Subject: [PATCH 431/730] staging: emxx_udc: fix loop in _nbu2ss_nuke() The _nbu2ss_ep_done() function calls: list_del_init(&req->queue); which means that the loop will never exit. Fixes: ca3d253eb967 ("Staging: emxx_udc: Iterate list using list_for_each_entry") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/YKUd0sDyjm/lkJfJ@mwanda Signed-off-by: Greg Kroah-Hartman --- drivers/staging/emxx_udc/emxx_udc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/emxx_udc/emxx_udc.c b/drivers/staging/emxx_udc/emxx_udc.c index 741147a4f0fe..ecc5c9da9027 100644 --- a/drivers/staging/emxx_udc/emxx_udc.c +++ b/drivers/staging/emxx_udc/emxx_udc.c @@ -2064,7 +2064,7 @@ static int _nbu2ss_nuke(struct nbu2ss_udc *udc, struct nbu2ss_ep *ep, int status) { - struct nbu2ss_req *req; + struct nbu2ss_req *req, *n; /* Endpoint Disable */ _nbu2ss_epn_exit(udc, ep); @@ -2076,7 +2076,7 @@ static int _nbu2ss_nuke(struct nbu2ss_udc *udc, return 0; /* called with irqs blocked */ - list_for_each_entry(req, &ep->queue, queue) { + list_for_each_entry_safe(req, n, &ep->queue, queue) { _nbu2ss_ep_done(ep, req, status); } From 676a659b60afb13166371580f3f6f434e9ba6f21 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 9 May 2021 16:22:55 -0700 Subject: [PATCH 432/730] xfs: retry allocations when locality-based search fails If a realtime allocation fails because we can't find a sufficiently large free extent satisfying locality rules, relax the locality rules and try again. This reduces the occurrence of short writes to realtime files when the write size is large and the free space is fragmented. This was originally discovered by running generic/186 with the realtime reflink patchset and a 128k cow extent size hint, but the short write symptoms can manifest with a 128k extent size hint and no reflink, so apply the fix now. Signed-off-by: Darrick J. Wong Reviewed-by: Allison Henderson --- fs/xfs/xfs_bmap_util.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index c9381bf4f04b..0936f3a96fe6 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -84,6 +84,7 @@ xfs_bmap_rtalloc( xfs_extlen_t minlen = mp->m_sb.sb_rextsize; xfs_extlen_t raminlen; bool rtlocked = false; + bool ignore_locality = false; int error; align = xfs_get_extsz_hint(ap->ip); @@ -158,7 +159,10 @@ retry: /* * Realtime allocation, done through xfs_rtallocate_extent. */ - do_div(ap->blkno, mp->m_sb.sb_rextsize); + if (ignore_locality) + ap->blkno = 0; + else + do_div(ap->blkno, mp->m_sb.sb_rextsize); rtb = ap->blkno; ap->length = ralen; raminlen = max_t(xfs_extlen_t, 1, minlen / mp->m_sb.sb_rextsize); @@ -197,6 +201,15 @@ retry: goto retry; } + if (!ignore_locality && ap->blkno != 0) { + /* + * If we can't allocate near a specific rt extent, try again + * without locality criteria. + */ + ignore_locality = true; + goto retry; + } + ap->blkno = NULLFSBLOCK; ap->length = 0; return 0; From 16c9de54dc868c121918f2ae91e46330f919049f Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 12 May 2021 16:41:13 -0700 Subject: [PATCH 433/730] xfs: fix deadlock retry tracepoint arguments sc->ip is the inode that's being scrubbed, which means that it's not set for scrub types that don't involve inodes. If one of those scrubbers (e.g. inode btrees) returns EDEADLOCK, we'll trip over the null pointer. Fix that by reporting either the file being examined or the file that was used to call scrub. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/scrub/common.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index aa874607618a..be38c960da85 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -74,7 +74,9 @@ __xchk_process_error( return true; case -EDEADLOCK: /* Used to restart an op with deadlock avoidance. */ - trace_xchk_deadlock_retry(sc->ip, sc->sm, *error); + trace_xchk_deadlock_retry( + sc->ip ? sc->ip : XFS_I(file_inode(sc->file)), + sc->sm, *error); break; case -EFSBADCRC: case -EFSCORRUPTED: From e3c2b047475b52739bcf178a9e95176c42bbcf8f Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 12 May 2021 16:43:10 -0700 Subject: [PATCH 434/730] xfs: restore old ioctl definitions These ioctl definitions in xfs_fs.h are part of the userspace ABI and were mistakenly removed during the 5.13 merge window. Fixes: 9fefd5db08ce ("xfs: convert to fileattr") Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_fs.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index a83bdd0c47a8..bde2b4c64dbe 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -770,6 +770,8 @@ struct xfs_scrub_metadata { /* * ioctl commands that are used by Linux filesystems */ +#define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS +#define XFS_IOC_SETXFLAGS FS_IOC_SETFLAGS #define XFS_IOC_GETVERSION FS_IOC_GETVERSION /* @@ -780,6 +782,8 @@ struct xfs_scrub_metadata { #define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64) #define XFS_IOC_FREESP _IOW ('X', 11, struct xfs_flock64) #define XFS_IOC_DIOINFO _IOR ('X', 30, struct dioattr) +#define XFS_IOC_FSGETXATTR FS_IOC_FSGETXATTR +#define XFS_IOC_FSSETXATTR FS_IOC_FSSETXATTR #define XFS_IOC_ALLOCSP64 _IOW ('X', 36, struct xfs_flock64) #define XFS_IOC_FREESP64 _IOW ('X', 37, struct xfs_flock64) #define XFS_IOC_GETBMAP _IOWR('X', 38, struct getbmap) From bb002388901151fe35b6697ab116f6ed0721a9ed Mon Sep 17 00:00:00 2001 From: zhouchuangao Date: Sun, 9 May 2021 19:34:37 -0700 Subject: [PATCH 435/730] fs/nfs: Use fatal_signal_pending instead of signal_pending We set the state of the current process to TASK_KILLABLE via prepare_to_wait(). Should we use fatal_signal_pending() to detect the signal here? Fixes: b4868b44c562 ("NFSv4: Wait for stateid updates after CLOSE/OPEN_DOWNGRADE") Signed-off-by: zhouchuangao Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 87d04f2c9385..0cd965882232 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1706,7 +1706,7 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, rcu_read_unlock(); trace_nfs4_open_stateid_update_wait(state->inode, stateid, 0); - if (!signal_pending(current)) { + if (!fatal_signal_pending(current)) { if (schedule_timeout(5*HZ) == 0) status = -EAGAIN; else @@ -3487,7 +3487,7 @@ static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst, write_sequnlock(&state->seqlock); trace_nfs4_close_stateid_update_wait(state->inode, dst, 0); - if (signal_pending(current)) + if (fatal_signal_pending(current)) status = -EINTR; else if (schedule_timeout(5*HZ) != 0) From 769b01ea68b6c49dc3cde6adf7e53927dacbd3a8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 11 May 2021 11:49:42 +0300 Subject: [PATCH 436/730] NFS: fix an incorrect limit in filelayout_decode_layout() The "sizeof(struct nfs_fh)" is two bytes too large and could lead to memory corruption. It should be NFS_MAXFHSIZE because that's the size of the ->data[] buffer. I reversed the size of the arguments to put the variable on the left. Fixes: 16b374ca439f ("NFSv4.1: pnfs: filelayout: add driver's LAYOUTGET and GETDEVICEINFO infrastructure") Signed-off-by: Dan Carpenter Signed-off-by: Trond Myklebust --- fs/nfs/filelayout/filelayout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index d158a500c25c..d2103852475f 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -718,7 +718,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, if (unlikely(!p)) goto out_err; fl->fh_array[i]->size = be32_to_cpup(p++); - if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { + if (fl->fh_array[i]->size > NFS_MAXFHSIZE) { printk(KERN_ERR "NFS: Too big fh %d received %d\n", i, fl->fh_array[i]->size); goto out_err; From d1d973950aceecd646ea3bee66764414bfeac072 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 12 May 2021 17:20:04 +0800 Subject: [PATCH 437/730] pNFS/NFSv4: Remove redundant initialization of 'rd_size' Variable 'rd_size' is being initialized however this value is never read as 'rd_size' is assigned a new value in for statement. Remove the redundant assignment. Clean up clang warning: fs/nfs/pnfs.c:2681:6: warning: Value stored to 'rd_size' during its initialization is never read [clang-analyzer-deadcode.DeadStores] Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 03e0b34c4a64..f076a6f3558d 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2678,7 +2678,7 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_range); void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { - u64 rd_size = req->wb_bytes; + u64 rd_size; pnfs_generic_pg_check_layout(pgio); pnfs_generic_pg_check_range(pgio, req); From e877a88d1f069edced4160792f42c2a8e2dba942 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 17 May 2021 09:59:10 +1000 Subject: [PATCH 438/730] SUNRPC in case of backlog, hand free slots directly to waiting task If sunrpc.tcp_max_slot_table_entries is small and there are tasks on the backlog queue, then when a request completes it is freed and the first task on the queue is woken. The expectation is that it will wake and claim that request. However if it was a sync task and the waiting process was killed at just that moment, it will wake and NOT claim the request. As long as TASK_CONGESTED remains set, requests can only be claimed by tasks woken from the backlog, and they are woken only as requests are freed, so when a task doesn't claim a request, no other task can ever get that request until TASK_CONGESTED is cleared. Each time this happens the number of available requests is decreased by one. With a sufficiently high workload and sufficiently low setting of max_slot (16 in the case where this was seen), TASK_CONGESTED can remain set for an extended period, and the above scenario (of a process being killed just as its task was woken) can repeat until no requests can be allocated. Then traffic stops. This patch addresses the problem by introducing a positive handover of a request from a completing task to a backlog task - the request is never freed when there is a backlog. When a task is woken it might not already have a request attached in which case it is *not* freed (as with current code) but is initialised (if needed) and used. If it isn't used it will eventually be freed by rpc_exit_task(). xprt_release() is enhanced to be able to correctly release an uninitialised request. Fixes: ba60eb25ff6b ("SUNRPC: Fix a livelock problem in the xprt->backlog queue") Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 7 ----- net/sunrpc/xprt.c | 68 ++++++++++++++++++++++++++++++++--------------- 2 files changed, 47 insertions(+), 28 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index f555d335e910..42623d6b8f0e 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1677,13 +1677,6 @@ call_reserveresult(struct rpc_task *task) return; } - /* - * Even though there was an error, we may have acquired - * a request slot somehow. Make sure not to leak it. - */ - if (task->tk_rqstp) - xprt_release(task); - switch (status) { case -ENOMEM: rpc_delay(task, HZ >> 2); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e5b5a960a69b..5b3981fd3783 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -70,6 +70,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net); static __be32 xprt_alloc_xid(struct rpc_xprt *xprt); static void xprt_destroy(struct rpc_xprt *xprt); +static void xprt_request_init(struct rpc_task *task); static DEFINE_SPINLOCK(xprt_list_lock); static LIST_HEAD(xprt_list); @@ -1612,10 +1613,26 @@ static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task) rpc_sleep_on(&xprt->backlog, task, NULL); } -static void xprt_wake_up_backlog(struct rpc_xprt *xprt) +static bool __xprt_set_rq(struct rpc_task *task, void *data) { - if (rpc_wake_up_next(&xprt->backlog) == NULL) + struct rpc_rqst *req = data; + + if (task->tk_rqstp == NULL) { + memset(req, 0, sizeof(*req)); /* mark unused */ + task->tk_status = -EAGAIN; + task->tk_rqstp = req; + return true; + } + return false; +} + +static bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req) +{ + if (rpc_wake_up_first(&xprt->backlog, __xprt_set_rq, req) == NULL) { clear_bit(XPRT_CONGESTED, &xprt->state); + return false; + } + return true; } static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task) @@ -1703,11 +1720,11 @@ EXPORT_SYMBOL_GPL(xprt_alloc_slot); void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) { spin_lock(&xprt->reserve_lock); - if (!xprt_dynamic_free_slot(xprt, req)) { + if (!xprt_wake_up_backlog(xprt, req) && + !xprt_dynamic_free_slot(xprt, req)) { memset(req, 0, sizeof(*req)); /* mark unused */ list_add(&req->rq_list, &xprt->free); } - xprt_wake_up_backlog(xprt); spin_unlock(&xprt->reserve_lock); } EXPORT_SYMBOL_GPL(xprt_free_slot); @@ -1795,6 +1812,10 @@ xprt_request_init(struct rpc_task *task) struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req = task->tk_rqstp; + if (req->rq_task) + /* Already initialized */ + return; + req->rq_task = task; req->rq_xprt = xprt; req->rq_buffer = NULL; @@ -1855,8 +1876,10 @@ void xprt_retry_reserve(struct rpc_task *task) struct rpc_xprt *xprt = task->tk_xprt; task->tk_status = 0; - if (task->tk_rqstp != NULL) + if (task->tk_rqstp != NULL) { + xprt_request_init(task); return; + } task->tk_status = -EAGAIN; xprt_do_reserve(xprt, task); @@ -1881,23 +1904,26 @@ void xprt_release(struct rpc_task *task) } xprt = req->rq_xprt; - xprt_request_dequeue_xprt(task); - spin_lock(&xprt->transport_lock); - xprt->ops->release_xprt(xprt, task); - if (xprt->ops->release_request) - xprt->ops->release_request(task); - xprt_schedule_autodisconnect(xprt); - spin_unlock(&xprt->transport_lock); - if (req->rq_buffer) - xprt->ops->buf_free(task); - xdr_free_bvec(&req->rq_rcv_buf); - xdr_free_bvec(&req->rq_snd_buf); - if (req->rq_cred != NULL) - put_rpccred(req->rq_cred); - task->tk_rqstp = NULL; - if (req->rq_release_snd_buf) - req->rq_release_snd_buf(req); + if (xprt) { + xprt_request_dequeue_xprt(task); + spin_lock(&xprt->transport_lock); + xprt->ops->release_xprt(xprt, task); + if (xprt->ops->release_request) + xprt->ops->release_request(task); + xprt_schedule_autodisconnect(xprt); + spin_unlock(&xprt->transport_lock); + if (req->rq_buffer) + xprt->ops->buf_free(task); + xdr_free_bvec(&req->rq_rcv_buf); + xdr_free_bvec(&req->rq_snd_buf); + if (req->rq_cred != NULL) + put_rpccred(req->rq_cred); + if (req->rq_release_snd_buf) + req->rq_release_snd_buf(req); + } else + xprt = task->tk_xprt; + task->tk_rqstp = NULL; if (likely(!bc_prealloc(req))) xprt->ops->free_slot(xprt, req); else From a421d218603ffa822a0b8045055c03eae394a7eb Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 19 May 2021 12:54:51 -0400 Subject: [PATCH 439/730] NFSv4: Fix a NULL pointer dereference in pnfs_mark_matching_lsegs_return() Commit de144ff4234f changes _pnfs_return_layout() to call pnfs_mark_matching_lsegs_return() passing NULL as the struct pnfs_layout_range argument. Unfortunately, pnfs_mark_matching_lsegs_return() doesn't check if we have a value here before dereferencing it, causing an oops. I'm able to hit this crash consistently when running connectathon basic tests on NFS v4.1/v4.2 against Ontap. Fixes: de144ff4234f ("NFSv4: Don't discard segments marked for return in _pnfs_return_layout()") Cc: stable@vger.kernel.org Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index f076a6f3558d..2c01ee805306 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1317,6 +1317,11 @@ _pnfs_return_layout(struct inode *ino) { struct pnfs_layout_hdr *lo = NULL; struct nfs_inode *nfsi = NFS_I(ino); + struct pnfs_layout_range range = { + .iomode = IOMODE_ANY, + .offset = 0, + .length = NFS4_MAX_UINT64, + }; LIST_HEAD(tmp_list); const struct cred *cred; nfs4_stateid stateid; @@ -1344,16 +1349,10 @@ _pnfs_return_layout(struct inode *ino) } valid_layout = pnfs_layout_is_valid(lo); pnfs_clear_layoutcommit(ino, &tmp_list); - pnfs_mark_matching_lsegs_return(lo, &tmp_list, NULL, 0); + pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0); - if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) { - struct pnfs_layout_range range = { - .iomode = IOMODE_ANY, - .offset = 0, - .length = NFS4_MAX_UINT64, - }; + if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range); - } /* Don't send a LAYOUTRETURN if list was initially empty */ if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) || From d275880abce9ac66cb842af828fbc2b1ba8082a0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 17 May 2021 08:50:11 -0400 Subject: [PATCH 440/730] SUNRPC: Fix Oops in xs_tcp_send_request() when transport is disconnected If a disconnection occurs while we're trying to reply to a server callback, then we may end up calling xs_tcp_send_request() with a NULL value for transport->inet, which trips up the call to tcp_sock_set_cork(). Fixes: d737e5d41870 ("SUNRPC: Set TCP_CORK until the transmit queue is empty") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 47aa47a2b07c..316d04945587 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1010,6 +1010,8 @@ static int xs_tcp_send_request(struct rpc_rqst *req) kernel_sock_shutdown(transport->sock, SHUT_RDWR); return -ENOTCONN; } + if (!transport->inet) + return -ENOTCONN; xs_pktdump("packet data:", req->rq_svec->iov_base, From 45e1ba40837ac2f6f4d4716bddb8d44bd7e4a251 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 12 May 2021 13:19:46 -0700 Subject: [PATCH 441/730] cgroup: disable controllers at parse time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch effectively reverts the commit a3e72739b7a7 ("cgroup: fix too early usage of static_branch_disable()"). The commit 6041186a3258 ("init: initialize jump labels before command line option parsing") has moved the jump_label_init() before parse_args() which has made the commit a3e72739b7a7 unnecessary. On the other hand there are consequences of disabling the controllers later as there are subsystems doing the controller checks for different decisions. One such incident is reported [1] regarding the memory controller and its impact on memory reclaim code. [1] https://lore.kernel.org/linux-mm/921e53f3-4b13-aab8-4a9e-e83ff15371e4@nec.com Signed-off-by: Shakeel Butt Reported-by: NOMURA JUNICHI(野村 淳一) Signed-off-by: Tejun Heo Tested-by: Jun'ichi Nomura --- kernel/cgroup/cgroup.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index e049edd66776..e7a9a2998245 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5634,8 +5634,6 @@ int __init cgroup_init_early(void) return 0; } -static u16 cgroup_disable_mask __initdata; - /** * cgroup_init - cgroup initialization * @@ -5694,12 +5692,8 @@ int __init cgroup_init(void) * disabled flag and cftype registration needs kmalloc, * both of which aren't available during early_init. */ - if (cgroup_disable_mask & (1 << ssid)) { - static_branch_disable(cgroup_subsys_enabled_key[ssid]); - printk(KERN_INFO "Disabling %s control group subsystem\n", - ss->name); + if (!cgroup_ssid_enabled(ssid)) continue; - } if (cgroup1_ssid_disabled(ssid)) printk(KERN_INFO "Disabling %s control group subsystem in v1 mounts\n", @@ -6214,7 +6208,10 @@ static int __init cgroup_disable(char *str) if (strcmp(token, ss->name) && strcmp(token, ss->legacy_name)) continue; - cgroup_disable_mask |= 1 << i; + + static_branch_disable(cgroup_subsys_enabled_key[i]); + pr_info("Disabling %s control group subsystem\n", + ss->name); } } return 1; From 833bc4cf9754643acc69b3c6b65988ca78df4460 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 20 May 2021 08:08:24 +0300 Subject: [PATCH 442/730] ASoC: cs35l33: fix an error code in probe() This error path returns zero (success) but it should return -EINVAL. Fixes: 3333cb7187b9 ("ASoC: cs35l33: Initial commit of the cs35l33 CODEC driver.") Signed-off-by: Dan Carpenter Reviewed-by: Charles Keepax Link: https://lore.kernel.org/r/YKXuyGEzhPT35R3G@mwanda Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l33.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/cs35l33.c b/sound/soc/codecs/cs35l33.c index 7ad7b733af9b..e8f3dcfd144d 100644 --- a/sound/soc/codecs/cs35l33.c +++ b/sound/soc/codecs/cs35l33.c @@ -1201,6 +1201,7 @@ static int cs35l33_i2c_probe(struct i2c_client *i2c_client, dev_err(&i2c_client->dev, "CS35L33 Device ID (%X). Expected ID %X\n", devid, CS35L33_CHIP_ID); + ret = -EINVAL; goto err_enable; } From 940d71c6462e8151c78f28e4919aa8882ff2054e Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 20 May 2021 19:14:22 +0900 Subject: [PATCH 443/730] wq: handle VM suspension in stall detection If VCPU is suspended (VM suspend) in wq_watchdog_timer_fn() then once this VCPU resumes it will see the new jiffies value, while it may take a while before IRQ detects PVCLOCK_GUEST_STOPPED on this VCPU and updates all the watchdogs via pvclock_touch_watchdogs(). There is a small chance of misreported WQ stalls in the meantime, because new jiffies is time_after() old 'ts + thresh'. wq_watchdog_timer_fn() { for_each_pool(pool, pi) { if (time_after(jiffies, ts + thresh)) { pr_emerg("BUG: workqueue lockup - pool"); } } } Save jiffies at the beginning of this function and use that value for stall detection. If VM gets suspended then we continue using "old" jiffies value and old WQ touch timestamps. If IRQ at some point restarts the stall detection cycle (pvclock_touch_watchdogs()) then old jiffies will always be before new 'ts + thresh'. Signed-off-by: Sergey Senozhatsky Signed-off-by: Tejun Heo --- kernel/workqueue.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b19d759e55a5..50142fc08902 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -50,6 +50,7 @@ #include #include #include +#include #include "workqueue_internal.h" @@ -5772,6 +5773,7 @@ static void wq_watchdog_timer_fn(struct timer_list *unused) { unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ; bool lockup_detected = false; + unsigned long now = jiffies; struct worker_pool *pool; int pi; @@ -5786,6 +5788,12 @@ static void wq_watchdog_timer_fn(struct timer_list *unused) if (list_empty(&pool->worklist)) continue; + /* + * If a virtual machine is stopped by the host it can look to + * the watchdog like a stall. + */ + kvm_check_and_clear_guest_paused(); + /* get the latest of pool and touched timestamps */ if (pool->cpu >= 0) touched = READ_ONCE(per_cpu(wq_watchdog_touched_cpu, pool->cpu)); @@ -5799,12 +5807,12 @@ static void wq_watchdog_timer_fn(struct timer_list *unused) ts = touched; /* did we stall? */ - if (time_after(jiffies, ts + thresh)) { + if (time_after(now, ts + thresh)) { lockup_detected = true; pr_emerg("BUG: workqueue lockup - pool"); pr_cont_pool_info(pool); pr_cont(" stuck for %us!\n", - jiffies_to_msecs(jiffies - pool_ts) / 1000); + jiffies_to_msecs(now - pool_ts) / 1000); } } From 9687c85dfbf84a6a37522626b4d5c5191a695e6c Mon Sep 17 00:00:00 2001 From: Rohith Surabattula Date: Thu, 20 May 2021 16:45:01 +0000 Subject: [PATCH 444/730] Fix KASAN identified use-after-free issue. [ 612.157429] ================================================================== [ 612.158275] BUG: KASAN: use-after-free in process_one_work+0x90/0x9b0 [ 612.158801] Read of size 8 at addr ffff88810a31ca60 by task kworker/2:9/2382 [ 612.159611] CPU: 2 PID: 2382 Comm: kworker/2:9 Tainted: G OE 5.13.0-rc2+ #98 [ 612.159623] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014 [ 612.159640] Workqueue: 0x0 (deferredclose) [ 612.159669] Call Trace: [ 612.159685] dump_stack+0xbb/0x107 [ 612.159711] print_address_description.constprop.0+0x18/0x140 [ 612.159733] ? process_one_work+0x90/0x9b0 [ 612.159743] ? process_one_work+0x90/0x9b0 [ 612.159754] kasan_report.cold+0x7c/0xd8 [ 612.159778] ? lock_is_held_type+0x80/0x130 [ 612.159789] ? process_one_work+0x90/0x9b0 [ 612.159812] kasan_check_range+0x145/0x1a0 [ 612.159834] process_one_work+0x90/0x9b0 [ 612.159877] ? pwq_dec_nr_in_flight+0x110/0x110 [ 612.159914] ? spin_bug+0x90/0x90 [ 612.159967] worker_thread+0x3b6/0x6c0 [ 612.160023] ? process_one_work+0x9b0/0x9b0 [ 612.160038] kthread+0x1dc/0x200 [ 612.160051] ? kthread_create_worker_on_cpu+0xd0/0xd0 [ 612.160092] ret_from_fork+0x1f/0x30 [ 612.160399] Allocated by task 2358: [ 612.160757] kasan_save_stack+0x1b/0x40 [ 612.160768] __kasan_kmalloc+0x9b/0xd0 [ 612.160778] cifs_new_fileinfo+0xb0/0x960 [cifs] [ 612.161170] cifs_open+0xadf/0xf20 [cifs] [ 612.161421] do_dentry_open+0x2aa/0x6b0 [ 612.161432] path_openat+0xbd9/0xfa0 [ 612.161441] do_filp_open+0x11d/0x230 [ 612.161450] do_sys_openat2+0x115/0x240 [ 612.161460] __x64_sys_openat+0xce/0x140 When mod_delayed_work is called to modify the delay of pending work, it might return false and queue a new work when pending work is already scheduled or when try to grab pending work failed. So, Increase the reference count when new work is scheduled to avoid use-after-free. Signed-off-by: Rohith Surabattula Signed-off-by: Steve French --- fs/cifs/file.c | 20 +++++++++++++------- fs/cifs/misc.c | 12 ++++++++++-- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index a1abd3da1d44..379a427f3c2f 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -874,10 +874,6 @@ void smb2_deferred_work_close(struct work_struct *work) struct cifsFileInfo, deferred.work); spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock); - if (!cfile->deferred_close_scheduled) { - spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock); - return; - } cifs_del_deferred_close(cfile); cfile->deferred_close_scheduled = false; spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock); @@ -904,8 +900,13 @@ int cifs_close(struct inode *inode, struct file *file) cifs_add_deferred_close(cfile, dclose); if (cfile->deferred_close_scheduled && delayed_work_pending(&cfile->deferred)) { - mod_delayed_work(deferredclose_wq, - &cfile->deferred, cifs_sb->ctx->acregmax); + /* + * If there is no pending work, mod_delayed_work queues new work. + * So, Increase the ref count to avoid use-after-free. + */ + if (!mod_delayed_work(deferredclose_wq, + &cfile->deferred, cifs_sb->ctx->acregmax)) + cifsFileInfo_get(cfile); } else { /* Deferred close for files */ queue_delayed_work(deferredclose_wq, @@ -4879,7 +4880,12 @@ oplock_break_ack: if (is_deferred && cfile->deferred_close_scheduled && delayed_work_pending(&cfile->deferred)) { - mod_delayed_work(deferredclose_wq, &cfile->deferred, 0); + /* + * If there is no pending work, mod_delayed_work queues new work. + * So, Increase the ref count to avoid use-after-free. + */ + if (!mod_delayed_work(deferredclose_wq, &cfile->deferred, 0)) + cifsFileInfo_get(cfile); } spin_unlock(&CIFS_I(inode)->deferred_lock); _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false); diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 34f2a7e80c58..7207a63819cb 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -674,6 +674,8 @@ cifs_add_pending_open(struct cifs_fid *fid, struct tcon_link *tlink, /* * Critical section which runs after acquiring deferred_lock. + * As there is no reference count on cifs_deferred_close, pdclose + * should not be used outside deferred_lock. */ bool cifs_is_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close **pdclose) @@ -752,8 +754,14 @@ cifs_close_all_deferred_files(struct cifs_tcon *tcon) spin_lock(&tcon->open_file_lock); list_for_each(tmp, &tcon->openFileList) { cfile = list_entry(tmp, struct cifsFileInfo, tlist); - if (delayed_work_pending(&cfile->deferred)) - mod_delayed_work(deferredclose_wq, &cfile->deferred, 0); + if (delayed_work_pending(&cfile->deferred)) { + /* + * If there is no pending work, mod_delayed_work queues new work. + * So, Increase the ref count to avoid use-after-free. + */ + if (!mod_delayed_work(deferredclose_wq, &cfile->deferred, 0)) + cifsFileInfo_get(cfile); + } } spin_unlock(&tcon->open_file_lock); } From 8570e75a55430844a8e85e3458e5701556334ffd Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 19 May 2021 21:33:33 +0000 Subject: [PATCH 445/730] selftests: Add .gitignore for nci test suite Building the nci test suite produces a binary, nci_dev, that git then tries to track. Add a .gitignore file to tell git to ignore this binary. Signed-off-by: David Matlack Signed-off-by: David S. Miller --- tools/testing/selftests/nci/.gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 tools/testing/selftests/nci/.gitignore diff --git a/tools/testing/selftests/nci/.gitignore b/tools/testing/selftests/nci/.gitignore new file mode 100644 index 000000000000..448eeb4590fc --- /dev/null +++ b/tools/testing/selftests/nci/.gitignore @@ -0,0 +1 @@ +/nci_dev From 1a0b713c73688c6bafbe6faf8c90390b11b26fc6 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 18 May 2021 09:01:47 +0800 Subject: [PATCH 446/730] drm/amd/pm: correct MGpuFanBoost setting No MGpuFanBoost setting for those ASICs which do not support it. Otherwise, it may breaks their fan control feature. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1580 Signed-off-by: Evan Quan Reviewed-by: Kenneth Feng Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 9 +++++++++ .../gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 10 ++++++++++ 2 files changed, 19 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index ac13042672ea..0eaf86b5e698 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -2925,6 +2925,8 @@ static ssize_t navi1x_get_gpu_metrics(struct smu_context *smu, static int navi10_enable_mgpu_fan_boost(struct smu_context *smu) { + struct smu_table_context *table_context = &smu->smu_table; + PPTable_t *smc_pptable = table_context->driver_pptable; struct amdgpu_device *adev = smu->adev; uint32_t param = 0; @@ -2932,6 +2934,13 @@ static int navi10_enable_mgpu_fan_boost(struct smu_context *smu) if (adev->asic_type == CHIP_NAVI12) return 0; + /* + * Skip the MGpuFanBoost setting for those ASICs + * which do not support it + */ + if (!smc_pptable->MGpuFanBoostLimitRpm) + return 0; + /* Workaround for WS SKU */ if (adev->pdev->device == 0x7312 && adev->pdev->revision == 0) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index d2fd44b903ca..b124a5e40dd6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -3027,6 +3027,16 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu, static int sienna_cichlid_enable_mgpu_fan_boost(struct smu_context *smu) { + struct smu_table_context *table_context = &smu->smu_table; + PPTable_t *smc_pptable = table_context->driver_pptable; + + /* + * Skip the MGpuFanBoost setting for those ASICs + * which do not support it + */ + if (!smc_pptable->MGpuFanBoostLimitRpm) + return 0; + return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetMGpuFanBoostLimitRpm, 0, From ba515a5821dc0d101ded0379b14b1d1471ebfaba Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Wed, 19 May 2021 11:03:11 +0800 Subject: [PATCH 447/730] drm/amdkfd: correct sienna_cichlid SDMA RLC register offset error 1.correct KFD SDMA RLC queue register offset error. (all sdma rlc register offset is base on SDMA0.RLC0_RLC0_RB_CNTL) 2.HQD_N_REGS (19+6+7+12) 12: the 2 more resgisters than navi1x (SDMAx_RLCy_MIDCMD_DATA{9,10}) the patch also can be fixed NULL pointer issue when read /sys/kernel/debug/kfd/hqds on sienna_cichlid chip. Signed-off-by: Kevin Wang Reviewed-by: Likun Gao Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c index fad3b91f74f5..d39cff4a1fe3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c @@ -156,16 +156,16 @@ static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL; break; case 1: - sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0, + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL; break; case 2: - sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0, - mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL; + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0, + mmSDMA2_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL; break; case 3: - sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0, - mmSDMA3_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL; + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0, + mmSDMA3_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL; break; } @@ -450,7 +450,7 @@ static int hqd_sdma_dump_v10_3(struct kgd_dev *kgd, engine_id, queue_id); uint32_t i = 0, reg; #undef HQD_N_REGS -#define HQD_N_REGS (19+6+7+10) +#define HQD_N_REGS (19+6+7+12) *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); if (*dump == NULL) From b95f045ea35673572ef46d6483ad8bd6d353d63c Mon Sep 17 00:00:00 2001 From: James Zhu Date: Tue, 18 May 2021 10:58:22 -0400 Subject: [PATCH 448/730] drm/amdgpu/vcn1: add cancel_delayed_work_sync before power gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add cancel_delayed_work_sync before set power gating state to avoid race condition issue when power gating. Signed-off-by: James Zhu Reviewed-by: Leo Liu Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 0c1beefa3e49..27b1ced145d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -231,9 +231,13 @@ static int vcn_v1_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + cancel_delayed_work_sync(&adev->vcn.idle_work); + if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || - RREG32_SOC15(VCN, 0, mmUVD_STATUS)) + (adev->vcn.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(VCN, 0, mmUVD_STATUS))) { vcn_v1_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + } return 0; } From 0c6013377b4027e69d8f3e63b6bf556b6cb87802 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Wed, 19 May 2021 11:26:32 -0400 Subject: [PATCH 449/730] drm/amdgpu/vcn2.0: add cancel_delayed_work_sync before power gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add cancel_delayed_work_sync before set power gating state to avoid race condition issue when power gating. Signed-off-by: James Zhu Reviewed-by: Leo Liu Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 116b9643d5ba..8af567c546db 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -262,6 +262,8 @@ static int vcn_v2_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + cancel_delayed_work_sync(&adev->vcn.idle_work); + if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || (adev->vcn.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(VCN, 0, mmUVD_STATUS))) From 2fb536ea42d557f39f70c755f68e1aa1ad466c55 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Wed, 19 May 2021 11:40:39 -0400 Subject: [PATCH 450/730] drm/amdgpu/vcn2.5: add cancel_delayed_work_sync before power gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add cancel_delayed_work_sync before set power gating state to avoid race condition issue when power gating. Signed-off-by: James Zhu Reviewed-by: Leo Liu Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 948813d7caa0..888b17d84691 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -321,6 +321,8 @@ static int vcn_v2_5_hw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; + cancel_delayed_work_sync(&adev->vcn.idle_work); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; From 4a62542ae064e3b645d6bbf2295a6c05136956c6 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Mon, 17 May 2021 16:39:17 -0400 Subject: [PATCH 451/730] drm/amdgpu/vcn3: add cancel_delayed_work_sync before power gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add cancel_delayed_work_sync before set power gating state to avoid race condition issue when power gating. Signed-off-by: James Zhu Reviewed-by: Leo Liu Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 14470da52113..3b23de996db2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -372,15 +372,14 @@ done: static int vcn_v3_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring; int i; + cancel_delayed_work_sync(&adev->vcn.idle_work); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; - ring = &adev->vcn.inst[i].ring_dec; - if (!amdgpu_sriov_vf(adev)) { if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || (adev->vcn.cur_state != AMD_PG_STATE_GATE && From ff48f6dbf0ff896c98d167a67a5b975fb034356b Mon Sep 17 00:00:00 2001 From: James Zhu Date: Wed, 19 May 2021 11:42:48 -0400 Subject: [PATCH 452/730] drm/amdgpu/jpeg2.0: add cancel_delayed_work_sync before power gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add cancel_delayed_work_sync before set power gating state to avoid race condition issue when power gating. Signed-off-by: James Zhu Reviewed-by: Leo Liu Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index de5abceced0d..85967a5570cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -172,6 +172,8 @@ static int jpeg_v2_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + cancel_delayed_work_sync(&adev->vcn.idle_work); + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS)) jpeg_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE); From 23f10a571da5eaa63b7845d16e2f49837e841ab9 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Wed, 19 May 2021 12:04:38 -0400 Subject: [PATCH 453/730] drm/amdgpu/jpeg2.5: add cancel_delayed_work_sync before power gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add cancel_delayed_work_sync before set power gating state to avoid race condition issue when power gating. Signed-off-by: James Zhu Reviewed-by: Leo Liu Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 938ef4ce5b76..46096ad7f0d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -187,14 +187,14 @@ static int jpeg_v2_5_hw_init(void *handle) static int jpeg_v2_5_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring; int i; + cancel_delayed_work_sync(&adev->vcn.idle_work); + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { if (adev->jpeg.harvest_config & (1 << i)) continue; - ring = &adev->jpeg.inst[i].ring_dec; if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS)) jpeg_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE); From 20ebbfd22f8115a1e4f60d3d289f66be4d47f1ec Mon Sep 17 00:00:00 2001 From: James Zhu Date: Wed, 19 May 2021 12:08:20 -0400 Subject: [PATCH 454/730] drm/amdgpu/jpeg3: add cancel_delayed_work_sync before power gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add cancel_delayed_work_sync before set power gating state to avoid race condition issue when power gating. Signed-off-by: James Zhu Reviewed-by: Leo Liu Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index 94be35357f7d..bd77794315bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -159,9 +159,9 @@ static int jpeg_v3_0_hw_init(void *handle) static int jpeg_v3_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring; - ring = &adev->jpeg.inst->ring_dec; + cancel_delayed_work_sync(&adev->vcn.idle_work); + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS)) jpeg_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE); From 6bdacdb48e94ff26c03c6eeeef48c03c5e2f7dd4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 12 May 2021 20:57:14 +0200 Subject: [PATCH 455/730] bpf: Fix BPF_JIT kconfig symbol dependency Randy reported a randconfig build error recently on i386: ld: arch/x86/net/bpf_jit_comp32.o: in function `do_jit': bpf_jit_comp32.c:(.text+0x28c9): undefined reference to `__bpf_call_base' ld: arch/x86/net/bpf_jit_comp32.o: in function `bpf_int_jit_compile': bpf_jit_comp32.c:(.text+0x3694): undefined reference to `bpf_jit_blind_constants' ld: bpf_jit_comp32.c:(.text+0x3719): undefined reference to `bpf_jit_binary_free' ld: bpf_jit_comp32.c:(.text+0x3745): undefined reference to `bpf_jit_binary_alloc' ld: bpf_jit_comp32.c:(.text+0x37d3): undefined reference to `bpf_jit_prog_release_other' [...] The cause was that b24abcff918a ("bpf, kconfig: Add consolidated menu entry for bpf with core options") moved BPF_JIT from net/Kconfig into kernel/bpf/Kconfig and previously BPF_JIT was guarded by a 'if NET'. However, there is no actual dependency on NET, it's just that menuconfig NET selects BPF. And the latter in turn causes kernel/bpf/core.o to be built which contains above symbols. Randy's randconfig didn't have NET set, and BPF wasn't either, but BPF_JIT otoh was. Detangle this by making BPF_JIT depend on BPF instead. arm64 was the only arch that pulled in its JIT in net/ via obj-$(CONFIG_NET), all others unconditionally pull this dir in via obj-y. Do the same since CONFIG_NET guard there is really useless as we compiled the JIT via obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o anyway. Fixes: b24abcff918a ("bpf, kconfig: Add consolidated menu entry for bpf with core options") Reported-by: Randy Dunlap Signed-off-by: Daniel Borkmann Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- arch/arm64/Kbuild | 3 +-- kernel/bpf/Kconfig | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/Kbuild b/arch/arm64/Kbuild index d6465823b281..7b393cfec071 100644 --- a/arch/arm64/Kbuild +++ b/arch/arm64/Kbuild @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y += kernel/ mm/ -obj-$(CONFIG_NET) += net/ +obj-y += kernel/ mm/ net/ obj-$(CONFIG_KVM) += kvm/ obj-$(CONFIG_XEN) += xen/ obj-$(CONFIG_CRYPTO) += crypto/ diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig index 26b591e23f16..bd04f4a44c01 100644 --- a/kernel/bpf/Kconfig +++ b/kernel/bpf/Kconfig @@ -37,6 +37,7 @@ config BPF_SYSCALL config BPF_JIT bool "Enable BPF Just In Time compiler" + depends on BPF depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT depends on MODULES help From 8f1634b82189e715b0f82f16ce54fab43cfedd8a Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 14 May 2021 10:05:28 -0700 Subject: [PATCH 456/730] selftests/bpf: Convert static to global in tc_redirect progs Both IFINDEX_SRC and IFINDEX_DST are set from the userspace and it won't work once bpf merges with bpf-next. Fixes: 096eccdef0b3 ("selftests/bpf: Rewrite test_tc_redirect.sh as prog_tests/tc_redirect.c") Signed-off-by: Stanislav Fomichev Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210514170528.3750250-1-sdf@google.com --- tools/testing/selftests/bpf/progs/test_tc_neigh.c | 4 ++-- tools/testing/selftests/bpf/progs/test_tc_peer.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c index 90f64a85998f..0c93d326a663 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_neigh.c +++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c @@ -33,8 +33,8 @@ a.s6_addr32[3] == b.s6_addr32[3]) #endif -static volatile const __u32 IFINDEX_SRC; -static volatile const __u32 IFINDEX_DST; +volatile const __u32 IFINDEX_SRC; +volatile const __u32 IFINDEX_DST; static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb, __be32 addr) diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c index 72c72950c3bb..ef264bced0e6 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_peer.c +++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c @@ -8,8 +8,8 @@ #include -static volatile const __u32 IFINDEX_SRC; -static volatile const __u32 IFINDEX_DST; +volatile const __u32 IFINDEX_SRC; +volatile const __u32 IFINDEX_DST; SEC("classifier/chk_egress") int tc_chk(struct __sk_buff *skb) From 704e2beba23c45eaa056b1c03b5e1fb221e03f80 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 14 May 2021 11:07:26 -0700 Subject: [PATCH 457/730] selftests/bpf: Test ringbuf mmap read-only and read-write restrictions Extend ringbuf selftest to validate read/write and read-only restrictions on memory mapping consumer/producer/data pages. Ensure no "escalations" from PROT_READ to PROT_WRITE/PROT_EXEC is allowed. And test that mremap() fails to expand mmap()'ed area. Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210514180726.843157-1-andrii@kernel.org --- .../selftests/bpf/prog_tests/ringbuf.c | 49 ++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index de78617f6550..f9a8ae331963 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -86,8 +86,9 @@ void test_ringbuf(void) const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample); pthread_t thread; long bg_ret = -1; - int err, cnt; + int err, cnt, rb_fd; int page_size = getpagesize(); + void *mmap_ptr, *tmp_ptr; skel = test_ringbuf__open(); if (CHECK(!skel, "skel_open", "skeleton open failed\n")) @@ -101,6 +102,52 @@ void test_ringbuf(void) if (CHECK(err != 0, "skel_load", "skeleton load failed\n")) goto cleanup; + rb_fd = bpf_map__fd(skel->maps.ringbuf); + /* good read/write cons_pos */ + mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0); + ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos"); + tmp_ptr = mremap(mmap_ptr, page_size, 2 * page_size, MREMAP_MAYMOVE); + if (!ASSERT_ERR_PTR(tmp_ptr, "rw_extend")) + goto cleanup; + ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_cons_pos_protect"); + ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw"); + + /* bad writeable prod_pos */ + mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, page_size); + err = -errno; + ASSERT_ERR_PTR(mmap_ptr, "wr_prod_pos"); + ASSERT_EQ(err, -EPERM, "wr_prod_pos_err"); + + /* bad writeable data pages */ + mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size); + err = -errno; + ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_one"); + ASSERT_EQ(err, -EPERM, "wr_data_page_one_err"); + mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 3 * page_size); + ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_two"); + mmap_ptr = mmap(NULL, 2 * page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size); + ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_all"); + + /* good read-only pages */ + mmap_ptr = mmap(NULL, 4 * page_size, PROT_READ, MAP_SHARED, rb_fd, 0); + if (!ASSERT_OK_PTR(mmap_ptr, "ro_prod_pos")) + goto cleanup; + + ASSERT_ERR(mprotect(mmap_ptr, 4 * page_size, PROT_WRITE), "write_protect"); + ASSERT_ERR(mprotect(mmap_ptr, 4 * page_size, PROT_EXEC), "exec_protect"); + ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 4 * page_size, MREMAP_MAYMOVE), "ro_remap"); + ASSERT_OK(munmap(mmap_ptr, 4 * page_size), "unmap_ro"); + + /* good read-only pages with initial offset */ + mmap_ptr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rb_fd, page_size); + if (!ASSERT_OK_PTR(mmap_ptr, "ro_prod_pos")) + goto cleanup; + + ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_WRITE), "write_protect"); + ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_protect"); + ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 3 * page_size, MREMAP_MAYMOVE), "ro_remap"); + ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_ro"); + /* only trigger BPF program for current process */ skel->bss->pid = getpid(); From 8afcc19fbf083a8459284d9a29b4b5ac1cb2396c Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Mon, 17 May 2021 11:28:29 +0200 Subject: [PATCH 458/730] bpf: Clarify a bpf_bprintf_prepare macro The per-cpu buffers contain bprintf data rather than printf arguments. The macro name and comment were a bit confusing, this rewords them in a clearer way. Signed-off-by: Florent Revest Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20210517092830.1026418-1-revest@chromium.org --- kernel/bpf/helpers.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index ef658a9ea5c9..3a5ab614cbb0 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -692,13 +692,14 @@ static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, return -EINVAL; } -/* Per-cpu temp buffers which can be used by printf-like helpers for %s or %p +/* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary + * arguments representation. */ -#define MAX_PRINTF_BUF_LEN 512 +#define MAX_BPRINTF_BUF_LEN 512 /* Support executing three nested bprintf helper calls on a given CPU */ struct bpf_bprintf_buffers { - char tmp_bufs[3][MAX_PRINTF_BUF_LEN]; + char tmp_bufs[3][MAX_BPRINTF_BUF_LEN]; }; static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs); static DEFINE_PER_CPU(int, bpf_bprintf_nest_level); @@ -761,7 +762,7 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, if (num_args && try_get_fmt_tmp_buf(&tmp_buf)) return -EBUSY; - tmp_buf_end = tmp_buf + MAX_PRINTF_BUF_LEN; + tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN; *bin_args = (u32 *)tmp_buf; } From 0af02eb2a7d76ca85a1ecaf4b3775e2c86408fab Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Mon, 17 May 2021 11:28:30 +0200 Subject: [PATCH 459/730] bpf: Avoid using ARRAY_SIZE on an uninitialized pointer The cppcheck static code analysis reported the following error: if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bufs->tmp_bufs))) { ^ ARRAY_SIZE is a macro that expands to sizeofs, so bufs is not actually dereferenced at runtime, and the code is actually safe. But to keep things tidy, this patch removes the need for a call to ARRAY_SIZE by extracting the size of the array into a macro. Cppcheck should no longer be confused and the code ends up being a bit cleaner. Fixes: e2d5b2bb769f ("bpf: Fix nested bpf_bprintf_prepare with more per-cpu buffers") Reported-by: kernel test robot Signed-off-by: Florent Revest Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20210517092830.1026418-2-revest@chromium.org --- kernel/bpf/helpers.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 3a5ab614cbb0..73443498d88f 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -698,8 +698,9 @@ static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, #define MAX_BPRINTF_BUF_LEN 512 /* Support executing three nested bprintf helper calls on a given CPU */ +#define MAX_BPRINTF_NEST_LEVEL 3 struct bpf_bprintf_buffers { - char tmp_bufs[3][MAX_BPRINTF_BUF_LEN]; + char tmp_bufs[MAX_BPRINTF_NEST_LEVEL][MAX_BPRINTF_BUF_LEN]; }; static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs); static DEFINE_PER_CPU(int, bpf_bprintf_nest_level); @@ -711,7 +712,7 @@ static int try_get_fmt_tmp_buf(char **tmp_buf) preempt_disable(); nest_level = this_cpu_inc_return(bpf_bprintf_nest_level); - if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bufs->tmp_bufs))) { + if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) { this_cpu_dec(bpf_bprintf_nest_level); preempt_enable(); return -EBUSY; From ceb11679d9fcf3fdb358a310a38760fcbe9b63ed Mon Sep 17 00:00:00 2001 From: Yinjun Zhang Date: Thu, 20 May 2021 10:58:34 +0200 Subject: [PATCH 460/730] bpf, offload: Reorder offload callback 'prepare' in verifier Commit 4976b718c355 ("bpf: Introduce pseudo_btf_id") switched the order of resolve_pseudo_ldimm(), in which some pseudo instructions are rewritten. Thus those rewritten instructions cannot be passed to driver via 'prepare' offload callback. Reorder the 'prepare' offload callback to fix it. Fixes: 4976b718c355 ("bpf: Introduce pseudo_btf_id") Signed-off-by: Yinjun Zhang Signed-off-by: Simon Horman Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20210520085834.15023-1-simon.horman@netronome.com --- kernel/bpf/verifier.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c58598ef4b5b..09849e43f035 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -13368,12 +13368,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, if (is_priv) env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ; - if (bpf_prog_is_dev_bound(env->prog->aux)) { - ret = bpf_prog_offload_verifier_prep(env->prog); - if (ret) - goto skip_full_check; - } - env->explored_states = kvcalloc(state_htab_size(env), sizeof(struct bpf_verifier_state_list *), GFP_USER); @@ -13401,6 +13395,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, if (ret < 0) goto skip_full_check; + if (bpf_prog_is_dev_bound(env->prog->aux)) { + ret = bpf_prog_offload_verifier_prep(env->prog); + if (ret) + goto skip_full_check; + } + ret = check_cfg(env); if (ret < 0) goto skip_full_check; From 84316ca4e100d8cbfccd9f774e23817cb2059868 Mon Sep 17 00:00:00 2001 From: Jussi Maki Date: Wed, 19 May 2021 15:47:42 +0000 Subject: [PATCH 461/730] bpf: Set mac_len in bpf_skb_change_head The skb_change_head() helper did not set "skb->mac_len", which is problematic when it's used in combination with skb_redirect_peer(). Without it, redirecting a packet from a L3 device such as wireguard to the veth peer device will cause skb->data to point to the middle of the IP header on entry to tcp_v4_rcv() since the L2 header is not pulled correctly due to mac_len=0. Fixes: 3a0af8fd61f9 ("bpf: BPF for lightweight tunnel infrastructure") Signed-off-by: Jussi Maki Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210519154743.2554771-2-joamaki@gmail.com --- net/core/filter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/filter.c b/net/core/filter.c index cae56d08a670..65ab4e21c087 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3784,6 +3784,7 @@ static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room, __skb_push(skb, head_room); memset(skb->data, 0, head_room); skb_reset_mac_header(skb); + skb_reset_mac_len(skb); } return ret; From 63e39d29b3da02e901349f6cd71159818a4737a6 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Thu, 20 May 2021 11:18:35 -0700 Subject: [PATCH 462/730] ixgbe: fix large MTU request from VF Check that the MTU value requested by the VF is in the supported range of MTUs before attempting to set the VF large packet enable, otherwise reject the request. This also avoids unnecessary register updates in the case of the 82599 controller. Fixes: 872844ddb9e4 ("ixgbe: Enable jumbo frames support w/ SR-IOV") Co-developed-by: Piotr Skajewski Signed-off-by: Piotr Skajewski Signed-off-by: Jesse Brandeburg Co-developed-by: Mateusz Palczewski Signed-off-by: Mateusz Palczewski Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 988db46bff0e..214a38de3f41 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -467,12 +467,16 @@ static int ixgbe_set_vf_vlan(struct ixgbe_adapter *adapter, int add, int vid, return err; } -static s32 ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) +static int ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 max_frame, u32 vf) { struct ixgbe_hw *hw = &adapter->hw; - int max_frame = msgbuf[1]; u32 max_frs; + if (max_frame < ETH_MIN_MTU || max_frame > IXGBE_MAX_JUMBO_FRAME_SIZE) { + e_err(drv, "VF max_frame %d out of range\n", max_frame); + return -EINVAL; + } + /* * For 82599EB we have to keep all PFs and VFs operating with * the same max_frame value in order to avoid sending an oversize @@ -533,12 +537,6 @@ static s32 ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) } } - /* MTU < 68 is an error and causes problems on some kernels */ - if (max_frame > IXGBE_MAX_JUMBO_FRAME_SIZE) { - e_err(drv, "VF max_frame %d out of range\n", max_frame); - return -EINVAL; - } - /* pull current max frame size from hardware */ max_frs = IXGBE_READ_REG(hw, IXGBE_MAXFRS); max_frs &= IXGBE_MHADD_MFS_MASK; @@ -1249,7 +1247,7 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf) retval = ixgbe_set_vf_vlan_msg(adapter, msgbuf, vf); break; case IXGBE_VF_SET_LPE: - retval = ixgbe_set_vf_lpe(adapter, msgbuf, vf); + retval = ixgbe_set_vf_lpe(adapter, msgbuf[1], vf); break; case IXGBE_VF_SET_MACVLAN: retval = ixgbe_set_vf_macvlan_msg(adapter, msgbuf, vf); From 503c599a4f53fe3d959aebfd22c34da27da49777 Mon Sep 17 00:00:00 2001 From: Aditya Srivastava Date: Fri, 21 May 2021 00:19:15 +0530 Subject: [PATCH 463/730] net: encx24j600: fix kernel-doc syntax in file headers The opening comment mark '/**' is used for highlighting the beginning of kernel-doc comments. The header for drivers/net/ethernet/microchip/encx24j600 files follows this syntax, but the content inside does not comply with kernel-doc. This line was probably not meant for kernel-doc parsing, but is parsed due to the presence of kernel-doc like comment syntax(i.e, '/**'), which causes unexpected warning from kernel-doc. For e.g., running scripts/kernel-doc -none drivers/net/ethernet/microchip/encx24j600_hw.h emits: warning: expecting prototype for h(). Prototype was for _ENCX24J600_HW_H() instead Provide a simple fix by replacing such occurrences with general comment format, i.e. '/*', to prevent kernel-doc from parsing it. Signed-off-by: Aditya Srivastava Acked-by: Randy Dunlap Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/encx24j600.c | 2 +- drivers/net/ethernet/microchip/encx24j600_hw.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c index 3658c4ae3c37..ee921a99e439 100644 --- a/drivers/net/ethernet/microchip/encx24j600.c +++ b/drivers/net/ethernet/microchip/encx24j600.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later -/** +/* * Microchip ENCX24J600 ethernet driver * * Copyright (C) 2015 Gridpoint diff --git a/drivers/net/ethernet/microchip/encx24j600_hw.h b/drivers/net/ethernet/microchip/encx24j600_hw.h index f604a260ede7..fac61a8fbd02 100644 --- a/drivers/net/ethernet/microchip/encx24j600_hw.h +++ b/drivers/net/ethernet/microchip/encx24j600_hw.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/** +/* * encx24j600_hw.h: Register definitions * */ From 13a6f3153922391e90036ba2267d34eed63196fc Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Thu, 20 May 2021 12:32:36 +0000 Subject: [PATCH 464/730] net/qla3xxx: fix schedule while atomic in ql_sem_spinlock When calling the 'ql_sem_spinlock', the driver has already acquired the spin lock, so the driver should not call 'ssleep' in atomic context. This bug can be fixed by using 'mdelay' instead of 'ssleep'. The KASAN's log reveals it: [ 3.238124 ] BUG: scheduling while atomic: swapper/0/1/0x00000002 [ 3.238748 ] 2 locks held by swapper/0/1: [ 3.239151 ] #0: ffff88810177b240 (&dev->mutex){....}-{3:3}, at: __device_driver_lock+0x41/0x60 [ 3.240026 ] #1: ffff888107c60e28 (&qdev->hw_lock){....}-{2:2}, at: ql3xxx_probe+0x2aa/0xea0 [ 3.240873 ] Modules linked in: [ 3.241187 ] irq event stamp: 460854 [ 3.241541 ] hardirqs last enabled at (460853): [] _raw_spin_unlock_irqrestore+0x4f/0x70 [ 3.242245 ] hardirqs last disabled at (460854): [] _raw_spin_lock_irqsave+0x2a/0x70 [ 3.242245 ] softirqs last enabled at (446076): [] __do_softirq+0x2e4/0x4b1 [ 3.242245 ] softirqs last disabled at (446069): [] irq_exit_rcu+0x100/0x110 [ 3.242245 ] Preemption disabled at: [ 3.242245 ] [] ql3xxx_probe+0x2aa/0xea0 [ 3.242245 ] Kernel panic - not syncing: scheduling while atomic [ 3.242245 ] CPU: 2 PID: 1 Comm: swapper/0 Not tainted 5.13.0-rc1-00145 -gee7dc339169-dirty #16 [ 3.242245 ] Call Trace: [ 3.242245 ] dump_stack+0xba/0xf5 [ 3.242245 ] ? ql3xxx_probe+0x1f0/0xea0 [ 3.242245 ] panic+0x15a/0x3f2 [ 3.242245 ] ? vprintk+0x76/0x150 [ 3.242245 ] ? ql3xxx_probe+0x2aa/0xea0 [ 3.242245 ] __schedule_bug+0xae/0xe0 [ 3.242245 ] __schedule+0x72e/0xa00 [ 3.242245 ] schedule+0x43/0xf0 [ 3.242245 ] schedule_timeout+0x28b/0x500 [ 3.242245 ] ? del_timer_sync+0xf0/0xf0 [ 3.242245 ] ? msleep+0x2f/0x70 [ 3.242245 ] msleep+0x59/0x70 [ 3.242245 ] ql3xxx_probe+0x307/0xea0 [ 3.242245 ] ? _raw_spin_unlock_irqrestore+0x3a/0x70 [ 3.242245 ] ? pci_device_remove+0x110/0x110 [ 3.242245 ] local_pci_probe+0x45/0xa0 [ 3.242245 ] pci_device_probe+0x12b/0x1d0 [ 3.242245 ] really_probe+0x2a9/0x610 [ 3.242245 ] driver_probe_device+0x90/0x1d0 [ 3.242245 ] ? mutex_lock_nested+0x1b/0x20 [ 3.242245 ] device_driver_attach+0x68/0x70 [ 3.242245 ] __driver_attach+0x124/0x1b0 [ 3.242245 ] ? device_driver_attach+0x70/0x70 [ 3.242245 ] bus_for_each_dev+0xbb/0x110 [ 3.242245 ] ? rdinit_setup+0x45/0x45 [ 3.242245 ] driver_attach+0x27/0x30 [ 3.242245 ] bus_add_driver+0x1eb/0x2a0 [ 3.242245 ] driver_register+0xa9/0x180 [ 3.242245 ] __pci_register_driver+0x82/0x90 [ 3.242245 ] ? yellowfin_init+0x25/0x25 [ 3.242245 ] ql3xxx_driver_init+0x23/0x25 [ 3.242245 ] do_one_initcall+0x7f/0x3d0 [ 3.242245 ] ? rdinit_setup+0x45/0x45 [ 3.242245 ] ? rcu_read_lock_sched_held+0x4f/0x80 [ 3.242245 ] kernel_init_freeable+0x2aa/0x301 [ 3.242245 ] ? rest_init+0x2c0/0x2c0 [ 3.242245 ] kernel_init+0x18/0x190 [ 3.242245 ] ? rest_init+0x2c0/0x2c0 [ 3.242245 ] ? rest_init+0x2c0/0x2c0 [ 3.242245 ] ret_from_fork+0x1f/0x30 [ 3.242245 ] Dumping ftrace buffer: [ 3.242245 ] (ftrace buffer empty) [ 3.242245 ] Kernel Offset: disabled [ 3.242245 ] Rebooting in 1 seconds. Reported-by: Zheyu Ma Signed-off-by: Zheyu Ma Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qla3xxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 214e347097a7..2376b2729633 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -114,7 +114,7 @@ static int ql_sem_spinlock(struct ql3_adapter *qdev, value = readl(&port_regs->CommonRegs.semaphoreReg); if ((value & (sem_mask >> 16)) == sem_bits) return 0; - ssleep(1); + mdelay(1000); } while (--seconds); return -1; } From b3dcb312778664bfbe0a73242fa04a628719b066 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Thu, 20 May 2021 20:51:16 +0800 Subject: [PATCH 465/730] net: stmmac: correct clocks enabled in stmmac_vlan_rx_kill_vid() This should be a mistake to fix conflicts when removing RFC tag to repost the patch. Fixes: 5ec55823438e ("net: stmmac: add clocks management for gmac driver") Signed-off-by: Joakim Zhang Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index fea3bf07ae89..df4ce5977fad 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -6191,12 +6191,6 @@ static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid bool is_double = false; int ret; - ret = pm_runtime_get_sync(priv->device); - if (ret < 0) { - pm_runtime_put_noidle(priv->device); - return ret; - } - if (be16_to_cpu(proto) == ETH_P_8021AD) is_double = true; @@ -6222,6 +6216,12 @@ static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vi bool is_double = false; int ret; + ret = pm_runtime_get_sync(priv->device); + if (ret < 0) { + pm_runtime_put_noidle(priv->device); + return ret; + } + if (be16_to_cpu(proto) == ETH_P_8021AD) is_double = true; From 4691ffb18ac908609aab07d13af7995b6b89d33c Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Thu, 20 May 2021 20:51:17 +0800 Subject: [PATCH 466/730] net: stmmac: fix system hang if change mac address after interface ifdown Fix system hang with below sequences: ~# ifconfig ethx down ~# ifconfig ethx hw ether xx:xx:xx:xx:xx:xx After ethx down, stmmac all clocks gated off and then register access causes system hang. Fixes: 5ec55823438e ("net: stmmac: add clocks management for gmac driver") Signed-off-by: Joakim Zhang Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index df4ce5977fad..5d956a553434 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5891,12 +5891,21 @@ static int stmmac_set_mac_address(struct net_device *ndev, void *addr) struct stmmac_priv *priv = netdev_priv(ndev); int ret = 0; + ret = pm_runtime_get_sync(priv->device); + if (ret < 0) { + pm_runtime_put_noidle(priv->device); + return ret; + } + ret = eth_mac_addr(ndev, addr); if (ret) - return ret; + goto set_mac_error; stmmac_set_umac_addr(priv, priv->hw, ndev->dev_addr, 0); +set_mac_error: + pm_runtime_put(priv->device); + return ret; } From ae897fda4f507e4b239f0bdfd578b3688ca96fb4 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 20 May 2021 13:42:42 +0200 Subject: [PATCH 467/730] x86/Xen: swap NX determination and GDT setup on BSP xen_setup_gdt(), via xen_load_gdt_boot(), wants to adjust page tables. For this to work when NX is not available, x86_configure_nx() needs to be called first. [jgross] Note that this is a revert of 36104cb9012a82e73 ("x86/xen: Delay get_cpu_cap until stack canary is established"), which is possible now that we no longer support running as PV guest in 32-bit mode. Cc: # 5.9 Fixes: 36104cb9012a82e73 ("x86/xen: Delay get_cpu_cap until stack canary is established") Reported-by: Olaf Hering Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/12a866b0-9e89-59f7-ebeb-a2a6cec0987a@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 17503fed2017..e87699aa2dc8 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1273,16 +1273,16 @@ asmlinkage __visible void __init xen_start_kernel(void) /* Get mfn list */ xen_build_dynamic_phys_to_machine(); + /* Work out if we support NX */ + get_cpu_cap(&boot_cpu_data); + x86_configure_nx(); + /* * Set up kernel GDT and segment registers, mainly so that * -fstack-protector code can be executed. */ xen_setup_gdt(0); - /* Work out if we support NX */ - get_cpu_cap(&boot_cpu_data); - x86_configure_nx(); - /* Determine virtual and physical address sizes */ get_cpu_address_sizes(&boot_cpu_data); From 4ba50e7c423c29639878c00573288869aa627068 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 18 May 2021 18:13:42 +0200 Subject: [PATCH 468/730] xen-pciback: redo VF placement in the virtual topology The commit referenced below was incomplete: It merely affected what would get written to the vdev- xenstore node. The guest would still find the function at the original function number as long as __xen_pcibk_get_pci_dev() wouldn't be in sync. The same goes for AER wrt __xen_pcibk_get_pcifront_dev(). Undo overriding the function to zero and instead make sure that VFs at function zero remain alone in their slot. This has the added benefit of improving overall capacity, considering that there's only a total of 32 slots available right now (PCI segment and bus can both only ever be zero at present). Fixes: 8a5248fe10b1 ("xen PV passthru: assign SR-IOV virtual functions to separate virtual slots") Signed-off-by: Jan Beulich Cc: stable@vger.kernel.org Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/8def783b-404c-3452-196d-3f3fd4d72c9e@suse.com Signed-off-by: Juergen Gross --- drivers/xen/xen-pciback/vpci.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c index 4162d0e7e00d..cc7450f2b2a9 100644 --- a/drivers/xen/xen-pciback/vpci.c +++ b/drivers/xen/xen-pciback/vpci.c @@ -70,7 +70,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, struct pci_dev *dev, int devid, publish_pci_dev_cb publish_cb) { - int err = 0, slot, func = -1; + int err = 0, slot, func = PCI_FUNC(dev->devfn); struct pci_dev_entry *t, *dev_entry; struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; @@ -95,22 +95,25 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, /* * Keep multi-function devices together on the virtual PCI bus, except - * virtual functions. + * that we want to keep virtual functions at func 0 on their own. They + * aren't multi-function devices and hence their presence at func 0 + * may cause guests to not scan the other functions. */ - if (!dev->is_virtfn) { + if (!dev->is_virtfn || func) { for (slot = 0; slot < PCI_SLOT_MAX; slot++) { if (list_empty(&vpci_dev->dev_list[slot])) continue; t = list_entry(list_first(&vpci_dev->dev_list[slot]), struct pci_dev_entry, list); + if (t->dev->is_virtfn && !PCI_FUNC(t->dev->devfn)) + continue; if (match_slot(dev, t->dev)) { dev_info(&dev->dev, "vpci: assign to virtual slot %d func %d\n", - slot, PCI_FUNC(dev->devfn)); + slot, func); list_add_tail(&dev_entry->list, &vpci_dev->dev_list[slot]); - func = PCI_FUNC(dev->devfn); goto unlock; } } @@ -123,7 +126,6 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, slot); list_add_tail(&dev_entry->list, &vpci_dev->dev_list[slot]); - func = dev->is_virtfn ? 0 : PCI_FUNC(dev->devfn); goto unlock; } } From c81d3d24602540f65256f98831d0a25599ea6b87 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 18 May 2021 18:14:07 +0200 Subject: [PATCH 469/730] xen-pciback: reconfigure also from backend watch handler When multiple PCI devices get assigned to a guest right at boot, libxl incrementally populates the backend tree. The writes for the first of the devices trigger the backend watch. In turn xen_pcibk_setup_backend() will set the XenBus state to Initialised, at which point no further reconfigures would happen unless a device got hotplugged. Arrange for reconfigure to also get triggered from the backend watch handler. Signed-off-by: Jan Beulich Cc: stable@vger.kernel.org Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/2337cbd6-94b9-4187-9862-c03ea12e0c61@suse.com Signed-off-by: Juergen Gross --- drivers/xen/xen-pciback/xenbus.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 5188f02e75fb..c09c7ebd6968 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -359,7 +359,8 @@ out: return err; } -static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev) +static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev, + enum xenbus_state state) { int err = 0; int num_devs; @@ -373,9 +374,7 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev) dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n"); mutex_lock(&pdev->dev_lock); - /* Make sure we only reconfigure once */ - if (xenbus_read_driver_state(pdev->xdev->nodename) != - XenbusStateReconfiguring) + if (xenbus_read_driver_state(pdev->xdev->nodename) != state) goto out; err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d", @@ -500,6 +499,10 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev) } } + if (state != XenbusStateReconfiguring) + /* Make sure we only reconfigure once. */ + goto out; + err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured); if (err) { xenbus_dev_fatal(pdev->xdev, err, @@ -525,7 +528,7 @@ static void xen_pcibk_frontend_changed(struct xenbus_device *xdev, break; case XenbusStateReconfiguring: - xen_pcibk_reconfigure(pdev); + xen_pcibk_reconfigure(pdev, XenbusStateReconfiguring); break; case XenbusStateConnected: @@ -664,6 +667,15 @@ static void xen_pcibk_be_watch(struct xenbus_watch *watch, xen_pcibk_setup_backend(pdev); break; + case XenbusStateInitialised: + /* + * We typically move to Initialised when the first device was + * added. Hence subsequent devices getting added may need + * reconfiguring. + */ + xen_pcibk_reconfigure(pdev, XenbusStateInitialised); + break; + default: break; } From 3b2f17ad1770e51b8b4e68b5069c4f1ee477eff8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 May 2021 13:50:31 -0300 Subject: [PATCH 470/730] perf parse-events: Check if the software events array slots are populated To avoid a NULL pointer dereference when the kernel supports the new feature but the tooling still hasn't an entry for it. This happened with the recently added PERF_COUNT_SW_CGROUP_SWITCHES software event. Reported-by: Thomas Richter Cc: Adrian Hunter Cc: Heiko Carstens Cc: Jiri Olsa Cc: Namhyung Kim Cc: Sumanth Korikkar Link: https://lore.kernel.org/linux-perf-users/YKVESEKRjKtILhog@kernel.org/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 269997066f6e..84108c17f48d 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2932,9 +2932,14 @@ restart: } for (i = 0; i < max; i++, syms++) { + /* + * New attr.config still not supported here, the latest + * example was PERF_COUNT_SW_CGROUP_SWITCHES + */ + if (syms->symbol == NULL) + continue; - if (event_glob != NULL && syms->symbol != NULL && - !(strglobmatch(syms->symbol, event_glob) || + if (event_glob != NULL && !(strglobmatch(syms->symbol, event_glob) || (syms->alias && strglobmatch(syms->alias, event_glob)))) continue; From af2702549d68519ac78228e915d9b2c199056787 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 19 May 2021 18:48:07 -0700 Subject: [PATCH 471/730] ASoC: qcom: lpass-cpu: Use optional clk APIs This driver spits out a warning for me at boot: sc7180-lpass-cpu 62f00000.lpass: asoc_qcom_lpass_cpu_platform_probe() error getting optional null: -2 but it looks like it is all an optional clk. Use the optional clk APIs here so that we don't see this message and everything else is the same. Cc: Srinivas Kandagatla Cc: Banajit Goswami Fixes: 3e53ac8230c1 ("ASoC: qcom: make osr clock optional") Signed-off-by: Stephen Boyd Reviewed-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210520014807.3749797-1-swboyd@chromium.org Signed-off-by: Mark Brown --- sound/soc/qcom/lpass-cpu.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/sound/soc/qcom/lpass-cpu.c b/sound/soc/qcom/lpass-cpu.c index c62d2612e8f5..28c7497344e3 100644 --- a/sound/soc/qcom/lpass-cpu.c +++ b/sound/soc/qcom/lpass-cpu.c @@ -835,18 +835,8 @@ int asoc_qcom_lpass_cpu_platform_probe(struct platform_device *pdev) if (dai_id == LPASS_DP_RX) continue; - drvdata->mi2s_osr_clk[dai_id] = devm_clk_get(dev, + drvdata->mi2s_osr_clk[dai_id] = devm_clk_get_optional(dev, variant->dai_osr_clk_names[i]); - if (IS_ERR(drvdata->mi2s_osr_clk[dai_id])) { - dev_warn(dev, - "%s() error getting optional %s: %ld\n", - __func__, - variant->dai_osr_clk_names[i], - PTR_ERR(drvdata->mi2s_osr_clk[dai_id])); - - drvdata->mi2s_osr_clk[dai_id] = NULL; - } - drvdata->mi2s_bit_clk[dai_id] = devm_clk_get(dev, variant->dai_bit_clk_names[i]); if (IS_ERR(drvdata->mi2s_bit_clk[dai_id])) { From bda7db1d952c3ff7c24c11bc295aa72aaeb98451 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 20 May 2021 16:12:37 +0300 Subject: [PATCH 472/730] spi: sc18is602: don't consider the chip select byte in sc18is602_check_transfer For each spi_message, the sc18is602 I2C-to-SPI bridge driver checks the length of each spi_transfer against 200 (the size of the chip's internal buffer) minus hw->tlen (the number of bytes transferred so far). The first byte of the transferred data is the Function ID (the SPI slave's chip select) and as per the documentation of the chip: https://www.nxp.com/docs/en/data-sheet/SC18IS602B.pdf the data buffer is up to 200 bytes deep _without_ accounting for the Function ID byte. However, in sc18is602_txrx(), the driver keeps the Function ID as part of the buffer, and increments hw->tlen from 0 to 1. Combined with the check in sc18is602_check_transfer, this prevents us from issuing a transfer that has exactly 200 bytes in size, but only 199. Adjust the check function to reflect that the Function ID is not part of the 200 byte deep data buffer. Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20210520131238.2903024-2-olteanv@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-sc18is602.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-sc18is602.c b/drivers/spi/spi-sc18is602.c index 297c512069a5..37871edc7962 100644 --- a/drivers/spi/spi-sc18is602.c +++ b/drivers/spi/spi-sc18is602.c @@ -174,7 +174,7 @@ static int sc18is602_setup_transfer(struct sc18is602 *hw, u32 hz, u8 mode) static int sc18is602_check_transfer(struct spi_device *spi, struct spi_transfer *t, int tlen) { - if (t && t->len + tlen > SC18IS602_BUFSIZ) + if (t && t->len + tlen > SC18IS602_BUFSIZ + 1) return -EINVAL; return 0; From b4e46c9954ad55092502e1e8c44ceb9b6744bade Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 20 May 2021 16:12:38 +0300 Subject: [PATCH 473/730] spi: sc18is602: implement .max_{transfer,message}_size() for the controller Allow SPI peripherals attached to this controller to know what is the maximum transfer size and message size, so they can limit their transfer lengths properly in case they are otherwise capable of larger transfer sizes. For the sc18is602, this is 200 bytes in both cases, since as far as I understand, it isn't possible to tell the controller to keep the chip select asserted after the STOP command is sent. The controller can support SPI messages larger than 200 bytes if cs_change is set for individual transfers such that the portions with chip select asserted are never longer than 200 bytes. What is not supported is just SPI messages with a continuous chip select larger than 200. I don't think it is possible to express this using the current API, so drivers which do send SPI messages with cs_change can safely just look at the max_transfer_size limit. An example of user for this is sja1105_xfer() in drivers/net/dsa/sja1105/sja1105_spi.c which sends by default 64 * 4 = 256 byte transfers. Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20210520131238.2903024-3-olteanv@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-sc18is602.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/spi/spi-sc18is602.c b/drivers/spi/spi-sc18is602.c index 37871edc7962..5d27ee482237 100644 --- a/drivers/spi/spi-sc18is602.c +++ b/drivers/spi/spi-sc18is602.c @@ -219,6 +219,11 @@ static int sc18is602_transfer_one(struct spi_master *master, return status; } +static size_t sc18is602_max_transfer_size(struct spi_device *spi) +{ + return SC18IS602_BUFSIZ; +} + static int sc18is602_setup(struct spi_device *spi) { struct sc18is602 *hw = spi_master_get_devdata(spi->master); @@ -293,6 +298,8 @@ static int sc18is602_probe(struct i2c_client *client, master->bits_per_word_mask = SPI_BPW_MASK(8); master->setup = sc18is602_setup; master->transfer_one_message = sc18is602_transfer_one; + master->max_transfer_size = sc18is602_max_transfer_size; + master->max_message_size = sc18is602_max_transfer_size; master->dev.of_node = np; master->min_speed_hz = hw->freq / 128; master->max_speed_hz = hw->freq / 4; From 4f2629ea67e7225c3fd292c7fe4f5b3c9d6392de Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 18 May 2021 16:18:35 -0400 Subject: [PATCH 474/730] USB: usbfs: Don't WARN about excessively large memory allocations Syzbot found that the kernel generates a WARNing if the user tries to submit a bulk transfer through usbfs with a buffer that is way too large. This isn't a bug in the kernel; it's merely an invalid request from the user and the usbfs code does handle it correctly. In theory the same thing can happen with async transfers, or with the packet descriptor table for isochronous transfers. To prevent the MM subsystem from complaining about these bad allocation requests, add the __GFP_NOWARN flag to the kmalloc calls for these buffers. CC: Andrew Morton CC: Reported-and-tested-by: syzbot+882a85c0c8ec4a3e2281@syzkaller.appspotmail.com Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/20210518201835.GA1140918@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 533236366a03..2218941d35a3 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1218,7 +1218,12 @@ static int do_proc_bulk(struct usb_dev_state *ps, ret = usbfs_increase_memory_usage(len1 + sizeof(struct urb)); if (ret) return ret; - tbuf = kmalloc(len1, GFP_KERNEL); + + /* + * len1 can be almost arbitrarily large. Don't WARN if it's + * too big, just fail the request. + */ + tbuf = kmalloc(len1, GFP_KERNEL | __GFP_NOWARN); if (!tbuf) { ret = -ENOMEM; goto done; @@ -1696,7 +1701,7 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb if (num_sgs) { as->urb->sg = kmalloc_array(num_sgs, sizeof(struct scatterlist), - GFP_KERNEL); + GFP_KERNEL | __GFP_NOWARN); if (!as->urb->sg) { ret = -ENOMEM; goto error; @@ -1731,7 +1736,7 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb (uurb_start - as->usbm->vm_start); } else { as->urb->transfer_buffer = kmalloc(uurb->buffer_length, - GFP_KERNEL); + GFP_KERNEL | __GFP_NOWARN); if (!as->urb->transfer_buffer) { ret = -ENOMEM; goto error; From 25dda9fc56bd90d45f9a4516bcfa5211e61b4290 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Wed, 12 May 2021 20:17:09 -0700 Subject: [PATCH 475/730] usb: dwc3: gadget: Properly track pending and queued SG The driver incorrectly uses req->num_pending_sgs to track both the number of pending and queued SG entries. It only prepares the next request if the previous is done, and it doesn't update num_pending_sgs until there is TRB completion interrupt. This may starve the controller of more TRBs until the num_pending_sgs is decremented. Fix this by decrementing the num_pending_sgs after they are queued and properly track both num_mapped_sgs and num_queued_sgs. Fixes: c96e6725db9d ("usb: dwc3: gadget: Correct the logic for queuing sgs") Cc: Reported-by: Michael Grzeschik Tested-by: Michael Grzeschik Acked-by: Felipe Balbi Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/ba24591dbcaad8f244a3e88bd449bb7205a5aec3.1620874069.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 49ca5da5e279..612825a39f82 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1244,6 +1244,7 @@ static int dwc3_prepare_trbs_sg(struct dwc3_ep *dep, req->start_sg = sg_next(s); req->num_queued_sgs++; + req->num_pending_sgs--; /* * The number of pending SG entries may not correspond to the @@ -1251,7 +1252,7 @@ static int dwc3_prepare_trbs_sg(struct dwc3_ep *dep, * don't include unused SG entries. */ if (length == 0) { - req->num_pending_sgs -= req->request.num_mapped_sgs - req->num_queued_sgs; + req->num_pending_sgs = 0; break; } @@ -2873,15 +2874,15 @@ static int dwc3_gadget_ep_reclaim_trb_sg(struct dwc3_ep *dep, struct dwc3_trb *trb = &dep->trb_pool[dep->trb_dequeue]; struct scatterlist *sg = req->sg; struct scatterlist *s; - unsigned int pending = req->num_pending_sgs; + unsigned int num_queued = req->num_queued_sgs; unsigned int i; int ret = 0; - for_each_sg(sg, s, pending, i) { + for_each_sg(sg, s, num_queued, i) { trb = &dep->trb_pool[dep->trb_dequeue]; req->sg = sg_next(s); - req->num_pending_sgs--; + req->num_queued_sgs--; ret = dwc3_gadget_ep_reclaim_completed_trb(dep, req, trb, event, status, true); @@ -2904,7 +2905,7 @@ static int dwc3_gadget_ep_reclaim_trb_linear(struct dwc3_ep *dep, static bool dwc3_gadget_ep_request_completed(struct dwc3_request *req) { - return req->num_pending_sgs == 0; + return req->num_pending_sgs == 0 && req->num_queued_sgs == 0; } static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep, @@ -2913,7 +2914,7 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep, { int ret; - if (req->num_pending_sgs) + if (req->request.num_mapped_sgs) ret = dwc3_gadget_ep_reclaim_trb_sg(dep, req, event, status); else From dcb4b8ad6a448532d8b681b5d1a7036210b622de Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Fri, 14 May 2021 20:43:48 +0800 Subject: [PATCH 476/730] misc/uss720: fix memory leak in uss720_probe uss720_probe forgets to decrease the refcount of usbdev in uss720_probe. Fix this by decreasing the refcount of usbdev by usb_put_dev. BUG: memory leak unreferenced object 0xffff888101113800 (size 2048): comm "kworker/0:1", pid 7, jiffies 4294956777 (age 28.870s) hex dump (first 32 bytes): ff ff ff ff 31 00 00 00 00 00 00 00 00 00 00 00 ....1........... 00 00 00 00 00 00 00 00 00 00 00 00 03 00 00 00 ................ backtrace: [] kmalloc include/linux/slab.h:554 [inline] [] kzalloc include/linux/slab.h:684 [inline] [] usb_alloc_dev+0x32/0x450 drivers/usb/core/usb.c:582 [] hub_port_connect drivers/usb/core/hub.c:5129 [inline] [] hub_port_connect_change drivers/usb/core/hub.c:5363 [inline] [] port_event drivers/usb/core/hub.c:5509 [inline] [] hub_event+0x1171/0x20c0 drivers/usb/core/hub.c:5591 [] process_one_work+0x2c9/0x600 kernel/workqueue.c:2275 [] worker_thread+0x59/0x5d0 kernel/workqueue.c:2421 [] kthread+0x178/0x1b0 kernel/kthread.c:292 [] ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294 Fixes: 0f36163d3abe ("[PATCH] usb: fix uss720 schedule with interrupts off") Cc: stable Reported-by: syzbot+636c58f40a86b4a879e7@syzkaller.appspotmail.com Signed-off-by: Dongliang Mu Link: https://lore.kernel.org/r/20210514124348.6587-1-mudongliangabcd@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/uss720.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c index b5d661644263..748139d26263 100644 --- a/drivers/usb/misc/uss720.c +++ b/drivers/usb/misc/uss720.c @@ -736,6 +736,7 @@ static int uss720_probe(struct usb_interface *intf, parport_announce_port(pp); usb_set_intfdata(intf, pp); + usb_put_dev(usbdev); return 0; probe_abort: From acf5631c239dfc53489f739c4ad47f490c5181ff Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Sat, 15 May 2021 20:47:30 -0700 Subject: [PATCH 477/730] usb: typec: mux: Fix matching with typec_altmode_desc In typec_mux_match() "nval" is assigned the number of elements in the "svid" fwnode property, then the variable is used to store the success of the read and finally attempts to loop between 0 and "success" - i.e. not at all - and the code returns indicating that no match was found. Fix this by using a separate variable to track the success of the read, to allow the loop to get a change to find a match. Fixes: 96a6d031ca99 ("usb: typec: mux: Find the muxes by also matching against the device node") Reviewed-by: Heikki Krogerus Cc: stable Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20210516034730.621461-1-bjorn.andersson@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/mux.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/mux.c b/drivers/usb/typec/mux.c index 9da22ae3006c..8514bec7e1b8 100644 --- a/drivers/usb/typec/mux.c +++ b/drivers/usb/typec/mux.c @@ -191,6 +191,7 @@ static void *typec_mux_match(struct fwnode_handle *fwnode, const char *id, bool match; int nval; u16 *val; + int ret; int i; /* @@ -218,10 +219,10 @@ static void *typec_mux_match(struct fwnode_handle *fwnode, const char *id, if (!val) return ERR_PTR(-ENOMEM); - nval = fwnode_property_read_u16_array(fwnode, "svid", val, nval); - if (nval < 0) { + ret = fwnode_property_read_u16_array(fwnode, "svid", val, nval); + if (ret < 0) { kfree(val); - return ERR_PTR(nval); + return ERR_PTR(ret); } for (i = 0; i < nval; i++) { From 8c9b3caab3ac26db1da00b8117901640c55a69dd Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Sat, 15 May 2021 21:09:53 -0700 Subject: [PATCH 478/730] usb: typec: ucsi: Clear pending after acking connector change It's possible that the interrupt handler for the UCSI driver signals a connector changes after the handler clears the PENDING bit, but before it has sent the acknowledge request. The result is that the handler is invoked yet again, to ack the same connector change. At least some versions of the Qualcomm UCSI firmware will not handle the second - "spurious" - acknowledgment gracefully. So make sure to not clear the pending flag until the change is acknowledged. Any connector changes coming in after the acknowledgment, that would have the pending flag incorrectly cleared, would afaict be covered by the subsequent connector status check. Fixes: 217504a05532 ("usb: typec: ucsi: Work around PPM losing change information") Cc: stable Reviewed-by: Heikki Krogerus Acked-By: Benjamin Berg Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20210516040953.622409-1-bjorn.andersson@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 1d8b7df59ff4..b433169ef6fa 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -717,8 +717,8 @@ static void ucsi_handle_connector_change(struct work_struct *work) ucsi_send_command(con->ucsi, command, NULL, 0); /* 3. ACK connector change */ - clear_bit(EVENT_PENDING, &ucsi->flags); ret = ucsi_acknowledge_connector_change(ucsi); + clear_bit(EVENT_PENDING, &ucsi->flags); if (ret) { dev_err(ucsi->dev, "%s: ACK failed (%d)", __func__, ret); goto out_unlock; From c58bbe3477f75deb7883983e6cf428404a107555 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 19 May 2021 13:03:58 +0300 Subject: [PATCH 479/730] usb: typec: tcpm: Use LE to CPU conversion when accessing msg->header Sparse is not happy about strict type handling: .../typec/tcpm/tcpm.c:2720:27: warning: restricted __le16 degrades to integer .../typec/tcpm/tcpm.c:2814:32: warning: restricted __le16 degrades to integer Fix this by converting LE to CPU before use. Fixes: ae8a2ca8a221 ("usb: typec: Group all TCPCI/TCPM code together") Fixes: 64f7c494a3c0 ("typec: tcpm: Add support for sink PPS related messages") Cc: stable Cc: Adam Thomson Reviewed-by: Guenter Roeck Reviewed-by: Adam Thomson Reviewed-by: Heikki Krogerus Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210519100358.64018-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 64133e586c64..8fdfd7f65ad7 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -2717,7 +2717,7 @@ static void tcpm_pd_ext_msg_request(struct tcpm_port *port, enum pd_ext_msg_type type = pd_header_type_le(msg->header); unsigned int data_size = pd_ext_header_data_size_le(msg->ext_msg.header); - if (!(msg->ext_msg.header & PD_EXT_HDR_CHUNKED)) { + if (!(le16_to_cpu(msg->ext_msg.header) & PD_EXT_HDR_CHUNKED)) { tcpm_pd_handle_msg(port, PD_MSG_CTRL_NOT_SUPP, NONE_AMS); tcpm_log(port, "Unchunked extended messages unsupported"); return; @@ -2811,7 +2811,7 @@ static void tcpm_pd_rx_handler(struct kthread_work *work) "Data role mismatch, initiating error recovery"); tcpm_set_state(port, ERROR_RECOVERY, 0); } else { - if (msg->header & PD_HEADER_EXT_HDR) + if (le16_to_cpu(msg->header) & PD_HEADER_EXT_HDR) tcpm_pd_ext_msg_request(port, msg); else if (cnt) tcpm_pd_data_request(port, msg); From 10505b720189ecc3852596a70a7e391b2a5c5b57 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 20 May 2021 22:36:08 -0300 Subject: [PATCH 480/730] usb: Restore the usb_header label Commit caa93d9bd2d7 ("usb: Fix up movement of USB core kerneldoc location") removed the reference to the _usb_header label by mistake, which causes the following htmldocs build warning: Documentation/driver-api/usb/writing_usb_driver.rst:129: WARNING: undefined label: usb_header Restore the label. Fixes: caa93d9bd2d7 ("usb: Fix up movement of USB core kerneldoc location") Reported-by: Stephen Rothwell Signed-off-by: Fabio Estevam Link: https://lore.kernel.org/r/20210521013608.17957-1-festevam@gmail.com Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/usb/usb.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/driver-api/usb/usb.rst b/Documentation/driver-api/usb/usb.rst index 820e867af45a..2c94ff2f4385 100644 --- a/Documentation/driver-api/usb/usb.rst +++ b/Documentation/driver-api/usb/usb.rst @@ -123,6 +123,8 @@ are in ``drivers/usb/common/common.c``. In addition, some functions useful for creating debugging output are defined in ``drivers/usb/common/debug.c``. +.. _usb_header: + Host-Side Data Types and Macros =============================== From 02625c965239b71869326dd0461615f27307ecb3 Mon Sep 17 00:00:00 2001 From: Anirudh Rayabharam Date: Mon, 17 May 2021 00:57:14 +0530 Subject: [PATCH 481/730] video: hgafb: correctly handle card detect failure during probe The return value of hga_card_detect() is not properly handled causing the probe to succeed even though hga_card_detect() failed. Since probe succeeds, hgafb_open() can be called which will end up operating on an unmapped hga_vram. This results in an out-of-bounds access as reported by kernel test robot [1]. To fix this, correctly detect failure of hga_card_detect() by checking for a non-zero error code. [1]: https://lore.kernel.org/lkml/20210516150019.GB25903@xsang-OptiPlex-9020/ Fixes: dc13cac4862c ("video: hgafb: fix potential NULL pointer dereference") Cc: stable Reported-by: kernel test robot Reviewed-by: Igor Matheus Andrade Torrente Signed-off-by: Anirudh Rayabharam Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20210516192714.25823-1-mail@anirudhrb.com Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/hgafb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/hgafb.c b/drivers/video/fbdev/hgafb.c index cc8e62ae93f6..bd3d07aa4f0e 100644 --- a/drivers/video/fbdev/hgafb.c +++ b/drivers/video/fbdev/hgafb.c @@ -558,7 +558,7 @@ static int hgafb_probe(struct platform_device *pdev) int ret; ret = hga_card_detect(); - if (!ret) + if (ret) return ret; printk(KERN_INFO "hgafb: %s with %ldK of memory detected.\n", From bda7d3ab06f19c02dcef61fefcb9dd954dfd5e4f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 20 May 2021 15:08:39 +0200 Subject: [PATCH 482/730] kgdb: fix gcc-11 warnings harder 40cc3a80bb42 ("kgdb: fix gcc-11 warning on indentation") tried to fix up the gcc-11 complaints in this file by just reformatting the #defines. That worked for gcc 11.1.0, but in gcc 11.1.1 as shipped by Fedora 34, the warning came back for one of the #defines. Fix this up again by putting { } around the if statement, now it is quiet again. Fixes: 40cc3a80bb42 ("kgdb: fix gcc-11 warning on indentation") Cc: Arnd Bergmann Cc: Daniel Thompson Cc: Jason Wessel Link: https://lore.kernel.org/r/20210520130839.51987-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/kgdbts.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index 64d33e368509..67c5b452dd35 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -101,8 +101,9 @@ printk(KERN_INFO a); \ } while (0) #define v2printk(a...) do { \ - if (verbose > 1) \ + if (verbose > 1) { \ printk(KERN_INFO a); \ + } \ touch_nmi_watchdog(); \ } while (0) #define eprintk(a...) do { \ From 51cb8e206afd463e66f16869e5ddc95bef107142 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 21 May 2021 15:37:42 +0200 Subject: [PATCH 483/730] ALSA: usb-audio: fix control-request direction The direction of the pipe argument must match the request-type direction bit or control requests may fail depending on the host-controller-driver implementation. Fix the UAC2_CS_CUR request which erroneously used usb_sndctrlpipe(). Fixes: 93db51d06b32 ("ALSA: usb-audio: Check valid altsetting at parsing rates for UAC2/3") Cc: stable@vger.kernel.org # 5.10 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20210521133742.18098-1-johan@kernel.org Signed-off-by: Takashi Iwai --- sound/usb/format.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/format.c b/sound/usb/format.c index e6ff317a6785..2287f8c65315 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -436,7 +436,7 @@ static bool check_valid_altsetting_v2v3(struct snd_usb_audio *chip, int iface, if (snd_BUG_ON(altsetting >= 64 - 8)) return false; - err = snd_usb_ctl_msg(dev, usb_sndctrlpipe(dev, 0), UAC2_CS_CUR, + err = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), UAC2_CS_CUR, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN, UAC2_AS_VAL_ALT_SETTINGS << 8, iface, &raw_data, sizeof(raw_data)); From 764fa6e686e0107c0357a988d193de04cf047583 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Fri, 21 May 2021 17:50:12 +0930 Subject: [PATCH 484/730] ALSA: usb-audio: scarlett2: Fix device hang with ehci-pci Use usb_rcvctrlpipe() not usb_sndctrlpipe() for USB control input in the Scarlett Gen 2 mixer driver. This fixes the device hang during initialisation when used with the ehci-pci host driver. Fixes: 9e4d5c1be21f ("ALSA: usb-audio: Scarlett Gen 2 mixer interface") Signed-off-by: Geoffrey D. Bennett Cc: Link: https://lore.kernel.org/r/66a3d05dac325d5b53e4930578e143cef1f50dbe.1621584566.git.g@b4.vu Signed-off-by: Takashi Iwai --- sound/usb/mixer_scarlett_gen2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index 560c2ade829d..dcff3e3a49f3 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -635,7 +635,7 @@ static int scarlett2_usb( /* send a second message to get the response */ err = snd_usb_ctl_msg(mixer->chip->dev, - usb_sndctrlpipe(mixer->chip->dev, 0), + usb_rcvctrlpipe(mixer->chip->dev, 0), SCARLETT2_USB_VENDOR_SPECIFIC_CMD_RESP, USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN, 0, From 265d1a90e4fb6d3264d8122fbd10760e5e733be6 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Fri, 21 May 2021 17:50:13 +0930 Subject: [PATCH 485/730] ALSA: usb-audio: scarlett2: Improve driver startup messages Add separate init function to call the existing controls_create function so a custom error can be displayed if initialisation fails. Use info level instead of error for notifications. Display the VID/PID so device_setup is targeted to the right device. Display "enabled" message to easily confirm that the driver is loaded. Signed-off-by: Geoffrey D. Bennett Cc: Link: https://lore.kernel.org/r/b5d140c65f640faf2427e085fbbc0297b32e5fce.1621584566.git.g@b4.vu Signed-off-by: Takashi Iwai --- sound/usb/mixer_quirks.c | 2 +- sound/usb/mixer_scarlett_gen2.c | 79 +++++++++++++++++++++------------ sound/usb/mixer_scarlett_gen2.h | 2 +- 3 files changed, 52 insertions(+), 31 deletions(-) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index fda66b2dbb01..37ad77524c0b 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -3060,7 +3060,7 @@ int snd_usb_mixer_apply_create_quirk(struct usb_mixer_interface *mixer) case USB_ID(0x1235, 0x8203): /* Focusrite Scarlett 6i6 2nd Gen */ case USB_ID(0x1235, 0x8204): /* Focusrite Scarlett 18i8 2nd Gen */ case USB_ID(0x1235, 0x8201): /* Focusrite Scarlett 18i20 2nd Gen */ - err = snd_scarlett_gen2_controls_create(mixer); + err = snd_scarlett_gen2_init(mixer); break; case USB_ID(0x041e, 0x323b): /* Creative Sound Blaster E1 */ diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index dcff3e3a49f3..3ad8f61a2095 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -1997,38 +1997,11 @@ static int scarlett2_mixer_status_create(struct usb_mixer_interface *mixer) return usb_submit_urb(mixer->urb, GFP_KERNEL); } -/* Entry point */ -int snd_scarlett_gen2_controls_create(struct usb_mixer_interface *mixer) +int snd_scarlett_gen2_controls_create(struct usb_mixer_interface *mixer, + const struct scarlett2_device_info *info) { - const struct scarlett2_device_info *info; int err; - /* only use UAC_VERSION_2 */ - if (!mixer->protocol) - return 0; - - switch (mixer->chip->usb_id) { - case USB_ID(0x1235, 0x8203): - info = &s6i6_gen2_info; - break; - case USB_ID(0x1235, 0x8204): - info = &s18i8_gen2_info; - break; - case USB_ID(0x1235, 0x8201): - info = &s18i20_gen2_info; - break; - default: /* device not (yet) supported */ - return -EINVAL; - } - - if (!(mixer->chip->setup & SCARLETT2_ENABLE)) { - usb_audio_err(mixer->chip, - "Focusrite Scarlett Gen 2 Mixer Driver disabled; " - "use options snd_usb_audio device_setup=1 " - "to enable and report any issues to g@b4.vu"); - return 0; - } - /* Initialise private data, routing, sequence number */ err = scarlett2_init_private(mixer, info); if (err < 0) @@ -2073,3 +2046,51 @@ int snd_scarlett_gen2_controls_create(struct usb_mixer_interface *mixer) return 0; } + +int snd_scarlett_gen2_init(struct usb_mixer_interface *mixer) +{ + struct snd_usb_audio *chip = mixer->chip; + const struct scarlett2_device_info *info; + int err; + + /* only use UAC_VERSION_2 */ + if (!mixer->protocol) + return 0; + + switch (chip->usb_id) { + case USB_ID(0x1235, 0x8203): + info = &s6i6_gen2_info; + break; + case USB_ID(0x1235, 0x8204): + info = &s18i8_gen2_info; + break; + case USB_ID(0x1235, 0x8201): + info = &s18i20_gen2_info; + break; + default: /* device not (yet) supported */ + return -EINVAL; + } + + if (!(chip->setup & SCARLETT2_ENABLE)) { + usb_audio_info(chip, + "Focusrite Scarlett Gen 2 Mixer Driver disabled; " + "use options snd_usb_audio vid=0x%04x pid=0x%04x " + "device_setup=1 to enable and report any issues " + "to g@b4.vu", + USB_ID_VENDOR(chip->usb_id), + USB_ID_PRODUCT(chip->usb_id)); + return 0; + } + + usb_audio_info(chip, + "Focusrite Scarlett Gen 2 Mixer Driver enabled pid=0x%04x", + USB_ID_PRODUCT(chip->usb_id)); + + err = snd_scarlett_gen2_controls_create(mixer, info); + if (err < 0) + usb_audio_err(mixer->chip, + "Error initialising Scarlett Mixer Driver: %d", + err); + + return err; +} diff --git a/sound/usb/mixer_scarlett_gen2.h b/sound/usb/mixer_scarlett_gen2.h index 52e1dad77afd..668c6b0cb50a 100644 --- a/sound/usb/mixer_scarlett_gen2.h +++ b/sound/usb/mixer_scarlett_gen2.h @@ -2,6 +2,6 @@ #ifndef __USB_MIXER_SCARLETT_GEN2_H #define __USB_MIXER_SCARLETT_GEN2_H -int snd_scarlett_gen2_controls_create(struct usb_mixer_interface *mixer); +int snd_scarlett_gen2_init(struct usb_mixer_interface *mixer); #endif /* __USB_MIXER_SCARLETT_GEN2_H */ From 746e4acf87bcacf1406e05ef24a0b7139147c63e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 21 May 2021 15:31:09 +0200 Subject: [PATCH 486/730] USB: trancevibrator: fix control-request direction The direction of the pipe argument must match the request-type direction bit or control requests may fail depending on the host-controller-driver implementation. Fix the set-speed request which erroneously used USB_DIR_IN and update the default timeout argument to match (same value). Fixes: 5638e4d92e77 ("USB: add PlayStation 2 Trance Vibrator driver") Cc: stable@vger.kernel.org # 2.6.19 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20210521133109.17396-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/trancevibrator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/misc/trancevibrator.c b/drivers/usb/misc/trancevibrator.c index a3dfc77578ea..26baba3ab7d7 100644 --- a/drivers/usb/misc/trancevibrator.c +++ b/drivers/usb/misc/trancevibrator.c @@ -61,9 +61,9 @@ static ssize_t speed_store(struct device *dev, struct device_attribute *attr, /* Set speed */ retval = usb_control_msg(tv->udev, usb_sndctrlpipe(tv->udev, 0), 0x01, /* vendor request: set speed */ - USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_OTHER, + USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_OTHER, tv->speed, /* speed value */ - 0, NULL, 0, USB_CTRL_GET_TIMEOUT); + 0, NULL, 0, USB_CTRL_SET_TIMEOUT); if (retval) { tv->speed = old; dev_dbg(&tv->udev->dev, "retval = %d\n", retval); From 016002848c82eeb5d460489ce392d91fe18c475c Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Fri, 21 May 2021 06:08:43 +0000 Subject: [PATCH 487/730] serial: rp2: use 'request_firmware' instead of 'request_firmware_nowait' In 'rp2_probe', the driver registers 'rp2_uart_interrupt' then calls 'rp2_fw_cb' through 'request_firmware_nowait'. In 'rp2_fw_cb', if the firmware don't exists, function just return without initializing ports of 'rp2_card'. But now the interrupt handler function has been registered, and when an interrupt comes, 'rp2_uart_interrupt' may access those ports then causing NULL pointer dereference or other bugs. Because the driver does some initialization work in 'rp2_fw_cb', in order to make the driver ready to handle interrupts, 'request_firmware' should be used instead of asynchronous 'request_firmware_nowait'. This report reveals it: INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.19.177-gdba4159c14ef-dirty #45 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.0-59- gc9ba5276e321-prebuilt.qemu.org 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xec/0x156 lib/dump_stack.c:118 assign_lock_key kernel/locking/lockdep.c:727 [inline] register_lock_class+0x14e5/0x1ba0 kernel/locking/lockdep.c:753 __lock_acquire+0x187/0x3750 kernel/locking/lockdep.c:3303 lock_acquire+0x124/0x340 kernel/locking/lockdep.c:3907 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x32/0x50 kernel/locking/spinlock.c:144 spin_lock include/linux/spinlock.h:329 [inline] rp2_ch_interrupt drivers/tty/serial/rp2.c:466 [inline] rp2_asic_interrupt.isra.9+0x15d/0x990 drivers/tty/serial/rp2.c:493 rp2_uart_interrupt+0x49/0xe0 drivers/tty/serial/rp2.c:504 __handle_irq_event_percpu+0xfb/0x770 kernel/irq/handle.c:149 handle_irq_event_percpu+0x79/0x150 kernel/irq/handle.c:189 handle_irq_event+0xac/0x140 kernel/irq/handle.c:206 handle_fasteoi_irq+0x232/0x5c0 kernel/irq/chip.c:725 generic_handle_irq_desc include/linux/irqdesc.h:155 [inline] handle_irq+0x230/0x3a0 arch/x86/kernel/irq_64.c:87 do_IRQ+0xa7/0x1e0 arch/x86/kernel/irq.c:247 common_interrupt+0xf/0xf arch/x86/entry/entry_64.S:670 RIP: 0010:native_safe_halt+0x28/0x30 arch/x86/include/asm/irqflags.h:61 Code: 00 00 55 be 04 00 00 00 48 c7 c7 00 c2 2f 8c 48 89 e5 e8 fb 31 e7 f8 8b 05 75 af 8d 03 85 c0 7e 07 0f 00 2d 8a 61 65 00 fb f4 <5d> c3 90 90 90 90 90 90 0f 1f 44 00 00 55 48 89 e5 41 57 41 56 41 RSP: 0018:ffff88806b71fcc8 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffde RAX: 0000000000000000 RBX: ffffffff8bde7e48 RCX: ffffffff88a21285 RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffffffff8c2fc200 RBP: ffff88806b71fcc8 R08: fffffbfff185f840 R09: fffffbfff185f840 R10: 0000000000000001 R11: fffffbfff185f840 R12: 0000000000000002 R13: ffffffff8bea18a0 R14: 0000000000000000 R15: 0000000000000000 arch_safe_halt arch/x86/include/asm/paravirt.h:94 [inline] default_idle+0x6f/0x360 arch/x86/kernel/process.c:557 arch_cpu_idle+0xf/0x20 arch/x86/kernel/process.c:548 default_idle_call+0x3b/0x60 kernel/sched/idle.c:93 cpuidle_idle_call kernel/sched/idle.c:153 [inline] do_idle+0x2ab/0x3c0 kernel/sched/idle.c:263 cpu_startup_entry+0xcb/0xe0 kernel/sched/idle.c:369 start_secondary+0x3b8/0x4e0 arch/x86/kernel/smpboot.c:271 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:243 BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 PGD 8000000056d27067 P4D 8000000056d27067 PUD 56d28067 PMD 0 Oops: 0000 [#1] PREEMPT SMP KASAN PTI CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.19.177-gdba4159c14ef-dirty #45 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.0-59- gc9ba5276e321-prebuilt.qemu.org 04/01/2014 RIP: 0010:readl arch/x86/include/asm/io.h:59 [inline] RIP: 0010:rp2_ch_interrupt drivers/tty/serial/rp2.c:472 [inline] RIP: 0010:rp2_asic_interrupt.isra.9+0x181/0x990 drivers/tty/serial/rp2.c: 493 Code: df e8 43 5d c2 05 48 8d 83 e8 01 00 00 48 89 85 60 ff ff ff 48 c1 e8 03 42 80 3c 30 00 0f 85 aa 07 00 00 48 8b 83 e8 01 00 00 <8b> 40 10 89 c1 89 85 68 ff ff ff 48 8b 83 e8 01 00 00 89 48 10 83 RSP: 0018:ffff88806c287cd0 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffff88806ade6820 RCX: ffffffff814300b1 RDX: 1ffff1100d5bcd06 RSI: 0000000000000004 RDI: ffff88806ade6820 RBP: ffff88806c287db8 R08: ffffed100d5bcd05 R09: ffffed100d5bcd05 R10: 0000000000000001 R11: ffffed100d5bcd04 R12: ffffc90001e00000 R13: ffff888069654e10 R14: dffffc0000000000 R15: ffff888069654df0 FS: 0000000000000000(0000) GS:ffff88806c280000(0000) knlGS: 0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 000000006892c000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: rp2_uart_interrupt+0x49/0xe0 drivers/tty/serial/rp2.c:504 __handle_irq_event_percpu+0xfb/0x770 kernel/irq/handle.c:149 handle_irq_event_percpu+0x79/0x150 kernel/irq/handle.c:189 handle_irq_event+0xac/0x140 kernel/irq/handle.c:206 handle_fasteoi_irq+0x232/0x5c0 kernel/irq/chip.c:725 generic_handle_irq_desc include/linux/irqdesc.h:155 [inline] handle_irq+0x230/0x3a0 arch/x86/kernel/irq_64.c:87 do_IRQ+0xa7/0x1e0 arch/x86/kernel/irq.c:247 common_interrupt+0xf/0xf arch/x86/entry/entry_64.S:670 RIP: 0010:native_safe_halt+0x28/0x30 arch/x86/include/asm/irqflags.h:61 Code: 00 00 55 be 04 00 00 00 48 c7 c7 00 c2 2f 8c 48 89 e5 e8 fb 31 e7 f8 8b 05 75 af 8d 03 85 c0 7e 07 0f 00 2d 8a 61 65 00 fb f4 <5d> c3 90 90 90 90 90 90 0f 1f 44 00 00 55 48 89 e5 41 57 41 56 41 RSP: 0018:ffff88806b71fcc8 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffde RAX: 0000000000000000 RBX: ffffffff8bde7e48 RCX: ffffffff88a21285 RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffffffff8c2fc200 RBP: ffff88806b71fcc8 R08: fffffbfff185f840 R09: fffffbfff185f840 R10: 0000000000000001 R11: fffffbfff185f840 R12: 0000000000000002 R13: ffffffff8bea18a0 R14: 0000000000000000 R15: 0000000000000000 arch_safe_halt arch/x86/include/asm/paravirt.h:94 [inline] default_idle+0x6f/0x360 arch/x86/kernel/process.c:557 arch_cpu_idle+0xf/0x20 arch/x86/kernel/process.c:548 default_idle_call+0x3b/0x60 kernel/sched/idle.c:93 cpuidle_idle_call kernel/sched/idle.c:153 [inline] do_idle+0x2ab/0x3c0 kernel/sched/idle.c:263 cpu_startup_entry+0xcb/0xe0 kernel/sched/idle.c:369 start_secondary+0x3b8/0x4e0 arch/x86/kernel/smpboot.c:271 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:243 Modules linked in: Dumping ftrace buffer: (ftrace buffer empty) CR2: 0000000000000010 ---[ end trace 11804dbb55cb1a64 ]--- RIP: 0010:readl arch/x86/include/asm/io.h:59 [inline] RIP: 0010:rp2_ch_interrupt drivers/tty/serial/rp2.c:472 [inline] RIP: 0010:rp2_asic_interrupt.isra.9+0x181/0x990 drivers/tty/serial/rp2.c: 493 Code: df e8 43 5d c2 05 48 8d 83 e8 01 00 00 48 89 85 60 ff ff ff 48 c1 e8 03 42 80 3c 30 00 0f 85 aa 07 00 00 48 8b 83 e8 01 00 00 <8b> 40 10 89 c1 89 85 68 ff ff ff 48 8b 83 e8 01 00 00 89 48 10 83 RSP: 0018:ffff88806c287cd0 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffff88806ade6820 RCX: ffffffff814300b1 RDX: 1ffff1100d5bcd06 RSI: 0000000000000004 RDI: ffff88806ade6820 RBP: ffff88806c287db8 R08: ffffed100d5bcd05 R09: ffffed100d5bcd05 R10: 0000000000000001 R11: ffffed100d5bcd04 R12: ffffc90001e00000 R13: ffff888069654e10 R14: dffffc0000000000 R15: ffff888069654df0 FS: 0000000000000000(0000) GS:ffff88806c280000(0000) knlGS: 0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 000000006892c000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Reported-by: Zheyu Ma Signed-off-by: Zheyu Ma Link: https://lore.kernel.org/r/1621577323-1541-1-git-send-email-zheyuma97@gmail.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/rp2.c | 52 +++++++++++++--------------------------- 1 file changed, 17 insertions(+), 35 deletions(-) diff --git a/drivers/tty/serial/rp2.c b/drivers/tty/serial/rp2.c index d60abffab70e..6689d8add8f7 100644 --- a/drivers/tty/serial/rp2.c +++ b/drivers/tty/serial/rp2.c @@ -195,7 +195,6 @@ struct rp2_card { void __iomem *bar0; void __iomem *bar1; spinlock_t card_lock; - struct completion fw_loaded; }; #define RP_ID(prod) PCI_VDEVICE(RP, (prod)) @@ -662,17 +661,10 @@ static void rp2_remove_ports(struct rp2_card *card) card->initialized_ports = 0; } -static void rp2_fw_cb(const struct firmware *fw, void *context) +static int rp2_load_firmware(struct rp2_card *card, const struct firmware *fw) { - struct rp2_card *card = context; resource_size_t phys_base; - int i, rc = -ENOENT; - - if (!fw) { - dev_err(&card->pdev->dev, "cannot find '%s' firmware image\n", - RP2_FW_NAME); - goto no_fw; - } + int i, rc = 0; phys_base = pci_resource_start(card->pdev, 1); @@ -718,23 +710,13 @@ static void rp2_fw_cb(const struct firmware *fw, void *context) card->initialized_ports++; } - release_firmware(fw); -no_fw: - /* - * rp2_fw_cb() is called from a workqueue long after rp2_probe() - * has already returned success. So if something failed here, - * we'll just leave the now-dormant device in place until somebody - * unbinds it. - */ - if (rc) - dev_warn(&card->pdev->dev, "driver initialization failed\n"); - - complete(&card->fw_loaded); + return rc; } static int rp2_probe(struct pci_dev *pdev, const struct pci_device_id *id) { + const struct firmware *fw; struct rp2_card *card; struct rp2_uart_port *ports; void __iomem * const *bars; @@ -745,7 +727,6 @@ static int rp2_probe(struct pci_dev *pdev, return -ENOMEM; pci_set_drvdata(pdev, card); spin_lock_init(&card->card_lock); - init_completion(&card->fw_loaded); rc = pcim_enable_device(pdev); if (rc) @@ -778,22 +759,24 @@ static int rp2_probe(struct pci_dev *pdev, return -ENOMEM; card->ports = ports; + rc = request_firmware(&fw, RP2_FW_NAME, &pdev->dev); + if (rc < 0) { + dev_err(&pdev->dev, "cannot find '%s' firmware image\n", + RP2_FW_NAME); + return rc; + } + + rc = rp2_load_firmware(card, fw); + + release_firmware(fw); + if (rc < 0) + return rc; + rc = devm_request_irq(&pdev->dev, pdev->irq, rp2_uart_interrupt, IRQF_SHARED, DRV_NAME, card); if (rc) return rc; - /* - * Only catastrophic errors (e.g. ENOMEM) are reported here. - * If the FW image is missing, we'll find out in rp2_fw_cb() - * and print an error message. - */ - rc = request_firmware_nowait(THIS_MODULE, 1, RP2_FW_NAME, &pdev->dev, - GFP_KERNEL, card, rp2_fw_cb); - if (rc) - return rc; - dev_dbg(&pdev->dev, "waiting for firmware blob...\n"); - return 0; } @@ -801,7 +784,6 @@ static void rp2_remove(struct pci_dev *pdev) { struct rp2_card *card = pci_get_drvdata(pdev); - wait_for_completion(&card->fw_loaded); rp2_remove_ports(card); } From ec347b7c319156c3b488681d1813d08d88499cc6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 21 May 2021 16:00:31 -0300 Subject: [PATCH 488/730] tools headers UAPI: Sync linux/fs.h with the kernel sources To pick the trivial change in: 63c8af5687f6b1b7 ("block: uapi: fix comment about block device ioctl") This silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/fs.h' differs from latest version at 'include/uapi/linux/fs.h' diff -u tools/include/uapi/linux/fs.h include/uapi/linux/fs.h Cc: Damien Le Moal Cc: Jens Axboe Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h index f44eb0a04afd..4c32e97dcdf0 100644 --- a/tools/include/uapi/linux/fs.h +++ b/tools/include/uapi/linux/fs.h @@ -185,7 +185,7 @@ struct fsxattr { #define BLKROTATIONAL _IO(0x12,126) #define BLKZEROOUT _IO(0x12,127) /* - * A jump here: 130-131 are reserved for zoned block devices + * A jump here: 130-136 are reserved for zoned block devices * (see uapi/linux/blkzoned.h) */ From 4224680ee7aaf0f13ab762ffb2a77373737dce5e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 21 May 2021 16:00:31 -0300 Subject: [PATCH 489/730] tools headers UAPI: Sync linux/perf_event.h with the kernel sources To pick the trivial change in: 0683b53197b55343 ("signal: Deliver all of the siginfo perf data in _perf") This silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h' diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h Cc: Eric W. Biederman Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index bf8143505c49..f92880a15645 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -464,7 +464,7 @@ struct perf_event_attr { /* * User provided data if sigtrap=1, passed back to user via - * siginfo_t::si_perf, e.g. to permit user to identify the event. + * siginfo_t::si_perf_data, e.g. to permit user to identify the event. */ __u64 sig_data; }; From bffcbe79370e8fda7f1d19899de83aa2a833bf69 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 21 May 2021 16:14:00 -0300 Subject: [PATCH 490/730] tools headers UAPI: Sync files changed by the quotactl_path unwiring To pick the changes in this csets: 5b9fedb31e476693 ("quota: Disable quotactl_path syscall") That silences these perf build warnings: Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/unistd.h' differs from latest version at 'include/uapi/asm-generic/unistd.h' diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl' diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl Warning: Kernel ABI header at 'tools/perf/arch/powerpc/entry/syscalls/syscall.tbl' differs from latest version at 'arch/powerpc/kernel/syscalls/syscall.tbl' diff -u tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl Warning: Kernel ABI header at 'tools/perf/arch/s390/entry/syscalls/syscall.tbl' differs from latest version at 'arch/s390/kernel/syscalls/syscall.tbl' diff -u tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl Warning: Kernel ABI header at 'tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl' differs from latest version at 'arch/mips/kernel/syscalls/syscall_n64.tbl' diff -u tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl Cc: Jan Kara Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl | 2 +- tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 2 +- tools/perf/arch/s390/entry/syscalls/syscall.tbl | 2 +- tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index 9974f5f8e49b..9cd1c34f31b5 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -357,7 +357,7 @@ 440 n64 process_madvise sys_process_madvise 441 n64 epoll_pwait2 sys_epoll_pwait2 442 n64 mount_setattr sys_mount_setattr -443 n64 quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 n64 landlock_create_ruleset sys_landlock_create_ruleset 445 n64 landlock_add_rule sys_landlock_add_rule 446 n64 landlock_restrict_self sys_landlock_restrict_self diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 2e68fbb57cc6..8f052ff4058c 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -522,7 +522,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index 7e4a2aba366d..0690263df1dd 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -445,7 +445,7 @@ 440 common process_madvise sys_process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index ecd551b08d05..ce18119ea0d0 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -364,7 +364,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self From a6172059758ba1b496ae024cece7d5bdc8d017db Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 21 May 2021 12:20:51 +0300 Subject: [PATCH 491/730] perf scripts python: exported-sql-viewer.py: Fix copy to clipboard from Top Calls by elapsed Time report Provide missing argument to prevent following error when copying a selection to the clipboard: Traceback (most recent call last): File "tools/perf/scripts/python/exported-sql-viewer.py", line 4041, in menu.addAction(CreateAction("&Copy selection", "Copy to clipboard", lambda: CopyCellsToClipboardHdr(self.view), self.view)) File "tools/perf/scripts/python/exported-sql-viewer.py", line 4021, in CopyCellsToClipboardHdr CopyCellsToClipboard(view, False, True) File "tools/perf/scripts/python/exported-sql-viewer.py", line 4018, in CopyCellsToClipboard view.CopyCellsToClipboard(view, as_csv, with_hdr) File "tools/perf/scripts/python/exported-sql-viewer.py", line 3871, in CopyTableCellsToClipboard val = model.headerData(col, Qt.Horizontal) TypeError: headerData() missing 1 required positional argument: 'role' Fixes: 96c43b9a7ab3b ("perf scripts python: exported-sql-viewer.py: Add copy to clipboard") Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20210521092053.25683-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 7daa8bb70a5a..b5078d65704e 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -3868,7 +3868,7 @@ def CopyTableCellsToClipboard(view, as_csv=False, with_hdr=False): if with_hdr: model = indexes[0].model() for col in range(min_col, max_col + 1): - val = model.headerData(col, Qt.Horizontal) + val = model.headerData(col, Qt.Horizontal, Qt.DisplayRole) if as_csv: text += sep + ToCSValue(val) sep = "," From fd931b2e234a7cc451a7bbb1965d6ce623189158 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 21 May 2021 12:20:52 +0300 Subject: [PATCH 492/730] perf scripts python: exported-sql-viewer.py: Fix Array TypeError The 'Array' class is present in more than one python standard library. In some versions of Python 3, the following error occurs: Traceback (most recent call last): File "tools/perf/scripts/python/exported-sql-viewer.py", line 4702, in reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda a=None,x=dbid: self.NewBranchView(x), self)) File "tools/perf/scripts/python/exported-sql-viewer.py", line 4727, in NewBranchView BranchWindow(self.glb, event_id, ReportVars(), self) File "tools/perf/scripts/python/exported-sql-viewer.py", line 3208, in __init__ self.model = LookupCreateModel(model_name, lambda: BranchModel(glb, event_id, report_vars.where_clause)) File "tools/perf/scripts/python/exported-sql-viewer.py", line 343, in LookupCreateModel model = create_fn() File "tools/perf/scripts/python/exported-sql-viewer.py", line 3208, in self.model = LookupCreateModel(model_name, lambda: BranchModel(glb, event_id, report_vars.where_clause)) File "tools/perf/scripts/python/exported-sql-viewer.py", line 3124, in __init__ self.fetcher = SQLFetcher(glb, sql, prep, self.AddSample) File "tools/perf/scripts/python/exported-sql-viewer.py", line 2658, in __init__ self.buffer = Array(c_char, self.buffer_size, lock=False) TypeError: abstract class This apparently happens because Python can be inconsistent about which class of the name 'Array' gets imported. Fix by importing explicitly by name so that only the desired 'Array' gets imported. Fixes: 8392b74b575c3 ("perf scripts python: exported-sql-viewer.py: Add ability to display all the database tables") Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20210521092053.25683-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index b5078d65704e..4a63843f623c 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -125,8 +125,9 @@ if pyside_version_1: from PySide.QtGui import * from PySide.QtSql import * -from decimal import * -from ctypes import * +from decimal import Decimal, ROUND_HALF_UP +from ctypes import CDLL, Structure, create_string_buffer, addressof, sizeof, \ + c_void_p, c_bool, c_byte, c_char, c_int, c_uint, c_longlong, c_ulonglong from multiprocessing import Process, Array, Value, Event # xrange is range in Python3 From f56299a9c998e0bfbd4ab07cafe9eb8444512448 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 21 May 2021 12:20:53 +0300 Subject: [PATCH 493/730] perf scripts python: exported-sql-viewer.py: Fix warning display Deprecation warnings are useful only for the developer, not an end user. Display warnings only when requested using the python -W option. This stops the display of warnings like: tools/perf/scripts/python/exported-sql-viewer.py:5102: DeprecationWarning: an integer is required (got type PySide2.QtCore.Qt.AlignmentFlag). Implicit conversion to integers using __int__ is deprecated, and may be removed in a future version of Python. err = app.exec_() Since the warning can be fixed only in PySide2, we must wait for it to be finally fixed there. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org # v5.3+ Link: http://lore.kernel.org/lkml/20210521092053.25683-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 4a63843f623c..711d4f9f5645 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -91,6 +91,11 @@ from __future__ import print_function import sys +# Only change warnings if the python -W option was not used +if not sys.warnoptions: + import warnings + # PySide2 causes deprecation warnings, ignore them. + warnings.filterwarnings("ignore", category=DeprecationWarning) import argparse import weakref import threading From f42907e8a4515635615a6ffd44242454ef843c04 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 21 May 2021 20:51:27 +0300 Subject: [PATCH 494/730] perf script: Add missing PERF_IP_FLAG_CHARS for VM-Entry and VM-Exit Add 'g' (guest) for VM-Entry and 'h' (host) for VM-Exit. Fixes: c025d46cd932c ("perf script: Add branch types for VM-Entry and VM-Exit") Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20210521175127.27264-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-intel-pt.txt | 6 +++--- tools/perf/Documentation/perf-script.txt | 7 ++++--- tools/perf/util/event.h | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index 1dcec73c910c..bcf3eca5afbe 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -108,9 +108,9 @@ displayed as follows: perf script --itrace=ibxwpe -F+flags -The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, -system, asynchronous, interrupt, transaction abort, trace begin, trace end, and -in transaction, respectively. +The flags are "bcrosyiABExgh" which stand for branch, call, return, conditional, +system, asynchronous, interrupt, transaction abort, trace begin, trace end, +in transaction, VM-entry, and VM-exit respectively. perf script also supports higher level ways to dump instruction traces: diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 5b8b61075039..48a5f5b26dd4 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -183,14 +183,15 @@ OPTIONS At this point usage is displayed, and perf-script exits. The flags field is synthesized and may have a value when Instruction - Trace decoding. The flags are "bcrosyiABEx" which stand for branch, + Trace decoding. The flags are "bcrosyiABExgh" which stand for branch, call, return, conditional, system, asynchronous, interrupt, - transaction abort, trace begin, trace end, and in transaction, + transaction abort, trace begin, trace end, in transaction, VM-Entry, and VM-Exit respectively. Known combinations of flags are printed more nicely e.g. "call" for "bc", "return" for "br", "jcc" for "bo", "jmp" for "b", "int" for "bci", "iret" for "bri", "syscall" for "bcs", "sysret" for "brs", "async" for "by", "hw int" for "bcyi", "tx abrt" for "bA", "tr strt" for "bB", - "tr end" for "bE". However the "x" flag will be display separately in those + "tr end" for "bE", "vmentry" for "bcg", "vmexit" for "bch". + However the "x" flag will be displayed separately in those cases e.g. "jcc (x)" for a condition branch within a transaction. The callindent field is synthesized and may have a value when diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 8a62fb39e365..19ad64f2bd83 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -100,7 +100,7 @@ enum { PERF_IP_FLAG_VMEXIT = 1ULL << 12, }; -#define PERF_IP_FLAG_CHARS "bcrosyiABEx" +#define PERF_IP_FLAG_CHARS "bcrosyiABExgh" #define PERF_BRANCH_MASK (\ PERF_IP_FLAG_BRANCH |\ From f8b61bd20479c094fb421da42fef6b4ff22a589e Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 11 May 2021 23:51:16 -0700 Subject: [PATCH 495/730] perf stat: Skip evlist__[enable|disable] when all events uses BPF When all events of a perf-stat session use BPF, it is not necessary to call evlist__enable() and evlist__disable(). Skip them when all_counters_use_bpf is true. Signed-off-by: Song Liu Reported-by: Jiri Olsa Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 13 ++++++++++--- tools/perf/util/evlist.c | 3 --- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 5a830ae09418..f9f74a514315 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -572,7 +572,8 @@ static int enable_counters(void) * - we have initial delay configured */ if (!target__none(&target) || stat_config.initial_delay) { - evlist__enable(evsel_list); + if (!all_counters_use_bpf) + evlist__enable(evsel_list); if (stat_config.initial_delay > 0) pr_info(EVLIST_ENABLED_MSG); } @@ -581,13 +582,19 @@ static int enable_counters(void) static void disable_counters(void) { + struct evsel *counter; + /* * If we don't have tracee (attaching to task or cpu), counters may * still be running. To get accurate group ratios, we must stop groups * from counting before reading their constituent counters. */ - if (!target__none(&target)) - evlist__disable(evsel_list); + if (!target__none(&target)) { + evlist__for_each_entry(evsel_list, counter) + bpf_counter__disable(counter); + if (!all_counters_use_bpf) + evlist__disable(evsel_list); + } } static volatile int workload_exec_errno; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6e5c41528c7d..6ea3e677dc1e 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -425,9 +425,6 @@ static void __evlist__disable(struct evlist *evlist, char *evsel_name) if (affinity__setup(&affinity) < 0) return; - evlist__for_each_entry(evlist, pos) - bpf_counter__disable(pos); - /* Disable 'immediate' events last */ for (imm = 0; imm <= 1; imm++) { evlist__for_each_cpu(evlist, i, cpu) { From 80dd33cf72d1ab4f0af303f1fa242c6d6c8d328f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 14 May 2021 14:10:15 +0200 Subject: [PATCH 496/730] drivers: base: Fix device link removal When device_link_free() drops references to the supplier and consumer devices of the device link going away and the reference being dropped turns out to be the last one for any of those device objects, its ->release callback will be invoked and it may sleep which goes against the SRCU callback execution requirements. To address this issue, make the device link removal code carry out the device_link_free() actions preceded by SRCU synchronization from a separate work item (the "long" workqueue is used for that, because it does not matter when the device link memory is released and it may take time to get to that point) instead of using SRCU callbacks. While at it, make the code work analogously when SRCU is not enabled to reduce the differences between the SRCU and non-SRCU cases. Fixes: 843e600b8a2b ("driver core: Fix sleeping in invalid context during device link deletion") Cc: stable Reported-by: chenxiang (M) Tested-by: chenxiang (M) Reviewed-by: Saravana Kannan Signed-off-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/5722787.lOV4Wx5bFT@kreacher Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 37 +++++++++++++++++++++++-------------- include/linux/device.h | 6 ++---- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 628e33939aca..61c19641e1d0 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -194,6 +194,11 @@ int device_links_read_lock_held(void) { return srcu_read_lock_held(&device_links_srcu); } + +static void device_link_synchronize_removal(void) +{ + synchronize_srcu(&device_links_srcu); +} #else /* !CONFIG_SRCU */ static DECLARE_RWSEM(device_links_lock); @@ -224,6 +229,10 @@ int device_links_read_lock_held(void) return lockdep_is_held(&device_links_lock); } #endif + +static inline void device_link_synchronize_removal(void) +{ +} #endif /* !CONFIG_SRCU */ static bool device_is_ancestor(struct device *dev, struct device *target) @@ -445,8 +454,13 @@ static struct attribute *devlink_attrs[] = { }; ATTRIBUTE_GROUPS(devlink); -static void device_link_free(struct device_link *link) +static void device_link_release_fn(struct work_struct *work) { + struct device_link *link = container_of(work, struct device_link, rm_work); + + /* Ensure that all references to the link object have been dropped. */ + device_link_synchronize_removal(); + while (refcount_dec_not_one(&link->rpm_active)) pm_runtime_put(link->supplier); @@ -455,24 +469,19 @@ static void device_link_free(struct device_link *link) kfree(link); } -#ifdef CONFIG_SRCU -static void __device_link_free_srcu(struct rcu_head *rhead) -{ - device_link_free(container_of(rhead, struct device_link, rcu_head)); -} - static void devlink_dev_release(struct device *dev) { struct device_link *link = to_devlink(dev); - call_srcu(&device_links_srcu, &link->rcu_head, __device_link_free_srcu); + INIT_WORK(&link->rm_work, device_link_release_fn); + /* + * It may take a while to complete this work because of the SRCU + * synchronization in device_link_release_fn() and if the consumer or + * supplier devices get deleted when it runs, so put it into the "long" + * workqueue. + */ + queue_work(system_long_wq, &link->rm_work); } -#else -static void devlink_dev_release(struct device *dev) -{ - device_link_free(to_devlink(dev)); -} -#endif static struct class devlink_class = { .name = "devlink", diff --git a/include/linux/device.h b/include/linux/device.h index 38a2071cf776..f1a00040fa53 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -570,7 +570,7 @@ struct device { * @flags: Link flags. * @rpm_active: Whether or not the consumer device is runtime-PM-active. * @kref: Count repeated addition of the same link. - * @rcu_head: An RCU head to use for deferred execution of SRCU callbacks. + * @rm_work: Work structure used for removing the link. * @supplier_preactivated: Supplier has been made active before consumer probe. */ struct device_link { @@ -583,9 +583,7 @@ struct device_link { u32 flags; refcount_t rpm_active; struct kref kref; -#ifdef CONFIG_SRCU - struct rcu_head rcu_head; -#endif + struct work_struct rm_work; bool supplier_preactivated; /* Owned by consumer probe. */ }; From 0c8713153fbf7ba4e45172e139d501c86006dc03 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 14 May 2021 14:11:19 +0200 Subject: [PATCH 497/730] drivers: base: Reduce device link removal code duplication Reduce device link removal code duplication between the cases when SRCU is enabled and when it is disabled by moving the only differing piece of it (which is the removal of the link from the consumer and supplier lists) into a separate wrapper function (defined differently for each of the cases in question). No intentional functional impact. Reviewed-by: Saravana Kannan Signed-off-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/4326215.LvFx2qVVIh@kreacher Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 61c19641e1d0..54ba506e5a89 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -199,6 +199,12 @@ static void device_link_synchronize_removal(void) { synchronize_srcu(&device_links_srcu); } + +static void device_link_remove_from_lists(struct device_link *link) +{ + list_del_rcu(&link->s_node); + list_del_rcu(&link->c_node); +} #else /* !CONFIG_SRCU */ static DECLARE_RWSEM(device_links_lock); @@ -233,6 +239,12 @@ int device_links_read_lock_held(void) static inline void device_link_synchronize_removal(void) { } + +static void device_link_remove_from_lists(struct device_link *link) +{ + list_del(&link->s_node); + list_del(&link->c_node); +} #endif /* !CONFIG_SRCU */ static bool device_is_ancestor(struct device *dev, struct device *target) @@ -855,7 +867,6 @@ out: } EXPORT_SYMBOL_GPL(device_link_add); -#ifdef CONFIG_SRCU static void __device_link_del(struct kref *kref) { struct device_link *link = container_of(kref, struct device_link, kref); @@ -865,25 +876,9 @@ static void __device_link_del(struct kref *kref) pm_runtime_drop_link(link); - list_del_rcu(&link->s_node); - list_del_rcu(&link->c_node); + device_link_remove_from_lists(link); device_unregister(&link->link_dev); } -#else /* !CONFIG_SRCU */ -static void __device_link_del(struct kref *kref) -{ - struct device_link *link = container_of(kref, struct device_link, kref); - - dev_info(link->consumer, "Dropping the link to %s\n", - dev_name(link->supplier)); - - pm_runtime_drop_link(link); - - list_del(&link->s_node); - list_del(&link->c_node); - device_unregister(&link->link_dev); -} -#endif /* !CONFIG_SRCU */ static void device_link_put_kref(struct device_link *link) { From 70ca3c57ff914113f681e657634f7fbfa68e1ad1 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Sat, 15 May 2021 16:03:15 +0900 Subject: [PATCH 498/730] scsi: target: core: Avoid smp_processor_id() in preemptible code The BUG message "BUG: using smp_processor_id() in preemptible [00000000] code" was observed for TCMU devices with kernel config DEBUG_PREEMPT. The message was observed when blktests block/005 was run on TCMU devices with fileio backend or user:zbc backend [1]. The commit 1130b499b4a7 ("scsi: target: tcm_loop: Use LIO wq cmd submission helper") triggered the symptom. The commit modified work queue to handle commands and changed 'current->nr_cpu_allowed' at smp_processor_id() call. The message was also observed at system shutdown when TCMU devices were not cleaned up [2]. The function smp_processor_id() was called in SCSI host work queue for abort handling, and triggered the BUG message. This symptom was observed regardless of the commit 1130b499b4a7 ("scsi: target: tcm_loop: Use LIO wq cmd submission helper"). To avoid the preemptible code check at smp_processor_id(), get CPU ID with raw_smp_processor_id() instead. The CPU ID is used for performance improvement then thread move to other CPU will not affect the code. [1] [ 56.468103] run blktests block/005 at 2021-05-12 14:16:38 [ 57.369473] check_preemption_disabled: 85 callbacks suppressed [ 57.369480] BUG: using smp_processor_id() in preemptible [00000000] code: fio/1511 [ 57.369506] BUG: using smp_processor_id() in preemptible [00000000] code: fio/1510 [ 57.369512] BUG: using smp_processor_id() in preemptible [00000000] code: fio/1506 [ 57.369552] caller is __target_init_cmd+0x157/0x170 [target_core_mod] [ 57.369606] CPU: 4 PID: 1506 Comm: fio Not tainted 5.13.0-rc1+ #34 [ 57.369613] Hardware name: System manufacturer System Product Name/PRIME Z270-A, BIOS 1302 03/15/2018 [ 57.369617] Call Trace: [ 57.369621] BUG: using smp_processor_id() in preemptible [00000000] code: fio/1507 [ 57.369628] dump_stack+0x6d/0x89 [ 57.369642] check_preemption_disabled+0xc8/0xd0 [ 57.369628] caller is __target_init_cmd+0x157/0x170 [target_core_mod] [ 57.369655] __target_init_cmd+0x157/0x170 [target_core_mod] [ 57.369695] target_init_cmd+0x76/0x90 [target_core_mod] [ 57.369732] tcm_loop_queuecommand+0x109/0x210 [tcm_loop] [ 57.369744] scsi_queue_rq+0x38e/0xc40 [ 57.369761] __blk_mq_try_issue_directly+0x109/0x1c0 [ 57.369779] blk_mq_try_issue_directly+0x43/0x90 [ 57.369790] blk_mq_submit_bio+0x4e5/0x5d0 [ 57.369812] submit_bio_noacct+0x46e/0x4e0 [ 57.369830] __blkdev_direct_IO_simple+0x1a3/0x2d0 [ 57.369859] ? set_init_blocksize.isra.0+0x60/0x60 [ 57.369880] generic_file_read_iter+0x89/0x160 [ 57.369898] blkdev_read_iter+0x44/0x60 [ 57.369906] new_sync_read+0x102/0x170 [ 57.369929] vfs_read+0xd4/0x160 [ 57.369941] __x64_sys_pread64+0x6e/0xa0 [ 57.369946] ? lockdep_hardirqs_on+0x79/0x100 [ 57.369958] do_syscall_64+0x3a/0x70 [ 57.369965] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 57.369973] RIP: 0033:0x7f7ed4c1399f [ 57.369979] Code: 08 89 3c 24 48 89 4c 24 18 e8 7d f3 ff ff 4c 8b 54 24 18 48 8b 54 24 10 41 89 c0 48 8b 74 24 08 8b 3c 24 b8 11 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 31 44 89 c7 48 89 04 24 e8 cd f3 ff ff 48 8b [ 57.369983] RSP: 002b:00007ffd7918c580 EFLAGS: 00000293 ORIG_RAX: 0000000000000011 [ 57.369990] RAX: ffffffffffffffda RBX: 00000000015b4540 RCX: 00007f7ed4c1399f [ 57.369993] RDX: 0000000000001000 RSI: 00000000015de000 RDI: 0000000000000009 [ 57.369996] RBP: 00000000015b4540 R08: 0000000000000000 R09: 0000000000000001 [ 57.369999] R10: 0000000000e5c000 R11: 0000000000000293 R12: 00007f7eb5269a70 [ 57.370002] R13: 0000000000000000 R14: 0000000000001000 R15: 00000000015b4568 [ 57.370031] CPU: 7 PID: 1507 Comm: fio Not tainted 5.13.0-rc1+ #34 [ 57.370036] Hardware name: System manufacturer System Product Name/PRIME Z270-A, BIOS 1302 03/15/2018 [ 57.370039] Call Trace: [ 57.370045] dump_stack+0x6d/0x89 [ 57.370056] check_preemption_disabled+0xc8/0xd0 [ 57.370068] __target_init_cmd+0x157/0x170 [target_core_mod] [ 57.370121] target_init_cmd+0x76/0x90 [target_core_mod] [ 57.370178] tcm_loop_queuecommand+0x109/0x210 [tcm_loop] [ 57.370197] scsi_queue_rq+0x38e/0xc40 [ 57.370224] __blk_mq_try_issue_directly+0x109/0x1c0 ... [2] [ 117.458597] BUG: using smp_processor_id() in preemptible [00000000] code: kworker/u16:8 [ 117.467279] caller is __target_init_cmd+0x157/0x170 [target_core_mod] [ 117.473893] CPU: 1 PID: 418 Comm: kworker/u16:6 Not tainted 5.13.0-rc1+ #34 [ 117.481150] Hardware name: System manufacturer System Product Name/PRIME Z270-A, BIOS 8 [ 117.481153] Workqueue: scsi_tmf_7 scmd_eh_abort_handler [ 117.481156] Call Trace: [ 117.481158] dump_stack+0x6d/0x89 [ 117.481162] check_preemption_disabled+0xc8/0xd0 [ 117.512575] target_submit_tmr+0x41/0x150 [target_core_mod] [ 117.519705] tcm_loop_issue_tmr+0xa7/0x100 [tcm_loop] [ 117.524913] tcm_loop_abort_task+0x43/0x60 [tcm_loop] [ 117.530137] scmd_eh_abort_handler+0x7b/0x230 [ 117.534681] process_one_work+0x268/0x580 [ 117.538862] worker_thread+0x55/0x3b0 [ 117.542652] ? process_one_work+0x580/0x580 [ 117.548351] kthread+0x143/0x160 [ 117.551675] ? kthread_create_worker_on_cpu+0x40/0x40 [ 117.556873] ret_from_fork+0x1f/0x30 Link: https://lore.kernel.org/r/20210515070315.215801-1-shinichiro.kawasaki@wdc.com Fixes: 1526d9f10c61 ("scsi: target: Make state_list per CPU") Cc: stable@vger.kernel.org # v5.11+ Reviewed-by: Mike Christie Signed-off-by: Shin'ichiro Kawasaki Signed-off-by: Martin K. Petersen --- drivers/target/target_core_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 8fbfe75c5744..05d7ffd59df6 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1416,7 +1416,7 @@ void __target_init_cmd( cmd->orig_fe_lun = unpacked_lun; if (!(cmd->se_cmd_flags & SCF_USE_CPUID)) - cmd->cpuid = smp_processor_id(); + cmd->cpuid = raw_smp_processor_id(); cmd->state_active = false; } From b4150b68815e9e4447ce169224ed436b419f0153 Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Wed, 19 May 2021 15:54:40 +0200 Subject: [PATCH 499/730] scsi: target: tcmu: Fix xarray RCU warning Commit f5ce815f34bc ("scsi: target: tcmu: Support DATA_BLOCK_SIZE = N * PAGE_SIZE") introduced xas_next() calls to iterate xarray elements. These calls triggered the WARNING "suspicious RCU usage" at tcmu device set up [1]. In the call stack of xas_next(), xas_load() was called. According to its comment, this function requires "the xa_lock or the RCU lock". To avoid the warning: - Guard the small loop calling xas_next() in tcmu_get_empty_block with RCU lock. - In the large loop in tcmu_copy_data using RCU lock would possibly disable preemtion for a long time (copy multi MBs). Therefore replace XA_STATE, xas_set and xas_next with a single xa_load. [1] [ 1899.867091] ============================= [ 1899.871199] WARNING: suspicious RCU usage [ 1899.875310] 5.13.0-rc1+ #41 Not tainted [ 1899.879222] ----------------------------- [ 1899.883299] include/linux/xarray.h:1182 suspicious rcu_dereference_check() usage! [ 1899.890940] other info that might help us debug this: [ 1899.899082] rcu_scheduler_active = 2, debug_locks = 1 [ 1899.905719] 3 locks held by kworker/0:1/1368: [ 1899.910161] #0: ffffa1f8c8b98738 ((wq_completion)target_submission){+.+.}-{0:0}, at: process_one_work+0x1ee/0x580 [ 1899.920732] #1: ffffbd7040cd7e78 ((work_completion)(&q->sq.work)){+.+.}-{0:0}, at: process_one_work+0x1ee/0x580 [ 1899.931146] #2: ffffa1f8d1c99768 (&udev->cmdr_lock){+.+.}-{3:3}, at: tcmu_queue_cmd+0xea/0x160 [target_core_user] [ 1899.941678] stack backtrace: [ 1899.946093] CPU: 0 PID: 1368 Comm: kworker/0:1 Not tainted 5.13.0-rc1+ #41 [ 1899.953070] Hardware name: System manufacturer System Product Name/PRIME Z270-A, BIOS 1302 03/15/2018 [ 1899.962459] Workqueue: target_submission target_queued_submit_work [target_core_mod] [ 1899.970337] Call Trace: [ 1899.972839] dump_stack+0x6d/0x89 [ 1899.976222] xas_descend+0x10e/0x120 [ 1899.979875] xas_load+0x39/0x50 [ 1899.983077] tcmu_get_empty_blocks+0x115/0x1c0 [target_core_user] [ 1899.989318] queue_cmd_ring+0x1da/0x630 [target_core_user] [ 1899.994897] ? rcu_read_lock_sched_held+0x3f/0x70 [ 1899.999695] ? trace_kmalloc+0xa6/0xd0 [ 1900.003501] ? __kmalloc+0x205/0x380 [ 1900.007167] tcmu_queue_cmd+0x12f/0x160 [target_core_user] [ 1900.012746] __target_execute_cmd+0x23/0xa0 [target_core_mod] [ 1900.018589] transport_generic_new_cmd+0x1f3/0x370 [target_core_mod] [ 1900.025046] transport_handle_cdb_direct+0x34/0x50 [target_core_mod] [ 1900.031517] target_queued_submit_work+0x43/0xe0 [target_core_mod] [ 1900.037837] process_one_work+0x268/0x580 [ 1900.041952] ? process_one_work+0x580/0x580 [ 1900.046195] worker_thread+0x55/0x3b0 [ 1900.049921] ? process_one_work+0x580/0x580 [ 1900.054192] kthread+0x143/0x160 [ 1900.057499] ? kthread_create_worker_on_cpu+0x40/0x40 [ 1900.062661] ret_from_fork+0x1f/0x30 Link: https://lore.kernel.org/r/20210519135440.26773-1-bostroesser@gmail.com Fixes: f5ce815f34bc ("scsi: target: tcmu: Support DATA_BLOCK_SIZE = N * PAGE_SIZE") Reported-by: Shin'ichiro Kawasaki Tested-by: Shin'ichiro Kawasaki Signed-off-by: Bodo Stroesser Signed-off-by: Martin K. Petersen --- drivers/target/target_core_user.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 198d25ae482a..4bba10e7755a 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -516,8 +516,10 @@ static inline int tcmu_get_empty_block(struct tcmu_dev *udev, dpi = dbi * udev->data_pages_per_blk; /* Count the number of already allocated pages */ xas_set(&xas, dpi); + rcu_read_lock(); for (cnt = 0; xas_next(&xas) && cnt < page_cnt;) cnt++; + rcu_read_unlock(); for (i = cnt; i < page_cnt; i++) { /* try to get new page from the mm */ @@ -699,11 +701,10 @@ static inline void tcmu_copy_data(struct tcmu_dev *udev, struct scatterlist *sg, unsigned int sg_nents, struct iovec **iov, size_t data_len) { - XA_STATE(xas, &udev->data_pages, 0); /* start value of dbi + 1 must not be a valid dbi */ int dbi = -2; size_t page_remaining, cp_len; - int page_cnt, page_inx; + int page_cnt, page_inx, dpi; struct sg_mapping_iter sg_iter; unsigned int sg_flags; struct page *page; @@ -726,9 +727,10 @@ static inline void tcmu_copy_data(struct tcmu_dev *udev, if (page_cnt > udev->data_pages_per_blk) page_cnt = udev->data_pages_per_blk; - xas_set(&xas, dbi * udev->data_pages_per_blk); - for (page_inx = 0; page_inx < page_cnt && data_len; page_inx++) { - page = xas_next(&xas); + dpi = dbi * udev->data_pages_per_blk; + for (page_inx = 0; page_inx < page_cnt && data_len; + page_inx++, dpi++) { + page = xa_load(&udev->data_pages, dpi); if (direction == TCMU_DATA_AREA_TO_SG) flush_dcache_page(page); From 430bfe0576120b52cf7f62116bc7549180da4706 Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Thu, 20 May 2021 10:43:18 +0200 Subject: [PATCH 500/730] net: ethernet: mtk_eth_soc: Fix DIM support for MT7628/88 When updating to latest mainline for some testing on the GARDENA smart gateway based on the MT7628, I noticed that ethernet does not work any more. Commit e9229ffd550b ("net: ethernet: mtk_eth_soc: implement dynamic interrupt moderation") introduced this problem, as it missed the RX_DIM & TX_DIM configuration for this SoC variant. This patch fixes this by calling mtk_dim_rx() & mtk_dim_tx() in this case as well. Signed-off-by: Stefan Roese Fixes: e9229ffd550b ("net: ethernet: mtk_eth_soc: implement dynamic interrupt moderation") Cc: Felix Fietkau Cc: John Crispin Cc: Ilya Lipnitskiy Cc: Reto Schneider Cc: Reto Schneider Cc: David S. Miller Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index ed4eacef17ce..d6cc06ee0caa 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2423,7 +2423,8 @@ static void mtk_dim_rx(struct work_struct *work) val |= cur << MTK_PDMA_DELAY_RX_PINT_SHIFT; mtk_w32(eth, val, MTK_PDMA_DELAY_INT); - mtk_w32(eth, val, MTK_QDMA_DELAY_INT); + if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) + mtk_w32(eth, val, MTK_QDMA_DELAY_INT); spin_unlock_bh(ð->dim_lock); @@ -2452,7 +2453,8 @@ static void mtk_dim_tx(struct work_struct *work) val |= cur << MTK_PDMA_DELAY_TX_PINT_SHIFT; mtk_w32(eth, val, MTK_PDMA_DELAY_INT); - mtk_w32(eth, val, MTK_QDMA_DELAY_INT); + if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) + mtk_w32(eth, val, MTK_QDMA_DELAY_INT); spin_unlock_bh(ð->dim_lock); @@ -2480,6 +2482,10 @@ static int mtk_hw_init(struct mtk_eth *eth) goto err_disable_pm; } + /* set interrupt delays based on current Net DIM sample */ + mtk_dim_rx(ð->rx_dim.work); + mtk_dim_tx(ð->tx_dim.work); + /* disable delay and normal interrupt */ mtk_tx_irq_disable(eth, ~0); mtk_rx_irq_disable(eth, ~0); From 8c7e7b8486cda21269d393245883c5e4737d5ee7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 19 May 2021 17:20:27 +0300 Subject: [PATCH 501/730] scsi: libsas: Use _safe() loop in sas_resume_port() If sas_notify_lldd_dev_found() fails then this code calls: sas_unregister_dev(port, dev); which removes "dev", our list iterator, from the list. This could lead to an endless loop. We need to use list_for_each_entry_safe(). Link: https://lore.kernel.org/r/YKUeq6gwfGcvvhty@mwanda Fixes: 303694eeee5e ("[SCSI] libsas: suspend / resume support") Reviewed-by: John Garry Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen --- drivers/scsi/libsas/sas_port.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c index 19cf418928fa..e3d03d744713 100644 --- a/drivers/scsi/libsas/sas_port.c +++ b/drivers/scsi/libsas/sas_port.c @@ -25,7 +25,7 @@ static bool phy_is_wideport_member(struct asd_sas_port *port, struct asd_sas_phy static void sas_resume_port(struct asd_sas_phy *phy) { - struct domain_device *dev; + struct domain_device *dev, *n; struct asd_sas_port *port = phy->port; struct sas_ha_struct *sas_ha = phy->ha; struct sas_internal *si = to_sas_internal(sas_ha->core.shost->transportt); @@ -44,7 +44,7 @@ static void sas_resume_port(struct asd_sas_phy *phy) * 1/ presume every device came back * 2/ force the next revalidation to check all expander phys */ - list_for_each_entry(dev, &port->dev_list, dev_list_node) { + list_for_each_entry_safe(dev, n, &port->dev_list, dev_list_node) { int i, rc; rc = sas_notify_lldd_dev_found(dev); From 5aaeca258f5540ca5cd4a56758ef03faacb7716d Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 19 May 2021 17:26:40 -0500 Subject: [PATCH 502/730] scsi: target: iblock: Fix smp_processor_id() BUG messages This has us use raw_smp_processor_id() in iblock's plug_device callout. smp_processor_id() is not needed here, because we are running from a per CPU work item that is also queued to run on a worker thread that is normally bound to a specific CPU. If the worker thread did end up switching CPUs then it's handled the same way we handle when the work got moved to a different CPU's worker thread, where we will just end up sending I/O from the new CPU. Link: https://lore.kernel.org/r/20210519222640.5153-1-michael.christie@oracle.com Fixes: 415ccd9811da ("scsi: target: iblock: Add backend plug/unplug callouts") Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/target/target_core_iblock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index d6fdd1c61f90..a526f9678c34 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -204,11 +204,11 @@ static struct se_dev_plug *iblock_plug_device(struct se_device *se_dev) struct iblock_dev_plug *ib_dev_plug; /* - * Each se_device has a per cpu work this can be run from. Wwe + * Each se_device has a per cpu work this can be run from. We * shouldn't have multiple threads on the same cpu calling this * at the same time. */ - ib_dev_plug = &ib_dev->ibd_plug[smp_processor_id()]; + ib_dev_plug = &ib_dev->ibd_plug[raw_smp_processor_id()]; if (test_and_set_bit(IBD_PLUGF_PLUGGED, &ib_dev_plug->flags)) return NULL; From e5bfaed7508fd34ae95a79d1eb76c38ecc82c947 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 21 May 2021 15:28:56 +0200 Subject: [PATCH 503/730] MAINTAINERS: s390/net: add netdev list Discussions for network-related code should include the netdev list. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index c1cb2e38ae2e..88722efd94a1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15944,6 +15944,7 @@ S390 IUCV NETWORK LAYER M: Julian Wiedmann M: Karsten Graul L: linux-s390@vger.kernel.org +L: netdev@vger.kernel.org S: Supported W: http://www.ibm.com/developerworks/linux/linux390/ F: drivers/s390/net/*iucv* @@ -15954,6 +15955,7 @@ S390 NETWORK DRIVERS M: Julian Wiedmann M: Karsten Graul L: linux-s390@vger.kernel.org +L: netdev@vger.kernel.org S: Supported W: http://www.ibm.com/developerworks/linux/linux390/ F: drivers/s390/net/ From fc516d3a6aa2c6ffe27d0da8818d13839e023e7e Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 21 May 2021 10:46:14 -0700 Subject: [PATCH 504/730] net: dsa: bcm_sf2: Fix bcm_sf2_reg_rgmii_cntrl() call for non-RGMII port MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We cannot call bcm_sf2_reg_rgmii_cntrl() for a port that is not RGMII, yet we do that in bcm_sf2_sw_mac_link_up() irrespective of the port's interface. Move that read until we have properly qualified the PHY interface mode. This avoids triggering a warning on 7278 platforms that have GMII ports. Fixes: 55cfeb396965 ("net: dsa: bcm_sf2: add function finding RGMII register") Signed-off-by: Florian Fainelli Acked-by: Rafał Miłecki Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 9150038b60cb..3b018fcf4412 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -821,11 +821,9 @@ static void bcm_sf2_sw_mac_link_up(struct dsa_switch *ds, int port, bcm_sf2_sw_mac_link_set(ds, port, interface, true); if (port != core_readl(priv, CORE_IMP0_PRT_ID)) { - u32 reg_rgmii_ctrl; + u32 reg_rgmii_ctrl = 0; u32 reg, offset; - reg_rgmii_ctrl = bcm_sf2_reg_rgmii_cntrl(priv, port); - if (priv->type == BCM4908_DEVICE_ID || priv->type == BCM7445_DEVICE_ID) offset = CORE_STS_OVERRIDE_GMIIP_PORT(port); @@ -836,6 +834,7 @@ static void bcm_sf2_sw_mac_link_up(struct dsa_switch *ds, int port, interface == PHY_INTERFACE_MODE_RGMII_TXID || interface == PHY_INTERFACE_MODE_MII || interface == PHY_INTERFACE_MODE_REVMII) { + reg_rgmii_ctrl = bcm_sf2_reg_rgmii_cntrl(priv, port); reg = reg_readl(priv, reg_rgmii_ctrl); reg &= ~(RX_PAUSE_EN | TX_PAUSE_EN); From c7718ee96dbc2f9c5fc3b578abdf296dd44b9c20 Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Fri, 21 May 2021 16:45:58 +0200 Subject: [PATCH 505/730] net: lantiq: fix memory corruption in RX ring In a situation where memory allocation or dma mapping fails, an invalid address is programmed into the descriptor. This can lead to memory corruption. If the memory allocation fails, DMA should reuse the previous skb and mapping and drop the packet. This patch also increments rx drop counter. Fixes: fe1a56420cf2 ("net: lantiq: Add Lantiq / Intel VRX200 Ethernet driver ") Signed-off-by: Aleksander Jan Bajkowski Signed-off-by: David S. Miller --- drivers/net/ethernet/lantiq_xrx200.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 41c2ad210bc9..36dc3e5f6218 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -154,6 +154,7 @@ static int xrx200_close(struct net_device *net_dev) static int xrx200_alloc_skb(struct xrx200_chan *ch) { + dma_addr_t mapping; int ret = 0; ch->skb[ch->dma.desc] = netdev_alloc_skb_ip_align(ch->priv->net_dev, @@ -163,16 +164,17 @@ static int xrx200_alloc_skb(struct xrx200_chan *ch) goto skip; } - ch->dma.desc_base[ch->dma.desc].addr = dma_map_single(ch->priv->dev, - ch->skb[ch->dma.desc]->data, XRX200_DMA_DATA_LEN, - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(ch->priv->dev, - ch->dma.desc_base[ch->dma.desc].addr))) { + mapping = dma_map_single(ch->priv->dev, ch->skb[ch->dma.desc]->data, + XRX200_DMA_DATA_LEN, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(ch->priv->dev, mapping))) { dev_kfree_skb_any(ch->skb[ch->dma.desc]); ret = -ENOMEM; goto skip; } + ch->dma.desc_base[ch->dma.desc].addr = mapping; + /* Make sure the address is written before we give it to HW */ + wmb(); skip: ch->dma.desc_base[ch->dma.desc].ctl = LTQ_DMA_OWN | LTQ_DMA_RX_OFFSET(NET_IP_ALIGN) | @@ -196,6 +198,8 @@ static int xrx200_hw_receive(struct xrx200_chan *ch) ch->dma.desc %= LTQ_DESC_NUM; if (ret) { + ch->skb[ch->dma.desc] = skb; + net_dev->stats.rx_dropped++; netdev_err(net_dev, "failed to allocate new rx buffer\n"); return ret; } From 29bf1993fdba17703a836cf098712cf15f96706d Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Sat, 22 May 2021 00:01:00 +0300 Subject: [PATCH 506/730] MAINTAINERS: remove Ioana Radulescu from dpaa2-eth Remove Ioana Radulescu from dpaa2-eth since she is no longer working on the DPAA2 set of drivers. Signed-off-by: Ioana Ciornei Signed-off-by: David S. Miller --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 88722efd94a1..251111e5da53 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5569,7 +5569,6 @@ F: drivers/soc/fsl/dpio DPAA2 ETHERNET DRIVER M: Ioana Ciornei -M: Ioana Radulescu L: netdev@vger.kernel.org S: Maintained F: Documentation/networking/device_drivers/ethernet/freescale/dpaa2/ethernet-driver.rst From aced3ce57cd37b5ca332bcacd370d01f5a8c5371 Mon Sep 17 00:00:00 2001 From: Rao Shoaib Date: Fri, 21 May 2021 11:08:06 -0700 Subject: [PATCH 507/730] RDS tcp loopback connection can hang When TCP is used as transport and a program on the system connects to RDS port 16385, connection is accepted but denied per the rules of RDS. However, RDS connections object is left in the list. Next loopback connection will select that connection object as it is at the head of list. The connection attempt will hang as the connection object is set to connect over TCP which is not allowed The issue can be reproduced easily, use rds-ping to ping a local IP address. After that use any program like ncat to connect to the same IP address and port 16385. This will hang so ctrl-c out. Now try rds-ping, it will hang. To fix the issue this patch adds checks to disallow the connection object creation and destroys the connection object. Signed-off-by: Rao Shoaib Signed-off-by: David S. Miller --- net/rds/connection.c | 23 +++++++++++++++++------ net/rds/tcp.c | 4 ++-- net/rds/tcp.h | 3 ++- net/rds/tcp_listen.c | 6 ++++++ 4 files changed, 27 insertions(+), 9 deletions(-) diff --git a/net/rds/connection.c b/net/rds/connection.c index f2fcab182095..a3bc4b54d491 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -240,12 +240,23 @@ static struct rds_connection *__rds_conn_create(struct net *net, if (loop_trans) { rds_trans_put(loop_trans); conn->c_loopback = 1; - if (is_outgoing && trans->t_prefer_loopback) { - /* "outgoing" connection - and the transport - * says it wants the connection handled by the - * loopback transport. This is what TCP does. - */ - trans = &rds_loop_transport; + if (trans->t_prefer_loopback) { + if (likely(is_outgoing)) { + /* "outgoing" connection to local address. + * Protocol says it wants the connection + * handled by the loopback transport. + * This is what TCP does. + */ + trans = &rds_loop_transport; + } else { + /* No transport currently in use + * should end up here, but if it + * does, reset/destroy the connection. + */ + kmem_cache_free(rds_conn_slab, conn); + conn = ERR_PTR(-EOPNOTSUPP); + goto out; + } } } diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 43db0eca911f..abf19c0e3ba0 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -313,8 +313,8 @@ out: } #endif -static int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr, - __u32 scope_id) +int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr, + __u32 scope_id) { struct net_device *dev = NULL; #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/rds/tcp.h b/net/rds/tcp.h index bad9cf49d565..dc8d745d6857 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -59,7 +59,8 @@ u32 rds_tcp_snd_una(struct rds_tcp_connection *tc); u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq); extern struct rds_transport rds_tcp_transport; void rds_tcp_accept_work(struct sock *sk); - +int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr, + __u32 scope_id); /* tcp_connect.c */ int rds_tcp_conn_path_connect(struct rds_conn_path *cp); void rds_tcp_conn_path_shutdown(struct rds_conn_path *conn); diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 101cf14215a0..09cadd556d1e 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -167,6 +167,12 @@ int rds_tcp_accept_one(struct socket *sock) } #endif + if (!rds_tcp_laddr_check(sock_net(sock->sk), peer_addr, dev_if)) { + /* local address connection is only allowed via loopback */ + ret = -EOPNOTSUPP; + goto out; + } + conn = rds_conn_create(sock_net(sock->sk), my_addr, peer_addr, &rds_tcp_transport, 0, GFP_KERNEL, dev_if); From 8f03eeb6e0a0a0b8d617ee0a4bce729e47130036 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Fri, 21 May 2021 16:38:35 +0200 Subject: [PATCH 508/730] net:sfc: fix non-freed irq in legacy irq mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SFC driver can be configured via modparam to work using MSI-X, MSI or legacy IRQ interrupts. In the last one, the interrupt was not properly released on module remove. It was not freed because the flag irqs_hooked was not set during initialization in the case of using legacy IRQ. Example of (trimmed) trace during module remove without this fix: remove_proc_entry: removing non-empty directory 'irq/125', leaking at least '0000:3b:00.1' WARNING: CPU: 39 PID: 3658 at fs/proc/generic.c:715 remove_proc_entry+0x15c/0x170 ...trimmed... Call Trace: unregister_irq_proc+0xe3/0x100 free_desc+0x29/0x70 irq_free_descs+0x47/0x70 mp_unmap_irq+0x58/0x60 acpi_unregister_gsi_ioapic+0x2a/0x40 acpi_pci_irq_disable+0x78/0xb0 pci_disable_device+0xd1/0x100 efx_pci_remove+0xa1/0x1e0 [sfc] pci_device_remove+0x38/0xa0 __device_release_driver+0x177/0x230 driver_detach+0xcb/0x110 bus_remove_driver+0x58/0xd0 pci_unregister_driver+0x2a/0xb0 efx_exit_module+0x24/0xf40 [sfc] __do_sys_delete_module.constprop.0+0x171/0x280 ? exit_to_user_mode_prepare+0x83/0x1d0 do_syscall_64+0x3d/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f9f9385800b ...trimmed... Signed-off-by: Íñigo Huguet Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/nic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c index d1e908846f5d..22fbb0ae77fb 100644 --- a/drivers/net/ethernet/sfc/nic.c +++ b/drivers/net/ethernet/sfc/nic.c @@ -90,6 +90,7 @@ int efx_nic_init_interrupt(struct efx_nic *efx) efx->pci_dev->irq); goto fail1; } + efx->irqs_hooked = true; return 0; } From e29f011e8fc04b2cdc742a2b9bbfa1b62518381a Mon Sep 17 00:00:00 2001 From: Francesco Ruggeri Date: Fri, 21 May 2021 13:21:14 -0700 Subject: [PATCH 509/730] ipv6: record frag_max_size in atomic fragments in input path Commit dbd1759e6a9c ("ipv6: on reassembly, record frag_max_size") filled the frag_max_size field in IP6CB in the input path. The field should also be filled in case of atomic fragments. Fixes: dbd1759e6a9c ('ipv6: on reassembly, record frag_max_size') Signed-off-by: Francesco Ruggeri Signed-off-by: David S. Miller --- net/ipv6/reassembly.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 47a0dc46cbdb..28e44782c94d 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -343,7 +343,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); - if (!(fhdr->frag_off & htons(0xFFF9))) { + if (!(fhdr->frag_off & htons(IP6_OFFSET | IP6_MF))) { /* It is not a fragmented frame */ skb->transport_header += sizeof(struct frag_hdr); __IP6_INC_STATS(net, @@ -351,6 +351,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb) IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb); IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; + IP6CB(skb)->frag_max_size = ntohs(hdr->payload_len) + + sizeof(struct ipv6hdr); return 1; } From 7e7606330b167a0ff483fb02caed9267bfab69ee Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Mon, 17 May 2021 06:24:51 -0700 Subject: [PATCH 510/730] scsi: aic7xxx: Restore several defines for aic7xxx firmware build With CONFIG_AIC7XXX_BUILD_FIRMWARE, there is this representative error: aicasm: Stopped at file ./drivers/scsi/aic7xxx/aic7xxx.seq, line 271 - Undefined symbol MSG_SIMPLE_Q_TAG referenced MSG_SIMPLE_Q_TAG used to be defined in drivers/scsi/aic7xxx/scsi_message.h as: #define MSG_SIMPLE_Q_TAG 0x20 /* O/O */ The new definition in include/scsi/scsi.h is: #define SIMPLE_QUEUE_TAG 0x20 But aicasm can not handle the all the preprocessor directives in scsi.h, so add MSG_SIMPLE_Q_TAB and other required defines back to scsi_message.h. Link: https://lore.kernel.org/r/20210517132451.1832233-1-trix@redhat.com Fixes: d8cd784ff7b3 ("scsi: aic7xxx: aic79xx: Drop internal SCSI message definition" Signed-off-by: Tom Rix Signed-off-by: Martin K. Petersen --- drivers/scsi/aic7xxx/scsi_message.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/scsi/aic7xxx/scsi_message.h b/drivers/scsi/aic7xxx/scsi_message.h index a7515c3039ed..53343a6d8ae1 100644 --- a/drivers/scsi/aic7xxx/scsi_message.h +++ b/drivers/scsi/aic7xxx/scsi_message.h @@ -3,6 +3,17 @@ * $FreeBSD: src/sys/cam/scsi/scsi_message.h,v 1.2 2000/05/01 20:21:29 peter Exp $ */ +/* Messages (1 byte) */ /* I/T (M)andatory or (O)ptional */ +#define MSG_SAVEDATAPOINTER 0x02 /* O/O */ +#define MSG_RESTOREPOINTERS 0x03 /* O/O */ +#define MSG_DISCONNECT 0x04 /* O/O */ +#define MSG_MESSAGE_REJECT 0x07 /* M/M */ +#define MSG_NOOP 0x08 /* M/M */ + +/* Messages (2 byte) */ +#define MSG_SIMPLE_Q_TAG 0x20 /* O/O */ +#define MSG_IGN_WIDE_RESIDUE 0x23 /* O/O */ + /* Identify message */ /* M/M */ #define MSG_IDENTIFYFLAG 0x80 #define MSG_IDENTIFY_DISCFLAG 0x40 From b4de11dfb569043be2cb38b2b1031e64f8ee0ff6 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Mon, 17 May 2021 13:50:57 -0700 Subject: [PATCH 511/730] scsi: aic7xxx: Remove multiple definition of globals Building aicasm with gcc 10.2 + gas 26.1 causes these errors: multiple definition of `args'; multiple definition of `yylineno'; args came from the expansion of: STAILQ_HEAD(macro_arg_list, macro_arg) args; The definition of the macro_arg_list structure is needed, the global variable 'args' is not, so delete it. yylineno is defined by flex, so defining it in bison/*.y file is not needed. Also delete this. Link: https://lore.kernel.org/r/20210517205057.1850010-1-trix@redhat.com Signed-off-by: Tom Rix Signed-off-by: Martin K. Petersen --- drivers/scsi/aic7xxx/aicasm/aicasm_gram.y | 1 - drivers/scsi/aic7xxx/aicasm/aicasm_symbol.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/aic7xxx/aicasm/aicasm_gram.y b/drivers/scsi/aic7xxx/aicasm/aicasm_gram.y index 924d55a8acbf..65182ad9cdf8 100644 --- a/drivers/scsi/aic7xxx/aicasm/aicasm_gram.y +++ b/drivers/scsi/aic7xxx/aicasm/aicasm_gram.y @@ -58,7 +58,6 @@ #include "aicasm_symbol.h" #include "aicasm_insformat.h" -int yylineno; char *yyfilename; char stock_prefix[] = "aic_"; char *prefix = stock_prefix; diff --git a/drivers/scsi/aic7xxx/aicasm/aicasm_symbol.h b/drivers/scsi/aic7xxx/aicasm/aicasm_symbol.h index 7bf7fd5953ac..ed3bdd43c297 100644 --- a/drivers/scsi/aic7xxx/aicasm/aicasm_symbol.h +++ b/drivers/scsi/aic7xxx/aicasm/aicasm_symbol.h @@ -108,7 +108,7 @@ struct macro_arg { regex_t arg_regex; char *replacement_text; }; -STAILQ_HEAD(macro_arg_list, macro_arg) args; +STAILQ_HEAD(macro_arg_list, macro_arg); struct macro_info { struct macro_arg_list args; From 122c81c563b0c1c6b15ff76a9159af5ee1f21563 Mon Sep 17 00:00:00 2001 From: Javed Hasan Date: Tue, 18 May 2021 23:14:16 -0700 Subject: [PATCH 512/730] scsi: bnx2fc: Return failure if io_req is already in ABTS processing Return failure from bnx2fc_eh_abort() if io_req is already in ABTS processing. Link: https://lore.kernel.org/r/20210519061416.19321-1-jhasan@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Javed Hasan Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2fc/bnx2fc_io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c index 1a0dc18d6915..ed300a279a38 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_io.c +++ b/drivers/scsi/bnx2fc/bnx2fc_io.c @@ -1220,6 +1220,7 @@ int bnx2fc_eh_abort(struct scsi_cmnd *sc_cmd) was a result from the ABTS request rather than the CLEANUP request */ set_bit(BNX2FC_FLAG_IO_CLEANUP, &io_req->req_flags); + rc = FAILED; goto done; } From e662502b3a782d479e67736a5a1c169a703d853a Mon Sep 17 00:00:00 2001 From: Matt Wang Date: Wed, 19 May 2021 09:49:32 +0000 Subject: [PATCH 513/730] scsi: vmw_pvscsi: Set correct residual data length Some commands (such as INQUIRY) may return less data than the initiator requested. To avoid conducting useless information, set the right residual count to make upper layer aware of this. Before (INQUIRY PAGE 0xB0 with 128B buffer): $ sg_raw -r 128 /dev/sda 12 01 B0 00 80 00 SCSI Status: Good Received 128 bytes of data: 00 00 b0 00 3c 01 00 00 00 00 00 00 00 00 00 00 00 ...<............ 10 00 00 00 00 00 01 00 00 00 00 00 40 00 00 08 00 ...........@.... 20 80 00 00 00 00 00 00 00 00 00 20 00 00 00 00 00 .......... ..... 30 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 40 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 50 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 60 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 70 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ After: $ sg_raw -r 128 /dev/sda 12 01 B0 00 80 00 SCSI Status: Good Received 64 bytes of data: 00 00 b0 00 3c 01 00 00 00 00 00 00 00 00 00 00 00 ...<............ 10 00 00 00 00 00 01 00 00 00 00 00 40 00 00 08 00 ...........@.... 20 80 00 00 00 00 00 00 00 00 00 20 00 00 00 00 00 .......... ..... 30 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [mkp: clarified description] Link: https://lore.kernel.org/r/03C41093-B62E-43A2-913E-CFC92F1C70C3@vmware.com Signed-off-by: Matt Wang Signed-off-by: Martin K. Petersen --- drivers/scsi/vmw_pvscsi.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c index 8a79605d9652..b9969fce6b4d 100644 --- a/drivers/scsi/vmw_pvscsi.c +++ b/drivers/scsi/vmw_pvscsi.c @@ -585,7 +585,13 @@ static void pvscsi_complete_request(struct pvscsi_adapter *adapter, case BTSTAT_SUCCESS: case BTSTAT_LINKED_COMMAND_COMPLETED: case BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG: - /* If everything went fine, let's move on.. */ + /* + * Commands like INQUIRY may transfer less data than + * requested by the initiator via bufflen. Set residual + * count to make upper layer aware of the actual amount + * of data returned. + */ + scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen); cmd->result = (DID_OK << 16); break; From 7907a021e4bbfa29cccacd2ba2dade894d9a7d4c Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 19 May 2021 21:05:19 +0800 Subject: [PATCH 514/730] scsi: hisi_sas: Drop free_irq() of devm_request_irq() allocated irq irqs allocated with devm_request_irq() should not be freed using free_irq(). Doing so causes a dangling pointer and a subsequent double free. Link: https://lore.kernel.org/r/20210519130519.2661938-1-yangyingliang@huawei.com Reported-by: Hulk Robot Acked-by: John Garry Signed-off-by: Yang Yingliang Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 499c770d405c..e95408314078 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -4811,14 +4811,14 @@ hisi_sas_v3_destroy_irqs(struct pci_dev *pdev, struct hisi_hba *hisi_hba) { int i; - free_irq(pci_irq_vector(pdev, 1), hisi_hba); - free_irq(pci_irq_vector(pdev, 2), hisi_hba); - free_irq(pci_irq_vector(pdev, 11), hisi_hba); + devm_free_irq(&pdev->dev, pci_irq_vector(pdev, 1), hisi_hba); + devm_free_irq(&pdev->dev, pci_irq_vector(pdev, 2), hisi_hba); + devm_free_irq(&pdev->dev, pci_irq_vector(pdev, 11), hisi_hba); for (i = 0; i < hisi_hba->cq_nvecs; i++) { struct hisi_sas_cq *cq = &hisi_hba->cq[i]; int nr = hisi_sas_intr_conv ? 16 : 16 + i; - free_irq(pci_irq_vector(pdev, nr), cq); + devm_free_irq(&pdev->dev, pci_irq_vector(pdev, nr), cq); } pci_free_irq_vectors(pdev); } From 2ef7665dfd88830f15415ba007c7c9a46be7acd8 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Thu, 15 Apr 2021 23:35:54 +0300 Subject: [PATCH 515/730] scsi: target: qla2xxx: Wait for stop_phase1 at WWN removal Target de-configuration panics at high CPU load because TPGT and WWPN can be removed on separate threads. TPGT removal requests a reset HBA on a separate thread and waits for reset complete (phase1). Due to high CPU load that HBA reset can be delayed for some time. WWPN removal does qlt_stop_phase2(). There it is believed that phase1 has already completed and thus tgt.tgt_ops is subsequently cleared. However, tgt.tgt_ops is needed to process incoming traffic and therefore this will cause one of the following panics: NIP qlt_reset+0x7c/0x220 [qla2xxx] LR qlt_reset+0x68/0x220 [qla2xxx] Call Trace: 0xc000003ffff63a78 (unreliable) qlt_handle_imm_notify+0x800/0x10c0 [qla2xxx] qlt_24xx_atio_pkt+0x208/0x590 [qla2xxx] qlt_24xx_process_atio_queue+0x33c/0x7a0 [qla2xxx] qla83xx_msix_atio_q+0x54/0x90 [qla2xxx] or NIP qlt_24xx_handle_abts+0xd0/0x2a0 [qla2xxx] LR qlt_24xx_handle_abts+0xb4/0x2a0 [qla2xxx] Call Trace: qlt_24xx_handle_abts+0x90/0x2a0 [qla2xxx] (unreliable) qlt_24xx_process_atio_queue+0x500/0x7a0 [qla2xxx] qla83xx_msix_atio_q+0x54/0x90 [qla2xxx] or NIP qlt_create_sess+0x90/0x4e0 [qla2xxx] LR qla24xx_do_nack_work+0xa8/0x180 [qla2xxx] Call Trace: 0xc0000000348fba30 (unreliable) qla24xx_do_nack_work+0xa8/0x180 [qla2xxx] qla2x00_do_work+0x674/0xbf0 [qla2xxx] qla2x00_iocb_work_fn The patch fixes the issue by serializing qlt_stop_phase1() and qlt_stop_phase2() functions to make WWPN removal wait for phase1 completion. Link: https://lore.kernel.org/r/20210415203554.27890-1-d.bogdanov@yadro.com Reviewed-by: Roman Bolshakov Signed-off-by: Dmitry Bogdanov Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_target.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index b2008fb1dd38..12a6848ade43 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1563,10 +1563,12 @@ void qlt_stop_phase2(struct qla_tgt *tgt) return; } + mutex_lock(&tgt->ha->optrom_mutex); mutex_lock(&vha->vha_tgt.tgt_mutex); tgt->tgt_stop = 0; tgt->tgt_stopped = 1; mutex_unlock(&vha->vha_tgt.tgt_mutex); + mutex_unlock(&tgt->ha->optrom_mutex); ql_dbg(ql_dbg_tgt_mgt, vha, 0xf00c, "Stop of tgt %p finished\n", tgt); From 119b75c150773425a89033215eab4d15d4198f8b Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Sat, 22 May 2021 11:47:41 +0800 Subject: [PATCH 516/730] ALSA: hda/realtek: Headphone volume is controlled by Front mixer On some ASUS and MSI machines, the audio codec is alc1220 and the Headphone is connected to audio mixer 0xf and DAC 0x5, in theory the Headphone volume is controlled by DAC 0x5 (Heapdhone Playback Volume), but somehow it is controlled by DAC 0x2 (Front Playback Volume), maybe this is a defect on the codec alc1220. Because of this issue, the PA couldn't switch the headphone and Lineout correctly, If we apply the quirk CLEVO_P950 to those machines, the Lineout and Headphone will share the audio mixer 0xc and DAC 0x2, and generate Headphone+LO mixer, then PA could handle them when switching between them. BugLink: https://gitlab.freedesktop.org/pipewire/pipewire/-/issues/1206 Cc: Signed-off-by: Hui Wang Link: https://lore.kernel.org/r/20210522034741.13415-1-hui.wang@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 552e2cb73291..ffaeb8d3c316 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2603,6 +2603,28 @@ static const struct hda_model_fixup alc882_fixup_models[] = { {} }; +static const struct snd_hda_pin_quirk alc882_pin_fixup_tbl[] = { + SND_HDA_PIN_QUIRK(0x10ec1220, 0x1043, "ASUS", ALC1220_FIXUP_CLEVO_P950, + {0x14, 0x01014010}, + {0x15, 0x01011012}, + {0x16, 0x01016011}, + {0x18, 0x01a19040}, + {0x19, 0x02a19050}, + {0x1a, 0x0181304f}, + {0x1b, 0x0221401f}, + {0x1e, 0x01456130}), + SND_HDA_PIN_QUIRK(0x10ec1220, 0x1462, "MS-7C35", ALC1220_FIXUP_CLEVO_P950, + {0x14, 0x01015010}, + {0x15, 0x01011012}, + {0x16, 0x01011011}, + {0x18, 0x01a11040}, + {0x19, 0x02a19050}, + {0x1a, 0x0181104f}, + {0x1b, 0x0221401f}, + {0x1e, 0x01451130}), + {} +}; + /* * BIOS auto configuration */ @@ -2644,6 +2666,7 @@ static int patch_alc882(struct hda_codec *codec) snd_hda_pick_fixup(codec, alc882_fixup_models, alc882_fixup_tbl, alc882_fixups); + snd_hda_pick_pin_fixup(codec, alc882_pin_fixup_tbl, alc882_fixups, true); snd_hda_apply_fixup(codec, HDA_FIXUP_ACT_PRE_PROBE); alc_auto_parse_customize_define(codec); From 9ebaef0540a981093bce5df15af32354d32391d9 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Sat, 22 May 2021 12:26:45 +0800 Subject: [PATCH 517/730] ALSA: hda/realtek: the bass speaker can't output sound on Yoga 9i The Lenovo Yoga 9i has bass speaker, but the bass speaker can't work, that is because there is an i2s amplifier on that speaker, need to run ideapad_s740_coef() to initialize the amplifier. And also needs to apply ALC285_FIXUP_THINKPAD_HEADSET_JACK to rename the speaker's mixer control name, otherwise the PA can't handle them. BugLink: http://bugs.launchpad.net/bugs/1926165 Signed-off-by: Hui Wang Cc: Link: https://lore.kernel.org/r/20210522042645.14221-1-hui.wang@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ffaeb8d3c316..6571c3713732 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6566,6 +6566,7 @@ enum { ALC295_FIXUP_ASUS_DACS, ALC295_FIXUP_HP_OMEN, ALC285_FIXUP_HP_SPECTRE_X360, + ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP, }; static const struct hda_fixup alc269_fixups[] = { @@ -8132,6 +8133,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC285_FIXUP_SPEAKER2_TO_DAC1, }, + [ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_ideapad_s740_coef, + .chained = true, + .chain_id = ALC285_FIXUP_THINKPAD_HEADSET_JACK, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -8500,6 +8507,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940", ALC298_FIXUP_LENOVO_SPK_VOLUME), SND_PCI_QUIRK(0x17aa, 0x3827, "Ideapad S740", ALC285_FIXUP_IDEAPAD_S740_COEF), + SND_PCI_QUIRK(0x17aa, 0x3843, "Yoga 9i", ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo B50-70", ALC269_FIXUP_DMIC_THINKPAD_ACPI), @@ -8715,6 +8723,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC245_FIXUP_HP_X360_AMP, .name = "alc245-hp-x360-amp"}, {.id = ALC295_FIXUP_HP_OMEN, .name = "alc295-hp-omen"}, {.id = ALC285_FIXUP_HP_SPECTRE_X360, .name = "alc285-hp-spectre-x360"}, + {.id = ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP, .name = "alc287-ideapad-bass-spk-amp"}, {} }; #define ALC225_STANDARD_PINS \ From 2a54c8c9ebc2006bf72554afc84ffc67768979a0 Mon Sep 17 00:00:00 2001 From: Rui Miguel Silva Date: Wed, 12 May 2021 23:39:29 +0100 Subject: [PATCH 518/730] iio: gyro: fxas21002c: balance runtime power in error path If we fail to read temperature or axis we need to decrement the runtime pm reference count to trigger autosuspend. Add the call to pm_put to do that in case of error. Fixes: a0701b6263ae ("iio: gyro: add core driver for fxas21002c") Suggested-by: Mauro Carvalho Chehab Signed-off-by: Rui Miguel Silva Link: https://lore.kernel.org/linux-iio/CBBZA9T1OY9C.2611WSV49DV2G@arch-thunder/ Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/fxas21002c_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/gyro/fxas21002c_core.c b/drivers/iio/gyro/fxas21002c_core.c index 1a20c6b88e7d..645461c70454 100644 --- a/drivers/iio/gyro/fxas21002c_core.c +++ b/drivers/iio/gyro/fxas21002c_core.c @@ -399,6 +399,7 @@ static int fxas21002c_temp_get(struct fxas21002c_data *data, int *val) ret = regmap_field_read(data->regmap_fields[F_TEMP], &temp); if (ret < 0) { dev_err(dev, "failed to read temp: %d\n", ret); + fxas21002c_pm_put(data); goto data_unlock; } @@ -432,6 +433,7 @@ static int fxas21002c_axis_get(struct fxas21002c_data *data, &axis_be, sizeof(axis_be)); if (ret < 0) { dev_err(dev, "failed to read axis: %d: %d\n", index, ret); + fxas21002c_pm_put(data); goto data_unlock; } From 98b7b0ca0828907dbb706387c11356a45463e2ea Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 May 2021 12:56:49 +0300 Subject: [PATCH 519/730] iio: dac: ad5770r: Put fwnode in error case during ->probe() device_for_each_child_node() bumps a reference counting of a returned variable. We have to balance it whenever we return to the caller. Fixes: cbbb819837f6 ("iio: dac: ad5770r: Add AD5770R support") Cc: Alexandru Tachici Signed-off-by: Andy Shevchenko Reviewed-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20210510095649.3302835-1-andy.shevchenko@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad5770r.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/iio/dac/ad5770r.c b/drivers/iio/dac/ad5770r.c index 7ab2ccf90863..8107f7bbbe3c 100644 --- a/drivers/iio/dac/ad5770r.c +++ b/drivers/iio/dac/ad5770r.c @@ -524,23 +524,29 @@ static int ad5770r_channel_config(struct ad5770r_state *st) device_for_each_child_node(&st->spi->dev, child) { ret = fwnode_property_read_u32(child, "num", &num); if (ret) - return ret; - if (num >= AD5770R_MAX_CHANNELS) - return -EINVAL; + goto err_child_out; + if (num >= AD5770R_MAX_CHANNELS) { + ret = -EINVAL; + goto err_child_out; + } ret = fwnode_property_read_u32_array(child, "adi,range-microamp", tmp, 2); if (ret) - return ret; + goto err_child_out; min = tmp[0] / 1000; max = tmp[1] / 1000; ret = ad5770r_store_output_range(st, min, max, num); if (ret) - return ret; + goto err_child_out; } + return 0; + +err_child_out: + fwnode_handle_put(child); return ret; } From a1caeebab07e9d72eec534489f47964782b93ba9 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sat, 1 May 2021 17:53:13 +0100 Subject: [PATCH 520/730] iio: adc: ad7768-1: Fix too small buffer passed to iio_push_to_buffers_with_timestamp() Add space for the timestamp to be inserted. Also ensure correct alignment for passing to iio_push_to_buffers_with_timestamp() Fixes: a5f8c7da3dbe ("iio: adc: Add AD7768-1 ADC basic support") Signed-off-by: Jonathan Cameron Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210501165314.511954-2-jic23@kernel.org Cc: --- drivers/iio/adc/ad7768-1.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/ad7768-1.c b/drivers/iio/adc/ad7768-1.c index c945f1349623..60f21fed6dcb 100644 --- a/drivers/iio/adc/ad7768-1.c +++ b/drivers/iio/adc/ad7768-1.c @@ -167,6 +167,10 @@ struct ad7768_state { * transfer buffers to live in their own cache lines. */ union { + struct { + __be32 chan; + s64 timestamp; + } scan; __be32 d32; u8 d8[2]; } data ____cacheline_aligned; @@ -469,11 +473,11 @@ static irqreturn_t ad7768_trigger_handler(int irq, void *p) mutex_lock(&st->lock); - ret = spi_read(st->spi, &st->data.d32, 3); + ret = spi_read(st->spi, &st->data.scan.chan, 3); if (ret < 0) goto err_unlock; - iio_push_to_buffers_with_timestamp(indio_dev, &st->data.d32, + iio_push_to_buffers_with_timestamp(indio_dev, &st->data.scan, iio_get_time_ns(indio_dev)); iio_trigger_notify_done(indio_dev->trig); From 01fcf129f61b26d5b3d2d8afb03e770dee271bc8 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sat, 1 May 2021 17:53:14 +0100 Subject: [PATCH 521/730] iio: adc: ad7923: Fix undersized rx buffer. Fixes tag is where the max channels became 8, but timestamp space was missing before that. Fixes: 851644a60d20 ("iio: adc: ad7923: Add support for the ad7908/ad7918/ad7928") Signed-off-by: Jonathan Cameron Cc: Daniel Junho Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210501165314.511954-3-jic23@kernel.org Cc: --- drivers/iio/adc/ad7923.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad7923.c b/drivers/iio/adc/ad7923.c index 9a649745cd0a..069b561ee768 100644 --- a/drivers/iio/adc/ad7923.c +++ b/drivers/iio/adc/ad7923.c @@ -59,8 +59,10 @@ struct ad7923_state { /* * DMA (thus cache coherency maintenance) requires the * transfer buffers to live in their own cache lines. + * Ensure rx_buf can be directly used in iio_push_to_buffers_with_timetamp + * Length = 8 channels + 4 extra for 8 byte timestamp */ - __be16 rx_buf[4] ____cacheline_aligned; + __be16 rx_buf[12] ____cacheline_aligned; __be16 tx_buf[4]; }; From 4ed243b1da169bcbc1ec5507867e56250c5f1ff9 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 14 May 2021 16:02:54 +0800 Subject: [PATCH 522/730] iio: adc: ad7793: Add missing error code in ad7793_setup() Set error code while device ID query failed. Fixes: 88bc30548aae ("IIO: ADC: New driver for AD7792/AD7793 3 Channel SPI ADC") Signed-off-by: YueHaibing Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7793.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/ad7793.c b/drivers/iio/adc/ad7793.c index 5e980a06258e..440ef4c7be07 100644 --- a/drivers/iio/adc/ad7793.c +++ b/drivers/iio/adc/ad7793.c @@ -279,6 +279,7 @@ static int ad7793_setup(struct iio_dev *indio_dev, id &= AD7793_ID_MASK; if (id != st->chip_info->id) { + ret = -ENODEV; dev_err(&st->sd.spi->dev, "device ID query failed\n"); goto out; } From f9f74dc218c3cfdf0b7f9a95ddae81a081bdb79d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 22 May 2021 17:41:43 -0700 Subject: [PATCH 523/730] mm/shuffle: fix section mismatch warning clang sometimes decides not to inline shuffle_zone(), but it calls a __meminit function. Without the extra __meminit annotation we get this warning: WARNING: modpost: vmlinux.o(.text+0x2a86d4): Section mismatch in reference from the function shuffle_zone() to the function .meminit.text:__shuffle_zone() The function shuffle_zone() references the function __meminit __shuffle_zone(). This is often because shuffle_zone lacks a __meminit annotation or the annotation of __shuffle_zone is wrong. shuffle_free_memory() did not show the same problem in my tests, but it could happen in theory as well, so mark both as __meminit. Link: https://lkml.kernel.org/r/20210514135952.2928094-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Reviewed-by: David Hildenbrand Reviewed-by: Nathan Chancellor Cc: Nick Desaulniers Cc: Arnd Bergmann Cc: Wei Yang Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shuffle.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/shuffle.h b/mm/shuffle.h index 71b784f0b7c3..cec62984f7d3 100644 --- a/mm/shuffle.h +++ b/mm/shuffle.h @@ -10,7 +10,7 @@ DECLARE_STATIC_KEY_FALSE(page_alloc_shuffle_key); extern void __shuffle_free_memory(pg_data_t *pgdat); extern bool shuffle_pick_tail(void); -static inline void shuffle_free_memory(pg_data_t *pgdat) +static inline void __meminit shuffle_free_memory(pg_data_t *pgdat) { if (!static_branch_unlikely(&page_alloc_shuffle_key)) return; @@ -18,7 +18,7 @@ static inline void shuffle_free_memory(pg_data_t *pgdat) } extern void __shuffle_zone(struct zone *z); -static inline void shuffle_zone(struct zone *z) +static inline void __meminit shuffle_zone(struct zone *z) { if (!static_branch_unlikely(&page_alloc_shuffle_key)) return; From f10628d2f613195132532e0fbda439eeed8d12a2 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Sat, 22 May 2021 17:41:46 -0700 Subject: [PATCH 524/730] Revert "mm/gup: check page posion status for coredump." While reviewing [1] I came across commit d3378e86d182 ("mm/gup: check page posion status for coredump.") and noticed that this patch is broken in two ways. First it doesn't really prevent hwpoison pages from being dumped because hwpoison pages can be marked asynchornously at any time after the check. Secondly, and more importantly, the patch introduces a ref count leak because get_dump_page takes a reference on the page which is not released. It also seems that the patch was merged incorrectly because there were follow up changes not included as well as discussions on how to address the underlying problem [2] Therefore revert the original patch. Link: http://lkml.kernel.org/r/20210429122519.15183-4-david@redhat.com [1] Link: http://lkml.kernel.org/r/57ac524c-b49a-99ec-c1e4-ef5027bfb61b@redhat.com [2] Link: https://lkml.kernel.org/r/20210505135407.31590-1-mhocko@kernel.org Fixes: d3378e86d182 ("mm/gup: check page posion status for coredump.") Signed-off-by: Michal Hocko Reviewed-by: David Hildenbrand Cc: Aili Yao Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/gup.c | 4 ---- mm/internal.h | 20 -------------------- 2 files changed, 24 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 0697134b6a12..3ded6a5f26b2 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1593,10 +1593,6 @@ struct page *get_dump_page(unsigned long addr) FOLL_FORCE | FOLL_DUMP | FOLL_GET); if (locked) mmap_read_unlock(mm); - - if (ret == 1 && is_page_poisoned(page)) - return NULL; - return (ret == 1) ? page : NULL; } #endif /* CONFIG_ELF_CORE */ diff --git a/mm/internal.h b/mm/internal.h index 54bd0dc2c23c..2f1182948aa6 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -96,26 +96,6 @@ static inline void set_page_refcounted(struct page *page) set_page_count(page, 1); } -/* - * When kernel touch the user page, the user page may be have been marked - * poison but still mapped in user space, if without this page, the kernel - * can guarantee the data integrity and operation success, the kernel is - * better to check the posion status and avoid touching it, be good not to - * panic, coredump for process fatal signal is a sample case matching this - * scenario. Or if kernel can't guarantee the data integrity, it's better - * not to call this function, let kernel touch the poison page and get to - * panic. - */ -static inline bool is_page_poisoned(struct page *page) -{ - if (PageHWPoison(page)) - return true; - else if (PageHuge(page) && PageHWPoison(compound_head(page))) - return true; - - return false; -} - extern unsigned long highest_memmap_pfn; /* From a11ddb37bf367e6b5239b95ca759e5389bb46048 Mon Sep 17 00:00:00 2001 From: Varad Gautam Date: Sat, 22 May 2021 17:41:49 -0700 Subject: [PATCH 525/730] ipc/mqueue, msg, sem: avoid relying on a stack reference past its expiry do_mq_timedreceive calls wq_sleep with a stack local address. The sender (do_mq_timedsend) uses this address to later call pipelined_send. This leads to a very hard to trigger race where a do_mq_timedreceive call might return and leave do_mq_timedsend to rely on an invalid address, causing the following crash: RIP: 0010:wake_q_add_safe+0x13/0x60 Call Trace: __x64_sys_mq_timedsend+0x2a9/0x490 do_syscall_64+0x80/0x680 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f5928e40343 The race occurs as: 1. do_mq_timedreceive calls wq_sleep with the address of `struct ext_wait_queue` on function stack (aliased as `ewq_addr` here) - it holds a valid `struct ext_wait_queue *` as long as the stack has not been overwritten. 2. `ewq_addr` gets added to info->e_wait_q[RECV].list in wq_add, and do_mq_timedsend receives it via wq_get_first_waiter(info, RECV) to call __pipelined_op. 3. Sender calls __pipelined_op::smp_store_release(&this->state, STATE_READY). Here is where the race window begins. (`this` is `ewq_addr`.) 4. If the receiver wakes up now in do_mq_timedreceive::wq_sleep, it will see `state == STATE_READY` and break. 5. do_mq_timedreceive returns, and `ewq_addr` is no longer guaranteed to be a `struct ext_wait_queue *` since it was on do_mq_timedreceive's stack. (Although the address may not get overwritten until another function happens to touch it, which means it can persist around for an indefinite time.) 6. do_mq_timedsend::__pipelined_op() still believes `ewq_addr` is a `struct ext_wait_queue *`, and uses it to find a task_struct to pass to the wake_q_add_safe call. In the lucky case where nothing has overwritten `ewq_addr` yet, `ewq_addr->task` is the right task_struct. In the unlucky case, __pipelined_op::wake_q_add_safe gets handed a bogus address as the receiver's task_struct causing the crash. do_mq_timedsend::__pipelined_op() should not dereference `this` after setting STATE_READY, as the receiver counterpart is now free to return. Change __pipelined_op to call wake_q_add_safe on the receiver's task_struct returned by get_task_struct, instead of dereferencing `this` which sits on the receiver's stack. As Manfred pointed out, the race potentially also exists in ipc/msg.c::expunge_all and ipc/sem.c::wake_up_sem_queue_prepare. Fix those in the same way. Link: https://lkml.kernel.org/r/20210510102950.12551-1-varad.gautam@suse.com Fixes: c5b2cbdbdac563 ("ipc/mqueue.c: update/document memory barriers") Fixes: 8116b54e7e23ef ("ipc/sem.c: document and update memory barriers") Fixes: 0d97a82ba830d8 ("ipc/msg.c: update and document memory barriers") Signed-off-by: Varad Gautam Reported-by: Matthias von Faber Acked-by: Davidlohr Bueso Acked-by: Manfred Spraul Cc: Christian Brauner Cc: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/mqueue.c | 6 ++++-- ipc/msg.c | 6 ++++-- ipc/sem.c | 6 ++++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 8031464ed4ae..4e4e61111500 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -1004,12 +1004,14 @@ static inline void __pipelined_op(struct wake_q_head *wake_q, struct mqueue_inode_info *info, struct ext_wait_queue *this) { + struct task_struct *task; + list_del(&this->list); - get_task_struct(this->task); + task = get_task_struct(this->task); /* see MQ_BARRIER for purpose/pairing */ smp_store_release(&this->state, STATE_READY); - wake_q_add_safe(wake_q, this->task); + wake_q_add_safe(wake_q, task); } /* pipelined_send() - send a message directly to the task waiting in diff --git a/ipc/msg.c b/ipc/msg.c index acd1bc7af55a..6e6c8e0c9380 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -251,11 +251,13 @@ static void expunge_all(struct msg_queue *msq, int res, struct msg_receiver *msr, *t; list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { - get_task_struct(msr->r_tsk); + struct task_struct *r_tsk; + + r_tsk = get_task_struct(msr->r_tsk); /* see MSG_BARRIER for purpose/pairing */ smp_store_release(&msr->r_msg, ERR_PTR(res)); - wake_q_add_safe(wake_q, msr->r_tsk); + wake_q_add_safe(wake_q, r_tsk); } } diff --git a/ipc/sem.c b/ipc/sem.c index e0ec239680cb..bf534c74293e 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -784,12 +784,14 @@ would_block: static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error, struct wake_q_head *wake_q) { - get_task_struct(q->sleeper); + struct task_struct *sleeper; + + sleeper = get_task_struct(q->sleeper); /* see SEM_BARRIER_2 for purpose/pairing */ smp_store_release(&q->status, error); - wake_q_add_safe(wake_q, q->sleeper); + wake_q_add_safe(wake_q, sleeper); } static void unlink_queue(struct sem_array *sma, struct sem_queue *q) From 4d1cd3b2c5c1c32826454de3a18c6183238d47ed Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 22 May 2021 17:41:53 -0700 Subject: [PATCH 526/730] tools/testing/selftests/exec: fix link error Fix the link error by adding '-static': gcc -Wall -Wl,-z,max-page-size=0x1000 -pie load_address.c -o /home/yang/linux/tools/testing/selftests/exec/load_address_4096 /usr/bin/ld: /tmp/ccopEGun.o: relocation R_AARCH64_ADR_PREL_PG_HI21 against symbol `stderr@@GLIBC_2.17' which may bind externally can not be used when making a shared object; recompile with -fPIC /usr/bin/ld: /tmp/ccopEGun.o(.text+0x158): unresolvable R_AARCH64_ADR_PREL_PG_HI21 relocation against symbol `stderr@@GLIBC_2.17' /usr/bin/ld: final link failed: bad value collect2: error: ld returned 1 exit status make: *** [Makefile:25: tools/testing/selftests/exec/load_address_4096] Error 1 Link: https://lkml.kernel.org/r/20210514092422.2367367-1-yangyingliang@huawei.com Fixes: 206e22f01941 ("tools/testing/selftests: add self-test for verifying load alignment") Signed-off-by: Yang Yingliang Cc: Chris Kennelly Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/exec/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index cf69b2fcce59..dd61118df66e 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -28,8 +28,8 @@ $(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat cp $< $@ chmod -x $@ $(OUTPUT)/load_address_4096: load_address.c - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie $< -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie -static $< -o $@ $(OUTPUT)/load_address_2097152: load_address.c - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie $< -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie -static $< -o $@ $(OUTPUT)/load_address_16777216: load_address.c - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie $< -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie -static $< -o $@ From f70b00496f2a0669fdb19a783e613bdbdedcf901 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Sat, 22 May 2021 17:41:56 -0700 Subject: [PATCH 527/730] kasan: slab: always reset the tag in get_freepointer_safe() With CONFIG_DEBUG_PAGEALLOC enabled, the kernel should also untag the object pointer, as done in get_freepointer(). Failing to do so reportedly leads to SLUB freelist corruptions that manifest as boot-time crashes. Link: https://lkml.kernel.org/r/20210514072228.534418-1-glider@google.com Signed-off-by: Alexander Potapenko Cc: Marco Elver Cc: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Andrey Konovalov Cc: Elliot Berman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/slub.c b/mm/slub.c index 438fa8d4c970..3f96e099817a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -301,6 +301,7 @@ static inline void *get_freepointer_safe(struct kmem_cache *s, void *object) if (!debug_pagealloc_enabled_static()) return get_freepointer(s, object); + object = kasan_reset_tag(object); freepointer_addr = (unsigned long)object + s->offset; copy_from_kernel_nofault(&p, (void **)freepointer_addr, sizeof(p)); return freelist_ptr(s, p, freepointer_addr); From 0f90b88dbcd1143e0f408502eba0af97429c502a Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Sat, 22 May 2021 17:41:59 -0700 Subject: [PATCH 528/730] watchdog: reliable handling of timestamps Commit 9bf3bc949f8a ("watchdog: cleanup handling of false positives") tried to handle a virtual host stopped by the host a more straightforward and cleaner way. But it introduced a risk of false softlockup reports. The virtual host might be stopped at any time, for example between kvm_check_and_clear_guest_paused() and is_softlockup(). As a result, is_softlockup() might read the updated jiffies and detects a softlockup. A solution might be to put back kvm_check_and_clear_guest_paused() after is_softlockup() and detect it. But it would put back the cycle that complicates the logic. In fact, the handling of all the timestamps is not reliable. The code does not guarantee when and how many times the timestamps are read. For example, "period_ts" might be touched anytime also from NMI and re-read in is_softlockup(). It works just by chance. Fix all the problems by making the code even more explicit. 1. Make sure that "now" and "period_ts" timestamps are read only once. They might be changed at anytime by NMI or when the virtual guest is stopped by the host. Note that "now" timestamp does this implicitly because "jiffies" is marked volatile. 2. "now" time must be read first. The state of "period_ts" will decide whether it will be used or the period will get restarted. 3. kvm_check_and_clear_guest_paused() must be called before reading "period_ts". It touches the variable when the guest was stopped. As a result, "now" timestamp is used only when the watchdog was not touched and the guest not stopped in the meantime. "period_ts" is restarted in all other situations. Link: https://lkml.kernel.org/r/YKT55gw+RZfyoFf7@alley Fixes: 9bf3bc949f8aeefeacea4b ("watchdog: cleanup handling of false positives") Signed-off-by: Petr Mladek Reported-by: Sergey Senozhatsky Reviewed-by: Sergey Senozhatsky Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/watchdog.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 7c397907d0e9..92d3bcc5a5e0 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -302,10 +302,10 @@ void touch_softlockup_watchdog_sync(void) __this_cpu_write(watchdog_report_ts, SOFTLOCKUP_DELAY_REPORT); } -static int is_softlockup(unsigned long touch_ts, unsigned long period_ts) +static int is_softlockup(unsigned long touch_ts, + unsigned long period_ts, + unsigned long now) { - unsigned long now = get_timestamp(); - if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh){ /* Warn about unreasonable delays. */ if (time_after(now, period_ts + get_softlockup_thresh())) @@ -353,8 +353,7 @@ static int softlockup_fn(void *data) /* watchdog kicker functions */ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) { - unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts); - unsigned long period_ts = __this_cpu_read(watchdog_report_ts); + unsigned long touch_ts, period_ts, now; struct pt_regs *regs = get_irq_regs(); int duration; int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace; @@ -376,12 +375,23 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) /* .. and repeat */ hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); + /* + * Read the current timestamp first. It might become invalid anytime + * when a virtual machine is stopped by the host or when the watchog + * is touched from NMI. + */ + now = get_timestamp(); /* * If a virtual machine is stopped by the host it can look to - * the watchdog like a soft lockup. Check to see if the host - * stopped the vm before we process the timestamps. + * the watchdog like a soft lockup. This function touches the watchdog. */ kvm_check_and_clear_guest_paused(); + /* + * The stored timestamp is comparable with @now only when not touched. + * It might get touched anytime from NMI. Make sure that is_softlockup() + * uses the same (valid) value. + */ + period_ts = READ_ONCE(*this_cpu_ptr(&watchdog_report_ts)); /* Reset the interval when touched by known problematic code. */ if (period_ts == SOFTLOCKUP_DELAY_REPORT) { @@ -398,13 +408,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) return HRTIMER_RESTART; } - /* check for a softlockup - * This is done by making sure a high priority task is - * being scheduled. The task touches the watchdog to - * indicate it is getting cpu time. If it hasn't then - * this is a good indication some task is hogging the cpu - */ - duration = is_softlockup(touch_ts, period_ts); + /* Check for a softlockup. */ + touch_ts = __this_cpu_read(watchdog_touch_ts); + duration = is_softlockup(touch_ts, period_ts, now); if (unlikely(duration)) { /* * Prevent multiple soft-lockup reports if one cpu is already From f747e6667ebb2ffb8133486c9cd19800d72b0d98 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Sat, 22 May 2021 17:42:02 -0700 Subject: [PATCH 529/730] linux/bits.h: fix compilation error with GENMASK GENMASK() has an input check which uses __builtin_choose_expr() to enable a compile time sanity check of its inputs if they are known at compile time. However, it turns out that __builtin_constant_p() does not always return a compile time constant [0]. It was thought this problem was fixed with gcc 4.9 [1], but apparently this is not the case [2]. Switch to use __is_constexpr() instead which always returns a compile time constant, regardless of its inputs. Link: https://lore.kernel.org/lkml/42b4342b-aefc-a16a-0d43-9f9c0d63ba7a@rasmusvillemoes.dk [0] Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=19449 [1] Link: https://lore.kernel.org/lkml/1ac7bbc2-45d9-26ed-0b33-bf382b8d858b@I-love.SAKURA.ne.jp [2] Link: https://lkml.kernel.org/r/20210511203716.117010-1-rikard.falkeborn@gmail.com Signed-off-by: Rikard Falkeborn Reported-by: Tetsuo Handa Acked-by: Arnd Bergmann Reviewed-by: Andy Shevchenko Cc: Ard Biesheuvel Cc: Yury Norov Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bits.h | 2 +- include/linux/const.h | 8 ++++++++ include/linux/minmax.h | 10 ++-------- tools/include/linux/bits.h | 2 +- tools/include/linux/const.h | 8 ++++++++ 5 files changed, 20 insertions(+), 10 deletions(-) diff --git a/include/linux/bits.h b/include/linux/bits.h index 7f475d59a097..87d112650dfb 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -22,7 +22,7 @@ #include #define GENMASK_INPUT_CHECK(h, l) \ (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ - __builtin_constant_p((l) > (h)), (l) > (h), 0))) + __is_constexpr((l) > (h)), (l) > (h), 0))) #else /* * BUILD_BUG_ON_ZERO is not available in h files included from asm files, diff --git a/include/linux/const.h b/include/linux/const.h index 81b8aae5a855..435ddd72d2c4 100644 --- a/include/linux/const.h +++ b/include/linux/const.h @@ -3,4 +3,12 @@ #include +/* + * This returns a constant expression while determining if an argument is + * a constant expression, most importantly without evaluating the argument. + * Glory to Martin Uecker + */ +#define __is_constexpr(x) \ + (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) + #endif /* _LINUX_CONST_H */ diff --git a/include/linux/minmax.h b/include/linux/minmax.h index c0f57b0c64d9..5433c08fcc68 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -2,6 +2,8 @@ #ifndef _LINUX_MINMAX_H #define _LINUX_MINMAX_H +#include + /* * min()/max()/clamp() macros must accomplish three things: * @@ -17,14 +19,6 @@ #define __typecheck(x, y) \ (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) -/* - * This returns a constant expression while determining if an argument is - * a constant expression, most importantly without evaluating the argument. - * Glory to Martin Uecker - */ -#define __is_constexpr(x) \ - (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) - #define __no_side_effects(x, y) \ (__is_constexpr(x) && __is_constexpr(y)) diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 7f475d59a097..87d112650dfb 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -22,7 +22,7 @@ #include #define GENMASK_INPUT_CHECK(h, l) \ (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ - __builtin_constant_p((l) > (h)), (l) > (h), 0))) + __is_constexpr((l) > (h)), (l) > (h), 0))) #else /* * BUILD_BUG_ON_ZERO is not available in h files included from asm files, diff --git a/tools/include/linux/const.h b/tools/include/linux/const.h index 81b8aae5a855..435ddd72d2c4 100644 --- a/tools/include/linux/const.h +++ b/tools/include/linux/const.h @@ -3,4 +3,12 @@ #include +/* + * This returns a constant expression while determining if an argument is + * a constant expression, most importantly without evaluating the argument. + * Glory to Martin Uecker + */ +#define __is_constexpr(x) \ + (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) + #endif /* _LINUX_CONST_H */ From 43b2ec977ce33b53e2be30999824b584e2be248a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 22 May 2021 17:42:05 -0700 Subject: [PATCH 530/730] proc: remove Alexey from MAINTAINERS People Cc me and I don't have time. Link: https://lkml.kernel.org/r/YKarMxHJBIhMHQIh@localhost.localdomain Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index def1640cc294..81e1edeceae4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14735,7 +14735,6 @@ W: https://wireless.wiki.kernel.org/en/users/Drivers/p54 F: drivers/net/wireless/intersil/prism54/ PROC FILESYSTEM -R: Alexey Dobriyan L: linux-kernel@vger.kernel.org L: linux-fsdevel@vger.kernel.org S: Maintained From 1b6d63938a9d868df01d5bc6e2da212133121b8d Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 22 May 2021 17:42:08 -0700 Subject: [PATCH 531/730] lib: kunit: suppress a compilation warning of frame size lib/bitfield_kunit.c: In function `test_bitfields_constants': lib/bitfield_kunit.c:93:1: warning: the frame size of 7456 bytes is larger than 2048 bytes [-Wframe-larger-than=] } ^ As the description of BITFIELD_KUNIT in lib/Kconfig.debug, it "Only useful for kernel devs running the KUnit test harness, and not intended for inclusion into a production build". Therefore, it is not worth modifying variable 'test_bitfields_constants' to clear this warning. Just suppress it. Link: https://lkml.kernel.org/r/20210518094533.7652-1-thunder.leizhen@huawei.com Signed-off-by: Zhen Lei Cc: Shuah Khan Cc: Vitor Massaru Iha Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Makefile b/lib/Makefile index e11cfc18b6c0..2cc359ec1fdd 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -348,6 +348,7 @@ obj-$(CONFIG_OBJAGG) += objagg.o obj-$(CONFIG_PLDMFW) += pldmfw/ # KUnit tests +CFLAGS_bitfield_kunit.o := $(call cc-option,-Wframe-larger-than=10240) obj-$(CONFIG_BITFIELD_KUNIT) += bitfield_kunit.o obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o From e32905e57358fdfb82f9de024534f205b3af7dac Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Sat, 22 May 2021 17:42:11 -0700 Subject: [PATCH 532/730] userfaultfd: hugetlbfs: fix new flag usage in error path In commit d6995da31122 ("hugetlb: use page.private for hugetlb specific page flags") the use of PagePrivate to indicate a reservation count should be restored at free time was changed to the hugetlb specific flag HPageRestoreReserve. Changes to a userfaultfd error path as well as a VM_BUG_ON() in remove_inode_hugepages() were overlooked. Users could see incorrect hugetlb reserve counts if they experience an error with a UFFDIO_COPY operation. Specifically, this would be the result of an unlikely copy_huge_page_from_user error. There is not an increased chance of hitting the VM_BUG_ON. Link: https://lkml.kernel.org/r/20210521233952.236434-1-mike.kravetz@oracle.com Fixes: d6995da31122 ("hugetlb: use page.private for hugetlb specific page flags") Signed-off-by: Mike Kravetz Reviewed-by: Mina Almasry Cc: Oscar Salvador Cc: Michal Hocko Cc: Muchun Song Cc: Naoya Horiguchi Cc: David Hildenbrand Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Mina Almasry Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 2 +- mm/userfaultfd.c | 28 ++++++++++++++-------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 9d9e0097c1d3..55efd3dd04f6 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -529,7 +529,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, * the subpool and global reserve usage count can need * to be adjusted. */ - VM_BUG_ON(PagePrivate(page)); + VM_BUG_ON(HPageRestoreReserve(page)); remove_huge_page(page); freed++; if (!truncate_op) { diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index e14b3820c6a8..63a73e164d55 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -360,38 +360,38 @@ out: * If a reservation for the page existed in the reservation * map of a private mapping, the map was modified to indicate * the reservation was consumed when the page was allocated. - * We clear the PagePrivate flag now so that the global + * We clear the HPageRestoreReserve flag now so that the global * reserve count will not be incremented in free_huge_page. * The reservation map will still indicate the reservation * was consumed and possibly prevent later page allocation. * This is better than leaking a global reservation. If no - * reservation existed, it is still safe to clear PagePrivate - * as no adjustments to reservation counts were made during - * allocation. + * reservation existed, it is still safe to clear + * HPageRestoreReserve as no adjustments to reservation counts + * were made during allocation. * * The reservation map for shared mappings indicates which * pages have reservations. When a huge page is allocated * for an address with a reservation, no change is made to - * the reserve map. In this case PagePrivate will be set - * to indicate that the global reservation count should be + * the reserve map. In this case HPageRestoreReserve will be + * set to indicate that the global reservation count should be * incremented when the page is freed. This is the desired * behavior. However, when a huge page is allocated for an * address without a reservation a reservation entry is added - * to the reservation map, and PagePrivate will not be set. - * When the page is freed, the global reserve count will NOT - * be incremented and it will appear as though we have leaked - * reserved page. In this case, set PagePrivate so that the - * global reserve count will be incremented to match the - * reservation map entry which was created. + * to the reservation map, and HPageRestoreReserve will not be + * set. When the page is freed, the global reserve count will + * NOT be incremented and it will appear as though we have + * leaked reserved page. In this case, set HPageRestoreReserve + * so that the global reserve count will be incremented to + * match the reservation map entry which was created. * * Note that vm_alloc_shared is based on the flags of the vma * for which the page was originally allocated. dst_vma could * be different or NULL on error. */ if (vm_alloc_shared) - SetPagePrivate(page); + SetHPageRestoreReserve(page); else - ClearPagePrivate(page); + ClearHPageRestoreReserve(page); put_page(page); } BUG_ON(copied < 0); From eac2f3059e02382d91f8c887462083841d6ea2a3 Mon Sep 17 00:00:00 2001 From: Chen Huang Date: Thu, 29 Apr 2021 07:03:48 +0000 Subject: [PATCH 533/730] riscv: stacktrace: fix the riscv stacktrace when CONFIG_FRAME_POINTER enabled As [1] and [2] said, the arch_stack_walk should not to trace itself, or it will leave the trace unexpectedly when called. The example is when we do "cat /sys/kernel/debug/page_owner", all pages' stack is the same. arch_stack_walk+0x18/0x20 stack_trace_save+0x40/0x60 register_dummy_stack+0x24/0x5e init_page_owner+0x2e So we use __builtin_frame_address(1) as the first frame to be walked. And mark the arch_stack_walk() noinline. We found that pr_cont will affact pages' stack whose task state is RUNNING when testing "echo t > /proc/sysrq-trigger". So move the place of pr_cont and mark the function dump_backtrace() noinline. Also we move the case when task == NULL into else branch, and test for it in "echo c > /proc/sysrq-trigger". [1] https://lore.kernel.org/lkml/20210319184106.5688-1-mark.rutland@arm.com/ [2] https://lore.kernel.org/lkml/20210317142050.57712-1-chenjun102@huawei.com/ Signed-off-by: Chen Huang Fixes: 5d8544e2d007 ("RISC-V: Generic library routines and assembly") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/stacktrace.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 2b3e0cb90d78..bde85fc53357 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -27,10 +27,10 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, fp = frame_pointer(regs); sp = user_stack_pointer(regs); pc = instruction_pointer(regs); - } else if (task == NULL || task == current) { - fp = (unsigned long)__builtin_frame_address(0); - sp = sp_in_global; - pc = (unsigned long)walk_stackframe; + } else if (task == current) { + fp = (unsigned long)__builtin_frame_address(1); + sp = (unsigned long)__builtin_frame_address(0); + pc = (unsigned long)__builtin_return_address(0); } else { /* task blocked in __switch_to */ fp = task->thread.s[0]; @@ -106,15 +106,15 @@ static bool print_trace_address(void *arg, unsigned long pc) return true; } -void dump_backtrace(struct pt_regs *regs, struct task_struct *task, +noinline void dump_backtrace(struct pt_regs *regs, struct task_struct *task, const char *loglvl) { - pr_cont("%sCall Trace:\n", loglvl); walk_stackframe(task, regs, print_trace_address, (void *)loglvl); } void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) { + pr_cont("%sCall Trace:\n", loglvl); dump_backtrace(NULL, task, loglvl); } @@ -139,7 +139,7 @@ unsigned long get_wchan(struct task_struct *task) #ifdef CONFIG_STACKTRACE -void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, +noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task, struct pt_regs *regs) { walk_stackframe(task, regs, consume_entry, cookie); From 97a031082320897ee5b06352d0ab3d7cf47321d3 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 7 May 2021 17:47:15 +0800 Subject: [PATCH 534/730] riscv: Select ARCH_USE_MEMTEST As of commit dce44566192e ("mm/memtest: add ARCH_USE_MEMTEST"), architectures must select ARCH_USE_MEMTESET to enable CONFIG_MEMTEST. Signed-off-by: Kefeng Wang Fixes: f6e5aedf470b ("riscv: Add support for memtest") Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index a8ad8eb76120..c5914e70a0fd 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -34,6 +34,7 @@ config RISCV select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT select ARCH_SUPPORTS_HUGETLBFS if MMU + select ARCH_USE_MEMTEST select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if 64BIT From 02ccdeed1817a587161ad091887e11ac8a2586b2 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Sat, 8 May 2021 23:43:47 +0800 Subject: [PATCH 535/730] riscv: kprobes: Fix build error when MMU=n lkp reported a randconfig failure: arch/riscv/kernel/probes/kprobes.c:90:22: error: use of undeclared identifier 'PAGE_KERNEL_READ_EXEC' We implemented the alloc_insn_page() to allocate PAGE_KERNEL_READ_EXEC page for kprobes insn page for STRICT_MODULE_RWX. But if MMU=n, we should fall back to the generic weak alloc_insn_page() by generic kprobe subsystem. Fixes: cdd1b2bd358f ("riscv: kprobes: Implement alloc_insn_page()") Signed-off-by: Jisheng Zhang Reported-by: kernel test robot Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/probes/kprobes.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index 10b965c34536..15cc65ac7ca6 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -84,6 +84,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return 0; } +#ifdef CONFIG_MMU void *alloc_insn_page(void) { return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END, @@ -91,6 +92,7 @@ void *alloc_insn_page(void) VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, __builtin_return_address(0)); } +#endif /* install breakpoint in text */ void __kprobes arch_arm_kprobe(struct kprobe *p) From bab0d47c0ebb50ae0bcfa4e84986a60113bf7d6b Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Sun, 9 May 2021 00:44:43 +0800 Subject: [PATCH 536/730] riscv: kexec: Fix W=1 build warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following W=1 build warning(s): In file included from include/linux/kexec.h:28, from arch/riscv/kernel/machine_kexec.c:7: arch/riscv/include/asm/kexec.h:45:1: warning: ‘extern’ is not at beginning of declaration [-Wold-style-declaration] 45 | const extern unsigned char riscv_kexec_relocate[]; | ^~~~~ arch/riscv/include/asm/kexec.h:46:1: warning: ‘extern’ is not at beginning of declaration [-Wold-style-declaration] 46 | const extern unsigned int riscv_kexec_relocate_size; | ^~~~~ arch/riscv/kernel/machine_kexec.c:125:6: warning: no previous prototype for ‘machine_shutdown’ [-Wmissing-prototypes] 125 | void machine_shutdown(void) | ^~~~~~~~~~~~~~~~ arch/riscv/kernel/machine_kexec.c:147:1: warning: no previous prototype for ‘machine_crash_shutdown’ [-Wmissing-prototypes] 147 | machine_crash_shutdown(struct pt_regs *regs) | ^~~~~~~~~~~~~~~~~~~~~~ arch/riscv/kernel/machine_kexec.c:23: warning: Function parameter or member 'image' not described in 'kexec_image_info' arch/riscv/kernel/machine_kexec.c:53: warning: Function parameter or member 'image' not described in 'machine_kexec_prepare' arch/riscv/kernel/machine_kexec.c:114: warning: Function parameter or member 'image' not described in 'machine_kexec_cleanup' arch/riscv/kernel/machine_kexec.c:148: warning: Function parameter or member 'regs' not described in 'machine_crash_shutdown' arch/riscv/kernel/machine_kexec.c:167: warning: Function parameter or member 'image' not described in 'machine_kexec' Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/kexec.h | 4 ++-- arch/riscv/kernel/machine_kexec.c | 11 ++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/riscv/include/asm/kexec.h b/arch/riscv/include/asm/kexec.h index 1e954101906a..e4e291d40759 100644 --- a/arch/riscv/include/asm/kexec.h +++ b/arch/riscv/include/asm/kexec.h @@ -42,8 +42,8 @@ struct kimage_arch { unsigned long fdt_addr; }; -const extern unsigned char riscv_kexec_relocate[]; -const extern unsigned int riscv_kexec_relocate_size; +extern const unsigned char riscv_kexec_relocate[]; +extern const unsigned int riscv_kexec_relocate_size; typedef void (*riscv_kexec_method)(unsigned long first_ind_entry, unsigned long jump_addr, diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index cc048143fba5..9e99e1db156b 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -14,8 +14,9 @@ #include /* For set_memory_x() */ #include /* For unreachable() */ #include /* For cpu_down() */ +#include -/** +/* * kexec_image_info - Print received image details */ static void @@ -39,7 +40,7 @@ kexec_image_info(const struct kimage *image) } } -/** +/* * machine_kexec_prepare - Initialize kexec * * This function is called from do_kexec_load, when the user has @@ -100,7 +101,7 @@ machine_kexec_prepare(struct kimage *image) } -/** +/* * machine_kexec_cleanup - Cleanup any leftovers from * machine_kexec_prepare * @@ -135,7 +136,7 @@ void machine_shutdown(void) #endif } -/** +/* * machine_crash_shutdown - Prepare to kexec after a kernel crash * * This function is called by crash_kexec just before machine_kexec @@ -151,7 +152,7 @@ machine_crash_shutdown(struct pt_regs *regs) pr_info("Starting crashdump kernel...\n"); } -/** +/* * machine_kexec - Jump to the loaded kimage * * This function is called by kernel_kexec which is called by the From 2b899f31f1a6db2db4608bac2ac04fe2c4ad89eb Mon Sep 17 00:00:00 2001 From: kernel test robot Date: Sun, 23 May 2021 02:09:00 +0800 Subject: [PATCH 537/730] ALSA: usb-audio: scarlett2: snd_scarlett_gen2_controls_create() can be static sound/usb/mixer_scarlett_gen2.c:2000:5: warning: symbol 'snd_scarlett_gen2_controls_create' was not declared. Should it be static? Fixes: 265d1a90e4fb ("ALSA: usb-audio: scarlett2: Improve driver startup messages") Reported-by: kernel test robot Signed-off-by: kernel test robot Link: https://lore.kernel.org/r/20210522180900.GA83915@f59a3af2f1d9 Signed-off-by: Takashi Iwai --- sound/usb/mixer_scarlett_gen2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index 3ad8f61a2095..4caf379d5b99 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -1997,8 +1997,8 @@ static int scarlett2_mixer_status_create(struct usb_mixer_interface *mixer) return usb_submit_urb(mixer->urb, GFP_KERNEL); } -int snd_scarlett_gen2_controls_create(struct usb_mixer_interface *mixer, - const struct scarlett2_device_info *info) +static int snd_scarlett_gen2_controls_create(struct usb_mixer_interface *mixer, + const struct scarlett2_device_info *info) { int err; From c4681547bcce777daf576925a966ffa824edd09d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 23 May 2021 11:42:48 -1000 Subject: [PATCH 538/730] Linux 5.13-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0ed7e061c8e9..e4468353425a 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 13 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Frozen Wasteland # *DOCUMENTATION* From 1e69abf98921fa27e2064970b614502d85230f9f Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Fri, 21 May 2021 17:46:54 -0700 Subject: [PATCH 539/730] MAINTAINERS: Add entries for CBS, ETF and taprio qdiscs Add Vinicius Costa Gomes as maintainer for these qdiscs. These qdiscs are all TSN (Time Sensitive Networking) related. Signed-off-by: Vinicius Costa Gomes Acked-by: Cong Wang Signed-off-by: David S. Miller --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 251111e5da53..2cc1cb72bc92 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4138,6 +4138,14 @@ S: Odd Fixes F: Documentation/devicetree/bindings/arm/cavium-thunder2.txt F: arch/arm64/boot/dts/cavium/thunder2-99xx* +CBS/ETF/TAPRIO QDISCS +M: Vinicius Costa Gomes +S: Maintained +L: netdev@vger.kernel.org +F: net/sched/sch_cbs.c +F: net/sched/sch_etf.c +F: net/sched/sch_taprio.c + CC2520 IEEE-802.15.4 RADIO DRIVER M: Varka Bhadram L: linux-wpan@vger.kernel.org From ad79fd2c42f7626bdf6935cd72134c2a5a59ff2d Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Sat, 22 May 2021 09:56:30 +0200 Subject: [PATCH 540/730] net: ethernet: mtk_eth_soc: Fix packet statistics support for MT7628/88 The MT7628/88 SoC(s) have other (limited) packet counter registers than currently supported in the mtk_eth_soc driver. This patch adds support for reading these registers, so that the packet statistics are correctly updated. Additionally the defines for the non-MT7628 variant packet counter registers are added and used in this patch instead of using hard coded values. Signed-off-by: Stefan Roese Fixes: 296c9120752b ("net: ethernet: mediatek: Add MT7628/88 SoC support") Cc: Felix Fietkau Cc: John Crispin Cc: Ilya Lipnitskiy Cc: Reto Schneider Cc: Reto Schneider Cc: David S. Miller Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 67 ++++++++++++++------- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 24 +++++++- 2 files changed, 66 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index d6cc06ee0caa..64adfd24e134 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -681,32 +681,53 @@ static int mtk_set_mac_address(struct net_device *dev, void *p) void mtk_stats_update_mac(struct mtk_mac *mac) { struct mtk_hw_stats *hw_stats = mac->hw_stats; - unsigned int base = MTK_GDM1_TX_GBCNT; - u64 stats; - - base += hw_stats->reg_offset; + struct mtk_eth *eth = mac->hw; u64_stats_update_begin(&hw_stats->syncp); - hw_stats->rx_bytes += mtk_r32(mac->hw, base); - stats = mtk_r32(mac->hw, base + 0x04); - if (stats) - hw_stats->rx_bytes += (stats << 32); - hw_stats->rx_packets += mtk_r32(mac->hw, base + 0x08); - hw_stats->rx_overflow += mtk_r32(mac->hw, base + 0x10); - hw_stats->rx_fcs_errors += mtk_r32(mac->hw, base + 0x14); - hw_stats->rx_short_errors += mtk_r32(mac->hw, base + 0x18); - hw_stats->rx_long_errors += mtk_r32(mac->hw, base + 0x1c); - hw_stats->rx_checksum_errors += mtk_r32(mac->hw, base + 0x20); - hw_stats->rx_flow_control_packets += - mtk_r32(mac->hw, base + 0x24); - hw_stats->tx_skip += mtk_r32(mac->hw, base + 0x28); - hw_stats->tx_collisions += mtk_r32(mac->hw, base + 0x2c); - hw_stats->tx_bytes += mtk_r32(mac->hw, base + 0x30); - stats = mtk_r32(mac->hw, base + 0x34); - if (stats) - hw_stats->tx_bytes += (stats << 32); - hw_stats->tx_packets += mtk_r32(mac->hw, base + 0x38); + if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) { + hw_stats->tx_packets += mtk_r32(mac->hw, MT7628_SDM_TPCNT); + hw_stats->tx_bytes += mtk_r32(mac->hw, MT7628_SDM_TBCNT); + hw_stats->rx_packets += mtk_r32(mac->hw, MT7628_SDM_RPCNT); + hw_stats->rx_bytes += mtk_r32(mac->hw, MT7628_SDM_RBCNT); + hw_stats->rx_checksum_errors += + mtk_r32(mac->hw, MT7628_SDM_CS_ERR); + } else { + unsigned int offs = hw_stats->reg_offset; + u64 stats; + + hw_stats->rx_bytes += mtk_r32(mac->hw, + MTK_GDM1_RX_GBCNT_L + offs); + stats = mtk_r32(mac->hw, MTK_GDM1_RX_GBCNT_H + offs); + if (stats) + hw_stats->rx_bytes += (stats << 32); + hw_stats->rx_packets += + mtk_r32(mac->hw, MTK_GDM1_RX_GPCNT + offs); + hw_stats->rx_overflow += + mtk_r32(mac->hw, MTK_GDM1_RX_OERCNT + offs); + hw_stats->rx_fcs_errors += + mtk_r32(mac->hw, MTK_GDM1_RX_FERCNT + offs); + hw_stats->rx_short_errors += + mtk_r32(mac->hw, MTK_GDM1_RX_SERCNT + offs); + hw_stats->rx_long_errors += + mtk_r32(mac->hw, MTK_GDM1_RX_LENCNT + offs); + hw_stats->rx_checksum_errors += + mtk_r32(mac->hw, MTK_GDM1_RX_CERCNT + offs); + hw_stats->rx_flow_control_packets += + mtk_r32(mac->hw, MTK_GDM1_RX_FCCNT + offs); + hw_stats->tx_skip += + mtk_r32(mac->hw, MTK_GDM1_TX_SKIPCNT + offs); + hw_stats->tx_collisions += + mtk_r32(mac->hw, MTK_GDM1_TX_COLCNT + offs); + hw_stats->tx_bytes += + mtk_r32(mac->hw, MTK_GDM1_TX_GBCNT_L + offs); + stats = mtk_r32(mac->hw, MTK_GDM1_TX_GBCNT_H + offs); + if (stats) + hw_stats->tx_bytes += (stats << 32); + hw_stats->tx_packets += + mtk_r32(mac->hw, MTK_GDM1_TX_GPCNT + offs); + } + u64_stats_update_end(&hw_stats->syncp); } diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 11331b44ba07..5ef70dd8b49c 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -278,8 +278,21 @@ /* QDMA FQ Free Page Buffer Length Register */ #define MTK_QDMA_FQ_BLEN 0x1B2C -/* GMA1 Received Good Byte Count Register */ -#define MTK_GDM1_TX_GBCNT 0x2400 +/* GMA1 counter / statics register */ +#define MTK_GDM1_RX_GBCNT_L 0x2400 +#define MTK_GDM1_RX_GBCNT_H 0x2404 +#define MTK_GDM1_RX_GPCNT 0x2408 +#define MTK_GDM1_RX_OERCNT 0x2410 +#define MTK_GDM1_RX_FERCNT 0x2414 +#define MTK_GDM1_RX_SERCNT 0x2418 +#define MTK_GDM1_RX_LENCNT 0x241c +#define MTK_GDM1_RX_CERCNT 0x2420 +#define MTK_GDM1_RX_FCCNT 0x2424 +#define MTK_GDM1_TX_SKIPCNT 0x2428 +#define MTK_GDM1_TX_COLCNT 0x242c +#define MTK_GDM1_TX_GBCNT_L 0x2430 +#define MTK_GDM1_TX_GBCNT_H 0x2434 +#define MTK_GDM1_TX_GPCNT 0x2438 #define MTK_STAT_OFFSET 0x40 /* QDMA descriptor txd4 */ @@ -502,6 +515,13 @@ #define MT7628_SDM_MAC_ADRL (MT7628_SDM_OFFSET + 0x0c) #define MT7628_SDM_MAC_ADRH (MT7628_SDM_OFFSET + 0x10) +/* Counter / stat register */ +#define MT7628_SDM_TPCNT (MT7628_SDM_OFFSET + 0x100) +#define MT7628_SDM_TBCNT (MT7628_SDM_OFFSET + 0x104) +#define MT7628_SDM_RPCNT (MT7628_SDM_OFFSET + 0x108) +#define MT7628_SDM_RBCNT (MT7628_SDM_OFFSET + 0x10c) +#define MT7628_SDM_CS_ERR (MT7628_SDM_OFFSET + 0x110) + struct mtk_rx_dma { unsigned int rxd1; unsigned int rxd2; From 5eff1461a6dec84f04fafa9128548bad51d96147 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Sat, 22 May 2021 17:16:11 +0800 Subject: [PATCH 541/730] net: macb: ensure the device is available before accessing GEMGXL control registers If runtime power menagement is enabled, the gigabit ethernet PLL would be disabled after macb_probe(). During this period of time, the system would hang up if we try to access GEMGXL control registers. We can't put runtime_pm_get/runtime_pm_put/ there due to the issue of sleep inside atomic section (7fa2955ff70ce453 ("sh_eth: Fix sleeping function called from invalid context"). Add netif_running checking to ensure the device is available before accessing GEMGXL device. Changed in v2: - Use netif_running instead of its own flag Signed-off-by: Zong Li Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 6bc7d41d519b..a0c7b1167dbb 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -2867,6 +2867,9 @@ static struct net_device_stats *gem_get_stats(struct macb *bp) struct gem_stats *hwstat = &bp->hw_stats.gem; struct net_device_stats *nstat = &bp->dev->stats; + if (!netif_running(bp->dev)) + return nstat; + gem_update_stats(bp); nstat->rx_errors = (hwstat->rx_frame_check_sequence_errors + From 3a62fed2fd7b6fea96d720e779cafc30dfb3a22e Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Sat, 22 May 2021 15:14:45 +0200 Subject: [PATCH 542/730] net/sched: fq_pie: re-factor fix for fq_pie endless loop the patch that fixed an endless loop in_fq_pie_init() was not considering that 65535 is a valid class id. The correct bugfix for this infinite loop is to change 'idx' to become an u32, like Colin proposed in the past [1]. Fix this as follows: - restore 65536 as maximum possible values of 'flows_cnt' - use u32 'idx' when iterating on 'q->flows' - fix the TDC selftest This reverts commit bb2f930d6dd708469a587dc9ed1efe1ef969c0bf. [1] https://lore.kernel.org/netdev/20210407163808.499027-1-colin.king@canonical.com/ CC: Colin Ian King CC: stable@vger.kernel.org Fixes: bb2f930d6dd7 ("net/sched: fix infinite loop in sch_fq_pie") Fixes: ec97ecf1ebe4 ("net: sched: add Flow Queue PIE packet scheduler") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/sch_fq_pie.c | 10 +++++----- .../selftests/tc-testing/tc-tests/qdiscs/fq_pie.json | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index 949163fe68af..266c7c1869d9 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -297,9 +297,9 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt, goto flow_error; } q->flows_cnt = nla_get_u32(tb[TCA_FQ_PIE_FLOWS]); - if (!q->flows_cnt || q->flows_cnt >= 65536) { + if (!q->flows_cnt || q->flows_cnt > 65536) { NL_SET_ERR_MSG_MOD(extack, - "Number of flows must range in [1..65535]"); + "Number of flows must range in [1..65536]"); goto flow_error; } } @@ -367,7 +367,7 @@ static void fq_pie_timer(struct timer_list *t) struct fq_pie_sched_data *q = from_timer(q, t, adapt_timer); struct Qdisc *sch = q->sch; spinlock_t *root_lock; /* to lock qdisc for probability calculations */ - u16 idx; + u32 idx; root_lock = qdisc_lock(qdisc_root_sleeping(sch)); spin_lock(root_lock); @@ -388,7 +388,7 @@ static int fq_pie_init(struct Qdisc *sch, struct nlattr *opt, { struct fq_pie_sched_data *q = qdisc_priv(sch); int err; - u16 idx; + u32 idx; pie_params_init(&q->p_params); sch->limit = 10 * 1024; @@ -500,7 +500,7 @@ static int fq_pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) static void fq_pie_reset(struct Qdisc *sch) { struct fq_pie_sched_data *q = qdisc_priv(sch); - u16 idx; + u32 idx; INIT_LIST_HEAD(&q->new_flows); INIT_LIST_HEAD(&q->old_flows); diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json index 1cda2e11b3ad..773c5027553d 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json @@ -9,11 +9,11 @@ "setup": [ "$IP link add dev $DUMMY type dummy || /bin/true" ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY root fq_pie flows 65536", - "expExitCode": "2", + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_pie flows 65536", + "expExitCode": "0", "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc", - "matchCount": "0", + "matchPattern": "qdisc fq_pie 1: root refcnt 2 limit 10240p flows 65536", + "matchCount": "1", "teardown": [ "$IP link del dev $DUMMY" ] From e70f7a11876a1a788ceadf75e9e5f7af2c868680 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Sat, 22 May 2021 15:15:13 +0200 Subject: [PATCH 543/730] net/sched: fq_pie: fix OOB access in the traffic path the following script: # tc qdisc add dev eth0 handle 0x1 root fq_pie flows 2 # tc qdisc add dev eth0 clsact # tc filter add dev eth0 egress matchall action skbedit priority 0x10002 # ping 192.0.2.2 -I eth0 -c2 -w1 -q produces the following splat: BUG: KASAN: slab-out-of-bounds in fq_pie_qdisc_enqueue+0x1314/0x19d0 [sch_fq_pie] Read of size 4 at addr ffff888171306924 by task ping/942 CPU: 3 PID: 942 Comm: ping Not tainted 5.12.0+ #441 Hardware name: Red Hat KVM, BIOS 1.11.1-4.module+el8.1.0+4066+0f1aadab 04/01/2014 Call Trace: dump_stack+0x92/0xc1 print_address_description.constprop.7+0x1a/0x150 kasan_report.cold.13+0x7f/0x111 fq_pie_qdisc_enqueue+0x1314/0x19d0 [sch_fq_pie] __dev_queue_xmit+0x1034/0x2b10 ip_finish_output2+0xc62/0x2120 __ip_finish_output+0x553/0xea0 ip_output+0x1ca/0x4d0 ip_send_skb+0x37/0xa0 raw_sendmsg+0x1c4b/0x2d00 sock_sendmsg+0xdb/0x110 __sys_sendto+0x1d7/0x2b0 __x64_sys_sendto+0xdd/0x1b0 do_syscall_64+0x3c/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7fe69735c3eb Code: 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 f3 0f 1e fa 48 8d 05 75 42 2c 00 41 89 ca 8b 00 85 c0 75 14 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 75 c3 0f 1f 40 00 41 57 4d 89 c7 41 56 41 89 RSP: 002b:00007fff06d7fb38 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 000055e961413700 RCX: 00007fe69735c3eb RDX: 0000000000000040 RSI: 000055e961413700 RDI: 0000000000000003 RBP: 0000000000000040 R08: 000055e961410500 R09: 0000000000000010 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fff06d81260 R13: 00007fff06d7fb40 R14: 00007fff06d7fc30 R15: 000055e96140f0a0 Allocated by task 917: kasan_save_stack+0x19/0x40 __kasan_kmalloc+0x7f/0xa0 __kmalloc_node+0x139/0x280 fq_pie_init+0x555/0x8e8 [sch_fq_pie] qdisc_create+0x407/0x11b0 tc_modify_qdisc+0x3c2/0x17e0 rtnetlink_rcv_msg+0x346/0x8e0 netlink_rcv_skb+0x120/0x380 netlink_unicast+0x439/0x630 netlink_sendmsg+0x719/0xbf0 sock_sendmsg+0xe2/0x110 ____sys_sendmsg+0x5ba/0x890 ___sys_sendmsg+0xe9/0x160 __sys_sendmsg+0xd3/0x170 do_syscall_64+0x3c/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae The buggy address belongs to the object at ffff888171306800 which belongs to the cache kmalloc-256 of size 256 The buggy address is located 36 bytes to the right of 256-byte region [ffff888171306800, ffff888171306900) The buggy address belongs to the page: page:00000000bcfb624e refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x171306 head:00000000bcfb624e order:1 compound_mapcount:0 flags: 0x17ffffc0010200(slab|head|node=0|zone=2|lastcpupid=0x1fffff) raw: 0017ffffc0010200 dead000000000100 dead000000000122 ffff888100042b40 raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888171306800: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff888171306880: 00 00 00 00 00 00 00 00 00 00 00 00 fc fc fc fc >ffff888171306900: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ^ ffff888171306980: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff888171306a00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fix fq_pie traffic path to avoid selecting 'q->flows + q->flows_cnt' as a valid flow: it's an address beyond the allocated memory. Fixes: ec97ecf1ebe4 ("net: sched: add Flow Queue PIE packet scheduler") CC: stable@vger.kernel.org Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/sch_fq_pie.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index 266c7c1869d9..cac684952edc 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -138,8 +138,15 @@ static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* Classifies packet into corresponding flow */ idx = fq_pie_classify(skb, sch, &ret); - sel_flow = &q->flows[idx]; + if (idx == 0) { + if (ret & __NET_XMIT_BYPASS) + qdisc_qstats_drop(sch); + __qdisc_drop(skb, to_free); + return ret; + } + idx--; + sel_flow = &q->flows[idx]; /* Checks whether adding a new packet would exceed memory limit */ get_pie_cb(skb)->mem_usage = skb->truesize; memory_limited = q->memory_usage > q->memory_limit + skb->truesize; From 474a2ddaa192777522a7499784f1d60691cd831a Mon Sep 17 00:00:00 2001 From: DENG Qingfang Date: Sun, 23 May 2021 22:51:54 +0800 Subject: [PATCH 544/730] net: dsa: mt7530: fix VLAN traffic leaks PCR_MATRIX field was set to all 1's when VLAN filtering is enabled, but was not reset when it is disabled, which may cause traffic leaks: ip link add br0 type bridge vlan_filtering 1 ip link add br1 type bridge vlan_filtering 1 ip link set swp0 master br0 ip link set swp1 master br1 ip link set br0 type bridge vlan_filtering 0 ip link set br1 type bridge vlan_filtering 0 # traffic in br0 and br1 will start leaking to each other As port_bridge_{add,del} have set up PCR_MATRIX properly, remove the PCR_MATRIX write from mt7530_port_set_vlan_aware. Fixes: 83163f7dca56 ("net: dsa: mediatek: add VLAN support for MT7530") Signed-off-by: DENG Qingfang Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/mt7530.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 96f7c9eede35..9b90f3d3a8f5 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1262,14 +1262,6 @@ mt7530_port_set_vlan_aware(struct dsa_switch *ds, int port) { struct mt7530_priv *priv = ds->priv; - /* The real fabric path would be decided on the membership in the - * entry of VLAN table. PCR_MATRIX set up here with ALL_MEMBERS - * means potential VLAN can be consisting of certain subset of all - * ports. - */ - mt7530_rmw(priv, MT7530_PCR_P(port), - PCR_MATRIX_MASK, PCR_MATRIX(MT7530_ALL_MEMBERS)); - /* Trapped into security mode allows packet forwarding through VLAN * table lookup. CPU port is set to fallback mode to let untagged * frames pass through. From 4dd649d130c634415c26df771e09e373f77fc688 Mon Sep 17 00:00:00 2001 From: Aditya Srivastava Date: Mon, 24 May 2021 02:39:09 +0530 Subject: [PATCH 545/730] NFC: nfcmrvl: fix kernel-doc syntax in file headers The opening comment mark '/**' is used for highlighting the beginning of kernel-doc comments. The header for drivers/nfc/nfcmrvl follows this syntax, but the content inside does not comply with kernel-doc. This line was probably not meant for kernel-doc parsing, but is parsed due to the presence of kernel-doc like comment syntax(i.e, '/**'), which causes unexpected warnings from kernel-doc. For e.g., running scripts/kernel-doc -none on drivers/nfc/nfcmrvl/spi.c causes warning: warning: expecting prototype for Marvell NFC(). Prototype was for SPI_WAIT_HANDSHAKE() instead Provide a simple fix by replacing such occurrences with general comment format, i.e. '/*', to prevent kernel-doc from parsing it. Signed-off-by: Aditya Srivastava Acked-by: Randy Dunlap Signed-off-by: David S. Miller --- drivers/nfc/nfcmrvl/fw_dnld.h | 2 +- drivers/nfc/nfcmrvl/i2c.c | 2 +- drivers/nfc/nfcmrvl/nfcmrvl.h | 2 +- drivers/nfc/nfcmrvl/spi.c | 2 +- drivers/nfc/nfcmrvl/uart.c | 2 +- drivers/nfc/nfcmrvl/usb.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/nfc/nfcmrvl/fw_dnld.h b/drivers/nfc/nfcmrvl/fw_dnld.h index ee4a339c05fd..058ce77b3cbc 100644 --- a/drivers/nfc/nfcmrvl/fw_dnld.h +++ b/drivers/nfc/nfcmrvl/fw_dnld.h @@ -1,4 +1,4 @@ -/** +/* * Marvell NFC driver: Firmware downloader * * Copyright (C) 2015, Marvell International Ltd. diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c index 18cd96284b77..c5420616b7bc 100644 --- a/drivers/nfc/nfcmrvl/i2c.c +++ b/drivers/nfc/nfcmrvl/i2c.c @@ -1,4 +1,4 @@ -/** +/* * Marvell NFC-over-I2C driver: I2C interface related functions * * Copyright (C) 2015, Marvell International Ltd. diff --git a/drivers/nfc/nfcmrvl/nfcmrvl.h b/drivers/nfc/nfcmrvl/nfcmrvl.h index de68ff45e49a..e84ee18c73ae 100644 --- a/drivers/nfc/nfcmrvl/nfcmrvl.h +++ b/drivers/nfc/nfcmrvl/nfcmrvl.h @@ -1,4 +1,4 @@ -/** +/* * Marvell NFC driver * * Copyright (C) 2014-2015, Marvell International Ltd. diff --git a/drivers/nfc/nfcmrvl/spi.c b/drivers/nfc/nfcmrvl/spi.c index 8e0ddb434770..dec0d3eb3648 100644 --- a/drivers/nfc/nfcmrvl/spi.c +++ b/drivers/nfc/nfcmrvl/spi.c @@ -1,4 +1,4 @@ -/** +/* * Marvell NFC-over-SPI driver: SPI interface related functions * * Copyright (C) 2015, Marvell International Ltd. diff --git a/drivers/nfc/nfcmrvl/uart.c b/drivers/nfc/nfcmrvl/uart.c index e5a622ce4b95..7194dd7ef0f1 100644 --- a/drivers/nfc/nfcmrvl/uart.c +++ b/drivers/nfc/nfcmrvl/uart.c @@ -1,4 +1,4 @@ -/** +/* * Marvell NFC-over-UART driver * * Copyright (C) 2015, Marvell International Ltd. diff --git a/drivers/nfc/nfcmrvl/usb.c b/drivers/nfc/nfcmrvl/usb.c index 888e298f610b..bcd563cb556c 100644 --- a/drivers/nfc/nfcmrvl/usb.c +++ b/drivers/nfc/nfcmrvl/usb.c @@ -1,4 +1,4 @@ -/** +/* * Marvell NFC-over-USB driver: USB interface related functions * * Copyright (C) 2014, Marvell International Ltd. From 0bc3ee92880d910a1d100b73a781904f359e1f1c Mon Sep 17 00:00:00 2001 From: Kyle Tso Date: Sun, 23 May 2021 09:58:54 +0800 Subject: [PATCH 546/730] usb: typec: tcpm: Properly interrupt VDM AMS When a VDM AMS is interrupted by Messages other than VDM, the AMS needs to be finished properly. Also start a VDM AMS if receiving SVDM Commands from the port partner to complement the functionality of tcpm_vdm_ams(). Fixes: 0908c5aca31e ("usb: typec: tcpm: AMS and Collision Avoidance") Cc: stable Reviewed-by: Guenter Roeck Acked-by: Heikki Krogerus Signed-off-by: Kyle Tso Link: https://lore.kernel.org/r/20210523015855.1785484-2-kyletso@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 8fdfd7f65ad7..6ea5df3782cf 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -1550,6 +1550,8 @@ static int tcpm_pd_svdm(struct tcpm_port *port, struct typec_altmode *adev, if (PD_VDO_SVDM_VER(p[0]) < svdm_version) typec_partner_set_svdm_version(port->partner, PD_VDO_SVDM_VER(p[0])); + + tcpm_ams_start(port, DISCOVER_IDENTITY); /* 6.4.4.3.1: Only respond as UFP (device) */ if (port->data_role == TYPEC_DEVICE && port->nr_snk_vdo) { @@ -1568,14 +1570,19 @@ static int tcpm_pd_svdm(struct tcpm_port *port, struct typec_altmode *adev, } break; case CMD_DISCOVER_SVID: + tcpm_ams_start(port, DISCOVER_SVIDS); break; case CMD_DISCOVER_MODES: + tcpm_ams_start(port, DISCOVER_MODES); break; case CMD_ENTER_MODE: + tcpm_ams_start(port, DFP_TO_UFP_ENTER_MODE); break; case CMD_EXIT_MODE: + tcpm_ams_start(port, DFP_TO_UFP_EXIT_MODE); break; case CMD_ATTENTION: + tcpm_ams_start(port, ATTENTION); /* Attention command does not have response */ *adev_action = ADEV_ATTENTION; return 0; @@ -2287,6 +2294,12 @@ static void tcpm_pd_data_request(struct tcpm_port *port, bool frs_enable; int ret; + if (tcpm_vdm_ams(port) && type != PD_DATA_VENDOR_DEF) { + port->vdm_state = VDM_STATE_ERR_BUSY; + tcpm_ams_finish(port); + mod_vdm_delayed_work(port, 0); + } + switch (type) { case PD_DATA_SOURCE_CAP: for (i = 0; i < cnt; i++) @@ -2459,6 +2472,16 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port, enum pd_ctrl_msg_type type = pd_header_type_le(msg->header); enum tcpm_state next_state; + /* + * Stop VDM state machine if interrupted by other Messages while NOT_SUPP is allowed in + * VDM AMS if waiting for VDM responses and will be handled later. + */ + if (tcpm_vdm_ams(port) && type != PD_CTRL_NOT_SUPP && type != PD_CTRL_GOOD_CRC) { + port->vdm_state = VDM_STATE_ERR_BUSY; + tcpm_ams_finish(port); + mod_vdm_delayed_work(port, 0); + } + switch (type) { case PD_CTRL_GOOD_CRC: case PD_CTRL_PING: @@ -2717,6 +2740,13 @@ static void tcpm_pd_ext_msg_request(struct tcpm_port *port, enum pd_ext_msg_type type = pd_header_type_le(msg->header); unsigned int data_size = pd_ext_header_data_size_le(msg->ext_msg.header); + /* stopping VDM state machine if interrupted by other Messages */ + if (tcpm_vdm_ams(port)) { + port->vdm_state = VDM_STATE_ERR_BUSY; + tcpm_ams_finish(port); + mod_vdm_delayed_work(port, 0); + } + if (!(le16_to_cpu(msg->ext_msg.header) & PD_EXT_HDR_CHUNKED)) { tcpm_pd_handle_msg(port, PD_MSG_CTRL_NOT_SUPP, NONE_AMS); tcpm_log(port, "Unchunked extended messages unsupported"); From a20dcf53ea9836387b229c4878f9559cf1b55b71 Mon Sep 17 00:00:00 2001 From: Kyle Tso Date: Sun, 23 May 2021 09:58:55 +0800 Subject: [PATCH 547/730] usb: typec: tcpm: Respond Not_Supported if no snk_vdo If snk_vdo is not populated from fwnode, it implies the port does not support responding to SVDM commands. Not_Supported Message shall be sent if the contract is in PD3. And for PD2, the port shall ignore the commands. Fixes: 193a68011fdc ("staging: typec: tcpm: Respond to Discover Identity commands") Cc: stable Reviewed-by: Guenter Roeck Acked-by: Heikki Krogerus Signed-off-by: Kyle Tso Link: https://lore.kernel.org/r/20210523015855.1785484-3-kyletso@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 6ea5df3782cf..9ce8c9af4da5 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -2430,7 +2430,10 @@ static void tcpm_pd_data_request(struct tcpm_port *port, NONE_AMS); break; case PD_DATA_VENDOR_DEF: - tcpm_handle_vdm_request(port, msg->payload, cnt); + if (tcpm_vdm_ams(port) || port->nr_snk_vdo) + tcpm_handle_vdm_request(port, msg->payload, cnt); + else if (port->negotiated_rev > PD_REV20) + tcpm_pd_handle_msg(port, PD_MSG_CTRL_NOT_SUPP, NONE_AMS); break; case PD_DATA_BIST: port->bist_request = le32_to_cpu(msg->payload[0]); From e752dbc59e1241b13b8c4f7b6eb582862e7668fe Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 24 May 2021 15:01:55 +0900 Subject: [PATCH 548/730] usb: gadget: udc: renesas_usb3: Fix a race in usb3_start_pipen() The usb3_start_pipen() is called by renesas_usb3_ep_queue() and usb3_request_done_pipen() so that usb3_start_pipen() is possible to cause a race when getting usb3_first_req like below: renesas_usb3_ep_queue() spin_lock_irqsave() list_add_tail() spin_unlock_irqrestore() usb3_start_pipen() usb3_first_req = usb3_get_request() --- [1] --- interrupt --- usb3_irq_dma_int() usb3_request_done_pipen() usb3_get_request() usb3_start_pipen() usb3_first_req = usb3_get_request() ... (the req is possible to be finished in the interrupt) The usb3_first_req [1] above may have been finished after the interrupt ended so that this driver caused to start a transfer wrongly. To fix this issue, getting/checking the usb3_first_req are under spin_lock_irqsave() in the same section. Fixes: 746bfe63bba3 ("usb: gadget: renesas_usb3: add support for Renesas USB3.0 peripheral controller") Cc: stable Signed-off-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20210524060155.1178724-1-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/renesas_usb3.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index 0c418ce50ba0..f1b35a39d1ba 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -1488,7 +1488,7 @@ static void usb3_start_pipen(struct renesas_usb3_ep *usb3_ep, struct renesas_usb3_request *usb3_req) { struct renesas_usb3 *usb3 = usb3_ep_to_usb3(usb3_ep); - struct renesas_usb3_request *usb3_req_first = usb3_get_request(usb3_ep); + struct renesas_usb3_request *usb3_req_first; unsigned long flags; int ret = -EAGAIN; u32 enable_bits = 0; @@ -1496,7 +1496,8 @@ static void usb3_start_pipen(struct renesas_usb3_ep *usb3_ep, spin_lock_irqsave(&usb3->lock, flags); if (usb3_ep->halt || usb3_ep->started) goto out; - if (usb3_req != usb3_req_first) + usb3_req_first = __usb3_get_request(usb3_ep); + if (!usb3_req_first || usb3_req != usb3_req_first) goto out; if (usb3_pn_change(usb3, usb3_ep->num) < 0) From 9b7ff25d129df7c4f61e08382993e1988d56f6a7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 21 May 2021 15:13:11 +0200 Subject: [PATCH 549/730] ACPI: power: Refine turning off unused power resources Commit 7e4fdeafa61f ("ACPI: power: Turn off unused power resources unconditionally") dropped the power resource state check from acpi_turn_off_unused_power_resources(), because according to the ACPI specification (e.g. ACPI 6.4, Section 7.2.2) the OS "may run the _OFF method repeatedly, even if the resource is already off". However, it turns out that some systems do not follow the specification in this particular respect and that commit introduced boot issues on them, so refine acpi_turn_off_unused_power_resources() to only turn off power resources without any users after device enumeration and restore its previous behavior in the system-wide resume path. Fixes: 7e4fdeafa61f ("ACPI: power: Turn off unused power resources unconditionally") Link: https://uefi.org/specs/ACPI/6.4/07_Power_and_Performance_Mgmt/declaring-a-power-resource-object.html#off BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=213019 Reported-by: Zhang Rui Tested-by: Zhang Rui Reported-by: Dave Olsthoorn Tested-by: Dave Olsthoorn Reported-by: Shujun Wang Tested-by: Shujun Wang Signed-off-by: Rafael J. Wysocki --- drivers/acpi/internal.h | 4 +-- drivers/acpi/power.c | 59 ++++++++++++++++++++++++++++++++--------- drivers/acpi/scan.c | 2 +- drivers/acpi/sleep.c | 2 +- 4 files changed, 50 insertions(+), 17 deletions(-) diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index f973bbe90e5e..e21611c9a170 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -134,7 +134,7 @@ int acpi_power_init(void); void acpi_power_resources_list_free(struct list_head *list); int acpi_extract_power_resources(union acpi_object *package, unsigned int start, struct list_head *list); -int acpi_add_power_resource(acpi_handle handle); +struct acpi_device *acpi_add_power_resource(acpi_handle handle); void acpi_power_add_remove_device(struct acpi_device *adev, bool add); int acpi_power_wakeup_list_init(struct list_head *list, int *system_level); int acpi_device_sleep_wake(struct acpi_device *dev, @@ -142,7 +142,7 @@ int acpi_device_sleep_wake(struct acpi_device *dev, int acpi_power_get_inferred_state(struct acpi_device *device, int *state); int acpi_power_on_resources(struct acpi_device *device, int state); int acpi_power_transition(struct acpi_device *device, int state); -void acpi_turn_off_unused_power_resources(void); +void acpi_turn_off_unused_power_resources(bool init); /* -------------------------------------------------------------------------- Device Power Management diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index 56102eaaa2da..97c9a94a1a30 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -52,6 +52,7 @@ struct acpi_power_resource { u32 system_level; u32 order; unsigned int ref_count; + unsigned int users; bool wakeup_enabled; struct mutex resource_lock; struct list_head dependents; @@ -147,6 +148,7 @@ int acpi_extract_power_resources(union acpi_object *package, unsigned int start, for (i = start; i < package->package.count; i++) { union acpi_object *element = &package->package.elements[i]; + struct acpi_device *rdev; acpi_handle rhandle; if (element->type != ACPI_TYPE_LOCAL_REFERENCE) { @@ -163,13 +165,16 @@ int acpi_extract_power_resources(union acpi_object *package, unsigned int start, if (acpi_power_resource_is_dup(package, start, i)) continue; - err = acpi_add_power_resource(rhandle); - if (err) + rdev = acpi_add_power_resource(rhandle); + if (!rdev) { + err = -ENODEV; break; - + } err = acpi_power_resources_list_add(rhandle, list); if (err) break; + + to_power_resource(rdev)->users++; } if (err) acpi_power_resources_list_free(list); @@ -907,7 +912,7 @@ static void acpi_power_add_resource_to_list(struct acpi_power_resource *resource mutex_unlock(&power_resource_list_lock); } -int acpi_add_power_resource(acpi_handle handle) +struct acpi_device *acpi_add_power_resource(acpi_handle handle) { struct acpi_power_resource *resource; struct acpi_device *device = NULL; @@ -918,11 +923,11 @@ int acpi_add_power_resource(acpi_handle handle) acpi_bus_get_device(handle, &device); if (device) - return 0; + return device; resource = kzalloc(sizeof(*resource), GFP_KERNEL); if (!resource) - return -ENOMEM; + return NULL; device = &resource->device; acpi_init_device_object(device, handle, ACPI_BUS_TYPE_POWER); @@ -959,11 +964,11 @@ int acpi_add_power_resource(acpi_handle handle) acpi_power_add_resource_to_list(resource); acpi_device_add_finalize(device); - return 0; + return device; err: acpi_release_power_resource(&device->dev); - return result; + return NULL; } #ifdef CONFIG_ACPI_SLEEP @@ -997,7 +1002,38 @@ void acpi_resume_power_resources(void) } #endif -void acpi_turn_off_unused_power_resources(void) +static void acpi_power_turn_off_if_unused(struct acpi_power_resource *resource, + bool init) +{ + if (resource->ref_count > 0) + return; + + if (init) { + if (resource->users > 0) + return; + } else { + int result, state; + + result = acpi_power_get_state(resource->device.handle, &state); + if (result || state == ACPI_POWER_RESOURCE_STATE_OFF) + return; + } + + dev_info(&resource->device.dev, "Turning OFF\n"); + __acpi_power_off(resource); +} + +/** + * acpi_turn_off_unused_power_resources - Turn off power resources not in use. + * @init: Control switch. + * + * If @ainit is set, unconditionally turn off all of the ACPI power resources + * without any users. + * + * Otherwise, turn off all ACPI power resources without active references (that + * is, the ones that should be "off" at the moment) that are "on". + */ +void acpi_turn_off_unused_power_resources(bool init) { struct acpi_power_resource *resource; @@ -1006,10 +1042,7 @@ void acpi_turn_off_unused_power_resources(void) list_for_each_entry_reverse(resource, &acpi_power_resource_list, list_node) { mutex_lock(&resource->resource_lock); - if (!resource->ref_count) { - dev_info(&resource->device.dev, "Turning OFF\n"); - __acpi_power_off(resource); - } + acpi_power_turn_off_if_unused(resource, init); mutex_unlock(&resource->resource_lock); } diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 453eff8ec8c3..e10d38ac7cf2 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -2360,7 +2360,7 @@ int __init acpi_scan_init(void) } } - acpi_turn_off_unused_power_resources(); + acpi_turn_off_unused_power_resources(true); acpi_scan_initialized = true; diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 09fd13757b65..df386571da98 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -504,7 +504,7 @@ static void acpi_pm_start(u32 acpi_state) */ static void acpi_pm_end(void) { - acpi_turn_off_unused_power_resources(); + acpi_turn_off_unused_power_resources(false); acpi_scan_lock_release(); /* * This is necessary in case acpi_pm_finish() is not called during a From 08b2b6fdf6b26032f025084ce2893924a0cdb4a2 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Mon, 24 May 2021 16:29:43 +0800 Subject: [PATCH 550/730] cgroup: fix spelling mistakes Fix some spelling mistakes in comments: hierarhcy ==> hierarchy automtically ==> automatically overriden ==> overridden In absense of .. or ==> In absence of .. and assocaited ==> associated taget ==> target initate ==> initiate succeded ==> succeeded curremt ==> current udpated ==> updated Signed-off-by: Zhen Lei Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 6 +++--- include/linux/cgroup.h | 2 +- kernel/cgroup/cgroup-v1.c | 2 +- kernel/cgroup/cgroup.c | 8 ++++---- kernel/cgroup/cpuset.c | 2 +- kernel/cgroup/rdma.c | 2 +- kernel/cgroup/rstat.c | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 559ee05f86b2..fb8f6d2cd104 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -232,7 +232,7 @@ struct css_set { struct list_head task_iters; /* - * On the default hierarhcy, ->subsys[ssid] may point to a css + * On the default hierarchy, ->subsys[ssid] may point to a css * attached to an ancestor instead of the cgroup this css_set is * associated with. The following node is anchored at * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to @@ -668,7 +668,7 @@ struct cgroup_subsys { */ bool threaded:1; - /* the following two fields are initialized automtically during boot */ + /* the following two fields are initialized automatically during boot */ int id; const char *name; @@ -757,7 +757,7 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {} * sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer. * On boot, sock_cgroup_data records the cgroup that the sock was created * in so that cgroup2 matches can be made; however, once either net_prio or - * net_cls starts being used, the area is overriden to carry prioidx and/or + * net_cls starts being used, the area is overridden to carry prioidx and/or * classid. The two modes are distinguished by whether the lowest bit is * set. Clear bit indicates cgroup pointer while set bit prioidx and * classid. diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 4f2f79de083e..6bc9c76680b2 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -32,7 +32,7 @@ struct kernel_clone_args; #ifdef CONFIG_CGROUPS /* - * All weight knobs on the default hierarhcy should use the following min, + * All weight knobs on the default hierarchy should use the following min, * default and max values. The default value is the logarithmic center of * MIN and MAX and allows 100x to be expressed in both directions. */ diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 391aa570369b..8190b6bfc978 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1001,7 +1001,7 @@ static int check_cgroupfs_options(struct fs_context *fc) ctx->subsys_mask &= enabled; /* - * In absense of 'none', 'name=' or subsystem name options, + * In absence of 'none', 'name=' and subsystem name options, * let's default to 'all'. */ if (!ctx->subsys_mask && !ctx->none && !ctx->name) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index e7a9a2998245..21ecc6ee6a6d 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -468,7 +468,7 @@ static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp, * @cgrp: the cgroup of interest * @ss: the subsystem of interest * - * Find and get @cgrp's css assocaited with @ss. If the css doesn't exist + * Find and get @cgrp's css associated with @ss. If the css doesn't exist * or is offline, %NULL is returned. */ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp, @@ -1633,7 +1633,7 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) /** * css_clear_dir - remove subsys files in a cgroup directory - * @css: taget css + * @css: target css */ static void css_clear_dir(struct cgroup_subsys_state *css) { @@ -5350,7 +5350,7 @@ out_unlock: /* * This is called when the refcnt of a css is confirmed to be killed. * css_tryget_online() is now guaranteed to fail. Tell the subsystem to - * initate destruction and put the css ref from kill_css(). + * initiate destruction and put the css ref from kill_css(). */ static void css_killed_work_fn(struct work_struct *work) { @@ -6052,7 +6052,7 @@ out_revert: * @kargs: the arguments passed to create the child process * * This calls the cancel_fork() callbacks if a fork failed *after* - * cgroup_can_fork() succeded and cleans up references we took to + * cgroup_can_fork() succeeded and cleans up references we took to * prepare a new css_set for the child process in cgroup_can_fork(). */ void cgroup_cancel_fork(struct task_struct *child, diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index a945504c0ae7..adb5190c4429 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -3376,7 +3376,7 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk) } /** - * cpuset_nodemask_valid_mems_allowed - check nodemask vs. curremt mems_allowed + * cpuset_nodemask_valid_mems_allowed - check nodemask vs. current mems_allowed * @nodemask: the nodemask to be checked * * Are any of the nodes in the nodemask allowed in current->mems_allowed? diff --git a/kernel/cgroup/rdma.c b/kernel/cgroup/rdma.c index ae042c347c64..3135406608c7 100644 --- a/kernel/cgroup/rdma.c +++ b/kernel/cgroup/rdma.c @@ -244,7 +244,7 @@ EXPORT_SYMBOL(rdmacg_uncharge); * This function follows charging resource in hierarchical way. * It will fail if the charge would cause the new value to exceed the * hierarchical limit. - * Returns 0 if the charge succeded, otherwise -EAGAIN, -ENOMEM or -EINVAL. + * Returns 0 if the charge succeeded, otherwise -EAGAIN, -ENOMEM or -EINVAL. * Returns pointer to rdmacg for this resource when charging is successful. * * Charger needs to account resources on two criteria. diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 3a3fd2993a65..cee265cb535c 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -75,7 +75,7 @@ void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) * @root: root of the tree to traversal * @cpu: target cpu * - * Walks the udpated rstat_cpu tree on @cpu from @root. %NULL @pos starts + * Walks the updated rstat_cpu tree on @cpu from @root. %NULL @pos starts * the traversal and %NULL return indicates the end. During traversal, * each returned cgroup is unlinked from the tree. Must be called with the * matching cgroup_rstat_cpu_lock held. From 9b76eade16423ef06829cccfe3e100cfce31afcd Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 23 May 2021 14:38:53 +0000 Subject: [PATCH 551/730] sch_dsmark: fix a NULL deref in qdisc_reset() If Qdisc_ops->init() is failed, Qdisc_ops->reset() would be called. When dsmark_init(Qdisc_ops->init()) is failed, it possibly doesn't initialize dsmark_qdisc_data->q. But dsmark_reset(Qdisc_ops->reset()) uses dsmark_qdisc_data->q pointer wihtout any null checking. So, panic would occur. Test commands: sysctl net.core.default_qdisc=dsmark -w ip link add dummy0 type dummy ip link add vw0 link dummy0 type virt_wifi ip link set vw0 up Splat looks like: KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f] CPU: 3 PID: 684 Comm: ip Not tainted 5.12.0+ #910 RIP: 0010:qdisc_reset+0x2b/0x680 Code: 1f 44 00 00 48 b8 00 00 00 00 00 fc ff df 41 57 41 56 41 55 41 54 55 48 89 fd 48 83 c7 18 53 48 89 fa 48 c1 ea 03 48 83 ec 20 <80> 3c 02 00 0f 85 09 06 00 00 4c 8b 65 18 0f 1f 44 00 00 65 8b 1d RSP: 0018:ffff88800fda6bf8 EFLAGS: 00010282 RAX: dffffc0000000000 RBX: ffff8880050ed800 RCX: 0000000000000000 RDX: 0000000000000003 RSI: ffffffff99e34100 RDI: 0000000000000018 RBP: 0000000000000000 R08: fffffbfff346b553 R09: fffffbfff346b553 R10: 0000000000000001 R11: fffffbfff346b552 R12: ffffffffc0824940 R13: ffff888109e83800 R14: 00000000ffffffff R15: ffffffffc08249e0 FS: 00007f5042287680(0000) GS:ffff888119800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055ae1f4dbd90 CR3: 0000000006760002 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? rcu_read_lock_bh_held+0xa0/0xa0 dsmark_reset+0x3d/0xf0 [sch_dsmark] qdisc_reset+0xa9/0x680 qdisc_destroy+0x84/0x370 qdisc_create_dflt+0x1fe/0x380 attach_one_default_qdisc.constprop.41+0xa4/0x180 dev_activate+0x4d5/0x8c0 ? __dev_open+0x268/0x390 __dev_open+0x270/0x390 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- net/sched/sch_dsmark.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index cd2748e2d4a2..d320bcfb2da2 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -407,7 +407,8 @@ static void dsmark_reset(struct Qdisc *sch) struct dsmark_qdisc_data *p = qdisc_priv(sch); pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); - qdisc_reset(p->q); + if (p->q) + qdisc_reset(p->q); sch->qstats.backlog = 0; sch->q.qlen = 0; } From 1a44fb38cc65bc30bac490291412aa1940659fe1 Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Mon, 24 May 2021 14:49:42 +0800 Subject: [PATCH 552/730] r8152: check the informaton of the device Verify some fields of the USB descriptor to make sure the driver could be used by the device. Besides, remove the check of endpoint number in rtl8152_probe(). usb_find_common_endpoints() includes it. BugLink: https://syzkaller.appspot.com/bug?id=912c9c373656996801b4de61f1e3cb326fe940aa Reported-by: syzbot+95afd23673f5dd295c57@syzkaller.appspotmail.com Fixes: c2198943e33b ("r8152: search the configuration of vendor mode") Signed-off-by: Hayes Wang Reviewed-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 42 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 136ea06540ff..f6abb2fbf972 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -8107,6 +8107,37 @@ static void r8156b_init(struct r8152 *tp) tp->coalesce = 15000; /* 15 us */ } +static bool rtl_check_vendor_ok(struct usb_interface *intf) +{ + struct usb_host_interface *alt = intf->cur_altsetting; + struct usb_endpoint_descriptor *in, *out, *intr; + + if (usb_find_common_endpoints(alt, &in, &out, &intr, NULL) < 0) { + dev_err(&intf->dev, "Expected endpoints are not found\n"); + return false; + } + + /* Check Rx endpoint address */ + if (usb_endpoint_num(in) != 1) { + dev_err(&intf->dev, "Invalid Rx endpoint address\n"); + return false; + } + + /* Check Tx endpoint address */ + if (usb_endpoint_num(out) != 2) { + dev_err(&intf->dev, "Invalid Tx endpoint address\n"); + return false; + } + + /* Check interrupt endpoint address */ + if (usb_endpoint_num(intr) != 3) { + dev_err(&intf->dev, "Invalid interrupt endpoint address\n"); + return false; + } + + return true; +} + static bool rtl_vendor_mode(struct usb_interface *intf) { struct usb_host_interface *alt = intf->cur_altsetting; @@ -8115,12 +8146,15 @@ static bool rtl_vendor_mode(struct usb_interface *intf) int i, num_configs; if (alt->desc.bInterfaceClass == USB_CLASS_VENDOR_SPEC) - return true; + return rtl_check_vendor_ok(intf); /* The vendor mode is not always config #1, so to find it out. */ udev = interface_to_usbdev(intf); c = udev->config; num_configs = udev->descriptor.bNumConfigurations; + if (num_configs < 2) + return false; + for (i = 0; i < num_configs; (i++, c++)) { struct usb_interface_descriptor *desc = NULL; @@ -8135,7 +8169,8 @@ static bool rtl_vendor_mode(struct usb_interface *intf) } } - WARN_ON_ONCE(i == num_configs); + if (i == num_configs) + dev_err(&intf->dev, "Unexpected Device\n"); return false; } @@ -9381,9 +9416,6 @@ static int rtl8152_probe(struct usb_interface *intf, if (!rtl_vendor_mode(intf)) return -ENODEV; - if (intf->cur_altsetting->desc.bNumEndpoints < 3) - return -ENODEV; - usb_reset_device(udev); netdev = alloc_etherdev(sizeof(struct r8152)); if (!netdev) { From 1a6e9a9c68c1f183872e4bcc947382111c2e04eb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 24 May 2021 11:25:11 +0200 Subject: [PATCH 553/730] net: hso: fix control-request directions The direction of the pipe argument must match the request-type direction bit or control requests may fail depending on the host-controller-driver implementation. Fix the tiocmset and rfkill requests which erroneously used usb_rcvctrlpipe(). Fixes: 72dc1c096c70 ("HSO: add option hso driver") Cc: stable@vger.kernel.org # 2.6.27 Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/usb/hso.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index b48b2a25210c..5c779cc0ea11 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -1689,7 +1689,7 @@ static int hso_serial_tiocmset(struct tty_struct *tty, spin_unlock_irqrestore(&serial->serial_lock, flags); return usb_control_msg(serial->parent->usb, - usb_rcvctrlpipe(serial->parent->usb, 0), 0x22, + usb_sndctrlpipe(serial->parent->usb, 0), 0x22, 0x21, val, if_num, NULL, 0, USB_CTRL_SET_TIMEOUT); } @@ -2436,7 +2436,7 @@ static int hso_rfkill_set_block(void *data, bool blocked) if (hso_dev->usb_gone) rv = 0; else - rv = usb_control_msg(hso_dev->usb, usb_rcvctrlpipe(hso_dev->usb, 0), + rv = usb_control_msg(hso_dev->usb, usb_sndctrlpipe(hso_dev->usb, 0), enabled ? 0x82 : 0x81, 0x40, 0, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); mutex_unlock(&hso_dev->mutex); From ba61cf167cb77e54c1ec5adb7aa49a22ab3c9b28 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 24 May 2021 12:25:22 +0300 Subject: [PATCH 554/730] net: dsa: sja1105: fix VL lookup command packing for P/Q/R/S At the beginning of the sja1105_dynamic_config.c file there is a diagram of the dynamic config interface layout: packed_buf | V +-----------------------------------------+------------------+ | ENTRY BUFFER | COMMAND BUFFER | +-----------------------------------------+------------------+ <----------------------- packed_size ------------------------> So in order to pack/unpack the command bits into the buffer, sja1105_vl_lookup_cmd_packing must first advance the buffer pointer by the length of the entry. This is similar to what the other *cmd_packing functions do. This bug exists because the command packing function for P/Q/R/S was copied from the E/T generation, and on E/T, the command was actually embedded within the entry buffer itself. Fixes: 94f94d4acfb2 ("net: dsa: sja1105: add static tables for virtual links") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- .../net/dsa/sja1105/sja1105_dynamic_config.c | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c index b777d3f37573..12cd04b56803 100644 --- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c +++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c @@ -167,9 +167,10 @@ enum sja1105_hostcmd { SJA1105_HOSTCMD_INVALIDATE = 4, }; +/* Command and entry overlap */ static void -sja1105_vl_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, - enum packing_op op) +sja1105et_vl_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, + enum packing_op op) { const int size = SJA1105_SIZE_DYN_CMD; @@ -179,6 +180,20 @@ sja1105_vl_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, sja1105_packing(buf, &cmd->index, 9, 0, size, op); } +/* Command and entry are separate */ +static void +sja1105pqrs_vl_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, + enum packing_op op) +{ + u8 *p = buf + SJA1105_SIZE_VL_LOOKUP_ENTRY; + const int size = SJA1105_SIZE_DYN_CMD; + + sja1105_packing(p, &cmd->valid, 31, 31, size, op); + sja1105_packing(p, &cmd->errors, 30, 30, size, op); + sja1105_packing(p, &cmd->rdwrset, 29, 29, size, op); + sja1105_packing(p, &cmd->index, 9, 0, size, op); +} + static size_t sja1105et_vl_lookup_entry_packing(void *buf, void *entry_ptr, enum packing_op op) { @@ -641,7 +656,7 @@ static size_t sja1105pqrs_cbs_entry_packing(void *buf, void *entry_ptr, const struct sja1105_dynamic_table_ops sja1105et_dyn_ops[BLK_IDX_MAX_DYN] = { [BLK_IDX_VL_LOOKUP] = { .entry_packing = sja1105et_vl_lookup_entry_packing, - .cmd_packing = sja1105_vl_lookup_cmd_packing, + .cmd_packing = sja1105et_vl_lookup_cmd_packing, .access = OP_WRITE, .max_entry_count = SJA1105_MAX_VL_LOOKUP_COUNT, .packed_size = SJA1105ET_SIZE_VL_LOOKUP_DYN_CMD, @@ -725,7 +740,7 @@ const struct sja1105_dynamic_table_ops sja1105et_dyn_ops[BLK_IDX_MAX_DYN] = { const struct sja1105_dynamic_table_ops sja1105pqrs_dyn_ops[BLK_IDX_MAX_DYN] = { [BLK_IDX_VL_LOOKUP] = { .entry_packing = sja1105_vl_lookup_entry_packing, - .cmd_packing = sja1105_vl_lookup_cmd_packing, + .cmd_packing = sja1105pqrs_vl_lookup_cmd_packing, .access = (OP_READ | OP_WRITE), .max_entry_count = SJA1105_MAX_VL_LOOKUP_COUNT, .packed_size = SJA1105PQRS_SIZE_VL_LOOKUP_DYN_CMD, From dc596e3fe63f88e3d1e509f64e7f761cd4135538 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 24 May 2021 12:25:23 +0300 Subject: [PATCH 555/730] net: dsa: sja1105: call dsa_unregister_switch when allocating memory fails Unlike other drivers which pretty much end their .probe() execution with dsa_register_switch(), the sja1105 does some extra stuff. When that fails with -ENOMEM, the driver is quick to return that, forgetting to call dsa_unregister_switch(). Not critical, but a bug nonetheless. Fixes: 4d7525085a9b ("net: dsa: sja1105: offload the Credit-Based Shaper qdisc") Fixes: a68578c20a96 ("net: dsa: Make deferred_xmit private to sja1105") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 405024b637d6..2248152b4836 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -3646,8 +3646,10 @@ static int sja1105_probe(struct spi_device *spi) priv->cbs = devm_kcalloc(dev, priv->info->num_cbs_shapers, sizeof(struct sja1105_cbs_entry), GFP_KERNEL); - if (!priv->cbs) - return -ENOMEM; + if (!priv->cbs) { + rc = -ENOMEM; + goto out_unregister_switch; + } } /* Connections between dsa_port and sja1105_port */ @@ -3672,7 +3674,7 @@ static int sja1105_probe(struct spi_device *spi) dev_err(ds->dev, "failed to create deferred xmit thread: %d\n", rc); - goto out; + goto out_destroy_workers; } skb_queue_head_init(&sp->xmit_queue); sp->xmit_tpid = ETH_P_SJA1105; @@ -3682,7 +3684,8 @@ static int sja1105_probe(struct spi_device *spi) } return 0; -out: + +out_destroy_workers: while (port-- > 0) { struct sja1105_port *sp = &priv->ports[port]; @@ -3691,6 +3694,10 @@ out: kthread_destroy_worker(sp->xmit_worker); } + +out_unregister_switch: + dsa_unregister_switch(ds); + return rc; } From cec279a898a3b004411682f212215ccaea1cd0fb Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 24 May 2021 12:25:24 +0300 Subject: [PATCH 556/730] net: dsa: sja1105: add error handling in sja1105_setup() If any of sja1105_static_config_load(), sja1105_clocking_setup() or sja1105_devlink_setup() fails, we can't just return in the middle of sja1105_setup() or memory will leak. Add a cleanup path. Fixes: 0a7bdbc23d8a ("net: dsa: sja1105: move devlink param code to sja1105_devlink.c") Fixes: 8aa9ebccae87 ("net: dsa: Introduce driver for NXP SJA1105 5-port L2 switch") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 2248152b4836..c7a1be8bbddf 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -2976,13 +2976,13 @@ static int sja1105_setup(struct dsa_switch *ds) rc = sja1105_static_config_load(priv, ports); if (rc < 0) { dev_err(ds->dev, "Failed to load static config: %d\n", rc); - return rc; + goto out_ptp_clock_unregister; } /* Configure the CGU (PHY link modes and speeds) */ rc = sja1105_clocking_setup(priv); if (rc < 0) { dev_err(ds->dev, "Failed to configure MII clocking: %d\n", rc); - return rc; + goto out_static_config_free; } /* On SJA1105, VLAN filtering per se is always enabled in hardware. * The only thing we can do to disable it is lie about what the 802.1Q @@ -3003,7 +3003,7 @@ static int sja1105_setup(struct dsa_switch *ds) rc = sja1105_devlink_setup(ds); if (rc < 0) - return rc; + goto out_static_config_free; /* The DSA/switchdev model brings up switch ports in standalone mode by * default, and that means vlan_filtering is 0 since they're not under @@ -3012,6 +3012,17 @@ static int sja1105_setup(struct dsa_switch *ds) rtnl_lock(); rc = sja1105_setup_8021q_tagging(ds, true); rtnl_unlock(); + if (rc) + goto out_devlink_teardown; + + return 0; + +out_devlink_teardown: + sja1105_devlink_teardown(ds); +out_ptp_clock_unregister: + sja1105_ptp_clock_unregister(ds); +out_static_config_free: + sja1105_static_config_free(&priv->static_config); return rc; } From 6729188d2646709941903052e4b78e1d82c239b9 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 24 May 2021 12:25:25 +0300 Subject: [PATCH 557/730] net: dsa: sja1105: error out on unsupported PHY mode The driver continues probing when a port is configured for an unsupported PHY interface type, instead it should stop. Fixes: 8aa9ebccae87 ("net: dsa: Introduce driver for NXP SJA1105 5-port L2 switch") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index c7a1be8bbddf..7f7e0424a442 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -207,6 +207,7 @@ static int sja1105_init_mii_settings(struct sja1105_private *priv, default: dev_err(dev, "Unsupported PHY mode %s!\n", phy_modes(ports[i].phy_mode)); + return -EINVAL; } /* Even though the SerDes port is able to drive SGMII autoneg From ed040abca4c1db72dfd3b8483b6ed6bfb7c2571e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 24 May 2021 12:25:26 +0300 Subject: [PATCH 558/730] net: dsa: sja1105: use 4095 as the private VLAN for untagged traffic One thing became visible when writing the blamed commit, and that was that STP and PTP frames injected by net/dsa/tag_sja1105.c using the deferred xmit mechanism are always classified to the pvid of the CPU port, regardless of whatever VLAN there might be in these packets. So a decision needed to be taken regarding the mechanism through which we should ensure that delivery of STP and PTP traffic is possible when we are in a VLAN awareness mode that involves tag_8021q. This is because tag_8021q is not concerned with managing the pvid of the CPU port, since as far as tag_8021q is concerned, no traffic should be sent as untagged from the CPU port. So we end up not actually having a pvid on the CPU port if we only listen to tag_8021q, and unless we do something about it. The decision taken at the time was to keep VLAN 1 in the list of priv->dsa_8021q_vlans, and make it a pvid of the CPU port. This ensures that STP and PTP frames can always be sent to the outside world. However there is a problem. If we do the following while we are in the best_effort_vlan_filtering=true mode: ip link add br0 type bridge vlan_filtering 1 ip link set swp2 master br0 bridge vlan del dev swp2 vid 1 Then untagged and pvid-tagged frames should be dropped. But we observe that they aren't, and this is because of the precaution we took that VID 1 is always installed on all ports. So clearly VLAN 1 is not good for this purpose. What about VLAN 0? Well, VLAN 0 is managed by the 8021q module, and that module wants to ensure that 802.1p tagged frames are always received by a port, and are always transmitted as VLAN-tagged (with VLAN ID 0). Whereas we want our STP and PTP frames to be untagged if the stack sent them as untagged - we don't want the driver to just decide out of the blue that it adds VID 0 to some packets. So what to do? Well, there is one other VLAN that is reserved, and that is 4095: $ ip link add link swp2 name swp2.4095 type vlan id 4095 Error: 8021q: Invalid VLAN id. $ bridge vlan add dev swp2 vid 4095 Error: bridge: Vlan id is invalid. After we made this change, VLAN 1 is indeed forwarded and/or dropped according to the bridge VLAN table, there are no further alterations done by the sja1105 driver. Fixes: ec5ae61076d0 ("net: dsa: sja1105: save/restore VLANs using a delta commit method") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 7f7e0424a442..dffa7dd83877 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -26,6 +26,7 @@ #include "sja1105_tas.h" #define SJA1105_UNKNOWN_MULTICAST 0x010000000000ull +#define SJA1105_DEFAULT_VLAN (VLAN_N_VID - 1) static const struct dsa_switch_ops sja1105_switch_ops; @@ -322,6 +323,13 @@ static int sja1105_init_l2_lookup_params(struct sja1105_private *priv) return 0; } +/* Set up a default VLAN for untagged traffic injected from the CPU + * using management routes (e.g. STP, PTP) as opposed to tag_8021q. + * All DT-defined ports are members of this VLAN, and there are no + * restrictions on forwarding (since the CPU selects the destination). + * Frames from this VLAN will always be transmitted as untagged, and + * neither the bridge nor the 8021q module cannot create this VLAN ID. + */ static int sja1105_init_static_vlan(struct sja1105_private *priv) { struct sja1105_table *table; @@ -331,17 +339,13 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv) .vmemb_port = 0, .vlan_bc = 0, .tag_port = 0, - .vlanid = 1, + .vlanid = SJA1105_DEFAULT_VLAN, }; struct dsa_switch *ds = priv->ds; int port; table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP]; - /* The static VLAN table will only contain the initial pvid of 1. - * All other VLANs are to be configured through dynamic entries, - * and kept in the static configuration table as backing memory. - */ if (table->entry_count) { kfree(table->entries); table->entry_count = 0; @@ -354,9 +358,6 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv) table->entry_count = 1; - /* VLAN 1: all DT-defined ports are members; no restrictions on - * forwarding; always transmit as untagged. - */ for (port = 0; port < ds->num_ports; port++) { struct sja1105_bridge_vlan *v; @@ -367,15 +368,12 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv) pvid.vlan_bc |= BIT(port); pvid.tag_port &= ~BIT(port); - /* Let traffic that don't need dsa_8021q (e.g. STP, PTP) be - * transmitted as untagged. - */ v = kzalloc(sizeof(*v), GFP_KERNEL); if (!v) return -ENOMEM; v->port = port; - v->vid = 1; + v->vid = SJA1105_DEFAULT_VLAN; v->untagged = true; if (dsa_is_cpu_port(ds, port)) v->pvid = true; From b38e659de966a122fe2cb178c1e39c9bea06bc62 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 24 May 2021 12:25:27 +0300 Subject: [PATCH 559/730] net: dsa: sja1105: update existing VLANs from the bridge VLAN list When running this sequence of operations: ip link add br0 type bridge vlan_filtering 1 ip link set swp4 master br0 bridge vlan add dev swp4 vid 1 We observe the traffic sent on swp4 is still untagged, even though the bridge has overwritten the existing VLAN entry: port vlan ids swp4 1 PVID br0 1 PVID Egress Untagged This happens because we didn't consider that the 'bridge vlan add' command just overwrites VLANs like it's nothing. We treat the 'vid 1 pvid untagged' and the 'vid 1' as two separate VLANs, and the first still has precedence when calling sja1105_build_vlan_table. Obviously there is a disagreement regarding semantics, and we end up doing something unexpected from the PoV of the bridge. Let's actually consider an "existing VLAN" to be one which is on the same port, and has the same VLAN ID, as one we already have, and update it if it has different flags than we do. The first blamed commit is the one introducing the bug, the second one is the latest on top of which the bugfix still applies. Fixes: ec5ae61076d0 ("net: dsa: sja1105: save/restore VLANs using a delta commit method") Fixes: 5899ee367ab3 ("net: dsa: tag_8021q: add a context structure") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index dffa7dd83877..b88d9ef45a1f 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -2816,11 +2816,22 @@ static int sja1105_vlan_add_one(struct dsa_switch *ds, int port, u16 vid, bool pvid = flags & BRIDGE_VLAN_INFO_PVID; struct sja1105_bridge_vlan *v; - list_for_each_entry(v, vlan_list, list) - if (v->port == port && v->vid == vid && - v->untagged == untagged && v->pvid == pvid) + list_for_each_entry(v, vlan_list, list) { + if (v->port == port && v->vid == vid) { /* Already added */ - return 0; + if (v->untagged == untagged && v->pvid == pvid) + /* Nothing changed */ + return 0; + + /* It's the same VLAN, but some of the flags changed + * and the user did not bother to delete it first. + * Update it and trigger sja1105_build_vlan_table. + */ + v->untagged = untagged; + v->pvid = pvid; + return 1; + } + } v = kzalloc(sizeof(*v), GFP_KERNEL); if (!v) { From 8d84733dee9f7dfa7a323c59e9bd61b2a83bee3f Mon Sep 17 00:00:00 2001 From: Yang Li Date: Mon, 24 May 2021 18:26:03 +0800 Subject: [PATCH 560/730] thermal/ti-soc-thermal: Fix kernel-doc Fix function name in ti-bandgap.c kernel-doc comment to remove a warning. drivers/thermal/ti-soc-thermal/ti-bandgap.c:787: warning: expecting prototype for ti_bandgap_alert_init(). Prototype was for ti_bandgap_talert_init() instead. Reported-by: Abaci Robot Signed-off-by: Yang Li Acked-by: Suman Anna Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/1621851963-36548-1-git-send-email-yang.lee@linux.alibaba.com --- drivers/thermal/ti-soc-thermal/ti-bandgap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.c b/drivers/thermal/ti-soc-thermal/ti-bandgap.c index ebe7cb70bfb6..ea0603b59309 100644 --- a/drivers/thermal/ti-soc-thermal/ti-bandgap.c +++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.c @@ -770,7 +770,7 @@ static int ti_bandgap_tshut_init(struct ti_bandgap *bgp, } /** - * ti_bandgap_alert_init() - setup and initialize talert handling + * ti_bandgap_talert_init() - setup and initialize talert handling * @bgp: pointer to struct ti_bandgap * @pdev: pointer to device struct platform_device * From a4dd4fc6105e54393d637450a11d4cddb5fabc4f Mon Sep 17 00:00:00 2001 From: Saubhik Mukherjee Date: Mon, 24 May 2021 19:07:12 +0530 Subject: [PATCH 561/730] net: appletalk: cops: Fix data race in cops_probe1 In cops_probe1(), there is a write to dev->base_addr after requesting an interrupt line and registering the interrupt handler cops_interrupt(). The handler might be called in parallel to handle an interrupt. cops_interrupt() tries to read dev->base_addr leading to a potential data race. So write to dev->base_addr before calling request_irq(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Saubhik Mukherjee Signed-off-by: David S. Miller --- drivers/net/appletalk/cops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c index ba8e70a8e312..6b12ce822e51 100644 --- a/drivers/net/appletalk/cops.c +++ b/drivers/net/appletalk/cops.c @@ -327,6 +327,8 @@ static int __init cops_probe1(struct net_device *dev, int ioaddr) break; } + dev->base_addr = ioaddr; + /* Reserve any actual interrupt. */ if (dev->irq) { retval = request_irq(dev->irq, cops_interrupt, 0, dev->name, dev); @@ -334,8 +336,6 @@ static int __init cops_probe1(struct net_device *dev, int ioaddr) goto err_out; } - dev->base_addr = ioaddr; - lp = netdev_priv(dev); spin_lock_init(&lp->lock); From 48b491a5cc74333c4a6a82fe21cea42c055a3b0b Mon Sep 17 00:00:00 2001 From: George McCollister Date: Mon, 24 May 2021 13:50:54 -0500 Subject: [PATCH 562/730] net: hsr: fix mac_len checks Commit 2e9f60932a2c ("net: hsr: check skb can contain struct hsr_ethhdr in fill_frame_info") added the following which resulted in -EINVAL always being returned: if (skb->mac_len < sizeof(struct hsr_ethhdr)) return -EINVAL; mac_len was not being set correctly so this check completely broke HSR/PRP since it was always 14, not 20. Set mac_len correctly and modify the mac_len checks to test in the correct places since sometimes it is legitimately 14. Fixes: 2e9f60932a2c ("net: hsr: check skb can contain struct hsr_ethhdr in fill_frame_info") Signed-off-by: George McCollister Signed-off-by: David S. Miller --- net/hsr/hsr_device.c | 2 ++ net/hsr/hsr_forward.c | 30 +++++++++++++++++++++--------- net/hsr/hsr_forward.h | 8 ++++---- net/hsr/hsr_main.h | 4 ++-- net/hsr/hsr_slave.c | 11 +++++------ 5 files changed, 34 insertions(+), 21 deletions(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index bfcdc75fc01e..26c32407f029 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -218,6 +218,7 @@ static netdev_tx_t hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev) if (master) { skb->dev = master->dev; skb_reset_mac_header(skb); + skb_reset_mac_len(skb); hsr_forward_skb(skb, master); } else { atomic_long_inc(&dev->tx_dropped); @@ -259,6 +260,7 @@ static struct sk_buff *hsr_init_skb(struct hsr_port *master) goto out; skb_reset_mac_header(skb); + skb_reset_mac_len(skb); skb_reset_network_header(skb); skb_reset_transport_header(skb); diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 6852e9bccf5b..ceb8afb2a62f 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -474,8 +474,8 @@ static void handle_std_frame(struct sk_buff *skb, } } -void hsr_fill_frame_info(__be16 proto, struct sk_buff *skb, - struct hsr_frame_info *frame) +int hsr_fill_frame_info(__be16 proto, struct sk_buff *skb, + struct hsr_frame_info *frame) { struct hsr_port *port = frame->port_rcv; struct hsr_priv *hsr = port->hsr; @@ -483,20 +483,26 @@ void hsr_fill_frame_info(__be16 proto, struct sk_buff *skb, /* HSRv0 supervisory frames double as a tag so treat them as tagged. */ if ((!hsr->prot_version && proto == htons(ETH_P_PRP)) || proto == htons(ETH_P_HSR)) { + /* Check if skb contains hsr_ethhdr */ + if (skb->mac_len < sizeof(struct hsr_ethhdr)) + return -EINVAL; + /* HSR tagged frame :- Data or Supervision */ frame->skb_std = NULL; frame->skb_prp = NULL; frame->skb_hsr = skb; frame->sequence_nr = hsr_get_skb_sequence_nr(skb); - return; + return 0; } /* Standard frame or PRP from master port */ handle_std_frame(skb, frame); + + return 0; } -void prp_fill_frame_info(__be16 proto, struct sk_buff *skb, - struct hsr_frame_info *frame) +int prp_fill_frame_info(__be16 proto, struct sk_buff *skb, + struct hsr_frame_info *frame) { /* Supervision frame */ struct prp_rct *rct = skb_get_PRP_rct(skb); @@ -507,9 +513,11 @@ void prp_fill_frame_info(__be16 proto, struct sk_buff *skb, frame->skb_std = NULL; frame->skb_prp = skb; frame->sequence_nr = prp_get_skb_sequence_nr(rct); - return; + return 0; } handle_std_frame(skb, frame); + + return 0; } static int fill_frame_info(struct hsr_frame_info *frame, @@ -519,9 +527,10 @@ static int fill_frame_info(struct hsr_frame_info *frame, struct hsr_vlan_ethhdr *vlan_hdr; struct ethhdr *ethhdr; __be16 proto; + int ret; - /* Check if skb contains hsr_ethhdr */ - if (skb->mac_len < sizeof(struct hsr_ethhdr)) + /* Check if skb contains ethhdr */ + if (skb->mac_len < sizeof(struct ethhdr)) return -EINVAL; memset(frame, 0, sizeof(*frame)); @@ -548,7 +557,10 @@ static int fill_frame_info(struct hsr_frame_info *frame, frame->is_from_san = false; frame->port_rcv = port; - hsr->proto_ops->fill_frame_info(proto, skb, frame); + ret = hsr->proto_ops->fill_frame_info(proto, skb, frame); + if (ret) + return ret; + check_local_dest(port->hsr, skb, frame); return 0; diff --git a/net/hsr/hsr_forward.h b/net/hsr/hsr_forward.h index b6acaafa83fc..206636750b30 100644 --- a/net/hsr/hsr_forward.h +++ b/net/hsr/hsr_forward.h @@ -24,8 +24,8 @@ struct sk_buff *prp_get_untagged_frame(struct hsr_frame_info *frame, struct hsr_port *port); bool prp_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port); bool hsr_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port); -void prp_fill_frame_info(__be16 proto, struct sk_buff *skb, - struct hsr_frame_info *frame); -void hsr_fill_frame_info(__be16 proto, struct sk_buff *skb, - struct hsr_frame_info *frame); +int prp_fill_frame_info(__be16 proto, struct sk_buff *skb, + struct hsr_frame_info *frame); +int hsr_fill_frame_info(__be16 proto, struct sk_buff *skb, + struct hsr_frame_info *frame); #endif /* __HSR_FORWARD_H */ diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 8f264672b70b..53d1f7a82463 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -186,8 +186,8 @@ struct hsr_proto_ops { struct hsr_port *port); struct sk_buff * (*create_tagged_frame)(struct hsr_frame_info *frame, struct hsr_port *port); - void (*fill_frame_info)(__be16 proto, struct sk_buff *skb, - struct hsr_frame_info *frame); + int (*fill_frame_info)(__be16 proto, struct sk_buff *skb, + struct hsr_frame_info *frame); bool (*invalid_dan_ingress_frame)(__be16 protocol); void (*update_san_info)(struct hsr_node *node, bool is_sup); }; diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index c5227d42faf5..b70e6bbf6021 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -60,12 +60,11 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) goto finish_pass; skb_push(skb, ETH_HLEN); - - if (skb_mac_header(skb) != skb->data) { - WARN_ONCE(1, "%s:%d: Malformed frame at source port %s)\n", - __func__, __LINE__, port->dev->name); - goto finish_consume; - } + skb_reset_mac_header(skb); + if ((!hsr->prot_version && protocol == htons(ETH_P_PRP)) || + protocol == htons(ETH_P_HSR)) + skb_set_network_header(skb, ETH_HLEN + HSR_HLEN); + skb_reset_mac_len(skb); hsr_forward_skb(skb, port); From 46a8b29c6306d8bbfd92b614ef65a47c900d8e70 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Mon, 24 May 2021 23:02:08 +0300 Subject: [PATCH 563/730] net: usb: fix memory leak in smsc75xx_bind Syzbot reported memory leak in smsc75xx_bind(). The problem was is non-freed memory in case of errors after memory allocation. backtrace: [] kmalloc include/linux/slab.h:556 [inline] [] kzalloc include/linux/slab.h:686 [inline] [] smsc75xx_bind+0x7a/0x334 drivers/net/usb/smsc75xx.c:1460 [] usbnet_probe+0x3b6/0xc30 drivers/net/usb/usbnet.c:1728 Fixes: d0cad871703b ("smsc75xx: SMSC LAN75xx USB gigabit ethernet adapter driver") Cc: stable@kernel.vger.org Reported-and-tested-by: syzbot+b558506ba8165425fee2@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Signed-off-by: David S. Miller --- drivers/net/usb/smsc75xx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index f8cdabb9ef5a..b286993da67c 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -1483,7 +1483,7 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf) ret = smsc75xx_wait_ready(dev, 0); if (ret < 0) { netdev_warn(dev->net, "device not ready in smsc75xx_bind\n"); - return ret; + goto err; } smsc75xx_init_mac_address(dev); @@ -1492,7 +1492,7 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf) ret = smsc75xx_reset(dev); if (ret < 0) { netdev_warn(dev->net, "smsc75xx_reset error %d\n", ret); - return ret; + goto err; } dev->net->netdev_ops = &smsc75xx_netdev_ops; @@ -1502,6 +1502,10 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf) dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len; dev->net->max_mtu = MAX_SINGLE_PACKET_SIZE; return 0; + +err: + kfree(pdata); + return ret; } static void smsc75xx_unbind(struct usbnet *dev, struct usb_interface *intf) From 8c42a49738f16af0061f9ae5c2f5a955f268d9e3 Mon Sep 17 00:00:00 2001 From: George McCollister Date: Mon, 24 May 2021 15:29:53 -0500 Subject: [PATCH 564/730] net: dsa: microchip: enable phy errata workaround on 9567 Also enable phy errata workaround on 9567 since has the same errata as the 9477 according to the manufacture's documentation. Signed-off-by: George McCollister Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz9477.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 55e5d479acce..854e25f43fa7 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -1530,6 +1530,7 @@ static const struct ksz_chip_data ksz9477_switch_chips[] = { .num_statics = 16, .cpu_ports = 0x7F, /* can be configured as cpu port */ .port_cnt = 7, /* total physical port count */ + .phy_errata_9477 = true, }, }; From 1cb61759d40716643281b8e0f8c7afebc8699249 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 21 May 2021 09:26:10 +0200 Subject: [PATCH 565/730] init: verify that function is initcall_t at compile-time In the spirit of making it hard to misuse an interface, add a compile-time assertion in the CONFIG_HAVE_ARCH_PREL32_RELOCATIONS case to verify the initcall function matches initcall_t, because the inline asm bypasses any type-checking the compiler would otherwise do. This will help developers catch incorrect API use in all configurations. A recent example of this is: https://lkml.kernel.org/r/20210514140015.2944744-1-arnd@kernel.org Signed-off-by: Marco Elver Reviewed-by: Miguel Ojeda Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Reviewed-by: Sami Tolvanen Tested-by: Paul E. McKenney Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20210521072610.2880286-1-elver@google.com --- include/linux/init.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/init.h b/include/linux/init.h index 045ad1650ed1..d82b4b2e1d25 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -242,7 +242,8 @@ extern bool initcall_debug; asm(".section \"" __sec "\", \"a\" \n" \ __stringify(__name) ": \n" \ ".long " __stringify(__stub) " - . \n" \ - ".previous \n"); + ".previous \n"); \ + static_assert(__same_type(initcall_t, &fn)); #else #define ____define_initcall(fn, __unused, __name, __sec) \ static initcall_t __name __used \ From 24845dcb170e16b3100bd49743687648c71387ae Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 11 Mar 2021 17:09:41 -0800 Subject: [PATCH 566/730] Makefile: LTO: have linker check -Wframe-larger-than -Wframe-larger-than= requires stack frame information, which the frontend cannot provide. This diagnostic is emitted late during compilation once stack frame size is available. When building with LTO, the frontend simply lowers C to LLVM IR and does not have stack frame information, so it cannot emit this diagnostic. When the linker drives LTO, it restarts optimizations and lowers LLVM IR to object code. At that point, it has stack frame information but doesn't know to check for a specific max stack frame size. I consider this a bug in LLVM that we need to fix. There are some details we're working out related to LTO such as which value to use when there are multiple different values specified per TU, or how to propagate these to compiler synthesized routines properly, if at all. Until it's fixed, ensure we don't miss these. At that point we can wrap this in a compiler version guard or revert this based on the minimum support version of Clang. The error message is not generated during link: LTO vmlinux.o ld.lld: warning: stack size limit exceeded (8224) in foobarbaz Cc: Sami Tolvanen Reported-by: Candle Sun Suggested-by: Fangrui Song Signed-off-by: Nick Desaulniers Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20210312010942.1546679-1-ndesaulniers@google.com --- Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Makefile b/Makefile index 0ed7e061c8e9..90d1e1d7b927 100644 --- a/Makefile +++ b/Makefile @@ -928,6 +928,11 @@ CC_FLAGS_LTO += -fvisibility=hidden # Limit inlining across translation units to reduce binary size KBUILD_LDFLAGS += -mllvm -import-instr-limit=5 + +# Check for frame size exceeding threshold during prolog/epilog insertion. +ifneq ($(CONFIG_FRAME_WARN),0) +KBUILD_LDFLAGS += -plugin-opt=-warn-stack-size=$(CONFIG_FRAME_WARN) +endif endif ifdef CONFIG_LTO From 0f9342513cc78a31a4a272a19b35eee4e8cd7107 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 20 May 2021 17:15:49 -0700 Subject: [PATCH 567/730] xfs: check free AG space when making per-AG reservations The new online shrink code exposed a gap in the per-AG reservation code, which is that we only return ENOSPC to callers if the entire fs doesn't have enough free blocks. Except for debugging mode, the reservation init code doesn't ever check that there's enough free space in that AG to cover the reservation. Not having enough space is not considered an immediate fatal error that requires filesystem offlining because (a) it's shouldn't be possible to wind up in that state through normal file operations and (b) even if one did, freeing data blocks would recover the situation. However, online shrink now needs to know if shrinking would not leave enough space so that it can abort the shrink operation. Hence we need to promote this assertion into an actual error return. Observed by running xfs/168 with a 1k block size, though in theory this could happen with any configuration. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster Reviewed-by: Carlos Maiolino Reviewed-by: Gao Xiang --- fs/xfs/libxfs/xfs_ag_resv.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index e32a1833d523..bbfea8022a3b 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -325,10 +325,22 @@ out: error2 = xfs_alloc_pagf_init(mp, tp, pag->pag_agno, 0); if (error2) return error2; - ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + - xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved <= - pag->pagf_freeblks + pag->pagf_flcount); + + /* + * If there isn't enough space in the AG to satisfy the + * reservation, let the caller know that there wasn't enough + * space. Callers are responsible for deciding what to do + * next, since (in theory) we can stumble along with + * insufficient reservation if data blocks are being freed to + * replenish the AG's free space. + */ + if (!error && + xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + + xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved > + pag->pagf_freeblks + pag->pagf_flcount) + error = -ENOSPC; } + return error; } From 6b69e485894b355b333bd286f0f0958e41d8754a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 12 May 2021 12:49:19 -0700 Subject: [PATCH 568/730] xfs: standardize extent size hint validation While chasing a bug involving invalid extent size hints being propagated into newly created realtime files, I noticed that the xfs_ioctl_setattr checks for the extent size hints weren't the same as the ones now encoded in libxfs and used for validation in repair and mkfs. Because the checks in libxfs are more stringent than the ones in the ioctl, it's possible for a live system to set inode flags that immediately result in corruption warnings. Specifically, it's possible to set an extent size hint on an rtinherit directory without checking if the hint is aligned to the realtime extent size, which makes no sense since that combination is used only to seed new realtime files. Replace the open-coded and inadequate checks with the libxfs verifier versions and update the code comments a bit. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_inode_buf.c | 24 ++++++++-- fs/xfs/xfs_ioctl.c | 90 ++++++++--------------------------- 2 files changed, 41 insertions(+), 73 deletions(-) diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 5c9a7440d9e4..045118c7bf78 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -559,8 +559,17 @@ xfs_dinode_calc_crc( /* * Validate di_extsize hint. * - * The rules are documented at xfs_ioctl_setattr_check_extsize(). - * These functions must be kept in sync with each other. + * 1. Extent size hint is only valid for directories and regular files. + * 2. FS_XFLAG_EXTSIZE is only valid for regular files. + * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories. + * 4. Hint cannot be larger than MAXTEXTLEN. + * 5. Can be changed on directories at any time. + * 6. Hint value of 0 turns off hints, clears inode flags. + * 7. Extent size must be a multiple of the appropriate block size. + * For realtime files, this is the rt extent size. + * 8. For non-realtime files, the extent size hint must be limited + * to half the AG size to avoid alignment extending the extent beyond the + * limits of the AG. */ xfs_failaddr_t xfs_inode_validate_extsize( @@ -616,8 +625,15 @@ xfs_inode_validate_extsize( /* * Validate di_cowextsize hint. * - * The rules are documented at xfs_ioctl_setattr_check_cowextsize(). - * These functions must be kept in sync with each other. + * 1. CoW extent size hint can only be set if reflink is enabled on the fs. + * The inode does not have to have any shared blocks, but it must be a v3. + * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files; + * for a directory, the hint is propagated to new files. + * 3. Can be changed on files & directories at any time. + * 4. Hint value of 0 turns off hints, clears inode flags. + * 5. Extent size must be a multiple of the appropriate block size. + * 6. The extent size hint must be limited to half the AG size to avoid + * alignment extending the extent beyond the limits of the AG. */ xfs_failaddr_t xfs_inode_validate_cowextsize( diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 3925bfcb2365..6407921aca96 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1267,20 +1267,8 @@ out_error: } /* - * extent size hint validation is somewhat cumbersome. Rules are: - * - * 1. extent size hint is only valid for directories and regular files - * 2. FS_XFLAG_EXTSIZE is only valid for regular files - * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories. - * 4. can only be changed on regular files if no extents are allocated - * 5. can be changed on directories at any time - * 6. extsize hint of 0 turns off hints, clears inode flags. - * 7. Extent size must be a multiple of the appropriate block size. - * 8. for non-realtime files, the extent size hint must be limited - * to half the AG size to avoid alignment extending the extent beyond the - * limits of the AG. - * - * Please keep this function in sync with xfs_scrub_inode_extsize. + * Validate a proposed extent size hint. For regular files, the hint can only + * be changed if no extents are allocated. */ static int xfs_ioctl_setattr_check_extsize( @@ -1288,86 +1276,50 @@ xfs_ioctl_setattr_check_extsize( struct fileattr *fa) { struct xfs_mount *mp = ip->i_mount; - xfs_extlen_t size; - xfs_fsblock_t extsize_fsb; + xfs_failaddr_t failaddr; + uint16_t new_diflags; if (!fa->fsx_valid) return 0; if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_df.if_nextents && - ((ip->i_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize)) + XFS_FSB_TO_B(mp, ip->i_extsize) != fa->fsx_extsize) return -EINVAL; - if (fa->fsx_extsize == 0) - return 0; - - extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); - if (extsize_fsb > MAXEXTLEN) + if (fa->fsx_extsize & mp->m_blockmask) return -EINVAL; - if (XFS_IS_REALTIME_INODE(ip) || - (fa->fsx_xflags & FS_XFLAG_REALTIME)) { - size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; - } else { - size = mp->m_sb.sb_blocksize; - if (extsize_fsb > mp->m_sb.sb_agblocks / 2) - return -EINVAL; - } + new_diflags = xfs_flags2diflags(ip, fa->fsx_xflags); - if (fa->fsx_extsize % size) - return -EINVAL; - - return 0; + failaddr = xfs_inode_validate_extsize(ip->i_mount, + XFS_B_TO_FSB(mp, fa->fsx_extsize), + VFS_I(ip)->i_mode, new_diflags); + return failaddr != NULL ? -EINVAL : 0; } -/* - * CoW extent size hint validation rules are: - * - * 1. CoW extent size hint can only be set if reflink is enabled on the fs. - * The inode does not have to have any shared blocks, but it must be a v3. - * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files; - * for a directory, the hint is propagated to new files. - * 3. Can be changed on files & directories at any time. - * 4. CoW extsize hint of 0 turns off hints, clears inode flags. - * 5. Extent size must be a multiple of the appropriate block size. - * 6. The extent size hint must be limited to half the AG size to avoid - * alignment extending the extent beyond the limits of the AG. - * - * Please keep this function in sync with xfs_scrub_inode_cowextsize. - */ static int xfs_ioctl_setattr_check_cowextsize( struct xfs_inode *ip, struct fileattr *fa) { struct xfs_mount *mp = ip->i_mount; - xfs_extlen_t size; - xfs_fsblock_t cowextsize_fsb; + xfs_failaddr_t failaddr; + uint64_t new_diflags2; + uint16_t new_diflags; if (!fa->fsx_valid) return 0; - if (!(fa->fsx_xflags & FS_XFLAG_COWEXTSIZE)) - return 0; - - if (!xfs_sb_version_hasreflink(&ip->i_mount->m_sb)) + if (fa->fsx_cowextsize & mp->m_blockmask) return -EINVAL; - if (fa->fsx_cowextsize == 0) - return 0; + new_diflags = xfs_flags2diflags(ip, fa->fsx_xflags); + new_diflags2 = xfs_flags2diflags2(ip, fa->fsx_xflags); - cowextsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_cowextsize); - if (cowextsize_fsb > MAXEXTLEN) - return -EINVAL; - - size = mp->m_sb.sb_blocksize; - if (cowextsize_fsb > mp->m_sb.sb_agblocks / 2) - return -EINVAL; - - if (fa->fsx_cowextsize % size) - return -EINVAL; - - return 0; + failaddr = xfs_inode_validate_cowextsize(ip->i_mount, + XFS_B_TO_FSB(mp, fa->fsx_cowextsize), + VFS_I(ip)->i_mode, new_diflags, new_diflags2); + return failaddr != NULL ? -EINVAL : 0; } static int From 603f000b15f21ce8932f76689c7aa9fe58261cf5 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 12 May 2021 12:51:26 -0700 Subject: [PATCH 569/730] xfs: validate extsz hints against rt extent size when rtinherit is set The RTINHERIT bit can be set on a directory so that newly created regular files will have the REALTIME bit set to store their data on the realtime volume. If an extent size hint (and EXTSZINHERIT) are set on the directory, the hint will also be copied into the new file. As pointed out in previous patches, for realtime files we require the extent size hint be an integer multiple of the realtime extent, but we don't perform the same validation on a directory with both RTINHERIT and EXTSZINHERIT set, even though the only use-case of that combination is to propagate extent size hints into new realtime files. This leads to inode corruption errors when the bad values are propagated. Because there may be existing filesystems with such a configuration, we cannot simply amend the inode verifier to trip on these directories and call it a day because that will cause previously "working" filesystems to start throwing errors abruptly. Note that it's valid to have directories with rtinherit set even if there is no realtime volume, in which case the problem does not manifest because rtinherit is ignored if there's no realtime device; and it's possible that someone set the flag, crashed, repaired the filesystem (which clears the hint on the realtime file) and continued. Therefore, mitigate this issue in several ways: First, if we try to write out an inode with both rtinherit/extszinherit set and an unaligned extent size hint, turn off the hint to correct the error. Second, if someone tries to misconfigure a directory via the fssetxattr ioctl, fail the ioctl. Third, reverify both extent size hint values when we propagate heritable inode attributes from parent to child, to prevent misconfigurations from spreading. Signed-off-by: Darrick J. Wong Reviewed-by: Carlos Maiolino Reviewed-by: Brian Foster --- fs/xfs/libxfs/xfs_inode_buf.c | 22 ++++++++++++++++++++++ fs/xfs/libxfs/xfs_trans_inode.c | 17 +++++++++++++++++ fs/xfs/xfs_inode.c | 29 +++++++++++++++++++++++++++++ fs/xfs/xfs_ioctl.c | 15 +++++++++++++++ fs/xfs/xfs_message.h | 2 ++ 5 files changed, 85 insertions(+) diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 045118c7bf78..f3254a4f4cb4 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -589,6 +589,28 @@ xfs_inode_validate_extsize( inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT); extsize_bytes = XFS_FSB_TO_B(mp, extsize); + /* + * This comment describes a historic gap in this verifier function. + * + * On older kernels, the extent size hint verifier doesn't check that + * the extent size hint is an integer multiple of the realtime extent + * size on a directory with both RTINHERIT and EXTSZINHERIT flags set. + * The verifier has always enforced the alignment rule for regular + * files with the REALTIME flag set. + * + * If a directory with a misaligned extent size hint is allowed to + * propagate that hint into a new regular realtime file, the result + * is that the inode cluster buffer verifier will trigger a corruption + * shutdown the next time it is run. + * + * Unfortunately, there could be filesystems with these misconfigured + * directories in the wild, so we cannot add a check to this verifier + * at this time because that will result a new source of directory + * corruption errors when reading an existing filesystem. Instead, we + * permit the misconfiguration to pass through the verifiers so that + * callers of this function can correct and mitigate externally. + */ + if (rt_flag) blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; else diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c index 78324e043e25..8d595a5c4abd 100644 --- a/fs/xfs/libxfs/xfs_trans_inode.c +++ b/fs/xfs/libxfs/xfs_trans_inode.c @@ -142,6 +142,23 @@ xfs_trans_log_inode( flags |= XFS_ILOG_CORE; } + /* + * Inode verifiers on older kernels don't check that the extent size + * hint is an integer multiple of the rt extent size on a directory + * with both rtinherit and extszinherit flags set. If we're logging a + * directory that is misconfigured in this way, clear the hint. + */ + if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) && + (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) && + (ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) { + xfs_info_once(ip->i_mount, + "Correcting misaligned extent size hint in inode 0x%llx.", ip->i_ino); + ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE | + XFS_DIFLAG_EXTSZINHERIT); + ip->i_extsize = 0; + flags |= XFS_ILOG_CORE; + } + /* * Record the specific change for fdatasync optimisation. This allows * fdatasync to skip log forces for inodes that are only timestamp diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 0369eb22c1bb..e4c2da4566f1 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -690,6 +690,7 @@ xfs_inode_inherit_flags( const struct xfs_inode *pip) { unsigned int di_flags = 0; + xfs_failaddr_t failaddr; umode_t mode = VFS_I(ip)->i_mode; if (S_ISDIR(mode)) { @@ -729,6 +730,24 @@ xfs_inode_inherit_flags( di_flags |= XFS_DIFLAG_FILESTREAM; ip->i_diflags |= di_flags; + + /* + * Inode verifiers on older kernels only check that the extent size + * hint is an integer multiple of the rt extent size on realtime files. + * They did not check the hint alignment on a directory with both + * rtinherit and extszinherit flags set. If the misaligned hint is + * propagated from a directory into a new realtime file, new file + * allocations will fail due to math errors in the rt allocator and/or + * trip the verifiers. Validate the hint settings in the new file so + * that we don't let broken hints propagate. + */ + failaddr = xfs_inode_validate_extsize(ip->i_mount, ip->i_extsize, + VFS_I(ip)->i_mode, ip->i_diflags); + if (failaddr) { + ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE | + XFS_DIFLAG_EXTSZINHERIT); + ip->i_extsize = 0; + } } /* Propagate di_flags2 from a parent inode to a child inode. */ @@ -737,12 +756,22 @@ xfs_inode_inherit_flags2( struct xfs_inode *ip, const struct xfs_inode *pip) { + xfs_failaddr_t failaddr; + if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) { ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE; ip->i_cowextsize = pip->i_cowextsize; } if (pip->i_diflags2 & XFS_DIFLAG2_DAX) ip->i_diflags2 |= XFS_DIFLAG2_DAX; + + /* Don't let invalid cowextsize hints propagate. */ + failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize, + VFS_I(ip)->i_mode, ip->i_diflags, ip->i_diflags2); + if (failaddr) { + ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE; + ip->i_cowextsize = 0; + } } /* diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 6407921aca96..1fe4c1fc0aea 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1291,6 +1291,21 @@ xfs_ioctl_setattr_check_extsize( new_diflags = xfs_flags2diflags(ip, fa->fsx_xflags); + /* + * Inode verifiers on older kernels don't check that the extent size + * hint is an integer multiple of the rt extent size on a directory + * with both rtinherit and extszinherit flags set. Don't let sysadmins + * misconfigure directories. + */ + if ((new_diflags & XFS_DIFLAG_RTINHERIT) && + (new_diflags & XFS_DIFLAG_EXTSZINHERIT)) { + unsigned int rtextsize_bytes; + + rtextsize_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize); + if (fa->fsx_extsize % rtextsize_bytes) + return -EINVAL; + } + failaddr = xfs_inode_validate_extsize(ip->i_mount, XFS_B_TO_FSB(mp, fa->fsx_extsize), VFS_I(ip)->i_mode, new_diflags); diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h index 3c392b1512ac..7ec1a9207517 100644 --- a/fs/xfs/xfs_message.h +++ b/fs/xfs/xfs_message.h @@ -73,6 +73,8 @@ do { \ xfs_printk_once(xfs_warn, dev, fmt, ##__VA_ARGS__) #define xfs_notice_once(dev, fmt, ...) \ xfs_printk_once(xfs_notice, dev, fmt, ##__VA_ARGS__) +#define xfs_info_once(dev, fmt, ...) \ + xfs_printk_once(xfs_info, dev, fmt, ##__VA_ARGS__) void assfail(struct xfs_mount *mp, char *expr, char *f, int l); void asswarn(struct xfs_mount *mp, char *expr, char *f, int l); From 3596a06583a16cf7f76d836440dfba5714c9c710 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 21 May 2021 15:32:39 +0800 Subject: [PATCH 570/730] nvme: fix potential memory leaks in nvme_cdev_add MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to call put_device if cdev_device_add failed, otherwise kmemleak has below report. [<0000000024c71758>] kmem_cache_alloc_trace+0x233/0x480 [<00000000ad2813ed>] device_add+0x7ff/0xe10 [<0000000035bc54c4>] cdev_device_add+0x72/0xa0 [<000000006c9aa1e8>] nvme_cdev_add+0xa9/0xf0 [nvme_core] [<000000003c4d492d>] nvme_mpath_set_live+0x251/0x290 [nvme_core] [<00000000889a58da>] nvme_mpath_add_disk+0x268/0x320 [nvme_core] [<00000000192e7161>] nvme_alloc_ns+0x669/0xac0 [nvme_core] [<000000007a1a6041>] nvme_validate_or_alloc_ns+0x156/0x280 [nvme_core] [<000000003a763c35>] nvme_scan_work+0x221/0x3c0 [nvme_core] [<000000009ff10706>] process_one_work+0x5cf/0xb10 [<000000000644ee25>] worker_thread+0x7a/0x680 [<00000000285ebd2f>] kthread+0x1c6/0x210 [<00000000e297c6ea>] ret_from_fork+0x22/0x30 Fixes: 2637baed7801 ("nvme: introduce generic per-namespace chardev") Signed-off-by: Guoqing Jiang Reviewed-by: Javier González Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 762125f2905f..66973bb56305 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3485,8 +3485,10 @@ int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device, cdev_init(cdev, fops); cdev->owner = owner; ret = cdev_device_add(cdev, cdev_device); - if (ret) + if (ret) { + put_device(cdev_device); ida_simple_remove(&nvme_ns_chr_minor_ida, minor); + } return ret; } From f25f8ef70ce2e85bae1a266dd5de714aefda81d2 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 21 May 2021 10:23:00 +0200 Subject: [PATCH 571/730] nvme-fc: short-circuit reconnect retries Returning an nvme status from nvme_fc_create_association() indicates that the association is established, and we should honour the DNR bit. If it's set a reconnect attempt will just return the same error, so we can short-circuit the reconnect attempts and fail the connection directly. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Reviewed-by: Himanshu Madhani Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 256e87721a01..f183f9fa03d0 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3107,6 +3107,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) if (ctrl->ctrl.icdoff) { dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", ctrl->ctrl.icdoff); + ret = NVME_SC_INVALID_FIELD | NVME_SC_DNR; goto out_disconnect_admin_queue; } @@ -3114,6 +3115,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) if (!(ctrl->ctrl.sgls & ((1 << 0) | (1 << 1)))) { dev_err(ctrl->ctrl.device, "Mandatory sgls are not supported!\n"); + ret = NVME_SC_INVALID_FIELD | NVME_SC_DNR; goto out_disconnect_admin_queue; } @@ -3280,11 +3282,13 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) return; - if (portptr->port_state == FC_OBJSTATE_ONLINE) + if (portptr->port_state == FC_OBJSTATE_ONLINE) { dev_info(ctrl->ctrl.device, "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", ctrl->cnum, status); - else if (time_after_eq(jiffies, rport->dev_loss_end)) + if (status > 0 && (status & NVME_SC_DNR)) + recon = false; + } else if (time_after_eq(jiffies, rport->dev_loss_end)) recon = false; if (recon && nvmf_should_reconnect(&ctrl->ctrl)) { @@ -3298,12 +3302,17 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) queue_delayed_work(nvme_wq, &ctrl->connect_work, recon_delay); } else { - if (portptr->port_state == FC_OBJSTATE_ONLINE) - dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: Max reconnect attempts (%d) " - "reached.\n", - ctrl->cnum, ctrl->ctrl.nr_reconnects); - else + if (portptr->port_state == FC_OBJSTATE_ONLINE) { + if (status > 0 && (status & NVME_SC_DNR)) + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: reconnect failure\n", + ctrl->cnum); + else + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Max reconnect attempts " + "(%d) reached.\n", + ctrl->cnum, ctrl->ctrl.nr_reconnects); + } else dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: dev_loss_tmo (%d) expired " "while waiting for remoteport connectivity.\n", From 4d9442bf263ac45d495bb7ecf75009e59c0622b2 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 21 May 2021 10:23:46 +0200 Subject: [PATCH 572/730] nvme-fabrics: decode host pathing error for connect Add an additional decoding for 'host pathing error' during connect. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Himanshu Madhani Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index a2bb7fc63a73..34a84d2086c7 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -336,6 +336,11 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl, cmd->connect.recfmt); break; + case NVME_SC_HOST_PATH_ERROR: + dev_err(ctrl->device, + "Connect command failed: host path error\n"); + break; + default: dev_err(ctrl->device, "Connect command failed, error wo/DNR bit: %d\n", From 7cfc4ea78fc103ea51ecbacd9236abb5b1c490d2 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Fri, 30 Apr 2021 10:27:44 +0200 Subject: [PATCH 573/730] drm/meson: fix shutdown crash when component not probed When main component is not probed, by example when the dw-hdmi module is not loaded yet or in probe defer, the following crash appears on shutdown: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000038 ... pc : meson_drv_shutdown+0x24/0x50 lr : platform_drv_shutdown+0x20/0x30 ... Call trace: meson_drv_shutdown+0x24/0x50 platform_drv_shutdown+0x20/0x30 device_shutdown+0x158/0x360 kernel_restart_prepare+0x38/0x48 kernel_restart+0x18/0x68 __do_sys_reboot+0x224/0x250 __arm64_sys_reboot+0x24/0x30 ... Simply check if the priv struct has been allocated before using it. Fixes: fa0c16caf3d7 ("drm: meson_drv add shutdown function") Reported-by: Stefan Agner Signed-off-by: Neil Armstrong Tested-by: Martin Blumenstingl Reviewed-by: Martin Blumenstingl Link: https://patchwork.freedesktop.org/patch/msgid/20210430082744.3638743-1-narmstrong@baylibre.com --- drivers/gpu/drm/meson/meson_drv.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 453d8b4c5763..07fcd12dca16 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -485,11 +485,12 @@ static int meson_probe_remote(struct platform_device *pdev, static void meson_drv_shutdown(struct platform_device *pdev) { struct meson_drm *priv = dev_get_drvdata(&pdev->dev); - struct drm_device *drm = priv->drm; - DRM_DEBUG_DRIVER("\n"); - drm_kms_helper_poll_fini(drm); - drm_atomic_helper_shutdown(drm); + if (!priv) + return; + + drm_kms_helper_poll_fini(priv->drm); + drm_atomic_helper_shutdown(priv->drm); } static int meson_drv_probe(struct platform_device *pdev) From a80c203c3f1c06d2201c19ae071d0ae770a2b1ca Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 25 May 2021 10:40:59 +0300 Subject: [PATCH 574/730] xhci: fix giving back URB with incorrect status regression in 5.12 5.12 kernel changes how xhci handles cancelled URBs and halted endpoints. Among these changes cancelled and stalled URBs are no longer given back before they are cleared from xHC hardware cache. These changes unfortunately cleared the -EPIPE status of a stalled transfer in one case before giving bak the URB, causing a USB card reader to fail from working. Fixes: 674f8438c121 ("xhci: split handling halted endpoints into two steps") Cc: # 5.12 Reported-by: Peter Ganzhorn Tested-by: Peter Ganzhorn Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20210525074100.1154090-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index a8e4189277da..256d336354a0 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -828,14 +828,10 @@ static void xhci_giveback_invalidated_tds(struct xhci_virt_ep *ep) list_for_each_entry_safe(td, tmp_td, &ep->cancelled_td_list, cancelled_td_list) { - /* - * Doesn't matter what we pass for status, since the core will - * just overwrite it (because the URB has been unlinked). - */ ring = xhci_urb_to_transfer_ring(ep->xhci, td->urb); if (td->cancel_status == TD_CLEARED) - xhci_td_cleanup(ep->xhci, td, ring, 0); + xhci_td_cleanup(ep->xhci, td, ring, td->status); if (ep->xhci->xhc_state & XHCI_STATE_DYING) return; From a7f2e9272aff1ccfe0fc801dab1d5a7a1c6b7ed2 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 25 May 2021 10:41:00 +0300 Subject: [PATCH 575/730] xhci: Fix 5.12 regression of missing xHC cache clearing command after a Stall If endpoints halts due to a stall then the dequeue pointer read from hardware may already be set ahead of the stalled TRB. After commit 674f8438c121 ("xhci: split handling halted endpoints into two steps") in 5.12 xhci driver won't issue a Set TR Dequeue if hardware dequeue pointer is already in the right place. Turns out the "Set TR Dequeue pointer" command is anyway needed as it in addition to moving the dequeue pointer also clears endpoint state and cache. Fixes: 674f8438c121 ("xhci: split handling halted endpoints into two steps") Cc: # 5.12 Reported-by: Peter Ganzhorn Tested-by: Peter Ganzhorn Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20210525074100.1154090-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 256d336354a0..6acd2329e08d 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -933,14 +933,18 @@ static int xhci_invalidate_cancelled_tds(struct xhci_virt_ep *ep) continue; } /* - * If ring stopped on the TD we need to cancel, then we have to + * If a ring stopped on the TD we need to cancel then we have to * move the xHC endpoint ring dequeue pointer past this TD. + * Rings halted due to STALL may show hw_deq is past the stalled + * TD, but still require a set TR Deq command to flush xHC cache. */ hw_deq = xhci_get_hw_deq(xhci, ep->vdev, ep->ep_index, td->urb->stream_id); hw_deq &= ~0xf; - if (trb_in_td(xhci, td->start_seg, td->first_trb, + if (td->cancel_status == TD_HALTED) { + cached_td = td; + } else if (trb_in_td(xhci, td->start_seg, td->first_trb, td->last_trb, hw_deq, false)) { switch (td->cancel_status) { case TD_CLEARED: /* TD is already no-op */ From e11851429fdc23524aa244f76508c3c7aeaefdf6 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Thu, 13 May 2021 00:28:09 +0300 Subject: [PATCH 576/730] drm/i915: Reenable LTTPR non-transparent LT mode for DPCD_REV<1.4 The driver currently disables the LTTPR non-transparent link training mode for sinks with a DPCD_REV<1.4, based on the following description of the LTTPR DPCD register range in DP standard 2.0 (at the 0xF0000 register description): "" LTTPR-related registers at DPCD Addresses F0000h through F02FFh are valid only for DPCD r1.4 (or higher). """ The transparent link training mode should still work fine, however the implementation for this in some retimer FWs seems to be broken, see the References: link below. After discussions with DP standard authors the above "DPCD r1.4" does not refer to the DPCD revision (stored in the DPCD_REV reg at 0x00000), rather to the "LTTPR field data structure revision" stored in the 0xF0000 reg. An update request has been filed at vesa.org (see wg/Link/documentComment/3746) for the upcoming v2.1 specification to clarify the above description along the following lines: """ LTTPR-related registers at DPCD Addresses F0000h through F02FFh are valid only for LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV 1.4 (or higher) """ Based on my tests Windows uses the non-transparent link training mode for DPCD_REV==1.2 sinks as well (so presumably for all DPCD_REVs), and forcing it to use transparent mode on ICL/TGL platforms leads to the same LT failure as reported at the References: link. Based on the above let's assume that the transparent link training mode is not well tested/supported and align the code to the correct interpretation of what the r1.4 version refers to. Reported-and-tested-by: Casey Harkins Tested-by: Khaled Almahallawy References: https://gitlab.freedesktop.org/drm/intel/-/issues/3415 Fixes: 264613b406eb ("drm/i915: Disable LTTPR support when the DPCD rev < 1.4") Cc: # v5.11+ Signed-off-by: Imre Deak Reviewed-by: Khaled Almahallawy Link: https://patchwork.freedesktop.org/patch/msgid/20210512212809.1234701-1-imre.deak@intel.com (cherry picked from commit cb4920cc40f630b5a247f4ed7d3dea66749df588) Signed-off-by: Jani Nikula --- .../drm/i915/display/intel_dp_link_training.c | 71 +++++++++---------- 1 file changed, 33 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 02a003fd48fb..50cae0198a3d 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -128,50 +128,14 @@ intel_dp_set_lttpr_transparent_mode(struct intel_dp *intel_dp, bool enable) return drm_dp_dpcd_write(&intel_dp->aux, DP_PHY_REPEATER_MODE, &val, 1) == 1; } -/** - * intel_dp_init_lttpr_and_dprx_caps - detect LTTPR and DPRX caps, init the LTTPR link training mode - * @intel_dp: Intel DP struct - * - * Read the LTTPR common and DPRX capabilities and switch to non-transparent - * link training mode if any is detected and read the PHY capabilities for all - * detected LTTPRs. In case of an LTTPR detection error or if the number of - * LTTPRs is more than is supported (8), fall back to the no-LTTPR, - * transparent mode link training mode. - * - * Returns: - * >0 if LTTPRs were detected and the non-transparent LT mode was set. The - * DPRX capabilities are read out. - * 0 if no LTTPRs or more than 8 LTTPRs were detected or in case of a - * detection failure and the transparent LT mode was set. The DPRX - * capabilities are read out. - * <0 Reading out the DPRX capabilities failed. - */ -int intel_dp_init_lttpr_and_dprx_caps(struct intel_dp *intel_dp) +static int intel_dp_init_lttpr(struct intel_dp *intel_dp) { int lttpr_count; - bool ret; int i; - ret = intel_dp_read_lttpr_common_caps(intel_dp); - - /* The DPTX shall read the DPRX caps after LTTPR detection. */ - if (drm_dp_read_dpcd_caps(&intel_dp->aux, intel_dp->dpcd)) { - intel_dp_reset_lttpr_common_caps(intel_dp); - return -EIO; - } - - if (!ret) + if (!intel_dp_read_lttpr_common_caps(intel_dp)) return 0; - /* - * The 0xF0000-0xF02FF range is only valid if the DPCD revision is - * at least 1.4. - */ - if (intel_dp->dpcd[DP_DPCD_REV] < 0x14) { - intel_dp_reset_lttpr_common_caps(intel_dp); - return 0; - } - lttpr_count = drm_dp_lttpr_count(intel_dp->lttpr_common_caps); /* * Prevent setting LTTPR transparent mode explicitly if no LTTPRs are @@ -211,6 +175,37 @@ int intel_dp_init_lttpr_and_dprx_caps(struct intel_dp *intel_dp) return lttpr_count; } + +/** + * intel_dp_init_lttpr_and_dprx_caps - detect LTTPR and DPRX caps, init the LTTPR link training mode + * @intel_dp: Intel DP struct + * + * Read the LTTPR common and DPRX capabilities and switch to non-transparent + * link training mode if any is detected and read the PHY capabilities for all + * detected LTTPRs. In case of an LTTPR detection error or if the number of + * LTTPRs is more than is supported (8), fall back to the no-LTTPR, + * transparent mode link training mode. + * + * Returns: + * >0 if LTTPRs were detected and the non-transparent LT mode was set. The + * DPRX capabilities are read out. + * 0 if no LTTPRs or more than 8 LTTPRs were detected or in case of a + * detection failure and the transparent LT mode was set. The DPRX + * capabilities are read out. + * <0 Reading out the DPRX capabilities failed. + */ +int intel_dp_init_lttpr_and_dprx_caps(struct intel_dp *intel_dp) +{ + int lttpr_count = intel_dp_init_lttpr(intel_dp); + + /* The DPTX shall read the DPRX caps after LTTPR detection. */ + if (drm_dp_read_dpcd_caps(&intel_dp->aux, intel_dp->dpcd)) { + intel_dp_reset_lttpr_common_caps(intel_dp); + return -EIO; + } + + return lttpr_count; +} EXPORT_SYMBOL(intel_dp_init_lttpr_and_dprx_caps); static u8 dp_voltage_max(u8 preemph) From 19dee613816d5065ad09f2ccc20b35d23dca9f28 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 13 May 2021 11:03:32 +0100 Subject: [PATCH 577/730] netfs: Pass flags through to grab_cache_page_write_begin() In netfs_write_begin(), pass the AOP flags through to grab_cache_page_write_begin() so that a request to use GFP_NOFS is honoured. Fixes: e1b1240c1ff5 ("netfs: Add write_begin helper") Reported-by: Matthew Wilcox (Oracle) Signed-off-by: David Howells Reviewed-by: Jeff Layton Reviewed-by: Matthew Wilcox (Oracle) cc: linux-mm@kvack.org cc: linux-cachefs@redhat.com cc: linux-afs@lists.infradead.org cc: linux-nfs@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: v9fs-developer@lists.sourceforge.net cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/162090295383.3165945.13595101698295243662.stgit@warthog.procyon.org.uk # v1 --- fs/netfs/read_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c index 193841d03de0..725614625ed4 100644 --- a/fs/netfs/read_helper.c +++ b/fs/netfs/read_helper.c @@ -1068,7 +1068,7 @@ int netfs_write_begin(struct file *file, struct address_space *mapping, DEFINE_READAHEAD(ractl, file, NULL, mapping, index); retry: - page = grab_cache_page_write_begin(mapping, index, 0); + page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; From b71c791254ff5e78a124c8949585dccd9e225e06 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 13 May 2021 11:40:27 +0100 Subject: [PATCH 578/730] netfs: Make CONFIG_NETFS_SUPPORT auto-selected rather than manual Make the netfs helper library selected automatically by the things that use it rather than being manually configured, even though it's required[1]. Fixes: 3a5829fefd3b ("netfs: Make a netfs helper module") Reported-by: Geert Uytterhoeven Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-mm@kvack.org cc: linux-cachefs@redhat.com cc: linux-afs@lists.infradead.org cc: linux-nfs@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: v9fs-developer@lists.sourceforge.net cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/CAMuHMdXJZ7iNQE964CdBOU=vRKVMFzo=YF_eiwsGgqzuvZ+TuA@mail.gmail.com [1] Link: https://lore.kernel.org/r/162090298141.3166007.2971118149366779916.stgit@warthog.procyon.org.uk # v1 --- fs/netfs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/netfs/Kconfig b/fs/netfs/Kconfig index 578112713703..b4db21022cb4 100644 --- a/fs/netfs/Kconfig +++ b/fs/netfs/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config NETFS_SUPPORT - tristate "Support for network filesystem high-level I/O" + tristate help This option enables support for network filesystems, including helpers for high-level buffered I/O, abstracting out read From e69012400b0cb42b2070748322cb72f9effec00f Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 25 May 2021 10:45:51 +0800 Subject: [PATCH 579/730] arm64: mm: don't use CON and BLK mapping if KFENCE is enabled When we added KFENCE support for arm64, we intended that it would force the entire linear map to be mapped at page granularity, but we only enforced this in arch_add_memory() and not in map_mem(), so memory mapped at boot time can be mapped at a larger granularity. When booting a kernel with KFENCE=y and RODATA_FULL=n, this results in the following WARNING at boot: [ 0.000000] ------------[ cut here ]------------ [ 0.000000] WARNING: CPU: 0 PID: 0 at mm/memory.c:2462 apply_to_pmd_range+0xec/0x190 [ 0.000000] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.13.0-rc1+ #10 [ 0.000000] Hardware name: linux,dummy-virt (DT) [ 0.000000] pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO BTYPE=--) [ 0.000000] pc : apply_to_pmd_range+0xec/0x190 [ 0.000000] lr : __apply_to_page_range+0x94/0x170 [ 0.000000] sp : ffffffc010573e20 [ 0.000000] x29: ffffffc010573e20 x28: ffffff801f400000 x27: ffffff801f401000 [ 0.000000] x26: 0000000000000001 x25: ffffff801f400fff x24: ffffffc010573f28 [ 0.000000] x23: ffffffc01002b710 x22: ffffffc0105fa450 x21: ffffffc010573ee4 [ 0.000000] x20: ffffff801fffb7d0 x19: ffffff801f401000 x18: 00000000fffffffe [ 0.000000] x17: 000000000000003f x16: 000000000000000a x15: ffffffc01060b940 [ 0.000000] x14: 0000000000000000 x13: 0098968000000000 x12: 0000000098968000 [ 0.000000] x11: 0000000000000000 x10: 0000000098968000 x9 : 0000000000000001 [ 0.000000] x8 : 0000000000000000 x7 : ffffffc010573ee4 x6 : 0000000000000001 [ 0.000000] x5 : ffffffc010573f28 x4 : ffffffc01002b710 x3 : 0000000040000000 [ 0.000000] x2 : ffffff801f5fffff x1 : 0000000000000001 x0 : 007800005f400705 [ 0.000000] Call trace: [ 0.000000] apply_to_pmd_range+0xec/0x190 [ 0.000000] __apply_to_page_range+0x94/0x170 [ 0.000000] apply_to_page_range+0x10/0x20 [ 0.000000] __change_memory_common+0x50/0xdc [ 0.000000] set_memory_valid+0x30/0x40 [ 0.000000] kfence_init_pool+0x9c/0x16c [ 0.000000] kfence_init+0x20/0x98 [ 0.000000] start_kernel+0x284/0x3f8 Fixes: 840b23986344 ("arm64, kfence: enable KFENCE for ARM64") Cc: # 5.12.x Signed-off-by: Jisheng Zhang Acked-by: Mark Rutland Acked-by: Marco Elver Tested-by: Marco Elver Link: https://lore.kernel.org/r/20210525104551.2ec37f77@xhacker.debian Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 6dd9369e3ea0..89b66ef43a0f 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -515,7 +515,8 @@ static void __init map_mem(pgd_t *pgdp) */ BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end)); - if (rodata_full || crash_mem_map || debug_pagealloc_enabled()) + if (rodata_full || crash_mem_map || debug_pagealloc_enabled() || + IS_ENABLED(CONFIG_KFENCE)) flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; /* From ff4cff962a7eedc73e54b5096693da7f86c61346 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 16 May 2021 17:01:08 -0700 Subject: [PATCH 580/730] MIPS: alchemy: xxs1500: add gpio-au1000.h header file board-xxs1500.c references 2 functions without declaring them, so add the header file to placate the build. ../arch/mips/alchemy/board-xxs1500.c: In function 'board_setup': ../arch/mips/alchemy/board-xxs1500.c:56:2: error: implicit declaration of function 'alchemy_gpio1_input_enable' [-Werror=implicit-function-declaration] 56 | alchemy_gpio1_input_enable(); ../arch/mips/alchemy/board-xxs1500.c:57:2: error: implicit declaration of function 'alchemy_gpio2_enable'; did you mean 'alchemy_uart_enable'? [-Werror=implicit-function-declaration] 57 | alchemy_gpio2_enable(); Fixes: 8e026910fcd4 ("MIPS: Alchemy: merge GPR/MTX-1/XXS1500 board code into single files") Signed-off-by: Randy Dunlap Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Cc: Manuel Lauss Cc: Ralf Baechle Acked-by: Manuel Lauss Signed-off-by: Thomas Bogendoerfer --- arch/mips/alchemy/board-xxs1500.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/alchemy/board-xxs1500.c b/arch/mips/alchemy/board-xxs1500.c index b184baa4e56a..f175bce2987f 100644 --- a/arch/mips/alchemy/board-xxs1500.c +++ b/arch/mips/alchemy/board-xxs1500.c @@ -18,6 +18,7 @@ #include #include #include +#include #include const char *get_system_type(void) From 6855adc2c5d9dff08be9e6e01deb319738b28780 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 20 May 2021 22:13:43 -0700 Subject: [PATCH 581/730] MIPS: launch.h: add include guard to prevent build errors arch/mips/include/asm/mips-boards/launch.h needs an include guard to prevent it from being #included more than once. Prevents these build errors: In file included from ../arch/mips/mti-malta/malta-amon.c:16: ../arch/mips/include/asm/mips-boards/launch.h:8:8: error: redefinition of 'struct cpulaunch' 8 | struct cpulaunch { | ^~~~~~~~~ In file included from ../arch/mips/include/asm/mips-cps.h:13, from ../arch/mips/include/asm/smp-ops.h:16, from ../arch/mips/include/asm/smp.h:21, from ../include/linux/smp.h:114, from ../arch/mips/mti-malta/malta-amon.c:12: ../arch/mips/include/asm/mips-boards/launch.h:8:8: note: originally defined here 8 | struct cpulaunch { | ^~~~~~~~~ make[3]: [../scripts/Makefile.build:273: arch/mips/mti-malta/malta-amon.o] Error 1 (ignored) Fixes: 6decd1aad15f ("MIPS: add support for buggy MT7621S core detection") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Cc: Ilya Lipnitskiy Reviewed-by: Ilya Lipnitskiy Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/mips-boards/launch.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/mips/include/asm/mips-boards/launch.h b/arch/mips/include/asm/mips-boards/launch.h index f93aa5ee2e2e..3481ed4c117b 100644 --- a/arch/mips/include/asm/mips-boards/launch.h +++ b/arch/mips/include/asm/mips-boards/launch.h @@ -3,6 +3,9 @@ * */ +#ifndef _ASM_MIPS_BOARDS_LAUNCH_H +#define _ASM_MIPS_BOARDS_LAUNCH_H + #ifndef _ASSEMBLER_ struct cpulaunch { @@ -34,3 +37,5 @@ struct cpulaunch { /* Polling period in count cycles for secondary CPU's */ #define LAUNCHPERIOD 10000 + +#endif /* _ASM_MIPS_BOARDS_LAUNCH_H */ From fef532ea0cd871afab7d9a7b6e9da99ac2c24371 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 16 May 2021 17:54:17 -0700 Subject: [PATCH 582/730] MIPS: ralink: export rt_sysc_membase for rt2880_wdt.c rt2880_wdt.c uses (well, attempts to use) rt_sysc_membase. However, when this watchdog driver is built as a loadable module, there is a build error since the rt_sysc_membase symbol is not exported. Export it to quell the build error. ERROR: modpost: "rt_sysc_membase" [drivers/watchdog/rt2880_wdt.ko] undefined! Fixes: 473cf939ff34 ("watchdog: add ralink watchdog driver") Signed-off-by: Randy Dunlap Cc: Guenter Roeck Cc: Wim Van Sebroeck Cc: John Crispin Cc: linux-mips@vger.kernel.org Cc: linux-watchdog@vger.kernel.org Acked-by: Guenter Roeck Signed-off-by: Thomas Bogendoerfer --- arch/mips/ralink/of.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c index 0c5de07da097..0135376c5de5 100644 --- a/arch/mips/ralink/of.c +++ b/arch/mips/ralink/of.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -25,6 +26,7 @@ __iomem void *rt_sysc_membase; __iomem void *rt_memc_membase; +EXPORT_SYMBOL_GPL(rt_sysc_membase); __iomem void *plat_of_remap_node(const char *node) { From 78cf0eb926cb1abeff2106bae67752e032fe5f3e Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Sat, 15 May 2021 19:02:01 +0800 Subject: [PATCH 583/730] MIPS: Fix kernel hang under FUNCTION_GRAPH_TRACER and PREEMPT_TRACER When update the latest mainline kernel with the following three configs, the kernel hangs during startup: (1) CONFIG_FUNCTION_GRAPH_TRACER=y (2) CONFIG_PREEMPT_TRACER=y (3) CONFIG_FTRACE_STARTUP_TEST=y When update the latest mainline kernel with the above two configs (1) and (2), the kernel starts normally, but it still hangs when execute the following command: echo "function_graph" > /sys/kernel/debug/tracing/current_tracer Without CONFIG_PREEMPT_TRACER=y, the above two kinds of kernel hangs disappeared, so it seems that CONFIG_PREEMPT_TRACER has some influences with function_graph tracer at the first glance. I use ejtag to find out the epc address is related with preempt_enable() in the file arch/mips/lib/mips-atomic.c, because function tracing can trace the preempt_{enable,disable} calls that are traced, replace them with preempt_{enable,disable}_notrace to prevent function tracing from going into an infinite loop, and then it can fix the kernel hang issue. By the way, it seems that this commit is a complement and improvement of commit f93a1a00f2bd ("MIPS: Fix crash that occurs when function tracing is enabled"). Signed-off-by: Tiezhu Yang Cc: Steven Rostedt Signed-off-by: Thomas Bogendoerfer --- arch/mips/lib/mips-atomic.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/mips/lib/mips-atomic.c b/arch/mips/lib/mips-atomic.c index de03838b343b..a9b72eacfc0b 100644 --- a/arch/mips/lib/mips-atomic.c +++ b/arch/mips/lib/mips-atomic.c @@ -37,7 +37,7 @@ */ notrace void arch_local_irq_disable(void) { - preempt_disable(); + preempt_disable_notrace(); __asm__ __volatile__( " .set push \n" @@ -53,7 +53,7 @@ notrace void arch_local_irq_disable(void) : /* no inputs */ : "memory"); - preempt_enable(); + preempt_enable_notrace(); } EXPORT_SYMBOL(arch_local_irq_disable); @@ -61,7 +61,7 @@ notrace unsigned long arch_local_irq_save(void) { unsigned long flags; - preempt_disable(); + preempt_disable_notrace(); __asm__ __volatile__( " .set push \n" @@ -78,7 +78,7 @@ notrace unsigned long arch_local_irq_save(void) : /* no inputs */ : "memory"); - preempt_enable(); + preempt_enable_notrace(); return flags; } @@ -88,7 +88,7 @@ notrace void arch_local_irq_restore(unsigned long flags) { unsigned long __tmp1; - preempt_disable(); + preempt_disable_notrace(); __asm__ __volatile__( " .set push \n" @@ -106,7 +106,7 @@ notrace void arch_local_irq_restore(unsigned long flags) : "0" (flags) : "memory"); - preempt_enable(); + preempt_enable_notrace(); } EXPORT_SYMBOL(arch_local_irq_restore); From a8deba8547e39f26440101164a3bbc2899c5b305 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Tue, 25 May 2021 09:41:39 +0800 Subject: [PATCH 584/730] bpftool: Add sock_release help info for cgroup attach/prog load command The help information was not added at the time when the function got added. Fix this and add the missing information to its cli, documentation and bash completion. Fixes: db94cc0b4805 ("bpftool: Add support for BPF_CGROUP_INET_SOCK_RELEASE") Signed-off-by: Liu Jian Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20210525014139.323859-1-liujian56@huawei.com --- tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 4 +++- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 2 +- tools/bpf/bpftool/bash-completion/bpftool | 6 +++--- tools/bpf/bpftool/cgroup.c | 3 ++- tools/bpf/bpftool/prog.c | 2 +- 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index 790944c35602..baee8591ac76 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -30,7 +30,8 @@ CGROUP COMMANDS | *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** | | **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** | | **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** | -| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** } +| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** | +| **sock_release** } | *ATTACH_FLAGS* := { **multi** | **override** } DESCRIPTION @@ -106,6 +107,7 @@ DESCRIPTION **getpeername6** call to getpeername(2) for an inet6 socket (since 5.8); **getsockname4** call to getsockname(2) for an inet4 socket (since 5.8); **getsockname6** call to getsockname(2) for an inet6 socket (since 5.8). + **sock_release** closing an userspace inet socket (since 5.9). **bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* Detach *PROG* from the cgroup *CGROUP* and attach type diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 358c7309d419..fe1b38e7e887 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -44,7 +44,7 @@ PROG COMMANDS | **cgroup/connect4** | **cgroup/connect6** | **cgroup/getpeername4** | **cgroup/getpeername6** | | **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** | | **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** | -| **cgroup/getsockopt** | **cgroup/setsockopt** | +| **cgroup/getsockopt** | **cgroup/setsockopt** | **cgroup/sock_release** | | **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup** | } | *ATTACH_TYPE* := { diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index d67518bcbd44..cc33c5824a2f 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -478,7 +478,7 @@ _bpftool() cgroup/recvmsg4 cgroup/recvmsg6 \ cgroup/post_bind4 cgroup/post_bind6 \ cgroup/sysctl cgroup/getsockopt \ - cgroup/setsockopt struct_ops \ + cgroup/setsockopt cgroup/sock_release struct_ops \ fentry fexit freplace sk_lookup" -- \ "$cur" ) ) return 0 @@ -1021,7 +1021,7 @@ _bpftool() device bind4 bind6 post_bind4 post_bind6 connect4 connect6 \ getpeername4 getpeername6 getsockname4 getsockname6 \ sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \ - setsockopt' + setsockopt sock_release' local ATTACH_FLAGS='multi override' local PROG_TYPE='id pinned tag name' case $prev in @@ -1032,7 +1032,7 @@ _bpftool() ingress|egress|sock_create|sock_ops|device|bind4|bind6|\ post_bind4|post_bind6|connect4|connect6|getpeername4|\ getpeername6|getsockname4|getsockname6|sendmsg4|sendmsg6|\ - recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt) + recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt|sock_release) COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \ "$cur" ) ) return 0 diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index d901cc1b904a..6e53b1d393f4 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -28,7 +28,8 @@ " connect6 | getpeername4 | getpeername6 |\n" \ " getsockname4 | getsockname6 | sendmsg4 |\n" \ " sendmsg6 | recvmsg4 | recvmsg6 |\n" \ - " sysctl | getsockopt | setsockopt }" + " sysctl | getsockopt | setsockopt |\n" \ + " sock_release }" static unsigned int query_flags; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 3f067d2d7584..da4846c9856a 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -2138,7 +2138,7 @@ static int do_help(int argc, char **argv) " cgroup/getpeername4 | cgroup/getpeername6 |\n" " cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n" " cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n" - " cgroup/getsockopt | cgroup/setsockopt |\n" + " cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n" " struct_ops | fentry | fexit | freplace | sk_lookup }\n" " ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n" " flow_dissector }\n" From 29c8f40b54a45dd23971e2bc395697731bcffbe1 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 24 May 2021 23:37:26 +0300 Subject: [PATCH 585/730] ALSA: hda/realtek: Chain in pop reduction fixup for ThinkStation P340 Lenovo ThinkStation P340 uses ALC623 codec (SSID 17aa:1048) and it produces bug plock/pop noise over line out (green jack on the back) which can be fixed by applying ALC269_FIXUP_NO_SHUTUP tot he machine. Convert the existing entry for the same SSID to chain to apply this fixup as well. Suggested-by: Takashi Iwai Signed-off-by: Peter Ujfalusi Cc: Link: https://lore.kernel.org/r/20210524203726.2278-1-peter.ujfalusi@linux.intel.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6571c3713732..90bf0d3a830a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6567,6 +6567,7 @@ enum { ALC295_FIXUP_HP_OMEN, ALC285_FIXUP_HP_SPECTRE_X360, ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP, + ALC623_FIXUP_LENOVO_THINKSTATION_P340, }; static const struct hda_fixup alc269_fixups[] = { @@ -8139,6 +8140,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC285_FIXUP_THINKPAD_HEADSET_JACK, }, + [ALC623_FIXUP_LENOVO_THINKSTATION_P340] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_no_shutup, + .chained = true, + .chain_id = ALC283_FIXUP_HEADSET_MIC, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -8457,7 +8464,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0xc019, "Clevo NH77D[BE]Q", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xc022, "Clevo NH77[DC][QW]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS), - SND_PCI_QUIRK(0x17aa, 0x1048, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x17aa, 0x1048, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340), SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x21b8, "Thinkpad Edge 14", ALC269_FIXUP_SKU_IGNORE), @@ -8724,6 +8731,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC295_FIXUP_HP_OMEN, .name = "alc295-hp-omen"}, {.id = ALC285_FIXUP_HP_SPECTRE_X360, .name = "alc285-hp-spectre-x360"}, {.id = ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP, .name = "alc287-ideapad-bass-spk-amp"}, + {.id = ALC623_FIXUP_LENOVO_THINKSTATION_P340, .name = "alc623-lenovo-thinkstation-p340"}, {} }; #define ALC225_STANDARD_PINS \ From 6fd5fb63820a9a1146aba0bba2fdbc1db4b903e7 Mon Sep 17 00:00:00 2001 From: Jussi Maki Date: Tue, 25 May 2021 10:29:55 +0000 Subject: [PATCH 586/730] selftests/bpf: Add test for l3 use of bpf_redirect_peer Add a test case for using bpf_skb_change_head() in combination with bpf_redirect_peer() to redirect a packet from a L3 device to veth and back. The test uses a BPF program that adds L2 headers to the packet coming from a L3 device and then calls bpf_redirect_peer() to redirect the packet to a veth device. The test fails as skb->mac_len is not set properly and thus the ethernet headers are not properly skb_pull'd in cls_bpf_classify(), causing tcp_v4_rcv() to point the TCP header into middle of the IP header. Signed-off-by: Jussi Maki Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210525102955.2811090-1-joamaki@gmail.com --- .../selftests/bpf/prog_tests/tc_redirect.c | 582 ++++++++++++------ .../selftests/bpf/progs/test_tc_peer.c | 31 + 2 files changed, 420 insertions(+), 193 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 95ef9fcd31d8..5703c918812b 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -11,14 +11,17 @@ */ #define _GNU_SOURCE -#include + +#include #include #include +#include +#include #include #include #include #include -#include +#include #include "test_progs.h" #include "network_helpers.h" @@ -32,18 +35,25 @@ #define IP4_SRC "172.16.1.100" #define IP4_DST "172.16.2.100" +#define IP4_TUN_SRC "172.17.1.100" +#define IP4_TUN_FWD "172.17.1.200" #define IP4_PORT 9004 -#define IP6_SRC "::1:dead:beef:cafe" -#define IP6_DST "::2:dead:beef:cafe" +#define IP6_SRC "0::1:dead:beef:cafe" +#define IP6_DST "0::2:dead:beef:cafe" +#define IP6_TUN_SRC "1::1:dead:beef:cafe" +#define IP6_TUN_FWD "1::2:dead:beef:cafe" #define IP6_PORT 9006 #define IP4_SLL "169.254.0.1" #define IP4_DLL "169.254.0.2" #define IP4_NET "169.254.0.0" +#define MAC_DST_FWD "00:11:22:33:44:55" +#define MAC_DST "00:22:33:44:55:66" + #define IFADDR_STR_LEN 18 -#define PING_ARGS "-c 3 -w 10 -q" +#define PING_ARGS "-i 0.2 -c 3 -w 10 -q" #define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src" #define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst" @@ -51,120 +61,104 @@ #define TIMEOUT_MILLIS 10000 -#define MAX_PROC_MODS 128 -#define MAX_PROC_VALUE_LEN 16 - #define log_err(MSG, ...) \ fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ __FILE__, __LINE__, strerror(errno), ##__VA_ARGS__) -struct proc_mod { - char path[PATH_MAX]; - char oldval[MAX_PROC_VALUE_LEN]; - int oldlen; -}; - static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL}; -static int root_netns_fd = -1; -static int num_proc_mods; -static struct proc_mod proc_mods[MAX_PROC_MODS]; -/** - * modify_proc() - Modify entry in /proc - * - * Modifies an entry in /proc and saves the original value for later - * restoration with restore_proc(). - */ -static int modify_proc(const char *path, const char *newval) +static int write_file(const char *path, const char *newval) { - struct proc_mod *mod; FILE *f; - if (num_proc_mods + 1 > MAX_PROC_MODS) - return -1; - f = fopen(path, "r+"); if (!f) return -1; - - mod = &proc_mods[num_proc_mods]; - num_proc_mods++; - - strncpy(mod->path, path, PATH_MAX); - - if (!fread(mod->oldval, 1, MAX_PROC_VALUE_LEN, f)) { - log_err("reading from %s failed", path); - goto fail; - } - rewind(f); if (fwrite(newval, strlen(newval), 1, f) != 1) { log_err("writing to %s failed", path); - goto fail; + fclose(f); + return -1; } - fclose(f); return 0; - -fail: - fclose(f); - num_proc_mods--; - return -1; } -/** - * restore_proc() - Restore all /proc modifications - */ -static void restore_proc(void) +struct nstoken { + int orig_netns_fd; +}; + +static int setns_by_fd(int nsfd) { - int i; - - for (i = 0; i < num_proc_mods; i++) { - struct proc_mod *mod = &proc_mods[i]; - FILE *f; - - f = fopen(mod->path, "w"); - if (!f) { - log_err("fopen of %s failed", mod->path); - continue; - } - - if (fwrite(mod->oldval, mod->oldlen, 1, f) != 1) - log_err("fwrite to %s failed", mod->path); - - fclose(f); - } - num_proc_mods = 0; -} - -/** - * setns_by_name() - Set networks namespace by name - */ -static int setns_by_name(const char *name) -{ - int nsfd; - char nspath[PATH_MAX]; int err; - snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name); - nsfd = open(nspath, O_RDONLY | O_CLOEXEC); - if (nsfd < 0) - return nsfd; - err = setns(nsfd, CLONE_NEWNET); close(nsfd); - return err; + if (!ASSERT_OK(err, "setns")) + return err; + + /* Switch /sys to the new namespace so that e.g. /sys/class/net + * reflects the devices in the new namespace. + */ + err = unshare(CLONE_NEWNS); + if (!ASSERT_OK(err, "unshare")) + return err; + + err = umount2("/sys", MNT_DETACH); + if (!ASSERT_OK(err, "umount2 /sys")) + return err; + + err = mount("sysfs", "/sys", "sysfs", 0, NULL); + if (!ASSERT_OK(err, "mount /sys")) + return err; + + err = mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL); + if (!ASSERT_OK(err, "mount /sys/fs/bpf")) + return err; + + return 0; } /** - * setns_root() - Set network namespace to original (root) namespace + * open_netns() - Switch to specified network namespace by name. * - * Not expected to ever fail, so error not returned, but failure logged - * and test marked as failed. + * Returns token with which to restore the original namespace + * using close_netns(). */ -static void setns_root(void) +static struct nstoken *open_netns(const char *name) { - ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "setns root"); + int nsfd; + char nspath[PATH_MAX]; + int err; + struct nstoken *token; + + token = malloc(sizeof(struct nstoken)); + if (!ASSERT_OK_PTR(token, "malloc token")) + return NULL; + + token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY); + if (!ASSERT_GE(token->orig_netns_fd, 0, "open /proc/self/ns/net")) + goto fail; + + snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name); + nsfd = open(nspath, O_RDONLY | O_CLOEXEC); + if (!ASSERT_GE(nsfd, 0, "open netns fd")) + goto fail; + + err = setns_by_fd(nsfd); + if (!ASSERT_OK(err, "setns_by_fd")) + goto fail; + + return token; +fail: + free(token); + return NULL; +} + +static void close_netns(struct nstoken *token) +{ + ASSERT_OK(setns_by_fd(token->orig_netns_fd), "setns_by_fd"); + free(token); } static int netns_setup_namespaces(const char *verb) @@ -237,15 +231,17 @@ static int get_ifindex(const char *name) static int netns_setup_links_and_routes(struct netns_setup_result *result) { + struct nstoken *nstoken = NULL; char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {}; - char veth_dst_fwd_addr[IFADDR_STR_LEN+1] = {}; SYS("ip link add veth_src type veth peer name veth_src_fwd"); SYS("ip link add veth_dst type veth peer name veth_dst_fwd"); + + SYS("ip link set veth_dst_fwd address " MAC_DST_FWD); + SYS("ip link set veth_dst address " MAC_DST); + if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr)) goto fail; - if (get_ifaddr("veth_dst_fwd", veth_dst_fwd_addr)) - goto fail; result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd"); if (result->ifindex_veth_src_fwd < 0) @@ -260,7 +256,8 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) SYS("ip link set veth_dst netns " NS_DST); /** setup in 'src' namespace */ - if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src")) + nstoken = open_netns(NS_SRC); + if (!ASSERT_OK_PTR(nstoken, "setns src")) goto fail; SYS("ip addr add " IP4_SRC "/32 dev veth_src"); @@ -276,8 +273,11 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s", veth_src_fwd_addr); + close_netns(nstoken); + /** setup in 'fwd' namespace */ - if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd")) + nstoken = open_netns(NS_FWD); + if (!ASSERT_OK_PTR(nstoken, "setns fwd")) goto fail; /* The fwd netns automatically gets a v6 LL address / routes, but also @@ -294,8 +294,11 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global"); SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global"); + close_netns(nstoken); + /** setup in 'dst' namespace */ - if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst")) + nstoken = open_netns(NS_DST); + if (!ASSERT_OK_PTR(nstoken, "setns dst")) goto fail; SYS("ip addr add " IP4_DST "/32 dev veth_dst"); @@ -306,23 +309,20 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) SYS("ip route add " IP4_NET "/16 dev veth_dst scope global"); SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global"); - SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr %s", - veth_dst_fwd_addr); - SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr %s", - veth_dst_fwd_addr); + SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD); + SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD); + + close_netns(nstoken); - setns_root(); return 0; fail: - setns_root(); + if (nstoken) + close_netns(nstoken); return -1; } static int netns_load_bpf(void) { - if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd")) - return -1; - SYS("tc qdisc add dev veth_src_fwd clsact"); SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned " SRC_PROG_PIN_FILE); @@ -335,42 +335,29 @@ static int netns_load_bpf(void) SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned " CHK_PROG_PIN_FILE); - setns_root(); - return -1; -fail: - setns_root(); - return -1; -} - -static int netns_unload_bpf(void) -{ - if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd")) - goto fail; - SYS("tc qdisc delete dev veth_src_fwd clsact"); - SYS("tc qdisc delete dev veth_dst_fwd clsact"); - - setns_root(); return 0; fail: - setns_root(); return -1; } - static void test_tcp(int family, const char *addr, __u16 port) { int listen_fd = -1, accept_fd = -1, client_fd = -1; char buf[] = "testing testing"; int n; + struct nstoken *nstoken; - if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst")) + nstoken = open_netns(NS_DST); + if (!ASSERT_OK_PTR(nstoken, "setns dst")) return; listen_fd = start_server(family, SOCK_STREAM, addr, port, 0); if (!ASSERT_GE(listen_fd, 0, "listen")) goto done; - if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src")) + close_netns(nstoken); + nstoken = open_netns(NS_SRC); + if (!ASSERT_OK_PTR(nstoken, "setns src")) goto done; client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS); @@ -392,7 +379,8 @@ static void test_tcp(int family, const char *addr, __u16 port) ASSERT_EQ(n, sizeof(buf), "recv from server"); done: - setns_root(); + if (nstoken) + close_netns(nstoken); if (listen_fd >= 0) close(listen_fd); if (accept_fd >= 0) @@ -405,7 +393,7 @@ static int test_ping(int family, const char *addr) { const char *ping = family == AF_INET6 ? "ping6" : "ping"; - SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s", ping, addr); + SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping, addr); return 0; fail: return -1; @@ -419,19 +407,37 @@ static void test_connectivity(void) test_ping(AF_INET6, IP6_DST); } +static int set_forwarding(bool enable) +{ + int err; + + err = write_file("/proc/sys/net/ipv4/ip_forward", enable ? "1" : "0"); + if (!ASSERT_OK(err, "set ipv4.ip_forward=0")) + return err; + + err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", enable ? "1" : "0"); + if (!ASSERT_OK(err, "set ipv6.forwarding=0")) + return err; + + return 0; +} + static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result) { - struct test_tc_neigh_fib *skel; + struct nstoken *nstoken = NULL; + struct test_tc_neigh_fib *skel = NULL; int err; + nstoken = open_netns(NS_FWD); + if (!ASSERT_OK_PTR(nstoken, "setns fwd")) + return; + skel = test_tc_neigh_fib__open(); if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open")) - return; + goto done; - if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load")) { - test_tc_neigh_fib__destroy(skel); - return; - } + if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load")) + goto done; err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) @@ -449,46 +455,37 @@ static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result) goto done; /* bpf_fib_lookup() checks if forwarding is enabled */ - if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd")) + if (!ASSERT_OK(set_forwarding(true), "enable forwarding")) goto done; - err = modify_proc("/proc/sys/net/ipv4/ip_forward", "1"); - if (!ASSERT_OK(err, "set ipv4.ip_forward")) - goto done; - - err = modify_proc("/proc/sys/net/ipv6/conf/all/forwarding", "1"); - if (!ASSERT_OK(err, "set ipv6.forwarding")) - goto done; - setns_root(); - test_connectivity(); + done: - bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE); - bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); - bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE); - test_tc_neigh_fib__destroy(skel); - netns_unload_bpf(); - setns_root(); - restore_proc(); + if (skel) + test_tc_neigh_fib__destroy(skel); + close_netns(nstoken); } static void test_tc_redirect_neigh(struct netns_setup_result *setup_result) { - struct test_tc_neigh *skel; + struct nstoken *nstoken = NULL; + struct test_tc_neigh *skel = NULL; int err; + nstoken = open_netns(NS_FWD); + if (!ASSERT_OK_PTR(nstoken, "setns fwd")) + return; + skel = test_tc_neigh__open(); if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open")) - return; + goto done; skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; err = test_tc_neigh__load(skel); - if (!ASSERT_OK(err, "test_tc_neigh__load")) { - test_tc_neigh__destroy(skel); - return; - } + if (!ASSERT_OK(err, "test_tc_neigh__load")) + goto done; err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) @@ -505,34 +502,37 @@ static void test_tc_redirect_neigh(struct netns_setup_result *setup_result) if (netns_load_bpf()) goto done; + if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) + goto done; + test_connectivity(); done: - bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE); - bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); - bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE); - test_tc_neigh__destroy(skel); - netns_unload_bpf(); - setns_root(); + if (skel) + test_tc_neigh__destroy(skel); + close_netns(nstoken); } static void test_tc_redirect_peer(struct netns_setup_result *setup_result) { + struct nstoken *nstoken; struct test_tc_peer *skel; int err; + nstoken = open_netns(NS_FWD); + if (!ASSERT_OK_PTR(nstoken, "setns fwd")) + return; + skel = test_tc_peer__open(); if (!ASSERT_OK_PTR(skel, "test_tc_peer__open")) - return; + goto done; skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; err = test_tc_peer__load(skel); - if (!ASSERT_OK(err, "test_tc_peer__load")) { - test_tc_peer__destroy(skel); - return; - } + if (!ASSERT_OK(err, "test_tc_peer__load")) + goto done; err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) @@ -549,41 +549,237 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result) if (netns_load_bpf()) goto done; + if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) + goto done; + test_connectivity(); done: - bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE); - bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); - bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE); - test_tc_peer__destroy(skel); - netns_unload_bpf(); - setns_root(); + if (skel) + test_tc_peer__destroy(skel); + close_netns(nstoken); +} + +static int tun_open(char *name) +{ + struct ifreq ifr; + int fd, err; + + fd = open("/dev/net/tun", O_RDWR); + if (!ASSERT_GE(fd, 0, "open /dev/net/tun")) + return -1; + + memset(&ifr, 0, sizeof(ifr)); + + ifr.ifr_flags = IFF_TUN | IFF_NO_PI; + if (*name) + strncpy(ifr.ifr_name, name, IFNAMSIZ); + + err = ioctl(fd, TUNSETIFF, &ifr); + if (!ASSERT_OK(err, "ioctl TUNSETIFF")) + goto fail; + + SYS("ip link set dev %s up", name); + + return fd; +fail: + close(fd); + return -1; +} + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +enum { + SRC_TO_TARGET = 0, + TARGET_TO_SRC = 1, +}; + +static int tun_relay_loop(int src_fd, int target_fd) +{ + fd_set rfds, wfds; + + FD_ZERO(&rfds); + FD_ZERO(&wfds); + + for (;;) { + char buf[1500]; + int direction, nread, nwrite; + + FD_SET(src_fd, &rfds); + FD_SET(target_fd, &rfds); + + if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) { + log_err("select failed"); + return 1; + } + + direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC; + + nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf)); + if (nread < 0) { + log_err("read failed"); + return 1; + } + + nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread); + if (nwrite != nread) { + log_err("write failed"); + return 1; + } + } +} + +static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) +{ + struct test_tc_peer *skel = NULL; + struct nstoken *nstoken = NULL; + int err; + int tunnel_pid = -1; + int src_fd, target_fd; + int ifindex; + + /* Start a L3 TUN/TAP tunnel between the src and dst namespaces. + * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those + * expose the L2 headers encapsulating the IP packet to BPF and hence + * don't have skb in suitable state for this test. Alternative to TUN/TAP + * would be e.g. Wireguard which would appear as a pure L3 device to BPF, + * but that requires much more complicated setup. + */ + nstoken = open_netns(NS_SRC); + if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC)) + return; + + src_fd = tun_open("tun_src"); + if (!ASSERT_GE(src_fd, 0, "tun_open tun_src")) + goto fail; + + close_netns(nstoken); + + nstoken = open_netns(NS_FWD); + if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD)) + goto fail; + + target_fd = tun_open("tun_fwd"); + if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd")) + goto fail; + + tunnel_pid = fork(); + if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop")) + goto fail; + + if (tunnel_pid == 0) + exit(tun_relay_loop(src_fd, target_fd)); + + skel = test_tc_peer__open(); + if (!ASSERT_OK_PTR(skel, "test_tc_peer__open")) + goto fail; + + ifindex = get_ifindex("tun_fwd"); + if (!ASSERT_GE(ifindex, 0, "get_ifindex tun_fwd")) + goto fail; + + skel->rodata->IFINDEX_SRC = ifindex; + skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + + err = test_tc_peer__load(skel); + if (!ASSERT_OK(err, "test_tc_peer__load")) + goto fail; + + err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) + goto fail; + + err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) + goto fail; + + err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) + goto fail; + + /* Load "tc_src_l3" to the tun_fwd interface to redirect packets + * towards dst, and "tc_dst" to redirect packets + * and "tc_chk" on veth_dst_fwd to drop non-redirected packets. + */ + SYS("tc qdisc add dev tun_fwd clsact"); + SYS("tc filter add dev tun_fwd ingress bpf da object-pinned " + SRC_PROG_PIN_FILE); + + SYS("tc qdisc add dev veth_dst_fwd clsact"); + SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned " + DST_PROG_PIN_FILE); + SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned " + CHK_PROG_PIN_FILE); + + /* Setup route and neigh tables */ + SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24"); + SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24"); + + SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad"); + SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad"); + + SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global"); + SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD + " dev tun_src scope global"); + SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global"); + SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global"); + SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD + " dev tun_src scope global"); + SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global"); + + SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); + SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); + + if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) + goto fail; + + test_connectivity(); + +fail: + if (tunnel_pid > 0) { + kill(tunnel_pid, SIGTERM); + waitpid(tunnel_pid, NULL, 0); + } + if (src_fd >= 0) + close(src_fd); + if (target_fd >= 0) + close(target_fd); + if (skel) + test_tc_peer__destroy(skel); + if (nstoken) + close_netns(nstoken); +} + +#define RUN_TEST(name) \ + ({ \ + struct netns_setup_result setup_result; \ + if (test__start_subtest(#name)) \ + if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \ + if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \ + "setup links and routes")) \ + test_ ## name(&setup_result); \ + netns_setup_namespaces("delete"); \ + } \ + }) + +static void *test_tc_redirect_run_tests(void *arg) +{ + RUN_TEST(tc_redirect_peer); + RUN_TEST(tc_redirect_peer_l3); + RUN_TEST(tc_redirect_neigh); + RUN_TEST(tc_redirect_neigh_fib); + return NULL; } void test_tc_redirect(void) { - struct netns_setup_result setup_result; + pthread_t test_thread; + int err; - root_netns_fd = open("/proc/self/ns/net", O_RDONLY); - if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net")) - return; - - if (netns_setup_namespaces("add")) - goto done; - - if (netns_setup_links_and_routes(&setup_result)) - goto done; - - if (test__start_subtest("tc_redirect_peer")) - test_tc_redirect_peer(&setup_result); - - if (test__start_subtest("tc_redirect_neigh")) - test_tc_redirect_neigh(&setup_result); - - if (test__start_subtest("tc_redirect_neigh_fib")) - test_tc_redirect_neigh_fib(&setup_result); - -done: - close(root_netns_fd); - netns_setup_namespaces("delete"); + /* Run the tests in their own thread to isolate the namespace changes + * so they do not affect the environment of other tests. + * (specifically needed because of unshare(CLONE_NEWNS) in open_netns()) + */ + err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL); + if (ASSERT_OK(err, "pthread_create")) + ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join"); } diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c index ef264bced0e6..fe818cd5f010 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_peer.c +++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c @@ -5,12 +5,17 @@ #include #include #include +#include +#include #include volatile const __u32 IFINDEX_SRC; volatile const __u32 IFINDEX_DST; +static const __u8 src_mac[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}; +static const __u8 dst_mac[] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66}; + SEC("classifier/chk_egress") int tc_chk(struct __sk_buff *skb) { @@ -29,4 +34,30 @@ int tc_src(struct __sk_buff *skb) return bpf_redirect_peer(IFINDEX_DST, 0); } +SEC("classifier/dst_ingress_l3") +int tc_dst_l3(struct __sk_buff *skb) +{ + return bpf_redirect(IFINDEX_SRC, 0); +} + +SEC("classifier/src_ingress_l3") +int tc_src_l3(struct __sk_buff *skb) +{ + __u16 proto = skb->protocol; + + if (bpf_skb_change_head(skb, ETH_HLEN, 0) != 0) + return TC_ACT_SHOT; + + if (bpf_skb_store_bytes(skb, 0, &src_mac, ETH_ALEN, 0) != 0) + return TC_ACT_SHOT; + + if (bpf_skb_store_bytes(skb, ETH_ALEN, &dst_mac, ETH_ALEN, 0) != 0) + return TC_ACT_SHOT; + + if (bpf_skb_store_bytes(skb, ETH_ALEN + ETH_ALEN, &proto, sizeof(__u16), 0) != 0) + return TC_ACT_SHOT; + + return bpf_redirect_peer(IFINDEX_DST, 0); +} + char __license[] SEC("license") = "GPL"; From b2db6c35ba986ebe1ddd6b65f21a810346299d7f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 25 May 2021 15:40:22 +0100 Subject: [PATCH 587/730] afs: Fix fall-through warnings for Clang In preparation to enable -Wimplicit-fallthrough for Clang, fix multiple warnings by explicitly adding multiple fallthrough pseudo-keywords in places where the code is intended to fall through to the next case. Link: https://github.com/KSPP/linux/issues/115 Signed-off-by: Gustavo A. R. Silva Signed-off-by: David Howells Reviewed-by: Jeffrey Altman cc: linux-afs@lists.infradead.org cc: linux-hardening@vger.kernel.org Link: https://lore.kernel.org/r/51150b54e0b0431a2c401cd54f2c4e7f50e94601.1605896059.git.gustavoars@kernel.org/ # v1 Link: https://lore.kernel.org/r/20210420211615.GA51432@embeddedor/ # v2 Signed-off-by: Linus Torvalds --- fs/afs/cmservice.c | 5 +++++ fs/afs/fsclient.c | 4 ++++ fs/afs/vlclient.c | 1 + 3 files changed, 10 insertions(+) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index a4e9e6e07e93..d3c6bb22c5f4 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -322,6 +322,8 @@ static int afs_deliver_cb_callback(struct afs_call *call) return ret; call->unmarshall++; + fallthrough; + case 5: break; } @@ -418,6 +420,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) r->node[loop] = ntohl(b[loop + 5]); call->unmarshall++; + fallthrough; case 2: break; @@ -530,6 +533,7 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call) r->node[loop] = ntohl(b[loop + 5]); call->unmarshall++; + fallthrough; case 2: break; @@ -663,6 +667,7 @@ static int afs_deliver_yfs_cb_callback(struct afs_call *call) afs_extract_to_tmp(call); call->unmarshall++; + fallthrough; case 3: break; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 2f695a260442..dd3f45d906d2 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -388,6 +388,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) req->file_size = vp->scb.status.size; call->unmarshall++; + fallthrough; case 5: break; @@ -1408,6 +1409,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) _debug("motd '%s'", p); call->unmarshall++; + fallthrough; case 8: break; @@ -1845,6 +1847,7 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) xdr_decode_AFSVolSync(&bp, &op->volsync); call->unmarshall++; + fallthrough; case 6: break; @@ -1979,6 +1982,7 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call) xdr_decode_AFSVolSync(&bp, &op->volsync); call->unmarshall++; + fallthrough; case 4: break; diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index dc9327332f06..00fca3c66ba6 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -593,6 +593,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call) if (ret < 0) return ret; call->unmarshall = 6; + fallthrough; case 6: break; From e89d6cc51034998607502cd3899173bfa7189571 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 24 May 2021 09:29:44 +0100 Subject: [PATCH 588/730] arm64: assembler: replace `kaddr` with `addr` The `__dcache_op_workaround_clean_cache` and `dcache_by_line_op` macros are only expected to be usedc on kernel memory, without a user fault fixup, and so we named their address variables `kaddr` to make this clear. Subseuqent patches will modify these to also work on user memory with an (optional) user fault fixup, where `kaddr` won't make as much sense. To aid the legibility of patches, this patch (only) replaces `kaddr` with `addr` as a preparatory step. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Signed-off-by: Fuad Tabba Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Fuad Tabba Cc: Will Deacon Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-2-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 32 +++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 8418c1bd8f04..6a0fbc599196 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -377,47 +377,47 @@ alternative_cb_end /* * Macro to perform a data cache maintenance for the interval - * [kaddr, kaddr + size) + * [addr, addr + size) * * op: operation passed to dc instruction * domain: domain used in dsb instruciton - * kaddr: starting virtual address of the region + * addr: starting virtual address of the region * size: size of the region - * Corrupts: kaddr, size, tmp1, tmp2 + * Corrupts: addr, size, tmp1, tmp2 */ - .macro __dcache_op_workaround_clean_cache, op, kaddr + .macro __dcache_op_workaround_clean_cache, op, addr alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE - dc \op, \kaddr + dc \op, \addr alternative_else - dc civac, \kaddr + dc civac, \addr alternative_endif .endm - .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2 + .macro dcache_by_line_op op, domain, addr, size, tmp1, tmp2 dcache_line_size \tmp1, \tmp2 - add \size, \kaddr, \size + add \size, \addr, \size sub \tmp2, \tmp1, #1 - bic \kaddr, \kaddr, \tmp2 + bic \addr, \addr, \tmp2 9998: .ifc \op, cvau - __dcache_op_workaround_clean_cache \op, \kaddr + __dcache_op_workaround_clean_cache \op, \addr .else .ifc \op, cvac - __dcache_op_workaround_clean_cache \op, \kaddr + __dcache_op_workaround_clean_cache \op, \addr .else .ifc \op, cvap - sys 3, c7, c12, 1, \kaddr // dc cvap + sys 3, c7, c12, 1, \addr // dc cvap .else .ifc \op, cvadp - sys 3, c7, c13, 1, \kaddr // dc cvadp + sys 3, c7, c13, 1, \addr // dc cvadp .else - dc \op, \kaddr + dc \op, \addr .endif .endif .endif .endif - add \kaddr, \kaddr, \tmp1 - cmp \kaddr, \size + add \addr, \addr, \tmp1 + cmp \addr, \size b.lo 9998b dsb \domain .endm From d11b187760f52480dd83bda0429ee3c94e542b1d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 24 May 2021 09:29:45 +0100 Subject: [PATCH 589/730] arm64: assembler: add conditional cache fixups It would be helpful if we could use both `dcache_by_line_op` and `invalidate_icache_by_line` for user memory without accidentally fixing up unexpected faults when performing maintenance on kernel addresses. Let's make this possible by having both macros take an optional fixup label, and only generating an extable entry if a label is provided. At the same time, let's clean up the labels used to be globally unique using \@ as we do for other macros. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Signed-off-by: Fuad Tabba Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Fuad Tabba Cc: Will Deacon Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-3-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 39 +++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 6a0fbc599196..0a276b46ef50 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -130,15 +130,27 @@ alternative_endif .endm /* - * Emit an entry into the exception table + * Create an exception table entry for `insn`, which will branch to `fixup` + * when an unhandled fault is taken. */ - .macro _asm_extable, from, to + .macro _asm_extable, insn, fixup .pushsection __ex_table, "a" .align 3 - .long (\from - .), (\to - .) + .long (\insn - .), (\fixup - .) .popsection .endm +/* + * Create an exception table entry for `insn` if `fixup` is provided. Otherwise + * do nothing. + */ + .macro _cond_extable, insn, fixup + .ifnc \fixup, + _asm_extable \insn, \fixup + .endif + .endm + + #define USER(l, x...) \ 9999: x; \ _asm_extable 9999b, l @@ -383,6 +395,7 @@ alternative_cb_end * domain: domain used in dsb instruciton * addr: starting virtual address of the region * size: size of the region + * fixup: optional label to branch to on user fault * Corrupts: addr, size, tmp1, tmp2 */ .macro __dcache_op_workaround_clean_cache, op, addr @@ -393,12 +406,12 @@ alternative_else alternative_endif .endm - .macro dcache_by_line_op op, domain, addr, size, tmp1, tmp2 + .macro dcache_by_line_op op, domain, addr, size, tmp1, tmp2, fixup dcache_line_size \tmp1, \tmp2 add \size, \addr, \size sub \tmp2, \tmp1, #1 bic \addr, \addr, \tmp2 -9998: +.Ldcache_op\@: .ifc \op, cvau __dcache_op_workaround_clean_cache \op, \addr .else @@ -418,8 +431,10 @@ alternative_endif .endif add \addr, \addr, \tmp1 cmp \addr, \size - b.lo 9998b + b.lo .Ldcache_op\@ dsb \domain + + _cond_extable .Ldcache_op\@, \fixup .endm /* @@ -427,20 +442,22 @@ alternative_endif * [start, end) * * start, end: virtual addresses describing the region - * label: A label to branch to on user fault. + * fixup: optional label to branch to on user fault * Corrupts: tmp1, tmp2 */ - .macro invalidate_icache_by_line start, end, tmp1, tmp2, label + .macro invalidate_icache_by_line start, end, tmp1, tmp2, fixup icache_line_size \tmp1, \tmp2 sub \tmp2, \tmp1, #1 bic \tmp2, \start, \tmp2 -9997: -USER(\label, ic ivau, \tmp2) // invalidate I line PoU +.Licache_op\@: + ic ivau, \tmp2 // invalidate I line PoU add \tmp2, \tmp2, \tmp1 cmp \tmp2, \end - b.lo 9997b + b.lo .Licache_op\@ dsb ish isb + + _cond_extable .Licache_op\@, \fixup .endm /* From 46710cf1fcb6235388e8d80619cdf2c196ad554b Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:46 +0100 Subject: [PATCH 590/730] arm64: Apply errata to swsusp_arch_suspend_exit The Arm errata covered by ARM64_WORKAROUND_CLEAN_CACHE require that "dc cvau" instructions get promoted to "dc civac". Reported-by: Mark Rutland Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-4-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/kernel/hibernate-asm.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index 8ccca660034e..0ed2f72a6b94 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -91,7 +91,8 @@ SYM_CODE_START(swsusp_arch_suspend_exit) raw_dcache_line_size x2, x3 sub x3, x2, #1 bic x4, x10, x3 -2: dc cvau, x4 /* clean D line / unified line */ +2: /* clean D line / unified line */ +alternative_insn "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE add x4, x4, x2 cmp x4, x1 b.lo 2b From 116b7f559492b719ae4bd22ee773cb7fb046a736 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:47 +0100 Subject: [PATCH 591/730] arm64: Do not enable uaccess for flush_icache_range __flush_icache_range works on kernel addresses, and doesn't need uaccess. The existing code is a side-effect of its current implementation with __flush_cache_user_range fallthrough. Instead of fallthrough to share the code, use a common macro for the two where the caller specifies an optional fixup label if user access is needed. If provided, this label would be used to generate an extable entry. Simplify the code to use dcache_by_line_op, instead of replicating much of its functionality. No functional change intended. Possible performance impact due to the reduced number of instructions. Reported-by: Catalin Marinas Reported-by: Will Deacon Reported-by: Mark Rutland Link: https://lore.kernel.org/linux-arch/20200511110014.lb9PEahJ4hVOYrbwIb_qUHXyNy9KQzNFdb_I3YlzY6A@z/ Link: https://lore.kernel.org/linux-arm-kernel/20210521121846.GB1040@C02TD0UTHF1T.local/ Signed-off-by: Fuad Tabba Acked-by: Mark Rutland Acked-by: Catalin Marinas Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-5-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/mm/cache.S | 57 ++++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 2d881f34dd9d..7c54bcbf5a36 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -14,6 +14,34 @@ #include #include +/* + * __flush_cache_range(start,end) [fixup] + * + * Ensure that the I and D caches are coherent within specified region. + * This is typically used when code has been written to a memory region, + * and will be executed. + * + * - start - virtual start address of region + * - end - virtual end address of region + * - fixup - optional label to branch to on user fault + */ +.macro __flush_cache_range, fixup +alternative_if ARM64_HAS_CACHE_IDC + dsb ishst + b .Ldc_skip_\@ +alternative_else_nop_endif + mov x2, x0 + sub x3, x1, x0 + dcache_by_line_op cvau, ish, x2, x3, x4, x5, \fixup +.Ldc_skip_\@: +alternative_if ARM64_HAS_CACHE_DIC + isb + b .Lic_skip_\@ +alternative_else_nop_endif + invalidate_icache_by_line x0, x1, x2, x3, \fixup +.Lic_skip_\@: +.endm + /* * flush_icache_range(start,end) * @@ -25,7 +53,9 @@ * - end - virtual end address of region */ SYM_FUNC_START(__flush_icache_range) - /* FALLTHROUGH */ + __flush_cache_range + ret +SYM_FUNC_END(__flush_icache_range) /* * __flush_cache_user_range(start,end) @@ -39,34 +69,15 @@ SYM_FUNC_START(__flush_icache_range) */ SYM_FUNC_START(__flush_cache_user_range) uaccess_ttbr0_enable x2, x3, x4 -alternative_if ARM64_HAS_CACHE_IDC - dsb ishst - b 7f -alternative_else_nop_endif - dcache_line_size x2, x3 - sub x3, x2, #1 - bic x4, x0, x3 -1: -user_alt 9f, "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE - add x4, x4, x2 - cmp x4, x1 - b.lo 1b - dsb ish -7: -alternative_if ARM64_HAS_CACHE_DIC - isb - b 8f -alternative_else_nop_endif - invalidate_icache_by_line x0, x1, x2, x3, 9f -8: mov x0, #0 + __flush_cache_range 2f + mov x0, xzr 1: uaccess_ttbr0_disable x1, x2 ret -9: +2: mov x0, #-EFAULT b 1b -SYM_FUNC_END(__flush_icache_range) SYM_FUNC_END(__flush_cache_user_range) /* From 7908072da535dca52b3a011ed6e1f73534546b59 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:48 +0100 Subject: [PATCH 592/730] arm64: Do not enable uaccess for invalidate_icache_range invalidate_icache_range() works on kernel addresses, and doesn't need uaccess. Remove the code that toggles uaccess_ttbr0_enable, as well as the code that emits an entry into the exception table (via the macro invalidate_icache_by_line). Changes return type of invalidate_icache_range() from int (which used to indicate a fault) to void, since it doesn't need uaccess and won't fault. Note that return value was never checked by any of the callers. No functional change intended. Possible performance impact due to the reduced number of instructions. Reported-by: Catalin Marinas Reported-by: Will Deacon Link: https://lore.kernel.org/linux-arch/20200511110014.lb9PEahJ4hVOYrbwIb_qUHXyNy9KQzNFdb_I3YlzY6A@z/ Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Acked-by: Catalin Marinas Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-6-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cacheflush.h | 2 +- arch/arm64/mm/cache.S | 11 +---------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 52e5c1623224..a586afa84172 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -57,7 +57,7 @@ * - size - region size */ extern void __flush_icache_range(unsigned long start, unsigned long end); -extern int invalidate_icache_range(unsigned long start, unsigned long end); +extern void invalidate_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(void *addr, size_t len); extern void __inval_dcache_area(void *addr, size_t len); extern void __clean_dcache_area_poc(void *addr, size_t len); diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 7c54bcbf5a36..14eac9d76d57 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -90,21 +90,12 @@ SYM_FUNC_END(__flush_cache_user_range) */ SYM_FUNC_START(invalidate_icache_range) alternative_if ARM64_HAS_CACHE_DIC - mov x0, xzr isb ret alternative_else_nop_endif - uaccess_ttbr0_enable x2, x3, x4 - - invalidate_icache_by_line x0, x1, x2, x3, 2f - mov x0, xzr -1: - uaccess_ttbr0_disable x1, x2 + invalidate_icache_by_line x0, x1, x2, x3 ret -2: - mov x0, #-EFAULT - b 1b SYM_FUNC_END(invalidate_icache_range) /* From 5e20e3499682c4f1724438d23afcafd473526a54 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:49 +0100 Subject: [PATCH 593/730] arm64: Downgrade flush_icache_range to invalidate Since __flush_dcache_area is called right before, invalidate_icache_range is sufficient in this case. Rewrite the comment to better explain the rationale behind the cache maintenance operations used here. No functional change intended. Possible performance impact due to invalidating only the icache rather than invalidating and cleaning both caches. Reported-by: Catalin Marinas Reported-by: Will Deacon Link: https://lore.kernel.org/linux-arch/20200511110014.lb9PEahJ4hVOYrbwIb_qUHXyNy9KQzNFdb_I3YlzY6A@z/ Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Acked-by: Catalin Marinas Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-7-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/kernel/machine_kexec.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 90a335c74442..a03944fd0cd4 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -68,10 +68,14 @@ int machine_kexec_post_load(struct kimage *kimage) kimage->arch.kern_reloc = __pa(reloc_code); kexec_image_info(kimage); - /* Flush the reloc_code in preparation for its execution. */ + /* + * For execution with the MMU off, reloc_code needs to be cleaned to the + * PoC and invalidated from the I-cache. + */ __flush_dcache_area(reloc_code, arm64_relocate_new_kernel_size); - flush_icache_range((uintptr_t)reloc_code, (uintptr_t)reloc_code + - arm64_relocate_new_kernel_size); + invalidate_icache_range((uintptr_t)reloc_code, + (uintptr_t)reloc_code + + arm64_relocate_new_kernel_size); return 0; } From 55272ecc3ada8ec947bb5e94ee2fcde6cf31e166 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:50 +0100 Subject: [PATCH 594/730] arm64: assembler: remove user_alt user_alt isn't being used anymore. It's also simpler and clearer to directly use alternative_insn and _cond_extable in-line when needed. Reported-by: Mark Rutland Link: https://lore.kernel.org/linux-arm-kernel/20210520125735.GF17233@C02TD0UTHF1T.local/ Signed-off-by: Fuad Tabba Acked-by: Mark Rutland Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-8-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/alternative-macros.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index 8a078fc662ac..477703578caa 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -197,11 +197,6 @@ alternative_endif #define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \ alternative_insn insn1, insn2, cap, IS_ENABLED(cfg) -.macro user_alt, label, oldinstr, newinstr, cond -9999: alternative_insn "\oldinstr", "\newinstr", \cond - _asm_extable 9999b, \label -.endm - #endif /* __ASSEMBLY__ */ /* From 06b7a568ca5e9cb79a0cc4737f498ea90d8fa89d Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:51 +0100 Subject: [PATCH 595/730] arm64: Move documentation of dcache_by_line_op The comment describing the macro dcache_by_line_op is placed right before the previous macro of the one it describes, which is a bit confusing. Move it to the macro it describes (dcache_by_line_op). No functional change intended. Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-9-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 0a276b46ef50..ced791124b28 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -387,6 +387,14 @@ alternative_cb_end bfi \tcr, \tmp0, \pos, #3 .endm + .macro __dcache_op_workaround_clean_cache, op, addr +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE + dc \op, \addr +alternative_else + dc civac, \addr +alternative_endif + .endm + /* * Macro to perform a data cache maintenance for the interval * [addr, addr + size) @@ -398,14 +406,6 @@ alternative_cb_end * fixup: optional label to branch to on user fault * Corrupts: addr, size, tmp1, tmp2 */ - .macro __dcache_op_workaround_clean_cache, op, addr -alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE - dc \op, \addr -alternative_else - dc civac, \addr -alternative_endif - .endm - .macro dcache_by_line_op op, domain, addr, size, tmp1, tmp2, fixup dcache_line_size \tmp1, \tmp2 add \size, \addr, \size From d044f8141847bee542998a6fd8de2c270fe40e48 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:52 +0100 Subject: [PATCH 596/730] arm64: Fix comments to refer to correct function __flush_icache_range Many comments refer to the function flush_icache_range, where the intent is in fact __flush_icache_range. Fix these comments to refer to the intended function. That's probably due to commit 3b8c9f1cdfc506e9 ("arm64: IPI each CPU after invalidating the I-cache for kernel mappings"), which renamed flush_icache_range() to __flush_icache_range() and added a wrapper. No functional change intended. Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-10-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/kernel/hibernate-asm.S | 4 ++-- arch/arm64/mm/cache.S | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index 0ed2f72a6b94..ef2ab7caf815 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -45,7 +45,7 @@ * Because this code has to be copied to a 'safe' page, it can't call out to * other functions by PC-relative address. Also remember that it may be * mid-way through over-writing other functions. For this reason it contains - * code from flush_icache_range() and uses the copy_page() macro. + * code from __flush_icache_range() and uses the copy_page() macro. * * This 'safe' page is mapped via ttbr0, and executed from there. This function * switches to a copy of the linear map in ttbr1, performs the restore, then @@ -87,7 +87,7 @@ SYM_CODE_START(swsusp_arch_suspend_exit) copy_page x0, x1, x2, x3, x4, x5, x6, x7, x8, x9 add x1, x10, #PAGE_SIZE - /* Clean the copied page to PoU - based on flush_icache_range() */ + /* Clean the copied page to PoU - based on __flush_icache_range() */ raw_dcache_line_size x2, x3 sub x3, x2, #1 bic x4, x10, x3 diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 14eac9d76d57..910ae8f6a389 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -43,7 +43,7 @@ alternative_else_nop_endif .endm /* - * flush_icache_range(start,end) + * __flush_icache_range(start,end) * * Ensure that the I and D caches are coherent within specified region. * This is typically used when code has been written to a memory region, From e3974adb4ef591e898956083a3dfa6336bb88638 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:53 +0100 Subject: [PATCH 597/730] arm64: __inval_dcache_area to take end parameter instead of size To be consistent with other functions with similar names and functionality in cacheflush.h, cache.S, and cachetlb.rst, change to specify the range in terms of start and end, as opposed to start and size. Because the code is shared with __dma_inv_area, it changes the parameters for that as well. However, __dma_inv_area is local to cache.S, so no other users are affected. No functional change intended. Reported-by: Will Deacon Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-11-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cacheflush.h | 2 +- arch/arm64/kernel/head.S | 5 +---- arch/arm64/mm/cache.S | 16 +++++++++------- arch/arm64/mm/flush.c | 2 +- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index a586afa84172..157234706817 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -59,7 +59,7 @@ extern void __flush_icache_range(unsigned long start, unsigned long end); extern void invalidate_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(void *addr, size_t len); -extern void __inval_dcache_area(void *addr, size_t len); +extern void __inval_dcache_area(unsigned long start, unsigned long end); extern void __clean_dcache_area_poc(void *addr, size_t len); extern void __clean_dcache_area_pop(void *addr, size_t len); extern void __clean_dcache_area_pou(void *addr, size_t len); diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 96873dfa67fd..8df0ac8d9123 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -117,7 +117,7 @@ SYM_CODE_START_LOCAL(preserve_boot_args) dmb sy // needed before dc ivac with // MMU off - mov x1, #0x20 // 4 x 8 bytes + add x1, x0, #0x20 // 4 x 8 bytes b __inval_dcache_area // tail call SYM_CODE_END(preserve_boot_args) @@ -268,7 +268,6 @@ SYM_FUNC_START_LOCAL(__create_page_tables) */ adrp x0, init_pg_dir adrp x1, init_pg_end - sub x1, x1, x0 bl __inval_dcache_area /* @@ -382,12 +381,10 @@ SYM_FUNC_START_LOCAL(__create_page_tables) adrp x0, idmap_pg_dir adrp x1, idmap_pg_end - sub x1, x1, x0 bl __inval_dcache_area adrp x0, init_pg_dir adrp x1, init_pg_end - sub x1, x1, x0 bl __inval_dcache_area ret x28 diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 910ae8f6a389..03c1a7659ffb 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -131,25 +131,24 @@ alternative_else_nop_endif SYM_FUNC_END(__clean_dcache_area_pou) /* - * __inval_dcache_area(kaddr, size) + * __inval_dcache_area(start, end) * - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * Ensure that any D-cache lines for the interval [start, end) * are invalidated. Any partial lines at the ends of the interval are * also cleaned to PoC to prevent data loss. * - * - kaddr - kernel address - * - size - size in question + * - start - kernel start address of region + * - end - kernel end address of region */ SYM_FUNC_START_LOCAL(__dma_inv_area) SYM_FUNC_START_PI(__inval_dcache_area) /* FALLTHROUGH */ /* - * __dma_inv_area(start, size) + * __dma_inv_area(start, end) * - start - virtual start address of region - * - size - size in question + * - end - virtual end address of region */ - add x1, x1, x0 dcache_line_size x2, x3 sub x3, x2, #1 tst x1, x3 // end cache line aligned? @@ -230,8 +229,10 @@ SYM_FUNC_END_PI(__dma_flush_area) * - dir - DMA direction */ SYM_FUNC_START_PI(__dma_map_area) + add x1, x0, x1 cmp w2, #DMA_FROM_DEVICE b.eq __dma_inv_area + sub x1, x1, x0 b __dma_clean_area SYM_FUNC_END_PI(__dma_map_area) @@ -242,6 +243,7 @@ SYM_FUNC_END_PI(__dma_map_area) * - dir - DMA direction */ SYM_FUNC_START_PI(__dma_unmap_area) + add x1, x0, x1 cmp w2, #DMA_TO_DEVICE b.ne __dma_inv_area ret diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 6d44c028d1c9..be650b573b2a 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -90,7 +90,7 @@ EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); void arch_invalidate_pmem(void *addr, size_t size) { - __inval_dcache_area(addr, size); + __inval_dcache_area((unsigned long)addr, (unsigned long)addr + size); } EXPORT_SYMBOL_GPL(arch_invalidate_pmem); #endif From 163d3f80695e31068c7d32244c9e6d406d5c5c00 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:54 +0100 Subject: [PATCH 598/730] arm64: dcache_by_line_op to take end parameter instead of size To be consistent with other functions with similar names and functionality in cacheflush.h, cache.S, and cachetlb.rst, change to specify the range in terms of start and end, as opposed to start and size. No functional change intended. Reported-by: Will Deacon Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-12-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 27 +++++++++++++-------------- arch/arm64/kvm/hyp/nvhe/cache.S | 1 + arch/arm64/mm/cache.S | 7 ++++++- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index ced791124b28..c4cecf85dccf 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -397,40 +397,39 @@ alternative_endif /* * Macro to perform a data cache maintenance for the interval - * [addr, addr + size) + * [start, end) * * op: operation passed to dc instruction * domain: domain used in dsb instruciton - * addr: starting virtual address of the region - * size: size of the region + * start: starting virtual address of the region + * end: end virtual address of the region * fixup: optional label to branch to on user fault - * Corrupts: addr, size, tmp1, tmp2 + * Corrupts: start, end, tmp1, tmp2 */ - .macro dcache_by_line_op op, domain, addr, size, tmp1, tmp2, fixup + .macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup dcache_line_size \tmp1, \tmp2 - add \size, \addr, \size sub \tmp2, \tmp1, #1 - bic \addr, \addr, \tmp2 + bic \start, \start, \tmp2 .Ldcache_op\@: .ifc \op, cvau - __dcache_op_workaround_clean_cache \op, \addr + __dcache_op_workaround_clean_cache \op, \start .else .ifc \op, cvac - __dcache_op_workaround_clean_cache \op, \addr + __dcache_op_workaround_clean_cache \op, \start .else .ifc \op, cvap - sys 3, c7, c12, 1, \addr // dc cvap + sys 3, c7, c12, 1, \start // dc cvap .else .ifc \op, cvadp - sys 3, c7, c13, 1, \addr // dc cvadp + sys 3, c7, c13, 1, \start // dc cvadp .else - dc \op, \addr + dc \op, \start .endif .endif .endif .endif - add \addr, \addr, \tmp1 - cmp \addr, \size + add \start, \start, \tmp1 + cmp \start, \end b.lo .Ldcache_op\@ dsb \domain diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S index 36cef6915428..3bcfa3cac46f 100644 --- a/arch/arm64/kvm/hyp/nvhe/cache.S +++ b/arch/arm64/kvm/hyp/nvhe/cache.S @@ -8,6 +8,7 @@ #include SYM_FUNC_START_PI(__flush_dcache_area) + add x1, x0, x1 dcache_by_line_op civac, sy, x0, x1, x2, x3 ret SYM_FUNC_END_PI(__flush_dcache_area) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 03c1a7659ffb..fff883f691f2 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -31,7 +31,7 @@ alternative_if ARM64_HAS_CACHE_IDC b .Ldc_skip_\@ alternative_else_nop_endif mov x2, x0 - sub x3, x1, x0 + mov x3, x1 dcache_by_line_op cvau, ish, x2, x3, x4, x5, \fixup .Ldc_skip_\@: alternative_if ARM64_HAS_CACHE_DIC @@ -108,6 +108,7 @@ SYM_FUNC_END(invalidate_icache_range) * - size - size in question */ SYM_FUNC_START_PI(__flush_dcache_area) + add x1, x0, x1 dcache_by_line_op civac, sy, x0, x1, x2, x3 ret SYM_FUNC_END_PI(__flush_dcache_area) @@ -126,6 +127,7 @@ alternative_if ARM64_HAS_CACHE_IDC dsb ishst ret alternative_else_nop_endif + add x1, x0, x1 dcache_by_line_op cvau, ish, x0, x1, x2, x3 ret SYM_FUNC_END(__clean_dcache_area_pou) @@ -187,6 +189,7 @@ SYM_FUNC_START_PI(__clean_dcache_area_poc) * - start - virtual start address of region * - size - size in question */ + add x1, x0, x1 dcache_by_line_op cvac, sy, x0, x1, x2, x3 ret SYM_FUNC_END_PI(__clean_dcache_area_poc) @@ -205,6 +208,7 @@ SYM_FUNC_START_PI(__clean_dcache_area_pop) alternative_if_not ARM64_HAS_DCPOP b __clean_dcache_area_poc alternative_else_nop_endif + add x1, x0, x1 dcache_by_line_op cvap, sy, x0, x1, x2, x3 ret SYM_FUNC_END_PI(__clean_dcache_area_pop) @@ -218,6 +222,7 @@ SYM_FUNC_END_PI(__clean_dcache_area_pop) * - size - size in question */ SYM_FUNC_START_PI(__dma_flush_area) + add x1, x0, x1 dcache_by_line_op civac, sy, x0, x1, x2, x3 ret SYM_FUNC_END_PI(__dma_flush_area) From 814b186079cd54d3fe3b6b8ab539cbd44705ef9d Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:55 +0100 Subject: [PATCH 599/730] arm64: __flush_dcache_area to take end parameter instead of size To be consistent with other functions with similar names and functionality in cacheflush.h, cache.S, and cachetlb.rst, change to specify the range in terms of start and end, as opposed to start and size. No functional change intended. Reported-by: Will Deacon Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-13-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/arch_gicv3.h | 3 ++- arch/arm64/include/asm/cacheflush.h | 8 ++++---- arch/arm64/include/asm/efi.h | 2 +- arch/arm64/include/asm/kvm_mmu.h | 3 ++- arch/arm64/kernel/hibernate.c | 18 +++++++++++------- arch/arm64/kernel/idreg-override.c | 3 ++- arch/arm64/kernel/kaslr.c | 12 +++++++++--- arch/arm64/kernel/machine_kexec.c | 20 +++++++++++++------- arch/arm64/kernel/smp.c | 8 ++++++-- arch/arm64/kernel/smp_spin_table.c | 7 ++++--- arch/arm64/kvm/hyp/nvhe/cache.S | 1 - arch/arm64/kvm/hyp/nvhe/setup.c | 3 ++- arch/arm64/kvm/hyp/pgtable.c | 13 ++++++++++--- arch/arm64/mm/cache.S | 9 ++++----- 14 files changed, 70 insertions(+), 40 deletions(-) diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 934b9be582d2..ed1cc9d8e6df 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -124,7 +124,8 @@ static inline u32 gic_read_rpr(void) #define gic_read_lpir(c) readq_relaxed(c) #define gic_write_lpir(v, c) writeq_relaxed(v, c) -#define gic_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) +#define gic_flush_dcache_to_poc(a,l) \ + __flush_dcache_area((unsigned long)(a), (unsigned long)(a)+(l)) #define gits_read_baser(c) readq_relaxed(c) #define gits_write_baser(v, c) writeq_relaxed(v, c) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 157234706817..695f88864784 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -50,15 +50,15 @@ * - start - virtual start address * - end - virtual end address * - * __flush_dcache_area(kaddr, size) + * __flush_dcache_area(start, end) * * Ensure that the data held in page is written back. - * - kaddr - page address - * - size - region size + * - start - virtual start address + * - end - virtual end address */ extern void __flush_icache_range(unsigned long start, unsigned long end); extern void invalidate_icache_range(unsigned long start, unsigned long end); -extern void __flush_dcache_area(void *addr, size_t len); +extern void __flush_dcache_area(unsigned long start, unsigned long end); extern void __inval_dcache_area(unsigned long start, unsigned long end); extern void __clean_dcache_area_poc(void *addr, size_t len); extern void __clean_dcache_area_pop(void *addr, size_t len); diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 3578aba9c608..0ae2397076fd 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -137,7 +137,7 @@ void efi_virtmap_unload(void); static inline void efi_capsule_flush_cache_range(void *addr, int size) { - __flush_dcache_area(addr, size); + __flush_dcache_area((unsigned long)addr, (unsigned long)addr + size); } #endif /* _ASM_EFI_H */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 25ed956f9af1..33293d5855af 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -180,7 +180,8 @@ static inline void *__kvm_vector_slot2addr(void *base, struct kvm; -#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) +#define kvm_flush_dcache_to_poc(a,l) \ + __flush_dcache_area((unsigned long)(a), (unsigned long)(a)+(l)) static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) { diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index b1cef371df2b..b40ddce71507 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -240,8 +240,6 @@ static int create_safe_exec_page(void *src_start, size_t length, return 0; } -#define dcache_clean_range(start, end) __flush_dcache_area(start, (end - start)) - #ifdef CONFIG_ARM64_MTE static DEFINE_XARRAY(mte_pages); @@ -383,13 +381,18 @@ int swsusp_arch_suspend(void) ret = swsusp_save(); } else { /* Clean kernel core startup/idle code to PoC*/ - dcache_clean_range(__mmuoff_data_start, __mmuoff_data_end); - dcache_clean_range(__idmap_text_start, __idmap_text_end); + __flush_dcache_area((unsigned long)__mmuoff_data_start, + (unsigned long)__mmuoff_data_end); + __flush_dcache_area((unsigned long)__idmap_text_start, + (unsigned long)__idmap_text_end); /* Clean kvm setup code to PoC? */ if (el2_reset_needed()) { - dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end); - dcache_clean_range(__hyp_text_start, __hyp_text_end); + __flush_dcache_area( + (unsigned long)__hyp_idmap_text_start, + (unsigned long)__hyp_idmap_text_end); + __flush_dcache_area((unsigned long)__hyp_text_start, + (unsigned long)__hyp_text_end); } swsusp_mte_restore_tags(); @@ -474,7 +477,8 @@ int swsusp_arch_resume(void) * The hibernate exit text contains a set of el2 vectors, that will * be executed at el2 with the mmu off in order to reload hyp-stub. */ - __flush_dcache_area(hibernate_exit, exit_size); + __flush_dcache_area((unsigned long)hibernate_exit, + (unsigned long)hibernate_exit + exit_size); /* * KASLR will cause the el2 vectors to be in a different location in diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index e628c8ce1ffe..3dd515baf526 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -237,7 +237,8 @@ asmlinkage void __init init_feature_override(void) for (i = 0; i < ARRAY_SIZE(regs); i++) { if (regs[i]->override) - __flush_dcache_area(regs[i]->override, + __flush_dcache_area((unsigned long)regs[i]->override, + (unsigned long)regs[i]->override + sizeof(*regs[i]->override)); } } diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 341342b207f6..49cccd03cb37 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -72,7 +72,9 @@ u64 __init kaslr_early_init(void) * we end up running with module randomization disabled. */ module_alloc_base = (u64)_etext - MODULES_VSIZE; - __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base)); + __flush_dcache_area((unsigned long)&module_alloc_base, + (unsigned long)&module_alloc_base + + sizeof(module_alloc_base)); /* * Try to map the FDT early. If this fails, we simply bail, @@ -170,8 +172,12 @@ u64 __init kaslr_early_init(void) module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; module_alloc_base &= PAGE_MASK; - __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base)); - __flush_dcache_area(&memstart_offset_seed, sizeof(memstart_offset_seed)); + __flush_dcache_area((unsigned long)&module_alloc_base, + (unsigned long)&module_alloc_base + + sizeof(module_alloc_base)); + __flush_dcache_area((unsigned long)&memstart_offset_seed, + (unsigned long)&memstart_offset_seed + + sizeof(memstart_offset_seed)); return offset; } diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index a03944fd0cd4..3e79110c8f3a 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -72,7 +72,9 @@ int machine_kexec_post_load(struct kimage *kimage) * For execution with the MMU off, reloc_code needs to be cleaned to the * PoC and invalidated from the I-cache. */ - __flush_dcache_area(reloc_code, arm64_relocate_new_kernel_size); + __flush_dcache_area((unsigned long)reloc_code, + (unsigned long)reloc_code + + arm64_relocate_new_kernel_size); invalidate_icache_range((uintptr_t)reloc_code, (uintptr_t)reloc_code + arm64_relocate_new_kernel_size); @@ -106,16 +108,18 @@ static void kexec_list_flush(struct kimage *kimage) for (entry = &kimage->head; ; entry++) { unsigned int flag; - void *addr; + unsigned long addr; /* flush the list entries. */ - __flush_dcache_area(entry, sizeof(kimage_entry_t)); + __flush_dcache_area((unsigned long)entry, + (unsigned long)entry + + sizeof(kimage_entry_t)); flag = *entry & IND_FLAGS; if (flag == IND_DONE) break; - addr = phys_to_virt(*entry & PAGE_MASK); + addr = (unsigned long)phys_to_virt(*entry & PAGE_MASK); switch (flag) { case IND_INDIRECTION: @@ -124,7 +128,7 @@ static void kexec_list_flush(struct kimage *kimage) break; case IND_SOURCE: /* flush the source pages. */ - __flush_dcache_area(addr, PAGE_SIZE); + __flush_dcache_area(addr, addr + PAGE_SIZE); break; case IND_DESTINATION: break; @@ -151,8 +155,10 @@ static void kexec_segment_flush(const struct kimage *kimage) kimage->segment[i].memsz, kimage->segment[i].memsz / PAGE_SIZE); - __flush_dcache_area(phys_to_virt(kimage->segment[i].mem), - kimage->segment[i].memsz); + __flush_dcache_area( + (unsigned long)phys_to_virt(kimage->segment[i].mem), + (unsigned long)phys_to_virt(kimage->segment[i].mem) + + kimage->segment[i].memsz); } } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index dcd7041b2b07..5fcdee331087 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -122,7 +122,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) secondary_data.task = idle; secondary_data.stack = task_stack_page(idle) + THREAD_SIZE; update_cpu_boot_status(CPU_MMU_OFF); - __flush_dcache_area(&secondary_data, sizeof(secondary_data)); + __flush_dcache_area((unsigned long)&secondary_data, + (unsigned long)&secondary_data + + sizeof(secondary_data)); /* Now bring the CPU into our world */ ret = boot_secondary(cpu, idle); @@ -143,7 +145,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) pr_crit("CPU%u: failed to come online\n", cpu); secondary_data.task = NULL; secondary_data.stack = NULL; - __flush_dcache_area(&secondary_data, sizeof(secondary_data)); + __flush_dcache_area((unsigned long)&secondary_data, + (unsigned long)&secondary_data + + sizeof(secondary_data)); status = READ_ONCE(secondary_data.status); if (status == CPU_MMU_OFF) status = READ_ONCE(__early_cpu_boot_status); diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index c45a83512805..58d804582a35 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -36,7 +36,7 @@ static void write_pen_release(u64 val) unsigned long size = sizeof(secondary_holding_pen_release); secondary_holding_pen_release = val; - __flush_dcache_area(start, size); + __flush_dcache_area((unsigned long)start, (unsigned long)start + size); } @@ -90,8 +90,9 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu) * the boot protocol. */ writeq_relaxed(pa_holding_pen, release_addr); - __flush_dcache_area((__force void *)release_addr, - sizeof(*release_addr)); + __flush_dcache_area((__force unsigned long)release_addr, + (__force unsigned long)release_addr + + sizeof(*release_addr)); /* * Send an event to wake up the secondary CPU. diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S index 3bcfa3cac46f..36cef6915428 100644 --- a/arch/arm64/kvm/hyp/nvhe/cache.S +++ b/arch/arm64/kvm/hyp/nvhe/cache.S @@ -8,7 +8,6 @@ #include SYM_FUNC_START_PI(__flush_dcache_area) - add x1, x0, x1 dcache_by_line_op civac, sy, x0, x1, x2, x3 ret SYM_FUNC_END_PI(__flush_dcache_area) diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 7488f53b0aa2..5dffe928f256 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -134,7 +134,8 @@ static void update_nvhe_init_params(void) for (i = 0; i < hyp_nr_cpus; i++) { params = per_cpu_ptr(&kvm_init_params, i); params->pgd_pa = __hyp_pa(pkvm_pgtable.pgd); - __flush_dcache_area(params, sizeof(*params)); + __flush_dcache_area((unsigned long)params, + (unsigned long)params + sizeof(*params)); } } diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index c37c1dc4feaf..10d2f04013d4 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -839,8 +839,11 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, stage2_put_pte(ptep, mmu, addr, level, mm_ops); if (need_flush) { - __flush_dcache_area(kvm_pte_follow(pte, mm_ops), - kvm_granule_size(level)); + kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops); + + __flush_dcache_area((unsigned long)pte_follow, + (unsigned long)pte_follow + + kvm_granule_size(level)); } if (childp) @@ -988,11 +991,15 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct kvm_pgtable *pgt = arg; struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; kvm_pte_t pte = *ptep; + kvm_pte_t *pte_follow; if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte)) return 0; - __flush_dcache_area(kvm_pte_follow(pte, mm_ops), kvm_granule_size(level)); + pte_follow = kvm_pte_follow(pte, mm_ops); + __flush_dcache_area((unsigned long)pte_follow, + (unsigned long)pte_follow + + kvm_granule_size(level)); return 0; } diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index fff883f691f2..b2880aeba7ca 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -99,16 +99,15 @@ alternative_else_nop_endif SYM_FUNC_END(invalidate_icache_range) /* - * __flush_dcache_area(kaddr, size) + * __flush_dcache_area(start, end) * - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * Ensure that any D-cache lines for the interval [start, end) * are cleaned and invalidated to the PoC. * - * - kaddr - kernel address - * - size - size in question + * - start - virtual start address of region + * - end - virtual end address of region */ SYM_FUNC_START_PI(__flush_dcache_area) - add x1, x0, x1 dcache_by_line_op civac, sy, x0, x1, x2, x3 ret SYM_FUNC_END_PI(__flush_dcache_area) From 1f42faf1d25de2ae239f322fda8af1c92c20e953 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:56 +0100 Subject: [PATCH 600/730] arm64: __clean_dcache_area_poc to take end parameter instead of size To be consistent with other functions with similar names and functionality in cacheflush.h, cache.S, and cachetlb.rst, change to specify the range in terms of start and end, as opposed to start and size. Because the code is shared with __dma_clean_area, it changes the parameters for that as well. However, __dma_clean_area is local to cache.S, so no other users are affected. No functional change intended. Reported-by: Will Deacon Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-14-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cacheflush.h | 2 +- arch/arm64/kernel/efi-entry.S | 5 +++-- arch/arm64/mm/cache.S | 16 +++++++--------- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 695f88864784..3255878d6f30 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -60,7 +60,7 @@ extern void __flush_icache_range(unsigned long start, unsigned long end); extern void invalidate_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(unsigned long start, unsigned long end); extern void __inval_dcache_area(unsigned long start, unsigned long end); -extern void __clean_dcache_area_poc(void *addr, size_t len); +extern void __clean_dcache_area_poc(unsigned long start, unsigned long end); extern void __clean_dcache_area_pop(void *addr, size_t len); extern void __clean_dcache_area_pou(void *addr, size_t len); extern long __flush_cache_user_range(unsigned long start, unsigned long end); diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index 0073b24b5d25..b0f728fb61f0 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -28,6 +28,7 @@ SYM_CODE_START(efi_enter_kernel) * stale icache entries from before relocation. */ ldr w1, =kernel_size + add x1, x0, x1 bl __clean_dcache_area_poc ic ialluis @@ -36,7 +37,7 @@ SYM_CODE_START(efi_enter_kernel) * so that we can safely disable the MMU and caches. */ adr x0, 0f - ldr w1, 3f + adr x1, 3f bl __clean_dcache_area_poc 0: /* Turn off Dcache and MMU */ @@ -64,5 +65,5 @@ SYM_CODE_START(efi_enter_kernel) mov x2, xzr mov x3, xzr br x19 +3: SYM_CODE_END(efi_enter_kernel) -3: .long . - 0b diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index b2880aeba7ca..e2e2740c55ce 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -171,24 +171,23 @@ SYM_FUNC_END_PI(__inval_dcache_area) SYM_FUNC_END(__dma_inv_area) /* - * __clean_dcache_area_poc(kaddr, size) + * __clean_dcache_area_poc(start, end) * - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * Ensure that any D-cache lines for the interval [start, end) * are cleaned to the PoC. * - * - kaddr - kernel address - * - size - size in question + * - start - virtual start address of region + * - end - virtual end address of region */ SYM_FUNC_START_LOCAL(__dma_clean_area) SYM_FUNC_START_PI(__clean_dcache_area_poc) /* FALLTHROUGH */ /* - * __dma_clean_area(start, size) + * __dma_clean_area(start, end) * - start - virtual start address of region - * - size - size in question + * - end - virtual end address of region */ - add x1, x0, x1 dcache_by_line_op cvac, sy, x0, x1, x2, x3 ret SYM_FUNC_END_PI(__clean_dcache_area_poc) @@ -204,10 +203,10 @@ SYM_FUNC_END(__dma_clean_area) * - size - size in question */ SYM_FUNC_START_PI(__clean_dcache_area_pop) + add x1, x0, x1 alternative_if_not ARM64_HAS_DCPOP b __clean_dcache_area_poc alternative_else_nop_endif - add x1, x0, x1 dcache_by_line_op cvap, sy, x0, x1, x2, x3 ret SYM_FUNC_END_PI(__clean_dcache_area_pop) @@ -236,7 +235,6 @@ SYM_FUNC_START_PI(__dma_map_area) add x1, x0, x1 cmp w2, #DMA_FROM_DEVICE b.eq __dma_inv_area - sub x1, x1, x0 b __dma_clean_area SYM_FUNC_END_PI(__dma_map_area) From f749448edb9c98bece0aeec5536260a8794af24b Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:57 +0100 Subject: [PATCH 601/730] arm64: __clean_dcache_area_pop to take end parameter instead of size To be consistent with other functions with similar names and functionality in cacheflush.h, cache.S, and cachetlb.rst, change to specify the range in terms of start and end, as opposed to start and size. No functional change intended. Reported-by: Will Deacon Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-15-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cacheflush.h | 2 +- arch/arm64/lib/uaccess_flushcache.c | 4 ++-- arch/arm64/mm/cache.S | 9 ++++----- arch/arm64/mm/flush.c | 2 +- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 3255878d6f30..fa5641868d65 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -61,7 +61,7 @@ extern void invalidate_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(unsigned long start, unsigned long end); extern void __inval_dcache_area(unsigned long start, unsigned long end); extern void __clean_dcache_area_poc(unsigned long start, unsigned long end); -extern void __clean_dcache_area_pop(void *addr, size_t len); +extern void __clean_dcache_area_pop(unsigned long start, unsigned long end); extern void __clean_dcache_area_pou(void *addr, size_t len); extern long __flush_cache_user_range(unsigned long start, unsigned long end); extern void sync_icache_aliases(void *kaddr, unsigned long len); diff --git a/arch/arm64/lib/uaccess_flushcache.c b/arch/arm64/lib/uaccess_flushcache.c index c83bb5a4aad2..62ea989effe8 100644 --- a/arch/arm64/lib/uaccess_flushcache.c +++ b/arch/arm64/lib/uaccess_flushcache.c @@ -15,7 +15,7 @@ void memcpy_flushcache(void *dst, const void *src, size_t cnt) * barrier to order the cache maintenance against the memcpy. */ memcpy(dst, src, cnt); - __clean_dcache_area_pop(dst, cnt); + __clean_dcache_area_pop((unsigned long)dst, (unsigned long)dst + cnt); } EXPORT_SYMBOL_GPL(memcpy_flushcache); @@ -33,6 +33,6 @@ unsigned long __copy_user_flushcache(void *to, const void __user *from, rc = raw_copy_from_user(to, from, n); /* See above */ - __clean_dcache_area_pop(to, n - rc); + __clean_dcache_area_pop((unsigned long)to, (unsigned long)to + n - rc); return rc; } diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index e2e2740c55ce..b71fcf56516b 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -194,16 +194,15 @@ SYM_FUNC_END_PI(__clean_dcache_area_poc) SYM_FUNC_END(__dma_clean_area) /* - * __clean_dcache_area_pop(kaddr, size) + * __clean_dcache_area_pop(start, end) * - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * Ensure that any D-cache lines for the interval [start, end) * are cleaned to the PoP. * - * - kaddr - kernel address - * - size - size in question + * - start - virtual start address of region + * - end - virtual end address of region */ SYM_FUNC_START_PI(__clean_dcache_area_pop) - add x1, x0, x1 alternative_if_not ARM64_HAS_DCPOP b __clean_dcache_area_poc alternative_else_nop_endif diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index be650b573b2a..b2c226d93ca5 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -84,7 +84,7 @@ void arch_wb_cache_pmem(void *addr, size_t size) { /* Ensure order against any prior non-cacheable writes */ dmb(osh); - __clean_dcache_area_pop(addr, size); + __clean_dcache_area_pop((unsigned long)addr, (unsigned long)addr + size); } EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); From 406d7d4e2bc76d38a6dc88733a0f72fabf02d305 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:58 +0100 Subject: [PATCH 602/730] arm64: __clean_dcache_area_pou to take end parameter instead of size To be consistent with other functions with similar names and functionality in cacheflush.h, cache.S, and cachetlb.rst, change to specify the range in terms of start and end, as opposed to start and size. No functional change intended. Reported-by: Will Deacon Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-16-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cacheflush.h | 2 +- arch/arm64/mm/cache.S | 9 ++++----- arch/arm64/mm/flush.c | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index fa5641868d65..f86723047315 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -62,7 +62,7 @@ extern void __flush_dcache_area(unsigned long start, unsigned long end); extern void __inval_dcache_area(unsigned long start, unsigned long end); extern void __clean_dcache_area_poc(unsigned long start, unsigned long end); extern void __clean_dcache_area_pop(unsigned long start, unsigned long end); -extern void __clean_dcache_area_pou(void *addr, size_t len); +extern void __clean_dcache_area_pou(unsigned long start, unsigned long end); extern long __flush_cache_user_range(unsigned long start, unsigned long end); extern void sync_icache_aliases(void *kaddr, unsigned long len); diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index b71fcf56516b..ea605d94182f 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -113,20 +113,19 @@ SYM_FUNC_START_PI(__flush_dcache_area) SYM_FUNC_END_PI(__flush_dcache_area) /* - * __clean_dcache_area_pou(kaddr, size) + * __clean_dcache_area_pou(start, end) * - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * Ensure that any D-cache lines for the interval [start, end) * are cleaned to the PoU. * - * - kaddr - kernel address - * - size - size in question + * - start - virtual start address of region + * - end - virtual end address of region */ SYM_FUNC_START(__clean_dcache_area_pou) alternative_if ARM64_HAS_CACHE_IDC dsb ishst ret alternative_else_nop_endif - add x1, x0, x1 dcache_by_line_op cvau, ish, x0, x1, x2, x3 ret SYM_FUNC_END(__clean_dcache_area_pou) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index b2c226d93ca5..0341bcc6fdf3 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -19,7 +19,7 @@ void sync_icache_aliases(void *kaddr, unsigned long len) unsigned long addr = (unsigned long)kaddr; if (icache_is_aliasing()) { - __clean_dcache_area_pou(kaddr, len); + __clean_dcache_area_pou(kaddr, kaddr + len); __flush_icache_all(); } else { /* From 8c28d52ccd1d6e3a5aca8a37e465a5f8b77edbc1 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:59 +0100 Subject: [PATCH 603/730] arm64: sync_icache_aliases to take end parameter instead of size To be consistent with other functions with similar names and functionality in cacheflush.h, cache.S, and cachetlb.rst, change to specify the range in terms of start and end, as opposed to start and size. No functional change intended. Reported-by: Will Deacon Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-17-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cacheflush.h | 2 +- arch/arm64/kernel/probes/uprobes.c | 2 +- arch/arm64/mm/flush.c | 21 ++++++++++----------- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index f86723047315..70b389a8dea5 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -64,7 +64,7 @@ extern void __clean_dcache_area_poc(unsigned long start, unsigned long end); extern void __clean_dcache_area_pop(unsigned long start, unsigned long end); extern void __clean_dcache_area_pou(unsigned long start, unsigned long end); extern long __flush_cache_user_range(unsigned long start, unsigned long end); -extern void sync_icache_aliases(void *kaddr, unsigned long len); +extern void sync_icache_aliases(unsigned long start, unsigned long end); static inline void flush_icache_range(unsigned long start, unsigned long end) { diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index 2c247634552b..9be668f3f034 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -21,7 +21,7 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, memcpy(dst, src, len); /* flush caches (dcache/icache) */ - sync_icache_aliases(dst, len); + sync_icache_aliases((unsigned long)dst, (unsigned long)dst + len); kunmap_atomic(xol_page_kaddr); } diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 0341bcc6fdf3..c4ca7e05fdb8 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -14,28 +14,25 @@ #include #include -void sync_icache_aliases(void *kaddr, unsigned long len) +void sync_icache_aliases(unsigned long start, unsigned long end) { - unsigned long addr = (unsigned long)kaddr; - if (icache_is_aliasing()) { - __clean_dcache_area_pou(kaddr, kaddr + len); + __clean_dcache_area_pou(start, end); __flush_icache_all(); } else { /* * Don't issue kick_all_cpus_sync() after I-cache invalidation * for user mappings. */ - __flush_icache_range(addr, addr + len); + __flush_icache_range(start, end); } } -static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, - unsigned long uaddr, void *kaddr, - unsigned long len) +static void flush_ptrace_access(struct vm_area_struct *vma, unsigned long start, + unsigned long end) { if (vma->vm_flags & VM_EXEC) - sync_icache_aliases(kaddr, len); + sync_icache_aliases(start, end); } /* @@ -48,7 +45,7 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long len) { memcpy(dst, src, len); - flush_ptrace_access(vma, page, uaddr, dst, len); + flush_ptrace_access(vma, (unsigned long)dst, (unsigned long)dst + len); } void __sync_icache_dcache(pte_t pte) @@ -56,7 +53,9 @@ void __sync_icache_dcache(pte_t pte) struct page *page = pte_page(pte); if (!test_bit(PG_dcache_clean, &page->flags)) { - sync_icache_aliases(page_address(page), page_size(page)); + sync_icache_aliases((unsigned long)page_address(page), + (unsigned long)page_address(page) + + page_size(page)); set_bit(PG_dcache_clean, &page->flags); } } From 393239be1ba69dcd29be504ffe14938509795821 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:30:00 +0100 Subject: [PATCH 604/730] arm64: Fix cache maintenance function comments Fix and expand comments for the cache maintenance functions in cacheflush.h. Adds comments to functions that weren't described before. Explains what the functions do using Arm Architecture Reference Manual terminology. No functional change intended. Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-18-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cacheflush.h | 47 ++++++++++++++++++----------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 70b389a8dea5..26617df1fa45 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -30,31 +30,44 @@ * the implementation assumes non-aliasing VIPT D-cache and (aliasing) * VIPT I-cache. * - * flush_icache_range(start, end) + * All functions below apply to the interval [start, end) + * - start - virtual start address (inclusive) + * - end - virtual end address (exclusive) * - * Ensure coherency between the I-cache and the D-cache in the - * region described by start, end. - * - start - virtual start address - * - end - virtual end address + * __flush_icache_range(start, end) * - * invalidate_icache_range(start, end) - * - * Invalidate the I-cache in the region described by start, end. - * - start - virtual start address - * - end - virtual end address + * Ensure coherency between the I-cache and the D-cache region to + * the Point of Unification. * * __flush_cache_user_range(start, end) * - * Ensure coherency between the I-cache and the D-cache in the - * region described by start, end. - * - start - virtual start address - * - end - virtual end address + * Ensure coherency between the I-cache and the D-cache region to + * the Point of Unification. + * Use only if the region might access user memory. + * + * invalidate_icache_range(start, end) + * + * Invalidate I-cache region to the Point of Unification. * * __flush_dcache_area(start, end) * - * Ensure that the data held in page is written back. - * - start - virtual start address - * - end - virtual end address + * Clean and invalidate D-cache region to the Point of Coherency. + * + * __inval_dcache_area(start, end) + * + * Invalidate D-cache region to the Point of Coherency. + * + * __clean_dcache_area_poc(start, end) + * + * Clean D-cache region to the Point of Coherency. + * + * __clean_dcache_area_pop(start, end) + * + * Clean D-cache region to the Point of Persistence. + * + * __clean_dcache_area_pou(start, end) + * + * Clean D-cache region to the Point of Unification. */ extern void __flush_icache_range(unsigned long start, unsigned long end); extern void invalidate_icache_range(unsigned long start, unsigned long end); From fade9c2c6ee2baea7df8e6059b3f143c681e5ce4 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:30:01 +0100 Subject: [PATCH 605/730] arm64: Rename arm64-internal cache maintenance functions Although naming across the codebase isn't that consistent, it tends to follow certain patterns. Moreover, the term "flush" isn't defined in the Arm Architecture reference manual, and might be interpreted to mean clean, invalidate, or both for a cache. Rename arm64-internal functions to make the naming internally consistent, as well as making it consistent with the Arm ARM, by specifying whether it applies to the instruction, data, or both caches, whether the operation is a clean, invalidate, or both. Also specify which point the operation applies to, i.e., to the point of unification (PoU), coherency (PoC), or persistence (PoP). This commit applies the following sed transformation to all files under arch/arm64: "s/\b__flush_cache_range\b/caches_clean_inval_pou_macro/g;"\ "s/\b__flush_icache_range\b/caches_clean_inval_pou/g;"\ "s/\binvalidate_icache_range\b/icache_inval_pou/g;"\ "s/\b__flush_dcache_area\b/dcache_clean_inval_poc/g;"\ "s/\b__inval_dcache_area\b/dcache_inval_poc/g;"\ "s/__clean_dcache_area_poc\b/dcache_clean_poc/g;"\ "s/\b__clean_dcache_area_pop\b/dcache_clean_pop/g;"\ "s/\b__clean_dcache_area_pou\b/dcache_clean_pou/g;"\ "s/\b__flush_cache_user_range\b/caches_clean_inval_user_pou/g;"\ "s/\b__flush_icache_all\b/icache_inval_all_pou/g;" Note that __clean_dcache_area_poc is deliberately missing a word boundary check at the beginning in order to match the efistub symbols in image-vars.h. Also note that, despite its name, __flush_icache_range operates on both instruction and data caches. The name change here reflects that. No functional change intended. Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-19-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/arch_gicv3.h | 2 +- arch/arm64/include/asm/cacheflush.h | 36 +++++++++--------- arch/arm64/include/asm/efi.h | 2 +- arch/arm64/include/asm/kvm_mmu.h | 6 +-- arch/arm64/kernel/alternative.c | 2 +- arch/arm64/kernel/efi-entry.S | 4 +- arch/arm64/kernel/head.S | 8 ++-- arch/arm64/kernel/hibernate-asm.S | 4 +- arch/arm64/kernel/hibernate.c | 12 +++--- arch/arm64/kernel/idreg-override.c | 2 +- arch/arm64/kernel/image-vars.h | 2 +- arch/arm64/kernel/insn.c | 2 +- arch/arm64/kernel/kaslr.c | 6 +-- arch/arm64/kernel/machine_kexec.c | 10 ++--- arch/arm64/kernel/smp.c | 4 +- arch/arm64/kernel/smp_spin_table.c | 4 +- arch/arm64/kernel/sys_compat.c | 2 +- arch/arm64/kvm/arm.c | 2 +- arch/arm64/kvm/hyp/nvhe/cache.S | 4 +- arch/arm64/kvm/hyp/nvhe/setup.c | 2 +- arch/arm64/kvm/hyp/nvhe/tlb.c | 2 +- arch/arm64/kvm/hyp/pgtable.c | 4 +- arch/arm64/lib/uaccess_flushcache.c | 4 +- arch/arm64/mm/cache.S | 58 ++++++++++++++--------------- arch/arm64/mm/flush.c | 12 +++--- 25 files changed, 98 insertions(+), 98 deletions(-) diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index ed1cc9d8e6df..4ad22c3135db 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -125,7 +125,7 @@ static inline u32 gic_read_rpr(void) #define gic_write_lpir(v, c) writeq_relaxed(v, c) #define gic_flush_dcache_to_poc(a,l) \ - __flush_dcache_area((unsigned long)(a), (unsigned long)(a)+(l)) + dcache_clean_inval_poc((unsigned long)(a), (unsigned long)(a)+(l)) #define gits_read_baser(c) readq_relaxed(c) #define gits_write_baser(v, c) writeq_relaxed(v, c) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 26617df1fa45..543c997eb3b7 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -34,54 +34,54 @@ * - start - virtual start address (inclusive) * - end - virtual end address (exclusive) * - * __flush_icache_range(start, end) + * caches_clean_inval_pou(start, end) * * Ensure coherency between the I-cache and the D-cache region to * the Point of Unification. * - * __flush_cache_user_range(start, end) + * caches_clean_inval_user_pou(start, end) * * Ensure coherency between the I-cache and the D-cache region to * the Point of Unification. * Use only if the region might access user memory. * - * invalidate_icache_range(start, end) + * icache_inval_pou(start, end) * * Invalidate I-cache region to the Point of Unification. * - * __flush_dcache_area(start, end) + * dcache_clean_inval_poc(start, end) * * Clean and invalidate D-cache region to the Point of Coherency. * - * __inval_dcache_area(start, end) + * dcache_inval_poc(start, end) * * Invalidate D-cache region to the Point of Coherency. * - * __clean_dcache_area_poc(start, end) + * dcache_clean_poc(start, end) * * Clean D-cache region to the Point of Coherency. * - * __clean_dcache_area_pop(start, end) + * dcache_clean_pop(start, end) * * Clean D-cache region to the Point of Persistence. * - * __clean_dcache_area_pou(start, end) + * dcache_clean_pou(start, end) * * Clean D-cache region to the Point of Unification. */ -extern void __flush_icache_range(unsigned long start, unsigned long end); -extern void invalidate_icache_range(unsigned long start, unsigned long end); -extern void __flush_dcache_area(unsigned long start, unsigned long end); -extern void __inval_dcache_area(unsigned long start, unsigned long end); -extern void __clean_dcache_area_poc(unsigned long start, unsigned long end); -extern void __clean_dcache_area_pop(unsigned long start, unsigned long end); -extern void __clean_dcache_area_pou(unsigned long start, unsigned long end); -extern long __flush_cache_user_range(unsigned long start, unsigned long end); +extern void caches_clean_inval_pou(unsigned long start, unsigned long end); +extern void icache_inval_pou(unsigned long start, unsigned long end); +extern void dcache_clean_inval_poc(unsigned long start, unsigned long end); +extern void dcache_inval_poc(unsigned long start, unsigned long end); +extern void dcache_clean_poc(unsigned long start, unsigned long end); +extern void dcache_clean_pop(unsigned long start, unsigned long end); +extern void dcache_clean_pou(unsigned long start, unsigned long end); +extern long caches_clean_inval_user_pou(unsigned long start, unsigned long end); extern void sync_icache_aliases(unsigned long start, unsigned long end); static inline void flush_icache_range(unsigned long start, unsigned long end) { - __flush_icache_range(start, end); + caches_clean_inval_pou(start, end); /* * IPI all online CPUs so that they undergo a context synchronization @@ -135,7 +135,7 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *, #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); -static __always_inline void __flush_icache_all(void) +static __always_inline void icache_inval_all_pou(void) { if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC)) return; diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 0ae2397076fd..1bed37eb013a 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -137,7 +137,7 @@ void efi_virtmap_unload(void); static inline void efi_capsule_flush_cache_range(void *addr, int size) { - __flush_dcache_area((unsigned long)addr, (unsigned long)addr + size); + dcache_clean_inval_poc((unsigned long)addr, (unsigned long)addr + size); } #endif /* _ASM_EFI_H */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 33293d5855af..f4cbfa9025a8 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -181,7 +181,7 @@ static inline void *__kvm_vector_slot2addr(void *base, struct kvm; #define kvm_flush_dcache_to_poc(a,l) \ - __flush_dcache_area((unsigned long)(a), (unsigned long)(a)+(l)) + dcache_clean_inval_poc((unsigned long)(a), (unsigned long)(a)+(l)) static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) { @@ -209,12 +209,12 @@ static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn, { if (icache_is_aliasing()) { /* any kind of VIPT cache */ - __flush_icache_all(); + icache_inval_all_pou(); } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) { /* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */ void *va = page_address(pfn_to_page(pfn)); - invalidate_icache_range((unsigned long)va, + icache_inval_pou((unsigned long)va, (unsigned long)va + size); } } diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index c906d20c7b52..3fb79b76e9d9 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -181,7 +181,7 @@ static void __nocfi __apply_alternatives(struct alt_region *region, bool is_modu */ if (!is_module) { dsb(ish); - __flush_icache_all(); + icache_inval_all_pou(); isb(); /* Ignore ARM64_CB bit from feature mask */ diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index b0f728fb61f0..61a87fa1c305 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -29,7 +29,7 @@ SYM_CODE_START(efi_enter_kernel) */ ldr w1, =kernel_size add x1, x0, x1 - bl __clean_dcache_area_poc + bl dcache_clean_poc ic ialluis /* @@ -38,7 +38,7 @@ SYM_CODE_START(efi_enter_kernel) */ adr x0, 0f adr x1, 3f - bl __clean_dcache_area_poc + bl dcache_clean_poc 0: /* Turn off Dcache and MMU */ mrs x0, CurrentEL diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 8df0ac8d9123..6928cb67d3a0 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -118,7 +118,7 @@ SYM_CODE_START_LOCAL(preserve_boot_args) // MMU off add x1, x0, #0x20 // 4 x 8 bytes - b __inval_dcache_area // tail call + b dcache_inval_poc // tail call SYM_CODE_END(preserve_boot_args) /* @@ -268,7 +268,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables) */ adrp x0, init_pg_dir adrp x1, init_pg_end - bl __inval_dcache_area + bl dcache_inval_poc /* * Clear the init page tables. @@ -381,11 +381,11 @@ SYM_FUNC_START_LOCAL(__create_page_tables) adrp x0, idmap_pg_dir adrp x1, idmap_pg_end - bl __inval_dcache_area + bl dcache_inval_poc adrp x0, init_pg_dir adrp x1, init_pg_end - bl __inval_dcache_area + bl dcache_inval_poc ret x28 SYM_FUNC_END(__create_page_tables) diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index ef2ab7caf815..81c0186a5e32 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -45,7 +45,7 @@ * Because this code has to be copied to a 'safe' page, it can't call out to * other functions by PC-relative address. Also remember that it may be * mid-way through over-writing other functions. For this reason it contains - * code from __flush_icache_range() and uses the copy_page() macro. + * code from caches_clean_inval_pou() and uses the copy_page() macro. * * This 'safe' page is mapped via ttbr0, and executed from there. This function * switches to a copy of the linear map in ttbr1, performs the restore, then @@ -87,7 +87,7 @@ SYM_CODE_START(swsusp_arch_suspend_exit) copy_page x0, x1, x2, x3, x4, x5, x6, x7, x8, x9 add x1, x10, #PAGE_SIZE - /* Clean the copied page to PoU - based on __flush_icache_range() */ + /* Clean the copied page to PoU - based on caches_clean_inval_pou() */ raw_dcache_line_size x2, x3 sub x3, x2, #1 bic x4, x10, x3 diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index b40ddce71507..46a0b4d6e251 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -210,7 +210,7 @@ static int create_safe_exec_page(void *src_start, size_t length, return -ENOMEM; memcpy(page, src_start, length); - __flush_icache_range((unsigned long)page, (unsigned long)page + length); + caches_clean_inval_pou((unsigned long)page, (unsigned long)page + length); rc = trans_pgd_idmap_page(&trans_info, &trans_ttbr0, &t0sz, page); if (rc) return rc; @@ -381,17 +381,17 @@ int swsusp_arch_suspend(void) ret = swsusp_save(); } else { /* Clean kernel core startup/idle code to PoC*/ - __flush_dcache_area((unsigned long)__mmuoff_data_start, + dcache_clean_inval_poc((unsigned long)__mmuoff_data_start, (unsigned long)__mmuoff_data_end); - __flush_dcache_area((unsigned long)__idmap_text_start, + dcache_clean_inval_poc((unsigned long)__idmap_text_start, (unsigned long)__idmap_text_end); /* Clean kvm setup code to PoC? */ if (el2_reset_needed()) { - __flush_dcache_area( + dcache_clean_inval_poc( (unsigned long)__hyp_idmap_text_start, (unsigned long)__hyp_idmap_text_end); - __flush_dcache_area((unsigned long)__hyp_text_start, + dcache_clean_inval_poc((unsigned long)__hyp_text_start, (unsigned long)__hyp_text_end); } @@ -477,7 +477,7 @@ int swsusp_arch_resume(void) * The hibernate exit text contains a set of el2 vectors, that will * be executed at el2 with the mmu off in order to reload hyp-stub. */ - __flush_dcache_area((unsigned long)hibernate_exit, + dcache_clean_inval_poc((unsigned long)hibernate_exit, (unsigned long)hibernate_exit + exit_size); /* diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 3dd515baf526..53a381a7f65d 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -237,7 +237,7 @@ asmlinkage void __init init_feature_override(void) for (i = 0; i < ARRAY_SIZE(regs); i++) { if (regs[i]->override) - __flush_dcache_area((unsigned long)regs[i]->override, + dcache_clean_inval_poc((unsigned long)regs[i]->override, (unsigned long)regs[i]->override + sizeof(*regs[i]->override)); } diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index bcf3c2755370..c96a9a0043bf 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -35,7 +35,7 @@ __efistub_strnlen = __pi_strnlen; __efistub_strcmp = __pi_strcmp; __efistub_strncmp = __pi_strncmp; __efistub_strrchr = __pi_strrchr; -__efistub___clean_dcache_area_poc = __pi___clean_dcache_area_poc; +__efistub_dcache_clean_poc = __pi_dcache_clean_poc; #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) __efistub___memcpy = __pi_memcpy; diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 6c0de2f60ea9..51cb8dc98d00 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -198,7 +198,7 @@ int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn) ret = aarch64_insn_write(tp, insn); if (ret == 0) - __flush_icache_range((uintptr_t)tp, + caches_clean_inval_pou((uintptr_t)tp, (uintptr_t)tp + AARCH64_INSN_SIZE); return ret; diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 49cccd03cb37..cfa2cfde3019 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -72,7 +72,7 @@ u64 __init kaslr_early_init(void) * we end up running with module randomization disabled. */ module_alloc_base = (u64)_etext - MODULES_VSIZE; - __flush_dcache_area((unsigned long)&module_alloc_base, + dcache_clean_inval_poc((unsigned long)&module_alloc_base, (unsigned long)&module_alloc_base + sizeof(module_alloc_base)); @@ -172,10 +172,10 @@ u64 __init kaslr_early_init(void) module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; module_alloc_base &= PAGE_MASK; - __flush_dcache_area((unsigned long)&module_alloc_base, + dcache_clean_inval_poc((unsigned long)&module_alloc_base, (unsigned long)&module_alloc_base + sizeof(module_alloc_base)); - __flush_dcache_area((unsigned long)&memstart_offset_seed, + dcache_clean_inval_poc((unsigned long)&memstart_offset_seed, (unsigned long)&memstart_offset_seed + sizeof(memstart_offset_seed)); diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 3e79110c8f3a..03ceabe4d912 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -72,10 +72,10 @@ int machine_kexec_post_load(struct kimage *kimage) * For execution with the MMU off, reloc_code needs to be cleaned to the * PoC and invalidated from the I-cache. */ - __flush_dcache_area((unsigned long)reloc_code, + dcache_clean_inval_poc((unsigned long)reloc_code, (unsigned long)reloc_code + arm64_relocate_new_kernel_size); - invalidate_icache_range((uintptr_t)reloc_code, + icache_inval_pou((uintptr_t)reloc_code, (uintptr_t)reloc_code + arm64_relocate_new_kernel_size); @@ -111,7 +111,7 @@ static void kexec_list_flush(struct kimage *kimage) unsigned long addr; /* flush the list entries. */ - __flush_dcache_area((unsigned long)entry, + dcache_clean_inval_poc((unsigned long)entry, (unsigned long)entry + sizeof(kimage_entry_t)); @@ -128,7 +128,7 @@ static void kexec_list_flush(struct kimage *kimage) break; case IND_SOURCE: /* flush the source pages. */ - __flush_dcache_area(addr, addr + PAGE_SIZE); + dcache_clean_inval_poc(addr, addr + PAGE_SIZE); break; case IND_DESTINATION: break; @@ -155,7 +155,7 @@ static void kexec_segment_flush(const struct kimage *kimage) kimage->segment[i].memsz, kimage->segment[i].memsz / PAGE_SIZE); - __flush_dcache_area( + dcache_clean_inval_poc( (unsigned long)phys_to_virt(kimage->segment[i].mem), (unsigned long)phys_to_virt(kimage->segment[i].mem) + kimage->segment[i].memsz); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5fcdee331087..9b4c1118194d 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -122,7 +122,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) secondary_data.task = idle; secondary_data.stack = task_stack_page(idle) + THREAD_SIZE; update_cpu_boot_status(CPU_MMU_OFF); - __flush_dcache_area((unsigned long)&secondary_data, + dcache_clean_inval_poc((unsigned long)&secondary_data, (unsigned long)&secondary_data + sizeof(secondary_data)); @@ -145,7 +145,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) pr_crit("CPU%u: failed to come online\n", cpu); secondary_data.task = NULL; secondary_data.stack = NULL; - __flush_dcache_area((unsigned long)&secondary_data, + dcache_clean_inval_poc((unsigned long)&secondary_data, (unsigned long)&secondary_data + sizeof(secondary_data)); status = READ_ONCE(secondary_data.status); diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 58d804582a35..7e1624ecab3c 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -36,7 +36,7 @@ static void write_pen_release(u64 val) unsigned long size = sizeof(secondary_holding_pen_release); secondary_holding_pen_release = val; - __flush_dcache_area((unsigned long)start, (unsigned long)start + size); + dcache_clean_inval_poc((unsigned long)start, (unsigned long)start + size); } @@ -90,7 +90,7 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu) * the boot protocol. */ writeq_relaxed(pa_holding_pen, release_addr); - __flush_dcache_area((__force unsigned long)release_addr, + dcache_clean_inval_poc((__force unsigned long)release_addr, (__force unsigned long)release_addr + sizeof(*release_addr)); diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 265fe3eb1069..db5159a3055f 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -41,7 +41,7 @@ __do_compat_cache_op(unsigned long start, unsigned long end) dsb(ish); } - ret = __flush_cache_user_range(start, start + chunk); + ret = caches_clean_inval_user_pou(start, start + chunk); if (ret) return ret; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 1cb39c0803a4..c1953f65ca0e 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1064,7 +1064,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) stage2_unmap_vm(vcpu->kvm); else - __flush_icache_all(); + icache_inval_all_pou(); } vcpu_reset_hcr(vcpu); diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S index 36cef6915428..958734f4d6b0 100644 --- a/arch/arm64/kvm/hyp/nvhe/cache.S +++ b/arch/arm64/kvm/hyp/nvhe/cache.S @@ -7,7 +7,7 @@ #include #include -SYM_FUNC_START_PI(__flush_dcache_area) +SYM_FUNC_START_PI(dcache_clean_inval_poc) dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(__flush_dcache_area) +SYM_FUNC_END_PI(dcache_clean_inval_poc) diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 5dffe928f256..8143ebd4fb72 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -134,7 +134,7 @@ static void update_nvhe_init_params(void) for (i = 0; i < hyp_nr_cpus; i++) { params = per_cpu_ptr(&kvm_init_params, i); params->pgd_pa = __hyp_pa(pkvm_pgtable.pgd); - __flush_dcache_area((unsigned long)params, + dcache_clean_inval_poc((unsigned long)params, (unsigned long)params + sizeof(*params)); } } diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index 83dc3b271bc5..38ed0f6f2703 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -104,7 +104,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, * you should be running with VHE enabled. */ if (icache_is_vpipt()) - __flush_icache_all(); + icache_inval_all_pou(); __tlb_switch_to_host(&cxt); } diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 10d2f04013d4..e9ad7fb28ee3 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -841,7 +841,7 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, if (need_flush) { kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops); - __flush_dcache_area((unsigned long)pte_follow, + dcache_clean_inval_poc((unsigned long)pte_follow, (unsigned long)pte_follow + kvm_granule_size(level)); } @@ -997,7 +997,7 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, return 0; pte_follow = kvm_pte_follow(pte, mm_ops); - __flush_dcache_area((unsigned long)pte_follow, + dcache_clean_inval_poc((unsigned long)pte_follow, (unsigned long)pte_follow + kvm_granule_size(level)); return 0; diff --git a/arch/arm64/lib/uaccess_flushcache.c b/arch/arm64/lib/uaccess_flushcache.c index 62ea989effe8..baee22961bdb 100644 --- a/arch/arm64/lib/uaccess_flushcache.c +++ b/arch/arm64/lib/uaccess_flushcache.c @@ -15,7 +15,7 @@ void memcpy_flushcache(void *dst, const void *src, size_t cnt) * barrier to order the cache maintenance against the memcpy. */ memcpy(dst, src, cnt); - __clean_dcache_area_pop((unsigned long)dst, (unsigned long)dst + cnt); + dcache_clean_pop((unsigned long)dst, (unsigned long)dst + cnt); } EXPORT_SYMBOL_GPL(memcpy_flushcache); @@ -33,6 +33,6 @@ unsigned long __copy_user_flushcache(void *to, const void __user *from, rc = raw_copy_from_user(to, from, n); /* See above */ - __clean_dcache_area_pop((unsigned long)to, (unsigned long)to + n - rc); + dcache_clean_pop((unsigned long)to, (unsigned long)to + n - rc); return rc; } diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index ea605d94182f..5051b3c1a4f1 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -15,7 +15,7 @@ #include /* - * __flush_cache_range(start,end) [fixup] + * caches_clean_inval_pou_macro(start,end) [fixup] * * Ensure that the I and D caches are coherent within specified region. * This is typically used when code has been written to a memory region, @@ -25,7 +25,7 @@ * - end - virtual end address of region * - fixup - optional label to branch to on user fault */ -.macro __flush_cache_range, fixup +.macro caches_clean_inval_pou_macro, fixup alternative_if ARM64_HAS_CACHE_IDC dsb ishst b .Ldc_skip_\@ @@ -43,7 +43,7 @@ alternative_else_nop_endif .endm /* - * __flush_icache_range(start,end) + * caches_clean_inval_pou(start,end) * * Ensure that the I and D caches are coherent within specified region. * This is typically used when code has been written to a memory region, @@ -52,13 +52,13 @@ alternative_else_nop_endif * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START(__flush_icache_range) - __flush_cache_range +SYM_FUNC_START(caches_clean_inval_pou) + caches_clean_inval_pou_macro ret -SYM_FUNC_END(__flush_icache_range) +SYM_FUNC_END(caches_clean_inval_pou) /* - * __flush_cache_user_range(start,end) + * caches_clean_inval_user_pou(start,end) * * Ensure that the I and D caches are coherent within specified region. * This is typically used when code has been written to a memory region, @@ -67,10 +67,10 @@ SYM_FUNC_END(__flush_icache_range) * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START(__flush_cache_user_range) +SYM_FUNC_START(caches_clean_inval_user_pou) uaccess_ttbr0_enable x2, x3, x4 - __flush_cache_range 2f + caches_clean_inval_pou_macro 2f mov x0, xzr 1: uaccess_ttbr0_disable x1, x2 @@ -78,17 +78,17 @@ SYM_FUNC_START(__flush_cache_user_range) 2: mov x0, #-EFAULT b 1b -SYM_FUNC_END(__flush_cache_user_range) +SYM_FUNC_END(caches_clean_inval_user_pou) /* - * invalidate_icache_range(start,end) + * icache_inval_pou(start,end) * * Ensure that the I cache is invalid within specified region. * * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START(invalidate_icache_range) +SYM_FUNC_START(icache_inval_pou) alternative_if ARM64_HAS_CACHE_DIC isb ret @@ -96,10 +96,10 @@ alternative_else_nop_endif invalidate_icache_by_line x0, x1, x2, x3 ret -SYM_FUNC_END(invalidate_icache_range) +SYM_FUNC_END(icache_inval_pou) /* - * __flush_dcache_area(start, end) + * dcache_clean_inval_poc(start, end) * * Ensure that any D-cache lines for the interval [start, end) * are cleaned and invalidated to the PoC. @@ -107,13 +107,13 @@ SYM_FUNC_END(invalidate_icache_range) * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START_PI(__flush_dcache_area) +SYM_FUNC_START_PI(dcache_clean_inval_poc) dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(__flush_dcache_area) +SYM_FUNC_END_PI(dcache_clean_inval_poc) /* - * __clean_dcache_area_pou(start, end) + * dcache_clean_pou(start, end) * * Ensure that any D-cache lines for the interval [start, end) * are cleaned to the PoU. @@ -121,17 +121,17 @@ SYM_FUNC_END_PI(__flush_dcache_area) * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START(__clean_dcache_area_pou) +SYM_FUNC_START(dcache_clean_pou) alternative_if ARM64_HAS_CACHE_IDC dsb ishst ret alternative_else_nop_endif dcache_by_line_op cvau, ish, x0, x1, x2, x3 ret -SYM_FUNC_END(__clean_dcache_area_pou) +SYM_FUNC_END(dcache_clean_pou) /* - * __inval_dcache_area(start, end) + * dcache_inval_poc(start, end) * * Ensure that any D-cache lines for the interval [start, end) * are invalidated. Any partial lines at the ends of the interval are @@ -141,7 +141,7 @@ SYM_FUNC_END(__clean_dcache_area_pou) * - end - kernel end address of region */ SYM_FUNC_START_LOCAL(__dma_inv_area) -SYM_FUNC_START_PI(__inval_dcache_area) +SYM_FUNC_START_PI(dcache_inval_poc) /* FALLTHROUGH */ /* @@ -166,11 +166,11 @@ SYM_FUNC_START_PI(__inval_dcache_area) b.lo 2b dsb sy ret -SYM_FUNC_END_PI(__inval_dcache_area) +SYM_FUNC_END_PI(dcache_inval_poc) SYM_FUNC_END(__dma_inv_area) /* - * __clean_dcache_area_poc(start, end) + * dcache_clean_poc(start, end) * * Ensure that any D-cache lines for the interval [start, end) * are cleaned to the PoC. @@ -179,7 +179,7 @@ SYM_FUNC_END(__dma_inv_area) * - end - virtual end address of region */ SYM_FUNC_START_LOCAL(__dma_clean_area) -SYM_FUNC_START_PI(__clean_dcache_area_poc) +SYM_FUNC_START_PI(dcache_clean_poc) /* FALLTHROUGH */ /* @@ -189,11 +189,11 @@ SYM_FUNC_START_PI(__clean_dcache_area_poc) */ dcache_by_line_op cvac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(__clean_dcache_area_poc) +SYM_FUNC_END_PI(dcache_clean_poc) SYM_FUNC_END(__dma_clean_area) /* - * __clean_dcache_area_pop(start, end) + * dcache_clean_pop(start, end) * * Ensure that any D-cache lines for the interval [start, end) * are cleaned to the PoP. @@ -201,13 +201,13 @@ SYM_FUNC_END(__dma_clean_area) * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START_PI(__clean_dcache_area_pop) +SYM_FUNC_START_PI(dcache_clean_pop) alternative_if_not ARM64_HAS_DCPOP - b __clean_dcache_area_poc + b dcache_clean_poc alternative_else_nop_endif dcache_by_line_op cvap, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(__clean_dcache_area_pop) +SYM_FUNC_END_PI(dcache_clean_pop) /* * __dma_flush_area(start, size) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index c4ca7e05fdb8..2aaf950b906c 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -17,14 +17,14 @@ void sync_icache_aliases(unsigned long start, unsigned long end) { if (icache_is_aliasing()) { - __clean_dcache_area_pou(start, end); - __flush_icache_all(); + dcache_clean_pou(start, end); + icache_inval_all_pou(); } else { /* * Don't issue kick_all_cpus_sync() after I-cache invalidation * for user mappings. */ - __flush_icache_range(start, end); + caches_clean_inval_pou(start, end); } } @@ -76,20 +76,20 @@ EXPORT_SYMBOL(flush_dcache_page); /* * Additional functions defined in assembly. */ -EXPORT_SYMBOL(__flush_icache_range); +EXPORT_SYMBOL(caches_clean_inval_pou); #ifdef CONFIG_ARCH_HAS_PMEM_API void arch_wb_cache_pmem(void *addr, size_t size) { /* Ensure order against any prior non-cacheable writes */ dmb(osh); - __clean_dcache_area_pop((unsigned long)addr, (unsigned long)addr + size); + dcache_clean_pop((unsigned long)addr, (unsigned long)addr + size); } EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); void arch_invalidate_pmem(void *addr, size_t size) { - __inval_dcache_area((unsigned long)addr, (unsigned long)addr + size); + dcache_inval_poc((unsigned long)addr, (unsigned long)addr + size); } EXPORT_SYMBOL_GPL(arch_invalidate_pmem); #endif From c0c8a8397fa8a74d04915f4d3d28cb4a5d401427 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Tue, 25 May 2021 14:50:06 +0200 Subject: [PATCH 606/730] s390/dasd: add missing discipline function Fix crash with illegal operation exception in dasd_device_tasklet. Commit b72949328869 ("s390/dasd: Prepare for additional path event handling") renamed the verify_path function for ECKD but not for FBA and DIAG. This leads to a panic when the path verification function is called for a FBA or DIAG device. Fix by defining a wrapper function for dasd_generic_verify_path(). Fixes: b72949328869 ("s390/dasd: Prepare for additional path event handling") Cc: #5.11 Reviewed-by: Jan Hoeppner Signed-off-by: Stefan Haberland Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/20210525125006.157531-2-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_diag.c | 8 +++++++- drivers/s390/block/dasd_fba.c | 8 +++++++- drivers/s390/block/dasd_int.h | 1 - 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index 1b9e1442e6a5..fd42a5fffaed 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -642,12 +642,18 @@ static void dasd_diag_setup_blk_queue(struct dasd_block *block) blk_queue_segment_boundary(q, PAGE_SIZE - 1); } +static int dasd_diag_pe_handler(struct dasd_device *device, + __u8 tbvpm, __u8 fcsecpm) +{ + return dasd_generic_verify_path(device, tbvpm); +} + static struct dasd_discipline dasd_diag_discipline = { .owner = THIS_MODULE, .name = "DIAG", .ebcname = "DIAG", .check_device = dasd_diag_check_device, - .verify_path = dasd_generic_verify_path, + .pe_handler = dasd_diag_pe_handler, .fill_geometry = dasd_diag_fill_geometry, .setup_blk_queue = dasd_diag_setup_blk_queue, .start_IO = dasd_start_diag, diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index 4789410885e4..3ad319aee51e 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -794,13 +794,19 @@ static void dasd_fba_setup_blk_queue(struct dasd_block *block) blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); } +static int dasd_fba_pe_handler(struct dasd_device *device, + __u8 tbvpm, __u8 fcsecpm) +{ + return dasd_generic_verify_path(device, tbvpm); +} + static struct dasd_discipline dasd_fba_discipline = { .owner = THIS_MODULE, .name = "FBA ", .ebcname = "FBA ", .check_device = dasd_fba_check_characteristics, .do_analysis = dasd_fba_do_analysis, - .verify_path = dasd_generic_verify_path, + .pe_handler = dasd_fba_pe_handler, .setup_blk_queue = dasd_fba_setup_blk_queue, .fill_geometry = dasd_fba_fill_geometry, .start_IO = dasd_start_IO, diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 1c59b0e86a9f..155428bfed8a 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -297,7 +297,6 @@ struct dasd_discipline { * e.g. verify that new path is compatible with the current * configuration. */ - int (*verify_path)(struct dasd_device *, __u8); int (*pe_handler)(struct dasd_device *, __u8, __u8); /* From 5c9d706f61336d9f7f285df64c734af778c70f39 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 25 May 2021 20:35:29 +0200 Subject: [PATCH 607/730] bpf: Fix BPF_LSM kconfig symbol dependency Similarly as 6bdacdb48e94 ("bpf: Fix BPF_JIT kconfig symbol dependency") we need to detangle the hard BPF_LSM dependency on NET. This was previously implicit by its dependency on BPF_JIT which itself was dependent on NET (but without any actual/real hard dependency code-wise). Given the latter was lifted, so should be the former as BPF_LSMs could well exist on net-less systems. This therefore also fixes a randconfig build error recently reported by Randy: ld: kernel/bpf/bpf_lsm.o: in function `bpf_lsm_func_proto': bpf_lsm.c:(.text+0x1a0): undefined reference to `bpf_sk_storage_get_proto' ld: bpf_lsm.c:(.text+0x1b8): undefined reference to `bpf_sk_storage_delete_proto' [...] Fixes: b24abcff918a ("bpf, kconfig: Add consolidated menu entry for bpf with core options") Reported-by: Randy Dunlap Signed-off-by: Daniel Borkmann Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- kernel/bpf/bpf_lsm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 5efb2b24012c..da471bf01b97 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -107,10 +107,12 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_inode_storage_get_proto; case BPF_FUNC_inode_storage_delete: return &bpf_inode_storage_delete_proto; +#ifdef CONFIG_NET case BPF_FUNC_sk_storage_get: return &bpf_sk_storage_get_proto; case BPF_FUNC_sk_storage_delete: return &bpf_sk_storage_delete_proto; +#endif /* CONFIG_NET */ case BPF_FUNC_spin_lock: return &bpf_spin_lock_proto; case BPF_FUNC_spin_unlock: From d4b250562fb89ba6f94156b8bea12b8829cfa9a6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 3 Nov 2019 21:22:04 +0000 Subject: [PATCH 608/730] i2c: qcom-geni: fix spelling mistake "unepxected" -> "unexpected" There is a spelling mistake in an error message string, fix it. Signed-off-by: Colin Ian King Reviewed-by: Akash Asthana Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-qcom-geni.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 214b4c913a13..07b710a774df 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -100,7 +100,7 @@ static const struct geni_i2c_err_log gi2c_log[] = { [GP_IRQ0] = {-EIO, "Unknown I2C err GP_IRQ0"}, [NACK] = {-ENXIO, "NACK: slv unresponsive, check its power/reset-ln"}, [GP_IRQ2] = {-EIO, "Unknown I2C err GP IRQ2"}, - [BUS_PROTO] = {-EPROTO, "Bus proto err, noisy/unepxected start/stop"}, + [BUS_PROTO] = {-EPROTO, "Bus proto err, noisy/unexpected start/stop"}, [ARB_LOST] = {-EAGAIN, "Bus arbitration lost, clock line undriveable"}, [GP_IRQ5] = {-EIO, "Unknown I2C err GP IRQ5"}, [GENI_OVERRUN] = {-EIO, "Cmd overrun, check GENI cmd-state machine"}, From 9dd45bbad947f7cc4f3d4eff7fc02a7e3804e47b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 3 May 2021 09:02:20 +0200 Subject: [PATCH 609/730] i2c: icy: Remove unused variable new_fwnode in icy_probe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The last user of new_fwnode was removed, leading to: drivers/i2c/busses/i2c-icy.c: In function ‘icy_probe’: drivers/i2c/busses/i2c-icy.c:126:24: warning: unused variable ‘new_fwnode’ [-Wunused-variable] 126 | struct fwnode_handle *new_fwnode; | ^~~~~~~~~~ Fixes: dd7a37102b79ae55 ("i2c: icy: Constify the software node") Signed-off-by: Geert Uytterhoeven Reviewed-by: Max Staudt Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-icy.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-icy.c b/drivers/i2c/busses/i2c-icy.c index c8c422e9dda4..5dae7cab7260 100644 --- a/drivers/i2c/busses/i2c-icy.c +++ b/drivers/i2c/busses/i2c-icy.c @@ -123,7 +123,6 @@ static int icy_probe(struct zorro_dev *z, { struct icy_i2c *i2c; struct i2c_algo_pcf_data *algo_data; - struct fwnode_handle *new_fwnode; struct i2c_board_info ltc2990_info = { .type = "ltc2990", .swnode = &icy_ltc2990_node, From 52b806e8d6b3c06d5f8415f82d7353695acb2f00 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 4 May 2021 11:06:32 +0200 Subject: [PATCH 610/730] i2c: I2C_HISI should depend on ACPI The HiSilicon Kunpeng I2C controller driver relies on ACPI to probe for its presence. Hence add a dependency on ACPI, to prevent asking the user about this driver when configuring a kernel without ACPI firmware support. Fixes: d62fbdb99a85730a ("i2c: add support for HiSilicon I2C controller") Signed-off-by: Geert Uytterhoeven Acked-by: Yicong Yang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 281a65d9b44b..10acece9d7b9 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -647,7 +647,7 @@ config I2C_HIGHLANDER config I2C_HISI tristate "HiSilicon I2C controller" - depends on ARM64 || COMPILE_TEST + depends on (ARM64 && ACPI) || COMPILE_TEST help Say Y here if you want to have Hisilicon I2C controller support available on the Kunpeng Server. From c4740e293c93c747e65d53d9aacc2ba8521d1489 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 6 May 2021 13:15:40 +0200 Subject: [PATCH 611/730] i2c: sh_mobile: Use new clock calculation formulas for RZ/G2E When switching the Gen3 SoCs to the new clock calculation formulas, the match entry for RZ/G2E added in commit 51243b73455f2d12 ("i2c: sh_mobile: Add support for r8a774c0 (RZ/G2E)") was forgotten. Fixes: e8a27567509b2439 ("i2c: sh_mobile: use new clock calculation formulas for Gen3") Signed-off-by: Geert Uytterhoeven Reviewed-by: Fabrizio Castro Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-sh_mobile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c index 3ae6ca21a02c..2d2e630fd438 100644 --- a/drivers/i2c/busses/i2c-sh_mobile.c +++ b/drivers/i2c/busses/i2c-sh_mobile.c @@ -807,7 +807,7 @@ static const struct sh_mobile_dt_config r8a7740_dt_config = { static const struct of_device_id sh_mobile_i2c_dt_ids[] = { { .compatible = "renesas,iic-r8a73a4", .data = &fast_clock_dt_config }, { .compatible = "renesas,iic-r8a7740", .data = &r8a7740_dt_config }, - { .compatible = "renesas,iic-r8a774c0", .data = &fast_clock_dt_config }, + { .compatible = "renesas,iic-r8a774c0", .data = &v2_freq_calc_dt_config }, { .compatible = "renesas,iic-r8a7790", .data = &v2_freq_calc_dt_config }, { .compatible = "renesas,iic-r8a7791", .data = &v2_freq_calc_dt_config }, { .compatible = "renesas,iic-r8a7792", .data = &v2_freq_calc_dt_config }, From 3d0220f6861d713213b015b582e9f21e5b28d2e0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 21 May 2021 10:17:36 +0000 Subject: [PATCH 612/730] bpf: Wrap aux data inside bpf_sanitize_info container Add a container structure struct bpf_sanitize_info which holds the current aux info, and update call-sites to sanitize_ptr_alu() to pass it in. This is needed for passing in additional state later on. Signed-off-by: Daniel Borkmann Reviewed-by: Piotr Krysiuk Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 09849e43f035..98690f5367f9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6486,15 +6486,19 @@ static bool sanitize_needed(u8 opcode) return opcode == BPF_ADD || opcode == BPF_SUB; } +struct bpf_sanitize_info { + struct bpf_insn_aux_data aux; +}; + static int sanitize_ptr_alu(struct bpf_verifier_env *env, struct bpf_insn *insn, const struct bpf_reg_state *ptr_reg, const struct bpf_reg_state *off_reg, struct bpf_reg_state *dst_reg, - struct bpf_insn_aux_data *tmp_aux, + struct bpf_sanitize_info *info, const bool commit_window) { - struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : tmp_aux; + struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux; struct bpf_verifier_state *vstate = env->cur_state; bool off_is_imm = tnum_is_const(off_reg->var_off); bool off_is_neg = off_reg->smin_value < 0; @@ -6523,8 +6527,8 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, /* In commit phase we narrow the masking window based on * the observed pointer move after the simulated operation. */ - alu_state = tmp_aux->alu_state; - alu_limit = abs(tmp_aux->alu_limit - alu_limit); + alu_state = info->aux.alu_state; + alu_limit = abs(info->aux.alu_limit - alu_limit); } else { alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0; alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0; @@ -6685,7 +6689,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value; u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value, umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value; - struct bpf_insn_aux_data tmp_aux = {}; + struct bpf_sanitize_info info = {}; u8 opcode = BPF_OP(insn->code); u32 dst = insn->dst_reg; int ret; @@ -6754,7 +6758,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, if (sanitize_needed(opcode)) { ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg, - &tmp_aux, false); + &info, false); if (ret < 0) return sanitize_err(env, insn, ret, off_reg, dst_reg); } @@ -6895,7 +6899,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, return -EACCES; if (sanitize_needed(opcode)) { ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg, - &tmp_aux, true); + &info, true); if (ret < 0) return sanitize_err(env, insn, ret, off_reg, dst_reg); } From bb01a1bba579b4b1c5566af24d95f1767859771e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 21 May 2021 10:19:22 +0000 Subject: [PATCH 613/730] bpf: Fix mask direction swap upon off reg sign change Masking direction as indicated via mask_to_left is considered to be calculated once and then used to derive pointer limits. Thus, this needs to be placed into bpf_sanitize_info instead so we can pass it to sanitize_ptr_alu() call after the pointer move. Piotr noticed a corner case where the off reg causes masking direction change which then results in an incorrect final aux->alu_limit. Fixes: 7fedb63a8307 ("bpf: Tighten speculative pointer arithmetic mask") Reported-by: Piotr Krysiuk Signed-off-by: Daniel Borkmann Reviewed-by: Piotr Krysiuk Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 98690f5367f9..8574cb60915a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6409,18 +6409,10 @@ enum { }; static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, - const struct bpf_reg_state *off_reg, - u32 *alu_limit, u8 opcode) + u32 *alu_limit, bool mask_to_left) { - bool off_is_neg = off_reg->smin_value < 0; - bool mask_to_left = (opcode == BPF_ADD && off_is_neg) || - (opcode == BPF_SUB && !off_is_neg); u32 max = 0, ptr_limit = 0; - if (!tnum_is_const(off_reg->var_off) && - (off_reg->smin_value < 0) != (off_reg->smax_value < 0)) - return REASON_BOUNDS; - switch (ptr_reg->type) { case PTR_TO_STACK: /* Offset 0 is out-of-bounds, but acceptable start for the @@ -6488,6 +6480,7 @@ static bool sanitize_needed(u8 opcode) struct bpf_sanitize_info { struct bpf_insn_aux_data aux; + bool mask_to_left; }; static int sanitize_ptr_alu(struct bpf_verifier_env *env, @@ -6519,7 +6512,16 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, if (vstate->speculative) goto do_sim; - err = retrieve_ptr_limit(ptr_reg, off_reg, &alu_limit, opcode); + if (!commit_window) { + if (!tnum_is_const(off_reg->var_off) && + (off_reg->smin_value < 0) != (off_reg->smax_value < 0)) + return REASON_BOUNDS; + + info->mask_to_left = (opcode == BPF_ADD && off_is_neg) || + (opcode == BPF_SUB && !off_is_neg); + } + + err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left); if (err < 0) return err; From a7036191277f9fa68d92f2071ddc38c09b1e5ee5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 4 May 2021 08:58:25 +0000 Subject: [PATCH 614/730] bpf: No need to simulate speculative domain for immediates In 801c6058d14a ("bpf: Fix leakage of uninitialized bpf stack under speculation") we replaced masking logic with direct loads of immediates if the register is a known constant. Given in this case we do not apply any masking, there is also no reason for the operation to be truncated under the speculative domain. Therefore, there is also zero reason for the verifier to branch-off and simulate this case, it only needs to do it for unknown but bounded scalars. As a side-effect, this also enables few test cases that were previously rejected due to simulation under zero truncation. Signed-off-by: Daniel Borkmann Reviewed-by: Piotr Krysiuk Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8574cb60915a..94ba5163d4c5 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6545,8 +6545,12 @@ do_sim: /* If we're in commit phase, we're done here given we already * pushed the truncated dst_reg into the speculative verification * stack. + * + * Also, when register is a known constant, we rewrite register-based + * operation to immediate-based, and thus do not need masking (and as + * a consequence, do not need to simulate the zero-truncation either). */ - if (commit_window) + if (commit_window || off_is_imm) return 0; /* Simulate and find potential out-of-bounds access under From 1bad6fd52be4ce12d207e2820ceb0f29ab31fc53 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 4 May 2021 08:58:25 +0000 Subject: [PATCH 615/730] bpf, selftests: Adjust few selftest result_unpriv outcomes Given we don't need to simulate the speculative domain for registers with immediates anymore since the verifier uses direct imm-based rewrites instead of having to mask, we can also lift a few cases that were previously rejected. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov --- tools/testing/selftests/bpf/verifier/stack_ptr.c | 2 -- tools/testing/selftests/bpf/verifier/value_ptr_arith.c | 8 -------- 2 files changed, 10 deletions(-) diff --git a/tools/testing/selftests/bpf/verifier/stack_ptr.c b/tools/testing/selftests/bpf/verifier/stack_ptr.c index 07eaa04412ae..8ab94d65f3d5 100644 --- a/tools/testing/selftests/bpf/verifier/stack_ptr.c +++ b/tools/testing/selftests/bpf/verifier/stack_ptr.c @@ -295,8 +295,6 @@ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), BPF_EXIT_INSN(), }, - .result_unpriv = REJECT, - .errstr_unpriv = "invalid write to stack R1 off=0 size=1", .result = ACCEPT, .retval = 42, }, diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index e5913fd3b903..7ae2859d495c 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -300,8 +300,6 @@ }, .fixup_map_array_48b = { 3 }, .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .retval = 1, }, { @@ -371,8 +369,6 @@ }, .fixup_map_array_48b = { 3 }, .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .retval = 1, }, { @@ -472,8 +468,6 @@ }, .fixup_map_array_48b = { 3 }, .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .retval = 1, }, { @@ -766,8 +760,6 @@ }, .fixup_map_array_48b = { 3 }, .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .retval = 1, }, { From 0c1f3193b1cdd21e7182f97dc9bca7d284d18a15 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Wed, 12 May 2021 12:14:21 +0100 Subject: [PATCH 616/730] dm verity: fix require_signatures module_param permissions The third parameter of module_param() is permissions for the sysfs node but it looks like it is being used as the initial value of the parameter here. In fact, false here equates to omitting the file from sysfs and does not affect the value of require_signatures. Making the parameter writable is not simple because going from false->true is fine but it should not be possible to remove the requirement to verify a signature. But it can be useful to inspect the value of this parameter from userspace, so change the permissions to make a read-only file in sysfs. Signed-off-by: John Keeping Signed-off-by: Mike Snitzer --- drivers/md/dm-verity-verify-sig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-verity-verify-sig.c b/drivers/md/dm-verity-verify-sig.c index 29385dc470d5..db61a1f43ae9 100644 --- a/drivers/md/dm-verity-verify-sig.c +++ b/drivers/md/dm-verity-verify-sig.c @@ -15,7 +15,7 @@ #define DM_VERITY_VERIFY_ERR(s) DM_VERITY_ROOT_HASH_VERIFICATION " " s static bool require_signatures; -module_param(require_signatures, bool, false); +module_param(require_signatures, bool, 0444); MODULE_PARM_DESC(require_signatures, "Verify the roothash of dm-verity hash tree"); From f16dba5dc6f094041ab8c356e1e3a48ee0e3c8cd Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 25 May 2021 13:16:21 -0400 Subject: [PATCH 617/730] dm snapshot: revert "fix a crash when an origin has no snapshots" Commit 7ee06ddc4038f936b0d4459d37a7d4d844fb03db ("dm snapshot: fix a crash when an origin has no snapshots") introduced a regression in snapshot merging - causing the lvm2 test lvcreate-cache-snapshot.sh got stuck in an infinite loop. Even though commit 7ee06ddc4038f936b0d4459d37a7d4d844fb03db was marked for stable@ the stable team was notified to _not_ backport it. Fixes: 7ee06ddc4038 ("dm snapshot: fix a crash when an origin has no snapshots") Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-snap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index b8e4d31124ea..75e59294ef77 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -855,11 +855,12 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new) static uint32_t __minimum_chunk_size(struct origin *o) { struct dm_snapshot *snap; - unsigned chunk_size = rounddown_pow_of_two(UINT_MAX); + unsigned chunk_size = 0; if (o) list_for_each_entry(snap, &o->snapshots, list) - chunk_size = min(chunk_size, snap->store->chunk_size); + chunk_size = min_not_zero(chunk_size, + snap->store->chunk_size); return (uint32_t) chunk_size; } From 7e768532b2396bcb7fbf6f82384b85c0f1d2f197 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 25 May 2021 13:17:19 -0400 Subject: [PATCH 618/730] dm snapshot: properly fix a crash when an origin has no snapshots If an origin target has no snapshots, o->split_boundary is set to 0. This causes BUG_ON(sectors <= 0) in block/bio.c:bio_split(). Fix this by initializing chunk_size, and in turn split_boundary, to rounddown_pow_of_two(UINT_MAX) -- the largest power of two that fits into "unsigned" type. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer --- drivers/md/dm-snap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 75e59294ef77..751ec5ea1dbb 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -855,7 +855,7 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new) static uint32_t __minimum_chunk_size(struct origin *o) { struct dm_snapshot *snap; - unsigned chunk_size = 0; + unsigned chunk_size = rounddown_pow_of_two(UINT_MAX); if (o) list_for_each_entry(snap, &o->snapshots, list) From bfb819ea20ce8bbeeba17e1a6418bf8bda91fc28 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 25 May 2021 12:37:35 -0700 Subject: [PATCH 619/730] proc: Check /proc/$pid/attr/ writes against file opener Fix another "confused deputy" weakness[1]. Writes to /proc/$pid/attr/ files need to check the opener credentials, since these fds do not transition state across execve(). Without this, it is possible to trick another process (which may have different credentials) to write to its own /proc/$pid/attr/ files, leading to unexpected and possibly exploitable behaviors. [1] https://www.kernel.org/doc/html/latest/security/credentials.html?highlight=confused#open-file-credentials Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Linus Torvalds --- fs/proc/base.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/proc/base.c b/fs/proc/base.c index 3851bfcdba56..58bbf334265b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2703,6 +2703,10 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, void *page; int rv; + /* A task may only write when it was the opener. */ + if (file->f_cred != current_real_cred()) + return -EPERM; + rcu_read_lock(); task = pid_task(proc_pid(inode), PIDTYPE_PID); if (!task) { From 297739bd73f6e49d80bac4bfd27f3598b798c0d4 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 24 May 2021 22:49:24 -0400 Subject: [PATCH 620/730] sctp: add the missing setting for asoc encap_port This patch is to add the missing setting back for asoc encap_port. Fixes: 8dba29603b5c ("sctp: add SCTP_REMOTE_UDP_ENCAPS_PORT sockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 40f9f6c4a0a1..a79d193ff872 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4473,6 +4473,7 @@ static int sctp_setsockopt_encap_port(struct sock *sk, transports) t->encap_port = encap_port; + asoc->encap_port = encap_port; return 0; } From b2540cdce6e22ecf3de54daf5129cc37951348cc Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 24 May 2021 22:49:42 -0400 Subject: [PATCH 621/730] sctp: fix the proc_handler for sysctl encap_port proc_dointvec() cannot do min and max check for setting a value when extra1/extra2 is set, so change it to proc_dointvec_minmax() for sysctl encap_port. Fixes: e8a3001c2120 ("sctp: add encap_port for netns sock asoc and transport") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index e92df779af73..55871b277f47 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -307,7 +307,7 @@ static struct ctl_table sctp_net_table[] = { .data = &init_net.sctp.encap_port, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &udp_port_max, }, From c1cf1afd8b0f2f1b077df84e90497c07094406fc Mon Sep 17 00:00:00 2001 From: Yang Li Date: Tue, 25 May 2021 18:52:47 +0800 Subject: [PATCH 622/730] net: hns: Fix kernel-doc Fix function name in hns_ethtool.c kernel-doc comment to remove these warnings found by clang_w1. drivers/net/ethernet/hisilicon/hns/hns_ethtool.c:202: warning: expecting prototype for hns_nic_set_link_settings(). Prototype was for hns_nic_set_link_ksettings() instead. drivers/net/ethernet/hisilicon/hns/hns_ethtool.c:837: warning: expecting prototype for get_ethtool_stats(). Prototype was for hns_get_ethtool_stats() instead. drivers/net/ethernet/hisilicon/hns/hns_ethtool.c:894: warning: expecting prototype for get_strings(). Prototype was for hns_get_strings() instead. Reported-by: Abaci Robot Fixes: 'commit 262b38cdb3e4 ("net: ethernet: hisilicon: hns: use phydev from struct net_device")' Signed-off-by: Yang Li Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index da48c05435ea..7e62dcff2426 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -192,7 +192,7 @@ static int hns_nic_get_link_ksettings(struct net_device *net_dev, } /** - *hns_nic_set_link_settings - implement ethtool set link ksettings + *hns_nic_set_link_ksettings - implement ethtool set link ksettings *@net_dev: net_device *@cmd: ethtool_link_ksettings *retuen 0 - success , negative --fail @@ -827,7 +827,7 @@ hns_get_channels(struct net_device *net_dev, struct ethtool_channels *ch) } /** - * get_ethtool_stats - get detail statistics. + * hns_get_ethtool_stats - get detail statistics. * @netdev: net device * @stats: statistics info. * @data: statistics data. @@ -885,7 +885,7 @@ static void hns_get_ethtool_stats(struct net_device *netdev, } /** - * get_strings: Return a set of strings that describe the requested objects + * hns_get_strings: Return a set of strings that describe the requested objects * @netdev: net device * @stringset: string set ID. * @data: objects data. From 9453d45ecb6c2199d72e73c993e9d98677a2801b Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 25 May 2021 16:21:52 +0300 Subject: [PATCH 623/730] net: zero-initialize tc skb extension on allocation Function skb_ext_add() doesn't initialize created skb extension with any value and leaves it up to the user. However, since extension of type TC_SKB_EXT originally contained only single value tc_skb_ext->chain its users used to just assign the chain value without setting whole extension memory to zero first. This assumption changed when TC_SKB_EXT extension was extended with additional fields but not all users were updated to initialize the new fields which leads to use of uninitialized memory afterwards. UBSAN log: [ 778.299821] UBSAN: invalid-load in net/openvswitch/flow.c:899:28 [ 778.301495] load of value 107 is not a valid value for type '_Bool' [ 778.303215] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc7+ #2 [ 778.304933] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 778.307901] Call Trace: [ 778.308680] [ 778.309358] dump_stack+0xbb/0x107 [ 778.310307] ubsan_epilogue+0x5/0x40 [ 778.311167] __ubsan_handle_load_invalid_value.cold+0x43/0x48 [ 778.312454] ? memset+0x20/0x40 [ 778.313230] ovs_flow_key_extract.cold+0xf/0x14 [openvswitch] [ 778.314532] ovs_vport_receive+0x19e/0x2e0 [openvswitch] [ 778.315749] ? ovs_vport_find_upcall_portid+0x330/0x330 [openvswitch] [ 778.317188] ? create_prof_cpu_mask+0x20/0x20 [ 778.318220] ? arch_stack_walk+0x82/0xf0 [ 778.319153] ? secondary_startup_64_no_verify+0xb0/0xbb [ 778.320399] ? stack_trace_save+0x91/0xc0 [ 778.321362] ? stack_trace_consume_entry+0x160/0x160 [ 778.322517] ? lock_release+0x52e/0x760 [ 778.323444] netdev_frame_hook+0x323/0x610 [openvswitch] [ 778.324668] ? ovs_netdev_get_vport+0xe0/0xe0 [openvswitch] [ 778.325950] __netif_receive_skb_core+0x771/0x2db0 [ 778.327067] ? lock_downgrade+0x6e0/0x6f0 [ 778.328021] ? lock_acquire+0x565/0x720 [ 778.328940] ? generic_xdp_tx+0x4f0/0x4f0 [ 778.329902] ? inet_gro_receive+0x2a7/0x10a0 [ 778.330914] ? lock_downgrade+0x6f0/0x6f0 [ 778.331867] ? udp4_gro_receive+0x4c4/0x13e0 [ 778.332876] ? lock_release+0x52e/0x760 [ 778.333808] ? dev_gro_receive+0xcc8/0x2380 [ 778.334810] ? lock_downgrade+0x6f0/0x6f0 [ 778.335769] __netif_receive_skb_list_core+0x295/0x820 [ 778.336955] ? process_backlog+0x780/0x780 [ 778.337941] ? mlx5e_rep_tc_netdevice_event_unregister+0x20/0x20 [mlx5_core] [ 778.339613] ? seqcount_lockdep_reader_access.constprop.0+0xa7/0xc0 [ 778.341033] ? kvm_clock_get_cycles+0x14/0x20 [ 778.342072] netif_receive_skb_list_internal+0x5f5/0xcb0 [ 778.343288] ? __kasan_kmalloc+0x7a/0x90 [ 778.344234] ? mlx5e_handle_rx_cqe_mpwrq+0x9e0/0x9e0 [mlx5_core] [ 778.345676] ? mlx5e_xmit_xdp_frame_mpwqe+0x14d0/0x14d0 [mlx5_core] [ 778.347140] ? __netif_receive_skb_list_core+0x820/0x820 [ 778.348351] ? mlx5e_post_rx_mpwqes+0xa6/0x25d0 [mlx5_core] [ 778.349688] ? napi_gro_flush+0x26c/0x3c0 [ 778.350641] napi_complete_done+0x188/0x6b0 [ 778.351627] mlx5e_napi_poll+0x373/0x1b80 [mlx5_core] [ 778.352853] __napi_poll+0x9f/0x510 [ 778.353704] ? mlx5_flow_namespace_set_mode+0x260/0x260 [mlx5_core] [ 778.355158] net_rx_action+0x34c/0xa40 [ 778.356060] ? napi_threaded_poll+0x3d0/0x3d0 [ 778.357083] ? sched_clock_cpu+0x18/0x190 [ 778.358041] ? __common_interrupt+0x8e/0x1a0 [ 778.359045] __do_softirq+0x1ce/0x984 [ 778.359938] __irq_exit_rcu+0x137/0x1d0 [ 778.360865] irq_exit_rcu+0xa/0x20 [ 778.361708] common_interrupt+0x80/0xa0 [ 778.362640] [ 778.363212] asm_common_interrupt+0x1e/0x40 [ 778.364204] RIP: 0010:native_safe_halt+0xe/0x10 [ 778.365273] Code: 4f ff ff ff 4c 89 e7 e8 50 3f 40 fe e9 dc fe ff ff 48 89 df e8 43 3f 40 fe eb 90 cc e9 07 00 00 00 0f 00 2d 74 05 62 00 fb f4 90 e9 07 00 00 00 0f 00 2d 64 05 62 00 f4 c3 cc cc 0f 1f 44 00 [ 778.369355] RSP: 0018:ffffffff84407e48 EFLAGS: 00000246 [ 778.370570] RAX: ffff88842de46a80 RBX: ffffffff84425840 RCX: ffffffff83418468 [ 778.372143] RDX: 000000000026f1da RSI: 0000000000000004 RDI: ffffffff8343af5e [ 778.373722] RBP: fffffbfff0884b08 R08: 0000000000000000 R09: ffff88842de46bcb [ 778.375292] R10: ffffed1085bc8d79 R11: 0000000000000001 R12: 0000000000000000 [ 778.376860] R13: ffffffff851124a0 R14: 0000000000000000 R15: dffffc0000000000 [ 778.378491] ? rcu_eqs_enter.constprop.0+0xb8/0xe0 [ 778.379606] ? default_idle_call+0x5e/0xe0 [ 778.380578] default_idle+0xa/0x10 [ 778.381406] default_idle_call+0x96/0xe0 [ 778.382350] do_idle+0x3d4/0x550 [ 778.383153] ? arch_cpu_idle_exit+0x40/0x40 [ 778.384143] cpu_startup_entry+0x19/0x20 [ 778.385078] start_kernel+0x3c7/0x3e5 [ 778.385978] secondary_startup_64_no_verify+0xb0/0xbb Fix the issue by providing new function tc_skb_ext_alloc() that allocates tc skb extension and initializes its memory to 0 before returning it to the caller. Change all existing users to use new API instead of calling skb_ext_add() directly. Fixes: 038ebb1a713d ("net/sched: act_ct: fix miss set mru for ovs after defrag in act_ct") Fixes: d29334c15d33 ("net/sched: act_api: fix miss set post_ct for ovs after do conntrack in act_ct") Signed-off-by: Vlad Buslov Acked-by: Cong Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- include/net/pkt_cls.h | 11 +++++++++++ net/sched/cls_api.c | 2 +- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index 6cdc52d50a48..311382261840 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -626,7 +626,7 @@ static bool mlx5e_restore_skb(struct sk_buff *skb, u32 chain, u32 reg_c1, struct mlx5_eswitch *esw; u32 zone_restore_id; - tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT); + tc_skb_ext = tc_skb_ext_alloc(skb); if (!tc_skb_ext) { WARN_ON(1); return false; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index bccdb43a880b..2c776e7a7692 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -5090,7 +5090,7 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) { chain = mapped_obj.chain; - tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT); + tc_skb_ext = tc_skb_ext_alloc(skb); if (WARN_ON(!tc_skb_ext)) return false; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 255e4f4b521f..ec7823921bd2 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -709,6 +709,17 @@ tc_cls_common_offload_init(struct flow_cls_common_offload *cls_common, cls_common->extack = extack; } +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) +static inline struct tc_skb_ext *tc_skb_ext_alloc(struct sk_buff *skb) +{ + struct tc_skb_ext *tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT); + + if (tc_skb_ext) + memset(tc_skb_ext, 0, sizeof(*tc_skb_ext)); + return tc_skb_ext; +} +#endif + enum tc_matchall_command { TC_CLSMATCHALL_REPLACE, TC_CLSMATCHALL_DESTROY, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 40fbea626dfd..279f9e2a2319 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1624,7 +1624,7 @@ int tcf_classify_ingress(struct sk_buff *skb, /* If we missed on some chain */ if (ret == TC_ACT_UNSPEC && last_executed_chain) { - ext = skb_ext_add(skb, TC_SKB_EXT); + ext = tc_skb_ext_alloc(skb); if (WARN_ON_ONCE(!ext)) return TC_ACT_SHOT; ext->chain = last_executed_chain; From 65161c35554f7135e6656b3df1ce2c500ca0bdcf Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 25 May 2021 19:00:12 +0800 Subject: [PATCH 624/730] bnx2x: Fix missing error code in bnx2x_iov_init_one() Eliminate the follow smatch warning: drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c:1227 bnx2x_iov_init_one() warn: missing error code 'err'. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index d21f085044cd..27943b0446c2 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -1223,8 +1223,10 @@ int bnx2x_iov_init_one(struct bnx2x *bp, int int_mode_param, goto failed; /* SR-IOV capability was enabled but there are no VFs*/ - if (iov->total == 0) + if (iov->total == 0) { + err = -EINVAL; goto failed; + } iov->nr_virtfn = min_t(u16, iov->total, num_vfs_param); From 17f9c1b63cdd4439523cfcdf5683e5070b911f24 Mon Sep 17 00:00:00 2001 From: Stefan Chulski Date: Tue, 25 May 2021 19:04:41 +0300 Subject: [PATCH 625/730] net: mvpp2: add buffer header handling in RX If Link Partner sends frames larger than RX buffer size, MAC mark it as oversize but still would pass it to the Packet Processor. In this scenario, Packet Processor scatter frame between multiple buffers, but only a single buffer would be returned to the Buffer Manager pool and it would not refill the poll. Patch add handling of oversize error with buffer header handling, so all buffers would be returned to the Buffer Manager pool. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Reported-by: Russell King Signed-off-by: Stefan Chulski Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2/mvpp2.h | 22 ++++++++ .../net/ethernet/marvell/mvpp2/mvpp2_main.c | 54 +++++++++++++++---- 2 files changed, 67 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h index 8edba5ea90f0..4a61c90003b5 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h @@ -993,6 +993,14 @@ enum mvpp22_ptp_packet_format { #define MVPP2_DESC_DMA_MASK DMA_BIT_MASK(40) +/* Buffer header info bits */ +#define MVPP2_B_HDR_INFO_MC_ID_MASK 0xfff +#define MVPP2_B_HDR_INFO_MC_ID(info) ((info) & MVPP2_B_HDR_INFO_MC_ID_MASK) +#define MVPP2_B_HDR_INFO_LAST_OFFS 12 +#define MVPP2_B_HDR_INFO_LAST_MASK BIT(12) +#define MVPP2_B_HDR_INFO_IS_LAST(info) \ + (((info) & MVPP2_B_HDR_INFO_LAST_MASK) >> MVPP2_B_HDR_INFO_LAST_OFFS) + struct mvpp2_tai; /* Definitions */ @@ -1002,6 +1010,20 @@ struct mvpp2_rss_table { u32 indir[MVPP22_RSS_TABLE_ENTRIES]; }; +struct mvpp2_buff_hdr { + __le32 next_phys_addr; + __le32 next_dma_addr; + __le16 byte_count; + __le16 info; + __le16 reserved1; /* bm_qset (for future use, BM) */ + u8 next_phys_addr_high; + u8 next_dma_addr_high; + __le16 reserved2; + __le16 reserved3; + __le16 reserved4; + __le16 reserved5; +}; + /* Shared Packet Processor resources */ struct mvpp2 { /* Shared registers' base addresses */ diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index ec706d614cac..d39c7639cdba 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -3839,6 +3839,35 @@ mvpp2_run_xdp(struct mvpp2_port *port, struct mvpp2_rx_queue *rxq, return ret; } +static void mvpp2_buff_hdr_pool_put(struct mvpp2_port *port, struct mvpp2_rx_desc *rx_desc, + int pool, u32 rx_status) +{ + phys_addr_t phys_addr, phys_addr_next; + dma_addr_t dma_addr, dma_addr_next; + struct mvpp2_buff_hdr *buff_hdr; + + phys_addr = mvpp2_rxdesc_dma_addr_get(port, rx_desc); + dma_addr = mvpp2_rxdesc_cookie_get(port, rx_desc); + + do { + buff_hdr = (struct mvpp2_buff_hdr *)phys_to_virt(phys_addr); + + phys_addr_next = le32_to_cpu(buff_hdr->next_phys_addr); + dma_addr_next = le32_to_cpu(buff_hdr->next_dma_addr); + + if (port->priv->hw_version >= MVPP22) { + phys_addr_next |= ((u64)buff_hdr->next_phys_addr_high << 32); + dma_addr_next |= ((u64)buff_hdr->next_dma_addr_high << 32); + } + + mvpp2_bm_pool_put(port, pool, dma_addr, phys_addr); + + phys_addr = phys_addr_next; + dma_addr = dma_addr_next; + + } while (!MVPP2_B_HDR_INFO_IS_LAST(le16_to_cpu(buff_hdr->info))); +} + /* Main rx processing */ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, int rx_todo, struct mvpp2_rx_queue *rxq) @@ -3885,14 +3914,6 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, MVPP2_RXD_BM_POOL_ID_OFFS; bm_pool = &port->priv->bm_pools[pool]; - /* In case of an error, release the requested buffer pointer - * to the Buffer Manager. This request process is controlled - * by the hardware, and the information about the buffer is - * comprised by the RX descriptor. - */ - if (rx_status & MVPP2_RXD_ERR_SUMMARY) - goto err_drop_frame; - if (port->priv->percpu_pools) { pp = port->priv->page_pool[pool]; dma_dir = page_pool_get_dma_dir(pp); @@ -3904,6 +3925,18 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, rx_bytes + MVPP2_MH_SIZE, dma_dir); + /* Buffer header not supported */ + if (rx_status & MVPP2_RXD_BUF_HDR) + goto err_drop_frame; + + /* In case of an error, release the requested buffer pointer + * to the Buffer Manager. This request process is controlled + * by the hardware, and the information about the buffer is + * comprised by the RX descriptor. + */ + if (rx_status & MVPP2_RXD_ERR_SUMMARY) + goto err_drop_frame; + /* Prefetch header */ prefetch(data); @@ -3985,7 +4018,10 @@ err_drop_frame: dev->stats.rx_errors++; mvpp2_rx_error(port, rx_desc); /* Return the buffer to the pool */ - mvpp2_bm_pool_put(port, pool, dma_addr, phys_addr); + if (rx_status & MVPP2_RXD_BUF_HDR) + mvpp2_buff_hdr_pool_put(port, rx_desc, pool, rx_status); + else + mvpp2_bm_pool_put(port, pool, dma_addr, phys_addr); } rcu_read_unlock(); From bab09fe2f65200a67209a360988bc24f3de4b95d Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 25 May 2021 17:47:04 +0200 Subject: [PATCH 626/730] nfp: update maintainer and mailing list addresses Some of Netronome's activities and people have moved over to Corigine, including NFP driver maintenance and myself. Signed-off-by: Simon Horman Signed-off-by: Louis Peens Signed-off-by: David S. Miller --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2cc1cb72bc92..d34c0036bdcd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12687,9 +12687,9 @@ F: drivers/rtc/rtc-ntxec.c F: include/linux/mfd/ntxec.h NETRONOME ETHERNET DRIVERS -M: Simon Horman +M: Simon Horman R: Jakub Kicinski -L: oss-drivers@netronome.com +L: oss-drivers@corigine.com S: Maintained F: drivers/net/ethernet/netronome/ From 20b5759f21cf53a0e03031bd3fe539e332b13568 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 25 May 2021 14:23:10 -0700 Subject: [PATCH 627/730] mptcp: avoid OOB access in setsockopt() We can't use tcp_set_congestion_control() on an mptcp socket, as such function can end-up accessing a tcp-specific field - prior_ssthresh - causing an OOB access. To allow propagating the correct ca algo on subflow, cache the ca name at initialization time. Additionally avoid overriding the user-selected CA (if any) at clone time. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/182 Fixes: aa1fbd94e5c7 ("mptcp: sockopt: add TCP_CONGESTION and TCP_INFO") Acked-by: Florian Westphal Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 14 +++++++++++--- net/mptcp/protocol.h | 1 + net/mptcp/sockopt.c | 4 ++-- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2d21a4793d9d..2bc199549a88 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2424,13 +2424,12 @@ static int __mptcp_init_sock(struct sock *sk) timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0); timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0); - tcp_assign_congestion_control(sk); - return 0; } static int mptcp_init_sock(struct sock *sk) { + struct inet_connection_sock *icsk = inet_csk(sk); struct net *net = sock_net(sk); int ret; @@ -2448,6 +2447,16 @@ static int mptcp_init_sock(struct sock *sk) if (ret) return ret; + /* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will + * propagate the correct value + */ + tcp_assign_congestion_control(sk); + strcpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name); + + /* no need to keep a reference to the ops, the name will suffice */ + tcp_cleanup_congestion_control(sk); + icsk->icsk_ca_ops = NULL; + sk_sockets_allocated_inc(sk); sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1]; sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1]; @@ -2622,7 +2631,6 @@ static void __mptcp_destroy_sock(struct sock *sk) sk_stream_kill_queues(sk); xfrm_sk_free_policy(sk); - tcp_cleanup_congestion_control(sk); sk_refcnt_debug_release(sk); mptcp_dispose_initial_subflow(msk); sock_put(sk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index edc0128730df..165c8b40b384 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -258,6 +258,7 @@ struct mptcp_sock { } rcvq_space; u32 setsockopt_seq; + char ca_name[TCP_CA_NAME_MAX]; }; #define mptcp_lock_sock(___sk, cb) do { \ diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 00d941b66c1e..a79798189599 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -547,7 +547,7 @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t } if (ret == 0) - tcp_set_congestion_control(sk, name, false, cap_net_admin); + strcpy(msk->ca_name, name); release_sock(sk); return ret; @@ -705,7 +705,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG)); if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops) - tcp_set_congestion_control(ssk, inet_csk(sk)->icsk_ca_ops->name, false, true); + tcp_set_congestion_control(ssk, msk->ca_name, false, true); } static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) From 3812ce895047afdb78dc750a236515416e0ccded Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 25 May 2021 14:23:11 -0700 Subject: [PATCH 628/730] mptcp: drop unconditional pr_warn on bad opt This is a left-over of early day. A malicious peer can flood the kernel logs with useless messages, just drop it. Fixes: f296234c98a8 ("mptcp: Add handling of incoming MP_JOIN requests") Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 99fc21406168..71c535f4e1ef 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -130,7 +130,6 @@ static void mptcp_parse_option(const struct sk_buff *skb, memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN); pr_debug("MP_JOIN hmac"); } else { - pr_warn("MP_JOIN bad option size"); mp_opt->mp_join = 0; } break; From 3ed0a585bfadb6bd7080f11184adbc9edcce7dbc Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 25 May 2021 14:23:12 -0700 Subject: [PATCH 629/730] mptcp: avoid error message on infinite mapping Another left-over. Avoid flooding dmesg with useless text, we already have a MIB for that event. Fixes: 648ef4b88673 ("mptcp: Implement MPTCP receive path") Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/subflow.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index a5ede357cfbc..bde6be77ea73 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -867,7 +867,6 @@ static enum mapping_status get_mapping_status(struct sock *ssk, data_len = mpext->data_len; if (data_len == 0) { - pr_err("Infinite mapping not handled"); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX); return MAPPING_INVALID; } From d58300c3185b78ab910092488126b97f0abe3ae2 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 25 May 2021 14:23:13 -0700 Subject: [PATCH 630/730] mptcp: validate 'id' when stopping the ADD_ADDR retransmit timer when Linux receives an echo-ed ADD_ADDR, it checks the IP address against the list of "announced" addresses. In case of a positive match, the timer that handles retransmissions is stopped regardless of the 'Address Id' in the received packet: this behaviour does not comply with RFC8684 3.4.1. Fix it by validating the 'Address Id' in received echo-ed ADD_ADDRs. Tested using packetdrill, with the following captured output: unpatched kernel: Out <...> Flags [.], ack 1, win 256, options [mptcp add-addr v1 id 1 198.51.100.2 hmac 0xfd2e62517888fe29,mptcp dss ack 3007449509], length 0 In <...> Flags [.], ack 1, win 257, options [mptcp add-addr v1-echo id 1 1.2.3.4,mptcp dss ack 3013740213], length 0 Out <...> Flags [.], ack 1, win 256, options [mptcp add-addr v1 id 1 198.51.100.2 hmac 0xfd2e62517888fe29,mptcp dss ack 3007449509], length 0 In <...> Flags [.], ack 1, win 257, options [mptcp add-addr v1-echo id 90 198.51.100.2,mptcp dss ack 3013740213], length 0 ^^^ retransmission is stopped here, but 'Address Id' is 90 patched kernel: Out <...> Flags [.], ack 1, win 256, options [mptcp add-addr v1 id 1 198.51.100.2 hmac 0x1cf372d59e05f4b8,mptcp dss ack 3007449509], length 0 In <...> Flags [.], ack 1, win 257, options [mptcp add-addr v1-echo id 1 1.2.3.4,mptcp dss ack 1672384568], length 0 Out <...> Flags [.], ack 1, win 256, options [mptcp add-addr v1 id 1 198.51.100.2 hmac 0x1cf372d59e05f4b8,mptcp dss ack 3007449509], length 0 In <...> Flags [.], ack 1, win 257, options [mptcp add-addr v1-echo id 90 198.51.100.2,mptcp dss ack 1672384568], length 0 Out <...> Flags [.], ack 1, win 256, options [mptcp add-addr v1 id 1 198.51.100.2 hmac 0x1cf372d59e05f4b8,mptcp dss ack 3007449509], length 0 In <...> Flags [.], ack 1, win 257, options [mptcp add-addr v1-echo id 1 198.51.100.2,mptcp dss ack 1672384568], length 0 ^^^ retransmission is stopped here, only when both 'Address Id' and 'IP Address' match Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout") Signed-off-by: Davide Caratti Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 2 +- net/mptcp/pm_netlink.c | 8 ++++---- net/mptcp/protocol.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 71c535f4e1ef..6b825fb3fa83 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1023,7 +1023,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR); } else { mptcp_pm_add_addr_echoed(msk, &mp_opt.addr); - mptcp_pm_del_add_timer(msk, &mp_opt.addr); + mptcp_pm_del_add_timer(msk, &mp_opt.addr, true); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD); } diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 6ba040897738..2469e06a3a9d 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -346,18 +346,18 @@ out: struct mptcp_pm_add_entry * mptcp_pm_del_add_timer(struct mptcp_sock *msk, - struct mptcp_addr_info *addr) + struct mptcp_addr_info *addr, bool check_id) { struct mptcp_pm_add_entry *entry; struct sock *sk = (struct sock *)msk; spin_lock_bh(&msk->pm.lock); entry = mptcp_lookup_anno_list_by_saddr(msk, addr); - if (entry) + if (entry && (!check_id || entry->addr.id == addr->id)) entry->retrans_times = ADD_ADDR_RETRANS_MAX; spin_unlock_bh(&msk->pm.lock); - if (entry) + if (entry && (!check_id || entry->addr.id == addr->id)) sk_stop_timer_sync(sk, &entry->add_timer); return entry; @@ -1064,7 +1064,7 @@ static bool remove_anno_list_by_saddr(struct mptcp_sock *msk, { struct mptcp_pm_add_entry *entry; - entry = mptcp_pm_del_add_timer(msk, addr); + entry = mptcp_pm_del_add_timer(msk, addr, false); if (entry) { list_del(&entry->list); kfree(entry); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 165c8b40b384..0c6f99c67345 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -672,7 +672,7 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk); bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk); struct mptcp_pm_add_entry * mptcp_pm_del_add_timer(struct mptcp_sock *msk, - struct mptcp_addr_info *addr); + struct mptcp_addr_info *addr, bool check_id); struct mptcp_pm_add_entry * mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk, struct mptcp_addr_info *addr); From cc146267914950b12c2bdee68c1e9e5453c81cde Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 19 May 2021 08:22:15 +0200 Subject: [PATCH 631/730] md/raid5: remove an incorrect assert in in_chunk_boundary Now that the original bdev is stored in the bio this assert is incorrect and will trigger for any partitioned raid5 device. Reported-by: Florian Dazinger Tested-by: Florian Dazinger Cc: stable@vger.kernel.org # 5.12 Fixes: 309dca309fc3 ("block: store a block_device pointer in struct bio"), Reviewed-by: Guoqing Jiang Signed-off-by: Christoph Hellwig Signed-off-by: Song Liu --- drivers/md/raid5.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 841e1c1aa5e6..7d4ff8a5c55e 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5311,8 +5311,6 @@ static int in_chunk_boundary(struct mddev *mddev, struct bio *bio) unsigned int chunk_sectors; unsigned int bio_sectors = bio_sectors(bio); - WARN_ON_ONCE(bio->bi_bdev->bd_partno); - chunk_sectors = min(conf->chunk_sectors, conf->prev_chunk_sectors); return chunk_sectors >= ((sector & (chunk_sectors - 1)) + bio_sectors); From 17a91051fe63b40ec651b80097c9fff5b093fdc5 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 23 May 2021 15:48:39 +0100 Subject: [PATCH 632/730] io_uring/io-wq: close io-wq full-stop gap There is an old problem with io-wq cancellation where requests should be killed and are in io-wq but are not discoverable, e.g. in @next_hashed or @linked vars of io_worker_handle_work(). It adds some unreliability to individual request canellation, but also may potentially get __io_uring_cancel() stuck. For instance: 1) An __io_uring_cancel()'s cancellation round have not found any request but there are some as desribed. 2) __io_uring_cancel() goes to sleep 3) Then workers wake up and try to execute those hidden requests that happen to be unbound. As we already cancel all requests of io-wq there, set IO_WQ_BIT_EXIT in advance, so preventing 3) from executing unbound requests. The workers will initially break looping because of getting a signal as they are threads of the dying/exec()'ing user task. Cc: stable@vger.kernel.org Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/abfcf8c54cb9e8f7bfbad7e9a0cc5433cc70bdc2.1621781238.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io-wq.c | 20 +++++++++----------- fs/io-wq.h | 2 +- fs/io_uring.c | 6 ++++++ 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 5361a9b4b47b..de9b7ba3ba01 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -979,13 +979,16 @@ static bool io_task_work_match(struct callback_head *cb, void *data) return cwd->wqe->wq == data; } +void io_wq_exit_start(struct io_wq *wq) +{ + set_bit(IO_WQ_BIT_EXIT, &wq->state); +} + static void io_wq_exit_workers(struct io_wq *wq) { struct callback_head *cb; int node; - set_bit(IO_WQ_BIT_EXIT, &wq->state); - if (!wq->task) return; @@ -1020,8 +1023,6 @@ static void io_wq_destroy(struct io_wq *wq) cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); - io_wq_exit_workers(wq); - for_each_node(node) { struct io_wqe *wqe = wq->wqes[node]; struct io_cb_cancel_data match = { @@ -1036,16 +1037,13 @@ static void io_wq_destroy(struct io_wq *wq) kfree(wq); } -void io_wq_put(struct io_wq *wq) -{ - if (refcount_dec_and_test(&wq->refs)) - io_wq_destroy(wq); -} - void io_wq_put_and_exit(struct io_wq *wq) { + WARN_ON_ONCE(!test_bit(IO_WQ_BIT_EXIT, &wq->state)); + io_wq_exit_workers(wq); - io_wq_put(wq); + if (refcount_dec_and_test(&wq->refs)) + io_wq_destroy(wq); } static bool io_wq_worker_affinity(struct io_worker *worker, void *data) diff --git a/fs/io-wq.h b/fs/io-wq.h index 0e6d310999e8..af2df0680ee2 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -122,7 +122,7 @@ struct io_wq_data { }; struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data); -void io_wq_put(struct io_wq *wq); +void io_wq_exit_start(struct io_wq *wq); void io_wq_put_and_exit(struct io_wq *wq); void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work); diff --git a/fs/io_uring.c b/fs/io_uring.c index 5f82954004f6..6af8ca0cb01c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9078,6 +9078,9 @@ static void io_uring_cancel_sqpoll(struct io_sq_data *sqd) if (!current->io_uring) return; + if (tctx->io_wq) + io_wq_exit_start(tctx->io_wq); + WARN_ON_ONCE(!sqd || sqd->thread != current); atomic_inc(&tctx->in_idle); @@ -9112,6 +9115,9 @@ void __io_uring_cancel(struct files_struct *files) DEFINE_WAIT(wait); s64 inflight; + if (tctx->io_wq) + io_wq_exit_start(tctx->io_wq); + /* make sure overflow events are dropped */ atomic_inc(&tctx->in_idle); do { From e86be3a04bc4aeaf12f93af35f08f8d4385bcd98 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 25 May 2021 18:43:38 -0400 Subject: [PATCH 633/730] SUNRPC: More fixes for backlog congestion Ensure that we fix the XPRT_CONGESTED starvation issue for RDMA as well as socket based transports. Ensure we always initialise the request after waking up from the backlog list. Fixes: e877a88d1f06 ("SUNRPC in case of backlog, hand free slots directly to waiting task") Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 2 ++ net/sunrpc/xprt.c | 58 ++++++++++++++++----------------- net/sunrpc/xprtrdma/transport.c | 12 +++---- net/sunrpc/xprtrdma/verbs.c | 18 ++++++++-- net/sunrpc/xprtrdma/xprt_rdma.h | 1 + 5 files changed, 52 insertions(+), 39 deletions(-) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index d81fe8b364d0..61b622e334ee 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -368,6 +368,8 @@ struct rpc_xprt * xprt_alloc(struct net *net, size_t size, unsigned int num_prealloc, unsigned int max_req); void xprt_free(struct rpc_xprt *); +void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task); +bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req); static inline int xprt_enable_swap(struct rpc_xprt *xprt) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 5b3981fd3783..3509a7f139b9 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1607,11 +1607,18 @@ xprt_transmit(struct rpc_task *task) spin_unlock(&xprt->queue_lock); } -static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task) +static void xprt_complete_request_init(struct rpc_task *task) +{ + if (task->tk_rqstp) + xprt_request_init(task); +} + +void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task) { set_bit(XPRT_CONGESTED, &xprt->state); - rpc_sleep_on(&xprt->backlog, task, NULL); + rpc_sleep_on(&xprt->backlog, task, xprt_complete_request_init); } +EXPORT_SYMBOL_GPL(xprt_add_backlog); static bool __xprt_set_rq(struct rpc_task *task, void *data) { @@ -1619,14 +1626,13 @@ static bool __xprt_set_rq(struct rpc_task *task, void *data) if (task->tk_rqstp == NULL) { memset(req, 0, sizeof(*req)); /* mark unused */ - task->tk_status = -EAGAIN; task->tk_rqstp = req; return true; } return false; } -static bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req) +bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req) { if (rpc_wake_up_first(&xprt->backlog, __xprt_set_rq, req) == NULL) { clear_bit(XPRT_CONGESTED, &xprt->state); @@ -1634,6 +1640,7 @@ static bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req) } return true; } +EXPORT_SYMBOL_GPL(xprt_wake_up_backlog); static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task) { @@ -1643,7 +1650,7 @@ static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task goto out; spin_lock(&xprt->reserve_lock); if (test_bit(XPRT_CONGESTED, &xprt->state)) { - rpc_sleep_on(&xprt->backlog, task, NULL); + xprt_add_backlog(xprt, task); ret = true; } spin_unlock(&xprt->reserve_lock); @@ -1812,10 +1819,6 @@ xprt_request_init(struct rpc_task *task) struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req = task->tk_rqstp; - if (req->rq_task) - /* Already initialized */ - return; - req->rq_task = task; req->rq_xprt = xprt; req->rq_buffer = NULL; @@ -1876,10 +1879,8 @@ void xprt_retry_reserve(struct rpc_task *task) struct rpc_xprt *xprt = task->tk_xprt; task->tk_status = 0; - if (task->tk_rqstp != NULL) { - xprt_request_init(task); + if (task->tk_rqstp != NULL) return; - } task->tk_status = -EAGAIN; xprt_do_reserve(xprt, task); @@ -1904,24 +1905,21 @@ void xprt_release(struct rpc_task *task) } xprt = req->rq_xprt; - if (xprt) { - xprt_request_dequeue_xprt(task); - spin_lock(&xprt->transport_lock); - xprt->ops->release_xprt(xprt, task); - if (xprt->ops->release_request) - xprt->ops->release_request(task); - xprt_schedule_autodisconnect(xprt); - spin_unlock(&xprt->transport_lock); - if (req->rq_buffer) - xprt->ops->buf_free(task); - xdr_free_bvec(&req->rq_rcv_buf); - xdr_free_bvec(&req->rq_snd_buf); - if (req->rq_cred != NULL) - put_rpccred(req->rq_cred); - if (req->rq_release_snd_buf) - req->rq_release_snd_buf(req); - } else - xprt = task->tk_xprt; + xprt_request_dequeue_xprt(task); + spin_lock(&xprt->transport_lock); + xprt->ops->release_xprt(xprt, task); + if (xprt->ops->release_request) + xprt->ops->release_request(task); + xprt_schedule_autodisconnect(xprt); + spin_unlock(&xprt->transport_lock); + if (req->rq_buffer) + xprt->ops->buf_free(task); + xdr_free_bvec(&req->rq_rcv_buf); + xdr_free_bvec(&req->rq_snd_buf); + if (req->rq_cred != NULL) + put_rpccred(req->rq_cred); + if (req->rq_release_snd_buf) + req->rq_release_snd_buf(req); task->tk_rqstp = NULL; if (likely(!bc_prealloc(req))) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 09953597d055..19a49d26b1e4 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -520,9 +520,8 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) return; out_sleep: - set_bit(XPRT_CONGESTED, &xprt->state); - rpc_sleep_on(&xprt->backlog, task, NULL); task->tk_status = -EAGAIN; + xprt_add_backlog(xprt, task); } /** @@ -537,10 +536,11 @@ xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst) struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt, rx_xprt); - memset(rqst, 0, sizeof(*rqst)); - rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst)); - if (unlikely(!rpc_wake_up_next(&xprt->backlog))) - clear_bit(XPRT_CONGESTED, &xprt->state); + rpcrdma_reply_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst)); + if (!xprt_wake_up_backlog(xprt, rqst)) { + memset(rqst, 0, sizeof(*rqst)); + rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst)); + } } static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt, diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 1e965a380896..649c23518ec0 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1200,6 +1200,20 @@ rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt) return mr; } +/** + * rpcrdma_reply_put - Put reply buffers back into pool + * @buffers: buffer pool + * @req: object to return + * + */ +void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) +{ + if (req->rl_reply) { + rpcrdma_rep_put(buffers, req->rl_reply); + req->rl_reply = NULL; + } +} + /** * rpcrdma_buffer_get - Get a request buffer * @buffers: Buffer pool from which to obtain a buffer @@ -1228,9 +1242,7 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) */ void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) { - if (req->rl_reply) - rpcrdma_rep_put(buffers, req->rl_reply); - req->rl_reply = NULL; + rpcrdma_reply_put(buffers, req); spin_lock(&buffers->rb_lock); list_add(&req->rl_list, &buffers->rb_send_bufs); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 436ad7312614..5d231d94e944 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -479,6 +479,7 @@ struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req); void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep); +void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req); bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, gfp_t flags); From 56517ab958b7c11030e626250c00b9b1a24b41eb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 25 May 2021 10:23:05 -0400 Subject: [PATCH 634/730] NFS: Fix an Oopsable condition in __nfs_pageio_add_request() Ensure that nfs_pageio_error_cleanup() resets the mirror array contents, so that the structure reflects the fact that it is now empty. Also change the test in nfs_pageio_do_add_request() to be more robust by checking whether or not the list is empty rather than relying on the value of pg_count. Fixes: a7d42ddb3099 ("nfs: add mirroring support to pgio layer") Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 6c20b28d9d7c..d35c84af44e0 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -1094,15 +1094,16 @@ nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *prev = NULL; unsigned int size; - if (mirror->pg_count != 0) { - prev = nfs_list_entry(mirror->pg_list.prev); - } else { + if (list_empty(&mirror->pg_list)) { if (desc->pg_ops->pg_init) desc->pg_ops->pg_init(desc, req); if (desc->pg_error < 0) return 0; mirror->pg_base = req->wb_pgbase; - } + mirror->pg_count = 0; + mirror->pg_recoalesce = 0; + } else + prev = nfs_list_entry(mirror->pg_list.prev); if (desc->pg_maxretrans && req->wb_nio > desc->pg_maxretrans) { if (NFS_SERVER(desc->pg_inode)->flags & NFS_MOUNT_SOFTERR) From 0d0ea309357dea0d85a82815f02157eb7fcda39f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 25 May 2021 10:40:12 -0400 Subject: [PATCH 635/730] NFS: Don't corrupt the value of pg_bytes_written in nfs_do_recoalesce() The value of mirror->pg_bytes_written should only be updated after a successful attempt to flush out the requests on the list. Fixes: a7d42ddb3099 ("nfs: add mirroring support to pgio layer") Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index d35c84af44e0..daf6658517f4 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -1128,17 +1128,16 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) { struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); - if (!list_empty(&mirror->pg_list)) { int error = desc->pg_ops->pg_doio(desc); if (error < 0) desc->pg_error = error; - else + if (list_empty(&mirror->pg_list)) { mirror->pg_bytes_written += mirror->pg_count; - } - if (list_empty(&mirror->pg_list)) { - mirror->pg_count = 0; - mirror->pg_base = 0; + mirror->pg_count = 0; + mirror->pg_base = 0; + mirror->pg_recoalesce = 0; + } } } @@ -1228,7 +1227,6 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) do { list_splice_init(&mirror->pg_list, &head); - mirror->pg_bytes_written -= mirror->pg_count; mirror->pg_count = 0; mirror->pg_base = 0; mirror->pg_recoalesce = 0; From 70536bf4eb07ed5d2816ccb274e5e6b41b95a437 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 25 May 2021 11:26:35 -0400 Subject: [PATCH 636/730] NFS: Clean up reset of the mirror accounting variables Now that nfs_pageio_do_add_request() resets the pg_count, we don't need these other inlined resets. Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index daf6658517f4..cf9cc62ec48e 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -1132,12 +1132,8 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) int error = desc->pg_ops->pg_doio(desc); if (error < 0) desc->pg_error = error; - if (list_empty(&mirror->pg_list)) { + if (list_empty(&mirror->pg_list)) mirror->pg_bytes_written += mirror->pg_count; - mirror->pg_count = 0; - mirror->pg_base = 0; - mirror->pg_recoalesce = 0; - } } } @@ -1227,9 +1223,6 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) do { list_splice_init(&mirror->pg_list, &head); - mirror->pg_count = 0; - mirror->pg_base = 0; - mirror->pg_recoalesce = 0; while (!list_empty(&head)) { struct nfs_page *req; From 35f819d218035ddfbc71e7cf62a4849231701e58 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Fri, 21 May 2021 16:31:12 +0800 Subject: [PATCH 637/730] drm/ttm: Skip swapout if ttm object is not populated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Swapping a ttm object which has no backend pages makes no sense. Suggested-by: Christian König Signed-off-by: xinhui pan Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20210521083112.33176-1-xinhui.pan@amd.com CC: stable@kernel.org Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 510e3e001dab..a1dcf7d55c90 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -145,7 +145,7 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, list_for_each_entry(bo, &man->lru[j], lru) { uint32_t num_pages; - if (!bo->ttm || + if (!bo->ttm || !ttm_tt_is_populated(bo->ttm) || bo->ttm->page_flags & TTM_PAGE_FLAG_SG || bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) continue; From 75ea44e356b5de8c817f821c9dd68ae329e82add Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 25 May 2021 18:07:58 +0200 Subject: [PATCH 638/730] perf jevents: Fix getting maximum number of fds On some hosts, rlim.rlim_max can be returned as RLIM_INFINITY. By casting it to int, it is interpreted as -1, which will cause get_maxfds to return 0, causing "Invalid argument" errors in nftw() calls. Fix this by casting the second argument of min() to rlim_t instead. Fixes: 80eeb67fe577 ("perf jevents: Program to convert JSON file") Signed-off-by: Felix Fietkau Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sukadev Bhattiprolu Link: http://lore.kernel.org/lkml/20210525160758.97829-1-nbd@nbd.name Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 7422b0ea8790..9604446f8360 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -960,7 +960,7 @@ static int get_maxfds(void) struct rlimit rlim; if (getrlimit(RLIMIT_NOFILE, &rlim) == 0) - return min((int)rlim.rlim_max / 2, 512); + return min(rlim.rlim_max / 2, (rlim_t)512); return 512; } From 042a3eaad6daeabcfaf163aa44da8ea3cf8b5496 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 21 May 2021 14:51:15 -0700 Subject: [PATCH 639/730] nvme-tcp: remove incorrect Kconfig dep in BLK_DEV_NVME We need to select NVME_CORE. Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index a44d49d63968..494675aeaaad 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -71,7 +71,8 @@ config NVME_FC config NVME_TCP tristate "NVM Express over Fabrics TCP host driver" depends on INET - depends on BLK_DEV_NVME + depends on BLOCK + select NVME_CORE select NVME_FABRICS select CRYPTO select CRYPTO_CRC32C From 25df1acd2d36eb72b14c3d00f6b861b1e00b3aab Mon Sep 17 00:00:00 2001 From: Hou Pu Date: Thu, 20 May 2021 19:30:45 +0800 Subject: [PATCH 640/730] nvmet-tcp: fix inline data size comparison in nvmet_tcp_queue_response Using "<=" instead "<" to compare inline data size. Fixes: bdaf13279192 ("nvmet-tcp: fix a segmentation fault during io parsing error") Signed-off-by: Hou Pu Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index f9f34f6caf5e..d8aceef83284 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -550,7 +550,7 @@ static void nvmet_tcp_queue_response(struct nvmet_req *req) * nvmet_req_init is completed. */ if (queue->rcv_state == NVMET_TCP_RECV_PDU && - len && len < cmd->req.port->inline_data_size && + len && len <= cmd->req.port->inline_data_size && nvme_is_write(cmd->req.cmd)) return; } From aaeadd7075dc9e184bc7876e9dd7b3bada771df2 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 25 May 2021 08:49:05 -0700 Subject: [PATCH 641/730] nvmet: fix false keep-alive timeout when a controller is torn down Controller teardown flow may take some time in case it has many I/O queues, and the host may not send us keep-alive during this period. Hence reset the traffic based keep-alive timer so we don't trigger a controller teardown as a result of a keep-alive expiration. Reported-by: Yi Zhang Signed-off-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Tested-by: Yi Zhang Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 15 +++++++++++---- drivers/nvme/target/nvmet.h | 2 +- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 1853db38b682..4b29a5bac896 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -388,10 +388,10 @@ static void nvmet_keep_alive_timer(struct work_struct *work) { struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work), struct nvmet_ctrl, ka_work); - bool cmd_seen = ctrl->cmd_seen; + bool reset_tbkas = ctrl->reset_tbkas; - ctrl->cmd_seen = false; - if (cmd_seen) { + ctrl->reset_tbkas = false; + if (reset_tbkas) { pr_debug("ctrl %d reschedule traffic based keep-alive timer\n", ctrl->cntlid); schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); @@ -804,6 +804,13 @@ void nvmet_sq_destroy(struct nvmet_sq *sq) percpu_ref_exit(&sq->ref); if (ctrl) { + /* + * The teardown flow may take some time, and the host may not + * send us keep-alive during this period, hence reset the + * traffic based keep-alive timer so we don't trigger a + * controller teardown as a result of a keep-alive expiration. + */ + ctrl->reset_tbkas = true; nvmet_ctrl_put(ctrl); sq->ctrl = NULL; /* allows reusing the queue later */ } @@ -952,7 +959,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, } if (sq->ctrl) - sq->ctrl->cmd_seen = true; + sq->ctrl->reset_tbkas = true; return true; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index d69a409515d6..53aea9a8056e 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -167,7 +167,7 @@ struct nvmet_ctrl { struct nvmet_subsys *subsys; struct nvmet_sq **sqs; - bool cmd_seen; + bool reset_tbkas; struct mutex lock; u64 cap; From 3743c1723bfc62e69dbf022417720eed3f431b29 Mon Sep 17 00:00:00 2001 From: Zqiang Date: Wed, 26 May 2021 13:08:26 +0800 Subject: [PATCH 642/730] io-wq: Fix UAF when wakeup wqe in hash waitqueue BUG: KASAN: use-after-free in __wake_up_common+0x637/0x650 Read of size 8 at addr ffff8880304250d8 by task iou-wrk-28796/28802 Call Trace: __dump_stack [inline] dump_stack+0x141/0x1d7 print_address_description.constprop.0.cold+0x5b/0x2c6 __kasan_report [inline] kasan_report.cold+0x7c/0xd8 __wake_up_common+0x637/0x650 __wake_up_common_lock+0xd0/0x130 io_worker_handle_work+0x9dd/0x1790 io_wqe_worker+0xb2a/0xd40 ret_from_fork+0x1f/0x30 Allocated by task 28798: kzalloc_node [inline] io_wq_create+0x3c4/0xdd0 io_init_wq_offload [inline] io_uring_alloc_task_context+0x1bf/0x6b0 __io_uring_add_task_file+0x29a/0x3c0 io_uring_add_task_file [inline] io_uring_install_fd [inline] io_uring_create [inline] io_uring_setup+0x209a/0x2bd0 do_syscall_64+0x3a/0xb0 entry_SYSCALL_64_after_hwframe+0x44/0xae Freed by task 28798: kfree+0x106/0x2c0 io_wq_destroy+0x182/0x380 io_wq_put [inline] io_wq_put_and_exit+0x7a/0xa0 io_uring_clean_tctx [inline] __io_uring_cancel+0x428/0x530 io_uring_files_cancel do_exit+0x299/0x2a60 do_group_exit+0x125/0x310 get_signal+0x47f/0x2150 arch_do_signal_or_restart+0x2a8/0x1eb0 handle_signal_work[inline] exit_to_user_mode_loop [inline] exit_to_user_mode_prepare+0x171/0x280 __syscall_exit_to_user_mode_work [inline] syscall_exit_to_user_mode+0x19/0x60 do_syscall_64+0x47/0xb0 entry_SYSCALL_64_after_hwframe There are the following scenarios, hash waitqueue is shared by io-wq1 and io-wq2. (note: wqe is worker) io-wq1:worker2 | locks bit1 io-wq2:worker1 | waits bit1 io-wq1:worker3 | waits bit1 io-wq1:worker2 | completes all wqe bit1 work items io-wq1:worker2 | drop bit1, exit io-wq2:worker1 | locks bit1 io-wq1:worker3 | can not locks bit1, waits bit1 and exit io-wq1 | exit and free io-wq1 io-wq2:worker1 | drops bit1 io-wq1:worker3 | be waked up, even though wqe is freed After all iou-wrk belonging to io-wq1 have exited, remove wqe form hash waitqueue, it is guaranteed that there will be no more wqe belonging to io-wq1 in the hash waitqueue. Reported-by: syzbot+6cb11ade52aa17095297@syzkaller.appspotmail.com Signed-off-by: Zqiang Link: https://lore.kernel.org/r/20210526050826.30500-1-qiang.zhang@windriver.com Signed-off-by: Jens Axboe --- fs/io-wq.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index de9b7ba3ba01..b3e8624a37d0 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -1006,13 +1006,16 @@ static void io_wq_exit_workers(struct io_wq *wq) struct io_wqe *wqe = wq->wqes[node]; io_wq_for_each_worker(wqe, io_wq_worker_wake, NULL); - spin_lock_irq(&wq->hash->wait.lock); - list_del_init(&wq->wqes[node]->wait.entry); - spin_unlock_irq(&wq->hash->wait.lock); } rcu_read_unlock(); io_worker_ref_put(wq); wait_for_completion(&wq->worker_done); + + for_each_node(node) { + spin_lock_irq(&wq->hash->wait.lock); + list_del_init(&wq->wqes[node]->wait.entry); + spin_unlock_irq(&wq->hash->wait.lock); + } put_task_struct(wq->task); wq->task = NULL; } From 9f5815315e0b93146d7b0be4d96ee2d74eeabb98 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 25 May 2021 22:19:01 -0700 Subject: [PATCH 643/730] xfs: add new IRC channel to MAINTAINERS Add our new OFTC channel to the MAINTAINERS list so everyone will know where to go. Ignore the XFS wikis, we have no access to them. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 008fcad7ac00..ceb146e9b506 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19998,6 +19998,7 @@ F: arch/x86/xen/*swiotlb* F: drivers/xen/*swiotlb* XFS FILESYSTEM +C: irc://irc.oftc.net/xfs M: Darrick J. Wong M: linux-xfs@vger.kernel.org L: linux-xfs@vger.kernel.org From 62f3415db237b8d2aa9a804ff84ce2efa87df179 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 26 May 2021 11:46:17 -0700 Subject: [PATCH 644/730] net: phy: Document phydev::dev_flags bits allocation Document the phydev::dev_flags bit allocation to allow bits 15:0 to define PHY driver specific behavior, bits 23:16 to be reserved for now, and bits 31:24 to hold generic PHY driver flags. Signed-off-by: Florian Fainelli Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20210526184617.3105012-1-f.fainelli@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/phy.h b/include/linux/phy.h index 60d2b26026a2..852743f07e3e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -496,6 +496,11 @@ struct macsec_ops; * @mac_managed_pm: Set true if MAC driver takes of suspending/resuming PHY * @state: State of the PHY for management purposes * @dev_flags: Device-specific flags used by the PHY driver. + * Bits [15:0] are free to use by the PHY driver to communicate + * driver specific behavior. + * Bits [23:16] are currently reserved for future use. + * Bits [31:24] are reserved for defining generic + * PHY driver behavior. * @irq: IRQ number of the PHY's interrupt (-1 if none) * @phy_timer: The timer for handling the state machine * @phylink: Pointer to phylink instance for this PHY From 0e68c4b11f1e66d211ad242007e9f1076a6b7709 Mon Sep 17 00:00:00 2001 From: Jeremy Szu Date: Thu, 20 May 2021 01:03:53 +0800 Subject: [PATCH 645/730] ALSA: hda/realtek: fix mute/micmute LEDs for HP 855 G8 The HP EliteBook 855 G8 Notebook PC is using ALC285 codec which needs ALC285_FIXUP_HP_MUTE_LED fixup to make it works. After applying the fixup, the mute/micmute LEDs work good. Signed-off-by: Jeremy Szu Cc: Link: https://lore.kernel.org/r/20210519170357.58410-1-jeremy.szu@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 90bf0d3a830a..7f743382d395 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8328,6 +8328,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP), SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x884c, "HP EliteBook 840 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), From bbe183e07817a46cf8d3d7fc88093df81d23a957 Mon Sep 17 00:00:00 2001 From: Jeremy Szu Date: Thu, 20 May 2021 01:03:54 +0800 Subject: [PATCH 646/730] ALSA: hda/realtek: fix mute/micmute LEDs and speaker for HP Zbook G8 The HP ZBook Studio 15.6 Inch G8 is using ALC285 codec which is using 0x04 to control mute LED and 0x01 to control micmute LED. In the other hand, there is no output from right channel of speaker. Therefore, add a quirk to make it works. Signed-off-by: Jeremy Szu Cc: Link: https://lore.kernel.org/r/20210519170357.58410-2-jeremy.szu@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 7f743382d395..f33537099ae2 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8328,6 +8328,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP), SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x884c, "HP EliteBook 840 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), From e650c1a959da49f2b873cb56564b825882c22e7a Mon Sep 17 00:00:00 2001 From: Jeremy Szu Date: Thu, 20 May 2021 01:03:55 +0800 Subject: [PATCH 647/730] ALSA: hda/realtek: fix mute/micmute LEDs and speaker for HP Zbook Fury 15 G8 The HP ZBook Fury 15.6 Inch G8 is using ALC285 codec which is using 0x04 to control mute LED and 0x01 to control micmute LED. In the other hand, there is no output from right channel of speaker. Therefore, add a quirk to make it works. Signed-off-by: Jeremy Szu Cc: Link: https://lore.kernel.org/r/20210519170357.58410-3-jeremy.szu@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f33537099ae2..784fdeb8dfea 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8328,6 +8328,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP), SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x884c, "HP EliteBook 840 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8870, "HP ZBook Fury 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST), From 50dbfae972cbe0e3c631e73c7c58cbc48bfc6a49 Mon Sep 17 00:00:00 2001 From: Jeremy Szu Date: Thu, 20 May 2021 01:03:56 +0800 Subject: [PATCH 648/730] ALSA: hda/realtek: fix mute/micmute LEDs and speaker for HP Zbook Fury 17 G8 The HP ZBook Studio 17.3 Inch G8 is using ALC285 codec which is using 0x04 to control mute LED and 0x01 to control micmute LED. In the other hand, there is no output from right channel of speaker. Therefore, add a quirk to make it works. Signed-off-by: Jeremy Szu Cc: Link: https://lore.kernel.org/r/20210519170357.58410-4-jeremy.szu@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 784fdeb8dfea..61a60c420f6f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8328,6 +8328,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP), SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x884c, "HP EliteBook 840 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x886d, "HP ZBook Fury 17.3 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8870, "HP ZBook Fury 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED), From 5d8db38ad7660e4d78f4e2a63f14336f31f07a63 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 27 May 2021 17:26:40 +0800 Subject: [PATCH 649/730] thermal/drivers/qcom: Fix error code in adc_tm5_get_dt_channel_data() Return -EINVAL when args is invalid instead of 'ret' which is set to zero by a previous successful call to a function. Fixes: ca66dca5eda6 ("thermal: qcom: add support for adc-tm5 PMIC thermal monitor") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20210527092640.2070555-1-yangyingliang@huawei.com --- drivers/thermal/qcom/qcom-spmi-adc-tm5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/qcom/qcom-spmi-adc-tm5.c b/drivers/thermal/qcom/qcom-spmi-adc-tm5.c index b460b56e981c..232fd0b33325 100644 --- a/drivers/thermal/qcom/qcom-spmi-adc-tm5.c +++ b/drivers/thermal/qcom/qcom-spmi-adc-tm5.c @@ -441,7 +441,7 @@ static int adc_tm5_get_dt_channel_data(struct adc_tm5_chip *adc_tm, if (args.args_count != 1 || args.args[0] >= ADC5_MAX_CHANNEL) { dev_err(dev, "%s: invalid ADC channel number %d\n", name, chan); - return ret; + return -EINVAL; } channel->adc_channel = args.args[0]; From e67afa7ee4a59584d7253e45d7f63b9528819a13 Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Tue, 25 May 2021 23:32:35 -0400 Subject: [PATCH 650/730] NFSv4: Fix v4.0/v4.1 SEEK_DATA return -ENOTSUPP when set NFS_V4_2 config Since commit bdcc2cd14e4e ("NFSv4.2: handle NFS-specific llseek errors"), nfs42_proc_llseek would return -EOPNOTSUPP rather than -ENOTSUPP when SEEK_DATA on NFSv4.0/v4.1. This will lead xfstests generic/285 not run on NFSv4.0/v4.1 when set the CONFIG_NFS_V4_2, rather than run failed. Fixes: bdcc2cd14e4e ("NFSv4.2: handle NFS-specific llseek errors") Cc: # 4.2 Signed-off-by: Zhang Xiaoxu Signed-off-by: Trond Myklebust --- fs/nfs/nfs4file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 57b3821d975a..a1e5c6b85ded 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -211,7 +211,7 @@ static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence) case SEEK_HOLE: case SEEK_DATA: ret = nfs42_proc_llseek(filep, offset, whence); - if (ret != -ENOTSUPP) + if (ret != -EOPNOTSUPP) return ret; fallthrough; default: From ae605ee9830840f14566a3b1cde27fa8096dbdd4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 26 May 2021 15:35:20 -0400 Subject: [PATCH 651/730] xprtrdma: Revert 586a0787ce35 Commit 9ed5af268e88 ("SUNRPC: Clean up the handling of page padding in rpc_prepare_reply_pages()") [Dec 2020] affects RPC Replies that have a data payload (i.e., Write chunks). rpcrdma_prepare_readch(), as its name suggests, sets up Read chunks which are data payloads within RPC Calls. Those payloads are constructed by xdr_write_pages(), which continues to stuff the call buffer's tail kvec with the payload's XDR roundup. Thus removing the tail buffer logic in rpcrdma_prepare_readch() was the wrong thing to do. Fixes: 586a0787ce35 ("xprtrdma: Clean up rpcrdma_prepare_readch()") Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/rpc_rdma.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 649f7d8b9733..c335c1361564 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -628,8 +628,9 @@ out_mapping_err: return false; } -/* The tail iovec might not reside in the same page as the - * head iovec. +/* The tail iovec may include an XDR pad for the page list, + * as well as additional content, and may not reside in the + * same page as the head iovec. */ static bool rpcrdma_prepare_tail_iov(struct rpcrdma_req *req, struct xdr_buf *xdr, @@ -747,19 +748,27 @@ static bool rpcrdma_prepare_readch(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, struct xdr_buf *xdr) { - struct kvec *tail = &xdr->tail[0]; - if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len)) return false; - /* If there is a Read chunk, the page list is handled + /* If there is a Read chunk, the page list is being handled * via explicit RDMA, and thus is skipped here. */ - if (tail->iov_len) { - if (!rpcrdma_prepare_tail_iov(req, xdr, - offset_in_page(tail->iov_base), - tail->iov_len)) + /* Do not include the tail if it is only an XDR pad */ + if (xdr->tail[0].iov_len > 3) { + unsigned int page_base, len; + + /* If the content in the page list is an odd length, + * xdr_write_pages() adds a pad at the beginning of + * the tail iovec. Force the tail's non-pad content to + * land at the next XDR position in the Send message. + */ + page_base = offset_in_page(xdr->tail[0].iov_base); + len = xdr->tail[0].iov_len; + page_base += len & 3; + len -= len & 3; + if (!rpcrdma_prepare_tail_iov(req, xdr, page_base, len)) return false; kref_get(&req->rl_kref); } From bbf0a94744edfeee298e4a9ab6fd694d639a5cdf Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Wed, 26 May 2021 22:33:34 +0300 Subject: [PATCH 652/730] mei: request autosuspend after sending rx flow control A rx flow control waiting in the control queue may block autosuspend. Re-request autosuspend after flow control been sent to unblock the transition to the low power state. Cc: Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20210526193334.445759-1-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/interrupt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c index a98f6b895af7..aab3ebfa9fc4 100644 --- a/drivers/misc/mei/interrupt.c +++ b/drivers/misc/mei/interrupt.c @@ -277,6 +277,9 @@ static int mei_cl_irq_read(struct mei_cl *cl, struct mei_cl_cb *cb, return ret; } + pm_runtime_mark_last_busy(dev->dev); + pm_request_autosuspend(dev->dev); + list_move_tail(&cb->list, &cl->rd_pending); return 0; From a799b68a7c7ac97b457aba4ede4122a2a9f536ab Mon Sep 17 00:00:00 2001 From: Huilong Deng Date: Fri, 21 May 2021 22:07:37 +0800 Subject: [PATCH 653/730] nfs: Remove trailing semicolon in macros Macros should not use a trailing semicolon. Signed-off-by: Huilong Deng Signed-off-by: Trond Myklebust --- fs/nfs/namespace.c | 2 +- fs/nfs/super.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 93e60e921f92..bc0c698f3350 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -362,7 +362,7 @@ static const struct kernel_param_ops param_ops_nfs_timeout = { .set = param_set_nfs_timeout, .get = param_get_nfs_timeout, }; -#define param_check_nfs_timeout(name, p) __param_check(name, p, int); +#define param_check_nfs_timeout(name, p) __param_check(name, p, int) module_param(nfs_mountpoint_expiry_timeout, nfs_timeout, 0644); MODULE_PARM_DESC(nfs_mountpoint_expiry_timeout, diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 19a212f9725d..fe58525cfed4 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1379,7 +1379,7 @@ static const struct kernel_param_ops param_ops_portnr = { .set = param_set_portnr, .get = param_get_uint, }; -#define param_check_portnr(name, p) __param_check(name, p, unsigned int); +#define param_check_portnr(name, p) __param_check(name, p, unsigned int) module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); module_param_named(callback_nr_threads, nfs_callback_nr_threads, ushort, 0644); From 9808f9be31c68af43f6e531f2c851ebb066513fe Mon Sep 17 00:00:00 2001 From: Christian Gmeiner Date: Thu, 27 May 2021 11:54:40 +0200 Subject: [PATCH 654/730] serial: 8250_pci: handle FL_NOIRQ board flag In commit 8428413b1d14 ("serial: 8250_pci: Implement MSI(-X) support") the way the irq gets allocated was changed. With that change the handling FL_NOIRQ got lost. Restore the old behaviour. Fixes: 8428413b1d14 ("serial: 8250_pci: Implement MSI(-X) support") Cc: Signed-off-by: Christian Gmeiner Link: https://lore.kernel.org/r/20210527095529.26281-1-christian.gmeiner@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 04fe42469990..780cc99732b6 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -3958,21 +3958,26 @@ pciserial_init_ports(struct pci_dev *dev, const struct pciserial_board *board) uart.port.flags = UPF_SKIP_TEST | UPF_BOOT_AUTOCONF | UPF_SHARE_IRQ; uart.port.uartclk = board->base_baud * 16; - if (pci_match_id(pci_use_msi, dev)) { - dev_dbg(&dev->dev, "Using MSI(-X) interrupts\n"); - pci_set_master(dev); - rc = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_ALL_TYPES); + if (board->flags & FL_NOIRQ) { + uart.port.irq = 0; } else { - dev_dbg(&dev->dev, "Using legacy interrupts\n"); - rc = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_LEGACY); - } - if (rc < 0) { - kfree(priv); - priv = ERR_PTR(rc); - goto err_deinit; + if (pci_match_id(pci_use_msi, dev)) { + dev_dbg(&dev->dev, "Using MSI(-X) interrupts\n"); + pci_set_master(dev); + rc = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_ALL_TYPES); + } else { + dev_dbg(&dev->dev, "Using legacy interrupts\n"); + rc = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_LEGACY); + } + if (rc < 0) { + kfree(priv); + priv = ERR_PTR(rc); + goto err_deinit; + } + + uart.port.irq = pci_irq_vector(dev, 0); } - uart.port.irq = pci_irq_vector(dev, 0); uart.port.dev = &dev->dev; for (i = 0; i < nr_ports; i++) { From b16ef427adf31fb4f6522458d37b3fe21d6d03b8 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 27 May 2021 11:25:48 +0200 Subject: [PATCH 655/730] io_uring: fix data race to avoid potential NULL-deref Commit ba5ef6dc8a82 ("io_uring: fortify tctx/io_wq cleanup") introduced setting tctx->io_wq to NULL a bit earlier. This has caused KCSAN to detect a data race between accesses to tctx->io_wq: write to 0xffff88811d8df330 of 8 bytes by task 3709 on cpu 1: io_uring_clean_tctx fs/io_uring.c:9042 [inline] __io_uring_cancel fs/io_uring.c:9136 io_uring_files_cancel include/linux/io_uring.h:16 [inline] do_exit kernel/exit.c:781 do_group_exit kernel/exit.c:923 get_signal kernel/signal.c:2835 arch_do_signal_or_restart arch/x86/kernel/signal.c:789 handle_signal_work kernel/entry/common.c:147 [inline] exit_to_user_mode_loop kernel/entry/common.c:171 [inline] ... read to 0xffff88811d8df330 of 8 bytes by task 6412 on cpu 0: io_uring_try_cancel_iowq fs/io_uring.c:8911 [inline] io_uring_try_cancel_requests fs/io_uring.c:8933 io_ring_exit_work fs/io_uring.c:8736 process_one_work kernel/workqueue.c:2276 ... With the config used, KCSAN only reports data races with value changes: this implies that in the case here we also know that tctx->io_wq was non-NULL. Therefore, depending on interleaving, we may end up with: [CPU 0] | [CPU 1] io_uring_try_cancel_iowq() | io_uring_clean_tctx() if (!tctx->io_wq) // false | ... ... | tctx->io_wq = NULL io_wq_cancel_cb(tctx->io_wq, ...) | ... -> NULL-deref | Note: It is likely that thus far we've gotten lucky and the compiler optimizes the double-read into a single read into a register -- but this is never guaranteed, and can easily change with a different config! Fix the data race by restoring the previous behaviour, where both setting io_wq to NULL and put of the wq are _serialized_ after concurrent io_uring_try_cancel_iowq() via acquisition of the uring_lock and removal of the node in io_uring_del_task_file(). Fixes: ba5ef6dc8a82 ("io_uring: fortify tctx/io_wq cleanup") Suggested-by: Pavel Begunkov Reported-by: syzbot+bf2b3d0435b9b728946c@syzkaller.appspotmail.com Signed-off-by: Marco Elver Cc: Jens Axboe Link: https://lore.kernel.org/r/20210527092547.2656514-1-elver@google.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 6af8ca0cb01c..903458afd56c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9039,11 +9039,16 @@ static void io_uring_clean_tctx(struct io_uring_task *tctx) struct io_tctx_node *node; unsigned long index; - tctx->io_wq = NULL; xa_for_each(&tctx->xa, index, node) io_uring_del_task_file(index); - if (wq) + if (wq) { + /* + * Must be after io_uring_del_task_file() (removes nodes under + * uring_lock) to avoid race with io_uring_try_cancel_iowq(). + */ + tctx->io_wq = NULL; io_wq_put_and_exit(wq); + } } static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked) From 0ee74d5a48635c848c20f152d0d488bf84641304 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Tue, 25 May 2021 15:08:02 +0800 Subject: [PATCH 656/730] iommu/vt-d: Fix sysfs leak in alloc_iommu() iommu_device_sysfs_add() is called before, so is has to be cleaned on subsequent errors. Fixes: 39ab9555c2411 ("iommu: Add sysfs bindings for struct iommu_device") Cc: stable@vger.kernel.org # 4.11.x Signed-off-by: Rolf Eike Beer Acked-by: Lu Baolu Link: https://lore.kernel.org/r/17411490.HIIP88n32C@mobilepool36.emlix.com Link: https://lore.kernel.org/r/20210525070802.361755-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 1757ac1e1623..84057cb9596c 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1142,7 +1142,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) err = iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL); if (err) - goto err_unmap; + goto err_sysfs; } drhd->iommu = iommu; @@ -1150,6 +1150,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) return 0; +err_sysfs: + iommu_device_sysfs_remove(&iommu->iommu); err_unmap: unmap_iommu(iommu); error_free_seq_id: From 991c2c5980fb97ae6194f7c46b44f9446629eb4e Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 26 May 2021 19:57:42 -0700 Subject: [PATCH 657/730] xfs: btree format inode forks can have zero extents xfs/538 is assert failing with this trace when testing with directory block sizes of 64kB: XFS: Assertion failed: !xfs_need_iread_extents(ifp), file: fs/xfs/libxfs/xfs_bmap.c, line: 608 .... Call Trace: xfs_bmap_btree_to_extents+0x2a9/0x470 ? kmem_cache_alloc+0xe7/0x220 __xfs_bunmapi+0x4ca/0xdf0 xfs_bunmapi+0x1a/0x30 xfs_dir2_shrink_inode+0x71/0x210 xfs_dir2_block_to_sf+0x2ae/0x410 xfs_dir2_block_removename+0x21a/0x280 xfs_dir_removename+0x195/0x1d0 xfs_remove+0x244/0x460 xfs_vn_unlink+0x53/0xa0 ? selinux_inode_unlink+0x13/0x20 vfs_unlink+0x117/0x220 do_unlinkat+0x1a2/0x2d0 __x64_sys_unlink+0x42/0x60 do_syscall_64+0x3a/0x70 entry_SYSCALL_64_after_hwframe+0x44/0xae This is a check to ensure that the extents have been read into memory before we are doing a ifork btree manipulation. This assert is bogus in the above case. We have a fragmented directory block that has more extents in it than can fit in extent format, so the inode data fork is in btree format. xfs_dir2_shrink_inode() asks to remove all remaining 16 filesystem blocks from the inode so it can convert to short form, and __xfs_bunmapi() removes all the extents. We now have a data fork in btree format but have zero extents in the fork. This incorrectly trips the xfs_need_iread_extents() assert because it assumes that an empty extent btree means the extent tree has not been read into memory yet. This is clearly not the case with xfs_bunmapi(), as it has an explicit call to xfs_iread_extents() in it to pull the extents into memory before it starts unmapping. Also, the assert directly after this bogus one is: ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE); Which covers the context in which it is legal to call xfs_bmap_btree_to_extents just fine. Hence we should just remove the bogus assert as it is clearly wrong and causes a regression. The returns the test behaviour to the pre-existing assert failure in xfs_dir2_shrink_inode() that indicates xfs_bunmapi() has failed to remove all the extents in the range it was asked to unmap. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_bmap.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 7e3b9b01431e..3f8b6da09261 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -605,7 +605,6 @@ xfs_bmap_btree_to_extents( ASSERT(cur); ASSERT(whichfork != XFS_COW_FORK); - ASSERT(!xfs_need_iread_extents(ifp)); ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE); ASSERT(be16_to_cpu(rblock->bb_level) == 1); ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1); From 0fe0bbe00a6fb77adf75085b7d06b71a830dd6f2 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 27 May 2021 08:11:01 -0700 Subject: [PATCH 658/730] xfs: bunmapi has unnecessary AG lock ordering issues large directory block size operations are assert failing because xfs_bunmapi() is not completely removing fragmented directory blocks like so: XFS: Assertion failed: done, file: fs/xfs/libxfs/xfs_dir2.c, line: 677 .... Call Trace: xfs_dir2_shrink_inode+0x1a8/0x210 xfs_dir2_block_to_sf+0x2ae/0x410 xfs_dir2_block_removename+0x21a/0x280 xfs_dir_removename+0x195/0x1d0 xfs_rename+0xb79/0xc50 ? avc_has_perm+0x8d/0x1a0 ? avc_has_perm_noaudit+0x9a/0x120 xfs_vn_rename+0xdb/0x150 vfs_rename+0x719/0xb50 ? __lookup_hash+0x6a/0xa0 do_renameat2+0x413/0x5e0 __x64_sys_rename+0x45/0x50 do_syscall_64+0x3a/0x70 entry_SYSCALL_64_after_hwframe+0x44/0xae We are aborting the bunmapi() pass because of this specific chunk of code: /* * Make sure we don't touch multiple AGF headers out of order * in a single transaction, as that could cause AB-BA deadlocks. */ if (!wasdel && !isrt) { agno = XFS_FSB_TO_AGNO(mp, del.br_startblock); if (prev_agno != NULLAGNUMBER && prev_agno > agno) break; prev_agno = agno; } This is designed to prevent deadlocks in AGF locking when freeing multiple extents by ensuring that we only ever lock in increasing AG number order. Unfortunately, this also violates the "bunmapi will always succeed" semantic that some high level callers depend on, such as xfs_dir2_shrink_inode(), xfs_da_shrink_inode() and xfs_inactive_symlink_rmt(). This AG lock ordering was introduced back in 2017 to fix deadlocks triggered by generic/299 as reported here: https://lore.kernel.org/linux-xfs/800468eb-3ded-9166-20a4-047de8018582@gmail.com/ This codebase is old enough that it was before we were defering all AG based extent freeing from within xfs_bunmapi(). THat is, we never actually lock AGs in xfs_bunmapi() any more - every non-rt based extent free is added to the defer ops list, as is all BMBT block freeing. And RT extents are not RT based, so there's no lock ordering issues associated with them. Hence this AGF lock ordering code is both broken and dead. Let's just remove it so that the large directory block code works reliably again. Tested against xfs/538 and generic/299 which is the original test that exposed the deadlocks that this code fixed. Fixes: 5b094d6dac04 ("xfs: fix multi-AG deadlock in xfs_bunmapi") Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_bmap.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 3f8b6da09261..a3e0e6f672d6 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5349,7 +5349,6 @@ __xfs_bunmapi( xfs_fsblock_t sum; xfs_filblks_t len = *rlen; /* length to unmap in file */ xfs_fileoff_t max_len; - xfs_agnumber_t prev_agno = NULLAGNUMBER, agno; xfs_fileoff_t end; struct xfs_iext_cursor icur; bool done = false; @@ -5441,16 +5440,6 @@ __xfs_bunmapi( del = got; wasdel = isnullstartblock(del.br_startblock); - /* - * Make sure we don't touch multiple AGF headers out of order - * in a single transaction, as that could cause AB-BA deadlocks. - */ - if (!wasdel && !isrt) { - agno = XFS_FSB_TO_AGNO(mp, del.br_startblock); - if (prev_agno != NULLAGNUMBER && prev_agno > agno) - break; - prev_agno = agno; - } if (got.br_startoff < start) { del.br_startoff = start; del.br_blockcount -= start - got.br_startoff; From f610a5a29c3cfb7d37bdfa4ef52f72ea51f24a76 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 27 May 2021 11:24:33 +0100 Subject: [PATCH 659/730] afs: Fix the nlink handling of dir-over-dir rename Fix rename of one directory over another such that the nlink on the deleted directory is cleared to 0 rather than being decremented to 1. This was causing the generic/035 xfstest to fail. Fixes: e49c7b2f6de7 ("afs: Build an abstraction around an "operation" concept") Signed-off-by: David Howells Reviewed-by: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/162194384460.3999479.7605572278074191079.stgit@warthog.procyon.org.uk/ # v1 Signed-off-by: Linus Torvalds --- fs/afs/dir.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 9fbe5a5ec9bd..78719f2f567e 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -1919,7 +1919,9 @@ static void afs_rename_edit_dir(struct afs_operation *op) new_inode = d_inode(new_dentry); if (new_inode) { spin_lock(&new_inode->i_lock); - if (new_inode->i_nlink > 0) + if (S_ISDIR(new_inode->i_mode)) + clear_nlink(new_inode); + else if (new_inode->i_nlink > 0) drop_nlink(new_inode); spin_unlock(&new_inode->i_lock); } From c59870e2110e1229a6e4b2457aece6ffe8d68d99 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 19 May 2021 09:44:47 -0700 Subject: [PATCH 660/730] perf debug: Move debug initialization earlier This avoids segfaults during option handlers that use pr_err. For example, "perf --debug nopager list" segfaults before this change. Fixes: 8abceacff87d (perf debug: Add debug_set_file function) Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210519164447.2672030-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 20cb91ef06ff..2f6b67189b42 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -443,6 +443,8 @@ int main(int argc, const char **argv) const char *cmd; char sbuf[STRERR_BUFSIZE]; + perf_debug_setup(); + /* libsubcmd init */ exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT); pager_init(PERF_PAGER_ENVIRONMENT); @@ -531,8 +533,6 @@ int main(int argc, const char **argv) */ pthread__block_sigwinch(); - perf_debug_setup(); - while (1) { static int done_help; From 6d2fcfe6b517fe7cbf2687adfb0a16cdcd5d9243 Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Fri, 21 May 2021 17:19:27 +0200 Subject: [PATCH 661/730] cifs: set server->cipher_type to AES-128-CCM for SMB3.0 SMB3.0 doesn't have encryption negotiate context but simply uses the SMB2_GLOBAL_CAP_ENCRYPTION flag. When that flag is present in the neg response cifs.ko uses AES-128-CCM which is the only cipher available in this context. cipher_type was set to the server cipher only when parsing encryption negotiate context (SMB3.1.1). For SMB3.0 it was set to 0. This means cipher_type value can be 0 or 1 for AES-128-CCM. Fix this by checking for SMB3.0 and encryption capability and setting cipher_type appropriately. Signed-off-by: Aurelien Aptel Cc: Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 9f24eb88297a..c205f93e0a10 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -958,6 +958,13 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) /* Internal types */ server->capabilities |= SMB2_NT_FIND | SMB2_LARGE_FILES; + /* + * SMB3.0 supports only 1 cipher and doesn't have a encryption neg context + * Set the cipher type manually. + */ + if (server->dialect == SMB30_PROT_ID && (server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION)) + server->cipher_type = SMB2_ENCRYPTION_AES128_CCM; + security_blob = smb2_get_data_area_len(&blob_offset, &blob_length, (struct smb2_sync_hdr *)rsp); /* From eb0688180549e3b72464e9f78df58cb7a5592c7f Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Fri, 21 May 2021 06:35:52 +0000 Subject: [PATCH 662/730] cifs: fix string declarations and assignments in tracepoints We missed using the variable length string macros in several tracepoints. Fixed them in this change. There's probably more useful macros that we can use to print others like flags etc. But I'll submit sepawrate patches for those at a future date. Signed-off-by: Shyam Prasad N Cc: # v5.12 Signed-off-by: Steve French --- fs/cifs/trace.h | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h index d6df908dccad..dafcb6ab050d 100644 --- a/fs/cifs/trace.h +++ b/fs/cifs/trace.h @@ -12,6 +12,11 @@ #include +/* + * Please use this 3-part article as a reference for writing new tracepoints: + * https://lwn.net/Articles/379903/ + */ + /* For logging errors in read or write */ DECLARE_EVENT_CLASS(smb3_rw_err_class, TP_PROTO(unsigned int xid, @@ -529,16 +534,16 @@ DECLARE_EVENT_CLASS(smb3_exit_err_class, TP_ARGS(xid, func_name, rc), TP_STRUCT__entry( __field(unsigned int, xid) - __field(const char *, func_name) + __string(func_name, func_name) __field(int, rc) ), TP_fast_assign( __entry->xid = xid; - __entry->func_name = func_name; + __assign_str(func_name, func_name); __entry->rc = rc; ), TP_printk("\t%s: xid=%u rc=%d", - __entry->func_name, __entry->xid, __entry->rc) + __get_str(func_name), __entry->xid, __entry->rc) ) #define DEFINE_SMB3_EXIT_ERR_EVENT(name) \ @@ -583,14 +588,14 @@ DECLARE_EVENT_CLASS(smb3_enter_exit_class, TP_ARGS(xid, func_name), TP_STRUCT__entry( __field(unsigned int, xid) - __field(const char *, func_name) + __string(func_name, func_name) ), TP_fast_assign( __entry->xid = xid; - __entry->func_name = func_name; + __assign_str(func_name, func_name); ), TP_printk("\t%s: xid=%u", - __entry->func_name, __entry->xid) + __get_str(func_name), __entry->xid) ) #define DEFINE_SMB3_ENTER_EXIT_EVENT(name) \ @@ -857,16 +862,16 @@ DECLARE_EVENT_CLASS(smb3_reconnect_class, TP_STRUCT__entry( __field(__u64, currmid) __field(__u64, conn_id) - __field(char *, hostname) + __string(hostname, hostname) ), TP_fast_assign( __entry->currmid = currmid; __entry->conn_id = conn_id; - __entry->hostname = hostname; + __assign_str(hostname, hostname); ), TP_printk("conn_id=0x%llx server=%s current_mid=%llu", __entry->conn_id, - __entry->hostname, + __get_str(hostname), __entry->currmid) ) @@ -891,7 +896,7 @@ DECLARE_EVENT_CLASS(smb3_credit_class, TP_STRUCT__entry( __field(__u64, currmid) __field(__u64, conn_id) - __field(char *, hostname) + __string(hostname, hostname) __field(int, credits) __field(int, credits_to_add) __field(int, in_flight) @@ -899,7 +904,7 @@ DECLARE_EVENT_CLASS(smb3_credit_class, TP_fast_assign( __entry->currmid = currmid; __entry->conn_id = conn_id; - __entry->hostname = hostname; + __assign_str(hostname, hostname); __entry->credits = credits; __entry->credits_to_add = credits_to_add; __entry->in_flight = in_flight; @@ -907,7 +912,7 @@ DECLARE_EVENT_CLASS(smb3_credit_class, TP_printk("conn_id=0x%llx server=%s current_mid=%llu " "credits=%d credit_change=%d in_flight=%d", __entry->conn_id, - __entry->hostname, + __get_str(hostname), __entry->currmid, __entry->credits, __entry->credits_to_add, From 72ab7b6bb1a60bfc7baba1864fa28383dab4f862 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 20 May 2021 20:00:50 +0100 Subject: [PATCH 663/730] i2c: busses: i2c-nomadik: Fix formatting issue pertaining to 'timeout' Fixes the following W=1 kernel build warning(s): drivers/i2c/busses/i2c-nomadik.c:184: warning: Function parameter or member 'timeout' not described in 'nmk_i2c_dev' Signed-off-by: Lee Jones Reviewed-by: Linus Walleij Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-nomadik.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-nomadik.c b/drivers/i2c/busses/i2c-nomadik.c index dc77e1c4e80f..a2d12a5b1c34 100644 --- a/drivers/i2c/busses/i2c-nomadik.c +++ b/drivers/i2c/busses/i2c-nomadik.c @@ -159,7 +159,7 @@ struct i2c_nmk_client { * @clk_freq: clock frequency for the operation mode * @tft: Tx FIFO Threshold in bytes * @rft: Rx FIFO Threshold in bytes - * @timeout Slave response timeout (ms) + * @timeout: Slave response timeout (ms) * @sm: speed mode * @stop: stop condition. * @xfer_complete: acknowledge completion for a I2C message. From 45ce82f5eaedd5868b366d09d921a3205166d625 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 20 May 2021 20:00:51 +0100 Subject: [PATCH 664/730] i2c: muxes: i2c-arb-gpio-challenge: Demote non-conformant kernel-doc headers Fixes the following W=1 kernel build warning(s): drivers/i2c/muxes/i2c-arb-gpio-challenge.c:43: warning: Function parameter or member 'muxc' not described in 'i2c_arbitrator_select' drivers/i2c/muxes/i2c-arb-gpio-challenge.c:43: warning: Function parameter or member 'chan' not described in 'i2c_arbitrator_select' drivers/i2c/muxes/i2c-arb-gpio-challenge.c:86: warning: Function parameter or member 'muxc' not described in 'i2c_arbitrator_deselect' drivers/i2c/muxes/i2c-arb-gpio-challenge.c:86: warning: Function parameter or member 'chan' not described in 'i2c_arbitrator_deselect' Signed-off-by: Lee Jones Acked-by: Douglas Anderson Acked-by: Peter Rosin Signed-off-by: Wolfram Sang --- drivers/i2c/muxes/i2c-arb-gpio-challenge.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/muxes/i2c-arb-gpio-challenge.c b/drivers/i2c/muxes/i2c-arb-gpio-challenge.c index 6dc88902c189..1c78657631f4 100644 --- a/drivers/i2c/muxes/i2c-arb-gpio-challenge.c +++ b/drivers/i2c/muxes/i2c-arb-gpio-challenge.c @@ -34,7 +34,7 @@ struct i2c_arbitrator_data { }; -/** +/* * i2c_arbitrator_select - claim the I2C bus * * Use the GPIO-based signalling protocol; return -EBUSY if we fail. @@ -77,7 +77,7 @@ static int i2c_arbitrator_select(struct i2c_mux_core *muxc, u32 chan) return -EBUSY; } -/** +/* * i2c_arbitrator_deselect - release the I2C bus * * Release the I2C bus using the GPIO-based signalling protocol. From f09aa114c4aff5c5b170be3498b63a006ea46f92 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 20 May 2021 20:00:52 +0100 Subject: [PATCH 665/730] i2c: busses: i2c-ali1563: File headers are not good candidates for kernel-doc Fixes the following W=1 kernel build warning(s): drivers/i2c/busses/i2c-ali1563.c:24: warning: expecting prototype for i2c(). Prototype was for ALI1563_MAX_TIMEOUT() instead Signed-off-by: Lee Jones Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-ali1563.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-ali1563.c b/drivers/i2c/busses/i2c-ali1563.c index 4d12e3da12f0..55a9e93fbfeb 100644 --- a/drivers/i2c/busses/i2c-ali1563.c +++ b/drivers/i2c/busses/i2c-ali1563.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only -/** +/* * i2c-ali1563.c - i2c driver for the ALi 1563 Southbridge * * Copyright (C) 2004 Patrick Mochel From 6eb8a473693149f814a5082f395e130e75d41d57 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 20 May 2021 20:00:55 +0100 Subject: [PATCH 666/730] i2c: busses: i2c-cadence: Fix incorrectly documented 'enum cdns_i2c_slave_mode' Fixes the following W=1 kernel build warning(s): drivers/i2c/busses/i2c-cadence.c:157: warning: expecting prototype for enum cdns_i2c_slave_mode. Prototype was for enum cdns_i2c_slave_state instead Signed-off-by: Lee Jones Reviewed-by: Michal Simek Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-cadence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index c1bbc4caeb5c..66aafa7d1123 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -144,7 +144,7 @@ enum cdns_i2c_mode { }; /** - * enum cdns_i2c_slave_mode - Slave state when I2C is operating in slave mode + * enum cdns_i2c_slave_state - Slave state when I2C is operating in slave mode * * @CDNS_I2C_SLAVE_STATE_IDLE: I2C slave idle * @CDNS_I2C_SLAVE_STATE_SEND: I2C slave sending data to master From b4c760de3cedd41e63797b7eea73baf2a165dde2 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 20 May 2021 20:00:56 +0100 Subject: [PATCH 667/730] i2c: busses: i2c-designware-master: Fix misnaming of 'i2c_dw_init_master()' Fixes the following W=1 kernel build warning(s): drivers/i2c/busses/i2c-designware-master.c:176: warning: expecting prototype for i2c_dw_init(). Prototype was for i2c_dw_init_master() instead Signed-off-by: Lee Jones Acked-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-master.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index 13be1d678c39..9b08bb5df38d 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -165,7 +165,7 @@ static int i2c_dw_set_timings_master(struct dw_i2c_dev *dev) } /** - * i2c_dw_init() - Initialize the designware I2C master hardware + * i2c_dw_init_master() - Initialize the designware I2C master hardware * @dev: device private data * * This functions configures and enables the I2C master. From f9f193fc222bd5352a414ba34406303cfedd2c5e Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 20 May 2021 20:00:57 +0100 Subject: [PATCH 668/730] i2c: busses: i2c-eg20t: Fix 'bad line' issue and provide description for 'msgs' param Fixes the following W=1 kernel build warning(s): drivers/i2c/busses/i2c-eg20t.c:151: warning: bad line: PCH i2c controller drivers/i2c/busses/i2c-eg20t.c:369: warning: Function parameter or member 'msgs' not described in 'pch_i2c_writebytes' Signed-off-by: Lee Jones Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-eg20t.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c index 843b31a0f752..321b2770feab 100644 --- a/drivers/i2c/busses/i2c-eg20t.c +++ b/drivers/i2c/busses/i2c-eg20t.c @@ -148,7 +148,7 @@ struct i2c_algo_pch_data { /** * struct adapter_info - This structure holds the adapter information for the - PCH i2c controller + * PCH i2c controller * @pch_data: stores a list of i2c_algo_pch_data * @pch_i2c_suspended: specifies whether the system is suspended or not * perhaps with more lines and words. @@ -358,6 +358,7 @@ static void pch_i2c_repstart(struct i2c_algo_pch_data *adap) /** * pch_i2c_writebytes() - write data to I2C bus in normal mode * @i2c_adap: Pointer to the struct i2c_adapter. + * @msgs: Pointer to the i2c message structure. * @last: specifies whether last message or not. * In the case of compound mode it will be 1 for last message, * otherwise 0. From d4c73d41bef08f6d7878cb3e55d7e50df13d02c1 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 20 May 2021 20:00:59 +0100 Subject: [PATCH 669/730] i2c: busses: i2c-ocores: Place the expected function names into the documentation headers Fixes the following W=1 kernel build warning(s): drivers/i2c/busses/i2c-ocores.c:253: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst drivers/i2c/busses/i2c-ocores.c:267: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst drivers/i2c/busses/i2c-ocores.c:299: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst drivers/i2c/busses/i2c-ocores.c:347: warning: expecting prototype for It handles an IRQ(). Prototype was for ocores_process_polling() instead Signed-off-by: Lee Jones Reviewed-by: Andrew Lunn Reviewed-by: Peter Korsgaard Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-ocores.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c index 273222e38056..a0af027db04c 100644 --- a/drivers/i2c/busses/i2c-ocores.c +++ b/drivers/i2c/busses/i2c-ocores.c @@ -250,7 +250,7 @@ static irqreturn_t ocores_isr(int irq, void *dev_id) } /** - * Process timeout event + * ocores_process_timeout() - Process timeout event * @i2c: ocores I2C device instance */ static void ocores_process_timeout(struct ocores_i2c *i2c) @@ -264,7 +264,7 @@ static void ocores_process_timeout(struct ocores_i2c *i2c) } /** - * Wait until something change in a given register + * ocores_wait() - Wait until something change in a given register * @i2c: ocores I2C device instance * @reg: register to query * @mask: bitmask to apply on register value @@ -296,7 +296,7 @@ static int ocores_wait(struct ocores_i2c *i2c, } /** - * Wait until is possible to process some data + * ocores_poll_wait() - Wait until is possible to process some data * @i2c: ocores I2C device instance * * Used when the device is in polling mode (interrupts disabled). @@ -334,7 +334,7 @@ static int ocores_poll_wait(struct ocores_i2c *i2c) } /** - * It handles an IRQ-less transfer + * ocores_process_polling() - It handles an IRQ-less transfer * @i2c: ocores I2C device instance * * Even if IRQ are disabled, the I2C OpenCore IP behavior is exactly the same From 3e0f8672f1685ed1fbbc4b3388fe8093e43e9783 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 20 May 2021 20:01:00 +0100 Subject: [PATCH 670/730] i2c: busses: i2c-pnx: Provide descriptions for 'alg_data' data structure Fixes the following W=1 kernel build warning(s): drivers/i2c/busses/i2c-pnx.c:147: warning: Function parameter or member 'alg_data' not described in 'i2c_pnx_start' drivers/i2c/busses/i2c-pnx.c:147: warning: Excess function parameter 'adap' description in 'i2c_pnx_start' drivers/i2c/busses/i2c-pnx.c:202: warning: Function parameter or member 'alg_data' not described in 'i2c_pnx_stop' drivers/i2c/busses/i2c-pnx.c:202: warning: Excess function parameter 'adap' description in 'i2c_pnx_stop' drivers/i2c/busses/i2c-pnx.c:231: warning: Function parameter or member 'alg_data' not described in 'i2c_pnx_master_xmit' drivers/i2c/busses/i2c-pnx.c:231: warning: Excess function parameter 'adap' description in 'i2c_pnx_master_xmit' drivers/i2c/busses/i2c-pnx.c:301: warning: Function parameter or member 'alg_data' not described in 'i2c_pnx_master_rcv' drivers/i2c/busses/i2c-pnx.c:301: warning: Excess function parameter 'adap' description in 'i2c_pnx_master_rcv' Signed-off-by: Lee Jones Acked-by: Vladimir Zapolskiy Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-pnx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c index 8c4ec7f13f5a..50f21cdbe90d 100644 --- a/drivers/i2c/busses/i2c-pnx.c +++ b/drivers/i2c/busses/i2c-pnx.c @@ -138,7 +138,7 @@ static inline void i2c_pnx_arm_timer(struct i2c_pnx_algo_data *alg_data) /** * i2c_pnx_start - start a device * @slave_addr: slave address - * @adap: pointer to adapter structure + * @alg_data: pointer to local driver data structure * * Generate a START signal in the desired mode. */ @@ -194,7 +194,7 @@ static int i2c_pnx_start(unsigned char slave_addr, /** * i2c_pnx_stop - stop a device - * @adap: pointer to I2C adapter structure + * @alg_data: pointer to local driver data structure * * Generate a STOP signal to terminate the master transaction. */ @@ -223,7 +223,7 @@ static void i2c_pnx_stop(struct i2c_pnx_algo_data *alg_data) /** * i2c_pnx_master_xmit - transmit data to slave - * @adap: pointer to I2C adapter structure + * @alg_data: pointer to local driver data structure * * Sends one byte of data to the slave */ @@ -293,7 +293,7 @@ static int i2c_pnx_master_xmit(struct i2c_pnx_algo_data *alg_data) /** * i2c_pnx_master_rcv - receive data from slave - * @adap: pointer to I2C adapter structure + * @alg_data: pointer to local driver data structure * * Reads one byte data from the slave */ From 721a6fe5f9584357617b463e687f379412d1c213 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 20 May 2021 20:01:02 +0100 Subject: [PATCH 671/730] i2c: busses: i2c-st: Fix copy/paste function misnaming issues Fixes the following W=1 kernel build warning(s): drivers/i2c/busses/i2c-st.c:531: warning: expecting prototype for st_i2c_handle_write(). Prototype was for st_i2c_handle_read() instead drivers/i2c/busses/i2c-st.c:566: warning: expecting prototype for st_i2c_isr(). Prototype was for st_i2c_isr_thread() instead Fix the "enmpty" typo while here. Signed-off-by: Lee Jones Reviewed-by: Alain Volmat Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-st.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-st.c b/drivers/i2c/busses/i2c-st.c index faa81a95551f..88482316d22a 100644 --- a/drivers/i2c/busses/i2c-st.c +++ b/drivers/i2c/busses/i2c-st.c @@ -524,7 +524,7 @@ static void st_i2c_handle_write(struct st_i2c_dev *i2c_dev) } /** - * st_i2c_handle_write() - Handle FIFO enmpty interrupt in case of read + * st_i2c_handle_read() - Handle FIFO empty interrupt in case of read * @i2c_dev: Controller's private data */ static void st_i2c_handle_read(struct st_i2c_dev *i2c_dev) @@ -558,7 +558,7 @@ static void st_i2c_handle_read(struct st_i2c_dev *i2c_dev) } /** - * st_i2c_isr() - Interrupt routine + * st_i2c_isr_thread() - Interrupt routine * @irq: interrupt number * @data: Controller's private data */ From a00cb25169d508908c6baa886035e0aa9121942a Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 20 May 2021 20:01:03 +0100 Subject: [PATCH 672/730] i2c: busses: i2c-stm32f4: Remove incorrectly placed ' ' from function name Fixes the following W=1 kernel build warning(s): drivers/i2c/busses/i2c-stm32f4.c:321: warning: expecting prototype for stm32f4_i2c_write_ byte()(). Prototype was for stm32f4_i2c_write_byte() instead Signed-off-by: Lee Jones Reviewed-by: Alain Volmat Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-stm32f4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-stm32f4.c b/drivers/i2c/busses/i2c-stm32f4.c index 4933fc8ce3fd..eebce7ecef25 100644 --- a/drivers/i2c/busses/i2c-stm32f4.c +++ b/drivers/i2c/busses/i2c-stm32f4.c @@ -313,7 +313,7 @@ static int stm32f4_i2c_wait_free_bus(struct stm32f4_i2c_dev *i2c_dev) } /** - * stm32f4_i2c_write_ byte() - Write a byte in the data register + * stm32f4_i2c_write_byte() - Write a byte in the data register * @i2c_dev: Controller's private data * @byte: Data to write in the register */ From a5063ab976024f72865029646d7c8c9dfa63b595 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Wed, 12 May 2021 09:20:49 +1200 Subject: [PATCH 673/730] dt-bindings: i2c: mpc: Add fsl,i2c-erratum-a004447 flag Document the fsl,i2c-erratum-a004447 flag which indicates the presence of an i2c erratum on some QorIQ SoCs. Signed-off-by: Chris Packham Acked-by: Rob Herring Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/i2c-mpc.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/devicetree/bindings/i2c/i2c-mpc.yaml b/Documentation/devicetree/bindings/i2c/i2c-mpc.yaml index 7b553d559c83..98c6fcf7bf26 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-mpc.yaml +++ b/Documentation/devicetree/bindings/i2c/i2c-mpc.yaml @@ -46,6 +46,13 @@ properties: description: | I2C bus timeout in microseconds + fsl,i2c-erratum-a004447: + $ref: /schemas/types.yaml#/definitions/flag + description: | + Indicates the presence of QorIQ erratum A-004447, which + says that the standard i2c recovery scheme mechanism does + not work and an alternate implementation is needed. + required: - compatible - reg From 7adc7b225cddcfd0f346d10144fd7a3d3d9f9ea7 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Wed, 12 May 2021 09:20:50 +1200 Subject: [PATCH 674/730] powerpc/fsl: set fsl,i2c-erratum-a004447 flag for P2041 i2c controllers The i2c controllers on the P2040/P2041 have an erratum where the documented scheme for i2c bus recovery will not work (A-004447). A different mechanism is needed which is documented in the P2040 Chip Errata Rev Q (latest available at the time of writing). Signed-off-by: Chris Packham Acked-by: Michael Ellerman Signed-off-by: Wolfram Sang --- arch/powerpc/boot/dts/fsl/p2041si-post.dtsi | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi index 872e4485dc3f..ddc018d42252 100644 --- a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi @@ -371,7 +371,23 @@ }; /include/ "qoriq-i2c-0.dtsi" + i2c@118000 { + fsl,i2c-erratum-a004447; + }; + + i2c@118100 { + fsl,i2c-erratum-a004447; + }; + /include/ "qoriq-i2c-1.dtsi" + i2c@119000 { + fsl,i2c-erratum-a004447; + }; + + i2c@119100 { + fsl,i2c-erratum-a004447; + }; + /include/ "qoriq-duart-0.dtsi" /include/ "qoriq-duart-1.dtsi" /include/ "qoriq-gpio-0.dtsi" From 19ae697a1e4edf1d755b413e3aa38da65e2db23b Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Wed, 12 May 2021 09:20:51 +1200 Subject: [PATCH 675/730] powerpc/fsl: set fsl,i2c-erratum-a004447 flag for P1010 i2c controllers The i2c controllers on the P1010 have an erratum where the documented scheme for i2c bus recovery will not work (A-004447). A different mechanism is needed which is documented in the P1010 Chip Errata Rev L. Signed-off-by: Chris Packham Acked-by: Michael Ellerman Signed-off-by: Wolfram Sang --- arch/powerpc/boot/dts/fsl/p1010si-post.dtsi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi index c2717f31925a..ccda0a91abf0 100644 --- a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi @@ -122,7 +122,15 @@ }; /include/ "pq3-i2c-0.dtsi" + i2c@3000 { + fsl,i2c-erratum-a004447; + }; + /include/ "pq3-i2c-1.dtsi" + i2c@3100 { + fsl,i2c-erratum-a004447; + }; + /include/ "pq3-duart-0.dtsi" /include/ "pq3-espi-0.dtsi" spi0: spi@7000 { From 8f0cdec8b5fd94135d643662506ee94ae9e98785 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Wed, 12 May 2021 09:20:52 +1200 Subject: [PATCH 676/730] i2c: mpc: implement erratum A-004447 workaround The P2040/P2041 has an erratum where the normal i2c recovery mechanism does not work. Implement the alternative recovery mechanism documented in the P2040 Chip Errata Rev Q. Signed-off-by: Chris Packham Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mpc.c | 81 +++++++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c index 30d9e89a3db2..dcca9c2396db 100644 --- a/drivers/i2c/busses/i2c-mpc.c +++ b/drivers/i2c/busses/i2c-mpc.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -45,6 +46,7 @@ #define CCR_MTX 0x10 #define CCR_TXAK 0x08 #define CCR_RSTA 0x04 +#define CCR_RSVD 0x02 #define CSR_MCF 0x80 #define CSR_MAAS 0x40 @@ -97,7 +99,7 @@ struct mpc_i2c { u32 block; int rc; int expect_rxack; - + bool has_errata_A004447; }; struct mpc_i2c_divider { @@ -136,6 +138,75 @@ static void mpc_i2c_fixup(struct mpc_i2c *i2c) } } +static int i2c_mpc_wait_sr(struct mpc_i2c *i2c, int mask) +{ + void __iomem *addr = i2c->base + MPC_I2C_SR; + u8 val; + + return readb_poll_timeout(addr, val, val & mask, 0, 100); +} + +/* + * Workaround for Erratum A004447. From the P2040CE Rev Q + * + * 1. Set up the frequency divider and sampling rate. + * 2. I2CCR - a0h + * 3. Poll for I2CSR[MBB] to get set. + * 4. If I2CSR[MAL] is set (an indication that SDA is stuck low), then go to + * step 5. If MAL is not set, then go to step 13. + * 5. I2CCR - 00h + * 6. I2CCR - 22h + * 7. I2CCR - a2h + * 8. Poll for I2CSR[MBB] to get set. + * 9. Issue read to I2CDR. + * 10. Poll for I2CSR[MIF] to be set. + * 11. I2CCR - 82h + * 12. Workaround complete. Skip the next steps. + * 13. Issue read to I2CDR. + * 14. Poll for I2CSR[MIF] to be set. + * 15. I2CCR - 80h + */ +static void mpc_i2c_fixup_A004447(struct mpc_i2c *i2c) +{ + int ret; + u32 val; + + writeccr(i2c, CCR_MEN | CCR_MSTA); + ret = i2c_mpc_wait_sr(i2c, CSR_MBB); + if (ret) { + dev_err(i2c->dev, "timeout waiting for CSR_MBB\n"); + return; + } + + val = readb(i2c->base + MPC_I2C_SR); + + if (val & CSR_MAL) { + writeccr(i2c, 0x00); + writeccr(i2c, CCR_MSTA | CCR_RSVD); + writeccr(i2c, CCR_MEN | CCR_MSTA | CCR_RSVD); + ret = i2c_mpc_wait_sr(i2c, CSR_MBB); + if (ret) { + dev_err(i2c->dev, "timeout waiting for CSR_MBB\n"); + return; + } + val = readb(i2c->base + MPC_I2C_DR); + ret = i2c_mpc_wait_sr(i2c, CSR_MIF); + if (ret) { + dev_err(i2c->dev, "timeout waiting for CSR_MIF\n"); + return; + } + writeccr(i2c, CCR_MEN | CCR_RSVD); + } else { + val = readb(i2c->base + MPC_I2C_DR); + ret = i2c_mpc_wait_sr(i2c, CSR_MIF); + if (ret) { + dev_err(i2c->dev, "timeout waiting for CSR_MIF\n"); + return; + } + writeccr(i2c, CCR_MEN); + } +} + #if defined(CONFIG_PPC_MPC52xx) || defined(CONFIG_PPC_MPC512x) static const struct mpc_i2c_divider mpc_i2c_dividers_52xx[] = { {20, 0x20}, {22, 0x21}, {24, 0x22}, {26, 0x23}, @@ -670,7 +741,10 @@ static int fsl_i2c_bus_recovery(struct i2c_adapter *adap) { struct mpc_i2c *i2c = i2c_get_adapdata(adap); - mpc_i2c_fixup(i2c); + if (i2c->has_errata_A004447) + mpc_i2c_fixup_A004447(i2c); + else + mpc_i2c_fixup(i2c); return 0; } @@ -767,6 +841,9 @@ static int fsl_i2c_probe(struct platform_device *op) } dev_info(i2c->dev, "timeout %u us\n", mpc_ops.timeout * 1000000 / HZ); + if (of_property_read_bool(op->dev.of_node, "fsl,i2c-erratum-a004447")) + i2c->has_errata_A004447 = true; + i2c->adap = mpc_ops; scnprintf(i2c->adap.name, sizeof(i2c->adap.name), "MPC adapter (%s)", of_node_full_name(op->dev.of_node)); From e4d8716c3dcec47f1557024add24e1f3c09eb24b Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 25 May 2021 17:03:36 +0200 Subject: [PATCH 677/730] i2c: i801: Don't generate an interrupt on bus reset Now that the i2c-i801 driver supports interrupts, setting the KILL bit in a attempt to recover from a timed out transaction triggers an interrupt. Unfortunately, the interrupt handler (i801_isr) is not prepared for this situation and will try to process the interrupt as if it was signaling the end of a successful transaction. In the case of a block transaction, this can result in an out-of-range memory access. This condition was reproduced several times by syzbot: https://syzkaller.appspot.com/bug?extid=ed71512d469895b5b34e https://syzkaller.appspot.com/bug?extid=8c8dedc0ba9e03f6c79e https://syzkaller.appspot.com/bug?extid=c8ff0b6d6c73d81b610e https://syzkaller.appspot.com/bug?extid=33f6c360821c399d69eb https://syzkaller.appspot.com/bug?extid=be15dc0b1933f04b043a https://syzkaller.appspot.com/bug?extid=b4d3fd1dfd53e90afd79 So disable interrupts while trying to reset the bus. Interrupts will be enabled again for the following transaction. Fixes: 636752bcb517 ("i2c-i801: Enable IRQ for SMBus transactions") Reported-by: syzbot+b4d3fd1dfd53e90afd79@syzkaller.appspotmail.com Signed-off-by: Jean Delvare Acked-by: Andy Shevchenko Cc: Jarkko Nikula Tested-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-i801.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 99d446763530..f9e1c2ceaac0 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -395,11 +395,9 @@ static int i801_check_post(struct i801_priv *priv, int status) dev_err(&priv->pci_dev->dev, "Transaction timeout\n"); /* try to stop the current command */ dev_dbg(&priv->pci_dev->dev, "Terminating the current operation\n"); - outb_p(inb_p(SMBHSTCNT(priv)) | SMBHSTCNT_KILL, - SMBHSTCNT(priv)); + outb_p(SMBHSTCNT_KILL, SMBHSTCNT(priv)); usleep_range(1000, 2000); - outb_p(inb_p(SMBHSTCNT(priv)) & (~SMBHSTCNT_KILL), - SMBHSTCNT(priv)); + outb_p(0, SMBHSTCNT(priv)); /* Check if it worked */ status = inb_p(SMBHSTSTS(priv)); From 1bb56810677f26b78d57a3038054943efd334a1c Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Fri, 21 May 2021 17:19:28 +0200 Subject: [PATCH 678/730] cifs: change format of CIFS_FULL_KEY_DUMP ioctl Make CIFS_FULL_KEY_DUMP ioctl able to return variable-length keys. * userspace needs to pass the struct size along with optional session_id and some space at the end to store keys * if there is enough space kernel returns keys in the extra space and sets the length of each key via xyz_key_length fields This also fixes the build error for get_user() on ARM. Sample program: #include #include #include #include #include struct smb3_full_key_debug_info { uint32_t in_size; uint64_t session_id; uint16_t cipher_type; uint8_t session_key_length; uint8_t server_in_key_length; uint8_t server_out_key_length; uint8_t data[]; /* * return this struct with the keys appended at the end: * uint8_t session_key[session_key_length]; * uint8_t server_in_key[server_in_key_length]; * uint8_t server_out_key[server_out_key_length]; */ } __attribute__((packed)); #define CIFS_IOCTL_MAGIC 0xCF #define CIFS_DUMP_FULL_KEY _IOWR(CIFS_IOCTL_MAGIC, 10, struct smb3_full_key_debug_info) void dump(const void *p, size_t len) { const char *hex = "0123456789ABCDEF"; const uint8_t *b = p; for (int i = 0; i < len; i++) printf("%c%c ", hex[(b[i]>>4)&0xf], hex[b[i]&0xf]); putchar('\n'); } int main(int argc, char **argv) { struct smb3_full_key_debug_info *keys; uint8_t buf[sizeof(*keys)+1024] = {0}; size_t off = 0; int fd, rc; keys = (struct smb3_full_key_debug_info *)&buf; keys->in_size = sizeof(buf); fd = open(argv[1], O_RDONLY); if (fd < 0) perror("open"), exit(1); rc = ioctl(fd, CIFS_DUMP_FULL_KEY, keys); if (rc < 0) perror("ioctl"), exit(1); printf("SessionId "); dump(&keys->session_id, 8); printf("Cipher %04x\n", keys->cipher_type); printf("SessionKey "); dump(keys->data+off, keys->session_key_length); off += keys->session_key_length; printf("ServerIn Key "); dump(keys->data+off, keys->server_in_key_length); off += keys->server_in_key_length; printf("ServerOut Key "); dump(keys->data+off, keys->server_out_key_length); return 0; } Usage: $ gcc -o dumpkeys dumpkeys.c Against Windows Server 2020 preview (with AES-256-GCM support): # mount.cifs //$ip/test /mnt -o "username=administrator,password=foo,vers=3.0,seal" # ./dumpkeys /mnt/somefile SessionId 0D 00 00 00 00 0C 00 00 Cipher 0002 SessionKey AB CD CC 0D E4 15 05 0C 6F 3C 92 90 19 F3 0D 25 ServerIn Key 73 C6 6A C8 6B 08 CF A2 CB 8E A5 7D 10 D1 5B DC ServerOut Key 6D 7E 2B A1 71 9D D7 2B 94 7B BA C4 F0 A5 A4 F8 # umount /mnt With 256 bit keys: # echo 1 > /sys/module/cifs/parameters/require_gcm_256 # mount.cifs //$ip/test /mnt -o "username=administrator,password=foo,vers=3.11,seal" # ./dumpkeys /mnt/somefile SessionId 09 00 00 00 00 0C 00 00 Cipher 0004 SessionKey 93 F5 82 3B 2F B7 2A 50 0B B9 BA 26 FB 8C 8B 03 ServerIn Key 6C 6A 89 B2 CB 7B 78 E8 04 93 37 DA 22 53 47 DF B3 2C 5F 02 26 70 43 DB 8D 33 7B DC 66 D3 75 A9 ServerOut Key 04 11 AA D7 52 C7 A8 0F ED E3 93 3A 65 FE 03 AD 3F 63 03 01 2B C0 1B D7 D7 E5 52 19 7F CC 46 B4 Signed-off-by: Aurelien Aptel Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/cifs_ioctl.h | 25 ++++++-- fs/cifs/cifspdu.h | 3 +- fs/cifs/ioctl.c | 141 +++++++++++++++++++++++++++++++------------ 3 files changed, 125 insertions(+), 44 deletions(-) diff --git a/fs/cifs/cifs_ioctl.h b/fs/cifs/cifs_ioctl.h index 4a97fe12006b..37fc7d6ac457 100644 --- a/fs/cifs/cifs_ioctl.h +++ b/fs/cifs/cifs_ioctl.h @@ -72,15 +72,28 @@ struct smb3_key_debug_info { } __packed; /* - * Dump full key (32 byte encrypt/decrypt keys instead of 16 bytes) - * is needed if GCM256 (stronger encryption) negotiated + * Dump variable-sized keys */ struct smb3_full_key_debug_info { - __u64 Suid; + /* INPUT: size of userspace buffer */ + __u32 in_size; + + /* + * INPUT: 0 for current user, otherwise session to dump + * OUTPUT: session id that was dumped + */ + __u64 session_id; __u16 cipher_type; - __u8 auth_key[16]; /* SMB2_NTLMV2_SESSKEY_SIZE */ - __u8 smb3encryptionkey[32]; /* SMB3_ENC_DEC_KEY_SIZE */ - __u8 smb3decryptionkey[32]; /* SMB3_ENC_DEC_KEY_SIZE */ + __u8 session_key_length; + __u8 server_in_key_length; + __u8 server_out_key_length; + __u8 data[]; + /* + * return this struct with the keys appended at the end: + * __u8 session_key[session_key_length]; + * __u8 server_in_key[server_in_key_length]; + * __u8 server_out_key[server_out_key_length]; + */ } __packed; struct smb3_notify { diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index b53a87db282f..554d64fe171e 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -148,7 +148,8 @@ #define SMB3_SIGN_KEY_SIZE (16) /* - * Size of the smb3 encryption/decryption keys + * Size of the smb3 encryption/decryption key storage. + * This size is big enough to store any cipher key types. */ #define SMB3_ENC_DEC_KEY_SIZE (32) diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 28ec8d7c521a..d67d281ab863 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -33,6 +33,7 @@ #include "cifsfs.h" #include "cifs_ioctl.h" #include "smb2proto.h" +#include "smb2glob.h" #include static long cifs_ioctl_query_info(unsigned int xid, struct file *filep, @@ -214,48 +215,112 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg) return 0; } -static int cifs_dump_full_key(struct cifs_tcon *tcon, unsigned long arg) +static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug_info __user *in) { - struct smb3_full_key_debug_info pfull_key_inf; - __u64 suid; - struct list_head *tmp; + struct smb3_full_key_debug_info out; struct cifs_ses *ses; + int rc = 0; bool found = false; + u8 __user *end; - if (!smb3_encryption_required(tcon)) - return -EOPNOTSUPP; + if (!smb3_encryption_required(tcon)) { + rc = -EOPNOTSUPP; + goto out; + } + + /* copy user input into our output buffer */ + if (copy_from_user(&out, in, sizeof(out))) { + rc = -EINVAL; + goto out; + } + + if (!out.session_id) { + /* if ses id is 0, use current user session */ + ses = tcon->ses; + } else { + /* otherwise if a session id is given, look for it in all our sessions */ + struct cifs_ses *ses_it = NULL; + struct TCP_Server_Info *server_it = NULL; - ses = tcon->ses; /* default to user id for current user */ - if (get_user(suid, (__u64 __user *)arg)) - suid = 0; - if (suid) { - /* search to see if there is a session with a matching SMB UID */ spin_lock(&cifs_tcp_ses_lock); - list_for_each(tmp, &tcon->ses->server->smb_ses_list) { - ses = list_entry(tmp, struct cifs_ses, smb_ses_list); - if (ses->Suid == suid) { - found = true; - break; + list_for_each_entry(server_it, &cifs_tcp_ses_list, tcp_ses_list) { + list_for_each_entry(ses_it, &server_it->smb_ses_list, smb_ses_list) { + if (ses_it->Suid == out.session_id) { + ses = ses_it; + /* + * since we are using the session outside the crit + * section, we need to make sure it won't be released + * so increment its refcount + */ + ses->ses_count++; + found = true; + goto search_end; + } } } +search_end: spin_unlock(&cifs_tcp_ses_lock); - if (found == false) - return -EINVAL; - } /* else uses default user's SMB UID (ie current user) */ + if (!found) { + rc = -ENOENT; + goto out; + } + } - pfull_key_inf.cipher_type = le16_to_cpu(ses->server->cipher_type); - pfull_key_inf.Suid = ses->Suid; - memcpy(pfull_key_inf.auth_key, ses->auth_key.response, - 16 /* SMB2_NTLMV2_SESSKEY_SIZE */); - memcpy(pfull_key_inf.smb3decryptionkey, ses->smb3decryptionkey, - 32 /* SMB3_ENC_DEC_KEY_SIZE */); - memcpy(pfull_key_inf.smb3encryptionkey, - ses->smb3encryptionkey, 32 /* SMB3_ENC_DEC_KEY_SIZE */); - if (copy_to_user((void __user *)arg, &pfull_key_inf, - sizeof(struct smb3_full_key_debug_info))) - return -EFAULT; + switch (ses->server->cipher_type) { + case SMB2_ENCRYPTION_AES128_CCM: + case SMB2_ENCRYPTION_AES128_GCM: + out.session_key_length = CIFS_SESS_KEY_SIZE; + out.server_in_key_length = out.server_out_key_length = SMB3_GCM128_CRYPTKEY_SIZE; + break; + case SMB2_ENCRYPTION_AES256_CCM: + case SMB2_ENCRYPTION_AES256_GCM: + out.session_key_length = CIFS_SESS_KEY_SIZE; + out.server_in_key_length = out.server_out_key_length = SMB3_GCM256_CRYPTKEY_SIZE; + break; + default: + rc = -EOPNOTSUPP; + goto out; + } - return 0; + /* check if user buffer is big enough to store all the keys */ + if (out.in_size < sizeof(out) + out.session_key_length + out.server_in_key_length + + out.server_out_key_length) { + rc = -ENOBUFS; + goto out; + } + + out.session_id = ses->Suid; + out.cipher_type = le16_to_cpu(ses->server->cipher_type); + + /* overwrite user input with our output */ + if (copy_to_user(in, &out, sizeof(out))) { + rc = -EINVAL; + goto out; + } + + /* append all the keys at the end of the user buffer */ + end = in->data; + if (copy_to_user(end, ses->auth_key.response, out.session_key_length)) { + rc = -EINVAL; + goto out; + } + end += out.session_key_length; + + if (copy_to_user(end, ses->smb3encryptionkey, out.server_in_key_length)) { + rc = -EINVAL; + goto out; + } + end += out.server_in_key_length; + + if (copy_to_user(end, ses->smb3decryptionkey, out.server_out_key_length)) { + rc = -EINVAL; + goto out; + } + +out: + if (found) + cifs_put_smb_ses(ses); + return rc; } long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) @@ -371,6 +436,10 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) rc = -EOPNOTSUPP; break; case CIFS_DUMP_KEY: + /* + * Dump encryption keys. This is an old ioctl that only + * handles AES-128-{CCM,GCM}. + */ if (pSMBFile == NULL) break; if (!capable(CAP_SYS_ADMIN)) { @@ -398,11 +467,10 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) else rc = 0; break; - /* - * Dump full key (32 bytes instead of 16 bytes) is - * needed if GCM256 (stronger encryption) negotiated - */ case CIFS_DUMP_FULL_KEY: + /* + * Dump encryption keys (handles any key sizes) + */ if (pSMBFile == NULL) break; if (!capable(CAP_SYS_ADMIN)) { @@ -410,8 +478,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) break; } tcon = tlink_tcon(pSMBFile->tlink); - rc = cifs_dump_full_key(tcon, arg); - + rc = cifs_dump_full_key(tcon, (void __user *)arg); break; case CIFS_IOC_NOTIFY: if (!S_ISDIR(inode->i_mode)) { From c673b7f59e940061467200f1746820a178444bd0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 27 May 2021 15:00:52 -0700 Subject: [PATCH 679/730] perf stat: Fix error check for bpf_program__attach It seems the bpf_program__attach() returns a negative error code instead of a NULL pointer in case of error. Fixes: 7fac83aaf2ee ("perf stat: Introduce 'bperf' to share hardware PMCs with BPF") Signed-off-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Song Liu Link: http://lore.kernel.org/lkml/20210527220052.1657578-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_counter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index ddb52f748c8e..974f10e356f0 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -451,10 +451,10 @@ static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd, goto out; } - err = -1; link = bpf_program__attach(skel->progs.on_switch); - if (!link) { + if (IS_ERR(link)) { pr_err("Failed to attach leader program\n"); + err = PTR_ERR(link); goto out; } From fed1bd51a504eb96caa38b4f13ab138fc169ea75 Mon Sep 17 00:00:00 2001 From: Qii Wang Date: Thu, 27 May 2021 20:04:04 +0800 Subject: [PATCH 680/730] i2c: mediatek: Disable i2c start_en and clear intr_stat brfore reset The i2c controller driver do dma reset after transfer timeout, but sometimes dma reset will trigger an unexpected DMA_ERR irq. It will cause the i2c controller to continuously send interrupts to the system and cause soft lock-up. So we need to disable i2c start_en and clear intr_stat to stop i2c controller before dma reset when transfer timeout. Fixes: aafced673c06("i2c: mediatek: move dma reset before i2c reset") Signed-off-by: Qii Wang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mt65xx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index 5ddfa4e56ee2..4e9fb6b44436 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -479,6 +479,11 @@ static void mtk_i2c_clock_disable(struct mtk_i2c *i2c) static void mtk_i2c_init_hw(struct mtk_i2c *i2c) { u16 control_reg; + u16 intr_stat_reg; + + mtk_i2c_writew(i2c, I2C_CHN_CLR_FLAG, OFFSET_START); + intr_stat_reg = mtk_i2c_readw(i2c, OFFSET_INTR_STAT); + mtk_i2c_writew(i2c, intr_stat_reg, OFFSET_INTR_STAT); if (i2c->dev_comp->apdma_sync) { writel(I2C_DMA_WARM_RST, i2c->pdmabase + OFFSET_RST); From 24990423267ec283b9d86f07f362b753eb9b0ed5 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 26 May 2021 08:39:37 -0400 Subject: [PATCH 681/730] i2c: s3c2410: fix possible NULL pointer deref on read message after write Interrupt handler processes multiple message write requests one after another, till the driver message queue is drained. However if driver encounters a read message without preceding START, it stops the I2C transfer as it is an invalid condition for the controller. At least the comment describes a requirement "the controller forces us to send a new START when we change direction". This stop results in clearing the message queue (i2c->msg = NULL). The code however immediately jumped back to label "retry_write" which dereferenced the "i2c->msg" making it a possible NULL pointer dereference. The Coverity analysis: 1. Condition !is_msgend(i2c), taking false branch. if (!is_msgend(i2c)) { 2. Condition !is_lastmsg(i2c), taking true branch. } else if (!is_lastmsg(i2c)) { 3. Condition i2c->msg->flags & 1, taking true branch. if (i2c->msg->flags & I2C_M_RD) { 4. write_zero_model: Passing i2c to s3c24xx_i2c_stop, which sets i2c->msg to NULL. s3c24xx_i2c_stop(i2c, -EINVAL); 5. Jumping to label retry_write. goto retry_write; 6. var_deref_model: Passing i2c to is_msgend, which dereferences null i2c->msg. if (!is_msgend(i2c)) {" All previous calls to s3c24xx_i2c_stop() in this interrupt service routine are followed by jumping to end of function (acknowledging the interrupt and returning). This seems a reasonable choice also here since message buffer was entirely emptied. Addresses-Coverity: Explicit null dereferenced Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-s3c2410.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c index ab928613afba..4d82761e1585 100644 --- a/drivers/i2c/busses/i2c-s3c2410.c +++ b/drivers/i2c/busses/i2c-s3c2410.c @@ -480,7 +480,10 @@ static int i2c_s3c_irq_nextbyte(struct s3c24xx_i2c *i2c, unsigned long iicstat) * forces us to send a new START * when we change direction */ + dev_dbg(i2c->dev, + "missing START before write->read\n"); s3c24xx_i2c_stop(i2c, -EINVAL); + break; } goto retry_write; From 56dde68f85be0a20935bb4ed996db7a7f68b3202 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 28 May 2021 10:58:49 +0200 Subject: [PATCH 682/730] Revert "serial: 8250: 8250_omap: Fix possible interrupt storm" This reverts commit 31fae7c8b18c3f8029a2a5dce97a3182c1a167a0. Tony writes: I just noticed this causes the following regression in Linux next when pressing a key on uart console after boot at least on omap3. This seems to happen on serial_port_in(port, UART_RX) in the quirk handling. So let's drop this. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/YLCCJzkkB4N7LTQS@atomide.com Fixes: 31fae7c8b18c ("serial: 8250: 8250_omap: Fix possible interrupt storm") Reported-by: Tony Lindgren Cc: Jan Kiszka Cc: Vignesh Raghavendra Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_omap.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index c71bd766fa56..8ac11eaeca51 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -104,9 +104,6 @@ #define UART_OMAP_EFR2 0x23 #define UART_OMAP_EFR2_TIMEOUT_BEHAVE BIT(6) -/* RX FIFO occupancy indicator */ -#define UART_OMAP_RX_LVL 0x64 - struct omap8250_priv { int line; u8 habit; @@ -628,15 +625,6 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id) serial8250_rpm_get(up); iir = serial_port_in(port, UART_IIR); ret = serial8250_handle_irq(port, iir); - /* - * It is possible that RX TIMEOUT is signalled after FIFO - * has been drained, in which case a dummy read of RX FIFO is - * required to clear RX TIMEOUT condition. - */ - if ((iir & UART_IIR_RX_TIMEOUT) == UART_IIR_RX_TIMEOUT) { - if (serial_port_in(port, UART_OMAP_RX_LVL) == 0) - serial_port_in(port, UART_RX); - } serial8250_rpm_put(up); return IRQ_RETVAL(ret); From 8fc4e4aa2bfca8d32e8bc2a01526ea2da450e6cb Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Tue, 25 May 2021 12:07:23 +0530 Subject: [PATCH 683/730] perf vendor events powerpc: Fix eventcode of power10 JSON events Fixed the eventcode values in the power10 JSON event files to prepend "0x" since these are hexadecimal values. The patch also changes the event description of the PM_EXEC_STALL_LOAD_FINISH and PM_EXEC_STALL_NTC_FLUSH event and move some events to correct files. Fixes: 32daa5d7899e ("perf vendor events: Initial JSON/events list for power10 platform") Signed-off-by: Kajol Jain Reviewed-by: Paul A. Clarke Tested-by: Nageswara R Sastry Cc: Athira Jajeev Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Ravi Bangoria Cc: linuxppc-dev@lists.ozlabs.org Link: http://lore.kernel.org/lkml/20210525063723.1191514-1-kjain@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/powerpc/power10/cache.json | 32 ++-- .../arch/powerpc/power10/floating_point.json | 2 +- .../arch/powerpc/power10/frontend.json | 124 ++++++++++------ .../arch/powerpc/power10/locks.json | 4 +- .../arch/powerpc/power10/marked.json | 61 ++++---- .../arch/powerpc/power10/memory.json | 79 +++++----- .../arch/powerpc/power10/others.json | 133 +++++++---------- .../arch/powerpc/power10/pipeline.json | 137 +++++++++--------- .../pmu-events/arch/powerpc/power10/pmc.json | 8 +- .../arch/powerpc/power10/translation.json | 22 +-- 10 files changed, 301 insertions(+), 301 deletions(-) diff --git a/tools/perf/pmu-events/arch/powerpc/power10/cache.json b/tools/perf/pmu-events/arch/powerpc/power10/cache.json index 616f29098c71..605be14f441c 100644 --- a/tools/perf/pmu-events/arch/powerpc/power10/cache.json +++ b/tools/perf/pmu-events/arch/powerpc/power10/cache.json @@ -1,46 +1,56 @@ [ { - "EventCode": "1003C", + "EventCode": "0x1003C", "EventName": "PM_EXEC_STALL_DMISS_L2L3", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from either the local L2 or local L3." }, { - "EventCode": "34056", - "EventName": "PM_EXEC_STALL_LOAD_FINISH", - "BriefDescription": "Cycles in which the oldest instruction in the pipeline was finishing a load after its data was reloaded from a data source beyond the local L1; cycles in which the LSU was processing an L1-hit; cycles in which the NTF instruction merged with another load in the LMQ." + "EventCode": "0x1E054", + "EventName": "PM_EXEC_STALL_DMISS_L21_L31", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from another core's L2 or L3 on the same chip." }, { - "EventCode": "3006C", + "EventCode": "0x34054", + "EventName": "PM_EXEC_STALL_DMISS_L2L3_NOCONFLICT", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, without a dispatch conflict." + }, + { + "EventCode": "0x34056", + "EventName": "PM_EXEC_STALL_LOAD_FINISH", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was finishing a load after its data was reloaded from a data source beyond the local L1; cycles in which the LSU was processing an L1-hit; cycles in which the NTF instruction merged with another load in the LMQ; cycles in which the NTF instruction is waiting for a data reload for a load miss, but the data comes back with a non-NTF instruction." + }, + { + "EventCode": "0x3006C", "EventName": "PM_RUN_CYC_SMT2_MODE", "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT2 mode." }, { - "EventCode": "300F4", + "EventCode": "0x300F4", "EventName": "PM_RUN_INST_CMPL_CONC", "BriefDescription": "PowerPC instructions completed by this thread when all threads in the core had the run-latch set." }, { - "EventCode": "4C016", + "EventCode": "0x4C016", "EventName": "PM_EXEC_STALL_DMISS_L2L3_CONFLICT", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, with a dispatch conflict." }, { - "EventCode": "4D014", + "EventCode": "0x4D014", "EventName": "PM_EXEC_STALL_LOAD", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a load instruction executing in the Load Store Unit." }, { - "EventCode": "4D016", + "EventCode": "0x4D016", "EventName": "PM_EXEC_STALL_PTESYNC", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a PTESYNC instruction executing in the Load Store Unit." }, { - "EventCode": "401EA", + "EventCode": "0x401EA", "EventName": "PM_THRESH_EXC_128", "BriefDescription": "Threshold counter exceeded a value of 128." }, { - "EventCode": "400F6", + "EventCode": "0x400F6", "EventName": "PM_BR_MPRED_CMPL", "BriefDescription": "A mispredicted branch completed. Includes direction and target." } diff --git a/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json b/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json index 703cd431ae5b..54acb55e2c8c 100644 --- a/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json +++ b/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json @@ -1,6 +1,6 @@ [ { - "EventCode": "4016E", + "EventCode": "0x4016E", "EventName": "PM_THRESH_NOT_MET", "BriefDescription": "Threshold counter did not meet threshold." } diff --git a/tools/perf/pmu-events/arch/powerpc/power10/frontend.json b/tools/perf/pmu-events/arch/powerpc/power10/frontend.json index eac8609dcc90..558f9530f54e 100644 --- a/tools/perf/pmu-events/arch/powerpc/power10/frontend.json +++ b/tools/perf/pmu-events/arch/powerpc/power10/frontend.json @@ -1,216 +1,246 @@ [ { - "EventCode": "10004", + "EventCode": "0x10004", "EventName": "PM_EXEC_STALL_TRANSLATION", "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss or ERAT miss and waited for it to resolve." }, { - "EventCode": "10010", + "EventCode": "0x10006", + "EventName": "PM_DISP_STALL_HELD_OTHER_CYC", + "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any other reason." + }, + { + "EventCode": "0x10010", "EventName": "PM_PMC4_OVERFLOW", "BriefDescription": "The event selected for PMC4 caused the event counter to overflow." }, { - "EventCode": "10020", + "EventCode": "0x10020", "EventName": "PM_PMC4_REWIND", "BriefDescription": "The speculative event selected for PMC4 rewinds and the counter for PMC4 is not charged." }, { - "EventCode": "10038", + "EventCode": "0x10038", "EventName": "PM_DISP_STALL_TRANSLATION", "BriefDescription": "Cycles when dispatch was stalled for this thread because the MMU was handling a translation miss." }, { - "EventCode": "1003A", + "EventCode": "0x1003A", "EventName": "PM_DISP_STALL_BR_MPRED_IC_L2", "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2 after suffering a branch mispredict." }, { - "EventCode": "1E050", + "EventCode": "0x1D05E", + "EventName": "PM_DISP_STALL_HELD_HALT_CYC", + "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of power management." + }, + { + "EventCode": "0x1E050", "EventName": "PM_DISP_STALL_HELD_STF_MAPPER_CYC", "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the STF mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR." }, { - "EventCode": "1F054", + "EventCode": "0x1F054", "EventName": "PM_DTLB_HIT", "BriefDescription": "The PTE required by the instruction was resident in the TLB (data TLB access). When MMCR1[16]=0 this event counts only demand hits. When MMCR1[16]=1 this event includes demand and prefetch. Applies to both HPT and RPT." }, { - "EventCode": "101E8", + "EventCode": "0x10064", + "EventName": "PM_DISP_STALL_IC_L2", + "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2." + }, + { + "EventCode": "0x101E8", "EventName": "PM_THRESH_EXC_256", "BriefDescription": "Threshold counter exceeded a count of 256." }, { - "EventCode": "101EC", + "EventCode": "0x101EC", "EventName": "PM_THRESH_MET", "BriefDescription": "Threshold exceeded." }, { - "EventCode": "100F2", + "EventCode": "0x100F2", "EventName": "PM_1PLUS_PPC_CMPL", "BriefDescription": "Cycles in which at least one instruction is completed by this thread." }, { - "EventCode": "100F6", + "EventCode": "0x100F6", "EventName": "PM_IERAT_MISS", "BriefDescription": "IERAT Reloaded to satisfy an IERAT miss. All page sizes are counted by this event." }, { - "EventCode": "100F8", + "EventCode": "0x100F8", "EventName": "PM_DISP_STALL_CYC", "BriefDescription": "Cycles the ICT has no itags assigned to this thread (no instructions were dispatched during these cycles)." }, { - "EventCode": "20114", + "EventCode": "0x20006", + "EventName": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC", + "BriefDescription": "Cycles in which the NTC instruction is held at dispatch due to Issue queue full. Includes issue queue and branch queue." + }, + { + "EventCode": "0x20114", "EventName": "PM_MRK_L2_RC_DISP", "BriefDescription": "Marked instruction RC dispatched in L2." }, { - "EventCode": "2C010", + "EventCode": "0x2C010", "EventName": "PM_EXEC_STALL_LSU", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the Load Store Unit. This does not include simple fixed point instructions." }, { - "EventCode": "2C016", + "EventCode": "0x2C016", "EventName": "PM_DISP_STALL_IERAT_ONLY_MISS", "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction ERAT miss." }, { - "EventCode": "2C01E", + "EventCode": "0x2C01E", "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3", "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3 after suffering a branch mispredict." }, { - "EventCode": "2D01A", + "EventCode": "0x2D01A", "EventName": "PM_DISP_STALL_IC_MISS", "BriefDescription": "Cycles when dispatch was stalled for this thread due to an Icache Miss." }, { - "EventCode": "2D01C", - "EventName": "PM_CMPL_STALL_STCX", - "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a stcx waiting for resolution from the nest before completing." - }, - { - "EventCode": "2E018", + "EventCode": "0x2E018", "EventName": "PM_DISP_STALL_FETCH", "BriefDescription": "Cycles when dispatch was stalled for this thread because Fetch was being held." }, { - "EventCode": "2E01A", + "EventCode": "0x2E01A", "EventName": "PM_DISP_STALL_HELD_XVFC_MAPPER_CYC", "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the XVFC mapper/SRB was full." }, { - "EventCode": "2C142", + "EventCode": "0x2C142", "EventName": "PM_MRK_XFER_FROM_SRC_PMC2", "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[15:27]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." }, { - "EventCode": "24050", + "EventCode": "0x24050", "EventName": "PM_IOPS_DISP", "BriefDescription": "Internal Operations dispatched. PM_IOPS_DISP / PM_INST_DISP will show the average number of internal operations per PowerPC instruction." }, { - "EventCode": "2405E", + "EventCode": "0x2405E", "EventName": "PM_ISSUE_CANCEL", "BriefDescription": "An instruction issued and the issue was later cancelled. Only one cancel per PowerPC instruction." }, { - "EventCode": "200FA", + "EventCode": "0x200FA", "EventName": "PM_BR_TAKEN_CMPL", "BriefDescription": "Branch Taken instruction completed." }, { - "EventCode": "30012", + "EventCode": "0x30004", + "EventName": "PM_DISP_STALL_FLUSH", + "BriefDescription": "Cycles when dispatch was stalled because of a flush that happened to an instruction(s) that was not yet NTC. PM_EXEC_STALL_NTC_FLUSH only includes instructions that were flushed after becoming NTC." + }, + { + "EventCode": "0x3000A", + "EventName": "PM_DISP_STALL_ITLB_MISS", + "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction TLB miss." + }, + { + "EventCode": "0x30012", "EventName": "PM_FLUSH_COMPLETION", "BriefDescription": "The instruction that was next to complete (oldest in the pipeline) did not complete because it suffered a flush." }, { - "EventCode": "30014", + "EventCode": "0x30014", "EventName": "PM_EXEC_STALL_STORE", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store instruction executing in the Load Store Unit." }, { - "EventCode": "30018", + "EventCode": "0x30018", "EventName": "PM_DISP_STALL_HELD_SCOREBOARD_CYC", "BriefDescription": "Cycles in which the NTC instruction is held at dispatch while waiting on the Scoreboard. This event combines VSCR and FPSCR together." }, { - "EventCode": "30026", + "EventCode": "0x30026", "EventName": "PM_EXEC_STALL_STORE_MISS", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store whose cache line was not resident in the L1 and was waiting for allocation of the missing line into the L1." }, { - "EventCode": "3012A", + "EventCode": "0x3012A", "EventName": "PM_MRK_L2_RC_DONE", "BriefDescription": "L2 RC machine completed the transaction for the marked instruction." }, { - "EventCode": "3F046", + "EventCode": "0x3F046", "EventName": "PM_ITLB_HIT_1G", "BriefDescription": "Instruction TLB hit (IERAT reload) page size 1G, which implies Radix Page Table translation is in use. When MMCR1[17]=0 this event counts only for demand misses. When MMCR1[17]=1 this event includes demand misses and prefetches." }, { - "EventCode": "34058", + "EventCode": "0x34058", "EventName": "PM_DISP_STALL_BR_MPRED_ICMISS", "BriefDescription": "Cycles when dispatch was stalled after a mispredicted branch resulted in an instruction cache miss." }, { - "EventCode": "3D05C", + "EventCode": "0x3D05C", "EventName": "PM_DISP_STALL_HELD_RENAME_CYC", "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR and XVFC." }, { - "EventCode": "3E052", + "EventCode": "0x3E052", "EventName": "PM_DISP_STALL_IC_L3", "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3." }, { - "EventCode": "3E054", + "EventCode": "0x3E054", "EventName": "PM_LD_MISS_L1", "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load." }, { - "EventCode": "301EA", + "EventCode": "0x301EA", "EventName": "PM_THRESH_EXC_1024", "BriefDescription": "Threshold counter exceeded a value of 1024." }, { - "EventCode": "300FA", + "EventCode": "0x300FA", "EventName": "PM_INST_FROM_L3MISS", "BriefDescription": "The processor's instruction cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss." }, { - "EventCode": "40006", + "EventCode": "0x40006", "EventName": "PM_ISSUE_KILL", "BriefDescription": "Cycles in which an instruction or group of instructions were cancelled after being issued. This event increments once per occurrence, regardless of how many instructions are included in the issue group." }, { - "EventCode": "40116", + "EventCode": "0x40116", "EventName": "PM_MRK_LARX_FIN", "BriefDescription": "Marked load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock." }, { - "EventCode": "4C010", + "EventCode": "0x4C010", "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3MISS", "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from sources beyond the local L3 after suffering a mispredicted branch." }, { - "EventCode": "4D01E", + "EventCode": "0x4D01E", "EventName": "PM_DISP_STALL_BR_MPRED", "BriefDescription": "Cycles when dispatch was stalled for this thread due to a mispredicted branch." }, { - "EventCode": "4E010", + "EventCode": "0x4E010", "EventName": "PM_DISP_STALL_IC_L3MISS", "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from any source beyond the local L3." }, { - "EventCode": "4E01A", + "EventCode": "0x4E01A", "EventName": "PM_DISP_STALL_HELD_CYC", "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any reason." }, { - "EventCode": "44056", + "EventCode": "0x4003C", + "EventName": "PM_DISP_STALL_HELD_SYNC_CYC", + "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch." + }, + { + "EventCode": "0x44056", "EventName": "PM_VECTOR_ST_CMPL", "BriefDescription": "Vector store instructions completed." } diff --git a/tools/perf/pmu-events/arch/powerpc/power10/locks.json b/tools/perf/pmu-events/arch/powerpc/power10/locks.json index 016d8de0e14a..b5a0d6521963 100644 --- a/tools/perf/pmu-events/arch/powerpc/power10/locks.json +++ b/tools/perf/pmu-events/arch/powerpc/power10/locks.json @@ -1,11 +1,11 @@ [ { - "EventCode": "1E058", + "EventCode": "0x1E058", "EventName": "PM_STCX_FAIL_FIN", "BriefDescription": "Conditional store instruction (STCX) failed. LARX and STCX are instructions used to acquire a lock." }, { - "EventCode": "4E050", + "EventCode": "0x4E050", "EventName": "PM_STCX_PASS_FIN", "BriefDescription": "Conditional store instruction (STCX) passed. LARX and STCX are instructions used to acquire a lock." } diff --git a/tools/perf/pmu-events/arch/powerpc/power10/marked.json b/tools/perf/pmu-events/arch/powerpc/power10/marked.json index 93a5a5910648..58b5dfe3a273 100644 --- a/tools/perf/pmu-events/arch/powerpc/power10/marked.json +++ b/tools/perf/pmu-events/arch/powerpc/power10/marked.json @@ -1,146 +1,141 @@ [ { - "EventCode": "1002C", + "EventCode": "0x1002C", "EventName": "PM_LD_PREFETCH_CACHE_LINE_MISS", "BriefDescription": "The L1 cache was reloaded with a line that fulfills a prefetch request." }, { - "EventCode": "10132", + "EventCode": "0x10132", "EventName": "PM_MRK_INST_ISSUED", "BriefDescription": "Marked instruction issued. Note that stores always get issued twice, the address gets issued to the LSU and the data gets issued to the VSU. Also, issues can sometimes get killed/cancelled and cause multiple sequential issues for the same instruction." }, { - "EventCode": "101E0", + "EventCode": "0x101E0", "EventName": "PM_MRK_INST_DISP", "BriefDescription": "The thread has dispatched a randomly sampled marked instruction." }, { - "EventCode": "101E2", + "EventCode": "0x101E2", "EventName": "PM_MRK_BR_TAKEN_CMPL", "BriefDescription": "Marked Branch Taken instruction completed." }, { - "EventCode": "20112", + "EventCode": "0x20112", "EventName": "PM_MRK_NTF_FIN", "BriefDescription": "The marked instruction became the oldest in the pipeline before it finished. It excludes instructions that finish at dispatch." }, { - "EventCode": "2C01C", + "EventCode": "0x2C01C", "EventName": "PM_EXEC_STALL_DMISS_OFF_CHIP", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a remote chip." }, { - "EventCode": "20138", + "EventCode": "0x20138", "EventName": "PM_MRK_ST_NEST", "BriefDescription": "A store has been sampled/marked and is at the point of execution where it has completed in the core and can no longer be flushed. At this point the store is sent to the L2." }, { - "EventCode": "2013A", + "EventCode": "0x2013A", "EventName": "PM_MRK_BRU_FIN", "BriefDescription": "Marked Branch instruction finished." }, { - "EventCode": "2C144", + "EventCode": "0x2C144", "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC2", "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[15:27]." }, { - "EventCode": "24156", + "EventCode": "0x24156", "EventName": "PM_MRK_STCX_FIN", "BriefDescription": "Marked conditional store instruction (STCX) finished. LARX and STCX are instructions used to acquire a lock." }, { - "EventCode": "24158", + "EventCode": "0x24158", "EventName": "PM_MRK_INST", "BriefDescription": "An instruction was marked. Includes both Random Instruction Sampling (RIS) at decode time and Random Event Sampling (RES) at the time the configured event happens." }, { - "EventCode": "2415C", + "EventCode": "0x2415C", "EventName": "PM_MRK_BR_CMPL", "BriefDescription": "A marked branch completed. All branches are included." }, { - "EventCode": "200FD", + "EventCode": "0x200FD", "EventName": "PM_L1_ICACHE_MISS", "BriefDescription": "Demand iCache Miss." }, { - "EventCode": "30130", + "EventCode": "0x30130", "EventName": "PM_MRK_INST_FIN", "BriefDescription": "marked instruction finished. Excludes instructions that finish at dispatch. Note that stores always finish twice since the address gets issued to the LSU and the data gets issued to the VSU." }, { - "EventCode": "34146", + "EventCode": "0x34146", "EventName": "PM_MRK_LD_CMPL", "BriefDescription": "Marked loads completed." }, { - "EventCode": "3E158", + "EventCode": "0x3E158", "EventName": "PM_MRK_STCX_FAIL", "BriefDescription": "Marked conditional store instruction (STCX) failed. LARX and STCX are instructions used to acquire a lock." }, { - "EventCode": "3E15A", + "EventCode": "0x3E15A", "EventName": "PM_MRK_ST_FIN", "BriefDescription": "The marked instruction was a store of any kind." }, { - "EventCode": "30068", + "EventCode": "0x30068", "EventName": "PM_L1_ICACHE_RELOADED_PREF", "BriefDescription": "Counts all Icache prefetch reloads ( includes demand turned into prefetch)." }, { - "EventCode": "301E4", + "EventCode": "0x301E4", "EventName": "PM_MRK_BR_MPRED_CMPL", "BriefDescription": "Marked Branch Mispredicted. Includes direction and target." }, { - "EventCode": "300F6", + "EventCode": "0x300F6", "EventName": "PM_LD_DEMAND_MISS_L1", "BriefDescription": "The L1 cache was reloaded with a line that fulfills a demand miss request. Counted at reload time, before finish." }, { - "EventCode": "300FE", + "EventCode": "0x300FE", "EventName": "PM_DATA_FROM_L3MISS", "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss." }, { - "EventCode": "40012", + "EventCode": "0x40012", "EventName": "PM_L1_ICACHE_RELOADED_ALL", "BriefDescription": "Counts all Icache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch." }, { - "EventCode": "40134", + "EventCode": "0x40134", "EventName": "PM_MRK_INST_TIMEO", "BriefDescription": "Marked instruction finish timeout (instruction was lost)." }, { - "EventCode": "4003C", - "EventName": "PM_DISP_STALL_HELD_SYNC_CYC", - "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch." - }, - { - "EventCode": "4505A", + "EventCode": "0x4505A", "EventName": "PM_SP_FLOP_CMPL", "BriefDescription": "Single Precision floating point instructions completed." }, { - "EventCode": "4D058", + "EventCode": "0x4D058", "EventName": "PM_VECTOR_FLOP_CMPL", "BriefDescription": "Vector floating point instructions completed." }, { - "EventCode": "4D05A", + "EventCode": "0x4D05A", "EventName": "PM_NON_MATH_FLOP_CMPL", "BriefDescription": "Non Math instructions completed." }, { - "EventCode": "401E0", + "EventCode": "0x401E0", "EventName": "PM_MRK_INST_CMPL", "BriefDescription": "marked instruction completed." }, { - "EventCode": "400FE", + "EventCode": "0x400FE", "EventName": "PM_DATA_FROM_MEMORY", "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss." } diff --git a/tools/perf/pmu-events/arch/powerpc/power10/memory.json b/tools/perf/pmu-events/arch/powerpc/power10/memory.json index b01141eeebee..843b51f531e9 100644 --- a/tools/perf/pmu-events/arch/powerpc/power10/memory.json +++ b/tools/perf/pmu-events/arch/powerpc/power10/memory.json @@ -1,191 +1,186 @@ [ { - "EventCode": "1000A", + "EventCode": "0x1000A", "EventName": "PM_PMC3_REWIND", "BriefDescription": "The speculative event selected for PMC3 rewinds and the counter for PMC3 is not charged." }, { - "EventCode": "1C040", + "EventCode": "0x1C040", "EventName": "PM_XFER_FROM_SRC_PMC1", "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." }, { - "EventCode": "1C142", + "EventCode": "0x1C142", "EventName": "PM_MRK_XFER_FROM_SRC_PMC1", "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." }, { - "EventCode": "1C144", + "EventCode": "0x1C144", "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC1", "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[0:12]." }, { - "EventCode": "1C056", + "EventCode": "0x1C056", "EventName": "PM_DERAT_MISS_4K", "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 4K. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "1C058", + "EventCode": "0x1C058", "EventName": "PM_DTLB_MISS_16G", "BriefDescription": "Data TLB reload (after a miss) page size 16G. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "1C05C", + "EventCode": "0x1C05C", "EventName": "PM_DTLB_MISS_2M", "BriefDescription": "Data TLB reload (after a miss) page size 2M. Implies radix translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "1E056", + "EventCode": "0x1E056", "EventName": "PM_EXEC_STALL_STORE_PIPE", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the store unit. This does not include cycles spent handling store misses, PTESYNC instructions or TLBIE instructions." }, { - "EventCode": "1F150", + "EventCode": "0x1F150", "EventName": "PM_MRK_ST_L2_CYC", "BriefDescription": "Cycles from L2 RC dispatch to L2 RC completion." }, { - "EventCode": "10062", + "EventCode": "0x10062", "EventName": "PM_LD_L3MISS_PEND_CYC", "BriefDescription": "Cycles L3 miss was pending for this thread." }, { - "EventCode": "20010", + "EventCode": "0x20010", "EventName": "PM_PMC1_OVERFLOW", "BriefDescription": "The event selected for PMC1 caused the event counter to overflow." }, { - "EventCode": "2001A", + "EventCode": "0x2001A", "EventName": "PM_ITLB_HIT", "BriefDescription": "The PTE required to translate the instruction address was resident in the TLB (instruction TLB access/IERAT reload). Applies to both HPT and RPT. When MMCR1[17]=0 this event counts only for demand misses. When MMCR1[17]=1 this event includes demand misses and prefetches." }, { - "EventCode": "2003E", + "EventCode": "0x2003E", "EventName": "PM_PTESYNC_FIN", "BriefDescription": "Ptesync instruction finished in the store unit. Only one ptesync can finish at a time." }, { - "EventCode": "2C040", + "EventCode": "0x2C040", "EventName": "PM_XFER_FROM_SRC_PMC2", "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[15:27]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." }, { - "EventCode": "2C054", + "EventCode": "0x2C054", "EventName": "PM_DERAT_MISS_64K", "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "2C056", + "EventCode": "0x2C056", "EventName": "PM_DTLB_MISS_4K", "BriefDescription": "Data TLB reload (after a miss) page size 4K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "2D154", + "EventCode": "0x2D154", "EventName": "PM_MRK_DERAT_MISS_64K", "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "200F6", + "EventCode": "0x200F6", "EventName": "PM_DERAT_MISS", "BriefDescription": "DERAT Reloaded to satisfy a DERAT miss. All page sizes are counted by this event. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "3000A", - "EventName": "PM_DISP_STALL_ITLB_MISS", - "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction TLB miss." - }, - { - "EventCode": "30016", + "EventCode": "0x30016", "EventName": "PM_EXEC_STALL_DERAT_DTLB_MISS", "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss and waited for it resolve." }, { - "EventCode": "3C040", + "EventCode": "0x3C040", "EventName": "PM_XFER_FROM_SRC_PMC3", "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." }, { - "EventCode": "3C142", + "EventCode": "0x3C142", "EventName": "PM_MRK_XFER_FROM_SRC_PMC3", "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." }, { - "EventCode": "3C144", + "EventCode": "0x3C144", "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC3", "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[30:42]." }, { - "EventCode": "3C054", + "EventCode": "0x3C054", "EventName": "PM_DERAT_MISS_16M", "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16M. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "3C056", + "EventCode": "0x3C056", "EventName": "PM_DTLB_MISS_64K", "BriefDescription": "Data TLB reload (after a miss) page size 64K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "3C058", + "EventCode": "0x3C058", "EventName": "PM_LARX_FIN", "BriefDescription": "Load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock." }, { - "EventCode": "301E2", + "EventCode": "0x301E2", "EventName": "PM_MRK_ST_CMPL", "BriefDescription": "Marked store completed and sent to nest. Note that this count excludes cache-inhibited stores." }, { - "EventCode": "300FC", + "EventCode": "0x300FC", "EventName": "PM_DTLB_MISS", "BriefDescription": "The DPTEG required for the load/store instruction in execution was missing from the TLB. It includes pages of all sizes for demand and prefetch activity." }, { - "EventCode": "4D02C", + "EventCode": "0x4D02C", "EventName": "PM_PMC1_REWIND", "BriefDescription": "The speculative event selected for PMC1 rewinds and the counter for PMC1 is not charged." }, { - "EventCode": "4003E", + "EventCode": "0x4003E", "EventName": "PM_LD_CMPL", "BriefDescription": "Loads completed." }, { - "EventCode": "4C040", + "EventCode": "0x4C040", "EventName": "PM_XFER_FROM_SRC_PMC4", "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." }, { - "EventCode": "4C142", + "EventCode": "0x4C142", "EventName": "PM_MRK_XFER_FROM_SRC_PMC4", "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." }, { - "EventCode": "4C144", + "EventCode": "0x4C144", "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC4", "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[45:57]." }, { - "EventCode": "4C056", + "EventCode": "0x4C056", "EventName": "PM_DTLB_MISS_16M", "BriefDescription": "Data TLB reload (after a miss) page size 16M. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "4C05A", + "EventCode": "0x4C05A", "EventName": "PM_DTLB_MISS_1G", "BriefDescription": "Data TLB reload (after a miss) page size 1G. Implies radix translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "4C15E", + "EventCode": "0x4C15E", "EventName": "PM_MRK_DTLB_MISS_64K", "BriefDescription": "Marked Data TLB reload (after a miss) page size 64K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "4D056", + "EventCode": "0x4D056", "EventName": "PM_NON_FMA_FLOP_CMPL", "BriefDescription": "Non FMA instruction completed." }, { - "EventCode": "40164", + "EventCode": "0x40164", "EventName": "PM_MRK_DERAT_MISS_2M", "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." } diff --git a/tools/perf/pmu-events/arch/powerpc/power10/others.json b/tools/perf/pmu-events/arch/powerpc/power10/others.json index a119e56cbf1c..7d0de1a2860b 100644 --- a/tools/perf/pmu-events/arch/powerpc/power10/others.json +++ b/tools/perf/pmu-events/arch/powerpc/power10/others.json @@ -1,296 +1,271 @@ [ { - "EventCode": "10016", + "EventCode": "0x10016", "EventName": "PM_VSU0_ISSUE", "BriefDescription": "VSU instructions issued to VSU pipe 0." }, { - "EventCode": "1001C", + "EventCode": "0x1001C", "EventName": "PM_ULTRAVISOR_INST_CMPL", "BriefDescription": "PowerPC instructions that completed while the thread was in ultravisor state." }, { - "EventCode": "100F0", + "EventCode": "0x100F0", "EventName": "PM_CYC", "BriefDescription": "Processor cycles." }, { - "EventCode": "10134", + "EventCode": "0x10134", "EventName": "PM_MRK_ST_DONE_L2", "BriefDescription": "Marked stores completed in L2 (RC machine done)." }, { - "EventCode": "1505E", + "EventCode": "0x1505E", "EventName": "PM_LD_HIT_L1", "BriefDescription": "Loads that finished without experiencing an L1 miss." }, { - "EventCode": "1D05E", - "EventName": "PM_DISP_STALL_HELD_HALT_CYC", - "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of power management." - }, - { - "EventCode": "1E054", - "EventName": "PM_EXEC_STALL_DMISS_L21_L31", - "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from another core's L2 or L3 on the same chip." - }, - { - "EventCode": "1E05A", - "EventName": "PM_CMPL_STALL_LWSYNC", - "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a lwsync waiting to complete." - }, - { - "EventCode": "1F056", + "EventCode": "0x1F056", "EventName": "PM_DISP_SS0_2_INSTR_CYC", "BriefDescription": "Cycles in which Superslice 0 dispatches either 1 or 2 instructions." }, { - "EventCode": "1F15C", + "EventCode": "0x1F15C", "EventName": "PM_MRK_STCX_L2_CYC", "BriefDescription": "Cycles spent in the nest portion of a marked Stcx instruction. It starts counting when the operation starts to drain to the L2 and it stops counting when the instruction retires from the Instruction Completion Table (ICT) in the Instruction Sequencing Unit (ISU)." }, { - "EventCode": "10066", + "EventCode": "0x10066", "EventName": "PM_ADJUNCT_CYC", "BriefDescription": "Cycles in which the thread is in Adjunct state. MSR[S HV PR] bits = 011." }, { - "EventCode": "101E4", + "EventCode": "0x101E4", "EventName": "PM_MRK_L1_ICACHE_MISS", "BriefDescription": "Marked Instruction suffered an icache Miss." }, { - "EventCode": "101EA", + "EventCode": "0x101EA", "EventName": "PM_MRK_L1_RELOAD_VALID", "BriefDescription": "Marked demand reload." }, { - "EventCode": "100F4", + "EventCode": "0x100F4", "EventName": "PM_FLOP_CMPL", "BriefDescription": "Floating Point Operations Completed. Includes any type. It counts once for each 1, 2, 4 or 8 flop instruction. Use PM_1|2|4|8_FLOP_CMPL events to count flops." }, { - "EventCode": "100FA", + "EventCode": "0x100FA", "EventName": "PM_RUN_LATCH_ANY_THREAD_CYC", "BriefDescription": "Cycles when at least one thread has the run latch set." }, { - "EventCode": "100FC", + "EventCode": "0x100FC", "EventName": "PM_LD_REF_L1", "BriefDescription": "All L1 D cache load references counted at finish, gated by reject. In P9 and earlier this event counted only cacheable loads but in P10 both cacheable and non-cacheable loads are included." }, { - "EventCode": "20006", - "EventName": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC", - "BriefDescription": "Cycles in which the NTC instruction is held at dispatch due to Issue queue full. Includes issue queue and branch queue." - }, - { - "EventCode": "2000C", + "EventCode": "0x2000C", "EventName": "PM_RUN_LATCH_ALL_THREADS_CYC", "BriefDescription": "Cycles when the run latch is set for all threads." }, { - "EventCode": "2E010", + "EventCode": "0x2E010", "EventName": "PM_ADJUNCT_INST_CMPL", "BriefDescription": "PowerPC instructions that completed while the thread is in Adjunct state." }, { - "EventCode": "2E014", + "EventCode": "0x2E014", "EventName": "PM_STCX_FIN", "BriefDescription": "Conditional store instruction (STCX) finished. LARX and STCX are instructions used to acquire a lock." }, { - "EventCode": "20130", + "EventCode": "0x20130", "EventName": "PM_MRK_INST_DECODED", "BriefDescription": "An instruction was marked at decode time. Random Instruction Sampling (RIS) only." }, { - "EventCode": "20132", + "EventCode": "0x20132", "EventName": "PM_MRK_DFU_ISSUE", "BriefDescription": "The marked instruction was a decimal floating point operation issued to the VSU. Measured at issue time." }, { - "EventCode": "20134", + "EventCode": "0x20134", "EventName": "PM_MRK_FXU_ISSUE", "BriefDescription": "The marked instruction was a fixed point operation issued to the VSU. Measured at issue time." }, { - "EventCode": "2505C", + "EventCode": "0x2505C", "EventName": "PM_VSU_ISSUE", "BriefDescription": "At least one VSU instruction was issued to one of the VSU pipes. Up to 4 per cycle. Includes fixed point operations." }, { - "EventCode": "2F054", + "EventCode": "0x2F054", "EventName": "PM_DISP_SS1_2_INSTR_CYC", "BriefDescription": "Cycles in which Superslice 1 dispatches either 1 or 2 instructions." }, { - "EventCode": "2F056", + "EventCode": "0x2F056", "EventName": "PM_DISP_SS1_4_INSTR_CYC", "BriefDescription": "Cycles in which Superslice 1 dispatches either 3 or 4 instructions." }, { - "EventCode": "2006C", + "EventCode": "0x2006C", "EventName": "PM_RUN_CYC_SMT4_MODE", "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT4 mode." }, { - "EventCode": "201E0", + "EventCode": "0x201E0", "EventName": "PM_MRK_DATA_FROM_MEMORY", "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss for a marked load." }, { - "EventCode": "201E4", + "EventCode": "0x201E4", "EventName": "PM_MRK_DATA_FROM_L3MISS", "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked load." }, { - "EventCode": "201E8", + "EventCode": "0x201E8", "EventName": "PM_THRESH_EXC_512", "BriefDescription": "Threshold counter exceeded a value of 512." }, { - "EventCode": "200F2", + "EventCode": "0x200F2", "EventName": "PM_INST_DISP", "BriefDescription": "PowerPC instructions dispatched." }, { - "EventCode": "30132", + "EventCode": "0x30132", "EventName": "PM_MRK_VSU_FIN", "BriefDescription": "VSU marked instructions finished. Excludes simple FX instructions issued to the Store Unit." }, { - "EventCode": "30038", + "EventCode": "0x30038", "EventName": "PM_EXEC_STALL_DMISS_LMEM", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local memory, local OpenCapp cache, or local OpenCapp memory." }, { - "EventCode": "3F04A", + "EventCode": "0x3F04A", "EventName": "PM_LSU_ST5_FIN", "BriefDescription": "LSU Finished an internal operation in ST2 port." }, { - "EventCode": "34054", - "EventName": "PM_EXEC_STALL_DMISS_L2L3_NOCONFLICT", - "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, without a dispatch conflict." - }, - { - "EventCode": "3405A", + "EventCode": "0x3405A", "EventName": "PM_PRIVILEGED_INST_CMPL", "BriefDescription": "PowerPC Instructions that completed while the thread is in Privileged state." }, { - "EventCode": "3F150", + "EventCode": "0x3F150", "EventName": "PM_MRK_ST_DRAIN_CYC", "BriefDescription": "cycles to drain st from core to L2." }, { - "EventCode": "3F054", + "EventCode": "0x3F054", "EventName": "PM_DISP_SS0_4_INSTR_CYC", "BriefDescription": "Cycles in which Superslice 0 dispatches either 3 or 4 instructions." }, { - "EventCode": "3F056", + "EventCode": "0x3F056", "EventName": "PM_DISP_SS0_8_INSTR_CYC", "BriefDescription": "Cycles in which Superslice 0 dispatches either 5, 6, 7 or 8 instructions." }, { - "EventCode": "30162", + "EventCode": "0x30162", "EventName": "PM_MRK_ISSUE_DEPENDENT_LOAD", "BriefDescription": "The marked instruction was dependent on a load. It is eligible for issue kill." }, { - "EventCode": "40114", + "EventCode": "0x40114", "EventName": "PM_MRK_START_PROBE_NOP_DISP", "BriefDescription": "Marked Start probe nop dispatched. Instruction AND R0,R0,R0." }, { - "EventCode": "4001C", + "EventCode": "0x4001C", "EventName": "PM_VSU_FIN", "BriefDescription": "VSU instructions finished." }, { - "EventCode": "4C01A", + "EventCode": "0x4C01A", "EventName": "PM_EXEC_STALL_DMISS_OFF_NODE", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a distant chip." }, { - "EventCode": "4D012", + "EventCode": "0x4D012", "EventName": "PM_PMC3_SAVED", "BriefDescription": "The conditions for the speculative event selected for PMC3 are met and PMC3 is charged." }, { - "EventCode": "4D022", + "EventCode": "0x4D022", "EventName": "PM_HYPERVISOR_INST_CMPL", "BriefDescription": "PowerPC instructions that completed while the thread is in hypervisor state." }, { - "EventCode": "4D026", + "EventCode": "0x4D026", "EventName": "PM_ULTRAVISOR_CYC", "BriefDescription": "Cycles when the thread is in Ultravisor state. MSR[S HV PR]=110." }, { - "EventCode": "4D028", + "EventCode": "0x4D028", "EventName": "PM_PRIVILEGED_CYC", "BriefDescription": "Cycles when the thread is in Privileged state. MSR[S HV PR]=x00." }, { - "EventCode": "40030", + "EventCode": "0x40030", "EventName": "PM_INST_FIN", "BriefDescription": "Instructions finished." }, { - "EventCode": "44146", + "EventCode": "0x44146", "EventName": "PM_MRK_STCX_CORE_CYC", "BriefDescription": "Cycles spent in the core portion of a marked Stcx instruction. It starts counting when the instruction is decoded and stops counting when it drains into the L2." }, { - "EventCode": "44054", + "EventCode": "0x44054", "EventName": "PM_VECTOR_LD_CMPL", "BriefDescription": "Vector load instructions completed." }, { - "EventCode": "45054", + "EventCode": "0x45054", "EventName": "PM_FMA_CMPL", "BriefDescription": "Two floating point instructions completed (FMA class of instructions: fmadd, fnmadd, fmsub, fnmsub). Scalar instructions only." }, { - "EventCode": "45056", + "EventCode": "0x45056", "EventName": "PM_SCALAR_FLOP_CMPL", "BriefDescription": "Scalar floating point instructions completed." }, { - "EventCode": "4505C", + "EventCode": "0x4505C", "EventName": "PM_MATH_FLOP_CMPL", "BriefDescription": "Math floating point instructions completed." }, { - "EventCode": "4D05E", + "EventCode": "0x4D05E", "EventName": "PM_BR_CMPL", "BriefDescription": "A branch completed. All branches are included." }, { - "EventCode": "4E15E", + "EventCode": "0x4E15E", "EventName": "PM_MRK_INST_FLUSHED", "BriefDescription": "The marked instruction was flushed." }, { - "EventCode": "401E6", + "EventCode": "0x401E6", "EventName": "PM_MRK_INST_FROM_L3MISS", "BriefDescription": "The processor's instruction cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked instruction." }, { - "EventCode": "401E8", + "EventCode": "0x401E8", "EventName": "PM_MRK_DATA_FROM_L2MISS", "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1 or L2 due to a demand miss for a marked load." }, { - "EventCode": "400F0", + "EventCode": "0x400F0", "EventName": "PM_LD_DEMAND_MISS_L1_FIN", "BriefDescription": "Load Missed L1, counted at finish time." }, { - "EventCode": "400FA", + "EventCode": "0x400FA", "EventName": "PM_RUN_INST_CMPL", "BriefDescription": "Completed PowerPC instructions gated by the run latch." } diff --git a/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json index b61b5cc157ee..b8aded6045fa 100644 --- a/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json +++ b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json @@ -1,296 +1,291 @@ [ { - "EventCode": "100FE", + "EventCode": "0x100FE", "EventName": "PM_INST_CMPL", "BriefDescription": "PowerPC instructions completed." }, { - "EventCode": "10006", - "EventName": "PM_DISP_STALL_HELD_OTHER_CYC", - "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any other reason." - }, - { - "EventCode": "1000C", + "EventCode": "0x1000C", "EventName": "PM_LSU_LD0_FIN", "BriefDescription": "LSU Finished an internal operation in LD0 port." }, { - "EventCode": "1000E", + "EventCode": "0x1000E", "EventName": "PM_MMA_ISSUED", "BriefDescription": "MMA instructions issued." }, { - "EventCode": "10012", + "EventCode": "0x10012", "EventName": "PM_LSU_ST0_FIN", "BriefDescription": "LSU Finished an internal operation in ST0 port." }, { - "EventCode": "10014", + "EventCode": "0x10014", "EventName": "PM_LSU_ST4_FIN", "BriefDescription": "LSU Finished an internal operation in ST4 port." }, { - "EventCode": "10018", + "EventCode": "0x10018", "EventName": "PM_IC_DEMAND_CYC", "BriefDescription": "Cycles in which an instruction reload is pending to satisfy a demand miss." }, { - "EventCode": "10022", + "EventCode": "0x10022", "EventName": "PM_PMC2_SAVED", "BriefDescription": "The conditions for the speculative event selected for PMC2 are met and PMC2 is charged." }, { - "EventCode": "10024", + "EventCode": "0x10024", "EventName": "PM_PMC5_OVERFLOW", "BriefDescription": "The event selected for PMC5 caused the event counter to overflow." }, { - "EventCode": "10058", + "EventCode": "0x10058", "EventName": "PM_EXEC_STALL_FIN_AT_DISP", "BriefDescription": "Cycles in which the oldest instruction in the pipeline finished at dispatch and did not require execution in the LSU, BRU or VSU." }, { - "EventCode": "1005A", + "EventCode": "0x1005A", "EventName": "PM_FLUSH_MPRED", "BriefDescription": "A flush occurred due to a mispredicted branch. Includes target and direction." }, { - "EventCode": "1C05A", + "EventCode": "0x1C05A", "EventName": "PM_DERAT_MISS_2M", "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M. Implies radix translation. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "10064", - "EventName": "PM_DISP_STALL_IC_L2", - "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2." + "EventCode": "0x1E05A", + "EventName": "PM_CMPL_STALL_LWSYNC", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a lwsync waiting to complete." }, { - "EventCode": "10068", + "EventCode": "0x10068", "EventName": "PM_BR_FIN", "BriefDescription": "A branch instruction finished. Includes predicted/mispredicted/unconditional." }, { - "EventCode": "1006A", + "EventCode": "0x1006A", "EventName": "PM_FX_LSU_FIN", "BriefDescription": "Simple fixed point instruction issued to the store unit. Measured at finish time." }, { - "EventCode": "1006C", + "EventCode": "0x1006C", "EventName": "PM_RUN_CYC_ST_MODE", "BriefDescription": "Cycles when the run latch is set and the core is in ST mode." }, { - "EventCode": "20004", + "EventCode": "0x20004", "EventName": "PM_ISSUE_STALL", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was dispatched but not issued yet." }, { - "EventCode": "2000A", + "EventCode": "0x2000A", "EventName": "PM_HYPERVISOR_CYC", "BriefDescription": "Cycles when the thread is in Hypervisor state. MSR[S HV PR]=010." }, { - "EventCode": "2000E", + "EventCode": "0x2000E", "EventName": "PM_LSU_LD1_FIN", "BriefDescription": "LSU Finished an internal operation in LD1 port." }, { - "EventCode": "2C014", + "EventCode": "0x2C014", "EventName": "PM_CMPL_STALL_SPECIAL", "BriefDescription": "Cycles in which the oldest instruction in the pipeline required special handling before completing." }, { - "EventCode": "2C018", + "EventCode": "0x2C018", "EventName": "PM_EXEC_STALL_DMISS_L3MISS", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a source beyond the local L2 or local L3." }, { - "EventCode": "2D010", + "EventCode": "0x2D010", "EventName": "PM_LSU_ST1_FIN", "BriefDescription": "LSU Finished an internal operation in ST1 port." }, { - "EventCode": "2D012", + "EventCode": "0x2D012", "EventName": "PM_VSU1_ISSUE", "BriefDescription": "VSU instructions issued to VSU pipe 1." }, { - "EventCode": "2D018", + "EventCode": "0x2D018", "EventName": "PM_EXEC_STALL_VSU", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the VSU (includes FXU, VSU, CRU)." }, { - "EventCode": "2E01E", - "EventName": "PM_EXEC_STALL_NTC_FLUSH", - "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in any unit before it was flushed. Note that if the flush of the oldest instruction happens after finish, the cycles from dispatch to issue will be included in PM_DISP_STALL and the cycles from issue to finish will be included in PM_EXEC_STALL and its corresponding children." + "EventCode": "0x2D01C", + "EventName": "PM_CMPL_STALL_STCX", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a stcx waiting for resolution from the nest before completing." }, { - "EventCode": "2013C", + "EventCode": "0x2E01E", + "EventName": "PM_EXEC_STALL_NTC_FLUSH", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in any unit before it was flushed. Note that if the flush of the oldest instruction happens after finish, the cycles from dispatch to issue will be included in PM_DISP_STALL and the cycles from issue to finish will be included in PM_EXEC_STALL and its corresponding children. This event will also count cycles when the previous NTF instruction is still completing and the new NTF instruction is stalled at dispatch." + }, + { + "EventCode": "0x2013C", "EventName": "PM_MRK_FX_LSU_FIN", "BriefDescription": "The marked instruction was simple fixed point that was issued to the store unit. Measured at finish time." }, { - "EventCode": "2405A", + "EventCode": "0x2405A", "EventName": "PM_NTC_FIN", "BriefDescription": "Cycles in which the oldest instruction in the pipeline (NTC) finishes. Note that instructions can finish out of order, therefore not all the instructions that finish have a Next-to-complete status." }, { - "EventCode": "201E2", + "EventCode": "0x201E2", "EventName": "PM_MRK_LD_MISS_L1", "BriefDescription": "Marked DL1 Demand Miss counted at finish time." }, { - "EventCode": "200F4", + "EventCode": "0x200F4", "EventName": "PM_RUN_CYC", "BriefDescription": "Processor cycles gated by the run latch." }, { - "EventCode": "30004", - "EventName": "PM_DISP_STALL_FLUSH", - "BriefDescription": "Cycles when dispatch was stalled because of a flush that happened to an instruction(s) that was not yet NTC. PM_EXEC_STALL_NTC_FLUSH only includes instructions that were flushed after becoming NTC." - }, - { - "EventCode": "30008", + "EventCode": "0x30008", "EventName": "PM_EXEC_STALL", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting to finish in one of the execution units (BRU, LSU, VSU). Only cycles between issue and finish are counted in this category." }, { - "EventCode": "3001A", + "EventCode": "0x3001A", "EventName": "PM_LSU_ST2_FIN", "BriefDescription": "LSU Finished an internal operation in ST2 port." }, { - "EventCode": "30020", + "EventCode": "0x30020", "EventName": "PM_PMC2_REWIND", "BriefDescription": "The speculative event selected for PMC2 rewinds and the counter for PMC2 is not charged." }, { - "EventCode": "30022", + "EventCode": "0x30022", "EventName": "PM_PMC4_SAVED", "BriefDescription": "The conditions for the speculative event selected for PMC4 are met and PMC4 is charged." }, { - "EventCode": "30024", + "EventCode": "0x30024", "EventName": "PM_PMC6_OVERFLOW", "BriefDescription": "The event selected for PMC6 caused the event counter to overflow." }, { - "EventCode": "30028", + "EventCode": "0x30028", "EventName": "PM_CMPL_STALL_MEM_ECC", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for the non-speculative finish of either a stcx waiting for its result or a load waiting for non-critical sectors of data and ECC." }, { - "EventCode": "30036", + "EventCode": "0x30036", "EventName": "PM_EXEC_STALL_SIMPLE_FX", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a simple fixed point instruction executing in the Load Store Unit." }, { - "EventCode": "3003A", + "EventCode": "0x3003A", "EventName": "PM_CMPL_STALL_EXCEPTION", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was not allowed to complete because it was interrupted by ANY exception, which has to be serviced before the instruction can complete." }, { - "EventCode": "3F044", + "EventCode": "0x3F044", "EventName": "PM_VSU2_ISSUE", "BriefDescription": "VSU instructions issued to VSU pipe 2." }, { - "EventCode": "30058", + "EventCode": "0x30058", "EventName": "PM_TLBIE_FIN", "BriefDescription": "TLBIE instructions finished in the LSU. Two TLBIEs can finish each cycle. All will be counted." }, { - "EventCode": "3D058", + "EventCode": "0x3D058", "EventName": "PM_SCALAR_FSQRT_FDIV_ISSUE", "BriefDescription": "Scalar versions of four floating point operations: fdiv,fsqrt (xvdivdp, xvdivsp, xvsqrtdp, xvsqrtsp)." }, { - "EventCode": "30066", + "EventCode": "0x30066", "EventName": "PM_LSU_FIN", "BriefDescription": "LSU Finished an internal operation (up to 4 per cycle)." }, { - "EventCode": "40004", + "EventCode": "0x40004", "EventName": "PM_FXU_ISSUE", "BriefDescription": "A fixed point instruction was issued to the VSU." }, { - "EventCode": "40008", + "EventCode": "0x40008", "EventName": "PM_NTC_ALL_FIN", "BriefDescription": "Cycles in which both instructions in the ICT entry pair show as finished. These are the cycles between finish and completion for the oldest pair of instructions in the pipeline." }, { - "EventCode": "40010", + "EventCode": "0x40010", "EventName": "PM_PMC3_OVERFLOW", "BriefDescription": "The event selected for PMC3 caused the event counter to overflow." }, { - "EventCode": "4C012", + "EventCode": "0x4C012", "EventName": "PM_EXEC_STALL_DERAT_ONLY_MISS", "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered an ERAT miss and waited for it resolve." }, { - "EventCode": "4C018", + "EventCode": "0x4C018", "EventName": "PM_CMPL_STALL", "BriefDescription": "Cycles in which the oldest instruction in the pipeline cannot complete because the thread was blocked for any reason." }, { - "EventCode": "4C01E", + "EventCode": "0x4C01E", "EventName": "PM_LSU_ST3_FIN", "BriefDescription": "LSU Finished an internal operation in ST3 port." }, { - "EventCode": "4D018", + "EventCode": "0x4D018", "EventName": "PM_EXEC_STALL_BRU", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the Branch unit." }, { - "EventCode": "4D01A", + "EventCode": "0x4D01A", "EventName": "PM_CMPL_STALL_HWSYNC", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a hwsync waiting for response from L2 before completing." }, { - "EventCode": "4D01C", + "EventCode": "0x4D01C", "EventName": "PM_EXEC_STALL_TLBIEL", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIEL instruction executing in the Load Store Unit. TLBIEL instructions have lower overhead than TLBIE instructions because they don't get set to the nest." }, { - "EventCode": "4E012", + "EventCode": "0x4E012", "EventName": "PM_EXEC_STALL_UNKNOWN", "BriefDescription": "Cycles in which the oldest instruction in the pipeline completed without an ntf_type pulse. The ntf_pulse was missed by the ISU because the NTF finishes and completions came too close together." }, { - "EventCode": "4D020", + "EventCode": "0x4D020", "EventName": "PM_VSU3_ISSUE", "BriefDescription": "VSU instruction was issued to VSU pipe 3." }, { - "EventCode": "40132", + "EventCode": "0x40132", "EventName": "PM_MRK_LSU_FIN", "BriefDescription": "LSU marked instruction finish." }, { - "EventCode": "45058", + "EventCode": "0x45058", "EventName": "PM_IC_MISS_CMPL", "BriefDescription": "Non-speculative icache miss, counted at completion." }, { - "EventCode": "4D050", + "EventCode": "0x4D050", "EventName": "PM_VSU_NON_FLOP_CMPL", "BriefDescription": "Non-floating point VSU instructions completed." }, { - "EventCode": "4D052", + "EventCode": "0x4D052", "EventName": "PM_2FLOP_CMPL", "BriefDescription": "Double Precision vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg completed." }, { - "EventCode": "400F2", + "EventCode": "0x400F2", "EventName": "PM_1PLUS_PPC_DISP", "BriefDescription": "Cycles at least one Instr Dispatched." }, { - "EventCode": "400F8", + "EventCode": "0x400F8", "EventName": "PM_FLUSH", "BriefDescription": "Flush (any type)." } diff --git a/tools/perf/pmu-events/arch/powerpc/power10/pmc.json b/tools/perf/pmu-events/arch/powerpc/power10/pmc.json index ea122a91ceb0..b5d1bd39cfb2 100644 --- a/tools/perf/pmu-events/arch/powerpc/power10/pmc.json +++ b/tools/perf/pmu-events/arch/powerpc/power10/pmc.json @@ -1,21 +1,21 @@ [ { - "EventCode": "301E8", + "EventCode": "0x301E8", "EventName": "PM_THRESH_EXC_64", "BriefDescription": "Threshold counter exceeded a value of 64." }, { - "EventCode": "45050", + "EventCode": "0x45050", "EventName": "PM_1FLOP_CMPL", "BriefDescription": "One floating point instruction completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)." }, { - "EventCode": "45052", + "EventCode": "0x45052", "EventName": "PM_4FLOP_CMPL", "BriefDescription": "Four floating point instructions completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)." }, { - "EventCode": "4D054", + "EventCode": "0x4D054", "EventName": "PM_8FLOP_CMPL", "BriefDescription": "Four Double Precision vector instructions completed." } diff --git a/tools/perf/pmu-events/arch/powerpc/power10/translation.json b/tools/perf/pmu-events/arch/powerpc/power10/translation.json index 5a714e3dd71a..db3766dca07c 100644 --- a/tools/perf/pmu-events/arch/powerpc/power10/translation.json +++ b/tools/perf/pmu-events/arch/powerpc/power10/translation.json @@ -1,56 +1,56 @@ [ { - "EventCode": "1F15E", + "EventCode": "0x1F15E", "EventName": "PM_MRK_START_PROBE_NOP_CMPL", "BriefDescription": "Marked Start probe nop (AND R0,R0,R0) completed." }, { - "EventCode": "20016", + "EventCode": "0x20016", "EventName": "PM_ST_FIN", "BriefDescription": "Store finish count. Includes speculative activity." }, { - "EventCode": "20018", + "EventCode": "0x20018", "EventName": "PM_ST_FWD", "BriefDescription": "Store forwards that finished." }, { - "EventCode": "2011C", + "EventCode": "0x2011C", "EventName": "PM_MRK_NTF_CYC", "BriefDescription": "Cycles during which the marked instruction is the oldest in the pipeline (NTF or NTC)." }, { - "EventCode": "2E01C", + "EventCode": "0x2E01C", "EventName": "PM_EXEC_STALL_TLBIE", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIE instruction executing in the Load Store Unit." }, { - "EventCode": "201E6", + "EventCode": "0x201E6", "EventName": "PM_THRESH_EXC_32", "BriefDescription": "Threshold counter exceeded a value of 32." }, { - "EventCode": "200F0", + "EventCode": "0x200F0", "EventName": "PM_ST_CMPL", "BriefDescription": "Stores completed from S2Q (2nd-level store queue). This event includes regular stores, stcx and cache inhibited stores. The following operations are excluded (pteupdate, snoop tlbie complete, store atomics, miso, load atomic payloads, tlbie, tlbsync, slbieg, isync, msgsnd, slbiag, cpabort, copy, tcheck, tend, stsync, dcbst, icbi, dcbf, hwsync, lwsync, ptesync, eieio, msgsync)." }, { - "EventCode": "200FE", + "EventCode": "0x200FE", "EventName": "PM_DATA_FROM_L2MISS", "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1 or L2 due to a demand miss." }, { - "EventCode": "30010", + "EventCode": "0x30010", "EventName": "PM_PMC2_OVERFLOW", "BriefDescription": "The event selected for PMC2 caused the event counter to overflow." }, { - "EventCode": "4D010", + "EventCode": "0x4D010", "EventName": "PM_PMC1_SAVED", "BriefDescription": "The conditions for the speculative event selected for PMC1 are met and PMC1 is charged." }, { - "EventCode": "4D05C", + "EventCode": "0x4D05C", "EventName": "PM_DPP_FLOP_CMPL", "BriefDescription": "Double-Precision or Quad-Precision instructions completed." } From 8aa0ae439966364da86fc6437375e32f2890c4c3 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 19 Apr 2021 08:18:09 +0200 Subject: [PATCH 684/730] MAINTAINERS: adjust to removing i2c designware platform data Commit 5a517b5bf687 ("i2c: designware: Get rid of legacy platform data") removes ./include/linux/platform_data/i2c-designware.h, but misses to adjust the SYNOPSYS DESIGNWARE I2C DRIVER section in MAINTAINERS. Hence, ./scripts/get_maintainer.pl --self-test=patterns complains: warning: no file matches F: include/linux/platform_data/i2c-designware.h Remove the file entry to this removed file as well. Signed-off-by: Lukas Bulwahn Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 81e1edeceae4..e686cf614262 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17662,7 +17662,6 @@ R: Mika Westerberg L: linux-i2c@vger.kernel.org S: Maintained F: drivers/i2c/busses/i2c-designware-* -F: include/linux/platform_data/i2c-designware.h SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER M: Jaehoon Chung From aac902925ea646e461c95edc98a8a57eb0def917 Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Mon, 17 May 2021 12:39:05 -0700 Subject: [PATCH 685/730] Documentation: seccomp: Fix user notification documentation The documentation had some previously incorrect information about how userspace notifications (and responses) were handled due to a change from a previously proposed patchset. Signed-off-by: Sargun Dhillon Acked-by: Tycho Andersen Acked-by: Christian Brauner Signed-off-by: Kees Cook Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210517193908.3113-2-sargun@sargun.me --- Documentation/userspace-api/seccomp_filter.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst index bd9165241b6c..6efb41cc8072 100644 --- a/Documentation/userspace-api/seccomp_filter.rst +++ b/Documentation/userspace-api/seccomp_filter.rst @@ -250,14 +250,14 @@ Users can read via ``ioctl(SECCOMP_IOCTL_NOTIF_RECV)`` (or ``poll()``) on a seccomp notification fd to receive a ``struct seccomp_notif``, which contains five members: the input length of the structure, a unique-per-filter ``id``, the ``pid`` of the task which triggered this request (which may be 0 if the -task is in a pid ns not visible from the listener's pid namespace), a ``flags`` -member which for now only has ``SECCOMP_NOTIF_FLAG_SIGNALED``, representing -whether or not the notification is a result of a non-fatal signal, and the -``data`` passed to seccomp. Userspace can then make a decision based on this -information about what to do, and ``ioctl(SECCOMP_IOCTL_NOTIF_SEND)`` a -response, indicating what should be returned to userspace. The ``id`` member of -``struct seccomp_notif_resp`` should be the same ``id`` as in ``struct -seccomp_notif``. +task is in a pid ns not visible from the listener's pid namespace). The +notification also contains the ``data`` passed to seccomp, and a filters flag. +The structure should be zeroed out prior to calling the ioctl. + +Userspace can then make a decision based on this information about what to do, +and ``ioctl(SECCOMP_IOCTL_NOTIF_SEND)`` a response, indicating what should be +returned to userspace. The ``id`` member of ``struct seccomp_notif_resp`` should +be the same ``id`` as in ``struct seccomp_notif``. It is worth noting that ``struct seccomp_data`` contains the values of register arguments to the syscall, but does not contain pointers to memory. The task's From ddc473916955f7710d1eb17c1273d91c8622a9fe Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Mon, 17 May 2021 12:39:06 -0700 Subject: [PATCH 686/730] seccomp: Refactor notification handler to prepare for new semantics This refactors the user notification code to have a do / while loop around the completion condition. This has a small change in semantic, in that previously we ignored addfd calls upon wakeup if the notification had been responded to, but instead with the new change we check for an outstanding addfd calls prior to returning to userspace. Rodrigo Campos also identified a bug that can result in addfd causing an early return, when the supervisor didn't actually handle the syscall [1]. [1]: https://lore.kernel.org/lkml/20210413160151.3301-1-rodrigo@kinvolk.io/ Fixes: 7cf97b125455 ("seccomp: Introduce addfd ioctl to seccomp user notifier") Signed-off-by: Sargun Dhillon Acked-by: Tycho Andersen Acked-by: Christian Brauner Signed-off-by: Kees Cook Tested-by: Rodrigo Campos Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210517193908.3113-3-sargun@sargun.me --- kernel/seccomp.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 6ecd3f3a52b5..9f58049ac16d 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -1105,28 +1105,30 @@ static int seccomp_do_user_notification(int this_syscall, up(&match->notif->request); wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM); - mutex_unlock(&match->notify_lock); /* * This is where we wait for a reply from userspace. */ -wait: - err = wait_for_completion_interruptible(&n.ready); - mutex_lock(&match->notify_lock); - if (err == 0) { - /* Check if we were woken up by a addfd message */ + do { + mutex_unlock(&match->notify_lock); + err = wait_for_completion_interruptible(&n.ready); + mutex_lock(&match->notify_lock); + if (err != 0) + goto interrupted; + addfd = list_first_entry_or_null(&n.addfd, struct seccomp_kaddfd, list); - if (addfd && n.state != SECCOMP_NOTIFY_REPLIED) { + /* Check if we were woken up by a addfd message */ + if (addfd) seccomp_handle_addfd(addfd); - mutex_unlock(&match->notify_lock); - goto wait; - } - ret = n.val; - err = n.error; - flags = n.flags; - } + } while (n.state != SECCOMP_NOTIFY_REPLIED); + + ret = n.val; + err = n.error; + flags = n.flags; + +interrupted: /* If there were any pending addfd calls, clear them out */ list_for_each_entry_safe(addfd, tmp, &n.addfd, list) { /* The process went away before we got a chance to handle it */ From 8124c8a6b35386f73523d27eacb71b5364a68c4c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 30 May 2021 11:58:25 -1000 Subject: [PATCH 687/730] Linux 5.13-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 82ef373caf4d..b79e0e8acbe3 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 13 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Frozen Wasteland # *DOCUMENTATION* From 0e5cb7770684b4c81bcc63f4675e488f9a0e31eb Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 27 Feb 2021 10:23:45 +0000 Subject: [PATCH 688/730] irqchip/gic: Split vGIC probing information from the GIC code The vGIC advertising code is unsurprisingly very much tied to the GIC implementations. However, we are about to extend the support to lesser implementations. Let's dissociate the vgic registration from the GIC code and move it into KVM, where it makes a bit more sense. This also allows us to mark the gic_kvm_info structures as __initdata. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-init.c | 18 +++++++++-- drivers/irqchip/irq-gic-common.c | 13 -------- drivers/irqchip/irq-gic-common.h | 2 -- drivers/irqchip/irq-gic-v3.c | 6 ++-- drivers/irqchip/irq-gic.c | 6 ++-- include/linux/irqchip/arm-gic-common.h | 25 +--------------- include/linux/irqchip/arm-vgic-info.h | 41 ++++++++++++++++++++++++++ 7 files changed, 63 insertions(+), 48 deletions(-) create mode 100644 include/linux/irqchip/arm-vgic-info.h diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 58cbda00e56d..2fdb65529594 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -482,6 +482,16 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data) return IRQ_HANDLED; } +static struct gic_kvm_info *gic_kvm_info; + +void __init vgic_set_kvm_info(const struct gic_kvm_info *info) +{ + BUG_ON(gic_kvm_info != NULL); + gic_kvm_info = kmalloc(sizeof(*info), GFP_KERNEL); + if (gic_kvm_info) + *gic_kvm_info = *info; +} + /** * kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware * @@ -509,10 +519,8 @@ void kvm_vgic_init_cpu_hardware(void) */ int kvm_vgic_hyp_init(void) { - const struct gic_kvm_info *gic_kvm_info; int ret; - gic_kvm_info = gic_get_kvm_info(); if (!gic_kvm_info) return -ENODEV; @@ -536,10 +544,14 @@ int kvm_vgic_hyp_init(void) ret = -ENODEV; } + kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq; + + kfree(gic_kvm_info); + gic_kvm_info = NULL; + if (ret) return ret; - kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq; ret = request_percpu_irq(kvm_vgic_global_state.maint_irq, vgic_maintenance_handler, "vgic", kvm_get_running_vcpus()); diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c index f47b41dfd023..a610821c8ff2 100644 --- a/drivers/irqchip/irq-gic-common.c +++ b/drivers/irqchip/irq-gic-common.c @@ -12,19 +12,6 @@ static DEFINE_RAW_SPINLOCK(irq_controller_lock); -static const struct gic_kvm_info *gic_kvm_info; - -const struct gic_kvm_info *gic_get_kvm_info(void) -{ - return gic_kvm_info; -} - -void gic_set_kvm_info(const struct gic_kvm_info *info) -{ - BUG_ON(gic_kvm_info != NULL); - gic_kvm_info = info; -} - void gic_enable_of_quirks(const struct device_node *np, const struct gic_quirk *quirks, void *data) { diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h index ccba8b0fe0f5..27e3d4ed4f32 100644 --- a/drivers/irqchip/irq-gic-common.h +++ b/drivers/irqchip/irq-gic-common.h @@ -28,6 +28,4 @@ void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks, void gic_enable_of_quirks(const struct device_node *np, const struct gic_quirk *quirks, void *data); -void gic_set_kvm_info(const struct gic_kvm_info *info); - #endif /* _IRQ_GIC_COMMON_H */ diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 37a23aa6de37..453fc425eede 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -103,7 +103,7 @@ EXPORT_SYMBOL(gic_nonsecure_priorities); /* ppi_nmi_refs[n] == number of cpus having ppi[n + 16] set as NMI */ static refcount_t *ppi_nmi_refs; -static struct gic_kvm_info gic_v3_kvm_info; +static struct gic_kvm_info gic_v3_kvm_info __initdata; static DEFINE_PER_CPU(bool, has_rss); #define MPIDR_RS(mpidr) (((mpidr) & 0xF0UL) >> 4) @@ -1852,7 +1852,7 @@ static void __init gic_of_setup_kvm_info(struct device_node *node) gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis; gic_v3_kvm_info.has_v4_1 = gic_data.rdists.has_rvpeid; - gic_set_kvm_info(&gic_v3_kvm_info); + vgic_set_kvm_info(&gic_v3_kvm_info); } static int __init gic_of_init(struct device_node *node, struct device_node *parent) @@ -2168,7 +2168,7 @@ static void __init gic_acpi_setup_kvm_info(void) gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis; gic_v3_kvm_info.has_v4_1 = gic_data.rdists.has_rvpeid; - gic_set_kvm_info(&gic_v3_kvm_info); + vgic_set_kvm_info(&gic_v3_kvm_info); } static int __init diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index b1d9c22caf2e..2de9ec8ece0c 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -119,7 +119,7 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); static struct gic_chip_data gic_data[CONFIG_ARM_GIC_MAX_NR] __read_mostly; -static struct gic_kvm_info gic_v2_kvm_info; +static struct gic_kvm_info gic_v2_kvm_info __initdata; static DEFINE_PER_CPU(u32, sgi_intid); @@ -1451,7 +1451,7 @@ static void __init gic_of_setup_kvm_info(struct device_node *node) return; if (static_branch_likely(&supports_deactivate_key)) - gic_set_kvm_info(&gic_v2_kvm_info); + vgic_set_kvm_info(&gic_v2_kvm_info); } int __init @@ -1618,7 +1618,7 @@ static void __init gic_acpi_setup_kvm_info(void) gic_v2_kvm_info.maint_irq = irq; - gic_set_kvm_info(&gic_v2_kvm_info); + vgic_set_kvm_info(&gic_v2_kvm_info); } static int __init gic_v2_acpi_init(union acpi_subtable_headers *header, diff --git a/include/linux/irqchip/arm-gic-common.h b/include/linux/irqchip/arm-gic-common.h index fa8c0455c352..1177f3a1aed5 100644 --- a/include/linux/irqchip/arm-gic-common.h +++ b/include/linux/irqchip/arm-gic-common.h @@ -7,8 +7,7 @@ #ifndef __LINUX_IRQCHIP_ARM_GIC_COMMON_H #define __LINUX_IRQCHIP_ARM_GIC_COMMON_H -#include -#include +#include #define GICD_INT_DEF_PRI 0xa0 #define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\ @@ -16,28 +15,6 @@ (GICD_INT_DEF_PRI << 8) |\ GICD_INT_DEF_PRI) -enum gic_type { - GIC_V2, - GIC_V3, -}; - -struct gic_kvm_info { - /* GIC type */ - enum gic_type type; - /* Virtual CPU interface */ - struct resource vcpu; - /* Interrupt number */ - unsigned int maint_irq; - /* Virtual control interface */ - struct resource vctrl; - /* vlpi support */ - bool has_v4; - /* rvpeid support */ - bool has_v4_1; -}; - -const struct gic_kvm_info *gic_get_kvm_info(void); - struct irq_domain; struct fwnode_handle; int gicv2m_init(struct fwnode_handle *parent_handle, diff --git a/include/linux/irqchip/arm-vgic-info.h b/include/linux/irqchip/arm-vgic-info.h new file mode 100644 index 000000000000..a25d4da5697d --- /dev/null +++ b/include/linux/irqchip/arm-vgic-info.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * include/linux/irqchip/arm-vgic-info.h + * + * Copyright (C) 2016 ARM Limited, All Rights Reserved. + */ +#ifndef __LINUX_IRQCHIP_ARM_VGIC_INFO_H +#define __LINUX_IRQCHIP_ARM_VGIC_INFO_H + +#include +#include + +enum gic_type { + /* Full GICv2 */ + GIC_V2, + /* Full GICv3, optionally with v2 compat */ + GIC_V3, +}; + +struct gic_kvm_info { + /* GIC type */ + enum gic_type type; + /* Virtual CPU interface */ + struct resource vcpu; + /* Interrupt number */ + unsigned int maint_irq; + /* Virtual control interface */ + struct resource vctrl; + /* vlpi support */ + bool has_v4; + /* rvpeid support */ + bool has_v4_1; +}; + +#ifdef CONFIG_KVM +void vgic_set_kvm_info(const struct gic_kvm_info *info); +#else +static inline void vgic_set_kvm_info(const struct gic_kvm_info *info) {} +#endif + +#endif From 74501499d4e0d4ba59ab2bc6be1873716549169d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 19 Feb 2021 16:39:31 +0000 Subject: [PATCH 689/730] KVM: arm64: Handle physical FIQ as an IRQ while running a guest As we we now entertain the possibility of FIQ being used on the host, treat the signalling of a FIQ while running a guest as an IRQ, causing an exit instead of a HYP panic. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/hyp-entry.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 5f49df4ffdd8..9aa9b73475c9 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -76,6 +76,7 @@ el1_trap: b __guest_exit el1_irq: +el1_fiq: get_vcpu_ptr x1, x0 mov x0, #ARM_EXCEPTION_IRQ b __guest_exit @@ -131,7 +132,6 @@ SYM_CODE_END(\label) invalid_vector el2t_error_invalid invalid_vector el2h_irq_invalid invalid_vector el2h_fiq_invalid - invalid_vector el1_fiq_invalid .ltorg @@ -179,12 +179,12 @@ SYM_CODE_START(__kvm_hyp_vector) valid_vect el1_sync // Synchronous 64-bit EL1 valid_vect el1_irq // IRQ 64-bit EL1 - invalid_vect el1_fiq_invalid // FIQ 64-bit EL1 + valid_vect el1_fiq // FIQ 64-bit EL1 valid_vect el1_error // Error 64-bit EL1 valid_vect el1_sync // Synchronous 32-bit EL1 valid_vect el1_irq // IRQ 32-bit EL1 - invalid_vect el1_fiq_invalid // FIQ 32-bit EL1 + valid_vect el1_fiq // FIQ 32-bit EL1 valid_vect el1_error // Error 32-bit EL1 SYM_CODE_END(__kvm_hyp_vector) From 669062d2a1aa36661b490683fe17810aa24a9cfb Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 28 Feb 2021 11:09:59 +0000 Subject: [PATCH 690/730] KVM: arm64: vgic: Be tolerant to the lack of maintenance interrupt masking As it turns out, not all the interrupt controllers are able to expose a vGIC maintenance interrupt that can be independently enabled/disabled. And to be fair, it doesn't really matter as all we require is for the interrupt to kick us out of guest mode out way or another. To that effect, add gic_kvm_info.no_maint_irq_mask for an interrupt controller to advertise the lack of masking. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-init.c | 8 +++++++- include/linux/irqchip/arm-vgic-info.h | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 2fdb65529594..6752d084934d 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -519,12 +519,15 @@ void kvm_vgic_init_cpu_hardware(void) */ int kvm_vgic_hyp_init(void) { + bool has_mask; int ret; if (!gic_kvm_info) return -ENODEV; - if (!gic_kvm_info->maint_irq) { + has_mask = !gic_kvm_info->no_maint_irq_mask; + + if (has_mask && !gic_kvm_info->maint_irq) { kvm_err("No vgic maintenance irq\n"); return -ENXIO; } @@ -552,6 +555,9 @@ int kvm_vgic_hyp_init(void) if (ret) return ret; + if (!has_mask) + return 0; + ret = request_percpu_irq(kvm_vgic_global_state.maint_irq, vgic_maintenance_handler, "vgic", kvm_get_running_vcpus()); diff --git a/include/linux/irqchip/arm-vgic-info.h b/include/linux/irqchip/arm-vgic-info.h index a25d4da5697d..7c0d08ebb82c 100644 --- a/include/linux/irqchip/arm-vgic-info.h +++ b/include/linux/irqchip/arm-vgic-info.h @@ -24,6 +24,8 @@ struct gic_kvm_info { struct resource vcpu; /* Interrupt number */ unsigned int maint_irq; + /* No interrupt mask, no need to use the above field */ + bool no_maint_irq_mask; /* Virtual control interface */ struct resource vctrl; /* vlpi support */ From f6c3e24fb721dda247f6691c809d6e6c413f22c7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Mar 2021 21:56:47 +0000 Subject: [PATCH 691/730] KVM: arm64: vgic: Let an interrupt controller advertise lack of HW deactivation The vGIC, as architected by ARM, allows a virtual interrupt to trigger the deactivation of a physical interrupt. This allows the following interrupt to be delivered without requiring an exit. However, some implementations have choosen not to implement this, meaning that we will need some unsavoury workarounds to deal with this. On detecting such a case, taint the kernel and spit a nastygram. We'll deal with this in later patches. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-init.c | 10 ++++++++++ include/kvm/arm_vgic.h | 3 +++ include/linux/irqchip/arm-vgic-info.h | 2 ++ 3 files changed, 15 insertions(+) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 6752d084934d..340c51d87677 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -532,6 +532,16 @@ int kvm_vgic_hyp_init(void) return -ENXIO; } + /* + * If we get one of these oddball non-GICs, taint the kernel, + * as we have no idea of how they *really* behave. + */ + if (gic_kvm_info->no_hw_deactivation) { + kvm_info("Non-architectural vgic, tainting kernel\n"); + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); + kvm_vgic_global_state.no_hw_deactivation = true; + } + switch (gic_kvm_info->type) { case GIC_V2: ret = vgic_v2_probe(gic_kvm_info); diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index ec621180ef09..e45b26e8d479 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -72,6 +72,9 @@ struct vgic_global { bool has_gicv4; bool has_gicv4_1; + /* Pseudo GICv3 from outer space */ + bool no_hw_deactivation; + /* GIC system register CPU interface */ struct static_key_false gicv3_cpuif; diff --git a/include/linux/irqchip/arm-vgic-info.h b/include/linux/irqchip/arm-vgic-info.h index 7c0d08ebb82c..a75b2c7de69d 100644 --- a/include/linux/irqchip/arm-vgic-info.h +++ b/include/linux/irqchip/arm-vgic-info.h @@ -32,6 +32,8 @@ struct gic_kvm_info { bool has_v4; /* rvpeid support */ bool has_v4_1; + /* Deactivation impared, subpar stuff */ + bool no_hw_deactivation; }; #ifdef CONFIG_KVM From db75f1a33f82ad332b6e139c5960e01999969d2c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 1 Mar 2021 17:39:39 +0000 Subject: [PATCH 692/730] KVM: arm64: vgic: move irq->get_input_level into an ops structure We already have the option to attach a callback to an interrupt to retrieve its pending state. As we are planning to expand this facility, move this callback into its own data structure. This will limit the size of individual interrupts as the ops structures can be shared across multiple interrupts. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 8 ++++++-- arch/arm64/kvm/vgic/vgic.c | 14 +++++++------- include/kvm/arm_vgic.h | 28 +++++++++++++++++----------- 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 74e0699661e9..e2288b6bf435 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -1116,6 +1116,10 @@ bool kvm_arch_timer_get_input_level(int vintid) return kvm_timer_should_fire(timer); } +static struct irq_ops arch_timer_irq_ops = { + .get_input_level = kvm_arch_timer_get_input_level, +}; + int kvm_timer_enable(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); @@ -1143,7 +1147,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) ret = kvm_vgic_map_phys_irq(vcpu, map.direct_vtimer->host_timer_irq, map.direct_vtimer->irq.irq, - kvm_arch_timer_get_input_level); + &arch_timer_irq_ops); if (ret) return ret; @@ -1151,7 +1155,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) ret = kvm_vgic_map_phys_irq(vcpu, map.direct_ptimer->host_timer_irq, map.direct_ptimer->irq.irq, - kvm_arch_timer_get_input_level); + &arch_timer_irq_ops); } if (ret) diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 15b666200f0b..111bff47e471 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -182,8 +182,8 @@ bool vgic_get_phys_line_level(struct vgic_irq *irq) BUG_ON(!irq->hw); - if (irq->get_input_level) - return irq->get_input_level(irq->intid); + if (irq->ops && irq->ops->get_input_level) + return irq->ops->get_input_level(irq->intid); WARN_ON(irq_get_irqchip_state(irq->host_irq, IRQCHIP_STATE_PENDING, @@ -480,7 +480,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, /* @irq->irq_lock must be held */ static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq, unsigned int host_irq, - bool (*get_input_level)(int vindid)) + struct irq_ops *ops) { struct irq_desc *desc; struct irq_data *data; @@ -500,7 +500,7 @@ static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq, irq->hw = true; irq->host_irq = host_irq; irq->hwintid = data->hwirq; - irq->get_input_level = get_input_level; + irq->ops = ops; return 0; } @@ -509,11 +509,11 @@ static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq) { irq->hw = false; irq->hwintid = 0; - irq->get_input_level = NULL; + irq->ops = NULL; } int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, - u32 vintid, bool (*get_input_level)(int vindid)) + u32 vintid, struct irq_ops *ops) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); unsigned long flags; @@ -522,7 +522,7 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, BUG_ON(!irq); raw_spin_lock_irqsave(&irq->irq_lock, flags); - ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level); + ret = kvm_vgic_map_irq(vcpu, irq, host_irq, ops); raw_spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index e45b26e8d479..e5f06df000f2 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -92,6 +92,21 @@ enum vgic_irq_config { VGIC_CONFIG_LEVEL }; +/* + * Per-irq ops overriding some common behavious. + * + * Always called in non-preemptible section and the functions can use + * kvm_arm_get_running_vcpu() to get the vcpu pointer for private IRQs. + */ +struct irq_ops { + /* + * Callback function pointer to in-kernel devices that can tell us the + * state of the input level of mapped level-triggered IRQ faster than + * peaking into the physical GIC. + */ + bool (*get_input_level)(int vintid); +}; + struct vgic_irq { raw_spinlock_t irq_lock; /* Protects the content of the struct */ struct list_head lpi_list; /* Used to link all LPIs together */ @@ -129,16 +144,7 @@ struct vgic_irq { u8 group; /* 0 == group 0, 1 == group 1 */ enum vgic_irq_config config; /* Level or edge */ - /* - * Callback function pointer to in-kernel devices that can tell us the - * state of the input level of mapped level-triggered IRQ faster than - * peaking into the physical GIC. - * - * Always called in non-preemptible section and the functions can use - * kvm_arm_get_running_vcpu() to get the vcpu pointer for private - * IRQs. - */ - bool (*get_input_level)(int vintid); + struct irq_ops *ops; void *owner; /* Opaque pointer to reserve an interrupt for in-kernel devices. */ @@ -355,7 +361,7 @@ void kvm_vgic_init_cpu_hardware(void); int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, bool level, void *owner); int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, - u32 vintid, bool (*get_input_level)(int vindid)); + u32 vintid, struct irq_ops *ops); int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid); bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid); From 354920e79441c8a53ac73008b06d3b70ed06eb34 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Mar 2021 13:11:58 +0000 Subject: [PATCH 693/730] KVM: arm64: vgic: Implement SW-driven deactivation In order to deal with these systems that do not offer HW-based deactivation of interrupts, let implement a SW-based approach: - When the irq is queued into a LR, treat it as a pure virtual interrupt and set the EOI flag in the LR. - When the interrupt state is read back from the LR, force a deactivation when the state is invalid (neither active nor pending) Interrupts requiring such treatment get the VGIC_SW_RESAMPLE flag. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-v2.c | 19 +++++++++++++++---- arch/arm64/kvm/vgic/vgic-v3.c | 19 +++++++++++++++---- include/kvm/arm_vgic.h | 10 ++++++++++ 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c index 11934c2af2f4..2c580204f1dc 100644 --- a/arch/arm64/kvm/vgic/vgic-v2.c +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -108,11 +108,22 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) * If this causes us to lower the level, we have to also clear * the physical active state, since we will otherwise never be * told when the interrupt becomes asserted again. + * + * Another case is when the interrupt requires a helping hand + * on deactivation (no HW deactivation, for example). */ - if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT)) { - irq->line_level = vgic_get_phys_line_level(irq); + if (vgic_irq_is_mapped_level(irq)) { + bool resample = false; - if (!irq->line_level) + if (val & GICH_LR_PENDING_BIT) { + irq->line_level = vgic_get_phys_line_level(irq); + resample = !irq->line_level; + } else if (vgic_irq_needs_resampling(irq) && + !(irq->active || irq->pending_latch)) { + resample = true; + } + + if (resample) vgic_irq_set_phys_active(irq, false); } @@ -152,7 +163,7 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) if (irq->group) val |= GICH_LR_GROUP1; - if (irq->hw) { + if (irq->hw && !vgic_irq_needs_resampling(irq)) { val |= GICH_LR_HW; val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT; /* diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 41ecf219c333..66004f61cd83 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -101,11 +101,22 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) * If this causes us to lower the level, we have to also clear * the physical active state, since we will otherwise never be * told when the interrupt becomes asserted again. + * + * Another case is when the interrupt requires a helping hand + * on deactivation (no HW deactivation, for example). */ - if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) { - irq->line_level = vgic_get_phys_line_level(irq); + if (vgic_irq_is_mapped_level(irq)) { + bool resample = false; - if (!irq->line_level) + if (val & ICH_LR_PENDING_BIT) { + irq->line_level = vgic_get_phys_line_level(irq); + resample = !irq->line_level; + } else if (vgic_irq_needs_resampling(irq) && + !(irq->active || irq->pending_latch)) { + resample = true; + } + + if (resample) vgic_irq_set_phys_active(irq, false); } @@ -136,7 +147,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) } } - if (irq->hw) { + if (irq->hw && !vgic_irq_needs_resampling(irq)) { val |= ICH_LR_HW; val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT; /* diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index e5f06df000f2..e602d848fc1a 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -99,6 +99,11 @@ enum vgic_irq_config { * kvm_arm_get_running_vcpu() to get the vcpu pointer for private IRQs. */ struct irq_ops { + /* Per interrupt flags for special-cased interrupts */ + unsigned long flags; + +#define VGIC_IRQ_SW_RESAMPLE BIT(0) /* Clear the active state for resampling */ + /* * Callback function pointer to in-kernel devices that can tell us the * state of the input level of mapped level-triggered IRQ faster than @@ -150,6 +155,11 @@ struct vgic_irq { for in-kernel devices. */ }; +static inline bool vgic_irq_needs_resampling(struct vgic_irq *irq) +{ + return irq->ops && (irq->ops->flags & VGIC_IRQ_SW_RESAMPLE); +} + struct vgic_register_region; struct vgic_its; From 2f2f7e39dbb31aa1db13c490a4e47502497510fe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Mar 2021 14:05:21 +0000 Subject: [PATCH 694/730] KVM: arm64: timer: Refactor IRQ configuration As we are about to add some more things to the timer IRQ configuration, move this code out of the main timer init code into its own set of functions. No functional changes. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 57 +++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index e2288b6bf435..3cd170388d88 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -973,6 +973,35 @@ static int kvm_timer_dying_cpu(unsigned int cpu) return 0; } +static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags) +{ + *flags = irq_get_trigger_type(virq); + if (*flags != IRQF_TRIGGER_HIGH && *flags != IRQF_TRIGGER_LOW) { + kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n", + virq); + *flags = IRQF_TRIGGER_LOW; + } +} + +static int kvm_irq_init(struct arch_timer_kvm_info *info) +{ + if (info->virtual_irq <= 0) { + kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n", + info->virtual_irq); + return -ENODEV; + } + + host_vtimer_irq = info->virtual_irq; + kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags); + + if (info->physical_irq > 0) { + host_ptimer_irq = info->physical_irq; + kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags); + } + + return 0; +} + int kvm_timer_hyp_init(bool has_gic) { struct arch_timer_kvm_info *info; @@ -986,23 +1015,12 @@ int kvm_timer_hyp_init(bool has_gic) return -ENODEV; } + err = kvm_irq_init(info); + if (err) + return err; + /* First, do the virtual EL1 timer irq */ - if (info->virtual_irq <= 0) { - kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n", - info->virtual_irq); - return -ENODEV; - } - host_vtimer_irq = info->virtual_irq; - - host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq); - if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH && - host_vtimer_irq_flags != IRQF_TRIGGER_LOW) { - kvm_err("Invalid trigger for vtimer IRQ%d, assuming level low\n", - host_vtimer_irq); - host_vtimer_irq_flags = IRQF_TRIGGER_LOW; - } - err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler, "kvm guest vtimer", kvm_get_running_vcpus()); if (err) { @@ -1027,15 +1045,6 @@ int kvm_timer_hyp_init(bool has_gic) /* Now let's do the physical EL1 timer irq */ if (info->physical_irq > 0) { - host_ptimer_irq = info->physical_irq; - host_ptimer_irq_flags = irq_get_trigger_type(host_ptimer_irq); - if (host_ptimer_irq_flags != IRQF_TRIGGER_HIGH && - host_ptimer_irq_flags != IRQF_TRIGGER_LOW) { - kvm_err("Invalid trigger for ptimer IRQ%d, assuming level low\n", - host_ptimer_irq); - host_ptimer_irq_flags = IRQF_TRIGGER_LOW; - } - err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler, "kvm guest ptimer", kvm_get_running_vcpus()); if (err) { From 5f59229680f70078ac4c11db2ae89be087474144 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Mar 2021 14:21:21 +0000 Subject: [PATCH 695/730] KVM: arm64: timer: Add support for SW-based deactivation In order to deal with the lack of active state, we need to use the mask/unmask primitives (after all, the active state is just an additional mask on top of the normal one). To avoid adding a bunch of ugly conditionals in the timer and vgic code, let's use a timer-specific irqdomain to deal with the state conversion. Yes, this is an unexpected use of irqdomains, but there is no reason not to be just as creative as the designers of the HW... This involves overloading the vcpu_affinity, set_irqchip_state and eoi callbacks so that the rest of the KVM code can continue ignoring the oddities of the underlying platform. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 105 ++++++++++++++++++++++++++++++++++-- 1 file changed, 101 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 3cd170388d88..3df67c127489 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -973,6 +974,77 @@ static int kvm_timer_dying_cpu(unsigned int cpu) return 0; } +static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu) +{ + if (vcpu) + irqd_set_forwarded_to_vcpu(d); + else + irqd_clr_forwarded_to_vcpu(d); + + return 0; +} + +static int timer_irq_set_irqchip_state(struct irq_data *d, + enum irqchip_irq_state which, bool val) +{ + if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d)) + return irq_chip_set_parent_state(d, which, val); + + if (val) + irq_chip_mask_parent(d); + else + irq_chip_unmask_parent(d); + + return 0; +} + +static void timer_irq_eoi(struct irq_data *d) +{ + if (!irqd_is_forwarded_to_vcpu(d)) + irq_chip_eoi_parent(d); +} + +static void timer_irq_ack(struct irq_data *d) +{ + d = d->parent_data; + if (d->chip->irq_ack) + d->chip->irq_ack(d); +} + +static struct irq_chip timer_chip = { + .name = "KVM", + .irq_ack = timer_irq_ack, + .irq_mask = irq_chip_mask_parent, + .irq_unmask = irq_chip_unmask_parent, + .irq_eoi = timer_irq_eoi, + .irq_set_type = irq_chip_set_type_parent, + .irq_set_vcpu_affinity = timer_irq_set_vcpu_affinity, + .irq_set_irqchip_state = timer_irq_set_irqchip_state, +}; + +static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + irq_hw_number_t hwirq = (uintptr_t)arg; + + return irq_domain_set_hwirq_and_chip(domain, virq, hwirq, + &timer_chip, NULL); +} + +static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ +} + +static const struct irq_domain_ops timer_domain_ops = { + .alloc = timer_irq_domain_alloc, + .free = timer_irq_domain_free, +}; + +static struct irq_ops arch_timer_irq_ops = { + .get_input_level = kvm_arch_timer_get_input_level, +}; + static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags) { *flags = irq_get_trigger_type(virq); @@ -985,6 +1057,8 @@ static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags) static int kvm_irq_init(struct arch_timer_kvm_info *info) { + struct irq_domain *domain = NULL; + if (info->virtual_irq <= 0) { kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n", info->virtual_irq); @@ -994,9 +1068,36 @@ static int kvm_irq_init(struct arch_timer_kvm_info *info) host_vtimer_irq = info->virtual_irq; kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags); + if (kvm_vgic_global_state.no_hw_deactivation) { + struct fwnode_handle *fwnode; + struct irq_data *data; + + fwnode = irq_domain_alloc_named_fwnode("kvm-timer"); + if (!fwnode) + return -ENOMEM; + + /* Assume both vtimer and ptimer in the same parent */ + data = irq_get_irq_data(host_vtimer_irq); + domain = irq_domain_create_hierarchy(data->domain, 0, + NR_KVM_TIMERS, fwnode, + &timer_domain_ops, NULL); + if (!domain) { + irq_domain_free_fwnode(fwnode); + return -ENOMEM; + } + + arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE; + WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq, + (void *)TIMER_VTIMER)); + } + if (info->physical_irq > 0) { host_ptimer_irq = info->physical_irq; kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags); + + if (domain) + WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq, + (void *)TIMER_PTIMER)); } return 0; @@ -1125,10 +1226,6 @@ bool kvm_arch_timer_get_input_level(int vintid) return kvm_timer_should_fire(timer); } -static struct irq_ops arch_timer_irq_ops = { - .get_input_level = kvm_arch_timer_get_input_level, -}; - int kvm_timer_enable(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); From b6ca556c352979d09659027dc1559fad15b72649 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 28 Feb 2021 11:11:47 +0000 Subject: [PATCH 696/730] irqchip/apple-aic: Advertise some level of vGICv3 compatibility The CPUs in the Apple M1 SoC partially implement a virtual GICv3 CPU interface, although one that is incapable of HW deactivation of interrupts, nor masking the maintenance interrupt. Advertise the support to KVM. Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-apple-aic.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c index c179e27062fd..b8c06bd8659e 100644 --- a/drivers/irqchip/irq-apple-aic.c +++ b/drivers/irqchip/irq-apple-aic.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -787,6 +788,12 @@ static int aic_init_cpu(unsigned int cpu) return 0; } +static struct gic_kvm_info vgic_info __initdata = { + .type = GIC_V3, + .no_maint_irq_mask = true, + .no_hw_deactivation = true, +}; + static int __init aic_of_ic_init(struct device_node *node, struct device_node *parent) { int i; @@ -843,6 +850,8 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p "irqchip/apple-aic/ipi:starting", aic_init_cpu, NULL); + vgic_set_kvm_info(&vgic_info); + pr_info("Initialized with %d IRQs, %d FIQs, %d vIPIs\n", irqc->nr_hw, AIC_NR_FIQ, AIC_NR_SWIPI); From fd6f17bade2147b31198ad00b22d3acf5a398aec Mon Sep 17 00:00:00 2001 From: Keqian Zhu Date: Fri, 7 May 2021 19:03:21 +0800 Subject: [PATCH 697/730] KVM: arm64: Remove the creation time's mapping of MMIO regions The MMIO regions may be unmapped for many reasons and can be remapped by stage2 fault path. Map MMIO regions at creation time becomes a minor optimization and makes these two mapping path hard to sync. Remove the mapping code while keep the useful sanity check. Signed-off-by: Keqian Zhu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210507110322.23348-2-zhukeqian1@huawei.com --- arch/arm64/kvm/mmu.c | 38 +++----------------------------------- 1 file changed, 3 insertions(+), 35 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index c10207fed2f3..e982178c8c72 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1346,7 +1346,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, { hva_t hva = mem->userspace_addr; hva_t reg_end = hva + mem->memory_size; - bool writable = !(mem->flags & KVM_MEM_READONLY); int ret = 0; if (change != KVM_MR_CREATE && change != KVM_MR_MOVE && @@ -1363,8 +1362,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, mmap_read_lock(current->mm); /* * A memory region could potentially cover multiple VMAs, and any holes - * between them, so iterate over all of them to find out if we can map - * any of them right now. + * between them, so iterate over all of them. * * +--------------------------------------------+ * +---------------+----------------+ +----------------+ @@ -1375,51 +1373,21 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, */ do { struct vm_area_struct *vma; - hva_t vm_start, vm_end; vma = find_vma_intersection(current->mm, hva, reg_end); if (!vma) break; - /* - * Take the intersection of this VMA with the memory region - */ - vm_start = max(hva, vma->vm_start); - vm_end = min(reg_end, vma->vm_end); - if (vma->vm_flags & VM_PFNMAP) { - gpa_t gpa = mem->guest_phys_addr + - (vm_start - mem->userspace_addr); - phys_addr_t pa; - - pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; - pa += vm_start - vma->vm_start; - /* IO region dirty page logging not allowed */ if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) { ret = -EINVAL; - goto out; - } - - ret = kvm_phys_addr_ioremap(kvm, gpa, pa, - vm_end - vm_start, - writable); - if (ret) break; + } } - hva = vm_end; + hva = min(reg_end, vma->vm_end); } while (hva < reg_end); - if (change == KVM_MR_FLAGS_ONLY) - goto out; - - spin_lock(&kvm->mmu_lock); - if (ret) - unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size); - else if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) - stage2_flush_memslot(kvm, memslot); - spin_unlock(&kvm->mmu_lock); -out: mmap_read_unlock(current->mm); return ret; } From 2aa53d68cee6603931f73b28ef6b51ff3fde9397 Mon Sep 17 00:00:00 2001 From: Keqian Zhu Date: Fri, 7 May 2021 19:03:22 +0800 Subject: [PATCH 698/730] KVM: arm64: Try stage2 block mapping for host device MMIO The MMIO region of a device maybe huge (GB level), try to use block mapping in stage2 to speedup both map and unmap. Compared to normal memory mapping, we should consider two more points when try block mapping for MMIO region: 1. For normal memory mapping, the PA(host physical address) and HVA have same alignment within PUD_SIZE or PMD_SIZE when we use the HVA to request hugepage, so we don't need to consider PA alignment when verifing block mapping. But for device memory mapping, the PA and HVA may have different alignment. 2. For normal memory mapping, we are sure hugepage size properly fit into vma, so we don't check whether the mapping size exceeds the boundary of vma. But for device memory mapping, we should pay attention to this. This adds get_vma_page_shift() to get page shift for both normal memory and device MMIO region, and check these two points when selecting block mapping size for MMIO region. Signed-off-by: Keqian Zhu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210507110322.23348-3-zhukeqian1@huawei.com --- arch/arm64/kvm/mmu.c | 61 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index e982178c8c72..5742ba765ff9 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -822,6 +822,35 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot, return PAGE_SIZE; } +static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva) +{ + unsigned long pa; + + if (is_vm_hugetlb_page(vma) && !(vma->vm_flags & VM_PFNMAP)) + return huge_page_shift(hstate_vma(vma)); + + if (!(vma->vm_flags & VM_PFNMAP)) + return PAGE_SHIFT; + + VM_BUG_ON(is_vm_hugetlb_page(vma)); + + pa = (vma->vm_pgoff << PAGE_SHIFT) + (hva - vma->vm_start); + +#ifndef __PAGETABLE_PMD_FOLDED + if ((hva & (PUD_SIZE - 1)) == (pa & (PUD_SIZE - 1)) && + ALIGN_DOWN(hva, PUD_SIZE) >= vma->vm_start && + ALIGN(hva, PUD_SIZE) <= vma->vm_end) + return PUD_SHIFT; +#endif + + if ((hva & (PMD_SIZE - 1)) == (pa & (PMD_SIZE - 1)) && + ALIGN_DOWN(hva, PMD_SIZE) >= vma->vm_start && + ALIGN(hva, PMD_SIZE) <= vma->vm_end) + return PMD_SHIFT; + + return PAGE_SHIFT; +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) @@ -853,7 +882,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } - /* Let's check if we will get back a huge page backed by hugetlbfs */ + /* + * Let's check if we will get back a huge page backed by hugetlbfs, or + * get block mapping for device MMIO region. + */ mmap_read_lock(current->mm); vma = find_vma_intersection(current->mm, hva, hva + 1); if (unlikely(!vma)) { @@ -862,15 +894,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } - if (is_vm_hugetlb_page(vma)) - vma_shift = huge_page_shift(hstate_vma(vma)); - else - vma_shift = PAGE_SHIFT; - - if (logging_active || - (vma->vm_flags & VM_PFNMAP)) { + /* + * logging_active is guaranteed to never be true for VM_PFNMAP + * memslots. + */ + if (logging_active) { force_pte = true; vma_shift = PAGE_SHIFT; + } else { + vma_shift = get_vma_page_shift(vma, hva); } switch (vma_shift) { @@ -943,8 +975,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; if (kvm_is_device_pfn(pfn)) { + /* + * If the page was identified as device early by looking at + * the VMA flags, vma_pagesize is already representing the + * largest quantity we can map. If instead it was mapped + * via gfn_to_pfn_prot(), vma_pagesize is set to PAGE_SIZE + * and must not be upgraded. + * + * In both cases, we don't let transparent_hugepage_adjust() + * change things at the last minute. + */ device = true; - force_pte = true; } else if (logging_active && !write_fault) { /* * Only actually map the page as writable if this was a write @@ -965,7 +1006,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * If we are not forced to use page mapping, check if we are * backed by a THP and thus use block mapping if possible. */ - if (vma_pagesize == PAGE_SIZE && !force_pte) + if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) vma_pagesize = transparent_hugepage_adjust(memslot, hva, &pfn, &fault_ipa); if (writable) From 6cbf874e51b68e5b2eb0cc50be3676f5d5601dab Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 8 Jun 2021 11:45:12 +0000 Subject: [PATCH 699/730] KVM: arm64: Move hyp_pool locking out of refcount helpers The hyp_page refcount helpers currently rely on the hyp_pool lock for serialization. However, this means the refcounts can't be changed from the buddy allocator core as it already holds the lock, which means pages have to go through odd transient states. For example, when a page is freed, its refcount is set to 0, and the lock is transiently released before the page can be attached to a free list in the buddy tree. This is currently harmless as the allocator checks the list node of each page to see if it is available for allocation or not, but it means the page refcount can't be trusted to represent the state of the page even if the pool lock is held. In order to fix this, remove the pool locking from the refcount helpers, and move all the logic to the buddy allocator. This will simplify the removal of the list node from struct hyp_page in a later patch. Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210608114518.748712-2-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/gfp.h | 35 ---------------------- arch/arm64/kvm/hyp/nvhe/page_alloc.c | 43 ++++++++++++++++++++------- 2 files changed, 32 insertions(+), 46 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h index 18a4494337bd..f2c84e4fa40f 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h +++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h @@ -22,41 +22,6 @@ struct hyp_pool { unsigned int max_order; }; -static inline void hyp_page_ref_inc(struct hyp_page *p) -{ - struct hyp_pool *pool = hyp_page_to_pool(p); - - hyp_spin_lock(&pool->lock); - p->refcount++; - hyp_spin_unlock(&pool->lock); -} - -static inline int hyp_page_ref_dec_and_test(struct hyp_page *p) -{ - struct hyp_pool *pool = hyp_page_to_pool(p); - int ret; - - hyp_spin_lock(&pool->lock); - p->refcount--; - ret = (p->refcount == 0); - hyp_spin_unlock(&pool->lock); - - return ret; -} - -static inline void hyp_set_page_refcounted(struct hyp_page *p) -{ - struct hyp_pool *pool = hyp_page_to_pool(p); - - hyp_spin_lock(&pool->lock); - if (p->refcount) { - hyp_spin_unlock(&pool->lock); - BUG(); - } - p->refcount = 1; - hyp_spin_unlock(&pool->lock); -} - /* Allocation */ void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order); void hyp_get_page(void *addr); diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index 237e03bf0cb1..d666f4789e31 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -93,15 +93,6 @@ static void __hyp_attach_page(struct hyp_pool *pool, list_add_tail(&p->node, &pool->free_area[order]); } -static void hyp_attach_page(struct hyp_page *p) -{ - struct hyp_pool *pool = hyp_page_to_pool(p); - - hyp_spin_lock(&pool->lock); - __hyp_attach_page(pool, p); - hyp_spin_unlock(&pool->lock); -} - static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, struct hyp_page *p, unsigned int order) @@ -125,19 +116,49 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, return p; } +static inline void hyp_page_ref_inc(struct hyp_page *p) +{ + p->refcount++; +} + +static inline int hyp_page_ref_dec_and_test(struct hyp_page *p) +{ + p->refcount--; + return (p->refcount == 0); +} + +static inline void hyp_set_page_refcounted(struct hyp_page *p) +{ + BUG_ON(p->refcount); + p->refcount = 1; +} + +/* + * Changes to the buddy tree and page refcounts must be done with the hyp_pool + * lock held. If a refcount change requires an update to the buddy tree (e.g. + * hyp_put_page()), both operations must be done within the same critical + * section to guarantee transient states (e.g. a page with null refcount but + * not yet attached to a free list) can't be observed by well-behaved readers. + */ void hyp_put_page(void *addr) { struct hyp_page *p = hyp_virt_to_page(addr); + struct hyp_pool *pool = hyp_page_to_pool(p); + hyp_spin_lock(&pool->lock); if (hyp_page_ref_dec_and_test(p)) - hyp_attach_page(p); + __hyp_attach_page(pool, p); + hyp_spin_unlock(&pool->lock); } void hyp_get_page(void *addr) { struct hyp_page *p = hyp_virt_to_page(addr); + struct hyp_pool *pool = hyp_page_to_pool(p); + hyp_spin_lock(&pool->lock); hyp_page_ref_inc(p); + hyp_spin_unlock(&pool->lock); } void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order) @@ -159,8 +180,8 @@ void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order) p = list_first_entry(&pool->free_area[i], struct hyp_page, node); p = __hyp_extract_page(pool, p, order); - hyp_spin_unlock(&pool->lock); hyp_set_page_refcounted(p); + hyp_spin_unlock(&pool->lock); return hyp_page_to_virt(p); } From 581982decc635c93934aaeb88d62c21238c63f11 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 8 Jun 2021 11:45:13 +0000 Subject: [PATCH 700/730] KVM: arm64: Use refcount at hyp to check page availability The hyp buddy allocator currently checks the struct hyp_page list node to see if a page is available for allocation or not when trying to coalesce memory. Now that decrementing the refcount and attaching to the buddy tree is done in the same critical section, we can rely on the refcount of the buddy page to be in sync, which allows to replace the list node check by a refcount check. This will ease removing the list node from struct hyp_page later on. Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210608114518.748712-3-qperret@google.com --- arch/arm64/kvm/hyp/nvhe/page_alloc.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index d666f4789e31..2602577daa00 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -55,7 +55,7 @@ static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool, { struct hyp_page *buddy = __find_buddy_nocheck(pool, p, order); - if (!buddy || buddy->order != order || list_empty(&buddy->node)) + if (!buddy || buddy->order != order || buddy->refcount) return NULL; return buddy; @@ -133,6 +133,12 @@ static inline void hyp_set_page_refcounted(struct hyp_page *p) p->refcount = 1; } +static void __hyp_put_page(struct hyp_pool *pool, struct hyp_page *p) +{ + if (hyp_page_ref_dec_and_test(p)) + __hyp_attach_page(pool, p); +} + /* * Changes to the buddy tree and page refcounts must be done with the hyp_pool * lock held. If a refcount change requires an update to the buddy tree (e.g. @@ -146,8 +152,7 @@ void hyp_put_page(void *addr) struct hyp_pool *pool = hyp_page_to_pool(p); hyp_spin_lock(&pool->lock); - if (hyp_page_ref_dec_and_test(p)) - __hyp_attach_page(pool, p); + __hyp_put_page(pool, p); hyp_spin_unlock(&pool->lock); } @@ -202,15 +207,16 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, /* Init the vmemmap portion */ p = hyp_phys_to_page(phys); - memset(p, 0, sizeof(*p) * nr_pages); for (i = 0; i < nr_pages; i++) { p[i].pool = pool; + p[i].order = 0; INIT_LIST_HEAD(&p[i].node); + hyp_set_page_refcounted(&p[i]); } /* Attach the unused pages to the buddy tree */ for (i = reserved_pages; i < nr_pages; i++) - __hyp_attach_page(pool, &p[i]); + __hyp_put_page(pool, &p[i]); return 0; } From 914cde58a03cc5eef858db34687433e17d0e44be Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 8 Jun 2021 11:45:14 +0000 Subject: [PATCH 701/730] KVM: arm64: Remove list_head from hyp_page The list_head member of struct hyp_page is only needed when the page is attached to a free-list, which by definition implies the page is free. As such, nothing prevents us from using the page itself to store the list_head, hence reducing the size of the vmemmap. Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210608114518.748712-4-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/memory.h | 1 - arch/arm64/kvm/hyp/nvhe/page_alloc.c | 39 ++++++++++++++++++++---- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h index fd78bde939ee..7691ab495eb4 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/memory.h +++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h @@ -12,7 +12,6 @@ struct hyp_page { unsigned int refcount; unsigned int order; struct hyp_pool *pool; - struct list_head node; }; extern u64 __hyp_vmemmap; diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index 2602577daa00..34f0eb026dd2 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -62,6 +62,34 @@ static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool, } +/* + * Pages that are available for allocation are tracked in free-lists, so we use + * the pages themselves to store the list nodes to avoid wasting space. As the + * allocator always returns zeroed pages (which are zeroed on the hyp_put_page() + * path to optimize allocation speed), we also need to clean-up the list node in + * each page when we take it out of the list. + */ +static inline void page_remove_from_list(struct hyp_page *p) +{ + struct list_head *node = hyp_page_to_virt(p); + + __list_del_entry(node); + memset(node, 0, sizeof(*node)); +} + +static inline void page_add_to_list(struct hyp_page *p, struct list_head *head) +{ + struct list_head *node = hyp_page_to_virt(p); + + INIT_LIST_HEAD(node); + list_add_tail(node, head); +} + +static inline struct hyp_page *node_to_page(struct list_head *node) +{ + return hyp_virt_to_page(node); +} + static void __hyp_attach_page(struct hyp_pool *pool, struct hyp_page *p) { @@ -83,14 +111,14 @@ static void __hyp_attach_page(struct hyp_pool *pool, break; /* Take the buddy out of its list, and coallesce with @p */ - list_del_init(&buddy->node); + page_remove_from_list(buddy); buddy->order = HYP_NO_ORDER; p = min(p, buddy); } /* Mark the new head, and insert it */ p->order = order; - list_add_tail(&p->node, &pool->free_area[order]); + page_add_to_list(p, &pool->free_area[order]); } static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, @@ -99,7 +127,7 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, { struct hyp_page *buddy; - list_del_init(&p->node); + page_remove_from_list(p); while (p->order > order) { /* * The buddy of order n - 1 currently has HYP_NO_ORDER as it @@ -110,7 +138,7 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, p->order--; buddy = __find_buddy_nocheck(pool, p, p->order); buddy->order = p->order; - list_add_tail(&buddy->node, &pool->free_area[buddy->order]); + page_add_to_list(buddy, &pool->free_area[buddy->order]); } return p; @@ -182,7 +210,7 @@ void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order) } /* Extract it from the tree at the right order */ - p = list_first_entry(&pool->free_area[i], struct hyp_page, node); + p = node_to_page(pool->free_area[i].next); p = __hyp_extract_page(pool, p, order); hyp_set_page_refcounted(p); @@ -210,7 +238,6 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, for (i = 0; i < nr_pages; i++) { p[i].pool = pool; p[i].order = 0; - INIT_LIST_HEAD(&p[i].node); hyp_set_page_refcounted(&p[i]); } From 7c350ea39e53ade33ca7be00b0947f2b9f53dda0 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 8 Jun 2021 11:45:15 +0000 Subject: [PATCH 702/730] KVM: arm64: Unify MMIO and mem host stage-2 pools We currently maintain two separate memory pools for the host stage-2, one for pages used in the page-table when mapping memory regions, and the other to map MMIO regions. The former is large enough to map all of memory with page granularity and the latter can cover an arbitrary portion of IPA space, but allows to 'recycle' pages. However, this split makes accounting difficult to manage as pages at intermediate levels of the page-table may be used to map both memory and MMIO regions. Simplify the scheme by merging both pools into one. This means we can now hit the -ENOMEM case in the memory abort path, but we're still guaranteed forward-progress in the worst case by unmapping MMIO regions. On the plus side this also means we can usually map a lot more MMIO space at once if memory ranges happen to be mapped with block mappings. Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210608114518.748712-5-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 +- arch/arm64/kvm/hyp/include/nvhe/mm.h | 13 +++--- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 46 ++++++++----------- arch/arm64/kvm/hyp/nvhe/setup.c | 16 ++----- arch/arm64/kvm/hyp/reserved_mem.c | 3 +- 5 files changed, 32 insertions(+), 48 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 42d81ec739fa..9c227d87c36d 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -23,7 +23,7 @@ extern struct host_kvm host_kvm; int __pkvm_prot_finalize(void); int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end); -int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool); +int kvm_host_prepare_stage2(void *pgt_pool_base); void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); static __always_inline void __load_host_stage2(void) diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index 0095f6289742..8ec3a5a7744b 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -78,19 +78,20 @@ static inline unsigned long hyp_s1_pgtable_pages(void) return res; } -static inline unsigned long host_s2_mem_pgtable_pages(void) +static inline unsigned long host_s2_pgtable_pages(void) { + unsigned long res; + /* * Include an extra 16 pages to safely upper-bound the worst case of * concatenated pgds. */ - return __hyp_pgtable_total_pages() + 16; -} + res = __hyp_pgtable_total_pages() + 16; -static inline unsigned long host_s2_dev_pgtable_pages(void) -{ /* Allow 1 GiB for MMIO mappings */ - return __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT); + res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT); + + return res; } #endif /* __KVM_HYP_MM_H */ diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 4b60c0056c04..c8ed7e86231b 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -23,8 +23,7 @@ extern unsigned long hyp_nr_cpus; struct host_kvm host_kvm; -static struct hyp_pool host_s2_mem; -static struct hyp_pool host_s2_dev; +static struct hyp_pool host_s2_pool; /* * Copies of the host's CPU features registers holding sanitized values. @@ -36,7 +35,7 @@ static const u8 pkvm_hyp_id = 1; static void *host_s2_zalloc_pages_exact(size_t size) { - return hyp_alloc_pages(&host_s2_mem, get_order(size)); + return hyp_alloc_pages(&host_s2_pool, get_order(size)); } static void *host_s2_zalloc_page(void *pool) @@ -44,20 +43,14 @@ static void *host_s2_zalloc_page(void *pool) return hyp_alloc_pages(pool, 0); } -static int prepare_s2_pools(void *mem_pgt_pool, void *dev_pgt_pool) +static int prepare_s2_pool(void *pgt_pool_base) { unsigned long nr_pages, pfn; int ret; - pfn = hyp_virt_to_pfn(mem_pgt_pool); - nr_pages = host_s2_mem_pgtable_pages(); - ret = hyp_pool_init(&host_s2_mem, pfn, nr_pages, 0); - if (ret) - return ret; - - pfn = hyp_virt_to_pfn(dev_pgt_pool); - nr_pages = host_s2_dev_pgtable_pages(); - ret = hyp_pool_init(&host_s2_dev, pfn, nr_pages, 0); + pfn = hyp_virt_to_pfn(pgt_pool_base); + nr_pages = host_s2_pgtable_pages(); + ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0); if (ret) return ret; @@ -86,7 +79,7 @@ static void prepare_host_vtcr(void) id_aa64mmfr1_el1_sys_val, phys_shift); } -int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool) +int kvm_host_prepare_stage2(void *pgt_pool_base) { struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu; int ret; @@ -94,7 +87,7 @@ int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool) prepare_host_vtcr(); hyp_spin_lock_init(&host_kvm.lock); - ret = prepare_s2_pools(mem_pgt_pool, dev_pgt_pool); + ret = prepare_s2_pool(pgt_pool_base); if (ret) return ret; @@ -199,11 +192,10 @@ static bool range_is_memory(u64 start, u64 end) } static inline int __host_stage2_idmap(u64 start, u64 end, - enum kvm_pgtable_prot prot, - struct hyp_pool *pool) + enum kvm_pgtable_prot prot) { return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start, - prot, pool); + prot, &host_s2_pool); } static int host_stage2_idmap(u64 addr) @@ -211,7 +203,6 @@ static int host_stage2_idmap(u64 addr) enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W; struct kvm_mem_range range; bool is_memory = find_mem_range(addr, &range); - struct hyp_pool *pool = is_memory ? &host_s2_mem : &host_s2_dev; int ret; if (is_memory) @@ -222,22 +213,21 @@ static int host_stage2_idmap(u64 addr) if (ret) goto unlock; - ret = __host_stage2_idmap(range.start, range.end, prot, pool); - if (is_memory || ret != -ENOMEM) + ret = __host_stage2_idmap(range.start, range.end, prot); + if (ret != -ENOMEM) goto unlock; /* - * host_s2_mem has been provided with enough pages to cover all of - * memory with page granularity, so we should never hit the ENOMEM case. - * However, it is difficult to know how much of the MMIO range we will - * need to cover upfront, so we may need to 'recycle' the pages if we - * run out. + * The pool has been provided with enough pages to cover all of memory + * with page granularity, but it is difficult to know how much of the + * MMIO range we will need to cover upfront, so we may need to 'recycle' + * the pages if we run out. */ ret = host_stage2_unmap_dev_all(); if (ret) goto unlock; - ret = __host_stage2_idmap(range.start, range.end, prot, pool); + ret = __host_stage2_idmap(range.start, range.end, prot); unlock: hyp_spin_unlock(&host_kvm.lock); @@ -258,7 +248,7 @@ int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end) hyp_spin_lock(&host_kvm.lock); ret = kvm_pgtable_stage2_set_owner(&host_kvm.pgt, start, end - start, - &host_s2_mem, pkvm_hyp_id); + &host_s2_pool, pkvm_hyp_id); hyp_spin_unlock(&host_kvm.lock); return ret != -EAGAIN ? ret : 0; diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index a3d3a275344e..1cff3259a493 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -24,8 +24,7 @@ unsigned long hyp_nr_cpus; static void *vmemmap_base; static void *hyp_pgt_base; -static void *host_s2_mem_pgt_base; -static void *host_s2_dev_pgt_base; +static void *host_s2_pgt_base; static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops; static int divide_memory_pool(void *virt, unsigned long size) @@ -45,14 +44,9 @@ static int divide_memory_pool(void *virt, unsigned long size) if (!hyp_pgt_base) return -ENOMEM; - nr_pages = host_s2_mem_pgtable_pages(); - host_s2_mem_pgt_base = hyp_early_alloc_contig(nr_pages); - if (!host_s2_mem_pgt_base) - return -ENOMEM; - - nr_pages = host_s2_dev_pgtable_pages(); - host_s2_dev_pgt_base = hyp_early_alloc_contig(nr_pages); - if (!host_s2_dev_pgt_base) + nr_pages = host_s2_pgtable_pages(); + host_s2_pgt_base = hyp_early_alloc_contig(nr_pages); + if (!host_s2_pgt_base) return -ENOMEM; return 0; @@ -158,7 +152,7 @@ void __noreturn __pkvm_init_finalise(void) if (ret) goto out; - ret = kvm_host_prepare_stage2(host_s2_mem_pgt_base, host_s2_dev_pgt_base); + ret = kvm_host_prepare_stage2(host_s2_pgt_base); if (ret) goto out; diff --git a/arch/arm64/kvm/hyp/reserved_mem.c b/arch/arm64/kvm/hyp/reserved_mem.c index 83ca23ac259b..d654921dd09b 100644 --- a/arch/arm64/kvm/hyp/reserved_mem.c +++ b/arch/arm64/kvm/hyp/reserved_mem.c @@ -71,8 +71,7 @@ void __init kvm_hyp_reserve(void) } hyp_mem_pages += hyp_s1_pgtable_pages(); - hyp_mem_pages += host_s2_mem_pgtable_pages(); - hyp_mem_pages += host_s2_dev_pgtable_pages(); + hyp_mem_pages += host_s2_pgtable_pages(); /* * The hyp_vmemmap needs to be backed by pages, but these pages From d978b9cfe6fe8008467f8c5d51677f52e7815b39 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 8 Jun 2021 11:45:16 +0000 Subject: [PATCH 703/730] KVM: arm64: Remove hyp_pool pointer from struct hyp_page Each struct hyp_page currently contains a pointer to a hyp_pool struct where the page should be freed if its refcount reaches 0. However, this information can always be inferred from the context in the EL2 code, so drop the pointer to save a few bytes in the vmemmap. Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210608114518.748712-6-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/gfp.h | 4 ++-- arch/arm64/kvm/hyp/include/nvhe/memory.h | 2 -- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 14 ++++++++++++-- arch/arm64/kvm/hyp/nvhe/page_alloc.c | 7 ++----- arch/arm64/kvm/hyp/nvhe/setup.c | 14 ++++++++++++-- 5 files changed, 28 insertions(+), 13 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h index f2c84e4fa40f..3ea7bfb6c380 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h +++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h @@ -24,8 +24,8 @@ struct hyp_pool { /* Allocation */ void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order); -void hyp_get_page(void *addr); -void hyp_put_page(void *addr); +void hyp_get_page(struct hyp_pool *pool, void *addr); +void hyp_put_page(struct hyp_pool *pool, void *addr); /* Used pages cannot be freed */ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h index 7691ab495eb4..991636be2f46 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/memory.h +++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h @@ -7,11 +7,9 @@ #include -struct hyp_pool; struct hyp_page { unsigned int refcount; unsigned int order; - struct hyp_pool *pool; }; extern u64 __hyp_vmemmap; diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index c8ed7e86231b..d938ce95d3bd 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -43,6 +43,16 @@ static void *host_s2_zalloc_page(void *pool) return hyp_alloc_pages(pool, 0); } +static void host_s2_get_page(void *addr) +{ + hyp_get_page(&host_s2_pool, addr); +} + +static void host_s2_put_page(void *addr) +{ + hyp_put_page(&host_s2_pool, addr); +} + static int prepare_s2_pool(void *pgt_pool_base) { unsigned long nr_pages, pfn; @@ -60,8 +70,8 @@ static int prepare_s2_pool(void *pgt_pool_base) .phys_to_virt = hyp_phys_to_virt, .virt_to_phys = hyp_virt_to_phys, .page_count = hyp_page_count, - .get_page = hyp_get_page, - .put_page = hyp_put_page, + .get_page = host_s2_get_page, + .put_page = host_s2_put_page, }; return 0; diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index 34f0eb026dd2..e3689def7033 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -174,20 +174,18 @@ static void __hyp_put_page(struct hyp_pool *pool, struct hyp_page *p) * section to guarantee transient states (e.g. a page with null refcount but * not yet attached to a free list) can't be observed by well-behaved readers. */ -void hyp_put_page(void *addr) +void hyp_put_page(struct hyp_pool *pool, void *addr) { struct hyp_page *p = hyp_virt_to_page(addr); - struct hyp_pool *pool = hyp_page_to_pool(p); hyp_spin_lock(&pool->lock); __hyp_put_page(pool, p); hyp_spin_unlock(&pool->lock); } -void hyp_get_page(void *addr) +void hyp_get_page(struct hyp_pool *pool, void *addr) { struct hyp_page *p = hyp_virt_to_page(addr); - struct hyp_pool *pool = hyp_page_to_pool(p); hyp_spin_lock(&pool->lock); hyp_page_ref_inc(p); @@ -236,7 +234,6 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, /* Init the vmemmap portion */ p = hyp_phys_to_page(phys); for (i = 0; i < nr_pages; i++) { - p[i].pool = pool; p[i].order = 0; hyp_set_page_refcounted(&p[i]); } diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 1cff3259a493..f834833ac921 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -137,6 +137,16 @@ static void *hyp_zalloc_hyp_page(void *arg) return hyp_alloc_pages(&hpool, 0); } +static void hpool_get_page(void *addr) +{ + hyp_get_page(&hpool, addr); +} + +static void hpool_put_page(void *addr) +{ + hyp_put_page(&hpool, addr); +} + void __noreturn __pkvm_init_finalise(void) { struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data); @@ -160,8 +170,8 @@ void __noreturn __pkvm_init_finalise(void) .zalloc_page = hyp_zalloc_hyp_page, .phys_to_virt = hyp_phys_to_virt, .virt_to_phys = hyp_virt_to_phys, - .get_page = hyp_get_page, - .put_page = hyp_put_page, + .get_page = hpool_get_page, + .put_page = hpool_put_page, }; pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops; From 87ec0606733e1aa9568f54ddb41f03aa6b5687f2 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 8 Jun 2021 11:45:17 +0000 Subject: [PATCH 704/730] KVM: arm64: Use less bits for hyp_page order The hyp_page order is currently encoded on 4 bytes even though it is guaranteed to be smaller than this. Make it 2 bytes to reduce the hyp vmemmap overhead. Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210608114518.748712-7-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/gfp.h | 6 +++--- arch/arm64/kvm/hyp/include/nvhe/memory.h | 2 +- arch/arm64/kvm/hyp/nvhe/page_alloc.c | 12 ++++++------ 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h index 3ea7bfb6c380..fb0f523d1492 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h +++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h @@ -7,7 +7,7 @@ #include #include -#define HYP_NO_ORDER UINT_MAX +#define HYP_NO_ORDER USHRT_MAX struct hyp_pool { /* @@ -19,11 +19,11 @@ struct hyp_pool { struct list_head free_area[MAX_ORDER]; phys_addr_t range_start; phys_addr_t range_end; - unsigned int max_order; + unsigned short max_order; }; /* Allocation */ -void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order); +void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order); void hyp_get_page(struct hyp_pool *pool, void *addr); void hyp_put_page(struct hyp_pool *pool, void *addr); diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h index 991636be2f46..3fe34fa30ea4 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/memory.h +++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h @@ -9,7 +9,7 @@ struct hyp_page { unsigned int refcount; - unsigned int order; + unsigned short order; }; extern u64 __hyp_vmemmap; diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index e3689def7033..be07055bbc10 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -32,7 +32,7 @@ u64 __hyp_vmemmap; */ static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool, struct hyp_page *p, - unsigned int order) + unsigned short order) { phys_addr_t addr = hyp_page_to_phys(p); @@ -51,7 +51,7 @@ static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool, /* Find a buddy page currently available for allocation */ static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool, struct hyp_page *p, - unsigned int order) + unsigned short order) { struct hyp_page *buddy = __find_buddy_nocheck(pool, p, order); @@ -93,7 +93,7 @@ static inline struct hyp_page *node_to_page(struct list_head *node) static void __hyp_attach_page(struct hyp_pool *pool, struct hyp_page *p) { - unsigned int order = p->order; + unsigned short order = p->order; struct hyp_page *buddy; memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order); @@ -123,7 +123,7 @@ static void __hyp_attach_page(struct hyp_pool *pool, static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, struct hyp_page *p, - unsigned int order) + unsigned short order) { struct hyp_page *buddy; @@ -192,9 +192,9 @@ void hyp_get_page(struct hyp_pool *pool, void *addr) hyp_spin_unlock(&pool->lock); } -void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order) +void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order) { - unsigned int i = order; + unsigned short i = order; struct hyp_page *p; hyp_spin_lock(&pool->lock); From 6929586d8eddad184f43526efe7bf0a8be4f18b2 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 8 Jun 2021 11:45:18 +0000 Subject: [PATCH 705/730] KVM: arm64: Use less bits for hyp_page refcount The hyp_page refcount is currently encoded on 4 bytes even though we never need to count that many objects in a page. Make it 2 bytes to save some space in the vmemmap. As overflows are more likely to happen as well, make sure to catch those with a BUG in the increment function. Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210608114518.748712-8-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/memory.h | 2 +- arch/arm64/kvm/hyp/nvhe/page_alloc.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h index 3fe34fa30ea4..592b7edb3edb 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/memory.h +++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h @@ -8,7 +8,7 @@ #include struct hyp_page { - unsigned int refcount; + unsigned short refcount; unsigned short order; }; diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index be07055bbc10..41fc25bdfb34 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -146,6 +146,7 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, static inline void hyp_page_ref_inc(struct hyp_page *p) { + BUG_ON(p->refcount == USHRT_MAX); p->refcount++; } From b78f4a596692f6805e796a4c13f2d921b8a95166 Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Thu, 10 Jun 2021 18:10:15 -0700 Subject: [PATCH 706/730] KVM: selftests: Rename vm_handle_exception Rename the vm_handle_exception function to a name that indicates more clearly that it installs something: vm_install_exception_handler. Reported-by: kernel test robot Suggested-by: Marc Zyngier Suggested-by: Andrew Jones Signed-off-by: Ricardo Koller Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210611011020.3420067-2-ricarkol@google.com --- tools/testing/selftests/kvm/include/x86_64/processor.h | 2 +- tools/testing/selftests/kvm/lib/x86_64/processor.c | 4 ++-- tools/testing/selftests/kvm/x86_64/evmcs_test.c | 4 ++-- tools/testing/selftests/kvm/x86_64/kvm_pv_test.c | 2 +- .../selftests/kvm/x86_64/userspace_msr_exit_test.c | 8 ++++---- tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 0b30b4e15c38..e9f584991332 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -391,7 +391,7 @@ struct ex_regs { void vm_init_descriptor_tables(struct kvm_vm *vm); void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid); -void vm_handle_exception(struct kvm_vm *vm, int vector, +void vm_install_exception_handler(struct kvm_vm *vm, int vector, void (*handler)(struct ex_regs *)); /* diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index efe235044421..257c5c33d04e 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1244,8 +1244,8 @@ void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid) *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; } -void vm_handle_exception(struct kvm_vm *vm, int vector, - void (*handler)(struct ex_regs *)) +void vm_install_exception_handler(struct kvm_vm *vm, int vector, + void (*handler)(struct ex_regs *)) { vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers); diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index 63096cea26c6..0864b2e3fd9e 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -154,8 +154,8 @@ int main(int argc, char *argv[]) vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_handle_exception(vm, UD_VECTOR, guest_ud_handler); - vm_handle_exception(vm, NMI_VECTOR, guest_nmi_handler); + vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); + vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler); pr_info("Running L1 which uses EVMCS to run L2\n"); diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c index 732b244d6956..04ed975662c9 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c +++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c @@ -227,7 +227,7 @@ int main(void) vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_handle_exception(vm, GP_VECTOR, guest_gp_handler); + vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); enter_guest(vm); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c index 72c0d0797522..e3e20e8848d0 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c +++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c @@ -574,7 +574,7 @@ static void test_msr_filter_allow(void) { vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_handle_exception(vm, GP_VECTOR, guest_gp_handler); + vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); /* Process guest code userspace exits. */ run_guest_then_process_rdmsr(vm, MSR_IA32_XSS); @@ -588,12 +588,12 @@ static void test_msr_filter_allow(void) { run_guest_then_process_wrmsr(vm, MSR_NON_EXISTENT); run_guest_then_process_rdmsr(vm, MSR_NON_EXISTENT); - vm_handle_exception(vm, UD_VECTOR, guest_ud_handler); + vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); run_guest(vm); - vm_handle_exception(vm, UD_VECTOR, NULL); + vm_install_exception_handler(vm, UD_VECTOR, NULL); if (process_ucall(vm) != UCALL_DONE) { - vm_handle_exception(vm, GP_VECTOR, guest_fep_gp_handler); + vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler); /* Process emulated rdmsr and wrmsr instructions. */ run_guest_then_process_rdmsr(vm, MSR_IA32_XSS); diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c index 2f964cdc273c..ed27269a01bb 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c @@ -462,7 +462,7 @@ int main(int argc, char *argv[]) vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vm, HALTER_VCPU_ID); - vm_handle_exception(vm, IPI_VECTOR, guest_ipi_handler); + vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler); virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA, 0); From b7326c01122683b88e273a0cc826cd4c01234470 Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Thu, 10 Jun 2021 18:10:16 -0700 Subject: [PATCH 707/730] KVM: selftests: Complete x86_64/sync_regs_test ucall The guest in sync_regs_test does raw ucalls by directly accessing the ucall IO port. It makes these ucalls without setting %rdi to a `struct ucall`, which is what a ucall uses to pass messages. The issue is that if the host did a get_ucall (the receiver side), it would try to access the `struct ucall` at %rdi=0 which would lead to an error ("No mapping for vm virtual address, gva: 0x0"). This issue is currently benign as there is no get_ucall in sync_regs_test; however, that will change in the next commit as it changes the unhandled exception reporting mechanism to use ucalls. In that case, every vcpu_run is followed by a get_ucall to check if the guest is trying to report an unhandled exception. Fix this in advance by setting %rdi to a UCALL_NONE struct ucall for the sync_regs_test guest. Tested with gcc-[8,9,10], and clang-[9,11]. Signed-off-by: Ricardo Koller Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210611011020.3420067-3-ricarkol@google.com --- tools/testing/selftests/kvm/x86_64/sync_regs_test.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c index d672f0a473f8..fc03a150278d 100644 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c @@ -24,6 +24,10 @@ #define UCALL_PIO_PORT ((uint16_t)0x1000) +struct ucall uc_none = { + .cmd = UCALL_NONE, +}; + /* * ucall is embedded here to protect against compiler reshuffling registers * before calling a function. In this test we only need to get KVM_EXIT_IO @@ -34,7 +38,8 @@ void guest_code(void) asm volatile("1: in %[port], %%al\n" "add $0x1, %%rbx\n" "jmp 1b" - : : [port] "d" (UCALL_PIO_PORT) : "rax", "rbx"); + : : [port] "d" (UCALL_PIO_PORT), "D" (&uc_none) + : "rax", "rbx"); } static void compare_regs(struct kvm_regs *left, struct kvm_regs *right) From 75275d7fbef47805b77e8af81a4d51e2d92db70f Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Thu, 10 Jun 2021 18:10:17 -0700 Subject: [PATCH 708/730] KVM: selftests: Introduce UCALL_UNHANDLED for unhandled vector reporting x86, the only arch implementing exception handling, reports unhandled vectors using port IO at a specific port number. This replicates what ucall already does. Introduce a new ucall type, UCALL_UNHANDLED, for guests to report unhandled exceptions. Then replace the x86 unhandled vector exception reporting to use it instead of port IO. This new ucall type will be used in the next commits by arm64 to report unhandled vectors as well. Tested: Forcing a page fault in the ./x86_64/xapic_ipi_test halter_guest_code() shows this: $ ./x86_64/xapic_ipi_test ... Unexpected vectored event in guest (vector:0xe) Signed-off-by: Ricardo Koller Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210611011020.3420067-4-ricarkol@google.com --- tools/testing/selftests/kvm/include/kvm_util.h | 1 + .../selftests/kvm/include/x86_64/processor.h | 2 -- .../selftests/kvm/lib/x86_64/processor.c | 17 +++++++---------- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index fcd8e3855111..beb76d6deaa9 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -349,6 +349,7 @@ enum { UCALL_SYNC, UCALL_ABORT, UCALL_DONE, + UCALL_UNHANDLED, }; #define UCALL_MAX_ARGS 6 diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index e9f584991332..92a62c6999bc 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -53,8 +53,6 @@ #define CPUID_PKU (1ul << 3) #define CPUID_LA57 (1ul << 16) -#define UNEXPECTED_VECTOR_PORT 0xfff0u - /* General Registers in 64-Bit Mode */ struct gpr64_regs { u64 rax; diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 257c5c33d04e..a217515a9bc2 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1201,7 +1201,7 @@ static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, void kvm_exit_unexpected_vector(uint32_t value) { - outl(UNEXPECTED_VECTOR_PORT, value); + ucall(UCALL_UNHANDLED, 1, value); } void route_exception(struct ex_regs *regs) @@ -1254,16 +1254,13 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) { - if (vcpu_state(vm, vcpuid)->exit_reason == KVM_EXIT_IO - && vcpu_state(vm, vcpuid)->io.port == UNEXPECTED_VECTOR_PORT - && vcpu_state(vm, vcpuid)->io.size == 4) { - /* Grab pointer to io data */ - uint32_t *data = (void *)vcpu_state(vm, vcpuid) - + vcpu_state(vm, vcpuid)->io.data_offset; + struct ucall uc; - TEST_ASSERT(false, - "Unexpected vectored event in guest (vector:0x%x)", - *data); + if (get_ucall(vm, vcpuid, &uc) == UCALL_UNHANDLED) { + uint64_t vector = uc.args[0]; + + TEST_FAIL("Unexpected vectored event in guest (vector:0x%lx)", + vector); } } From 67f709f52bf0b5c19f24d1234163123cbb6af545 Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Thu, 10 Jun 2021 18:10:18 -0700 Subject: [PATCH 709/730] KVM: selftests: Move GUEST_ASSERT_EQ to utils header Move GUEST_ASSERT_EQ to a common header, kvm_util.h, for other architectures and tests to use. Also modify __GUEST_ASSERT so it can be reused to implement GUEST_ASSERT_EQ. Signed-off-by: Ricardo Koller Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210611011020.3420067-5-ricarkol@google.com --- .../testing/selftests/kvm/include/kvm_util.h | 22 ++++++++++--------- .../selftests/kvm/x86_64/tsc_msrs_test.c | 9 -------- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index beb76d6deaa9..ce49e22843d8 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -368,26 +368,28 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc); ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4) #define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage) #define GUEST_DONE() ucall(UCALL_DONE, 0) -#define __GUEST_ASSERT(_condition, _nargs, _args...) do { \ - if (!(_condition)) \ - ucall(UCALL_ABORT, 2 + _nargs, \ - "Failed guest assert: " \ - #_condition, __LINE__, _args); \ +#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) do { \ + if (!(_condition)) \ + ucall(UCALL_ABORT, 2 + _nargs, \ + "Failed guest assert: " \ + _condstr, __LINE__, _args); \ } while (0) #define GUEST_ASSERT(_condition) \ - __GUEST_ASSERT((_condition), 0, 0) + __GUEST_ASSERT(_condition, #_condition, 0, 0) #define GUEST_ASSERT_1(_condition, arg1) \ - __GUEST_ASSERT((_condition), 1, (arg1)) + __GUEST_ASSERT(_condition, #_condition, 1, (arg1)) #define GUEST_ASSERT_2(_condition, arg1, arg2) \ - __GUEST_ASSERT((_condition), 2, (arg1), (arg2)) + __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2)) #define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \ - __GUEST_ASSERT((_condition), 3, (arg1), (arg2), (arg3)) + __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3)) #define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \ - __GUEST_ASSERT((_condition), 4, (arg1), (arg2), (arg3), (arg4)) + __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4)) + +#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b) #endif /* SELFTEST_KVM_UTIL_H */ diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c index e357d8e222d4..5a6a662f2e59 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c +++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c @@ -18,15 +18,6 @@ #define rounded_rdmsr(x) ROUND(rdmsr(x)) #define rounded_host_rdmsr(x) ROUND(vcpu_get_msr(vm, 0, x)) -#define GUEST_ASSERT_EQ(a, b) do { \ - __typeof(a) _a = (a); \ - __typeof(b) _b = (b); \ - if (_a != _b) \ - ucall(UCALL_ABORT, 4, \ - "Failed guest assert: " \ - #a " == " #b, __LINE__, _a, _b); \ - } while(0) - static void guest_code(void) { u64 val = 0; From e3db7579ef355a0b2bfef4448b84d9ac882c8f2c Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Thu, 10 Jun 2021 18:10:19 -0700 Subject: [PATCH 710/730] KVM: selftests: Add exception handling support for aarch64 Add the infrastructure needed to enable exception handling in aarch64 selftests. The exception handling defaults to an unhandled-exception handler which aborts the test, just like x86. These handlers can be overridden by calling vm_install_exception_handler(vector) or vm_install_sync_handler(vector, ec). The unhandled exception reporting from the guest is done using the ucall type introduced in a previous commit, UCALL_UNHANDLED. The exception handling code is inspired on kvm-unit-tests. Signed-off-by: Ricardo Koller Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210611011020.3420067-6-ricarkol@google.com --- tools/testing/selftests/kvm/Makefile | 2 +- .../selftests/kvm/include/aarch64/processor.h | 63 +++++++++ .../selftests/kvm/lib/aarch64/handlers.S | 126 ++++++++++++++++++ .../selftests/kvm/lib/aarch64/processor.c | 97 ++++++++++++++ 4 files changed, 287 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/kvm/lib/aarch64/handlers.S diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index daaee1888b12..a77e6063f7e9 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -35,7 +35,7 @@ endif LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S -LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c +LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index b7fa0c8551db..b2b3e9d626cb 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -8,6 +8,7 @@ #define SELFTEST_KVM_PROCESSOR_H #include "kvm_util.h" +#include #define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ @@ -18,6 +19,7 @@ #define MAIR_EL1 3, 0, 10, 2, 0 #define TTBR0_EL1 3, 0, 2, 0, 0 #define SCTLR_EL1 3, 0, 1, 0, 0 +#define VBAR_EL1 3, 0, 12, 0, 0 /* * Default MAIR @@ -56,4 +58,65 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init *init, void *guest_code); +struct ex_regs { + u64 regs[31]; + u64 sp; + u64 pc; + u64 pstate; +}; + +#define VECTOR_NUM 16 + +enum { + VECTOR_SYNC_CURRENT_SP0, + VECTOR_IRQ_CURRENT_SP0, + VECTOR_FIQ_CURRENT_SP0, + VECTOR_ERROR_CURRENT_SP0, + + VECTOR_SYNC_CURRENT, + VECTOR_IRQ_CURRENT, + VECTOR_FIQ_CURRENT, + VECTOR_ERROR_CURRENT, + + VECTOR_SYNC_LOWER_64, + VECTOR_IRQ_LOWER_64, + VECTOR_FIQ_LOWER_64, + VECTOR_ERROR_LOWER_64, + + VECTOR_SYNC_LOWER_32, + VECTOR_IRQ_LOWER_32, + VECTOR_FIQ_LOWER_32, + VECTOR_ERROR_LOWER_32, +}; + +#define VECTOR_IS_SYNC(v) ((v) == VECTOR_SYNC_CURRENT_SP0 || \ + (v) == VECTOR_SYNC_CURRENT || \ + (v) == VECTOR_SYNC_LOWER_64 || \ + (v) == VECTOR_SYNC_LOWER_32) + +#define ESR_EC_NUM 64 +#define ESR_EC_SHIFT 26 +#define ESR_EC_MASK (ESR_EC_NUM - 1) + +void vm_init_descriptor_tables(struct kvm_vm *vm); +void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid); + +typedef void(*handler_fn)(struct ex_regs *); +void vm_install_exception_handler(struct kvm_vm *vm, + int vector, handler_fn handler); +void vm_install_sync_handler(struct kvm_vm *vm, + int vector, int ec, handler_fn handler); + +#define write_sysreg(reg, val) \ +({ \ + u64 __val = (u64)(val); \ + asm volatile("msr " __stringify(reg) ", %x0" : : "rZ" (__val)); \ +}) + +#define read_sysreg(reg) \ +({ u64 val; \ + asm volatile("mrs %0, "__stringify(reg) : "=r"(val) : : "memory");\ + val; \ +}) + #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/handlers.S b/tools/testing/selftests/kvm/lib/aarch64/handlers.S new file mode 100644 index 000000000000..0e443eadfac6 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/aarch64/handlers.S @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +.macro save_registers + add sp, sp, #-16 * 17 + + stp x0, x1, [sp, #16 * 0] + stp x2, x3, [sp, #16 * 1] + stp x4, x5, [sp, #16 * 2] + stp x6, x7, [sp, #16 * 3] + stp x8, x9, [sp, #16 * 4] + stp x10, x11, [sp, #16 * 5] + stp x12, x13, [sp, #16 * 6] + stp x14, x15, [sp, #16 * 7] + stp x16, x17, [sp, #16 * 8] + stp x18, x19, [sp, #16 * 9] + stp x20, x21, [sp, #16 * 10] + stp x22, x23, [sp, #16 * 11] + stp x24, x25, [sp, #16 * 12] + stp x26, x27, [sp, #16 * 13] + stp x28, x29, [sp, #16 * 14] + + /* + * This stores sp_el1 into ex_regs.sp so exception handlers can "look" + * at it. It will _not_ be used to restore the sp on return from the + * exception so handlers can not update it. + */ + add x1, sp, #16 * 17 + stp x30, x1, [sp, #16 * 15] /* x30, SP */ + + mrs x1, elr_el1 + mrs x2, spsr_el1 + stp x1, x2, [sp, #16 * 16] /* PC, PSTATE */ +.endm + +.macro restore_registers + ldp x1, x2, [sp, #16 * 16] /* PC, PSTATE */ + msr elr_el1, x1 + msr spsr_el1, x2 + + /* sp is not restored */ + ldp x30, xzr, [sp, #16 * 15] /* x30, SP */ + + ldp x28, x29, [sp, #16 * 14] + ldp x26, x27, [sp, #16 * 13] + ldp x24, x25, [sp, #16 * 12] + ldp x22, x23, [sp, #16 * 11] + ldp x20, x21, [sp, #16 * 10] + ldp x18, x19, [sp, #16 * 9] + ldp x16, x17, [sp, #16 * 8] + ldp x14, x15, [sp, #16 * 7] + ldp x12, x13, [sp, #16 * 6] + ldp x10, x11, [sp, #16 * 5] + ldp x8, x9, [sp, #16 * 4] + ldp x6, x7, [sp, #16 * 3] + ldp x4, x5, [sp, #16 * 2] + ldp x2, x3, [sp, #16 * 1] + ldp x0, x1, [sp, #16 * 0] + + add sp, sp, #16 * 17 + + eret +.endm + +.pushsection ".entry.text", "ax" +.balign 0x800 +.global vectors +vectors: +.popsection + +.set vector, 0 + +/* + * Build an exception handler for vector and append a jump to it into + * vectors (while making sure that it's 0x80 aligned). + */ +.macro HANDLER, label +handler_\label: + save_registers + mov x0, sp + mov x1, #vector + bl route_exception + restore_registers + +.pushsection ".entry.text", "ax" +.balign 0x80 + b handler_\label +.popsection + +.set vector, vector + 1 +.endm + +.macro HANDLER_INVALID +.pushsection ".entry.text", "ax" +.balign 0x80 +/* This will abort so no need to save and restore registers. */ + mov x0, #vector + mov x1, #0 /* ec */ + mov x2, #0 /* valid_ec */ + b kvm_exit_unexpected_exception +.popsection + +.set vector, vector + 1 +.endm + +/* + * Caution: be sure to not add anything between the declaration of vectors + * above and these macro calls that will build the vectors table below it. + */ + HANDLER_INVALID // Synchronous EL1t + HANDLER_INVALID // IRQ EL1t + HANDLER_INVALID // FIQ EL1t + HANDLER_INVALID // Error EL1t + + HANDLER el1h_sync // Synchronous EL1h + HANDLER el1h_irq // IRQ EL1h + HANDLER el1h_fiq // FIQ EL1h + HANDLER el1h_error // Error EL1h + + HANDLER el0_sync_64 // Synchronous 64-bit EL0 + HANDLER el0_irq_64 // IRQ 64-bit EL0 + HANDLER el0_fiq_64 // FIQ 64-bit EL0 + HANDLER el0_error_64 // Error 64-bit EL0 + + HANDLER el0_sync_32 // Synchronous 32-bit EL0 + HANDLER el0_irq_32 // IRQ 32-bit EL0 + HANDLER el0_fiq_32 // FIQ 32-bit EL0 + HANDLER el0_error_32 // Error 32-bit EL0 diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index cee92d477dc0..48b55c93f858 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -6,6 +6,7 @@ */ #include +#include #include "kvm_util.h" #include "../kvm_util_internal.h" @@ -14,6 +15,8 @@ #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 #define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000 +static vm_vaddr_t exception_handlers; + static uint64_t page_align(struct kvm_vm *vm, uint64_t v) { return (v + vm->page_size) & ~(vm->page_size - 1); @@ -334,6 +337,100 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) va_end(ap); } +void kvm_exit_unexpected_exception(int vector, uint64_t ec, bool valid_ec) +{ + ucall(UCALL_UNHANDLED, 3, vector, ec, valid_ec); + while (1) + ; +} + void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) { + struct ucall uc; + + if (get_ucall(vm, vcpuid, &uc) != UCALL_UNHANDLED) + return; + + if (uc.args[2]) /* valid_ec */ { + assert(VECTOR_IS_SYNC(uc.args[0])); + TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)", + uc.args[0], uc.args[1]); + } else { + assert(!VECTOR_IS_SYNC(uc.args[0])); + TEST_FAIL("Unexpected exception (vector:0x%lx)", + uc.args[0]); + } +} + +struct handlers { + handler_fn exception_handlers[VECTOR_NUM][ESR_EC_NUM]; +}; + +void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid) +{ + extern char vectors; + + set_reg(vm, vcpuid, ARM64_SYS_REG(VBAR_EL1), (uint64_t)&vectors); +} + +void route_exception(struct ex_regs *regs, int vector) +{ + struct handlers *handlers = (struct handlers *)exception_handlers; + bool valid_ec; + int ec = 0; + + switch (vector) { + case VECTOR_SYNC_CURRENT: + case VECTOR_SYNC_LOWER_64: + ec = (read_sysreg(esr_el1) >> ESR_EC_SHIFT) & ESR_EC_MASK; + valid_ec = true; + break; + case VECTOR_IRQ_CURRENT: + case VECTOR_IRQ_LOWER_64: + case VECTOR_FIQ_CURRENT: + case VECTOR_FIQ_LOWER_64: + case VECTOR_ERROR_CURRENT: + case VECTOR_ERROR_LOWER_64: + ec = 0; + valid_ec = false; + break; + default: + valid_ec = false; + goto unexpected_exception; + } + + if (handlers && handlers->exception_handlers[vector][ec]) + return handlers->exception_handlers[vector][ec](regs); + +unexpected_exception: + kvm_exit_unexpected_exception(vector, ec, valid_ec); +} + +void vm_init_descriptor_tables(struct kvm_vm *vm) +{ + vm->handlers = vm_vaddr_alloc(vm, sizeof(struct handlers), + vm->page_size, 0, 0); + + *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; +} + +void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, + void (*handler)(struct ex_regs *)) +{ + struct handlers *handlers = addr_gva2hva(vm, vm->handlers); + + assert(VECTOR_IS_SYNC(vector)); + assert(vector < VECTOR_NUM); + assert(ec < ESR_EC_NUM); + handlers->exception_handlers[vector][ec] = handler; +} + +void vm_install_exception_handler(struct kvm_vm *vm, int vector, + void (*handler)(struct ex_regs *)) +{ + struct handlers *handlers = addr_gva2hva(vm, vm->handlers); + + assert(!VECTOR_IS_SYNC(vector)); + assert(vector < VECTOR_NUM); + handlers->exception_handlers[vector][0] = handler; } From 4f05223acaeaabe0a1a188e25fab334735d85c5e Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Thu, 10 Jun 2021 18:10:20 -0700 Subject: [PATCH 711/730] KVM: selftests: Add aarch64/debug-exceptions test Covers fundamental tests for debug exceptions. The guest installs and handle its debug exceptions itself, without KVM_SET_GUEST_DEBUG. Signed-off-by: Ricardo Koller Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210611011020.3420067-7-ricarkol@google.com --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/aarch64/debug-exceptions.c | 250 ++++++++++++++++++ .../selftests/kvm/include/aarch64/processor.h | 22 +- 4 files changed, 268 insertions(+), 6 deletions(-) create mode 100644 tools/testing/selftests/kvm/aarch64/debug-exceptions.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 524c857a049c..7e2c66155b06 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only +/aarch64/debug-exceptions /aarch64/get-reg-list /aarch64/get-reg-list-sve /aarch64/vgic_init diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index a77e6063f7e9..36e4ebcc82f0 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -78,6 +78,7 @@ TEST_GEN_PROGS_x86_64 += memslot_perf_test TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time +TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve TEST_GEN_PROGS_aarch64 += aarch64/vgic_init diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c new file mode 100644 index 000000000000..e5e6c92b60da --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -0,0 +1,250 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +#define VCPU_ID 0 + +#define MDSCR_KDE (1 << 13) +#define MDSCR_MDE (1 << 15) +#define MDSCR_SS (1 << 0) + +#define DBGBCR_LEN8 (0xff << 5) +#define DBGBCR_EXEC (0x0 << 3) +#define DBGBCR_EL1 (0x1 << 1) +#define DBGBCR_E (0x1 << 0) + +#define DBGWCR_LEN8 (0xff << 5) +#define DBGWCR_RD (0x1 << 3) +#define DBGWCR_WR (0x2 << 3) +#define DBGWCR_EL1 (0x1 << 1) +#define DBGWCR_E (0x1 << 0) + +#define SPSR_D (1 << 9) +#define SPSR_SS (1 << 21) + +extern unsigned char sw_bp, hw_bp, bp_svc, bp_brk, hw_wp, ss_start; +static volatile uint64_t sw_bp_addr, hw_bp_addr; +static volatile uint64_t wp_addr, wp_data_addr; +static volatile uint64_t svc_addr; +static volatile uint64_t ss_addr[4], ss_idx; +#define PC(v) ((uint64_t)&(v)) + +static void reset_debug_state(void) +{ + asm volatile("msr daifset, #8"); + + write_sysreg(osdlr_el1, 0); + write_sysreg(oslar_el1, 0); + isb(); + + write_sysreg(mdscr_el1, 0); + /* This test only uses the first bp and wp slot. */ + write_sysreg(dbgbvr0_el1, 0); + write_sysreg(dbgbcr0_el1, 0); + write_sysreg(dbgwcr0_el1, 0); + write_sysreg(dbgwvr0_el1, 0); + isb(); +} + +static void install_wp(uint64_t addr) +{ + uint32_t wcr; + uint32_t mdscr; + + wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E; + write_sysreg(dbgwcr0_el1, wcr); + write_sysreg(dbgwvr0_el1, addr); + isb(); + + asm volatile("msr daifclr, #8"); + + mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE; + write_sysreg(mdscr_el1, mdscr); + isb(); +} + +static void install_hw_bp(uint64_t addr) +{ + uint32_t bcr; + uint32_t mdscr; + + bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E; + write_sysreg(dbgbcr0_el1, bcr); + write_sysreg(dbgbvr0_el1, addr); + isb(); + + asm volatile("msr daifclr, #8"); + + mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE; + write_sysreg(mdscr_el1, mdscr); + isb(); +} + +static void install_ss(void) +{ + uint32_t mdscr; + + asm volatile("msr daifclr, #8"); + + mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_SS; + write_sysreg(mdscr_el1, mdscr); + isb(); +} + +static volatile char write_data; + +static void guest_code(void) +{ + GUEST_SYNC(0); + + /* Software-breakpoint */ + asm volatile("sw_bp: brk #0"); + GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp)); + + GUEST_SYNC(1); + + /* Hardware-breakpoint */ + reset_debug_state(); + install_hw_bp(PC(hw_bp)); + asm volatile("hw_bp: nop"); + GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp)); + + GUEST_SYNC(2); + + /* Hardware-breakpoint + svc */ + reset_debug_state(); + install_hw_bp(PC(bp_svc)); + asm volatile("bp_svc: svc #0"); + GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc)); + GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4); + + GUEST_SYNC(3); + + /* Hardware-breakpoint + software-breakpoint */ + reset_debug_state(); + install_hw_bp(PC(bp_brk)); + asm volatile("bp_brk: brk #0"); + GUEST_ASSERT_EQ(sw_bp_addr, PC(bp_brk)); + GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_brk)); + + GUEST_SYNC(4); + + /* Watchpoint */ + reset_debug_state(); + install_wp(PC(write_data)); + write_data = 'x'; + GUEST_ASSERT_EQ(write_data, 'x'); + GUEST_ASSERT_EQ(wp_data_addr, PC(write_data)); + + GUEST_SYNC(5); + + /* Single-step */ + reset_debug_state(); + install_ss(); + ss_idx = 0; + asm volatile("ss_start:\n" + "mrs x0, esr_el1\n" + "add x0, x0, #1\n" + "msr daifset, #8\n" + : : : "x0"); + GUEST_ASSERT_EQ(ss_addr[0], PC(ss_start)); + GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4); + GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8); + + GUEST_DONE(); +} + +static void guest_sw_bp_handler(struct ex_regs *regs) +{ + sw_bp_addr = regs->pc; + regs->pc += 4; +} + +static void guest_hw_bp_handler(struct ex_regs *regs) +{ + hw_bp_addr = regs->pc; + regs->pstate |= SPSR_D; +} + +static void guest_wp_handler(struct ex_regs *regs) +{ + wp_data_addr = read_sysreg(far_el1); + wp_addr = regs->pc; + regs->pstate |= SPSR_D; +} + +static void guest_ss_handler(struct ex_regs *regs) +{ + GUEST_ASSERT_1(ss_idx < 4, ss_idx); + ss_addr[ss_idx++] = regs->pc; + regs->pstate |= SPSR_SS; +} + +static void guest_svc_handler(struct ex_regs *regs) +{ + svc_addr = regs->pc; +} + +static int debug_version(struct kvm_vm *vm) +{ + uint64_t id_aa64dfr0; + + get_reg(vm, VCPU_ID, ARM64_SYS_REG(ID_AA64DFR0_EL1), &id_aa64dfr0); + return id_aa64dfr0 & 0xf; +} + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct ucall uc; + int stage; + + vm = vm_create_default(VCPU_ID, 0, guest_code); + ucall_init(vm, NULL); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + + if (debug_version(vm) < 6) { + print_skip("Armv8 debug architecture not supported."); + kvm_vm_free(vm); + exit(KSFT_SKIP); + } + + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_BRK_INS, guest_sw_bp_handler); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_HW_BP_CURRENT, guest_hw_bp_handler); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_WP_CURRENT, guest_wp_handler); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_SSTEP_CURRENT, guest_ss_handler); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_SVC64, guest_svc_handler); + + for (stage = 0; stage < 7; stage++) { + vcpu_run(vm, VCPU_ID); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_SYNC: + TEST_ASSERT(uc.args[1] == stage, + "Stage %d: Unexpected sync ucall, got %lx", + stage, (ulong)uc.args[1]); + break; + case UCALL_ABORT: + TEST_FAIL("%s at %s:%ld\n\tvalues: %#lx, %#lx", + (const char *)uc.args[0], + __FILE__, uc.args[1], uc.args[2], uc.args[3]); + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index b2b3e9d626cb..27dc5c2e56b9 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -14,12 +14,14 @@ #define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x)) -#define CPACR_EL1 3, 0, 1, 0, 2 -#define TCR_EL1 3, 0, 2, 0, 2 -#define MAIR_EL1 3, 0, 10, 2, 0 -#define TTBR0_EL1 3, 0, 2, 0, 0 -#define SCTLR_EL1 3, 0, 1, 0, 0 -#define VBAR_EL1 3, 0, 12, 0, 0 +#define CPACR_EL1 3, 0, 1, 0, 2 +#define TCR_EL1 3, 0, 2, 0, 2 +#define MAIR_EL1 3, 0, 10, 2, 0 +#define TTBR0_EL1 3, 0, 2, 0, 0 +#define SCTLR_EL1 3, 0, 1, 0, 0 +#define VBAR_EL1 3, 0, 12, 0, 0 + +#define ID_AA64DFR0_EL1 3, 0, 0, 5, 0 /* * Default MAIR @@ -98,6 +100,12 @@ enum { #define ESR_EC_SHIFT 26 #define ESR_EC_MASK (ESR_EC_NUM - 1) +#define ESR_EC_SVC64 0x15 +#define ESR_EC_HW_BP_CURRENT 0x31 +#define ESR_EC_SSTEP_CURRENT 0x33 +#define ESR_EC_WP_CURRENT 0x35 +#define ESR_EC_BRK_INS 0x3c + void vm_init_descriptor_tables(struct kvm_vm *vm); void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid); @@ -119,4 +127,6 @@ void vm_install_sync_handler(struct kvm_vm *vm, val; \ }) +#define isb() asm volatile("isb" : : : "memory") + #endif /* SELFTEST_KVM_PROCESSOR_H */ From 6204004de3160900435bdb4b9a2fb8749a9277d2 Mon Sep 17 00:00:00 2001 From: Yanan Wang Date: Thu, 17 Jun 2021 18:58:21 +0800 Subject: [PATCH 712/730] KVM: arm64: Introduce two cache maintenance callbacks To prepare for performing CMOs for guest stage-2 in the fault handlers in pgtable.c, here introduce two cache maintenance callbacks in struct kvm_pgtable_mm_ops. We also adjust the comment alignment for the existing part but make no real content change at all. Reviewed-by: Fuad Tabba Signed-off-by: Yanan Wang [maz: fixed up comments and renamed callbacks] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210617105824.31752-2-wangyanan55@huawei.com --- arch/arm64/include/asm/kvm_pgtable.h | 42 +++++++++++++++++----------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index c3674c47d48c..f004c0115d89 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -27,23 +27,29 @@ typedef u64 kvm_pte_t; /** * struct kvm_pgtable_mm_ops - Memory management callbacks. - * @zalloc_page: Allocate a single zeroed memory page. The @arg parameter - * can be used by the walker to pass a memcache. The - * initial refcount of the page is 1. - * @zalloc_pages_exact: Allocate an exact number of zeroed memory pages. The - * @size parameter is in bytes, and is rounded-up to the - * next page boundary. The resulting allocation is - * physically contiguous. - * @free_pages_exact: Free an exact number of memory pages previously - * allocated by zalloc_pages_exact. - * @get_page: Increment the refcount on a page. - * @put_page: Decrement the refcount on a page. When the refcount - * reaches 0 the page is automatically freed. - * @page_count: Return the refcount of a page. - * @phys_to_virt: Convert a physical address into a virtual address mapped - * in the current context. - * @virt_to_phys: Convert a virtual address mapped in the current context - * into a physical address. + * @zalloc_page: Allocate a single zeroed memory page. + * The @arg parameter can be used by the walker + * to pass a memcache. The initial refcount of + * the page is 1. + * @zalloc_pages_exact: Allocate an exact number of zeroed memory pages. + * The @size parameter is in bytes, and is rounded + * up to the next page boundary. The resulting + * allocation is physically contiguous. + * @free_pages_exact: Free an exact number of memory pages previously + * allocated by zalloc_pages_exact. + * @get_page: Increment the refcount on a page. + * @put_page: Decrement the refcount on a page. When the + * refcount reaches 0 the page is automatically + * freed. + * @page_count: Return the refcount of a page. + * @phys_to_virt: Convert a physical address into a virtual + * address mapped in the current context. + * @virt_to_phys: Convert a virtual address mapped in the current + * context into a physical address. + * @dcache_clean_inval_poc: Clean and invalidate the data cache to the PoC + * for the specified memory address range. + * @icache_inval_pou: Invalidate the instruction cache to the PoU + * for the specified memory address range. */ struct kvm_pgtable_mm_ops { void* (*zalloc_page)(void *arg); @@ -54,6 +60,8 @@ struct kvm_pgtable_mm_ops { int (*page_count)(void *addr); void* (*phys_to_virt)(phys_addr_t phys); phys_addr_t (*virt_to_phys)(void *addr); + void (*dcache_clean_inval_poc)(void *addr, size_t size); + void (*icache_inval_pou)(void *addr, size_t size); }; /** From a4d5ca5c7cd8fe85056b8cb838fbcb7e5a05f356 Mon Sep 17 00:00:00 2001 From: Yanan Wang Date: Thu, 17 Jun 2021 18:58:22 +0800 Subject: [PATCH 713/730] KVM: arm64: Introduce mm_ops member for structure stage2_attr_data Also add a mm_ops member for structure stage2_attr_data, since we will move I-cache maintenance for guest stage-2 to the permission path and as a result will need mm_ops for some callbacks. Reviewed-by: Fuad Tabba Signed-off-by: Yanan Wang Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210617105824.31752-3-wangyanan55@huawei.com --- arch/arm64/kvm/hyp/pgtable.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index c37c1dc4feaf..d99789432b05 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -861,10 +861,11 @@ int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) } struct stage2_attr_data { - kvm_pte_t attr_set; - kvm_pte_t attr_clr; - kvm_pte_t pte; - u32 level; + kvm_pte_t attr_set; + kvm_pte_t attr_clr; + kvm_pte_t pte; + u32 level; + struct kvm_pgtable_mm_ops *mm_ops; }; static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, @@ -903,6 +904,7 @@ static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr, struct stage2_attr_data data = { .attr_set = attr_set & attr_mask, .attr_clr = attr_clr & attr_mask, + .mm_ops = pgt->mm_ops, }; struct kvm_pgtable_walker walker = { .cb = stage2_attr_walker, From 378e6a9c78a02b4b609846aa0afccf34d3038977 Mon Sep 17 00:00:00 2001 From: Yanan Wang Date: Thu, 17 Jun 2021 18:58:23 +0800 Subject: [PATCH 714/730] KVM: arm64: Tweak parameters of guest cache maintenance functions Adjust the parameter "kvm_pfn_t pfn" of __clean_dcache_guest_page and __invalidate_icache_guest_page to "void *va", which paves the way for converting these two guest CMO functions into callbacks in structure kvm_pgtable_mm_ops. No functional change. Reviewed-by: Fuad Tabba Signed-off-by: Yanan Wang Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210617105824.31752-4-wangyanan55@huawei.com --- arch/arm64/include/asm/kvm_mmu.h | 9 ++------- arch/arm64/kvm/mmu.c | 28 +++++++++++++++------------- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 25ed956f9af1..6844a7550392 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -187,10 +187,8 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; } -static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) +static inline void __clean_dcache_guest_page(void *va, size_t size) { - void *va = page_address(pfn_to_page(pfn)); - /* * With FWB, we ensure that the guest always accesses memory using * cacheable attributes, and we don't have to clean to PoC when @@ -203,16 +201,13 @@ static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) kvm_flush_dcache_to_poc(va, size); } -static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn, - unsigned long size) +static inline void __invalidate_icache_guest_page(void *va, size_t size) { if (icache_is_aliasing()) { /* any kind of VIPT cache */ __flush_icache_all(); } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) { /* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */ - void *va = page_address(pfn_to_page(pfn)); - invalidate_icache_range((unsigned long)va, (unsigned long)va + size); } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index c10207fed2f3..0a5a5b098a4a 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -126,6 +126,16 @@ static void *kvm_host_va(phys_addr_t phys) return __va(phys); } +static void clean_dcache_guest_page(void *va, size_t size) +{ + __clean_dcache_guest_page(va, size); +} + +static void invalidate_icache_guest_page(void *va, size_t size) +{ + __invalidate_icache_guest_page(va, size); +} + /* * Unmapping vs dcache management: * @@ -693,16 +703,6 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); } -static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) -{ - __clean_dcache_guest_page(pfn, size); -} - -static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size) -{ - __invalidate_icache_guest_page(pfn, size); -} - static void kvm_send_hwpoison_signal(unsigned long address, short lsb) { send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current); @@ -972,11 +972,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, prot |= KVM_PGTABLE_PROT_W; if (fault_status != FSC_PERM && !device) - clean_dcache_guest_page(pfn, vma_pagesize); + clean_dcache_guest_page(page_address(pfn_to_page(pfn)), + vma_pagesize); if (exec_fault) { prot |= KVM_PGTABLE_PROT_X; - invalidate_icache_guest_page(pfn, vma_pagesize); + invalidate_icache_guest_page(page_address(pfn_to_page(pfn)), + vma_pagesize); } if (device) @@ -1178,7 +1180,7 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) * We've moved a page around, probably through CoW, so let's treat it * just like a translation fault and clean the cache to the PoC. */ - clean_dcache_guest_page(pfn, PAGE_SIZE); + clean_dcache_guest_page(page_address(pfn_to_page(pfn)), PAGE_SIZE); /* * The MMU notifiers will have unmapped a huge PMD before calling From 25aa28691bb960a76f0cffd8862144a29487f6ff Mon Sep 17 00:00:00 2001 From: Yanan Wang Date: Thu, 17 Jun 2021 18:58:24 +0800 Subject: [PATCH 715/730] KVM: arm64: Move guest CMOs to the fault handlers We currently uniformly perform CMOs of D-cache and I-cache in function user_mem_abort before calling the fault handlers. If we get concurrent guest faults(e.g. translation faults, permission faults) or some really unnecessary guest faults caused by BBM, CMOs for the first vcpu are necessary while the others later are not. By moving CMOs to the fault handlers, we can easily identify conditions where they are really needed and avoid the unnecessary ones. As it's a time consuming process to perform CMOs especially when flushing a block range, so this solution reduces much load of kvm and improve efficiency of the stage-2 page table code. We can imagine two specific scenarios which will gain much benefit: 1) In a normal VM startup, this solution will improve the efficiency of handling guest page faults incurred by vCPUs, when initially populating stage-2 page tables. 2) After live migration, the heavy workload will be resumed on the destination VM, however all the stage-2 page tables need to be rebuilt at the moment. So this solution will ease the performance drop during resuming stage. Reviewed-by: Fuad Tabba Signed-off-by: Yanan Wang Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210617105824.31752-5-wangyanan55@huawei.com --- arch/arm64/kvm/hyp/pgtable.c | 38 +++++++++++++++++++++++++++++------- arch/arm64/kvm/mmu.c | 21 +++++++------------- 2 files changed, 38 insertions(+), 21 deletions(-) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index d99789432b05..72f1d8f50094 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -577,12 +577,24 @@ static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr, mm_ops->put_page(ptep); } +static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte) +{ + u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR; + return memattr == KVM_S2_MEMATTR(pgt, NORMAL); +} + +static bool stage2_pte_executable(kvm_pte_t pte) +{ + return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN); +} + static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct stage2_map_data *data) { kvm_pte_t new, old = *ptep; u64 granule = kvm_granule_size(level), phys = data->phys; + struct kvm_pgtable *pgt = data->mmu->pgt; struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; if (!kvm_block_mapping_supported(addr, end, phys, level)) @@ -606,6 +618,14 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, stage2_put_pte(ptep, data->mmu, addr, level, mm_ops); } + /* Perform CMOs before installation of the guest stage-2 PTE */ + if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new)) + mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops), + granule); + + if (mm_ops->icache_inval_pou && stage2_pte_executable(new)) + mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule); + smp_store_release(ptep, new); if (stage2_pte_is_counted(new)) mm_ops->get_page(ptep); @@ -798,12 +818,6 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, return ret; } -static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte) -{ - u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR; - return memattr == KVM_S2_MEMATTR(pgt, NORMAL); -} - static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, enum kvm_pgtable_walk_flags flag, void * const arg) @@ -874,6 +888,7 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, { kvm_pte_t pte = *ptep; struct stage2_attr_data *data = arg; + struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; if (!kvm_pte_valid(pte)) return 0; @@ -888,8 +903,17 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, * but worst-case the access flag update gets lost and will be * set on the next access instead. */ - if (data->pte != pte) + if (data->pte != pte) { + /* + * Invalidate instruction cache before updating the guest + * stage-2 PTE if we are going to add executable permission. + */ + if (mm_ops->icache_inval_pou && + stage2_pte_executable(pte) && !stage2_pte_executable(*ptep)) + mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops), + kvm_granule_size(level)); WRITE_ONCE(*ptep, pte); + } return 0; } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 0a5a5b098a4a..0b3ba57849f6 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -442,6 +442,8 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = { .page_count = kvm_host_page_count, .phys_to_virt = kvm_host_va, .virt_to_phys = kvm_host_pa, + .dcache_clean_inval_poc = clean_dcache_guest_page, + .icache_inval_pou = invalidate_icache_guest_page, }; /** @@ -971,15 +973,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (writable) prot |= KVM_PGTABLE_PROT_W; - if (fault_status != FSC_PERM && !device) - clean_dcache_guest_page(page_address(pfn_to_page(pfn)), - vma_pagesize); - - if (exec_fault) { + if (exec_fault) prot |= KVM_PGTABLE_PROT_X; - invalidate_icache_guest_page(page_address(pfn_to_page(pfn)), - vma_pagesize); - } if (device) prot |= KVM_PGTABLE_PROT_DEVICE; @@ -1177,12 +1172,10 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) WARN_ON(range->end - range->start != 1); /* - * We've moved a page around, probably through CoW, so let's treat it - * just like a translation fault and clean the cache to the PoC. - */ - clean_dcache_guest_page(page_address(pfn_to_page(pfn)), PAGE_SIZE); - - /* + * We've moved a page around, probably through CoW, so let's treat + * it just like a translation fault and the map handler will clean + * the cache to the PoC. + * * The MMU notifiers will have unmapped a huge PMD before calling * ->change_pte() (which in turn calls kvm_set_spte_gfn()) and * therefore we never need to clear out a huge PMD through this From 2a71fabf6a1bc9162a84e18d6ab991230ca4d588 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Fri, 18 Jun 2021 11:51:39 +0100 Subject: [PATCH 716/730] KVM: arm64: Don't zero the cycle count register when PMCR_EL0.P is set According to ARM DDI 0487G.a, page D13-3895, setting the PMCR_EL0.P bit to 1 has the following effect: "Reset all event counters accessible in the current Exception level, not including PMCCNTR_EL0, to zero." Similar behaviour is described for AArch32 on page G8-7022. Make it so. Fixes: c01d6a18023b ("KVM: arm64: pmu: Only handle supported event counters") Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210618105139.83795-1-alexandru.elisei@arm.com --- arch/arm64/kvm/pmu-emul.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index fd167d4f4215..ecc0d19c8cc1 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -578,6 +578,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0); if (val & ARMV8_PMU_PMCR_P) { + mask &= ~BIT(ARMV8_PMU_CYCLE_IDX); for_each_set_bit(i, &mask, 32) kvm_pmu_set_counter_value(vcpu, i, 0); } From d0c94c49792cf780cbfefe29f81bb8c3b73bc76b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 3 Jun 2021 16:50:02 +0100 Subject: [PATCH 717/730] KVM: arm64: Restore PMU configuration on first run Restoring a guest with an active virtual PMU results in no perf counters being instanciated on the host side. Not quite what you'd expect from a restore. In order to fix this, force a writeback of PMCR_EL0 on the first run of a vcpu (using a new request so that it happens once the vcpu has been loaded). This will in turn create all the host-side counters that were missing. Reported-by: Jinank Jain Tested-by: Jinank Jain Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/87wnrbylxv.wl-maz@kernel.org Link: https://lore.kernel.org/r/b53dfcf9bbc4db7f96154b1cd5188d72b9766358.camel@amazon.de --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/arm.c | 4 ++++ arch/arm64/kvm/pmu-emul.c | 3 +++ 3 files changed, 8 insertions(+) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 7cd7d5c8c4bc..6336b4309114 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -46,6 +46,7 @@ #define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2) #define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3) #define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4) +#define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5) #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ KVM_DIRTY_LOG_INITIALLY_SET) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e720148232a0..facf4d41d32a 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -689,6 +689,10 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu) vgic_v4_load(vcpu); preempt_enable(); } + + if (kvm_check_request(KVM_REQ_RELOAD_PMU, vcpu)) + kvm_pmu_handle_pmcr(vcpu, + __vcpu_sys_reg(vcpu, PMCR_EL0)); } } diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index ecc0d19c8cc1..f33825c995cb 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -851,6 +851,9 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) return -EINVAL; } + /* One-off reload of the PMU on first run */ + kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); + return 0; } From 2f9ace5d4557f8ceea07969d6214c320f5e50c0c Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Mon, 31 May 2021 12:33:40 +0200 Subject: [PATCH 718/730] KVM: arm64: selftests: get-reg-list: Introduce vcpu configs We already break register lists into sublists that get selected based on vcpu config. However, since we only had two configs (vregs and sve), we didn't structure the code very well to manage them. Restructure it now to more cleanly handle register sublists that are dependent on the vcpu config. This patch has no intended functional change (except for the vcpu config name now being prepended to all output). Signed-off-by: Andrew Jones Reviewed-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210531103344.29325-2-drjones@redhat.com --- .../selftests/kvm/aarch64/get-reg-list.c | 265 ++++++++++++------ 1 file changed, 175 insertions(+), 90 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index 486932164cf2..7bb09ce20dde 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -37,7 +37,30 @@ #define reg_list_sve() (false) #endif -#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK) +static struct kvm_reg_list *reg_list; +static __u64 *blessed_reg, blessed_n; + +struct reg_sublist { + const char *name; + long capability; + int feature; + bool finalize; + __u64 *regs; + __u64 regs_n; + __u64 *rejects_set; + __u64 rejects_set_n; +}; + +struct vcpu_config { + char *name; + struct reg_sublist sublists[]; +}; + +static struct vcpu_config vregs_config; +static struct vcpu_config sve_config; + +#define for_each_sublist(c, s) \ + for ((s) = &(c)->sublists[0]; (s)->regs; ++(s)) #define for_each_reg(i) \ for ((i) = 0; (i) < reg_list->n; ++(i)) @@ -54,12 +77,41 @@ for_each_reg_filtered(i) \ if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i])) +static const char *config_name(struct vcpu_config *c) +{ + struct reg_sublist *s; + int len = 0; -static struct kvm_reg_list *reg_list; + if (c->name) + return c->name; -static __u64 base_regs[], vregs[], sve_regs[], rejects_set[]; -static __u64 base_regs_n, vregs_n, sve_regs_n, rejects_set_n; -static __u64 *blessed_reg, blessed_n; + for_each_sublist(c, s) + len += strlen(s->name) + 1; + + c->name = malloc(len); + + len = 0; + for_each_sublist(c, s) { + if (!strcmp(s->name, "base")) + continue; + strcat(c->name + len, s->name); + len += strlen(s->name) + 1; + c->name[len - 1] = '+'; + } + c->name[len - 1] = '\0'; + + return c->name; +} + +static bool has_cap(struct vcpu_config *c, long capability) +{ + struct reg_sublist *s; + + for_each_sublist(c, s) + if (s->capability == capability) + return true; + return false; +} static bool filter_reg(__u64 reg) { @@ -96,11 +148,13 @@ static const char *str_with_index(const char *template, __u64 index) return (const char *)str; } +#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK) + #define CORE_REGS_XX_NR_WORDS 2 #define CORE_SPSR_XX_NR_WORDS 2 #define CORE_FPREGS_XX_NR_WORDS 4 -static const char *core_id_to_str(__u64 id) +static const char *core_id_to_str(struct vcpu_config *c, __u64 id) { __u64 core_off = id & ~REG_MASK, idx; @@ -111,7 +165,7 @@ static const char *core_id_to_str(__u64 id) case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... KVM_REG_ARM_CORE_REG(regs.regs[30]): idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS; - TEST_ASSERT(idx < 31, "Unexpected regs.regs index: %lld", idx); + TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", config_name(c), idx); return str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", idx); case KVM_REG_ARM_CORE_REG(regs.sp): return "KVM_REG_ARM_CORE_REG(regs.sp)"; @@ -126,12 +180,12 @@ static const char *core_id_to_str(__u64 id) case KVM_REG_ARM_CORE_REG(spsr[0]) ... KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]): idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS; - TEST_ASSERT(idx < KVM_NR_SPSR, "Unexpected spsr index: %lld", idx); + TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", config_name(c), idx); return str_with_index("KVM_REG_ARM_CORE_REG(spsr[##])", idx); case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS; - TEST_ASSERT(idx < 32, "Unexpected fp_regs.vregs index: %lld", idx); + TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", config_name(c), idx); return str_with_index("KVM_REG_ARM_CORE_REG(fp_regs.vregs[##])", idx); case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)"; @@ -139,11 +193,11 @@ static const char *core_id_to_str(__u64 id) return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)"; } - TEST_FAIL("Unknown core reg id: 0x%llx", id); + TEST_FAIL("%s: Unknown core reg id: 0x%llx", config_name(c), id); return NULL; } -static const char *sve_id_to_str(__u64 id) +static const char *sve_id_to_str(struct vcpu_config *c, __u64 id) { __u64 sve_off, n, i; @@ -153,37 +207,37 @@ static const char *sve_id_to_str(__u64 id) sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1)); i = id & (KVM_ARM64_SVE_MAX_SLICES - 1); - TEST_ASSERT(i == 0, "Currently we don't expect slice > 0, reg id 0x%llx", id); + TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", config_name(c), id); switch (sve_off) { case KVM_REG_ARM64_SVE_ZREG_BASE ... KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1: n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1); TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0), - "Unexpected bits set in SVE ZREG id: 0x%llx", id); + "%s: Unexpected bits set in SVE ZREG id: 0x%llx", config_name(c), id); return str_with_index("KVM_REG_ARM64_SVE_ZREG(##, 0)", n); case KVM_REG_ARM64_SVE_PREG_BASE ... KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1: n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1); TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0), - "Unexpected bits set in SVE PREG id: 0x%llx", id); + "%s: Unexpected bits set in SVE PREG id: 0x%llx", config_name(c), id); return str_with_index("KVM_REG_ARM64_SVE_PREG(##, 0)", n); case KVM_REG_ARM64_SVE_FFR_BASE: TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0), - "Unexpected bits set in SVE FFR id: 0x%llx", id); + "%s: Unexpected bits set in SVE FFR id: 0x%llx", config_name(c), id); return "KVM_REG_ARM64_SVE_FFR(0)"; } return NULL; } -static void print_reg(__u64 id) +static void print_reg(struct vcpu_config *c, __u64 id) { unsigned op0, op1, crn, crm, op2; const char *reg_size = NULL; TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64, - "KVM_REG_ARM64 missing in reg id: 0x%llx", id); + "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", config_name(c), id); switch (id & KVM_REG_SIZE_MASK) { case KVM_REG_SIZE_U8: @@ -214,17 +268,17 @@ static void print_reg(__u64 id) reg_size = "KVM_REG_SIZE_U2048"; break; default: - TEST_FAIL("Unexpected reg size: 0x%llx in reg id: 0x%llx", - (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id); + TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx", + config_name(c), (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id); } switch (id & KVM_REG_ARM_COPROC_MASK) { case KVM_REG_ARM_CORE: - printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(id)); + printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(c, id)); break; case KVM_REG_ARM_DEMUX: TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)), - "Unexpected bits set in DEMUX reg id: 0x%llx", id); + "%s: Unexpected bits set in DEMUX reg id: 0x%llx", config_name(c), id); printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n", reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK); break; @@ -235,23 +289,23 @@ static void print_reg(__u64 id) crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT; op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT; TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2), - "Unexpected bits set in SYSREG reg id: 0x%llx", id); + "%s: Unexpected bits set in SYSREG reg id: 0x%llx", config_name(c), id); printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2); break; case KVM_REG_ARM_FW: TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff), - "Unexpected bits set in FW reg id: 0x%llx", id); + "%s: Unexpected bits set in FW reg id: 0x%llx", config_name(c), id); printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff); break; case KVM_REG_ARM64_SVE: - if (reg_list_sve()) - printf("\t%s,\n", sve_id_to_str(id)); + if (has_cap(c, KVM_CAP_ARM_SVE)) + printf("\t%s,\n", sve_id_to_str(c, id)); else - TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id); + TEST_FAIL("%s: KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", config_name(c), id); break; default: - TEST_FAIL("Unexpected coproc type: 0x%llx in reg id: 0x%llx", - (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id); + TEST_FAIL("%s: Unexpected coproc type: 0x%llx in reg id: 0x%llx", + config_name(c), (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id); } } @@ -312,40 +366,51 @@ static void core_reg_fixup(void) reg_list = tmp; } -static void prepare_vcpu_init(struct kvm_vcpu_init *init) +static void prepare_vcpu_init(struct vcpu_config *c, struct kvm_vcpu_init *init) { - if (reg_list_sve()) - init->features[0] |= 1 << KVM_ARM_VCPU_SVE; + struct reg_sublist *s; + + for_each_sublist(c, s) + if (s->capability) + init->features[s->feature / 32] |= 1 << (s->feature % 32); } -static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid) +static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid, struct vcpu_config *c) { + struct reg_sublist *s; int feature; - if (reg_list_sve()) { - feature = KVM_ARM_VCPU_SVE; - vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature); + for_each_sublist(c, s) { + if (s->finalize) { + feature = s->feature; + vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature); + } } } -static void check_supported(void) +static void check_supported(struct vcpu_config *c) { - if (reg_list_sve() && !kvm_check_cap(KVM_CAP_ARM_SVE)) { - fprintf(stderr, "SVE not available, skipping tests\n"); - exit(KSFT_SKIP); + struct reg_sublist *s; + + for_each_sublist(c, s) { + if (s->capability && !kvm_check_cap(s->capability)) { + fprintf(stderr, "%s: %s not available, skipping tests\n", config_name(c), s->name); + exit(KSFT_SKIP); + } } } int main(int ac, char **av) { + struct vcpu_config *c = reg_list_sve() ? &sve_config : &vregs_config; struct kvm_vcpu_init init = { .target = -1, }; - int new_regs = 0, missing_regs = 0, i; + int new_regs = 0, missing_regs = 0, i, n; int failed_get = 0, failed_set = 0, failed_reject = 0; bool print_list = false, print_filtered = false, fixup_core_regs = false; struct kvm_vm *vm; - __u64 *vec_regs; + struct reg_sublist *s; - check_supported(); + check_supported(c); for (i = 1; i < ac; ++i) { if (strcmp(av[i], "--core-reg-fixup") == 0) @@ -359,9 +424,9 @@ int main(int ac, char **av) } vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); - prepare_vcpu_init(&init); + prepare_vcpu_init(c, &init); aarch64_vcpu_add_default(vm, 0, &init, NULL); - finalize_vcpu(vm, 0); + finalize_vcpu(vm, 0, c); reg_list = vcpu_get_reg_list(vm, 0); @@ -374,7 +439,7 @@ int main(int ac, char **av) __u64 id = reg_list->reg[i]; if ((print_list && !filter_reg(id)) || (print_filtered && filter_reg(id))) - print_reg(id); + print_reg(c, id); } putchar('\n'); return 0; @@ -396,50 +461,52 @@ int main(int ac, char **av) .id = reg_list->reg[i], .addr = (__u64)&addr, }; + bool reject_reg = false; int ret; ret = _vcpu_ioctl(vm, 0, KVM_GET_ONE_REG, ®); if (ret) { - puts("Failed to get "); - print_reg(reg.id); + printf("%s: Failed to get ", config_name(c)); + print_reg(c, reg.id); putchar('\n'); ++failed_get; } /* rejects_set registers are rejected after KVM_ARM_VCPU_FINALIZE */ - if (find_reg(rejects_set, rejects_set_n, reg.id)) { - ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, ®); - if (ret != -1 || errno != EPERM) { - printf("Failed to reject (ret=%d, errno=%d) ", ret, errno); - print_reg(reg.id); - putchar('\n'); - ++failed_reject; + for_each_sublist(c, s) { + if (s->rejects_set && find_reg(s->rejects_set, s->rejects_set_n, reg.id)) { + reject_reg = true; + ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, ®); + if (ret != -1 || errno != EPERM) { + printf("%s: Failed to reject (ret=%d, errno=%d) ", config_name(c), ret, errno); + print_reg(c, reg.id); + putchar('\n'); + ++failed_reject; + } + break; } - continue; } - ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, ®); - if (ret) { - puts("Failed to set "); - print_reg(reg.id); - putchar('\n'); - ++failed_set; + if (!reject_reg) { + ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, ®); + if (ret) { + printf("%s: Failed to set ", config_name(c)); + print_reg(c, reg.id); + putchar('\n'); + ++failed_set; + } } } - if (reg_list_sve()) { - blessed_n = base_regs_n + sve_regs_n; - vec_regs = sve_regs; - } else { - blessed_n = base_regs_n + vregs_n; - vec_regs = vregs; - } - + for_each_sublist(c, s) + blessed_n += s->regs_n; blessed_reg = calloc(blessed_n, sizeof(__u64)); - for (i = 0; i < base_regs_n; ++i) - blessed_reg[i] = base_regs[i]; - for (i = 0; i < blessed_n - base_regs_n; ++i) - blessed_reg[base_regs_n + i] = vec_regs[i]; + + n = 0; + for_each_sublist(c, s) { + for (i = 0; i < s->regs_n; ++i) + blessed_reg[n++] = s->regs[i]; + } for_each_new_reg(i) ++new_regs; @@ -448,31 +515,31 @@ int main(int ac, char **av) ++missing_regs; if (new_regs || missing_regs) { - printf("Number blessed registers: %5lld\n", blessed_n); - printf("Number registers: %5lld\n", reg_list->n); + printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n); + printf("%s: Number registers: %5lld\n", config_name(c), reg_list->n); } if (new_regs) { - printf("\nThere are %d new registers.\n" + printf("\n%s: There are %d new registers.\n" "Consider adding them to the blessed reg " - "list with the following lines:\n\n", new_regs); + "list with the following lines:\n\n", config_name(c), new_regs); for_each_new_reg(i) - print_reg(reg_list->reg[i]); + print_reg(c, reg_list->reg[i]); putchar('\n'); } if (missing_regs) { - printf("\nThere are %d missing registers.\n" - "The following lines are missing registers:\n\n", missing_regs); + printf("\n%s: There are %d missing registers.\n" + "The following lines are missing registers:\n\n", config_name(c), missing_regs); for_each_missing_reg(i) - print_reg(blessed_reg[i]); + print_reg(c, blessed_reg[i]); putchar('\n'); } TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject, - "There are %d missing registers; " + "%s: There are %d missing registers; " "%d registers failed get; %d registers failed set; %d registers failed reject", - missing_regs, failed_get, failed_set, failed_reject); + config_name(c), missing_regs, failed_get, failed_set, failed_reject); return 0; } @@ -761,7 +828,6 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */ ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */ }; -static __u64 base_regs_n = ARRAY_SIZE(base_regs); static __u64 vregs[] = { KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]), @@ -797,7 +863,6 @@ static __u64 vregs[] = { KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]), KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]), }; -static __u64 vregs_n = ARRAY_SIZE(vregs); static __u64 sve_regs[] = { KVM_REG_ARM64_SVE_VLS, @@ -852,11 +917,31 @@ static __u64 sve_regs[] = { KVM_REG_ARM64_SVE_FFR(0), ARM64_SYS_REG(3, 0, 1, 2, 0), /* ZCR_EL1 */ }; -static __u64 sve_regs_n = ARRAY_SIZE(sve_regs); -static __u64 rejects_set[] = { -#ifdef REG_LIST_SVE +static __u64 sve_rejects_set[] = { KVM_REG_ARM64_SVE_VLS, -#endif }; -static __u64 rejects_set_n = ARRAY_SIZE(rejects_set); + +#define BASE_SUBLIST \ + { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), } +#define VREGS_SUBLIST \ + { "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), } +#define SVE_SUBLIST \ + { "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \ + .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \ + .rejects_set = sve_rejects_set, .rejects_set_n = ARRAY_SIZE(sve_rejects_set), } + +static struct vcpu_config vregs_config = { + .sublists = { + BASE_SUBLIST, + VREGS_SUBLIST, + {0}, + }, +}; +static struct vcpu_config sve_config = { + .sublists = { + BASE_SUBLIST, + SVE_SUBLIST, + {0}, + }, +}; From 94e9223c06bece9165a36f0f56bac3552a45cbfc Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Mon, 31 May 2021 12:33:41 +0200 Subject: [PATCH 719/730] KVM: arm64: selftests: get-reg-list: Prepare to run multiple configs at once We don't want to have to create a new binary for each vcpu config, so prepare to run the test for multiple vcpu configs in a single binary. We do this by factoring out the test from main() and then looping over configs. When given '--list' we still never print more than a single reg-list for a single vcpu config though, because it would be confusing otherwise. No functional change intended. Signed-off-by: Andrew Jones Reviewed-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210531103344.29325-3-drjones@redhat.com --- .../selftests/kvm/aarch64/get-reg-list.c | 68 ++++++++++++++----- 1 file changed, 51 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index 7bb09ce20dde..14fc8d82e30f 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -56,8 +56,8 @@ struct vcpu_config { struct reg_sublist sublists[]; }; -static struct vcpu_config vregs_config; -static struct vcpu_config sve_config; +static struct vcpu_config *vcpu_configs[]; +static int vcpu_configs_n; #define for_each_sublist(c, s) \ for ((s) = &(c)->sublists[0]; (s)->regs; ++(s)) @@ -400,29 +400,20 @@ static void check_supported(struct vcpu_config *c) } } -int main(int ac, char **av) +static bool print_list; +static bool print_filtered; +static bool fixup_core_regs; + +static void run_test(struct vcpu_config *c) { - struct vcpu_config *c = reg_list_sve() ? &sve_config : &vregs_config; struct kvm_vcpu_init init = { .target = -1, }; int new_regs = 0, missing_regs = 0, i, n; int failed_get = 0, failed_set = 0, failed_reject = 0; - bool print_list = false, print_filtered = false, fixup_core_regs = false; struct kvm_vm *vm; struct reg_sublist *s; check_supported(c); - for (i = 1; i < ac; ++i) { - if (strcmp(av[i], "--core-reg-fixup") == 0) - fixup_core_regs = true; - else if (strcmp(av[i], "--list") == 0) - print_list = true; - else if (strcmp(av[i], "--list-filtered") == 0) - print_filtered = true; - else - TEST_FAIL("Unknown option: %s\n", av[i]); - } - vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); prepare_vcpu_init(c, &init); aarch64_vcpu_add_default(vm, 0, &init, NULL); @@ -442,7 +433,7 @@ int main(int ac, char **av) print_reg(c, id); } putchar('\n'); - return 0; + return; } /* @@ -541,6 +532,44 @@ int main(int ac, char **av) "%d registers failed get; %d registers failed set; %d registers failed reject", config_name(c), missing_regs, failed_get, failed_set, failed_reject); + pr_info("%s: PASS\n", config_name(c)); + blessed_n = 0; + free(blessed_reg); + free(reg_list); + kvm_vm_free(vm); +} + +int main(int ac, char **av) +{ + struct vcpu_config *c, *sel = NULL; + int i; + + for (i = 1; i < ac; ++i) { + if (strcmp(av[i], "--core-reg-fixup") == 0) + fixup_core_regs = true; + else if (strcmp(av[i], "--list") == 0) + print_list = true; + else if (strcmp(av[i], "--list-filtered") == 0) + print_filtered = true; + else + TEST_FAIL("Unknown option: %s\n", av[i]); + } + + if (print_list || print_filtered) { + /* + * We only want to print the register list of a single config. + * TODO: Add command line support to pick which config. + */ + sel = vcpu_configs[0]; + } + + for (i = 0; i < vcpu_configs_n; ++i) { + c = vcpu_configs[i]; + if (sel && c != sel) + continue; + run_test(c); + } + return 0; } @@ -945,3 +974,8 @@ static struct vcpu_config sve_config = { {0}, }, }; + +static struct vcpu_config *vcpu_configs[] = { + reg_list_sve() ? &sve_config : &vregs_config, +}; +static int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); From f3032fcc9cf065733ce9a50057aaeffd6c464e2e Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Mon, 31 May 2021 12:33:42 +0200 Subject: [PATCH 720/730] KVM: arm64: selftests: get-reg-list: Provide config selection option Add a new command line option that allows the user to select a specific configuration, e.g. --config=sve will give the sve config. Also provide help text and the --help/-h options. Signed-off-by: Andrew Jones Reviewed-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210531103344.29325-4-drjones@redhat.com --- .../selftests/kvm/aarch64/get-reg-list.c | 56 ++++++++++++++++++- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index 14fc8d82e30f..03e041d97a18 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -539,6 +539,52 @@ static void run_test(struct vcpu_config *c) kvm_vm_free(vm); } +static void help(void) +{ + struct vcpu_config *c; + int i; + + printf( + "\n" + "usage: get-reg-list [--config=] [--list] [--list-filtered] [--core-reg-fixup]\n\n" + " --config= Used to select a specific vcpu configuration for the test/listing\n" + " '' may be\n"); + + for (i = 0; i < vcpu_configs_n; ++i) { + c = vcpu_configs[i]; + printf( + " '%s'\n", config_name(c)); + } + + printf( + "\n" + " --list Print the register list rather than test it (requires --config)\n" + " --list-filtered Print registers that would normally be filtered out (requires --config)\n" + " --core-reg-fixup Needed when running on old kernels with broken core reg listings\n" + "\n" + ); +} + +static struct vcpu_config *parse_config(const char *config) +{ + struct vcpu_config *c; + int i; + + if (config[8] != '=') + help(), exit(1); + + for (i = 0; i < vcpu_configs_n; ++i) { + c = vcpu_configs[i]; + if (strcmp(config_name(c), &config[9]) == 0) + break; + } + + if (i == vcpu_configs_n) + help(), exit(1); + + return c; +} + int main(int ac, char **av) { struct vcpu_config *c, *sel = NULL; @@ -547,20 +593,24 @@ int main(int ac, char **av) for (i = 1; i < ac; ++i) { if (strcmp(av[i], "--core-reg-fixup") == 0) fixup_core_regs = true; + else if (strncmp(av[i], "--config", 8) == 0) + sel = parse_config(av[i]); else if (strcmp(av[i], "--list") == 0) print_list = true; else if (strcmp(av[i], "--list-filtered") == 0) print_filtered = true; + else if (strcmp(av[i], "--help") == 0 || strcmp(av[1], "-h") == 0) + help(), exit(0); else - TEST_FAIL("Unknown option: %s\n", av[i]); + help(), exit(1); } if (print_list || print_filtered) { /* * We only want to print the register list of a single config. - * TODO: Add command line support to pick which config. */ - sel = vcpu_configs[0]; + if (!sel) + help(), exit(1); } for (i = 0; i < vcpu_configs_n; ++i) { From 32edd2290889d0cd0751dd11853e5a368188066d Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Mon, 31 May 2021 12:33:43 +0200 Subject: [PATCH 721/730] KVM: arm64: selftests: get-reg-list: Remove get-reg-list-sve Now that we can easily run the test for multiple vcpu configs, let's merge get-reg-list and get-reg-list-sve into just get-reg-list. We also add a final change to make it more possible to run multiple tests, which is to fork the test, rather than directly run it. That allows a test to fail, but subsequent tests can still run. Signed-off-by: Andrew Jones Reviewed-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210531103344.29325-5-drjones@redhat.com --- tools/testing/selftests/kvm/.gitignore | 1 - tools/testing/selftests/kvm/Makefile | 1 - .../selftests/kvm/aarch64/get-reg-list-sve.c | 3 -- .../selftests/kvm/aarch64/get-reg-list.c | 31 +++++++++++++------ 4 files changed, 21 insertions(+), 15 deletions(-) delete mode 100644 tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 524c857a049c..dd36575b732a 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only /aarch64/get-reg-list -/aarch64/get-reg-list-sve /aarch64/vgic_init /s390x/memop /s390x/resets diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index daaee1888b12..5c8f3725a7f0 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -79,7 +79,6 @@ TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list -TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve TEST_GEN_PROGS_aarch64 += aarch64/vgic_init TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c deleted file mode 100644 index efba76682b4b..000000000000 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c +++ /dev/null @@ -1,3 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#define REG_LIST_SVE -#include "get-reg-list.c" diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index 03e041d97a18..b46b8a1fdc0c 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -27,16 +27,13 @@ #include #include #include +#include +#include +#include #include "kvm_util.h" #include "test_util.h" #include "processor.h" -#ifdef REG_LIST_SVE -#define reg_list_sve() (true) -#else -#define reg_list_sve() (false) -#endif - static struct kvm_reg_list *reg_list; static __u64 *blessed_reg, blessed_n; @@ -588,7 +585,8 @@ static struct vcpu_config *parse_config(const char *config) int main(int ac, char **av) { struct vcpu_config *c, *sel = NULL; - int i; + int i, ret = 0; + pid_t pid; for (i = 1; i < ac; ++i) { if (strcmp(av[i], "--core-reg-fixup") == 0) @@ -617,10 +615,22 @@ int main(int ac, char **av) c = vcpu_configs[i]; if (sel && c != sel) continue; - run_test(c); + + pid = fork(); + + if (!pid) { + run_test(c); + exit(0); + } else { + int wstatus; + pid_t wpid = wait(&wstatus); + TEST_ASSERT(wpid == pid && WIFEXITED(wstatus), "wait: Unexpected return"); + if (WEXITSTATUS(wstatus) && WEXITSTATUS(wstatus) != KSFT_SKIP) + ret = KSFT_FAIL; + } } - return 0; + return ret; } /* @@ -1026,6 +1036,7 @@ static struct vcpu_config sve_config = { }; static struct vcpu_config *vcpu_configs[] = { - reg_list_sve() ? &sve_config : &vregs_config, + &vregs_config, + &sve_config, }; static int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); From 313673bad871750c0c829def53d037868af75b67 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Mon, 31 May 2021 12:33:44 +0200 Subject: [PATCH 722/730] KVM: arm64: selftests: get-reg-list: Split base and pmu registers Since KVM commit 11663111cd49 ("KVM: arm64: Hide PMU registers from userspace when not available") the get-reg-list* tests have been failing with ... ... There are 74 missing registers. The following lines are missing registers: ... where the 74 missing registers are all PMU registers. This isn't a bug in KVM that the selftest found, even though it's true that a KVM userspace that wasn't setting the KVM_ARM_VCPU_PMU_V3 VCPU flag, but still expecting the PMU registers to be in the reg-list, would suddenly no longer have their expectations met. In that case, the expectations were wrong, though, so that KVM userspace needs to be fixed, and so does this selftest. The fix for this selftest is to pull the PMU registers out of the base register sublist into their own sublist and then create new, pmu-enabled vcpu configs which can be tested. Signed-off-by: Andrew Jones Reviewed-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210531103344.29325-6-drjones@redhat.com --- .../selftests/kvm/aarch64/get-reg-list.c | 39 +++++++++++++++---- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index b46b8a1fdc0c..a16c8f05366c 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -637,7 +637,7 @@ int main(int ac, char **av) * The current blessed list was primed with the output of kernel version * v4.15 with --core-reg-fixup and then later updated with new registers. * - * The blessed list is up to date with kernel version v5.10-rc5 + * The blessed list is up to date with kernel version v5.13-rc3 */ static __u64 base_regs[] = { KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]), @@ -829,8 +829,6 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 5, 2, 0), /* ESR_EL1 */ ARM64_SYS_REG(3, 0, 6, 0, 0), /* FAR_EL1 */ ARM64_SYS_REG(3, 0, 7, 4, 0), /* PAR_EL1 */ - ARM64_SYS_REG(3, 0, 9, 14, 1), /* PMINTENSET_EL1 */ - ARM64_SYS_REG(3, 0, 9, 14, 2), /* PMINTENCLR_EL1 */ ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */ ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */ ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */ @@ -839,6 +837,16 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */ ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */ ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */ + ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */ + ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */ + ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */ + ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */ + ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */ +}; + +static __u64 pmu_regs[] = { + ARM64_SYS_REG(3, 0, 9, 14, 1), /* PMINTENSET_EL1 */ + ARM64_SYS_REG(3, 0, 9, 14, 2), /* PMINTENCLR_EL1 */ ARM64_SYS_REG(3, 3, 9, 12, 0), /* PMCR_EL0 */ ARM64_SYS_REG(3, 3, 9, 12, 1), /* PMCNTENSET_EL0 */ ARM64_SYS_REG(3, 3, 9, 12, 2), /* PMCNTENCLR_EL0 */ @@ -848,8 +856,6 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 3, 9, 13, 0), /* PMCCNTR_EL0 */ ARM64_SYS_REG(3, 3, 9, 14, 0), /* PMUSERENR_EL0 */ ARM64_SYS_REG(3, 3, 9, 14, 3), /* PMOVSSET_EL0 */ - ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */ - ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */ ARM64_SYS_REG(3, 3, 14, 8, 0), ARM64_SYS_REG(3, 3, 14, 8, 1), ARM64_SYS_REG(3, 3, 14, 8, 2), @@ -913,9 +919,6 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 3, 14, 15, 5), ARM64_SYS_REG(3, 3, 14, 15, 6), ARM64_SYS_REG(3, 3, 14, 15, 7), /* PMCCFILTR_EL0 */ - ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */ - ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */ - ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */ }; static __u64 vregs[] = { @@ -1015,6 +1018,8 @@ static __u64 sve_rejects_set[] = { { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), } #define VREGS_SUBLIST \ { "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), } +#define PMU_SUBLIST \ + { "pmu", .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), } #define SVE_SUBLIST \ { "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \ .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \ @@ -1027,6 +1032,14 @@ static struct vcpu_config vregs_config = { {0}, }, }; +static struct vcpu_config vregs_pmu_config = { + .sublists = { + BASE_SUBLIST, + VREGS_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; static struct vcpu_config sve_config = { .sublists = { BASE_SUBLIST, @@ -1034,9 +1047,19 @@ static struct vcpu_config sve_config = { {0}, }, }; +static struct vcpu_config sve_pmu_config = { + .sublists = { + BASE_SUBLIST, + SVE_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; static struct vcpu_config *vcpu_configs[] = { &vregs_config, + &vregs_pmu_config, &sve_config, + &sve_pmu_config, }; static int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); From b356a831088730a3ef36848cd9f2d62dcac392bf Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 22 Jun 2021 08:46:44 +0100 Subject: [PATCH 723/730] KVM: arm64: Update MAINTAINERS to include selftests As the KVM/arm64 selftests are routed via the kvmarm tree, add the relevant references to the MAINTAINERS file. Suggested-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210622070732.zod7gaqhqo344vg6@gator --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 503fd21901f1..b9d5999253c4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9993,6 +9993,8 @@ F: arch/arm64/include/asm/kvm* F: arch/arm64/include/uapi/asm/kvm* F: arch/arm64/kvm/ F: include/kvm/arm_* +F: tools/testing/selftests/kvm/*/aarch64/ +F: tools/testing/selftests/kvm/aarch64/ KERNEL VIRTUAL MACHINE FOR MIPS (KVM/mips) M: Huacai Chen From 69e3b846d8a753f9f279f29531ca56b0f7563ad0 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Jun 2021 12:17:11 +0100 Subject: [PATCH 724/730] arm64: mte: Sync tags for pages where PTE is untagged A KVM guest could store tags in a page even if the VMM hasn't mapped the page with PROT_MTE. So when restoring pages from swap we will need to check to see if there are any saved tags even if !pte_tagged(). However don't check pages for which pte_access_permitted() returns false as these will not have been swapped out. Reviewed-by: Catalin Marinas Signed-off-by: Steven Price Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210621111716.37157-2-steven.price@arm.com --- arch/arm64/include/asm/mte.h | 4 ++-- arch/arm64/include/asm/pgtable.h | 22 +++++++++++++++++++--- arch/arm64/kernel/mte.c | 18 +++++++++++++----- 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index bc88a1ced0d7..347ef38a35f7 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -37,7 +37,7 @@ void mte_free_tag_storage(char *storage); /* track which pages have valid allocation tags */ #define PG_mte_tagged PG_arch_2 -void mte_sync_tags(pte_t *ptep, pte_t pte); +void mte_sync_tags(pte_t old_pte, pte_t pte); void mte_copy_page_tags(void *kto, const void *kfrom); void mte_thread_init_user(void); void mte_thread_switch(struct task_struct *next); @@ -53,7 +53,7 @@ int mte_ptrace_copy_tags(struct task_struct *child, long request, /* unused if !CONFIG_ARM64_MTE, silence the compiler */ #define PG_mte_tagged 0 -static inline void mte_sync_tags(pte_t *ptep, pte_t pte) +static inline void mte_sync_tags(pte_t old_pte, pte_t pte) { } static inline void mte_copy_page_tags(void *kto, const void *kfrom) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 0b10204e72fc..db5402168841 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -314,9 +314,25 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) __sync_icache_dcache(pte); - if (system_supports_mte() && - pte_present(pte) && pte_tagged(pte) && !pte_special(pte)) - mte_sync_tags(ptep, pte); + /* + * If the PTE would provide user space access to the tags associated + * with it then ensure that the MTE tags are synchronised. Although + * pte_access_permitted() returns false for exec only mappings, they + * don't expose tags (instruction fetches don't check tags). + */ + if (system_supports_mte() && pte_access_permitted(pte, false) && + !pte_special(pte)) { + pte_t old_pte = READ_ONCE(*ptep); + /* + * We only need to synchronise if the new PTE has tags enabled + * or if swapping in (in which case another mapping may have + * set tags in the past even if this PTE isn't tagged). + * (!pte_none() && !pte_present()) is an open coded version of + * is_swap_pte() + */ + if (pte_tagged(pte) || (!pte_none(old_pte) && !pte_present(old_pte))) + mte_sync_tags(old_pte, pte); + } __check_racy_pte_update(mm, ptep, pte); diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 125a10e413e9..69b3fde8759e 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -32,10 +32,9 @@ DEFINE_STATIC_KEY_FALSE(mte_async_mode); EXPORT_SYMBOL_GPL(mte_async_mode); #endif -static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap) +static void mte_sync_page_tags(struct page *page, pte_t old_pte, + bool check_swap, bool pte_is_tagged) { - pte_t old_pte = READ_ONCE(*ptep); - if (check_swap && is_swap_pte(old_pte)) { swp_entry_t entry = pte_to_swp_entry(old_pte); @@ -43,6 +42,9 @@ static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap) return; } + if (!pte_is_tagged) + return; + page_kasan_tag_reset(page); /* * We need smp_wmb() in between setting the flags and clearing the @@ -55,16 +57,22 @@ static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap) mte_clear_page_tags(page_address(page)); } -void mte_sync_tags(pte_t *ptep, pte_t pte) +void mte_sync_tags(pte_t old_pte, pte_t pte) { struct page *page = pte_page(pte); long i, nr_pages = compound_nr(page); bool check_swap = nr_pages == 1; + bool pte_is_tagged = pte_tagged(pte); + + /* Early out if there's nothing to do */ + if (!check_swap && !pte_is_tagged) + return; /* if PG_mte_tagged is set, tags have already been initialised */ for (i = 0; i < nr_pages; i++, page++) { if (!test_and_set_bit(PG_mte_tagged, &page->flags)) - mte_sync_page_tags(page, ptep, check_swap); + mte_sync_page_tags(page, old_pte, check_swap, + pte_is_tagged); } } From ea7fc1bb1cd1b92b42b1d9273ce7e231d3dc9321 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Jun 2021 12:17:12 +0100 Subject: [PATCH 725/730] KVM: arm64: Introduce MTE VM feature Add a new VM feature 'KVM_ARM_CAP_MTE' which enables memory tagging for a VM. This will expose the feature to the guest and automatically tag memory pages touched by the VM as PG_mte_tagged (and clear the tag storage) to ensure that the guest cannot see stale tags, and so that the tags are correctly saved/restored across swap. Actually exposing the new capability to user space happens in a later patch. Reviewed-by: Catalin Marinas Signed-off-by: Steven Price [maz: move VM_SHARED sampling into the critical section] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210621111716.37157-3-steven.price@arm.com --- arch/arm64/include/asm/kvm_emulate.h | 3 ++ arch/arm64/include/asm/kvm_host.h | 4 ++ arch/arm64/kvm/hyp/exception.c | 3 +- arch/arm64/kvm/mmu.c | 67 +++++++++++++++++++++++++++- arch/arm64/kvm/sys_regs.c | 7 +++ include/uapi/linux/kvm.h | 1 + 6 files changed, 83 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 01b9857757f2..fd418955e31e 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -84,6 +84,9 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) || vcpu_el1_is_32bit(vcpu)) vcpu->arch.hcr_el2 |= HCR_TID2; + + if (kvm_has_mte(vcpu->kvm)) + vcpu->arch.hcr_el2 |= HCR_ATA; } static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 7cd7d5c8c4bc..1c4293c46ef6 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -132,6 +132,9 @@ struct kvm_arch { u8 pfr0_csv2; u8 pfr0_csv3; + + /* Memory Tagging Extension enabled for the guest */ + bool mte_enabled; }; struct kvm_vcpu_fault_info { @@ -769,6 +772,7 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); #define kvm_arm_vcpu_sve_finalized(vcpu) \ ((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED) +#define kvm_has_mte(kvm) (system_supports_mte() && (kvm)->arch.mte_enabled) #define kvm_vcpu_has_pmu(vcpu) \ (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features)) diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 11541b94b328..0418399e0a20 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -112,7 +112,8 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, new |= (old & PSR_C_BIT); new |= (old & PSR_V_BIT); - // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests) + if (kvm_has_mte(vcpu->kvm)) + new |= PSR_TCO_BIT; new |= (old & PSR_DIT_BIT); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index c10207fed2f3..c6a97d463892 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -822,6 +822,45 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot, return PAGE_SIZE; } +/* + * The page will be mapped in stage 2 as Normal Cacheable, so the VM will be + * able to see the page's tags and therefore they must be initialised first. If + * PG_mte_tagged is set, tags have already been initialised. + * + * The race in the test/set of the PG_mte_tagged flag is handled by: + * - preventing VM_SHARED mappings in a memslot with MTE preventing two VMs + * racing to santise the same page + * - mmap_lock protects between a VM faulting a page in and the VMM performing + * an mprotect() to add VM_MTE + */ +static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, + unsigned long size) +{ + unsigned long i, nr_pages = size >> PAGE_SHIFT; + struct page *page; + + if (!kvm_has_mte(kvm)) + return 0; + + /* + * pfn_to_online_page() is used to reject ZONE_DEVICE pages + * that may not support tags. + */ + page = pfn_to_online_page(pfn); + + if (!page) + return -EFAULT; + + for (i = 0; i < nr_pages; i++, page++) { + if (!test_bit(PG_mte_tagged, &page->flags)) { + mte_clear_page_tags(page_address(page)); + set_bit(PG_mte_tagged, &page->flags); + } + } + + return 0; +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) @@ -830,6 +869,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, bool write_fault, writable, force_pte = false; bool exec_fault; bool device = false; + bool shared; unsigned long mmu_seq; struct kvm *kvm = vcpu->kvm; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; @@ -873,6 +913,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, vma_shift = PAGE_SHIFT; } + shared = (vma->vm_flags & VM_PFNMAP); + switch (vma_shift) { #ifndef __PAGETABLE_PMD_FOLDED case PUD_SHIFT: @@ -971,8 +1013,18 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (writable) prot |= KVM_PGTABLE_PROT_W; - if (fault_status != FSC_PERM && !device) + if (fault_status != FSC_PERM && !device) { + /* Check the VMM hasn't introduced a new VM_SHARED VMA */ + if (kvm_has_mte(kvm) && shared) { + ret = -EFAULT; + goto out_unlock; + } + ret = sanitise_mte_tags(kvm, pfn, vma_pagesize); + if (ret) + goto out_unlock; + clean_dcache_guest_page(pfn, vma_pagesize); + } if (exec_fault) { prot |= KVM_PGTABLE_PROT_X; @@ -1168,12 +1220,17 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { kvm_pfn_t pfn = pte_pfn(range->pte); + int ret; if (!kvm->arch.mmu.pgt) return false; WARN_ON(range->end - range->start != 1); + ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE); + if (ret) + return false; + /* * We've moved a page around, probably through CoW, so let's treat it * just like a translation fault and clean the cache to the PoC. @@ -1381,6 +1438,14 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (!vma) break; + /* + * VM_SHARED mappings are not allowed with MTE to avoid races + * when updating the PG_mte_tagged page flag, see + * sanitise_mte_tags for more details. + */ + if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) + return -EINVAL; + /* * Take the intersection of this VMA with the memory region */ diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1a7968ad078c..36f67f8deae1 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1047,6 +1047,13 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, break; case SYS_ID_AA64PFR1_EL1: val &= ~FEATURE(ID_AA64PFR1_MTE); + if (kvm_has_mte(vcpu->kvm)) { + u64 pfr, mte; + + pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); + mte = cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR1_MTE_SHIFT); + val |= FIELD_PREP(FEATURE(ID_AA64PFR1_MTE), mte); + } break; case SYS_ID_AA64ISAR1_EL1: if (!vcpu_has_ptrauth(vcpu)) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 79d9c44d1ad7..d4da58ddcad7 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1083,6 +1083,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SGX_ATTRIBUTE 196 #define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197 #define KVM_CAP_PTP_KVM 198 +#define KVM_CAP_ARM_MTE 199 #ifdef KVM_CAP_IRQ_ROUTING From e1f358b5046479d2897f23b1d5b092687c6e7a67 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Jun 2021 12:17:13 +0100 Subject: [PATCH 726/730] KVM: arm64: Save/restore MTE registers Define the new system registers that MTE introduces and context switch them. The MTE feature is still hidden from the ID register as it isn't supported in a VM yet. Reviewed-by: Catalin Marinas Signed-off-by: Steven Price Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210621111716.37157-4-steven.price@arm.com --- arch/arm64/include/asm/kvm_arm.h | 3 +- arch/arm64/include/asm/kvm_host.h | 6 ++ arch/arm64/include/asm/kvm_mte.h | 66 ++++++++++++++++++++++ arch/arm64/include/asm/sysreg.h | 3 +- arch/arm64/kernel/asm-offsets.c | 2 + arch/arm64/kvm/hyp/entry.S | 7 +++ arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 21 +++++++ arch/arm64/kvm/sys_regs.c | 22 ++++++-- 8 files changed, 124 insertions(+), 6 deletions(-) create mode 100644 arch/arm64/include/asm/kvm_mte.h diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 692c9049befa..d436831dd706 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -12,7 +12,8 @@ #include /* Hyp Configuration Register (HCR) bits */ -#define HCR_ATA (UL(1) << 56) +#define HCR_ATA_SHIFT 56 +#define HCR_ATA (UL(1) << HCR_ATA_SHIFT) #define HCR_FWB (UL(1) << 46) #define HCR_API (UL(1) << 41) #define HCR_APK (UL(1) << 40) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 1c4293c46ef6..74a7447a83a1 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -209,6 +209,12 @@ enum vcpu_sysreg { CNTP_CVAL_EL0, CNTP_CTL_EL0, + /* Memory Tagging Extension registers */ + RGSR_EL1, /* Random Allocation Tag Seed Register */ + GCR_EL1, /* Tag Control Register */ + TFSR_EL1, /* Tag Fault Status Register (EL1) */ + TFSRE0_EL1, /* Tag Fault Status Register (EL0) */ + /* 32bit specific registers. Keep them at the end of the range */ DACR32_EL2, /* Domain Access Control Register */ IFSR32_EL2, /* Instruction Fault Status Register */ diff --git a/arch/arm64/include/asm/kvm_mte.h b/arch/arm64/include/asm/kvm_mte.h new file mode 100644 index 000000000000..de002636eb1f --- /dev/null +++ b/arch/arm64/include/asm/kvm_mte.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2021 ARM Ltd. + */ +#ifndef __ASM_KVM_MTE_H +#define __ASM_KVM_MTE_H + +#ifdef __ASSEMBLY__ + +#include + +#ifdef CONFIG_ARM64_MTE + +.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1 +alternative_if_not ARM64_MTE + b .L__skip_switch\@ +alternative_else_nop_endif + mrs \reg1, hcr_el2 + tbz \reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@ + + mrs_s \reg1, SYS_RGSR_EL1 + str \reg1, [\h_ctxt, #CPU_RGSR_EL1] + mrs_s \reg1, SYS_GCR_EL1 + str \reg1, [\h_ctxt, #CPU_GCR_EL1] + + ldr \reg1, [\g_ctxt, #CPU_RGSR_EL1] + msr_s SYS_RGSR_EL1, \reg1 + ldr \reg1, [\g_ctxt, #CPU_GCR_EL1] + msr_s SYS_GCR_EL1, \reg1 + +.L__skip_switch\@: +.endm + +.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1 +alternative_if_not ARM64_MTE + b .L__skip_switch\@ +alternative_else_nop_endif + mrs \reg1, hcr_el2 + tbz \reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@ + + mrs_s \reg1, SYS_RGSR_EL1 + str \reg1, [\g_ctxt, #CPU_RGSR_EL1] + mrs_s \reg1, SYS_GCR_EL1 + str \reg1, [\g_ctxt, #CPU_GCR_EL1] + + ldr \reg1, [\h_ctxt, #CPU_RGSR_EL1] + msr_s SYS_RGSR_EL1, \reg1 + ldr \reg1, [\h_ctxt, #CPU_GCR_EL1] + msr_s SYS_GCR_EL1, \reg1 + + isb + +.L__skip_switch\@: +.endm + +#else /* !CONFIG_ARM64_MTE */ + +.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1 +.endm + +.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1 +.endm + +#endif /* CONFIG_ARM64_MTE */ +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_KVM_MTE_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 65d15700a168..347ccac2341e 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -651,7 +651,8 @@ #define INIT_SCTLR_EL2_MMU_ON \ (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I | \ - SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | SCTLR_EL2_RES1) + SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | \ + SCTLR_ELx_ITFSB | SCTLR_EL2_RES1) #define INIT_SCTLR_EL2_MMU_OFF \ (SCTLR_EL2_RES1 | ENDIAN_SET_EL2) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 0cb34ccb6e73..6f0044cb233e 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -111,6 +111,8 @@ int main(void) DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags)); DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs)); + DEFINE(CPU_RGSR_EL1, offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1])); + DEFINE(CPU_GCR_EL1, offsetof(struct kvm_cpu_context, sys_regs[GCR_EL1])); DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1])); DEFINE(CPU_APIBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1])); DEFINE(CPU_APDAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1])); diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index e831d3dfd50d..435346ea1504 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -13,6 +13,7 @@ #include #include #include +#include #include .text @@ -51,6 +52,9 @@ alternative_else_nop_endif add x29, x0, #VCPU_CONTEXT + // mte_switch_to_guest(g_ctxt, h_ctxt, tmp1) + mte_switch_to_guest x29, x1, x2 + // Macro ptrauth_switch_to_guest format: // ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3) // The below macro to restore guest keys is not implemented in C code @@ -142,6 +146,9 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL) // when this feature is enabled for kernel code. ptrauth_switch_to_hyp x1, x2, x3, x4, x5 + // mte_switch_to_hyp(g_ctxt, h_ctxt, reg1) + mte_switch_to_hyp x1, x2, x3 + // Restore hyp's sp_el0 restore_sp_el0 x2, x3 diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index cce43bfe158f..de7e14c862e6 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -14,6 +14,7 @@ #include #include #include +#include static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt) { @@ -26,6 +27,16 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0); } +static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt) +{ + struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu; + + if (!vcpu) + vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt); + + return kvm_has_mte(kern_hyp_va(vcpu->kvm)); +} + static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) { ctxt_sys_reg(ctxt, CSSELR_EL1) = read_sysreg(csselr_el1); @@ -46,6 +57,11 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg_par(); ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1); + if (ctxt_has_mte(ctxt)) { + ctxt_sys_reg(ctxt, TFSR_EL1) = read_sysreg_el1(SYS_TFSR); + ctxt_sys_reg(ctxt, TFSRE0_EL1) = read_sysreg_s(SYS_TFSRE0_EL1); + } + ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1); ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR); ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR); @@ -107,6 +123,11 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1); write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1); + if (ctxt_has_mte(ctxt)) { + write_sysreg_el1(ctxt_sys_reg(ctxt, TFSR_EL1), SYS_TFSR); + write_sysreg_s(ctxt_sys_reg(ctxt, TFSRE0_EL1), SYS_TFSRE0_EL1); + } + if (!has_vhe() && cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) && ctxt->__hyp_running_vcpu) { diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 36f67f8deae1..5c75b24eae21 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1309,6 +1309,20 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return true; } +static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + return REG_HIDDEN; +} + +#define MTE_REG(name) { \ + SYS_DESC(SYS_##name), \ + .access = undef_access, \ + .reset = reset_unknown, \ + .reg = name, \ + .visibility = mte_visibility, \ +} + /* sys_reg_desc initialiser for known cpufeature ID registers */ #define ID_SANITISED(name) { \ SYS_DESC(SYS_##name), \ @@ -1477,8 +1491,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 }, { SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 }, - { SYS_DESC(SYS_RGSR_EL1), undef_access }, - { SYS_DESC(SYS_GCR_EL1), undef_access }, + MTE_REG(RGSR_EL1), + MTE_REG(GCR_EL1), { SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility }, { SYS_DESC(SYS_TRFCR_EL1), undef_access }, @@ -1505,8 +1519,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi }, { SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi }, - { SYS_DESC(SYS_TFSR_EL1), undef_access }, - { SYS_DESC(SYS_TFSRE0_EL1), undef_access }, + MTE_REG(TFSR_EL1), + MTE_REG(TFSRE0_EL1), { SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 }, { SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 }, From 673638f434ee4a00319e254ade338c57618d6f7e Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Jun 2021 12:17:14 +0100 Subject: [PATCH 727/730] KVM: arm64: Expose KVM_ARM_CAP_MTE It's now safe for the VMM to enable MTE in a guest, so expose the capability to user space. Reviewed-by: Catalin Marinas Signed-off-by: Steven Price Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210621111716.37157-5-steven.price@arm.com --- arch/arm64/kvm/arm.c | 9 +++++++++ arch/arm64/kvm/reset.c | 4 ++++ arch/arm64/kvm/sys_regs.c | 3 +++ 3 files changed, 16 insertions(+) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e720148232a0..28ce26a68f09 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -93,6 +93,12 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, r = 0; kvm->arch.return_nisv_io_abort_to_user = true; break; + case KVM_CAP_ARM_MTE: + if (!system_supports_mte() || kvm->created_vcpus) + return -EINVAL; + r = 0; + kvm->arch.mte_enabled = true; + break; default: r = -EINVAL; break; @@ -237,6 +243,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) */ r = 1; break; + case KVM_CAP_ARM_MTE: + r = system_supports_mte(); + break; case KVM_CAP_STEAL_TIME: r = kvm_arm_pvtime_supported(); break; diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index d37ebee085cf..cba7872d69a8 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -176,6 +176,10 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu) if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit) return false; + /* MTE is incompatible with AArch32 */ + if (kvm_has_mte(vcpu->kvm) && is32bit) + return false; + /* Check that the vcpus are either all 32bit or all 64bit */ kvm_for_each_vcpu(i, tmp, vcpu->kvm) { if (vcpu_has_feature(tmp, KVM_ARM_VCPU_EL1_32BIT) != is32bit) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 5c75b24eae21..f6f126eb6ac1 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1312,6 +1312,9 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { + if (kvm_has_mte(vcpu->kvm)) + return 0; + return REG_HIDDEN; } From f0376edb1ddcab19a473b4bf1fbd5b6bbed3705b Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Jun 2021 12:17:15 +0100 Subject: [PATCH 728/730] KVM: arm64: Add ioctl to fetch/store tags in a guest The VMM may not wish to have it's own mapping of guest memory mapped with PROT_MTE because this causes problems if the VMM has tag checking enabled (the guest controls the tags in physical RAM and it's unlikely the tags are correct for the VMM). Instead add a new ioctl which allows the VMM to easily read/write the tags from guest memory, allowing the VMM's mapping to be non-PROT_MTE while the VMM can still read/write the tags for the purpose of migration. Reviewed-by: Catalin Marinas Signed-off-by: Steven Price Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210621111716.37157-6-steven.price@arm.com --- arch/arm64/include/asm/kvm_host.h | 3 ++ arch/arm64/include/asm/mte-def.h | 1 + arch/arm64/include/uapi/asm/kvm.h | 11 +++++ arch/arm64/kvm/arm.c | 7 +++ arch/arm64/kvm/guest.c | 82 +++++++++++++++++++++++++++++++ include/uapi/linux/kvm.h | 1 + 6 files changed, 105 insertions(+) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 74a7447a83a1..c93a7198c242 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -730,6 +730,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); +long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, + struct kvm_arm_copy_mte_tags *copy_tags); + /* Guest/host FPSIMD coordination helpers */ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/mte-def.h b/arch/arm64/include/asm/mte-def.h index cf241b0f0a42..626d359b396e 100644 --- a/arch/arm64/include/asm/mte-def.h +++ b/arch/arm64/include/asm/mte-def.h @@ -7,6 +7,7 @@ #define MTE_GRANULE_SIZE UL(16) #define MTE_GRANULE_MASK (~(MTE_GRANULE_SIZE - 1)) +#define MTE_GRANULES_PER_PAGE (PAGE_SIZE / MTE_GRANULE_SIZE) #define MTE_TAG_SHIFT 56 #define MTE_TAG_SIZE 4 #define MTE_TAG_MASK GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT) diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 24223adae150..b3edde68bc3e 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -184,6 +184,17 @@ struct kvm_vcpu_events { __u32 reserved[12]; }; +struct kvm_arm_copy_mte_tags { + __u64 guest_ipa; + __u64 length; + void __user *addr; + __u64 flags; + __u64 reserved[2]; +}; + +#define KVM_ARM_TAGS_TO_GUEST 0 +#define KVM_ARM_TAGS_FROM_GUEST 1 + /* If you need to interpret the index values, here is the key: */ #define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 #define KVM_REG_ARM_COPROC_SHIFT 16 diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 28ce26a68f09..511f3716fe33 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1359,6 +1359,13 @@ long kvm_arch_vm_ioctl(struct file *filp, return 0; } + case KVM_ARM_MTE_COPY_TAGS: { + struct kvm_arm_copy_mte_tags copy_tags; + + if (copy_from_user(©_tags, argp, sizeof(copy_tags))) + return -EFAULT; + return kvm_vm_ioctl_mte_copy_tags(kvm, ©_tags); + } default: return -EINVAL; } diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 5cb4a1cd5603..4ddb20017b2f 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -995,3 +995,85 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, return ret; } + +long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, + struct kvm_arm_copy_mte_tags *copy_tags) +{ + gpa_t guest_ipa = copy_tags->guest_ipa; + size_t length = copy_tags->length; + void __user *tags = copy_tags->addr; + gpa_t gfn; + bool write = !(copy_tags->flags & KVM_ARM_TAGS_FROM_GUEST); + int ret = 0; + + if (!kvm_has_mte(kvm)) + return -EINVAL; + + if (copy_tags->reserved[0] || copy_tags->reserved[1]) + return -EINVAL; + + if (copy_tags->flags & ~KVM_ARM_TAGS_FROM_GUEST) + return -EINVAL; + + if (length & ~PAGE_MASK || guest_ipa & ~PAGE_MASK) + return -EINVAL; + + gfn = gpa_to_gfn(guest_ipa); + + mutex_lock(&kvm->slots_lock); + + while (length > 0) { + kvm_pfn_t pfn = gfn_to_pfn_prot(kvm, gfn, write, NULL); + void *maddr; + unsigned long num_tags; + struct page *page; + + if (is_error_noslot_pfn(pfn)) { + ret = -EFAULT; + goto out; + } + + page = pfn_to_online_page(pfn); + if (!page) { + /* Reject ZONE_DEVICE memory */ + ret = -EFAULT; + goto out; + } + maddr = page_address(page); + + if (!write) { + if (test_bit(PG_mte_tagged, &page->flags)) + num_tags = mte_copy_tags_to_user(tags, maddr, + MTE_GRANULES_PER_PAGE); + else + /* No tags in memory, so write zeros */ + num_tags = MTE_GRANULES_PER_PAGE - + clear_user(tags, MTE_GRANULES_PER_PAGE); + kvm_release_pfn_clean(pfn); + } else { + num_tags = mte_copy_tags_from_user(maddr, tags, + MTE_GRANULES_PER_PAGE); + kvm_release_pfn_dirty(pfn); + } + + if (num_tags != MTE_GRANULES_PER_PAGE) { + ret = -EFAULT; + goto out; + } + + /* Set the flag after checking the write completed fully */ + if (write) + set_bit(PG_mte_tagged, &page->flags); + + gfn++; + tags += num_tags; + length -= PAGE_SIZE; + } + +out: + mutex_unlock(&kvm->slots_lock); + /* If some data has been copied report the number of bytes copied */ + if (length != copy_tags->length) + return copy_tags->length - length; + return ret; +} diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d4da58ddcad7..da1edd2b4046 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1429,6 +1429,7 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_PMU_EVENT_FILTER */ #define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter) #define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3) +#define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) From 04c02c201d7e8149ae336ead69fb64e4e6f94bc9 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Jun 2021 12:17:16 +0100 Subject: [PATCH 729/730] KVM: arm64: Document MTE capability and ioctl A new capability (KVM_CAP_ARM_MTE) identifies that the kernel supports granting a guest access to the tags, and provides a mechanism for the VMM to enable it. A new ioctl (KVM_ARM_MTE_COPY_TAGS) provides a simple way for a VMM to access the tags of a guest without having to maintain a PROT_MTE mapping in userspace. The above capability gates access to the ioctl. Reviewed-by: Catalin Marinas Signed-off-by: Steven Price Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210621111716.37157-7-steven.price@arm.com --- Documentation/virt/kvm/api.rst | 61 ++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 7fcb2fd38f42..97661a97943f 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -5034,6 +5034,43 @@ see KVM_XEN_VCPU_SET_ATTR above. The KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST type may not be used with the KVM_XEN_VCPU_GET_ATTR ioctl. +4.130 KVM_ARM_MTE_COPY_TAGS +--------------------------- + +:Capability: KVM_CAP_ARM_MTE +:Architectures: arm64 +:Type: vm ioctl +:Parameters: struct kvm_arm_copy_mte_tags +:Returns: number of bytes copied, < 0 on error (-EINVAL for incorrect + arguments, -EFAULT if memory cannot be accessed). + +:: + + struct kvm_arm_copy_mte_tags { + __u64 guest_ipa; + __u64 length; + void __user *addr; + __u64 flags; + __u64 reserved[2]; + }; + +Copies Memory Tagging Extension (MTE) tags to/from guest tag memory. The +``guest_ipa`` and ``length`` fields must be ``PAGE_SIZE`` aligned. The ``addr`` +field must point to a buffer which the tags will be copied to or from. + +``flags`` specifies the direction of copy, either ``KVM_ARM_TAGS_TO_GUEST`` or +``KVM_ARM_TAGS_FROM_GUEST``. + +The size of the buffer to store the tags is ``(length / 16)`` bytes +(granules in MTE are 16 bytes long). Each byte contains a single tag +value. This matches the format of ``PTRACE_PEEKMTETAGS`` and +``PTRACE_POKEMTETAGS``. + +If an error occurs before any data is copied then a negative error code is +returned. If some tags have been copied before an error occurs then the number +of bytes successfully copied is returned. If the call completes successfully +then ``length`` is returned. + 5. The kvm_run structure ======================== @@ -6362,6 +6399,30 @@ default. See Documentation/x86/sgx/2.Kernel-internals.rst for more details. +7.26 KVM_CAP_ARM_MTE +-------------------- + +:Architectures: arm64 +:Parameters: none + +This capability indicates that KVM (and the hardware) supports exposing the +Memory Tagging Extensions (MTE) to the guest. It must also be enabled by the +VMM before creating any VCPUs to allow the guest access. Note that MTE is only +available to a guest running in AArch64 mode and enabling this capability will +cause attempts to create AArch32 VCPUs to fail. + +When enabled the guest is able to access tags associated with any memory given +to the guest. KVM will ensure that the tags are maintained during swap or +hibernation of the host; however the VMM needs to manually save/restore the +tags as appropriate if the VM is migrated. + +When this capability is enabled all memory in memslots must be mapped as +not-shareable (no MAP_SHARED), attempts to create a memslot with a +MAP_SHARED mmap will result in an -EINVAL return. + +When enabled the VMM may make use of the ``KVM_ARM_MTE_COPY_TAGS`` ioctl to +perform a bulk copy of tags to/from the guest. + 8. Other capabilities. ====================== From 98db7259fa7b963d80da49fd636744e28a78981e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Jun 2021 14:21:05 +0100 Subject: [PATCH 730/730] KVM: arm64: Set the MTE tag bit before releasing the page Setting a page flag without holding a reference to the page is living dangerously. In the tag-writing path, we drop the reference to the page by calling kvm_release_pfn_dirty(), and only then set the PG_mte_tagged bit. It would be safer to do it the other way round. Fixes: f0376edb1ddca ("KVM: arm64: Add ioctl to fetch/store tags in a guest") Cc: Catalin Marinas Reviewed-by: Steven Price Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/87k0mjidwb.wl-maz@kernel.org --- arch/arm64/kvm/guest.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 4ddb20017b2f..60815ae477cf 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -1053,6 +1053,14 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, } else { num_tags = mte_copy_tags_from_user(maddr, tags, MTE_GRANULES_PER_PAGE); + + /* + * Set the flag after checking the write + * completed fully + */ + if (num_tags == MTE_GRANULES_PER_PAGE) + set_bit(PG_mte_tagged, &page->flags); + kvm_release_pfn_dirty(pfn); } @@ -1061,10 +1069,6 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, goto out; } - /* Set the flag after checking the write completed fully */ - if (write) - set_bit(PG_mte_tagged, &page->flags); - gfn++; tags += num_tags; length -= PAGE_SIZE;