From 5647b25c3335a25ba32d73e61850a374a708788a Mon Sep 17 00:00:00 2001 From: Jonathan Liu Date: Mon, 26 Sep 2016 20:21:45 +1000 Subject: [PATCH 001/503] drm/sun4i: rgb: Enable panel after controller The panel should be enabled after the controller so that we do not have visual glitches on the panel while the controller is setup. Similarly, the panel should be disabled before the controller. Signed-off-by: Jonathan Liu Reviewed-by: Sean Paul Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_rgb.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_rgb.c b/drivers/gpu/drm/sun4i/sun4i_rgb.c index c3ff10f559cc..4e4bea6f395c 100644 --- a/drivers/gpu/drm/sun4i/sun4i_rgb.c +++ b/drivers/gpu/drm/sun4i/sun4i_rgb.c @@ -152,15 +152,16 @@ static void sun4i_rgb_encoder_enable(struct drm_encoder *encoder) DRM_DEBUG_DRIVER("Enabling RGB output\n"); - if (!IS_ERR(tcon->panel)) { + if (!IS_ERR(tcon->panel)) drm_panel_prepare(tcon->panel); - drm_panel_enable(tcon->panel); - } /* encoder->bridge can be NULL; drm_bridge_enable checks for it */ drm_bridge_enable(encoder->bridge); sun4i_tcon_channel_enable(tcon, 0); + + if (!IS_ERR(tcon->panel)) + drm_panel_enable(tcon->panel); } static void sun4i_rgb_encoder_disable(struct drm_encoder *encoder) @@ -171,15 +172,16 @@ static void sun4i_rgb_encoder_disable(struct drm_encoder *encoder) DRM_DEBUG_DRIVER("Disabling RGB output\n"); + if (!IS_ERR(tcon->panel)) + drm_panel_disable(tcon->panel); + sun4i_tcon_channel_disable(tcon, 0); /* encoder->bridge can be NULL; drm_bridge_disable checks for it */ drm_bridge_disable(encoder->bridge); - if (!IS_ERR(tcon->panel)) { - drm_panel_disable(tcon->panel); + if (!IS_ERR(tcon->panel)) drm_panel_unprepare(tcon->panel); - } } static void sun4i_rgb_encoder_mode_set(struct drm_encoder *encoder, From 0df03b43035afd0a64916fe4e5bca978562ffa5a Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 29 Sep 2016 14:05:05 +0200 Subject: [PATCH 002/503] drm/sun4i: rgb: Remove the bridge enable/disable functions The atomic helpers already call the drm_bridge_enable on our behalf, there's no need to do it a second time. Reported-by: Sean Paul Reviewed-by: Sean Paul Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_rgb.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_rgb.c b/drivers/gpu/drm/sun4i/sun4i_rgb.c index 4e4bea6f395c..d198ad7e5323 100644 --- a/drivers/gpu/drm/sun4i/sun4i_rgb.c +++ b/drivers/gpu/drm/sun4i/sun4i_rgb.c @@ -155,9 +155,6 @@ static void sun4i_rgb_encoder_enable(struct drm_encoder *encoder) if (!IS_ERR(tcon->panel)) drm_panel_prepare(tcon->panel); - /* encoder->bridge can be NULL; drm_bridge_enable checks for it */ - drm_bridge_enable(encoder->bridge); - sun4i_tcon_channel_enable(tcon, 0); if (!IS_ERR(tcon->panel)) @@ -177,9 +174,6 @@ static void sun4i_rgb_encoder_disable(struct drm_encoder *encoder) sun4i_tcon_channel_disable(tcon, 0); - /* encoder->bridge can be NULL; drm_bridge_disable checks for it */ - drm_bridge_disable(encoder->bridge); - if (!IS_ERR(tcon->panel)) drm_panel_unprepare(tcon->panel); } From 0ce267ff95a0302cf6fb2a552833abbfb7861a43 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 18 Oct 2016 15:36:48 +0200 Subject: [PATCH 003/503] fuse: fix root dentry initialization Add missing dentry initialization to root dentry. Fixes: f75fdf22b0a8 ("fuse: don't use ->d_time") Reported-by: Andreas Reis Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 5 +++++ fs/fuse/fuse_i.h | 1 + fs/fuse/inode.c | 3 ++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 6a4d0e5418a1..b3ebe512d64c 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -286,6 +286,11 @@ const struct dentry_operations fuse_dentry_operations = { .d_release = fuse_dentry_release, }; +const struct dentry_operations fuse_root_dentry_operations = { + .d_init = fuse_dentry_init, + .d_release = fuse_dentry_release, +}; + int fuse_valid_type(int m) { return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) || diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 0dfbb136e59a..91307940c8ac 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -692,6 +692,7 @@ static inline u64 get_node_id(struct inode *inode) extern const struct file_operations fuse_dev_operations; extern const struct dentry_operations fuse_dentry_operations; +extern const struct dentry_operations fuse_root_dentry_operations; /** * Inode to nodeid comparison. diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 17141099f2e7..6fe6a88ecb4a 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1131,10 +1131,11 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) err = -ENOMEM; root = fuse_get_root_inode(sb, d.rootmode); + sb->s_d_op = &fuse_root_dentry_operations; root_dentry = d_make_root(root); if (!root_dentry) goto err_dev_free; - /* only now - we want root dentry with NULL ->d_op */ + /* Root dentry doesn't have .d_revalidate */ sb->s_d_op = &fuse_dentry_operations; init_req = fuse_request_alloc(0); From f95df7d6cd92787d54c9ad3d4843f9bcd137f9db Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 17 Oct 2016 15:16:35 +0000 Subject: [PATCH 004/503] dmaengine: edma: Fix error return code in edma_alloc_chan_resources() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Signed-off-by: Wei Yongjun Signed-off-by: Vinod Koul --- drivers/dma/edma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index e18a58068bca..77242b37ef87 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -1628,6 +1628,7 @@ static int edma_alloc_chan_resources(struct dma_chan *chan) if (echan->slot[0] < 0) { dev_err(dev, "Entry slot allocation failed for channel %u\n", EDMA_CHAN_SLOT(echan->ch_num)); + ret = echan->slot[0]; goto err_slot; } From d6619761068cf573cae406f176d00b82a39a37fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Lefaure?= Date: Thu, 6 Oct 2016 17:59:53 -0400 Subject: [PATCH 005/503] dmaengine: mmp_tdma: add missing select GENERIC_ALLOCATOR in Kconfig MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are some compilation errors when CONFIG_MMP_TDMA is enabled and CONFIG_GENERIC_ALLOCATOR is disabled: drivers/built-in.o: In function `mmp_tdma_prep_dma_cyclic': mmp_tdma.c:(.text+0x7890e): undefined reference to `gen_pool_dma_alloc' drivers/built-in.o: In function `mmp_tdma_free_chan_resources': mmp_tdma.c:(.text+0x78aca): undefined reference to `gen_pool_free' drivers/built-in.o: In function `mmp_tdma_probe': mmp_tdma.c:(.text+0x78ea8): undefined reference to `of_gen_pool_get' This commit fix this problem by selecting GENERIC_ALLOCATOR when CONFIG_MMP_TDMA is enabled. Signed-off-by: Jérémy Lefaure Signed-off-by: Vinod Koul --- drivers/dma/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index af63a6bcf564..141aefbe37ec 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -306,6 +306,7 @@ config MMP_TDMA depends on ARCH_MMP || COMPILE_TEST select DMA_ENGINE select MMP_SRAM if ARCH_MMP + select GENERIC_ALLOCATOR help Support the MMP Two-Channel DMA engine. This engine used for MMP Audio DMA and pxa910 SQU. From 83ba62bc700bab710b22be3a1bf6cf973f754273 Mon Sep 17 00:00:00 2001 From: Bibby Hsieh Date: Tue, 18 Oct 2016 16:23:59 +0800 Subject: [PATCH 006/503] drm/mediatek: fix a typo of OD_CFG to OD_RELAYMODE If we want to set the hardware OD to relay mode, we have to set OD_CFG register rather than OD_RELAYMODE; otherwise, the system will access the wrong address. Fixes: 7216436420414144646f5d8343d061355fd23483 ("drm/mediatek: set mt8173 dithering function") Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Bibby Hsieh Acked-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c index df33b3ca6ffd..aa5f20fabd10 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c @@ -123,7 +123,7 @@ static void mtk_od_config(struct mtk_ddp_comp *comp, unsigned int w, unsigned int bpc) { writel(w << 16 | h, comp->regs + DISP_OD_SIZE); - writel(OD_RELAYMODE, comp->regs + OD_RELAYMODE); + writel(OD_RELAYMODE, comp->regs + OD_CFG); mtk_dither_set(comp, bpc, DISP_OD_CFG); } From f752fff611b99f5679224f3990a1f531ea64b1ec Mon Sep 17 00:00:00 2001 From: Bibby Hsieh Date: Thu, 29 Sep 2016 11:29:48 +0800 Subject: [PATCH 007/503] drm/mediatek: set vblank_disable_allowed to true MTK DRM driver didn't set the vblank_disable_allowed to true, it cause that the irq_handler is called every 16.6 ms (every vblank) when the display didn't be updated. Signed-off-by: Bibby Hsieh Acked-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index cf83f6507ec8..0b2ae47eb52c 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -217,6 +217,7 @@ static int mtk_drm_kms_init(struct drm_device *drm) if (ret < 0) goto err_component_unbind; + drm->vblank_disable_allowed = true; drm_kms_helper_poll_init(drm); drm_mode_config_reset(drm); From 56e4b1e183555c74097fa012f1606b22223f027b Mon Sep 17 00:00:00 2001 From: Bibby Hsieh Date: Thu, 29 Sep 2016 11:29:49 +0800 Subject: [PATCH 008/503] drm/mediatek: clear IRQ status before enable OVL interrupt To make sure that the first vblank IRQ after enabling vblank isn't too short or immediate, we have to clear the IRQ status before enable OVL interrupt. Signed-off-by: Bibby Hsieh Acked-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_disp_ovl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c index 019b7ca392d7..f75c5b5a536c 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c @@ -80,6 +80,7 @@ static void mtk_ovl_enable_vblank(struct mtk_ddp_comp *comp, ddp_comp); priv->crtc = crtc; + writel(0x0, comp->regs + DISP_REG_OVL_INTSTA); writel_relaxed(OVL_FME_CPL_INT, comp->regs + DISP_REG_OVL_INTEN); } From d542b7c473f0eb34455974d66ea93653b3eb40ce Mon Sep 17 00:00:00 2001 From: Junzhi Zhao Date: Thu, 29 Sep 2016 11:02:13 +0800 Subject: [PATCH 009/503] drm/mediatek: do mtk_hdmi_send_infoframe after HDMI clock enable The mtk_hdmi_send_infoframe have to be run after PLL and PIXEL clock of HDMI enable. Make sure that HDMI inforframes can be sent successfully. Signed-off-by: Junzhi Zhao Signed-off-by: Bibby Hsieh --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 71227deef21b..0e8c4d9af340 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1133,12 +1133,6 @@ static int mtk_hdmi_output_set_display_mode(struct mtk_hdmi *hdmi, phy_power_on(hdmi->phy); mtk_hdmi_aud_output_config(hdmi, mode); - mtk_hdmi_setup_audio_infoframe(hdmi); - mtk_hdmi_setup_avi_infoframe(hdmi, mode); - mtk_hdmi_setup_spd_infoframe(hdmi, "mediatek", "On-chip HDMI"); - if (mode->flags & DRM_MODE_FLAG_3D_MASK) - mtk_hdmi_setup_vendor_specific_infoframe(hdmi, mode); - mtk_hdmi_hw_vid_black(hdmi, false); mtk_hdmi_hw_aud_unmute(hdmi); mtk_hdmi_hw_send_av_unmute(hdmi); @@ -1401,6 +1395,16 @@ static void mtk_hdmi_bridge_pre_enable(struct drm_bridge *bridge) hdmi->powered = true; } +static void mtk_hdmi_send_infoframe(struct mtk_hdmi *hdmi, + struct drm_display_mode *mode) +{ + mtk_hdmi_setup_audio_infoframe(hdmi); + mtk_hdmi_setup_avi_infoframe(hdmi, mode); + mtk_hdmi_setup_spd_infoframe(hdmi, "mediatek", "On-chip HDMI"); + if (mode->flags & DRM_MODE_FLAG_3D_MASK) + mtk_hdmi_setup_vendor_specific_infoframe(hdmi, mode); +} + static void mtk_hdmi_bridge_enable(struct drm_bridge *bridge) { struct mtk_hdmi *hdmi = hdmi_ctx_from_bridge(bridge); @@ -1409,6 +1413,7 @@ static void mtk_hdmi_bridge_enable(struct drm_bridge *bridge) clk_prepare_enable(hdmi->clk[MTK_HDMI_CLK_HDMI_PLL]); clk_prepare_enable(hdmi->clk[MTK_HDMI_CLK_HDMI_PIXEL]); phy_power_on(hdmi->phy); + mtk_hdmi_send_infoframe(hdmi, &hdmi->mode); hdmi->enabled = true; } From 968253bd7caae5621f6806dd5055353fe33d366e Mon Sep 17 00:00:00 2001 From: Junzhi Zhao Date: Thu, 29 Sep 2016 11:02:14 +0800 Subject: [PATCH 010/503] drm/mediatek: enhance the HDMI driving current In order to improve 4K resolution performance, we have to enhance the HDMI driving current when clock rate is greater than 165MHz. Signed-off-by: Junzhi Zhao Signed-off-by: Bibby Hsieh --- .../gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c | 42 +++++++++++++------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c index 8a24754b440f..51cb9cfb6646 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c @@ -265,6 +265,9 @@ static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw); unsigned int pre_div; unsigned int div; + unsigned int pre_ibias; + unsigned int hdmi_ibias; + unsigned int imp_en; dev_dbg(hdmi_phy->dev, "%s: %lu Hz, parent: %lu Hz\n", __func__, rate, parent_rate); @@ -298,18 +301,31 @@ static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, (0x1 << PLL_BR_SHIFT), RG_HDMITX_PLL_BP | RG_HDMITX_PLL_BC | RG_HDMITX_PLL_BR); - mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON3, RG_HDMITX_PRD_IMP_EN); + if (rate < 165000000) { + mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON3, + RG_HDMITX_PRD_IMP_EN); + pre_ibias = 0x3; + imp_en = 0x0; + hdmi_ibias = hdmi_phy->ibias; + } else { + mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON3, + RG_HDMITX_PRD_IMP_EN); + pre_ibias = 0x6; + imp_en = 0xf; + hdmi_ibias = hdmi_phy->ibias_up; + } mtk_hdmi_phy_mask(hdmi_phy, HDMI_CON4, - (0x3 << PRD_IBIAS_CLK_SHIFT) | - (0x3 << PRD_IBIAS_D2_SHIFT) | - (0x3 << PRD_IBIAS_D1_SHIFT) | - (0x3 << PRD_IBIAS_D0_SHIFT), + (pre_ibias << PRD_IBIAS_CLK_SHIFT) | + (pre_ibias << PRD_IBIAS_D2_SHIFT) | + (pre_ibias << PRD_IBIAS_D1_SHIFT) | + (pre_ibias << PRD_IBIAS_D0_SHIFT), RG_HDMITX_PRD_IBIAS_CLK | RG_HDMITX_PRD_IBIAS_D2 | RG_HDMITX_PRD_IBIAS_D1 | RG_HDMITX_PRD_IBIAS_D0); mtk_hdmi_phy_mask(hdmi_phy, HDMI_CON3, - (0x0 << DRV_IMP_EN_SHIFT), RG_HDMITX_DRV_IMP_EN); + (imp_en << DRV_IMP_EN_SHIFT), + RG_HDMITX_DRV_IMP_EN); mtk_hdmi_phy_mask(hdmi_phy, HDMI_CON6, (hdmi_phy->drv_imp_clk << DRV_IMP_CLK_SHIFT) | (hdmi_phy->drv_imp_d2 << DRV_IMP_D2_SHIFT) | @@ -318,12 +334,14 @@ static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, RG_HDMITX_DRV_IMP_CLK | RG_HDMITX_DRV_IMP_D2 | RG_HDMITX_DRV_IMP_D1 | RG_HDMITX_DRV_IMP_D0); mtk_hdmi_phy_mask(hdmi_phy, HDMI_CON5, - (hdmi_phy->ibias << DRV_IBIAS_CLK_SHIFT) | - (hdmi_phy->ibias << DRV_IBIAS_D2_SHIFT) | - (hdmi_phy->ibias << DRV_IBIAS_D1_SHIFT) | - (hdmi_phy->ibias << DRV_IBIAS_D0_SHIFT), - RG_HDMITX_DRV_IBIAS_CLK | RG_HDMITX_DRV_IBIAS_D2 | - RG_HDMITX_DRV_IBIAS_D1 | RG_HDMITX_DRV_IBIAS_D0); + (hdmi_ibias << DRV_IBIAS_CLK_SHIFT) | + (hdmi_ibias << DRV_IBIAS_D2_SHIFT) | + (hdmi_ibias << DRV_IBIAS_D1_SHIFT) | + (hdmi_ibias << DRV_IBIAS_D0_SHIFT), + RG_HDMITX_DRV_IBIAS_CLK | + RG_HDMITX_DRV_IBIAS_D2 | + RG_HDMITX_DRV_IBIAS_D1 | + RG_HDMITX_DRV_IBIAS_D0); return 0; } From 0d2200794f0a2c1ebb3b6613842914d8ce4b67f9 Mon Sep 17 00:00:00 2001 From: Junzhi Zhao Date: Thu, 29 Sep 2016 11:02:15 +0800 Subject: [PATCH 011/503] drm/mediatek: modify the factor to make the pll_rate set in the 1G-2G range Currently, the code sets the "pll" to the desired multiple of the pixel clock manully(4*3m 8*3,etc). The valid range of the pll is 1G-2G, however, when the pixel clock is bigger than 167MHz, the "pll" will be set to a invalid value( > 2G), then the "pll" will be 2GHz, thus the pixel clock will be in correct. Change the factor to make the "pll" be set in the (1G, 2G) range. Signed-off-by: Junzhi Zhao Signed-off-by: Bibby Hsieh --- drivers/gpu/drm/mediatek/mtk_dpi.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c index 0186e500d2a5..90fb831ef031 100644 --- a/drivers/gpu/drm/mediatek/mtk_dpi.c +++ b/drivers/gpu/drm/mediatek/mtk_dpi.c @@ -432,11 +432,16 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi, unsigned long pll_rate; unsigned int factor; + /* let pll_rate can fix the valid range of tvdpll (1G~2GHz) */ pix_rate = 1000UL * mode->clock; - if (mode->clock <= 74000) + if (mode->clock <= 27000) + factor = 16 * 3; + else if (mode->clock <= 84000) factor = 8 * 3; - else + else if (mode->clock <= 167000) factor = 4 * 3; + else + factor = 2 * 3; pll_rate = pix_rate * factor; dev_dbg(dpi->dev, "Want PLL %lu Hz, pixel clock %lu Hz\n", From faead41cc7213ccef5a58c1bf518ac24816fe8a6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 22 Sep 2016 10:31:41 +0200 Subject: [PATCH 012/503] iwlwifi: pcie: mark command queue lock with separate lockdep class Emmanuel reports that when CMD_WANT_ASYNC_CALLBACK is used by mvm, the callback will be called with the command queue lock held, and mvm will try to stop all (other) TX queues, which acquires their locks - this caused a false lockdep recursive locking report. Suppress this report by marking the command queue lock with a new, separate, lock class so lockdep can tell the difference between the two types of queues. Fixes: 156f92f2b471 ("iwlwifi: block the queues when we send ADD_STA for uAPSD") Reported-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index e9a278b60dfd..5f840f16f40b 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -592,6 +592,7 @@ error: static int iwl_pcie_txq_init(struct iwl_trans *trans, struct iwl_txq *txq, int slots_num, u32 txq_id) { + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); int ret; txq->need_update = false; @@ -606,6 +607,13 @@ static int iwl_pcie_txq_init(struct iwl_trans *trans, struct iwl_txq *txq, return ret; spin_lock_init(&txq->lock); + + if (txq_id == trans_pcie->cmd_queue) { + static struct lock_class_key iwl_pcie_cmd_queue_lock_class; + + lockdep_set_class(&txq->lock, &iwl_pcie_cmd_queue_lock_class); + } + __skb_queue_head_init(&txq->overflow_q); /* From 276c4b4b74b6d5bc3cab35534409f3ad32464b78 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Wed, 28 Sep 2016 11:32:35 +0300 Subject: [PATCH 013/503] iwlwifi: mvm: use ssize_t for len in iwl_debugfs_mem_read() In iwl_dbgfs_mem_read(), the len variable may become negative and is compared to < 0 (an error case). Comparing size_t (which is unsigned) to < 0 causes a warning on certain platforms (like i386): drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c:1561:5-8: WARNING: Unsigned expression compared with zero: len < 0 To prevent that, use ssize_t for len instead. Fixes: commit 2b55f43f8e47 ("iwlwifi: mvm: Add mem debugfs entry") Reported-by: kbuild test robot Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index 539d718df797..06805a63f091 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -1529,8 +1529,8 @@ static ssize_t iwl_dbgfs_mem_read(struct file *file, char __user *user_buf, .data = { &cmd, }, .len = { sizeof(cmd) }, }; - size_t delta, len; - ssize_t ret; + size_t delta; + ssize_t ret, len; hcmd.id = iwl_cmd_id(*ppos >> 24 ? UMAC_RD_WR : LMAC_RD_WR, DEBUG_GROUP, 0); From 85cd69b8f1f7e289fe931a82889e673fd0f04842 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Wed, 5 Oct 2016 11:24:12 +0300 Subject: [PATCH 014/503] iwlwifi: mvm: fix d3_test with unified D0/D3 images When a unified D0/D3 image is used, we don't restart the FW in the D0->D3->D0 transitions. Therefore, the d3_test functionality should not call ieee8021_restart_hw() when the resuming either. Fixes: commit 23ae61282b88 ("iwlwifi: mvm: Do not switch to D3 image on suspend") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 25 ++++++++++++--------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 4fdc3dad3e85..0e17cb238643 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -2271,7 +2271,8 @@ static void iwl_mvm_d3_test_disconn_work_iter(void *_data, u8 *mac, static int iwl_mvm_d3_test_release(struct inode *inode, struct file *file) { struct iwl_mvm *mvm = inode->i_private; - int remaining_time = 10; + bool unified_image = fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_CNSLDTD_D3_D0_IMG); mvm->d3_test_active = false; @@ -2282,18 +2283,22 @@ static int iwl_mvm_d3_test_release(struct inode *inode, struct file *file) mvm->trans->system_pm_mode = IWL_PLAT_PM_MODE_DISABLED; iwl_abort_notification_waits(&mvm->notif_wait); - ieee80211_restart_hw(mvm->hw); + if (!unified_image) { + int remaining_time = 10; - /* wait for restart and disconnect all interfaces */ - while (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) && - remaining_time > 0) { - remaining_time--; - msleep(1000); + ieee80211_restart_hw(mvm->hw); + + /* wait for restart and disconnect all interfaces */ + while (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) && + remaining_time > 0) { + remaining_time--; + msleep(1000); + } + + if (remaining_time == 0) + IWL_ERR(mvm, "Timed out waiting for HW restart!\n"); } - if (remaining_time == 0) - IWL_ERR(mvm, "Timed out waiting for HW restart to finish!\n"); - ieee80211_iterate_active_interfaces_atomic( mvm->hw, IEEE80211_IFACE_ITER_NORMAL, iwl_mvm_d3_test_disconn_work_iter, mvm->keep_vif); From 5bfadc8255e2cd92be7538fd7dfa777c27f58be0 Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Mon, 12 Sep 2016 10:24:19 +0300 Subject: [PATCH 015/503] iwlwifi: mvm: comply with fw_restart mod param on suspend If the suspend flow fails, we restart the hardware to go back to the D0 image (with non-unified images), but we don't comply with the fw_restart module parameter. If something goes wrong when starting the D3 image, we may want to debug it, so we should comply with the fw_restart flag to avoid clearing everything up and losing the firmware state when the error occurred. Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 0e17cb238643..03a8fc586548 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -1254,7 +1254,10 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw, out: if (ret < 0) { iwl_mvm_ref(mvm, IWL_MVM_REF_UCODE_DOWN); - ieee80211_restart_hw(mvm->hw); + if (mvm->restart_fw > 0) { + mvm->restart_fw--; + ieee80211_restart_hw(mvm->hw); + } iwl_mvm_free_nd(mvm); } out_noreset: From 3a732c65de427fdae67a243fd331356034b5a1e8 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Sun, 9 Oct 2016 17:34:24 +0300 Subject: [PATCH 016/503] iwlwifi: mvm: wake the wait queue when the RX sync counter is zero When we sync the RX queues the driver waits to receive echo notification on all the RX queues. The wait queue is set with timeout until all queues have received the notification. However, iwl_mvm_rx_queue_notif() never woke up the wait queue, with the result of the counter value being checked only when the timeout expired. This may cause a latency of up to 1 second. Fixes: 0636b938214c ("iwlwifi: mvm: implement driver RX queues sync command") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 3 +-- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 1 + drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 1 + drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 3 ++- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 318efd814037..1db1dc13e988 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -4121,7 +4121,6 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm, struct iwl_mvm_internal_rxq_notif *notif, u32 size) { - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(notif_waitq); u32 qmask = BIT(mvm->trans->num_rx_queues) - 1; int ret; @@ -4143,7 +4142,7 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm, } if (notif->sync) - ret = wait_event_timeout(notif_waitq, + ret = wait_event_timeout(mvm->rx_sync_waitq, atomic_read(&mvm->queue_sync_counter) == 0, HZ); WARN_ON_ONCE(!ret); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index d17cbf603f7c..c60703e0c246 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -937,6 +937,7 @@ struct iwl_mvm { /* sync d0i3_tx queue and IWL_MVM_STATUS_IN_D0I3 status flag */ spinlock_t d0i3_tx_lock; wait_queue_head_t d0i3_exit_waitq; + wait_queue_head_t rx_sync_waitq; /* BT-Coex */ struct iwl_bt_coex_profile_notif last_bt_notif; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 05fe6dd1a2c8..4d35deb628bc 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -619,6 +619,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, spin_lock_init(&mvm->refs_lock); skb_queue_head_init(&mvm->d0i3_tx); init_waitqueue_head(&mvm->d0i3_exit_waitq); + init_waitqueue_head(&mvm->rx_sync_waitq); atomic_set(&mvm->queue_sync_counter, 0); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index a57c6ef5bc14..6c802cee900c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -547,7 +547,8 @@ void iwl_mvm_rx_queue_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb, "Received expired RX queue sync message\n"); return; } - atomic_dec(&mvm->queue_sync_counter); + if (!atomic_dec_return(&mvm->queue_sync_counter)) + wake_up(&mvm->rx_sync_waitq); } switch (internal_notif->type) { From aa156c8aee22a24865bf94d0c4a5f604f687fa7d Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 14 Oct 2016 10:15:35 -0300 Subject: [PATCH 017/503] rtc: asm9260: fix module autoload If the driver is built as a module, autoload won't work because the module alias information is not filled so user-space can't match the registered device with the corresponding module. Export the module alias information using the MODULE_DEVICE_TABLE() macro. Before this patch: $ modinfo drivers/rtc/rtc-asm9260.ko | grep alias $ After this patch: $ modinfo drivers/rtc/rtc-asm9260.ko | grep alias alias: of:N*T*Calphascale,asm9260-rtcC* alias: of:N*T*Calphascale,asm9260-rtc Signed-off-by: Javier Martinez Canillas Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-asm9260.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/rtc/rtc-asm9260.c b/drivers/rtc/rtc-asm9260.c index 18a93d3e3f93..d36534965635 100644 --- a/drivers/rtc/rtc-asm9260.c +++ b/drivers/rtc/rtc-asm9260.c @@ -327,6 +327,7 @@ static const struct of_device_id asm9260_dt_ids[] = { { .compatible = "alphascale,asm9260-rtc", }, {} }; +MODULE_DEVICE_TABLE(of, asm9260_dt_ids); static struct platform_driver asm9260_rtc_driver = { .probe = asm9260_rtc_probe, From a3a0673b9db6fad2a3f7874c34e4b5cbc5fa01c6 Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Tue, 18 Oct 2016 16:39:54 +0200 Subject: [PATCH 018/503] rtc: cmos: remove all __exit_p annotations I got the following stack trace under qemu: [ 7.575243] BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 [ 7.596098] IP: [] cmos_set_alarm+0x38/0x280 [ 7.615699] PGD 3ccbe067 [ 7.615923] PUD 3daf2067 [ 7.635156] PMD 0 [ 7.654358] Oops: 0000 [#1] SMP [ 7.673869] Modules linked in: [ 7.693235] CPU: 0 PID: 1701 Comm: hwclock Tainted: G W 4.9.0-rc1+ #24 [ 7.712455] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014 [ 7.753569] task: ffff88003d88dc40 task.stack: ffffc90000224000 [ 7.773743] RIP: 0010:[] [] cmos_set_alarm+0x38/0x280 [ 7.794893] RSP: 0018:ffffc90000227c10 EFLAGS: 00010296 [ 7.815890] RAX: 000000000000001d RBX: ffffc90000227d28 RCX: ffffffff8182be78 [ 7.836057] RDX: 0000000000000001 RSI: 0000000000000202 RDI: 0000000000000202 [ 7.856612] RBP: ffffc90000227c48 R08: 0000000000000000 R09: 0000000000000001 [ 7.877561] R10: 00000000000001c0 R11: 00000000000001c0 R12: 0000000000000000 [ 7.897072] R13: ffff88003d96f400 R14: ffff88003dac6410 R15: ffff88003dac6420 [ 7.917403] FS: 00007f77f42d9700(0000) GS:ffff88003fc00000(0000) knlGS:0000000000000000 [ 7.938293] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 7.958364] CR2: 0000000000000010 CR3: 000000003ccbb000 CR4: 00000000000006f0 [ 7.978028] Stack: [ 7.997120] ffff88003dac6000 ffff88003dac6410 0000000058049d01 ffffc90000227d28 [ 8.016993] ffff88003dac6000 ffff88003dac6410 ffff88003dac6420 ffffc90000227c98 [ 8.039505] ffffffff814f225d 0000001800227c98 000000090000002a 0000000900000011 [ 8.059985] Call Trace: [ 8.080110] [] __rtc_set_alarm+0x8d/0xa0 [ 8.099421] [] rtc_timer_enqueue+0x119/0x190 [ 8.119925] [] rtc_update_irq_enable+0xbe/0x100 [ 8.140583] [] rtc_dev_ioctl+0x3c0/0x480 [ 8.161162] [] ? user_path_at_empty+0x3a/0x50 [ 8.182717] [] do_vfs_ioctl+0x96/0x5c0 [ 8.204624] [] ? vfs_stat+0x16/0x20 [ 8.225994] [] ? SyS_newstat+0x15/0x30 [ 8.247043] [] SyS_ioctl+0x47/0x80 [ 8.267191] [] entry_SYSCALL_64_fastpath+0x1a/0xa9 [ 8.288719] Code: 6a 81 48 89 e5 41 57 41 56 41 55 49 89 fd 41 54 53 48 89 f3 48 c7 c6 20 c4 78 81 48 83 ec 10 e8 8f 00 ef ff 4d 8b a5 a0 00 00 00 <41> 8b 44 24 10 85 c0 0f 8e 2b 02 00 00 4c 89 ef 31 c0 b9 53 01 [ 8.335233] RIP [] cmos_set_alarm+0x38/0x280 [ 8.357096] RSP [ 8.379051] CR2: 0000000000000010 [ 8.401736] ---[ end trace 5cbcd83a1f225ed3 ]--- This occur only when CONFIG_DEBUG_TEST_DRIVER_REMOVE is enabled and CONFIG_RTC_DRV_CMOS builtin. When cmos_set_alarm() is called dev is NULL and so trigger the deref via cmos->irq The problem comes from that the device is removed but no remove function are called due to _exit_p(). This patch remove all _exit_p() annotation. Signed-off-by: Corentin Labbe Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-cmos.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index dd3d59806ffa..6f0e12e66296 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -776,7 +776,7 @@ static void cmos_do_shutdown(int rtc_irq) spin_unlock_irq(&rtc_lock); } -static void __exit cmos_do_remove(struct device *dev) +static void cmos_do_remove(struct device *dev) { struct cmos_rtc *cmos = dev_get_drvdata(dev); struct resource *ports; @@ -1129,7 +1129,7 @@ static int cmos_pnp_probe(struct pnp_dev *pnp, const struct pnp_device_id *id) pnp_irq(pnp, 0)); } -static void __exit cmos_pnp_remove(struct pnp_dev *pnp) +static void cmos_pnp_remove(struct pnp_dev *pnp) { cmos_do_remove(&pnp->dev); } @@ -1161,7 +1161,7 @@ static struct pnp_driver cmos_pnp_driver = { .name = (char *) driver_name, .id_table = rtc_ids, .probe = cmos_pnp_probe, - .remove = __exit_p(cmos_pnp_remove), + .remove = cmos_pnp_remove, .shutdown = cmos_pnp_shutdown, /* flag ensures resume() gets called, and stops syslog spam */ @@ -1238,7 +1238,7 @@ static int __init cmos_platform_probe(struct platform_device *pdev) return cmos_do_probe(&pdev->dev, resource, irq); } -static int __exit cmos_platform_remove(struct platform_device *pdev) +static int cmos_platform_remove(struct platform_device *pdev) { cmos_do_remove(&pdev->dev); return 0; @@ -1263,7 +1263,7 @@ static void cmos_platform_shutdown(struct platform_device *pdev) MODULE_ALIAS("platform:rtc_cmos"); static struct platform_driver cmos_platform_driver = { - .remove = __exit_p(cmos_platform_remove), + .remove = cmos_platform_remove, .shutdown = cmos_platform_shutdown, .driver = { .name = driver_name, From e0d9727c111a5917a1184c71c1a8e6f78c7fc41d Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 13 Oct 2016 10:07:07 +0300 Subject: [PATCH 019/503] iwlwifi: pcie: fix SPLC structure parsing The SPLC data parsing is too restrictive and was not trying find the correct element for WiFi. This causes problems with some BIOSes where the SPLC method exists, but doesn't have a WiFi entry on the first element of the list. The domain type values are also incorrect according to the specification. Fix this by complying with the actual specification. Additionally, replace all occurrences of SPLX to SPLC, since SPLX is only a structure internal to the ACPI tables, and may not even exist. Fixes: bcb079a14d75 ("iwlwifi: pcie: retrieve and parse ACPI power limitations") Reported-by: Chris Rorvick Tested-by: Paul Bolle Tested-by: Chris Rorvick Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 81 +++++++++++-------- 1 file changed, 49 insertions(+), 32 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 001be406a3d3..2f8134b2a504 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -541,48 +541,64 @@ static const struct pci_device_id iwl_hw_card_ids[] = { MODULE_DEVICE_TABLE(pci, iwl_hw_card_ids); #ifdef CONFIG_ACPI -#define SPL_METHOD "SPLC" -#define SPL_DOMAINTYPE_MODULE BIT(0) -#define SPL_DOMAINTYPE_WIFI BIT(1) -#define SPL_DOMAINTYPE_WIGIG BIT(2) -#define SPL_DOMAINTYPE_RFEM BIT(3) +#define ACPI_SPLC_METHOD "SPLC" +#define ACPI_SPLC_DOMAIN_WIFI (0x07) -static u64 splx_get_pwr_limit(struct iwl_trans *trans, union acpi_object *splx) +static u64 splc_get_pwr_limit(struct iwl_trans *trans, union acpi_object *splc) { - union acpi_object *limits, *domain_type, *power_limit; + union acpi_object *data_pkg, *dflt_pwr_limit; + int i; - if (splx->type != ACPI_TYPE_PACKAGE || - splx->package.count != 2 || - splx->package.elements[0].type != ACPI_TYPE_INTEGER || - splx->package.elements[0].integer.value != 0) { - IWL_ERR(trans, "Unsupported splx structure\n"); + /* We need at least two elements, one for the revision and one + * for the data itself. Also check that the revision is + * supported (currently only revision 0). + */ + if (splc->type != ACPI_TYPE_PACKAGE || + splc->package.count < 2 || + splc->package.elements[0].type != ACPI_TYPE_INTEGER || + splc->package.elements[0].integer.value != 0) { + IWL_DEBUG_INFO(trans, + "Unsupported structure returned by the SPLC method. Ignoring.\n"); return 0; } - limits = &splx->package.elements[1]; - if (limits->type != ACPI_TYPE_PACKAGE || - limits->package.count < 2 || - limits->package.elements[0].type != ACPI_TYPE_INTEGER || - limits->package.elements[1].type != ACPI_TYPE_INTEGER) { - IWL_ERR(trans, "Invalid limits element\n"); + /* loop through all the packages to find the one for WiFi */ + for (i = 1; i < splc->package.count; i++) { + union acpi_object *domain; + + data_pkg = &splc->package.elements[i]; + + /* Skip anything that is not a package with the right + * amount of elements (i.e. at least 2 integers). + */ + if (data_pkg->type != ACPI_TYPE_PACKAGE || + data_pkg->package.count < 2 || + data_pkg->package.elements[0].type != ACPI_TYPE_INTEGER || + data_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) + continue; + + domain = &data_pkg->package.elements[0]; + if (domain->integer.value == ACPI_SPLC_DOMAIN_WIFI) + break; + + data_pkg = NULL; + } + + if (!data_pkg) { + IWL_DEBUG_INFO(trans, + "No element for the WiFi domain returned by the SPLC method.\n"); return 0; } - domain_type = &limits->package.elements[0]; - power_limit = &limits->package.elements[1]; - if (!(domain_type->integer.value & SPL_DOMAINTYPE_WIFI)) { - IWL_DEBUG_INFO(trans, "WiFi power is not limited\n"); - return 0; - } - - return power_limit->integer.value; + dflt_pwr_limit = &data_pkg->package.elements[1]; + return dflt_pwr_limit->integer.value; } static void set_dflt_pwr_limit(struct iwl_trans *trans, struct pci_dev *pdev) { acpi_handle pxsx_handle; acpi_handle handle; - struct acpi_buffer splx = {ACPI_ALLOCATE_BUFFER, NULL}; + struct acpi_buffer splc = {ACPI_ALLOCATE_BUFFER, NULL}; acpi_status status; pxsx_handle = ACPI_HANDLE(&pdev->dev); @@ -593,23 +609,24 @@ static void set_dflt_pwr_limit(struct iwl_trans *trans, struct pci_dev *pdev) } /* Get the method's handle */ - status = acpi_get_handle(pxsx_handle, (acpi_string)SPL_METHOD, &handle); + status = acpi_get_handle(pxsx_handle, (acpi_string)ACPI_SPLC_METHOD, + &handle); if (ACPI_FAILURE(status)) { - IWL_DEBUG_INFO(trans, "SPL method not found\n"); + IWL_DEBUG_INFO(trans, "SPLC method not found\n"); return; } /* Call SPLC with no arguments */ - status = acpi_evaluate_object(handle, NULL, NULL, &splx); + status = acpi_evaluate_object(handle, NULL, NULL, &splc); if (ACPI_FAILURE(status)) { IWL_ERR(trans, "SPLC invocation failed (0x%x)\n", status); return; } - trans->dflt_pwr_limit = splx_get_pwr_limit(trans, splx.pointer); + trans->dflt_pwr_limit = splc_get_pwr_limit(trans, splc.pointer); IWL_DEBUG_INFO(trans, "Default power limit set to %lld\n", trans->dflt_pwr_limit); - kfree(splx.pointer); + kfree(splc.pointer); } #else /* CONFIG_ACPI */ From 5a143db8c4a28dab6423cb6197e9f1389da375f2 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Wed, 5 Oct 2016 09:28:53 +0300 Subject: [PATCH 020/503] iwlwifi: mvm: fix netdetect starting/stopping for unified images With unified images, we need to make sure the net-detect scan is stopped after resuming, since we don't restart the FW. Also, we need to make sure we check if there are enough scan slots available to run it, as we do with other scans. Fixes: commit 23ae61282b88 ("iwlwifi: mvm: Do not switch to D3 image on suspend") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 19 +++++++++++ drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 33 +++++++++++++++---- 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 03a8fc586548..b88e2048ae0b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -1087,6 +1087,15 @@ iwl_mvm_netdetect_config(struct iwl_mvm *mvm, ret = iwl_mvm_switch_to_d3(mvm); if (ret) return ret; + } else { + /* In theory, we wouldn't have to stop a running sched + * scan in order to start another one (for + * net-detect). But in practice this doesn't seem to + * work properly, so stop any running sched_scan now. + */ + ret = iwl_mvm_scan_stop(mvm, IWL_MVM_SCAN_SCHED, true); + if (ret) + return ret; } /* rfkill release can be either for wowlan or netdetect */ @@ -2091,6 +2100,16 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test) iwl_mvm_update_changed_regdom(mvm); if (mvm->net_detect) { + /* If this is a non-unified image, we restart the FW, + * so no need to stop the netdetect scan. If that + * fails, continue and try to get the wake-up reasons, + * but trigger a HW restart by keeping a failure code + * in ret. + */ + if (unified_image) + ret = iwl_mvm_scan_stop(mvm, IWL_MVM_SCAN_NETDETECT, + false); + iwl_mvm_query_netdetect_reasons(mvm, vif); /* has unlocked the mutex, so skip that */ goto out; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index f279fdd6eb44..fa9743205491 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -1199,6 +1199,9 @@ static int iwl_mvm_num_scans(struct iwl_mvm *mvm) static int iwl_mvm_check_running_scans(struct iwl_mvm *mvm, int type) { + bool unified_image = fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_CNSLDTD_D3_D0_IMG); + /* This looks a bit arbitrary, but the idea is that if we run * out of possible simultaneous scans and the userspace is * trying to run a scan type that is already running, we @@ -1225,12 +1228,30 @@ static int iwl_mvm_check_running_scans(struct iwl_mvm *mvm, int type) return -EBUSY; return iwl_mvm_scan_stop(mvm, IWL_MVM_SCAN_REGULAR, true); case IWL_MVM_SCAN_NETDETECT: - /* No need to stop anything for net-detect since the - * firmware is restarted anyway. This way, any sched - * scans that were running will be restarted when we - * resume. - */ - return 0; + /* For non-unified images, there's no need to stop + * anything for net-detect since the firmware is + * restarted anyway. This way, any sched scans that + * were running will be restarted when we resume. + */ + if (!unified_image) + return 0; + + /* If this is a unified image and we ran out of scans, + * we need to stop something. Prefer stopping regular + * scans, because the results are useless at this + * point, and we should be able to keep running + * another scheduled scan while suspended. + */ + if (mvm->scan_status & IWL_MVM_SCAN_REGULAR_MASK) + return iwl_mvm_scan_stop(mvm, IWL_MVM_SCAN_REGULAR, + true); + if (mvm->scan_status & IWL_MVM_SCAN_SCHED_MASK) + return iwl_mvm_scan_stop(mvm, IWL_MVM_SCAN_SCHED, + true); + + /* fall through, something is wrong if no scan was + * running but we ran out of scans. + */ default: WARN_ON(1); break; From a17b9e4c9c5e9c4da4385908af0377af11529266 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 18 Oct 2016 13:42:09 +0800 Subject: [PATCH 021/503] clk: sunxi-ng: sun6i-a31: Force AHB1 clock to use PLL6 as parent On the A31, the DMA engine only works if AHB1 is clocked from PLL6. In addition, the hstimer is clocked from AHB1, and if AHB1 is clocked from the CPU clock, and cpufreq is working, we get an unstable timer. Force the AHB1 clock to use PLL6 as its parent. Previously this was done in the device tree with the assigned-clocks and assigned-clocks-parent bindings. However with this new monolithic driver, the system critical clocks aren't exported through the device tree. The alternative is to force this setting in the driver before the clocks are registered. This is also done in newer versions of mainline U-boot. But people still using an older version, or even the vendor version, can still hit this issue. Hence the need to do it in the kernel as well. Reported-by: Hans de Goede Reported-by: Maxime Ripard Fixes: c6e6c96d8fa6 ("clk: sunxi-ng: Add A31/A31s clocks") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/ccu-sun6i-a31.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c index 79596463e0d9..4a82a49cff5e 100644 --- a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c +++ b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c @@ -191,6 +191,8 @@ static struct clk_div_table axi_div_table[] = { static SUNXI_CCU_DIV_TABLE(axi_clk, "axi", "cpu", 0x050, 0, 3, axi_div_table, 0); +#define SUN6I_A31_AHB1_REG 0x054 + static const char * const ahb1_parents[] = { "osc32k", "osc24M", "axi", "pll-periph" }; @@ -1230,6 +1232,16 @@ static void __init sun6i_a31_ccu_setup(struct device_node *node) val &= BIT(16); writel(val, reg + SUN6I_A31_PLL_MIPI_REG); + /* Force AHB1 to PLL6 / 3 */ + val = readl(reg + SUN6I_A31_AHB1_REG); + /* set PLL6 pre-div = 3 */ + val &= ~GENMASK(7, 6); + val |= 0x2 << 6; + /* select PLL6 / pre-div */ + val &= ~GENMASK(13, 12); + val |= 0x3 << 12; + writel(val, reg + SUN6I_A31_AHB1_REG); + sunxi_ccu_probe(node, reg, &sun6i_a31_ccu_desc); ccu_mux_notifier_register(pll_cpu_clk.common.hw.clk, From 368e21aebe9535c1643b272aaa9819298a6bc3e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 19 Oct 2016 21:02:04 +0300 Subject: [PATCH 022/503] rtc: cmos: Don't enable interrupts in the middle of the interrupt handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using spin_lock_irq()/spin_unlock_irq() from within the interrupt handler is a no-no. Let's save/restore the flags to avoid turning on interrupts prematurely. We hit this in a bunch of our CI systems, but for whatever reason I wasn't able to reproduce on my own machine, so this fix is just based on the backtrace. [ 202.634918] WARNING: CPU: 0 PID: 0 at kernel/locking/lockdep.c:2729 trace_hardirqs_on_caller+0x113/0x1b0 [ 202.634919] DEBUG_LOCKS_WARN_ON(current->hardirq_context) [ 202.634929] Modules linked in: snd_hda_intel i915 x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel lpc_ich snd_hda_codec_realtek snd_hda_codec_generic snd_hda_codec_hdmi snd_hda_codec snd_hwdep i2c_designware_platform i2c_designware_core snd_hda_core mei_me mei snd_pcm r8169 mii sdhci_acpi sdhci mmc_core i2c_hid [last unloaded: i915] [ 202.634930] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G U 4.9.0-rc1-CI-CI_DRM_1734+ #1 [ 202.634931] Hardware name: GIGABYTE M4HM87P-00/M4HM87P-00, BIOS F6 12/10/2014 [ 202.634933] ffff88011ea03d68 ffffffff8142dce5 ffff88011ea03db8 0000000000000000 [ 202.634934] ffff88011ea03da8 ffffffff8107e496 00000aa900000002 ffffffff81e249a0 [ 202.634935] ffffffff81815637 ffffffff82e7c280 0000000000000000 0000000000000004 [ 202.634936] Call Trace: [ 202.634939] [ 202.634939] [] dump_stack+0x67/0x92 [ 202.634941] [] __warn+0xc6/0xe0 [ 202.634944] [] ? _raw_spin_unlock_irq+0x27/0x50 [ 202.634945] [] warn_slowpath_fmt+0x4a/0x50 [ 202.634946] [] trace_hardirqs_on_caller+0x113/0x1b0 [ 202.634948] [] trace_hardirqs_on+0xd/0x10 [ 202.634949] [] _raw_spin_unlock_irq+0x27/0x50 [ 202.634951] [] rtc_handler+0x32/0xa0 [ 202.634954] [] acpi_ev_fixed_event_detect+0xd4/0xfb [ 202.634956] [] acpi_ev_sci_xrupt_handler+0xf/0x2d [ 202.634957] [] acpi_irq+0x11/0x2c [ 202.634960] [] __handle_irq_event_percpu+0x58/0x370 [ 202.634961] [] handle_irq_event_percpu+0x1e/0x50 [ 202.634962] [] handle_irq_event+0x34/0x60 [ 202.634963] [] handle_fasteoi_irq+0xa6/0x170 [ 202.634966] [] handle_irq+0x15/0x20 [ 202.634967] [] do_IRQ+0x68/0x130 [ 202.634968] [] common_interrupt+0x89/0x89 [ 202.634970] [ 202.634970] [] ? mwait_idle+0x93/0x210 [ 202.634971] [] ? mwait_idle+0x8a/0x210 [ 202.634972] [] arch_cpu_idle+0xa/0x10 [ 202.634973] [] default_idle_call+0x1e/0x30 [ 202.634974] [] cpu_startup_entry+0x17c/0x1f0 [ 202.634976] [] rest_init+0x127/0x130 [ 202.634978] [] start_kernel+0x3f6/0x403 [ 202.634980] [] x86_64_start_reservations+0x2a/0x2c [ 202.634981] [] x86_64_start_kernel+0x173/0x186 [ 202.634982] ---[ end trace 293c99618fa08d34 ]--- Cc: Gabriele Mazzotta Cc: Alexandre Belloni Fixes: 983bf1256edb ("rtc: cmos: Clear ACPI-driven alarms upon resume") Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-cmos.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 6f0e12e66296..7030d7cd3861 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -996,8 +996,9 @@ static u32 rtc_handler(void *context) struct cmos_rtc *cmos = dev_get_drvdata(dev); unsigned char rtc_control = 0; unsigned char rtc_intr; + unsigned long flags; - spin_lock_irq(&rtc_lock); + spin_lock_irqsave(&rtc_lock, flags); if (cmos_rtc.suspend_ctrl) rtc_control = CMOS_READ(RTC_CONTROL); if (rtc_control & RTC_AIE) { @@ -1006,7 +1007,7 @@ static u32 rtc_handler(void *context) rtc_intr = CMOS_READ(RTC_INTR_FLAGS); rtc_update_irq(cmos->rtc, 1, rtc_intr); } - spin_unlock_irq(&rtc_lock); + spin_unlock_irqrestore(&rtc_lock, flags); pm_wakeup_event(dev, 0); acpi_clear_event(ACPI_EVENT_RTC); From 989cea5c14be024e879c0055dc6d033680a52610 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 21 Oct 2016 01:13:33 +1100 Subject: [PATCH 023/503] kbuild: prevent lib-ksyms.o rebuilds Signed-off-by: Nicholas Piggin Reported-by: Russell King Tested-by: Russell King Signed-off-by: Michal Marek --- scripts/Makefile.build | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index de46ab03f063..e1f25d6d132e 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -430,6 +430,9 @@ cmd_export_list = $(OBJDUMP) -h $< | \ $(obj)/lib-ksyms.o: $(lib-target) FORCE $(call if_changed,export_list) + +targets += $(obj)/lib-ksyms.o + endif # From b7f865ede20c87073216f77fe97f6fc56666e3da Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Tue, 25 Oct 2016 01:08:31 +0800 Subject: [PATCH 024/503] ARM: dts: sun8i: fix the pinmux for UART1 When the patch is applied, the allwinner,driver and allwinner,pull properties are removed. Although they're described to be optional in the devicetree binding, without them, the pinmux cannot be initialized, and the uart cannot be used. Add them back to fix the problem, and makes the bluetooth on iNet D978 Rev2 board work. Fixes: 82eec384249f (ARM: dts: sun8i: add pinmux for UART1 at PG) Signed-off-by: Icenowy Zheng Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun8i-a23-a33.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/sun8i-a23-a33.dtsi b/arch/arm/boot/dts/sun8i-a23-a33.dtsi index 48fc24f36fcb..300a1bd5a6ec 100644 --- a/arch/arm/boot/dts/sun8i-a23-a33.dtsi +++ b/arch/arm/boot/dts/sun8i-a23-a33.dtsi @@ -282,11 +282,15 @@ uart1_pins_a: uart1@0 { allwinner,pins = "PG6", "PG7"; allwinner,function = "uart1"; + allwinner,drive = ; + allwinner,pull = ; }; uart1_pins_cts_rts_a: uart1-cts-rts@0 { allwinner,pins = "PG8", "PG9"; allwinner,function = "uart1"; + allwinner,drive = ; + allwinner,pull = ; }; mmc0_pins_a: mmc0@0 { From d3532ea6ce4ea501e421d130555e59edc2945f99 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 18 Oct 2016 00:13:40 +0200 Subject: [PATCH 025/503] brcmfmac: avoid maybe-uninitialized warning in brcmf_cfg80211_start_ap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A bugfix added a sanity check around the assignment and use of the 'is_11d' variable, which looks correct to me, but as the function is rather complex already, this confuses the compiler to the point where it can no longer figure out if the variable is always initialized correctly: brcm80211/brcmfmac/cfg80211.c: In function ‘brcmf_cfg80211_start_ap’: brcm80211/brcmfmac/cfg80211.c:4586:10: error: ‘is_11d’ may be used uninitialized in this function [-Werror=maybe-uninitialized] This adds an initialization for the newly introduced case in which the variable should not really be used, in order to make the warning go away. Fixes: b3589dfe0212 ("brcmfmac: ignore 11d configuration errors") Cc: Hante Meuleman Cc: Arend van Spriel Cc: Kalle Valo Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index b777e1b2f87a..78d9966a3957 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -4516,7 +4516,7 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev, /* store current 11d setting */ if (brcmf_fil_cmd_int_get(ifp, BRCMF_C_GET_REGULATORY, &ifp->vif->is_11d)) { - supports_11d = false; + is_11d = supports_11d = false; } else { country_ie = brcmf_parse_tlvs((u8 *)settings->beacon.tail, settings->beacon.tail_len, From bb6a6e8e091353770074608c1d1bfde0e20b8154 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 22 Oct 2016 18:51:24 +0800 Subject: [PATCH 026/503] netfilter: nft_dynset: fix panic if NFT_SET_HASH is not enabled When CONFIG_NFT_SET_HASH is not enabled and I input the following rule: "nft add rule filter output flow table test {ip daddr counter }", kernel panic happened on my system: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [< (null)>] (null) [...] Call Trace: [] ? nft_dynset_eval+0x56/0x100 [nf_tables] [] nft_do_chain+0xfb/0x4e0 [nf_tables] [] ? nf_conntrack_tuple_taken+0x61/0x210 [nf_conntrack] [] ? get_unique_tuple+0x136/0x560 [nf_nat] [] ? __nf_ct_ext_add_length+0x111/0x130 [nf_conntrack] [] ? nf_nat_setup_info+0x87/0x3b0 [nf_nat] [] ? ipt_do_table+0x327/0x610 [] ? __nf_nat_alloc_null_binding+0x57/0x80 [nf_nat] [] nft_ipv4_output+0xaf/0xd0 [nf_tables_ipv4] [] nf_iterate+0x55/0x60 [] nf_hook_slow+0x73/0xd0 Because in rbtree type set, ops->update is not implemented. So just keep it simple, in such case, report -EOPNOTSUPP to the user space. Fixes: 22fe54d5fefc ("netfilter: nf_tables: add support for dynamic set updates") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_dynset.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 517f08767a3c..bfdb689664b0 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -139,6 +139,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx, return PTR_ERR(set); } + if (set->ops->update == NULL) + return -EOPNOTSUPP; + if (set->flags & NFT_SET_CONSTANT) return -EBUSY; From 61f9e2924f4981d626b3a931fed935f2fa3cb4de Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 22 Oct 2016 18:51:25 +0800 Subject: [PATCH 027/503] netfilter: nf_tables: fix *leak* when expr clone fail When nft_expr_clone failed, a series of problems will happen: 1. module refcnt will leak, we call __module_get at the beginning but we forget to put it back if ops->clone returns fail 2. memory will be leaked, if clone fail, we just return NULL and forget to free the alloced element 3. set->nelems will become incorrect when set->size is specified. If clone fail, we should decrease the set->nelems Now this patch fixes these problems. And fortunately, clone fail will only happen on counter expression when memory is exhausted. Fixes: 086f332167d6 ("netfilter: nf_tables: add clone interface to expression operations") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 6 ++++-- net/netfilter/nf_tables_api.c | 11 ++++++----- net/netfilter/nft_dynset.c | 16 ++++++++++------ net/netfilter/nft_set_hash.c | 4 ++-- net/netfilter/nft_set_rbtree.c | 2 +- 5 files changed, 23 insertions(+), 16 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 5031e072567b..741dcded5b4f 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -542,7 +542,8 @@ void *nft_set_elem_init(const struct nft_set *set, const struct nft_set_ext_tmpl *tmpl, const u32 *key, const u32 *data, u64 timeout, gfp_t gfp); -void nft_set_elem_destroy(const struct nft_set *set, void *elem); +void nft_set_elem_destroy(const struct nft_set *set, void *elem, + bool destroy_expr); /** * struct nft_set_gc_batch_head - nf_tables set garbage collection batch @@ -693,7 +694,6 @@ static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) { int err; - __module_get(src->ops->type->owner); if (src->ops->clone) { dst->ops = src->ops; err = src->ops->clone(dst, src); @@ -702,6 +702,8 @@ static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) } else { memcpy(dst, src, src->ops->size); } + + __module_get(src->ops->type->owner); return 0; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 24db22257586..86e48aeb20be 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3452,14 +3452,15 @@ void *nft_set_elem_init(const struct nft_set *set, return elem; } -void nft_set_elem_destroy(const struct nft_set *set, void *elem) +void nft_set_elem_destroy(const struct nft_set *set, void *elem, + bool destroy_expr) { struct nft_set_ext *ext = nft_set_elem_ext(set, elem); nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_uninit(nft_set_ext_data(ext), set->dtype); - if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) + if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); kfree(elem); @@ -3812,7 +3813,7 @@ void nft_set_gc_batch_release(struct rcu_head *rcu) gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu); for (i = 0; i < gcb->head.cnt; i++) - nft_set_elem_destroy(gcb->head.set, gcb->elems[i]); + nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true); kfree(gcb); } EXPORT_SYMBOL_GPL(nft_set_gc_batch_release); @@ -4030,7 +4031,7 @@ static void nf_tables_commit_release(struct nft_trans *trans) break; case NFT_MSG_DELSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), - nft_trans_elem(trans).priv); + nft_trans_elem(trans).priv, true); break; } kfree(trans); @@ -4171,7 +4172,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) break; case NFT_MSG_NEWSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), - nft_trans_elem(trans).priv); + nft_trans_elem(trans).priv, true); break; } kfree(trans); diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index bfdb689664b0..31ca94793aa9 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -44,18 +44,22 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr, ®s->data[priv->sreg_key], ®s->data[priv->sreg_data], timeout, GFP_ATOMIC); - if (elem == NULL) { - if (set->size) - atomic_dec(&set->nelems); - return NULL; - } + if (elem == NULL) + goto err1; ext = nft_set_elem_ext(set, elem); if (priv->expr != NULL && nft_expr_clone(nft_set_ext_expr(ext), priv->expr) < 0) - return NULL; + goto err2; return elem; + +err2: + nft_set_elem_destroy(set, elem, false); +err1: + if (set->size) + atomic_dec(&set->nelems); + return NULL; } static void nft_dynset_eval(const struct nft_expr *expr, diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 3794cb2fc788..88d9fc8343e7 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -120,7 +120,7 @@ out: return true; err2: - nft_set_elem_destroy(set, he); + nft_set_elem_destroy(set, he, true); err1: return false; } @@ -332,7 +332,7 @@ static int nft_hash_init(const struct nft_set *set, static void nft_hash_elem_destroy(void *ptr, void *arg) { - nft_set_elem_destroy((const struct nft_set *)arg, ptr); + nft_set_elem_destroy((const struct nft_set *)arg, ptr, true); } static void nft_hash_destroy(const struct nft_set *set) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 38b5bda242f8..36493a7cae88 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -266,7 +266,7 @@ static void nft_rbtree_destroy(const struct nft_set *set) while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); - nft_set_elem_destroy(set, rbe); + nft_set_elem_destroy(set, rbe, true); } } From dab45060a56a9732b027d2031c1b6100bc75eea2 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 22 Oct 2016 18:51:26 +0800 Subject: [PATCH 028/503] netfilter: nf_tables: fix race when create new element in dynset Packets may race when create the new element in nft_hash_update: CPU0 CPU1 lookup_fast - fail lookup_fast - fail new - ok new - ok insert - ok insert - fail(EEXIST) So when race happened, we reuse the existing element. Otherwise, these *racing* packets will not be handled properly. Fixes: 22fe54d5fefc ("netfilter: nf_tables: add support for dynamic set updates") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_hash.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 88d9fc8343e7..a3dface3e6e6 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -98,7 +98,7 @@ static bool nft_hash_update(struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) { struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he; + struct nft_hash_elem *he, *prev; struct nft_hash_cmp_arg arg = { .genmask = NFT_GENMASK_ANY, .set = set, @@ -112,9 +112,18 @@ static bool nft_hash_update(struct nft_set *set, const u32 *key, he = new(set, expr, regs); if (he == NULL) goto err1; - if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, - nft_hash_params)) + + prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node, + nft_hash_params); + if (IS_ERR(prev)) goto err2; + + /* Another cpu may race to insert the element with the same key */ + if (prev) { + nft_set_elem_destroy(set, he, true); + he = prev; + } + out: *ext = &he->ext; return true; From 444f901742d054a4cd5ff045871eac5131646cfb Mon Sep 17 00:00:00 2001 From: Ulrich Weber Date: Mon, 24 Oct 2016 18:07:23 +0200 Subject: [PATCH 029/503] netfilter: nf_conntrack_sip: extend request line validation on SIP requests, so a fragmented TCP SIP packet from an allow header starting with INVITE,NOTIFY,OPTIONS,REFER,REGISTER,UPDATE,SUBSCRIBE Content-Length: 0 will not bet interpreted as an INVITE request. Also Request-URI must start with an alphabetic character. Confirm with RFC 3261 Request-Line = Method SP Request-URI SP SIP-Version CRLF Fixes: 30f33e6dee80 ("[NETFILTER]: nf_conntrack_sip: support method specific request/response handling") Signed-off-by: Ulrich Weber Acked-by: Marco Angaroni Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 621b81c7bddc..c3fc14e021ec 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1436,9 +1436,12 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff, handler = &sip_handlers[i]; if (handler->request == NULL) continue; - if (*datalen < handler->len || + if (*datalen < handler->len + 2 || strncasecmp(*dptr, handler->method, handler->len)) continue; + if ((*dptr)[handler->len] != ' ' || + !isalpha((*dptr)[handler->len+1])) + continue; if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CSEQ, &matchoff, &matchlen) <= 0) { From f1d505bb762e30bf316ff5d3b604914649d6aed3 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Tue, 25 Oct 2016 15:56:39 -0400 Subject: [PATCH 030/503] netfilter: nf_tables: fix type mismatch with error return from nft_parse_u32_check Commit 36b701fae12ac ("netfilter: nf_tables: validate maximum value of u32 netlink attributes") introduced nft_parse_u32_check with a return value of "unsigned int", yet on error it returns "-ERANGE". This patch corrects the mismatch by changing the return value to "int", which happens to match the actual users of nft_parse_u32_check already. Found by Coverity, CID 1373930. Note that commit 21a9e0f1568ea ("netfilter: nft_exthdr: fix error handling in nft_exthdr_init()) attempted to address the issue, but did not address the return type of nft_parse_u32_check. Signed-off-by: John W. Linville Cc: Laura Garcia Liebana Cc: Pablo Neira Ayuso Cc: Dan Carpenter Fixes: 36b701fae12ac ("netfilter: nf_tables: validate maximum value...") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- net/netfilter/nf_tables_api.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 741dcded5b4f..d79d1e9b9546 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -145,7 +145,7 @@ static inline enum nft_registers nft_type_to_reg(enum nft_data_types type) return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE; } -unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); +int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); unsigned int nft_parse_register(const struct nlattr *attr); int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 86e48aeb20be..365d31b86816 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4422,7 +4422,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, * Otherwise a 0 is returned and the attribute value is stored in the * destination variable. */ -unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) +int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) { u32 val; From cdb436d181d21af4d273b49ec7734eecd6a37fe9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Oct 2016 23:46:17 +0200 Subject: [PATCH 031/503] netfilter: conntrack: avoid excess memory allocation This is now a fixed-size extension, so we don't need to pass a variable alloc size. This (harmless) error results in allocating 32 instead of the needed 16 bytes for this extension as the size gets passed twice. Fixes: 23014011ba420 ("netfilter: conntrack: support a fixed size of 128 distinct labels") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_labels.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index 498814626e28..1723a67c0b0a 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -30,8 +30,7 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) if (net->ct.labels_used == 0) return NULL; - return nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS, - sizeof(struct nf_conn_labels), GFP_ATOMIC); + return nf_ct_ext_add(ct, NF_CT_EXT_LABELS, GFP_ATOMIC); #else return NULL; #endif From 5747620257812530adda58cbff591fede6fb261e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Oct 2016 17:34:32 +0200 Subject: [PATCH 032/503] netfilter: ip_vs_sync: fix bogus maybe-uninitialized warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building the ip_vs_sync code with CONFIG_OPTIMIZE_INLINING on x86 confuses the compiler to the point where it produces a rather dubious warning message: net/netfilter/ipvs/ip_vs_sync.c:1073:33: error: ‘opt.init_seq’ may be used uninitialized in this function [-Werror=maybe-uninitialized] struct ip_vs_sync_conn_options opt; ^~~ net/netfilter/ipvs/ip_vs_sync.c:1073:33: error: ‘opt.delta’ may be used uninitialized in this function [-Werror=maybe-uninitialized] net/netfilter/ipvs/ip_vs_sync.c:1073:33: error: ‘opt.previous_delta’ may be used uninitialized in this function [-Werror=maybe-uninitialized] net/netfilter/ipvs/ip_vs_sync.c:1073:33: error: ‘*((void *)&opt+12).init_seq’ may be used uninitialized in this function [-Werror=maybe-uninitialized] net/netfilter/ipvs/ip_vs_sync.c:1073:33: error: ‘*((void *)&opt+12).delta’ may be used uninitialized in this function [-Werror=maybe-uninitialized] net/netfilter/ipvs/ip_vs_sync.c:1073:33: error: ‘*((void *)&opt+12).previous_delta’ may be used uninitialized in this function [-Werror=maybe-uninitialized] The problem appears to be a combination of a number of factors, including the __builtin_bswap32 compiler builtin being slightly odd, having a large amount of code inlined into a single function, and the way that some functions only get partially inlined here. I've spent way too much time trying to work out a way to improve the code, but the best I've come up with is to add an explicit memset right before the ip_vs_seq structure is first initialized here. When the compiler works correctly, this has absolutely no effect, but in the case that produces the warning, the warning disappears. In the process of analysing this warning, I also noticed that we use memcpy to copy the larger ip_vs_sync_conn_options structure over two members of the ip_vs_conn structure. This works because the layout is identical, but seems error-prone, so I'm changing this in the process to directly copy the two members. This change seemed to have no effect on the object code or the warning, but it deals with the same data, so I kept the two changes together. Signed-off-by: Arnd Bergmann Acked-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_sync.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 1b07578bedf3..9350530c16c1 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -283,6 +283,7 @@ struct ip_vs_sync_buff { */ static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho) { + memset(ho, 0, sizeof(*ho)); ho->init_seq = get_unaligned_be32(&no->init_seq); ho->delta = get_unaligned_be32(&no->delta); ho->previous_delta = get_unaligned_be32(&no->previous_delta); @@ -917,8 +918,10 @@ static void ip_vs_proc_conn(struct netns_ipvs *ipvs, struct ip_vs_conn_param *pa kfree(param->pe_data); } - if (opt) - memcpy(&cp->in_seq, opt, sizeof(*opt)); + if (opt) { + cp->in_seq = opt->in_seq; + cp->out_seq = opt->out_seq; + } atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs)); cp->state = state; cp->old_state = cp->state; From 6a84fb4b4e439a8ef0ce19ec7e7661ad76f655c9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 28 Oct 2016 14:34:51 -0700 Subject: [PATCH 033/503] device-dax: check devm_nsio_enable() return value If the dax_pmem driver is passed a resource that is already busy the driver probe attempt should fail with a message like the following: dax_pmem dax0.1: could not reserve region [mem 0x100000000-0x11fffffff] However, if we do not catch the error we crash for the obvious reason of accessing memory that is not mapped. BUG: unable to handle kernel paging request at ffffc90020001000 IP: [] __memcpy+0x12/0x20 [..] Call Trace: [] ? nsio_rw_bytes+0x60/0x180 [] nd_pfn_validate+0x75/0x320 [] nvdimm_setup_pfn+0xb9/0x5d0 [] ? devm_nsio_enable+0xff/0x110 [] dax_pmem_probe+0x59/0x260 Cc: Fixes: ab68f2622136 ("/dev/dax, pmem: direct access to persistent memory") Reported-by: Dave Hansen Signed-off-by: Dan Williams --- drivers/dax/pmem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c index 4a15fa5df98b..73c6ce93a0d9 100644 --- a/drivers/dax/pmem.c +++ b/drivers/dax/pmem.c @@ -78,7 +78,9 @@ static int dax_pmem_probe(struct device *dev) nsio = to_nd_namespace_io(&ndns->dev); /* parse the 'pfn' info block via ->rw_bytes */ - devm_nsio_enable(dev, nsio); + rc = devm_nsio_enable(dev, nsio); + if (rc) + return rc; altmap = nvdimm_setup_pfn(nd_pfn, &res, &__altmap); if (IS_ERR(altmap)) return PTR_ERR(altmap); From 1c387188c60f53b338c20eee32db055dfe022a9b Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Fri, 21 Oct 2016 15:32:05 -0700 Subject: [PATCH 034/503] iommu/vt-d: Fix IOMMU lookup for SR-IOV Virtual Functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VT-d specification (§8.3.3) says: ‘Virtual Functions’ of a ‘Physical Function’ are under the scope of the same remapping unit as the ‘Physical Function’. The BIOS is not required to list all the possible VFs in the scope tables, and arguably *shouldn't* make any attempt to do so, since there could be a huge number of them. This has been broken basically for ever — the VF is never going to match against a specific unit's scope, so it ends up being assigned to the INCLUDE_ALL IOMMU. Which was always actually correct by coincidence, but now we're looking at Root-Complex integrated devices with SR-IOV support it's going to start being wrong. Fix it to simply use pci_physfn() before doing the lookup for PCI devices. Cc: stable@vger.kernel.org Signed-off-by: Sainath Grandhi Signed-off-by: Ashok Raj Signed-off-by: David Woodhouse --- drivers/iommu/dmar.c | 4 +++- drivers/iommu/intel-iommu.c | 13 +++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 58470f5ced04..8c53748a769d 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -338,7 +338,9 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb, struct pci_dev *pdev = to_pci_dev(data); struct dmar_pci_notify_info *info; - /* Only care about add/remove events for physical functions */ + /* Only care about add/remove events for physical functions. + * For VFs we actually do the lookup based on the corresponding + * PF in device_to_iommu() anyway. */ if (pdev->is_virtfn) return NOTIFY_DONE; if (action != BUS_NOTIFY_ADD_DEVICE && diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index a4407eabf0e6..2723090a0d54 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -892,7 +892,13 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf return NULL; if (dev_is_pci(dev)) { + struct pci_dev *pf_pdev; + pdev = to_pci_dev(dev); + /* VFs aren't listed in scope tables; we need to look up + * the PF instead to find the IOMMU. */ + pf_pdev = pci_physfn(pdev); + dev = &pf_pdev->dev; segment = pci_domain_nr(pdev->bus); } else if (has_acpi_companion(dev)) dev = &ACPI_COMPANION(dev)->dev; @@ -905,6 +911,13 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, tmp) { if (tmp == dev) { + /* For a VF use its original BDF# not that of the PF + * which we used for the IOMMU lookup. Strictly speaking + * we could do this for all PCI devices; we only need to + * get the BDF# from the scope table for ACPI matches. */ + if (pdev->is_virtfn) + goto got_pdev; + *bus = drhd->devices[i].bus; *devfn = drhd->devices[i].devfn; goto out; From b9a321b48af40e0606009df8aff0a8c65dfbbfd8 Mon Sep 17 00:00:00 2001 From: Mark Lord Date: Sun, 30 Oct 2016 19:28:27 -0400 Subject: [PATCH 035/503] r8152: Fix broken RX checksums. The r8152 driver has been broken since (approx) 3.16.xx when support was added for hardware RX checksums on newer chip versions. Symptoms include random segfaults and silent data corruption over NFS. The hardware checksum logig does not work on the VER_02 dongles I have here when used with a slow embedded system CPU. Google reveals others reporting similar issues on Raspberry Pi. So, disable hardware RX checksum support for VER_02, and fix an obvious coding error for IPV6 checksums in the same function. Because this bug results in silent data corruption, it is a good candidate for back-porting to -stable >= 3.16.xx. Signed-off-by: Mark Lord Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 44d439f50961..75c516889645 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1730,7 +1730,7 @@ static u8 r8152_rx_csum(struct r8152 *tp, struct rx_desc *rx_desc) u8 checksum = CHECKSUM_NONE; u32 opts2, opts3; - if (tp->version == RTL_VER_01) + if (tp->version == RTL_VER_01 || tp->version == RTL_VER_02) goto return_result; opts2 = le32_to_cpu(rx_desc->opts2); @@ -1745,7 +1745,7 @@ static u8 r8152_rx_csum(struct r8152 *tp, struct rx_desc *rx_desc) checksum = CHECKSUM_NONE; else checksum = CHECKSUM_UNNECESSARY; - } else if (RD_IPV6_CS) { + } else if (opts2 & RD_IPV6_CS) { if ((opts2 & RD_UDP_CS) && !(opts3 & UDPF)) checksum = CHECKSUM_UNNECESSARY; else if ((opts2 & RD_TCP_CS) && !(opts3 & TCPF)) From c17c3cdff10b9f59ef1244a14604f10949f17117 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 29 Oct 2016 22:03:05 +0800 Subject: [PATCH 036/503] netfilter: nf_tables: destroy the set if fail to add transaction When the memory is exhausted, then we will fail to add the NFT_MSG_NEWSET transaction. In such case, we should destroy the set before we free it. Fixes: 958bee14d071 ("netfilter: nf_tables: use new transaction infrastructure to handle sets") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 365d31b86816..7d6a626b08f1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2956,12 +2956,14 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); if (err < 0) - goto err2; + goto err3; list_add_tail_rcu(&set->list, &table->sets); table->use++; return 0; +err3: + ops->destroy(set); err2: kfree(set); err1: From b73b8a1ba598236296a46103d81c10d629d9a470 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 29 Oct 2016 22:09:51 +0800 Subject: [PATCH 037/503] netfilter: nft_dup: do not use sreg_dev if the user doesn't specify it The NFTA_DUP_SREG_DEV attribute is not a must option, so we should use it in routing lookup only when the user specify it. Fixes: d877f07112f1 ("netfilter: nf_tables: add nft_dup expression") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nft_dup_ipv4.c | 6 ++++-- net/ipv6/netfilter/nft_dup_ipv6.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c index bf855e64fc45..0c01a270bf9f 100644 --- a/net/ipv4/netfilter/nft_dup_ipv4.c +++ b/net/ipv4/netfilter/nft_dup_ipv4.c @@ -28,7 +28,7 @@ static void nft_dup_ipv4_eval(const struct nft_expr *expr, struct in_addr gw = { .s_addr = (__force __be32)regs->data[priv->sreg_addr], }; - int oif = regs->data[priv->sreg_dev]; + int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1; nf_dup_ipv4(pkt->net, pkt->skb, pkt->hook, &gw, oif); } @@ -59,7 +59,9 @@ static int nft_dup_ipv4_dump(struct sk_buff *skb, const struct nft_expr *expr) { struct nft_dup_ipv4 *priv = nft_expr_priv(expr); - if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) || + if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr)) + goto nla_put_failure; + if (priv->sreg_dev && nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev)) goto nla_put_failure; diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c index 8bfd470cbe72..831f86e1ec08 100644 --- a/net/ipv6/netfilter/nft_dup_ipv6.c +++ b/net/ipv6/netfilter/nft_dup_ipv6.c @@ -26,7 +26,7 @@ static void nft_dup_ipv6_eval(const struct nft_expr *expr, { struct nft_dup_ipv6 *priv = nft_expr_priv(expr); struct in6_addr *gw = (struct in6_addr *)®s->data[priv->sreg_addr]; - int oif = regs->data[priv->sreg_dev]; + int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1; nf_dup_ipv6(pkt->net, pkt->skb, pkt->hook, gw, oif); } @@ -57,7 +57,9 @@ static int nft_dup_ipv6_dump(struct sk_buff *skb, const struct nft_expr *expr) { struct nft_dup_ipv6 *priv = nft_expr_priv(expr); - if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) || + if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr)) + goto nla_put_failure; + if (priv->sreg_dev && nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev)) goto nla_put_failure; From f89c56ce710afa65e1b2ead555b52c4807f34ff7 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 26 Oct 2016 11:21:14 +0200 Subject: [PATCH 038/503] ipv6: Don't use ufo handling on later transformed packets Similar to commit c146066ab802 ("ipv4: Don't use ufo handling on later transformed packets"), don't perform UFO on packets that will be IPsec transformed. To detect it we rely on the fact that headerlen in dst_entry is non-zero only for transformation bundles (xfrm_dst objects). Unwanted segmentation can be observed with a NETIF_F_UFO capable device, such as a dummy device: DEV=dum0 LEN=1493 ip li add $DEV type dummy ip addr add fc00::1/64 dev $DEV nodad ip link set $DEV up ip xfrm policy add dir out src fc00::1 dst fc00::2 \ tmpl src fc00::1 dst fc00::2 proto esp spi 1 ip xfrm state add src fc00::1 dst fc00::2 \ proto esp spi 1 enc 'aes' 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b tcpdump -n -nn -i $DEV -t & socat /dev/zero,readbytes=$LEN udp6:[fc00::2]:$LEN tcpdump output before: IP6 fc00::1 > fc00::2: frag (0|1448) ESP(spi=0x00000001,seq=0x1), length 1448 IP6 fc00::1 > fc00::2: frag (1448|48) IP6 fc00::1 > fc00::2: ESP(spi=0x00000001,seq=0x2), length 88 ... and after: IP6 fc00::1 > fc00::2: frag (0|1448) ESP(spi=0x00000001,seq=0x1), length 1448 IP6 fc00::1 > fc00::2: frag (1448|80) Fixes: e89e9cf539a2 ("[IPv4/IPv6]: UFO Scatter-gather approach") Signed-off-by: Jakub Sitnicki Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 6001e781164e..59eb4ed99ce8 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1366,7 +1366,7 @@ emsgsize: if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO) && + (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, exthdrlen, From 19bda36c4299ce3d7e5bce10bebe01764a655a6d Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 28 Oct 2016 18:18:01 +0800 Subject: [PATCH 039/503] ipv6: add mtu lock check in __ip6_rt_update_pmtu Prior to this patch, ipv6 didn't do mtu lock check in ip6_update_pmtu. It leaded to that mtu lock doesn't really work when receiving the pkt of ICMPV6_PKT_TOOBIG. This patch is to add mtu lock check in __ip6_rt_update_pmtu just as ipv4 did in __ip_rt_update_pmtu. Acked-by: Hannes Frederic Sowa Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv6/route.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 947ed1ded026..7403d90dcb38 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1364,6 +1364,9 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, if (rt6->rt6i_flags & RTF_LOCAL) return; + if (dst_metric_locked(dst, RTAX_MTU)) + return; + dst_confirm(dst); mtu = max_t(u32, mtu, IPV6_MIN_MTU); if (mtu >= dst_mtu(dst)) From ce6dd23329b1ee6a794acf5f7e40f8e89b8317ee Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 28 Oct 2016 18:43:11 +0200 Subject: [PATCH 040/503] dctcp: avoid bogus doubling of cwnd after loss If a congestion control module doesn't provide .undo_cwnd function, tcp_undo_cwnd_reduction() will set cwnd to tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1); ... which makes sense for reno (it sets ssthresh to half the current cwnd), but it makes no sense for dctcp, which sets ssthresh based on the current congestion estimate. This can cause severe growth of cwnd (eventually overflowing u32). Fix this by saving last cwnd on loss and restore cwnd based on that, similar to cubic and other algorithms. Fixes: e3118e8359bb7c ("net: tcp: add DCTCP congestion control algorithm") Cc: Lawrence Brakmo Cc: Andrew Shewmaker Cc: Glenn Judd Acked-by: Daniel Borkmann Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/tcp_dctcp.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 10d728b6804c..ab37c6775630 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -56,6 +56,7 @@ struct dctcp { u32 next_seq; u32 ce_state; u32 delayed_ack_reserved; + u32 loss_cwnd; }; static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */ @@ -96,6 +97,7 @@ static void dctcp_init(struct sock *sk) ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); ca->delayed_ack_reserved = 0; + ca->loss_cwnd = 0; ca->ce_state = 0; dctcp_reset(tp, ca); @@ -111,9 +113,10 @@ static void dctcp_init(struct sock *sk) static u32 dctcp_ssthresh(struct sock *sk) { - const struct dctcp *ca = inet_csk_ca(sk); + struct dctcp *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); + ca->loss_cwnd = tp->snd_cwnd; return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U); } @@ -308,12 +311,20 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, return 0; } +static u32 dctcp_cwnd_undo(struct sock *sk) +{ + const struct dctcp *ca = inet_csk_ca(sk); + + return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); +} + static struct tcp_congestion_ops dctcp __read_mostly = { .init = dctcp_init, .in_ack_event = dctcp_update_alpha, .cwnd_event = dctcp_cwnd_event, .ssthresh = dctcp_ssthresh, .cong_avoid = tcp_reno_cong_avoid, + .undo_cwnd = dctcp_cwnd_undo, .set_state = dctcp_state, .get_info = dctcp_get_info, .flags = TCP_CONG_NEEDS_ECN, From e551c32d57c88923f99f8f010e89ca7ed0735e83 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Oct 2016 13:40:24 -0700 Subject: [PATCH 041/503] net: clear sk_err_soft in sk_clone_lock() At accept() time, it is possible the parent has a non zero sk_err_soft, leftover from a prior error. Make sure we do not leave this value in the child, as it makes future getsockopt(SO_ERROR) calls quite unreliable. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/core/sock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/sock.c b/net/core/sock.c index c73e28fc9c2a..df171acfe232 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1543,6 +1543,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); newsk->sk_err = 0; + newsk->sk_err_soft = 0; newsk->sk_priority = 0; newsk->sk_incoming_cpu = raw_smp_processor_id(); atomic64_set(&newsk->sk_cookie, 0); From 4f2e4ad56a65f3b7d64c258e373cb71e8d2499f4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 29 Oct 2016 11:02:36 -0700 Subject: [PATCH 042/503] net: mangle zero checksum in skb_checksum_help() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sending zero checksum is ok for TCP, but not for UDP. UDPv6 receiver should by default drop a frame with a 0 checksum, and UDPv4 would not verify the checksum and might accept a corrupted packet. Simply replace such checksum by 0xffff, regardless of transport. This error was caught on SIT tunnels, but seems generic. Signed-off-by: Eric Dumazet Cc: Maciej Żenczykowski Cc: Willem de Bruijn Acked-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 820bac239738..eaad4c28069f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2484,7 +2484,7 @@ int skb_checksum_help(struct sk_buff *skb) goto out; } - *(__sum16 *)(skb->data + offset) = csum_fold(csum); + *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0; out_set_summed: skb->ip_summed = CHECKSUM_NONE; out: From cbbf049a7c346180cc61ae0a9245c5d749d20a12 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Sun, 30 Oct 2016 10:25:42 +0200 Subject: [PATCH 043/503] qede: Fix statistics' strings for Tx/Rx queues When an interface is configured to use Tx/Rx-only queues, the length of the statistics would be shortened to accomodate only the statistics required per-each queue, and the values would be provided accordingly. However, the strings provided would still contain both Tx and Rx strings for each one of the queues [regardless of its configuration], which might lead to out-of-bound access when filling the buffers as well as incorrect statistics presented. Fixes: 9a4d7e86acf3 ("qede: Add support for Tx/Rx-only queues.") Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- .../net/ethernet/qlogic/qede/qede_ethtool.c | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 12251a1032d1..7567cc464b88 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -175,16 +175,23 @@ static void qede_get_strings_stats(struct qede_dev *edev, u8 *buf) for (i = 0, k = 0; i < QEDE_QUEUE_CNT(edev); i++) { int tc; - for (j = 0; j < QEDE_NUM_RQSTATS; j++) - sprintf(buf + (k + j) * ETH_GSTRING_LEN, - "%d: %s", i, qede_rqstats_arr[j].string); - k += QEDE_NUM_RQSTATS; - for (tc = 0; tc < edev->num_tc; tc++) { - for (j = 0; j < QEDE_NUM_TQSTATS; j++) + if (edev->fp_array[i].type & QEDE_FASTPATH_RX) { + for (j = 0; j < QEDE_NUM_RQSTATS; j++) sprintf(buf + (k + j) * ETH_GSTRING_LEN, - "%d.%d: %s", i, tc, - qede_tqstats_arr[j].string); - k += QEDE_NUM_TQSTATS; + "%d: %s", i, + qede_rqstats_arr[j].string); + k += QEDE_NUM_RQSTATS; + } + + if (edev->fp_array[i].type & QEDE_FASTPATH_TX) { + for (tc = 0; tc < edev->num_tc; tc++) { + for (j = 0; j < QEDE_NUM_TQSTATS; j++) + sprintf(buf + (k + j) * + ETH_GSTRING_LEN, + "%d.%d: %s", i, tc, + qede_tqstats_arr[j].string); + k += QEDE_NUM_TQSTATS; + } } } From 46d0847cdd4a3fc1920e56827b9189b9a105d362 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 30 Oct 2016 10:09:22 +0100 Subject: [PATCH 044/503] mlxsw: spectrum: Fix incorrect reuse of MID entries In the device, a MID entry represents a group of local ports, which can later be bound to a MDB entry. The lookup of an existing MID entry is currently done using the provided MC MAC address and VID, from the Linux bridge. However, this can result in an incorrect reuse of the same MID index in different VLAN-unaware bridges (same IP MC group and VID 0). Fix this by performing the lookup based on FID instead of VID, which is unique across different bridges. Fixes: 3a49b4fde2a1 ("mlxsw: Adding layer 2 multicast support") Signed-off-by: Ido Schimmel Acked-by: Elad Raz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 2 +- .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 9b22863a924b..97bbc1d21df8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -115,7 +115,7 @@ struct mlxsw_sp_rif { struct mlxsw_sp_mid { struct list_head list; unsigned char addr[ETH_ALEN]; - u16 vid; + u16 fid; u16 mid; unsigned int ref_count; }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 5e00c79e8133..1e2c8eca3af1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -929,12 +929,12 @@ static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mid, static struct mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp *mlxsw_sp, const unsigned char *addr, - u16 vid) + u16 fid) { struct mlxsw_sp_mid *mid; list_for_each_entry(mid, &mlxsw_sp->br_mids.list, list) { - if (ether_addr_equal(mid->addr, addr) && mid->vid == vid) + if (ether_addr_equal(mid->addr, addr) && mid->fid == fid) return mid; } return NULL; @@ -942,7 +942,7 @@ static struct mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp *mlxsw_sp, static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, const unsigned char *addr, - u16 vid) + u16 fid) { struct mlxsw_sp_mid *mid; u16 mid_idx; @@ -958,7 +958,7 @@ static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, set_bit(mid_idx, mlxsw_sp->br_mids.mapped); ether_addr_copy(mid->addr, addr); - mid->vid = vid; + mid->fid = fid; mid->mid = mid_idx; mid->ref_count = 0; list_add_tail(&mid->list, &mlxsw_sp->br_mids.list); @@ -991,9 +991,9 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, if (switchdev_trans_ph_prepare(trans)) return 0; - mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, mdb->vid); + mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, fid); if (!mid) { - mid = __mlxsw_sp_mc_alloc(mlxsw_sp, mdb->addr, mdb->vid); + mid = __mlxsw_sp_mc_alloc(mlxsw_sp, mdb->addr, fid); if (!mid) { netdev_err(dev, "Unable to allocate MC group\n"); return -ENOMEM; @@ -1137,7 +1137,7 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, u16 mid_idx; int err = 0; - mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, mdb->vid); + mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, fid); if (!mid) { netdev_err(dev, "Unable to remove port from MC DB\n"); return -EINVAL; From 460d2830b00db407be2b72ed792eb3596f245192 Mon Sep 17 00:00:00 2001 From: Lukas Resch Date: Mon, 10 Oct 2016 08:07:32 +0000 Subject: [PATCH 045/503] can: sja1000: plx_pci: Add support for Moxa CAN devices This patch adds support for Moxa CAN devices. Signed-off-by: Lukas Resch Signed-off-by: Christoph Zehentner Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sja1000/plx_pci.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/can/sja1000/plx_pci.c b/drivers/net/can/sja1000/plx_pci.c index 3eb7430dffbf..f8ff25c8ee2e 100644 --- a/drivers/net/can/sja1000/plx_pci.c +++ b/drivers/net/can/sja1000/plx_pci.c @@ -142,6 +142,9 @@ struct plx_pci_card { #define CTI_PCI_VENDOR_ID 0x12c4 #define CTI_PCI_DEVICE_ID_CRG001 0x0900 +#define MOXA_PCI_VENDOR_ID 0x1393 +#define MOXA_PCI_DEVICE_ID 0x0100 + static void plx_pci_reset_common(struct pci_dev *pdev); static void plx9056_pci_reset_common(struct pci_dev *pdev); static void plx_pci_reset_marathon_pci(struct pci_dev *pdev); @@ -258,6 +261,14 @@ static struct plx_pci_card_info plx_pci_card_info_elcus = { /* based on PLX9030 */ }; +static struct plx_pci_card_info plx_pci_card_info_moxa = { + "MOXA", 2, + PLX_PCI_CAN_CLOCK, PLX_PCI_OCR, PLX_PCI_CDR, + {0, 0x00, 0x00}, { {0, 0x00, 0x80}, {1, 0x00, 0x80} }, + &plx_pci_reset_common + /* based on PLX9052 */ +}; + static const struct pci_device_id plx_pci_tbl[] = { { /* Adlink PCI-7841/cPCI-7841 */ @@ -357,6 +368,13 @@ static const struct pci_device_id plx_pci_tbl[] = { 0, 0, (kernel_ulong_t)&plx_pci_card_info_elcus }, + { + /* moxa */ + MOXA_PCI_VENDOR_ID, MOXA_PCI_DEVICE_ID, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + (kernel_ulong_t)&plx_pci_card_info_moxa + }, { 0,} }; MODULE_DEVICE_TABLE(pci, plx_pci_tbl); From deb507f91f1adbf64317ad24ac46c56eeccfb754 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 24 Oct 2016 21:11:26 +0200 Subject: [PATCH 046/503] can: bcm: fix warning in bcm_connect/proc_register Andrey Konovalov reported an issue with proc_register in bcm.c. As suggested by Cong Wang this patch adds a lock_sock() protection and a check for unsuccessful proc_create_data() in bcm_connect(). Reference: http://marc.info/?l=linux-netdev&m=147732648731237 Reported-by: Andrey Konovalov Suggested-by: Cong Wang Signed-off-by: Oliver Hartkopp Acked-by: Cong Wang Tested-by: Andrey Konovalov Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- net/can/bcm.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/net/can/bcm.c b/net/can/bcm.c index 8e999ffdf28b..8af9d25ff988 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1549,24 +1549,31 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; struct sock *sk = sock->sk; struct bcm_sock *bo = bcm_sk(sk); + int ret = 0; if (len < sizeof(*addr)) return -EINVAL; - if (bo->bound) - return -EISCONN; + lock_sock(sk); + + if (bo->bound) { + ret = -EISCONN; + goto fail; + } /* bind a device to this socket */ if (addr->can_ifindex) { struct net_device *dev; dev = dev_get_by_index(&init_net, addr->can_ifindex); - if (!dev) - return -ENODEV; - + if (!dev) { + ret = -ENODEV; + goto fail; + } if (dev->type != ARPHRD_CAN) { dev_put(dev); - return -ENODEV; + ret = -ENODEV; + goto fail; } bo->ifindex = dev->ifindex; @@ -1577,17 +1584,24 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, bo->ifindex = 0; } - bo->bound = 1; - if (proc_dir) { /* unique socket address as filename */ sprintf(bo->procname, "%lu", sock_i_ino(sk)); bo->bcm_proc_read = proc_create_data(bo->procname, 0644, proc_dir, &bcm_proc_fops, sk); + if (!bo->bcm_proc_read) { + ret = -ENOMEM; + goto fail; + } } - return 0; + bo->bound = 1; + +fail: + release_sock(sk); + + return ret; } static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, From 87557efc27f6a50140fb20df06a917f368ce3c66 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Mon, 31 Oct 2016 13:38:29 +0800 Subject: [PATCH 047/503] xen-netfront: do not cast grant table reference to signed short While grant reference is of type uint32_t, xen-netfront erroneously casts it to signed short in BUG_ON(). This would lead to the xen domU panic during boot-up or migration when it is attached with lots of paravirtual devices. Signed-off-by: Dongli Zhang Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index e17879dd5d5a..189a28dcd80d 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -304,7 +304,7 @@ static void xennet_alloc_rx_buffers(struct netfront_queue *queue) queue->rx_skbs[id] = skb; ref = gnttab_claim_grant_reference(&queue->gref_rx_head); - BUG_ON((signed short)ref < 0); + WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)ref)); queue->grant_rx_ref[id] = ref; page = skb_frag_page(&skb_shinfo(skb)->frags[0]); @@ -428,7 +428,7 @@ static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset, id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs); tx = RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++); ref = gnttab_claim_grant_reference(&queue->gref_tx_head); - BUG_ON((signed short)ref < 0); + WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)ref)); gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id, gfn, GNTMAP_readonly); From cd26da4ff4eb7189921d4e7ad87e8adebb7b416b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 31 Oct 2016 20:32:31 +0800 Subject: [PATCH 048/503] sctp: hold transport instead of assoc in sctp_diag In sctp_transport_lookup_process(), Commit 1cceda784980 ("sctp: fix the issue sctp_diag uses lock_sock in rcu_read_lock") moved cb() out of rcu lock, but it put transport and hold assoc instead, and ignore that cb() still uses transport. It may cause a use-after-free issue. This patch is to hold transport instead of assoc there. Fixes: 1cceda784980 ("sctp: fix the issue sctp_diag uses lock_sock in rcu_read_lock") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9fbb6feb8c27..71b75f9d9c1b 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4480,12 +4480,9 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), if (!transport || !sctp_transport_hold(transport)) goto out; - sctp_association_hold(transport->asoc); - sctp_transport_put(transport); - rcu_read_unlock(); err = cb(transport, p); - sctp_association_put(transport->asoc); + sctp_transport_put(transport); out: return err; From 7c17fcc726903ffed1716351efdc617e752533ed Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 31 Oct 2016 20:32:32 +0800 Subject: [PATCH 049/503] sctp: return back transport in __sctp_rcv_init_lookup Prior to this patch, it used a local variable to save the transport that is looked up by __sctp_lookup_association(), and didn't return it back. But in sctp_rcv, it is used to initialize chunk->transport. So when hitting this, even if it found the transport, it was still initializing chunk->transport with null instead. This patch is to return the transport back through transport pointer that is from __sctp_rcv_lookup_harder(). Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/input.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index a2ea1d1cc06a..8e0bc58eec20 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -1021,7 +1021,6 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net, struct sctphdr *sh = sctp_hdr(skb); union sctp_params params; sctp_init_chunk_t *init; - struct sctp_transport *transport; struct sctp_af *af; /* @@ -1052,7 +1051,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net, af->from_addr_param(paddr, params.addr, sh->source, 0); - asoc = __sctp_lookup_association(net, laddr, paddr, &transport); + asoc = __sctp_lookup_association(net, laddr, paddr, transportp); if (asoc) return asoc; } From dae399d7fdee84d8f5227a9711d95bb4e9a05d4e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 31 Oct 2016 20:32:33 +0800 Subject: [PATCH 050/503] sctp: hold transport instead of assoc when lookup assoc in rx path Prior to this patch, in rx path, before calling lock_sock, it needed to hold assoc when got it by __sctp_lookup_association, in case other place would free/put assoc. But in __sctp_lookup_association, it lookup and hold transport, then got assoc by transport->assoc, then hold assoc and put transport. It means it didn't hold transport, yet it was returned and later on directly assigned to chunk->transport. Without the protection of sock lock, the transport may be freed/put by other places, which would cause a use-after-free issue. This patch is to fix this issue by holding transport instead of assoc. As holding transport can make sure to access assoc is also safe, and actually it looks up assoc by searching transport rhashtable, to hold transport here makes more sense. Note that the function will be renamed later on on another patch. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 2 +- net/sctp/input.c | 32 ++++++++++++++++---------------- net/sctp/ipv6.c | 2 +- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 87a7f42e7639..31acc3f4f132 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -152,7 +152,7 @@ void sctp_unhash_endpoint(struct sctp_endpoint *); struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *, struct sctphdr *, struct sctp_association **, struct sctp_transport **); -void sctp_err_finish(struct sock *, struct sctp_association *); +void sctp_err_finish(struct sock *, struct sctp_transport *); void sctp_icmp_frag_needed(struct sock *, struct sctp_association *, struct sctp_transport *t, __u32 pmtu); void sctp_icmp_redirect(struct sock *, struct sctp_transport *, diff --git a/net/sctp/input.c b/net/sctp/input.c index 8e0bc58eec20..a01a56ec8b8c 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -181,9 +181,10 @@ int sctp_rcv(struct sk_buff *skb) * bound to another interface, via SO_BINDTODEVICE, treat it as OOTB */ if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) { - if (asoc) { - sctp_association_put(asoc); + if (transport) { + sctp_transport_put(transport); asoc = NULL; + transport = NULL; } else { sctp_endpoint_put(ep); ep = NULL; @@ -269,8 +270,8 @@ int sctp_rcv(struct sk_buff *skb) bh_unlock_sock(sk); /* Release the asoc/ep ref we took in the lookup calls. */ - if (asoc) - sctp_association_put(asoc); + if (transport) + sctp_transport_put(transport); else sctp_endpoint_put(ep); @@ -283,8 +284,8 @@ discard_it: discard_release: /* Release the asoc/ep ref we took in the lookup calls. */ - if (asoc) - sctp_association_put(asoc); + if (transport) + sctp_transport_put(transport); else sctp_endpoint_put(ep); @@ -300,6 +301,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) { struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; struct sctp_inq *inqueue = &chunk->rcvr->inqueue; + struct sctp_transport *t = chunk->transport; struct sctp_ep_common *rcvr = NULL; int backloged = 0; @@ -351,7 +353,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) done: /* Release the refs we took in sctp_add_backlog */ if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) - sctp_association_put(sctp_assoc(rcvr)); + sctp_transport_put(t); else if (SCTP_EP_TYPE_SOCKET == rcvr->type) sctp_endpoint_put(sctp_ep(rcvr)); else @@ -363,6 +365,7 @@ done: static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb) { struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; + struct sctp_transport *t = chunk->transport; struct sctp_ep_common *rcvr = chunk->rcvr; int ret; @@ -373,7 +376,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb) * from us */ if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) - sctp_association_hold(sctp_assoc(rcvr)); + sctp_transport_hold(t); else if (SCTP_EP_TYPE_SOCKET == rcvr->type) sctp_endpoint_hold(sctp_ep(rcvr)); else @@ -537,15 +540,15 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, return sk; out: - sctp_association_put(asoc); + sctp_transport_put(transport); return NULL; } /* Common cleanup code for icmp/icmpv6 error handler. */ -void sctp_err_finish(struct sock *sk, struct sctp_association *asoc) +void sctp_err_finish(struct sock *sk, struct sctp_transport *t) { bh_unlock_sock(sk); - sctp_association_put(asoc); + sctp_transport_put(t); } /* @@ -641,7 +644,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) } out_unlock: - sctp_err_finish(sk, asoc); + sctp_err_finish(sk, transport); } /* @@ -952,11 +955,8 @@ static struct sctp_association *__sctp_lookup_association( goto out; asoc = t->asoc; - sctp_association_hold(asoc); *pt = t; - sctp_transport_put(t); - out: return asoc; } @@ -986,7 +986,7 @@ int sctp_has_association(struct net *net, struct sctp_transport *transport; if ((asoc = sctp_lookup_association(net, laddr, paddr, &transport))) { - sctp_association_put(asoc); + sctp_transport_put(transport); return 1; } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index f473779e8b1c..176af3080a2b 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -198,7 +198,7 @@ static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } out_unlock: - sctp_err_finish(sk, asoc); + sctp_err_finish(sk, transport); out: if (likely(idev != NULL)) in6_dev_put(idev); From fcdefccac976ee51dd6071832b842d8fb41c479c Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Mon, 31 Oct 2016 13:32:03 -0400 Subject: [PATCH 051/503] bgmac: stop clearing DMA receive control register right after it is set Current bgmac code initializes some DMA settings in the receive control register for some hardware and then immediately clears those settings. Not clearing those settings results in ~420Mbps *improvement* in throughput; this system can now receive frames at line-rate on Broadcom 5871x hardware compared to ~520Mbps today. I also tested a few other values but found there to be no discernible difference in CPU utilization even if burst size and prefetching values are different. On the hardware tested there was no need to keep the code that cleared all but bits 16-17, but since there is a wide variety of hardware that used this driver (I did not look at all hardware docs for hardware using this IP block), I find it wise to move this call up and clear bits just after reading the default value from the hardware rather than completely removing it. This is a good candidate for -stable >=3.14 since that is when the code that was supposed to improve performance (but did not) was introduced. Signed-off-by: Andy Gospodarek Fixes: 56ceecde1f29 ("bgmac: initialize the DMA controller of core...") Cc: Hauke Mehrtens Acked-by: Hauke Mehrtens Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bgmac.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 31ca204b38d2..91cbf92de971 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -307,6 +307,10 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac, u32 ctl; ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL); + + /* preserve ONLY bits 16-17 from current hardware value */ + ctl &= BGMAC_DMA_RX_ADDREXT_MASK; + if (bgmac->feature_flags & BGMAC_FEAT_RX_MASK_SETUP) { ctl &= ~BGMAC_DMA_RX_BL_MASK; ctl |= BGMAC_DMA_RX_BL_128 << BGMAC_DMA_RX_BL_SHIFT; @@ -317,7 +321,6 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac, ctl &= ~BGMAC_DMA_RX_PT_MASK; ctl |= BGMAC_DMA_RX_PT_1 << BGMAC_DMA_RX_PT_SHIFT; } - ctl &= BGMAC_DMA_RX_ADDREXT_MASK; ctl |= BGMAC_DMA_RX_ENABLE; ctl |= BGMAC_DMA_RX_PARITY_DISABLE; ctl |= BGMAC_DMA_RX_OVERFLOW_CONT; From 4efca4ed05cbdfd13ec3e8cb623fb77d6e4ab187 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 1 Nov 2016 12:46:19 +1100 Subject: [PATCH 052/503] kbuild: modversions for EXPORT_SYMBOL() for asm Allow architectures to create asm/asm-prototypes.h file that provides C prototypes for exported asm functions, which enables proper CRC versions to be generated for them. Signed-off-by: Nicholas Piggin Signed-off-by: Michal Marek --- scripts/Makefile.build | 78 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 72 insertions(+), 6 deletions(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index e1f25d6d132e..3e223c264469 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -159,7 +159,8 @@ cmd_cpp_i_c = $(CPP) $(c_flags) -o $@ $< $(obj)/%.i: $(src)/%.c FORCE $(call if_changed_dep,cpp_i_c) -cmd_gensymtypes = \ +# These mirror gensymtypes_S and co below, keep them in synch. +cmd_gensymtypes_c = \ $(CPP) -D__GENKSYMS__ $(c_flags) $< | \ $(GENKSYMS) $(if $(1), -T $(2)) \ $(patsubst y,-s _,$(CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX)) \ @@ -169,7 +170,7 @@ cmd_gensymtypes = \ quiet_cmd_cc_symtypes_c = SYM $(quiet_modtag) $@ cmd_cc_symtypes_c = \ set -e; \ - $(call cmd_gensymtypes,true,$@) >/dev/null; \ + $(call cmd_gensymtypes_c,true,$@) >/dev/null; \ test -s $@ || rm -f $@ $(obj)/%.symtypes : $(src)/%.c FORCE @@ -198,9 +199,10 @@ else # the actual value of the checksum generated by genksyms cmd_cc_o_c = $(CC) $(c_flags) -c -o $(@D)/.tmp_$(@F) $< -cmd_modversions = \ + +cmd_modversions_c = \ if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ - $(call cmd_gensymtypes,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ + $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ > $(@D)/.tmp_$(@F:.o=.ver); \ \ $(LD) $(LDFLAGS) -r -o $@ $(@D)/.tmp_$(@F) \ @@ -268,13 +270,14 @@ endif # CONFIG_STACK_VALIDATION define rule_cc_o_c $(call echo-cmd,checksrc) $(cmd_checksrc) \ $(call cmd_and_fixdep,cc_o_c) \ - $(cmd_modversions) \ + $(cmd_modversions_c) \ $(cmd_objtool) \ $(call echo-cmd,record_mcount) $(cmd_record_mcount) endef define rule_as_o_S $(call cmd_and_fixdep,as_o_S) \ + $(cmd_modversions_S) \ $(cmd_objtool) endef @@ -314,6 +317,39 @@ modkern_aflags := $(KBUILD_AFLAGS_KERNEL) $(AFLAGS_KERNEL) $(real-objs-m) : modkern_aflags := $(KBUILD_AFLAGS_MODULE) $(AFLAGS_MODULE) $(real-objs-m:.o=.s): modkern_aflags := $(KBUILD_AFLAGS_MODULE) $(AFLAGS_MODULE) +# .S file exports must have their C prototypes defined in asm/asm-prototypes.h +# or a file that it includes, in order to get versioned symbols. We build a +# dummy C file that includes asm-prototypes and the EXPORT_SYMBOL lines from +# the .S file (with trailing ';'), and run genksyms on that, to extract vers. +# +# This is convoluted. The .S file must first be preprocessed to run guards and +# expand names, then the resulting exports must be constructed into plain +# EXPORT_SYMBOL(symbol); to build our dummy C file, and that gets preprocessed +# to make the genksyms input. +# +# These mirror gensymtypes_c and co above, keep them in synch. +cmd_gensymtypes_S = \ + (echo "\#include " ; \ + echo "\#include " ; \ + $(CPP) $(a_flags) $< | \ + grep ^___EXPORT_SYMBOL | \ + sed 's/___EXPORT_SYMBOL \([a-zA-Z0-9_]*\),.*/EXPORT_SYMBOL(\1);/' ) | \ + $(CPP) -D__GENKSYMS__ $(c_flags) -xc - | \ + $(GENKSYMS) $(if $(1), -T $(2)) \ + $(patsubst y,-s _,$(CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX)) \ + $(if $(KBUILD_PRESERVE),-p) \ + -r $(firstword $(wildcard $(2:.symtypes=.symref) /dev/null)) + +quiet_cmd_cc_symtypes_S = SYM $(quiet_modtag) $@ +cmd_cc_symtypes_S = \ + set -e; \ + $(call cmd_gensymtypes_S,true,$@) >/dev/null; \ + test -s $@ || rm -f $@ + +$(obj)/%.symtypes : $(src)/%.S FORCE + $(call cmd,cc_symtypes_S) + + quiet_cmd_cpp_s_S = CPP $(quiet_modtag) $@ cmd_cpp_s_S = $(CPP) $(a_flags) -o $@ $< @@ -321,7 +357,37 @@ $(obj)/%.s: $(src)/%.S FORCE $(call if_changed_dep,cpp_s_S) quiet_cmd_as_o_S = AS $(quiet_modtag) $@ -cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $< + +ifndef CONFIG_MODVERSIONS +cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $< + +else + +ASM_PROTOTYPES := $(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/asm-prototypes.h) + +ifeq ($(ASM_PROTOTYPES),) +cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $< + +else + +# versioning matches the C process described above, with difference that +# we parse asm-prototypes.h C header to get function definitions. + +cmd_as_o_S = $(CC) $(a_flags) -c -o $(@D)/.tmp_$(@F) $< + +cmd_modversions_S = \ + if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ + $(call cmd_gensymtypes_S,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ + > $(@D)/.tmp_$(@F:.o=.ver); \ + \ + $(LD) $(LDFLAGS) -r -o $@ $(@D)/.tmp_$(@F) \ + -T $(@D)/.tmp_$(@F:.o=.ver); \ + rm -f $(@D)/.tmp_$(@F) $(@D)/.tmp_$(@F:.o=.ver); \ + else \ + mv -f $(@D)/.tmp_$(@F) $@; \ + fi; +endif +endif $(obj)/%.o: $(src)/%.S $(objtool_obj) FORCE $(call if_changed_rule,as_o_S) From b5a4a3eb4ec7382780aa153224780b9ecdc76ceb Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Mon, 31 Oct 2016 16:00:26 -0700 Subject: [PATCH 053/503] drivers: net: xgene: fix: Disable coalescing on v1 hardware Since ethernet v1 hardware has a bug related to coalescing, disabling this feature. Signed-off-by: Iyappan Subramanian Signed-off-by: Toan Le Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_hw.c | 12 ------------ drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 3 ++- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c index c481f104a8fe..5390ae89136c 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c @@ -204,17 +204,6 @@ static u32 xgene_enet_ring_len(struct xgene_enet_desc_ring *ring) return num_msgs; } -static void xgene_enet_setup_coalescing(struct xgene_enet_desc_ring *ring) -{ - u32 data = 0x7777; - - xgene_enet_ring_wr32(ring, CSR_PBM_COAL, 0x8e); - xgene_enet_ring_wr32(ring, CSR_PBM_CTICK1, data); - xgene_enet_ring_wr32(ring, CSR_PBM_CTICK2, data << 16); - xgene_enet_ring_wr32(ring, CSR_THRESHOLD0_SET1, 0x40); - xgene_enet_ring_wr32(ring, CSR_THRESHOLD1_SET1, 0x80); -} - void xgene_enet_parse_error(struct xgene_enet_desc_ring *ring, struct xgene_enet_pdata *pdata, enum xgene_enet_err_code status) @@ -929,5 +918,4 @@ struct xgene_ring_ops xgene_ring1_ops = { .clear = xgene_enet_clear_ring, .wr_cmd = xgene_enet_wr_cmd, .len = xgene_enet_ring_len, - .coalesce = xgene_enet_setup_coalescing, }; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 429f18fc5503..8158d4698734 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -1188,7 +1188,8 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev) tx_ring->dst_ring_num = xgene_enet_dst_ring_num(cp_ring); } - pdata->ring_ops->coalesce(pdata->tx_ring[0]); + if (pdata->ring_ops->coalesce) + pdata->ring_ops->coalesce(pdata->tx_ring[0]); pdata->tx_qcnt_hi = pdata->tx_ring[0]->slots - 128; return 0; From f126df8503275facc96dd05a818086afbb89b77d Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Mon, 31 Oct 2016 16:00:27 -0700 Subject: [PATCH 054/503] drivers: net: xgene: fix: Coalescing values for v2 hardware Changing the interrupt trigger region id to 2 and the corresponding threshold set0/set1 values to 8/16. Signed-off-by: Iyappan Subramanian Signed-off-by: Toan Le Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_hw.h | 2 ++ drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c | 12 +++++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h index 8456337a237d..06e598c8bc16 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h @@ -55,8 +55,10 @@ enum xgene_enet_rm { #define PREFETCH_BUF_EN BIT(21) #define CSR_RING_ID_BUF 0x000c #define CSR_PBM_COAL 0x0014 +#define CSR_PBM_CTICK0 0x0018 #define CSR_PBM_CTICK1 0x001c #define CSR_PBM_CTICK2 0x0020 +#define CSR_PBM_CTICK3 0x0024 #define CSR_THRESHOLD0_SET1 0x0030 #define CSR_THRESHOLD1_SET1 0x0034 #define CSR_RING_NE_INT_MODE 0x017c diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c b/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c index 2b76732add5d..af51dd5844ce 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c @@ -30,7 +30,7 @@ static void xgene_enet_ring_init(struct xgene_enet_desc_ring *ring) ring_cfg[0] |= SET_VAL(X2_INTLINE, ring->id & RING_BUFNUM_MASK); ring_cfg[3] |= SET_BIT(X2_DEQINTEN); } - ring_cfg[0] |= SET_VAL(X2_CFGCRID, 1); + ring_cfg[0] |= SET_VAL(X2_CFGCRID, 2); addr >>= 8; ring_cfg[2] |= QCOHERENT | SET_VAL(RINGADDRL, addr); @@ -192,13 +192,15 @@ static u32 xgene_enet_ring_len(struct xgene_enet_desc_ring *ring) static void xgene_enet_setup_coalescing(struct xgene_enet_desc_ring *ring) { - u32 data = 0x7777; + u32 data = 0x77777777; xgene_enet_ring_wr32(ring, CSR_PBM_COAL, 0x8e); + xgene_enet_ring_wr32(ring, CSR_PBM_CTICK0, data); xgene_enet_ring_wr32(ring, CSR_PBM_CTICK1, data); - xgene_enet_ring_wr32(ring, CSR_PBM_CTICK2, data << 16); - xgene_enet_ring_wr32(ring, CSR_THRESHOLD0_SET1, 0x40); - xgene_enet_ring_wr32(ring, CSR_THRESHOLD1_SET1, 0x80); + xgene_enet_ring_wr32(ring, CSR_PBM_CTICK2, data); + xgene_enet_ring_wr32(ring, CSR_PBM_CTICK3, data); + xgene_enet_ring_wr32(ring, CSR_THRESHOLD0_SET1, 0x08); + xgene_enet_ring_wr32(ring, CSR_THRESHOLD1_SET1, 0x10); } struct xgene_ring_ops xgene_ring2_ops = { From 7bb9f731d1026bd48b84cee7853cba7f5678193c Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Mon, 31 Oct 2016 18:18:42 -0500 Subject: [PATCH 055/503] net: qcom/emac: use correct value for SGMII_LN_UCDR_SO_GAIN_MODE0 The documentation says that SGMII_LN_UCDR_SO_GAIN_MODE0 should be set to 0, not 6, on the Qualcomm Technologies QDF2432. Signed-off-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac-sgmii.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c index 75c1b530e39e..72fe343c7a36 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c @@ -421,7 +421,7 @@ static const struct emac_reg_write sgmii_v2_laned[] = { /* CDR Settings */ {EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0, UCDR_STEP_BY_TWO_MODE0 | UCDR_xO_GAIN_MODE(10)}, - {EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0, UCDR_xO_GAIN_MODE(6)}, + {EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0, UCDR_xO_GAIN_MODE(0)}, {EMAC_SGMII_LN_UCDR_SO_CONFIG, UCDR_ENABLE | UCDR_SO_SATURATION(12)}, /* TX/RX Settings */ From e7947ea770d0de434d38a0f823e660d3fd4bebb5 Mon Sep 17 00:00:00 2001 From: Isaac Boukris Date: Tue, 1 Nov 2016 02:41:35 +0200 Subject: [PATCH 056/503] unix: escape all null bytes in abstract unix domain socket Abstract unix domain socket may embed null characters, these should be translated to '@' when printed out to proc the same way the null prefix is currently being translated. This helps for tools such as netstat, lsof and the proc based implementation in ss to show all the significant bytes of the name (instead of getting cut at the first null occurrence). Signed-off-by: Isaac Boukris Signed-off-by: David S. Miller --- net/unix/af_unix.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 145082e2ba36..5d1c14a2f268 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2812,7 +2812,8 @@ static int unix_seq_show(struct seq_file *seq, void *v) i++; } for ( ; i < len; i++) - seq_putc(seq, u->addr->name->sun_path[i]); + seq_putc(seq, u->addr->name->sun_path[i] ?: + '@'); } unix_state_unlock(s); seq_putc(seq, '\n'); From 42fb18fd5852dba9c43ac008558e4bc8062bda57 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Tue, 1 Nov 2016 08:10:53 +0100 Subject: [PATCH 057/503] net/mlx5: Simplify a test 'create_root_ns()' does not return an error pointer, so the test can be simplified to be more consistent. Signed-off-by: Christophe JAILLET Acked-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 89696048b045..914e5466f729 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1690,7 +1690,7 @@ static int init_root_ns(struct mlx5_flow_steering *steering) { steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX); - if (IS_ERR_OR_NULL(steering->root_ns)) + if (!steering->root_ns) goto cleanup; if (init_root_tree(steering, &root_fs, &steering->root_ns->ns.node)) From b9b84fc07dbc6ac74d85f1b1a401b41c403fecb1 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 1 Nov 2016 10:50:01 +0000 Subject: [PATCH 058/503] net: mv643xx_eth: ensure coalesce settings survive read-modify-write The coalesce settings behave badly when changing just one value: ... # ethtool -c eth0 rx-usecs: 249 ... # ethtool -C eth0 tx-usecs 250 ... # ethtool -c eth0 rx-usecs: 248 This occurs due to rounding errors when calculating the microseconds value - the divisons round down. This causes (eg) the rx-usecs to decrease by one every time the tx-usecs value is set as per the above. Fix this by making the divison round-to-nearest. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mv643xx_eth.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index bf5cc55ba24c..5b12022adf1f 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -1381,6 +1381,7 @@ static unsigned int get_rx_coal(struct mv643xx_eth_private *mp) temp = (val & 0x003fff00) >> 8; temp *= 64000000; + temp += mp->t_clk / 2; do_div(temp, mp->t_clk); return (unsigned int)temp; @@ -1417,6 +1418,7 @@ static unsigned int get_tx_coal(struct mv643xx_eth_private *mp) temp = (rdlp(mp, TX_FIFO_URGENT_THRESHOLD) & 0x3fff0) >> 4; temp *= 64000000; + temp += mp->t_clk / 2; do_div(temp, mp->t_clk); return (unsigned int)temp; From 6c08d7ab23dd07c046e8de1520073053bdc76ae2 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 30 Oct 2016 09:49:26 +0100 Subject: [PATCH 059/503] drm/sun4i: Fix error handling 'sun4i_layers_init()' returns an error pointer in case of error, not NULL. So test it with IS_ERR. Signed-off-by: Christophe JAILLET Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 0da9862ad8ed..077f3785439e 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -142,7 +142,7 @@ static int sun4i_drv_bind(struct device *dev) /* Create our layers */ drv->layers = sun4i_layers_init(drm); - if (!drv->layers) { + if (IS_ERR(drv->layers)) { dev_err(drm->dev, "Couldn't create the planes\n"); ret = -EINVAL; goto free_drm; From 45788f1f5534fb02063ca077719592c2c3ba621e Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Nov 2016 15:09:58 +0200 Subject: [PATCH 060/503] MAINTAINERS: Update MELLANOX MLX5 core VPI driver maintainers Add myself as a maintainer for mlx5 core driver as well. Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 4012c2f98617..53964ad4f2de 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8053,6 +8053,7 @@ F: drivers/infiniband/hw/mlx4/ F: include/linux/mlx4/ MELLANOX MLX5 core VPI driver +M: Saeed Mahameed M: Matan Barak M: Leon Romanovsky L: netdev@vger.kernel.org From 23f4ffedb7d751c7e298732ba91ca75d224bc1a6 Mon Sep 17 00:00:00 2001 From: Eli Cooper Date: Tue, 1 Nov 2016 23:45:12 +0800 Subject: [PATCH 061/503] ip6_tunnel: Clear IP6CB in ip6tunnel_xmit() skb->cb may contain data from previous layers. In the observed scenario, the garbage data were misinterpreted as IP6CB(skb)->frag_max_size, so that small packets sent through the tunnel are mistakenly fragmented. This patch unconditionally clears the control buffer in ip6tunnel_xmit(), which affects ip6_tunnel, ip6_udp_tunnel and ip6_gre. Currently none of these tunnels set IP6CB(skb)->flags, otherwise it needs to be done earlier. Cc: stable@vger.kernel.org Signed-off-by: Eli Cooper Signed-off-by: David S. Miller --- include/net/ip6_tunnel.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 20ed9699fcd4..1b1cf33cbfb0 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -146,6 +146,7 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, { int pkt_len, err; + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); pkt_len = skb->len - skb_inner_network_offset(skb); err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb); if (unlikely(net_xmit_eval(err))) From 4fd19c15decedd06d707e2691c24fce08700e2b1 Mon Sep 17 00:00:00 2001 From: Eli Cooper Date: Tue, 1 Nov 2016 23:45:13 +0800 Subject: [PATCH 062/503] ip6_udp_tunnel: remove unused IPCB related codes Some IPCB fields are currently set in udp_tunnel6_xmit_skb(), which are never used before it reaches ip6tunnel_xmit(), and past that point the control buffer is no longer interpreted as IPCB. This clears these unused IPCB related codes. Currently there is no skb scrubbing in ip6_udp_tunnel, otherwise IPCB(skb)->opt might need to be cleared for IPv4 packets, as shown in 5146d1f1511 ("tunnel: Clear IPCB(skb)->opt before dst_link_failure called"). Signed-off-by: Eli Cooper Signed-off-by: David S. Miller --- net/ipv6/ip6_udp_tunnel.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index a7520528ecd2..b283f293ee4a 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -88,9 +88,6 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, uh->len = htons(skb->len); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED - | IPSKB_REROUTED); skb_dst_set(skb, dst); udp6_set_csum(nocheck, skb, saddr, daddr, skb->len); From 269ebce4531b8edc4224259a02143181a1c1d77c Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Wed, 2 Nov 2016 09:04:33 +0800 Subject: [PATCH 063/503] xen-netfront: cast grant table reference first to type int IS_ERR_VALUE() in commit 87557efc27f6a50140fb20df06a917f368ce3c66 ("xen-netfront: do not cast grant table reference to signed short") would not return true for error code unless we cast ref first to type int. Signed-off-by: Dongli Zhang Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 189a28dcd80d..bf2744e1e3db 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -304,7 +304,7 @@ static void xennet_alloc_rx_buffers(struct netfront_queue *queue) queue->rx_skbs[id] = skb; ref = gnttab_claim_grant_reference(&queue->gref_rx_head); - WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)ref)); + WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref)); queue->grant_rx_ref[id] = ref; page = skb_frag_page(&skb_shinfo(skb)->frags[0]); @@ -428,7 +428,7 @@ static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset, id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs); tx = RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++); ref = gnttab_claim_grant_reference(&queue->gref_tx_head); - WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)ref)); + WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref)); gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id, gfn, GNTMAP_readonly); From cfbd950d5e6e649c6c1a88925feada64f890c894 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 20 Oct 2016 08:46:32 +0200 Subject: [PATCH 064/503] video: ARM CLCD: fix Vexpress regression The CLCD does not come up on Versatile Express as it does not (currently) have a syscon node for controlling the block apart from the CLCD itself. Make sure the .init() function can bail out without an error making it probe again. Reported-by: Amit Pundir Signed-off-by: Linus Walleij Tested-by: Amit Pundir Tested-by: Nicolae Rosia Signed-off-by: Tomi Valkeinen --- drivers/video/fbdev/amba-clcd-versatile.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/amba-clcd-versatile.c b/drivers/video/fbdev/amba-clcd-versatile.c index 19ad8645d93c..e5d9bfc1703a 100644 --- a/drivers/video/fbdev/amba-clcd-versatile.c +++ b/drivers/video/fbdev/amba-clcd-versatile.c @@ -526,8 +526,8 @@ int versatile_clcd_init_panel(struct clcd_fb *fb, np = of_find_matching_node_and_match(NULL, versatile_clcd_of_match, &clcd_id); if (!np) { - dev_err(dev, "no Versatile syscon node\n"); - return -ENODEV; + /* Vexpress does not have this */ + return 0; } versatile_clcd_type = (enum versatile_clcd)clcd_id->data; From 14135f30e33ce37b22529f73660d7369cf424375 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 1 Nov 2016 16:04:36 -0700 Subject: [PATCH 065/503] inet: fix sleeping inside inet_wait_for_connect() Andrey reported this kernel warning: WARNING: CPU: 0 PID: 4608 at kernel/sched/core.c:7724 __might_sleep+0x14c/0x1a0 kernel/sched/core.c:7719 do not call blocking ops when !TASK_RUNNING; state=1 set at [] prepare_to_wait+0xbc/0x210 kernel/sched/wait.c:178 Modules linked in: CPU: 0 PID: 4608 Comm: syz-executor Not tainted 4.9.0-rc2+ #320 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 ffff88006625f7a0 ffffffff81b46914 ffff88006625f818 0000000000000000 ffffffff84052960 0000000000000000 ffff88006625f7e8 ffffffff81111237 ffff88006aceac00 ffffffff00001e2c ffffed000cc4beff ffffffff84052960 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0xb3/0x10f lib/dump_stack.c:51 [] __warn+0x1a7/0x1f0 kernel/panic.c:550 [] warn_slowpath_fmt+0xac/0xd0 kernel/panic.c:565 [] __might_sleep+0x14c/0x1a0 kernel/sched/core.c:7719 [< inline >] slab_pre_alloc_hook mm/slab.h:393 [< inline >] slab_alloc_node mm/slub.c:2634 [< inline >] slab_alloc mm/slub.c:2716 [] __kmalloc_track_caller+0x150/0x2a0 mm/slub.c:4240 [] kmemdup+0x24/0x50 mm/util.c:113 [] dccp_feat_clone_sp_val.part.5+0x4f/0xe0 net/dccp/feat.c:374 [< inline >] dccp_feat_clone_sp_val net/dccp/feat.c:1141 [< inline >] dccp_feat_change_recv net/dccp/feat.c:1141 [] dccp_feat_parse_options+0xaa1/0x13d0 net/dccp/feat.c:1411 [] dccp_parse_options+0x721/0x1010 net/dccp/options.c:128 [] dccp_rcv_state_process+0x200/0x15b0 net/dccp/input.c:644 [] dccp_v4_do_rcv+0xf4/0x1a0 net/dccp/ipv4.c:681 [< inline >] sk_backlog_rcv ./include/net/sock.h:872 [] __release_sock+0x126/0x3a0 net/core/sock.c:2044 [] release_sock+0x59/0x1c0 net/core/sock.c:2502 [< inline >] inet_wait_for_connect net/ipv4/af_inet.c:547 [] __inet_stream_connect+0x5d2/0xbb0 net/ipv4/af_inet.c:617 [] inet_stream_connect+0x55/0xa0 net/ipv4/af_inet.c:656 [] SYSC_connect+0x244/0x2f0 net/socket.c:1533 [] SyS_connect+0x24/0x30 net/socket.c:1514 [] entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:209 Unlike commit 26cabd31259ba43f68026ce3f62b78094124333f ("sched, net: Clean up sk_wait_event() vs. might_sleep()"), the sleeping function is called before schedule_timeout(), this is indeed a bug. Fix this by moving the wait logic to the new API, it is similar to commit ff960a731788a7408b6f66ec4fd772ff18833211 ("netdev, sched/wait: Fix sleeping inside wait event"). Reported-by: Andrey Konovalov Cc: Andrey Konovalov Cc: Eric Dumazet Cc: Peter Zijlstra Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9648c97e541f..5ddf5cda07f4 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -533,9 +533,9 @@ EXPORT_SYMBOL(inet_dgram_connect); static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) { - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending += writebias; /* Basic assumption: if someone sets sk->sk_err, he _must_ @@ -545,13 +545,12 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) */ while ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { release_sock(sk); - timeo = schedule_timeout(timeo); + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); lock_sock(sk); if (signal_pending(current) || !timeo) break; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending -= writebias; return timeo; } From 0b53df1e9e07984a93ad3454686740fc2f4d6b4b Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Wed, 2 Nov 2016 10:52:53 +0530 Subject: [PATCH 066/503] cxgb4: correct device ID of T6 adapter Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index 50812a1d67bd..df1573c4a659 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -178,9 +178,9 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x6005), CH_PCI_ID_TABLE_FENTRY(0x6006), CH_PCI_ID_TABLE_FENTRY(0x6007), + CH_PCI_ID_TABLE_FENTRY(0x6008), CH_PCI_ID_TABLE_FENTRY(0x6009), CH_PCI_ID_TABLE_FENTRY(0x600d), - CH_PCI_ID_TABLE_FENTRY(0x6010), CH_PCI_ID_TABLE_FENTRY(0x6011), CH_PCI_ID_TABLE_FENTRY(0x6014), CH_PCI_ID_TABLE_FENTRY(0x6015), From 9512925a2cc2b1cd0206bb93bad200a69716f998 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Wed, 2 Nov 2016 16:36:46 +0200 Subject: [PATCH 067/503] qede: Correctly map aggregation replacement pages Driver allocates replacement buffers before-hand to make sure whenever an aggregation begins there would be a replacement for the Rx buffers, as we can't release the buffer until aggregation is terminated and driver logic assumes the Rx rings are always full. For every other Rx page that's being allocated [I.e., regular] the page is being completely mapped while for the replacement buffers only the first portion of the page is being mapped. This means that: a. Once replacement buffer replenishes the regular Rx ring, assuming there's more than a single packet on page we'd post unmapped memory toward HW [assuming mapping is actually done in granularity smaller than page]. b. Unmaps are being done for the entire page, which is incorrect. Fixes: 55482edc25f06 ("qede: Add slowpath/fastpath support and enable hardware GRO") Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 7def29aaf65c..85f46dbecd5b 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -2839,7 +2839,7 @@ static int qede_alloc_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq) } mapping = dma_map_page(&edev->pdev->dev, replace_buf->data, 0, - rxq->rx_buf_size, DMA_FROM_DEVICE); + PAGE_SIZE, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(&edev->pdev->dev, mapping))) { DP_NOTICE(edev, "Failed to map TPA replacement buffer\n"); From ac9e70b17ecd7c6e933ff2eaf7ab37429e71bf4d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Nov 2016 07:53:17 -0700 Subject: [PATCH 068/503] tcp: fix potential memory corruption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Imagine initial value of max_skb_frags is 17, and last skb in write queue has 15 frags. Then max_skb_frags is lowered to 14 or smaller value. tcp_sendmsg() will then be allowed to add additional page frags and eventually go past MAX_SKB_FRAGS, overflowing struct skb_shared_info. Fixes: 5f74f82ea34c ("net:Add sysctl_max_skb_frags") Signed-off-by: Eric Dumazet Cc: Hans Westgaard Ry Cc: Håkon Bugge Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3251fe71f39f..18238ef8135a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1241,7 +1241,7 @@ new_segment: if (!skb_can_coalesce(skb, i, pfrag->page, pfrag->offset)) { - if (i == sysctl_max_skb_frags || !sg) { + if (i >= sysctl_max_skb_frags || !sg) { tcp_mark_push(tp, skb); goto new_segment; } From da96786e26c3ae47316db2b92046b11268c4379c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 2 Nov 2016 12:08:25 -0700 Subject: [PATCH 069/503] net: tcp: check skb is non-NULL for exact match on lookups Andrey reported the following error report while running the syzkaller fuzzer: general protection fault: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 648 Comm: syz-executor Not tainted 4.9.0-rc3+ #333 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff8800398c4480 task.stack: ffff88003b468000 RIP: 0010:[] [< inline >] inet_exact_dif_match include/net/tcp.h:808 RIP: 0010:[] [] __inet_lookup_listener+0xb6/0x500 net/ipv4/inet_hashtables.c:219 RSP: 0018:ffff88003b46f270 EFLAGS: 00010202 RAX: 0000000000000004 RBX: 0000000000004242 RCX: 0000000000000001 RDX: 0000000000000000 RSI: ffffc90000e3c000 RDI: 0000000000000054 RBP: ffff88003b46f2d8 R08: 0000000000004000 R09: ffffffff830910e7 R10: 0000000000000000 R11: 000000000000000a R12: ffffffff867fa0c0 R13: 0000000000004242 R14: 0000000000000003 R15: dffffc0000000000 FS: 00007fb135881700(0000) GS:ffff88003ec00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020cc3000 CR3: 000000006d56a000 CR4: 00000000000006f0 Stack: 0000000000000000 000000000601a8c0 0000000000000000 ffffffff00004242 424200003b9083c2 ffff88003def4041 ffffffff84e7e040 0000000000000246 ffff88003a0911c0 0000000000000000 ffff88003a091298 ffff88003b9083ae Call Trace: [] tcp_v4_send_reset+0x584/0x1700 net/ipv4/tcp_ipv4.c:643 [] tcp_v4_rcv+0x198b/0x2e50 net/ipv4/tcp_ipv4.c:1718 [] ip_local_deliver_finish+0x332/0xad0 net/ipv4/ip_input.c:216 ... MD5 has a code path that calls __inet_lookup_listener with a null skb, so inet{6}_exact_dif_match needs to check skb against null before pulling the flag. Fixes: a04a480d4392 ("net: Require exact match for TCP socket lookups if dif is l3mdev") Reported-by: Andrey Konovalov Signed-off-by: David Ahern Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 +- include/net/tcp.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ca1ad9ebbc92..a0649973ee5b 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -149,7 +149,7 @@ static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb) { #if defined(CONFIG_NET_L3_MASTER_DEV) if (!net->ipv4.sysctl_tcp_l3mdev_accept && - ipv6_l3mdev_skb(IP6CB(skb)->flags)) + skb && ipv6_l3mdev_skb(IP6CB(skb)->flags)) return true; #endif return false; diff --git a/include/net/tcp.h b/include/net/tcp.h index 5b82d4d94834..304a8e17bc87 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -805,7 +805,7 @@ static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) if (!net->ipv4.sysctl_tcp_l3mdev_accept && - ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags)) + skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags)) return true; #endif return false; From 9ee6c5dc816aa8256257f2cd4008a9291ec7e985 Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Wed, 2 Nov 2016 16:36:17 -0400 Subject: [PATCH 070/503] ipv4: allow local fragmentation in ip_finish_output_gso() Some configurations (e.g. geneve interface with default MTU of 1500 over an ethernet interface with 1500 MTU) result in the transmission of packets that exceed the configured MTU. While this should be considered to be a "bad" configuration, it is still allowed and should not result in the sending of packets that exceed the configured MTU. Fix by dropping the assumption in ip_finish_output_gso() that locally originated gso packets will never need fragmentation. Basic testing using iperf (observing CPU usage and bandwidth) have shown no measurable performance impact for traffic not requiring fragmentation. Fixes: c7ba65d7b649 ("net: ip: push gso skb forwarding handling down the stack") Reported-by: Jan Tluka Signed-off-by: Lance Richardson Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ip.h | 3 +-- net/ipv4/ip_forward.c | 2 +- net/ipv4/ip_output.c | 6 ++---- net/ipv4/ip_tunnel_core.c | 11 ----------- net/ipv4/ipmr.c | 2 +- 5 files changed, 5 insertions(+), 19 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 5413883ac47f..d3a107850a41 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -47,8 +47,7 @@ struct inet_skb_parm { #define IPSKB_REROUTED BIT(4) #define IPSKB_DOREDIRECT BIT(5) #define IPSKB_FRAG_PMTU BIT(6) -#define IPSKB_FRAG_SEGS BIT(7) -#define IPSKB_L3SLAVE BIT(8) +#define IPSKB_L3SLAVE BIT(7) u16 frag_max_size; }; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 8b4ffd216839..9f0a7b96646f 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -117,7 +117,7 @@ int ip_forward(struct sk_buff *skb) if (opt->is_strictroute && rt->rt_uses_gateway) goto sr_failed; - IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS; + IPCB(skb)->flags |= IPSKB_FORWARDED; mtu = ip_dst_mtu_maybe_forward(&rt->dst, true); if (ip_exceeds_mtu(skb, mtu)) { IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 03e7f7310423..49714010ac2e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -239,11 +239,9 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk, struct sk_buff *segs; int ret = 0; - /* common case: fragmentation of segments is not allowed, - * or seglen is <= mtu + /* common case: seglen is <= mtu */ - if (((IPCB(skb)->flags & IPSKB_FRAG_SEGS) == 0) || - skb_gso_validate_mtu(skb, mtu)) + if (skb_gso_validate_mtu(skb, mtu)) return ip_finish_output2(net, sk, skb); /* Slowpath - GSO segment length is exceeding the dst MTU. diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 777bc1883870..fed3d29f9eb3 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -63,7 +63,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, int pkt_len = skb->len - skb_inner_network_offset(skb); struct net *net = dev_net(rt->dst.dev); struct net_device *dev = skb->dev; - int skb_iif = skb->skb_iif; struct iphdr *iph; int err; @@ -73,16 +72,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, skb_dst_set(skb, &rt->dst); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - if (skb_iif && !(df & htons(IP_DF))) { - /* Arrived from an ingress interface, got encapsulated, with - * fragmentation of encapulating frames allowed. - * If skb is gso, the resulting encapsulated network segments - * may exceed dst mtu. - * Allow IP Fragmentation of segments. - */ - IPCB(skb)->flags |= IPSKB_FRAG_SEGS; - } - /* Push down and install the IP header. */ skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5f006e13de56..27089f5ebbb1 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1749,7 +1749,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, vif->dev->stats.tx_bytes += skb->len; } - IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS; + IPCB(skb)->flags |= IPSKB_FORWARDED; /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally * not only before forwarding, but after forwarding on all output From 79d8665b9545e128637c51cf7febde9c493b6481 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Nov 2016 14:41:50 -0700 Subject: [PATCH 071/503] tcp: fix return value for partial writes After my commit, tcp_sendmsg() might restart its loop after processing socket backlog. If sk_err is set, we blindly return an error, even though we copied data to user space before. We should instead return number of bytes that could be copied, otherwise user space might resend data and corrupt the stream. This might happen if another thread is using recvmsg(MSG_ERRQUEUE) to process timestamps. Issue was diagnosed by Soheil and Willem, big kudos to them ! Fixes: d41a69f1d390f ("tcp: make tcp_sendmsg() aware of socket backlog") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Cc: Soheil Hassas Yeganeh Cc: Yuchung Cheng Cc: Neal Cardwell Tested-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 18238ef8135a..814af89c1bd3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1164,7 +1164,7 @@ restart: err = -EPIPE; if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) - goto out_err; + goto do_error; sg = !!(sk->sk_route_caps & NETIF_F_SG); From c3f24cfb3e508c70c26ee8569d537c8ca67a36c6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Nov 2016 17:14:41 -0700 Subject: [PATCH 072/503] dccp: do not release listeners too soon Andrey Konovalov reported following error while fuzzing with syzkaller : IPv4: Attempt to release alive inet socket ffff880068e98940 kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Modules linked in: CPU: 1 PID: 3905 Comm: a.out Not tainted 4.9.0-rc3+ #333 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff88006b9e0000 task.stack: ffff880068770000 RIP: 0010:[] [] selinux_socket_sock_rcv_skb+0xff/0x6a0 security/selinux/hooks.c:4639 RSP: 0018:ffff8800687771c8 EFLAGS: 00010202 RAX: ffff88006b9e0000 RBX: 1ffff1000d0eee3f RCX: 1ffff1000d1d312a RDX: 1ffff1000d1d31a6 RSI: dffffc0000000000 RDI: 0000000000000010 RBP: ffff880068777360 R08: 0000000000000000 R09: 0000000000000002 R10: dffffc0000000000 R11: 0000000000000006 R12: ffff880068e98940 R13: 0000000000000002 R14: ffff880068777338 R15: 0000000000000000 FS: 00007f00ff760700(0000) GS:ffff88006cd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020008000 CR3: 000000006a308000 CR4: 00000000000006e0 Stack: ffff8800687771e0 ffffffff812508a5 ffff8800686f3168 0000000000000007 ffff88006ac8cdfc ffff8800665ea500 0000000041b58ab3 ffffffff847b5480 ffffffff819eac60 ffff88006b9e0860 ffff88006b9e0868 ffff88006b9e07f0 Call Trace: [] security_sock_rcv_skb+0x75/0xb0 security/security.c:1317 [] sk_filter_trim_cap+0x67/0x10e0 net/core/filter.c:81 [] __sk_receive_skb+0x30/0xa00 net/core/sock.c:460 [] dccp_v4_rcv+0xdb2/0x1910 net/dccp/ipv4.c:873 [] ip_local_deliver_finish+0x332/0xad0 net/ipv4/ip_input.c:216 [< inline >] NF_HOOK_THRESH ./include/linux/netfilter.h:232 [< inline >] NF_HOOK ./include/linux/netfilter.h:255 [] ip_local_deliver+0x1c2/0x4b0 net/ipv4/ip_input.c:257 [< inline >] dst_input ./include/net/dst.h:507 [] ip_rcv_finish+0x750/0x1c40 net/ipv4/ip_input.c:396 [< inline >] NF_HOOK_THRESH ./include/linux/netfilter.h:232 [< inline >] NF_HOOK ./include/linux/netfilter.h:255 [] ip_rcv+0x96f/0x12f0 net/ipv4/ip_input.c:487 [] __netif_receive_skb_core+0x1897/0x2a50 net/core/dev.c:4213 [] __netif_receive_skb+0x2a/0x170 net/core/dev.c:4251 [] netif_receive_skb_internal+0x1b3/0x390 net/core/dev.c:4279 [] netif_receive_skb+0x48/0x250 net/core/dev.c:4303 [] tun_get_user+0xbd5/0x28a0 drivers/net/tun.c:1308 [] tun_chr_write_iter+0xda/0x190 drivers/net/tun.c:1332 [< inline >] new_sync_write fs/read_write.c:499 [] __vfs_write+0x334/0x570 fs/read_write.c:512 [] vfs_write+0x17b/0x500 fs/read_write.c:560 [< inline >] SYSC_write fs/read_write.c:607 [] SyS_write+0xd4/0x1a0 fs/read_write.c:599 [] entry_SYSCALL_64_fastpath+0x1f/0xc2 It turns out DCCP calls __sk_receive_skb(), and this broke when lookups no longer took a reference on listeners. Fix this issue by adding a @refcounted parameter to __sk_receive_skb(), so that sock_put() is used only when needed. Fixes: 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under synflood") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- include/net/sock.h | 4 ++-- net/core/sock.c | 5 +++-- net/dccp/ipv4.c | 2 +- net/dccp/ipv6.c | 3 ++- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 73c6b008f1b7..92b269709b9a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1596,11 +1596,11 @@ static inline void sock_put(struct sock *sk) void sock_gen_put(struct sock *sk); int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested, - unsigned int trim_cap); + unsigned int trim_cap, bool refcounted); static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) { - return __sk_receive_skb(sk, skb, nested, 1); + return __sk_receive_skb(sk, skb, nested, 1, true); } static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) diff --git a/net/core/sock.c b/net/core/sock.c index df171acfe232..5e3ca414357e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -453,7 +453,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) EXPORT_SYMBOL(sock_queue_rcv_skb); int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, - const int nested, unsigned int trim_cap) + const int nested, unsigned int trim_cap, bool refcounted) { int rc = NET_RX_SUCCESS; @@ -487,7 +487,8 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, bh_unlock_sock(sk); out: - sock_put(sk); + if (refcounted) + sock_put(sk); return rc; discard_and_relse: kfree_skb(skb); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 345a3aeb8c7e..dff7cfab1da4 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -868,7 +868,7 @@ lookup: goto discard_and_relse; nf_reset(skb); - return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4); + return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted); no_dccp_socket: if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 3828f94b234c..09c4e19aa285 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -738,7 +738,8 @@ lookup: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; - return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4) ? -1 : 0; + return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, + refcounted) ? -1 : 0; no_dccp_socket: if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) From 346da62cc186c4b4b1ac59f87f4482b47a047388 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Nov 2016 18:04:24 -0700 Subject: [PATCH 073/503] dccp: do not send reset to already closed sockets Andrey reported following warning while fuzzing with syzkaller WARNING: CPU: 1 PID: 21072 at net/dccp/proto.c:83 dccp_set_state+0x229/0x290 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 21072 Comm: syz-executor Not tainted 4.9.0-rc1+ #293 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 ffff88003d4c7738 ffffffff81b474f4 0000000000000003 dffffc0000000000 ffffffff844f8b00 ffff88003d4c7804 ffff88003d4c7800 ffffffff8140c06a 0000000041b58ab3 ffffffff8479ab7d ffffffff8140beae ffffffff8140cd00 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0xb3/0x10f lib/dump_stack.c:51 [] panic+0x1bc/0x39d kernel/panic.c:179 [] __warn+0x1cc/0x1f0 kernel/panic.c:542 [] warn_slowpath_null+0x2c/0x40 kernel/panic.c:585 [] dccp_set_state+0x229/0x290 net/dccp/proto.c:83 [] dccp_close+0x612/0xc10 net/dccp/proto.c:1016 [] inet_release+0xef/0x1c0 net/ipv4/af_inet.c:415 [] sock_release+0x8e/0x1d0 net/socket.c:570 [] sock_close+0x16/0x20 net/socket.c:1017 [] __fput+0x29d/0x720 fs/file_table.c:208 [] ____fput+0x15/0x20 fs/file_table.c:244 [] task_work_run+0xf8/0x170 kernel/task_work.c:116 [< inline >] exit_task_work include/linux/task_work.h:21 [] do_exit+0x883/0x2ac0 kernel/exit.c:828 [] do_group_exit+0x10e/0x340 kernel/exit.c:931 [] get_signal+0x634/0x15a0 kernel/signal.c:2307 [] do_signal+0x8d/0x1a30 arch/x86/kernel/signal.c:807 [] exit_to_usermode_loop+0xe5/0x130 arch/x86/entry/common.c:156 [< inline >] prepare_exit_to_usermode arch/x86/entry/common.c:190 [] syscall_return_slowpath+0x1a8/0x1e0 arch/x86/entry/common.c:259 [] entry_SYSCALL_64_fastpath+0xc0/0xc2 Dumping ftrace buffer: (ftrace buffer empty) Kernel Offset: disabled Fix this the same way we did for TCP in commit 565b7b2d2e63 ("tcp: do not send reset to already closed sockets") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- net/dccp/proto.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 41e65804ddf5..9fe25bf63296 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1009,6 +1009,10 @@ void dccp_close(struct sock *sk, long timeout) __kfree_skb(skb); } + /* If socket has been already reset kill it. */ + if (sk->sk_state == DCCP_CLOSED) + goto adjudge_to_death; + if (data_was_unread) { /* Unread data was tossed, send an appropriate Reset Code */ DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread); From 6706a97fec963d6cb3f7fc2978ec1427b4651214 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Nov 2016 19:00:40 -0700 Subject: [PATCH 074/503] dccp: fix out of bound access in dccp_v4_err() dccp_v4_err() does not use pskb_may_pull() and might access garbage. We only need 4 bytes at the beginning of the DCCP header, like TCP, so the 8 bytes pulled in icmp_socket_deliver() are more than enough. This patch might allow to process more ICMP messages, as some routers are still limiting the size of reflected bytes to 28 (RFC 792), instead of extended lengths (RFC 1812 4.3.2.3) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index dff7cfab1da4..b567c8725aea 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -235,7 +235,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) { const struct iphdr *iph = (struct iphdr *)skb->data; const u8 offset = iph->ihl << 2; - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); + const struct dccp_hdr *dh; struct dccp_sock *dp; struct inet_sock *inet; const int type = icmp_hdr(skb)->type; @@ -245,11 +245,13 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) int err; struct net *net = dev_net(skb->dev); - if (skb->len < offset + sizeof(*dh) || - skb->len < offset + __dccp_basic_hdr_len(dh)) { - __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); - return; - } + /* Only need dccph_dport & dccph_sport which are the first + * 4 bytes in dccp header. + * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us. + */ + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8); + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8); + dh = (struct dccp_hdr *)(skb->data + offset); sk = __inet_lookup_established(net, &dccp_hashinfo, iph->daddr, dh->dccph_dport, From 93636d1f1f162ae89ae4f2a22a83bf4fd960724e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Nov 2016 20:21:20 -0700 Subject: [PATCH 075/503] netlink: netlink_diag_dump() runs without locks A recent commit removed locking from netlink_diag_dump() but forgot one error case. ===================================== [ BUG: bad unlock balance detected! ] 4.9.0-rc3+ #336 Not tainted ------------------------------------- syz-executor/4018 is trying to release lock ([ 36.220068] nl_table_lock ) at: [] netlink_diag_dump+0x1a3/0x250 net/netlink/diag.c:182 but there are no more locks to release! other info that might help us debug this: 3 locks held by syz-executor/4018: #0: [ 36.220068] ( sock_diag_mutex[ 36.220068] ){+.+.+.} , at: [ 36.220068] [] sock_diag_rcv+0x1b/0x40 #1: [ 36.220068] ( sock_diag_table_mutex[ 36.220068] ){+.+.+.} , at: [ 36.220068] [] sock_diag_rcv_msg+0x140/0x3a0 #2: [ 36.220068] ( nlk->cb_mutex[ 36.220068] ){+.+.+.} , at: [ 36.220068] [] netlink_dump+0x50/0xac0 stack backtrace: CPU: 1 PID: 4018 Comm: syz-executor Not tainted 4.9.0-rc3+ #336 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 ffff8800645df688 ffffffff81b46934 ffffffff84eb3e78 ffff88006ad85800 ffffffff82dc8683 ffffffff84eb3e78 ffff8800645df6b8 ffffffff812043ca dffffc0000000000 ffff88006ad85ff8 ffff88006ad85fd0 00000000ffffffff Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0xb3/0x10f lib/dump_stack.c:51 [] print_unlock_imbalance_bug+0x17a/0x1a0 kernel/locking/lockdep.c:3388 [< inline >] __lock_release kernel/locking/lockdep.c:3512 [] lock_release+0x8e8/0xc60 kernel/locking/lockdep.c:3765 [< inline >] __raw_read_unlock ./include/linux/rwlock_api_smp.h:225 [] _raw_read_unlock+0x1a/0x30 kernel/locking/spinlock.c:255 [] netlink_diag_dump+0x1a3/0x250 net/netlink/diag.c:182 [] netlink_dump+0x397/0xac0 net/netlink/af_netlink.c:2110 Fixes: ad202074320c ("netlink: Use rhashtable walk interface in diag dump") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- net/netlink/diag.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/netlink/diag.c b/net/netlink/diag.c index b2f0e986a6f4..a5546249fb10 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -178,11 +178,8 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) } cb->args[1] = i; } else { - if (req->sdiag_protocol >= MAX_LINKS) { - read_unlock(&nl_table_lock); - rcu_read_unlock(); + if (req->sdiag_protocol >= MAX_LINKS) return -ENOENT; - } err = __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num); } From 1aa9d1a0e7eefcc61696e147d123453fc0016005 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Nov 2016 20:30:48 -0700 Subject: [PATCH 076/503] ipv6: dccp: fix out of bound access in dccp_v6_err() dccp_v6_err() does not use pskb_may_pull() and might access garbage. We only need 4 bytes at the beginning of the DCCP header, like TCP, so the 8 bytes pulled in icmpv6_notify() are more than enough. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/dccp/ipv6.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 09c4e19aa285..b2a43af967e5 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -70,7 +70,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); + const struct dccp_hdr *dh; struct dccp_sock *dp; struct ipv6_pinfo *np; struct sock *sk; @@ -78,12 +78,13 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, __u64 seq; struct net *net = dev_net(skb->dev); - if (skb->len < offset + sizeof(*dh) || - skb->len < offset + __dccp_basic_hdr_len(dh)) { - __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), - ICMP6_MIB_INERRORS); - return; - } + /* Only need dccph_dport & dccph_sport which are the first + * 4 bytes in dccp header. + * Our caller (icmpv6_notify()) already pulled 8 bytes for us. + */ + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8); + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8); + dh = (struct dccp_hdr *)(skb->data + offset); sk = __inet6_lookup_established(net, &dccp_hashinfo, &hdr->daddr, dh->dccph_dport, From 29ab5a3b94c87382da06db88e96119911d557293 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Thu, 3 Nov 2016 08:16:20 -0200 Subject: [PATCH 077/503] ehea: fix operation state report Currently the ehea driver is missing a call to netif_carrier_off() before the interface bring-up; this is necessary in order to initialize the __LINK_STATE_NOCARRIER bit in the net_device state field. Otherwise, we observe state UNKNOWN on "ip address" command output. This patch adds a call to netif_carrier_off() on ehea's net device open callback. Reported-by: Xiong Zhou Reference-ID: IBM bz #137702, Red Hat bz #1089134 Signed-off-by: Guilherme G. Piccoli Signed-off-by: Douglas Miller Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ehea/ehea_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 54efa9a5167b..bd719e25dd76 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -2446,6 +2446,8 @@ static int ehea_open(struct net_device *dev) netif_info(port, ifup, dev, "enabling port\n"); + netif_carrier_off(dev); + ret = ehea_up(dev); if (!ret) { port_napi_enable(port); From 990ff4d84408fc55942ca6644f67e361737b3d8e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Nov 2016 08:59:46 -0700 Subject: [PATCH 078/503] ipv6: dccp: add missing bind_conflict to dccp_ipv6_mapped While fuzzing kernel with syzkaller, Andrey reported a nasty crash in inet6_bind() caused by DCCP lacking a required method. Fixes: ab1e0a13d7029 ("[SOCK] proto: Add hashinfo member to struct proto") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Cc: Arnaldo Carvalho de Melo Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ipv6.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index b2a43af967e5..715e5d1dc107 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -958,6 +958,7 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = { .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6), + .bind_conflict = inet6_csk_bind_conflict, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_ipv6_setsockopt, .compat_getsockopt = compat_ipv6_getsockopt, From 00ffc1ba02d876478c125e4305f9a02d40c6d284 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 3 Nov 2016 09:42:35 -0700 Subject: [PATCH 079/503] genetlink: fix a memory leak on error path In __genl_register_family(), when genl_validate_assign_mc_groups() fails, we forget to free the memory we possibly allocate for family->attrbuf. Note, some callers call genl_unregister_family() to clean up on error path, it doesn't work because the family is inserted to the global list in the nearly last step. Cc: Jakub Kicinski Cc: Johannes Berg Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 23cc12639ba7..49c28e8ef01b 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -404,7 +404,7 @@ int __genl_register_family(struct genl_family *family) err = genl_validate_assign_mc_groups(family); if (err) - goto errout_locked; + goto errout_free; list_add_tail(&family->family_list, genl_family_chain(family->id)); genl_unlock_all(); @@ -417,6 +417,8 @@ int __genl_register_family(struct genl_family *family) return 0; +errout_free: + kfree(family->attrbuf); errout_locked: genl_unlock_all(); errout: From 243d52126184b072a18fe2130ce0008f8aa3a340 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 3 Nov 2016 09:42:36 -0700 Subject: [PATCH 080/503] taskstats: fix the length of cgroupstats_cmd_get_policy cgroupstats_cmd_get_policy is [CGROUPSTATS_CMD_ATTR_MAX+1], taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1], but their family.maxattr is TASKSTATS_CMD_ATTR_MAX. CGROUPSTATS_CMD_ATTR_MAX is less than TASKSTATS_CMD_ATTR_MAX, so we could end up accessing out-of-bound. Change cgroupstats_cmd_get_policy to TASKSTATS_CMD_ATTR_MAX+1, this is safe because the rest are initialized to 0's. Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- kernel/taskstats.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/taskstats.c b/kernel/taskstats.c index b3f05ee20d18..cbb387a265db 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -54,7 +54,11 @@ static const struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1 [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING }, [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },}; -static const struct nla_policy cgroupstats_cmd_get_policy[CGROUPSTATS_CMD_ATTR_MAX+1] = { +/* + * We have to use TASKSTATS_CMD_ATTR_MAX here, it is the maxattr in the family. + * Make sure they are always aligned. + */ +static const struct nla_policy cgroupstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] = { [CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 }, }; From ac95330b96376550ae7a533d1396272d675adfa2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phan=20Rafin?= Date: Fri, 4 Nov 2016 00:53:56 +0100 Subject: [PATCH 081/503] clk: sunxi: Fix M factor computation for APB1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit cfa636886033 ("clk: sunxi: factors: Consolidate get_factors parameters into a struct") introduced a regression for m factor computation in sun4i_get_apb1_factors function. The old code reassigned the "parent_rate" parameter to the targeted divisor value and was buggy for the returned frequency but not for the computed factors. Now, returned frequency is good but m factor is incorrectly computed (its max value 31 is always set resulting in a significantly slower frequency than the requested one...) This patch simply restores the original proper computation for m while keeping the good changes for returned rate. Fixes: cfa636886033 ("clk: sunxi: factors: Consolidate get_factors parameters into a struct") Signed-off-by: Stéphan Rafin Signed-off-by: Maxime Ripard --- drivers/clk/sunxi/clk-sunxi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi/clk-sunxi.c b/drivers/clk/sunxi/clk-sunxi.c index 838b22aa8b67..f2c9274b8bd5 100644 --- a/drivers/clk/sunxi/clk-sunxi.c +++ b/drivers/clk/sunxi/clk-sunxi.c @@ -373,7 +373,7 @@ static void sun4i_get_apb1_factors(struct factors_request *req) else calcp = 3; - calcm = (req->parent_rate >> calcp) - 1; + calcm = (div >> calcp) - 1; req->rate = (req->parent_rate >> calcp) / (calcm + 1); req->m = calcm; From 27915aa61060fd8954a68a86657784705955088a Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 2 Nov 2016 18:14:43 +0100 Subject: [PATCH 082/503] batman-adv: Revert "fix splat on disabling an interface" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit 9799c50372b2 ("batman-adv: fix splat on disabling an interface") fixed a warning but at the same time broke the rtnl function add_slave for devices which were temporarily removed. batadv_softif_slave_add requires soft_iface of and hard_iface to be NULL before it is allowed to be enslaved. But this resetting of soft_iface to NULL in batadv_hardif_disable_interface was removed with the aforementioned commit. Reported-by: Julian Labus Signed-off-by: Sven Eckelmann Acked-by: Linus Lüssing Signed-off-by: Simon Wunderlich --- net/batman-adv/hard-interface.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index e034afbd1bb0..08ce36147c4c 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -652,6 +652,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, batadv_softif_destroy_sysfs(hard_iface->soft_iface); } + hard_iface->soft_iface = NULL; batadv_hardif_put(hard_iface); out: From e13258f38e927b61cdb5f4ad25309450d3b127d1 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 29 Oct 2016 09:18:43 +0200 Subject: [PATCH 083/503] batman-adv: Detect missing primaryif during tp_send as error The throughput meter detects different situations as problems for the current test. It stops the test after these and reports it to userspace. This also has to be done when the primary interface disappeared during the test. Fixes: 33a3bb4a3345 ("batman-adv: throughput meter implementation") Reported-by: Joe Perches Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/tp_meter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 2333777f919d..8af1611b8ab2 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -837,6 +837,7 @@ static int batadv_tp_send(void *arg) primary_if = batadv_primary_if_get_selected(bat_priv); if (unlikely(!primary_if)) { err = BATADV_TP_REASON_DST_UNREACHABLE; + tp_vars->reason = err; goto out; } From 87dc02551c509703123a60dd7f043d75e92a6aed Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 4 Nov 2016 01:48:42 +0200 Subject: [PATCH 084/503] net/mlx5e: Fix XDP error path of mlx5e_open_channel() In case of mlx5e_open_rq fails the error handling will jump to label err_close_xdp_sq and will try to close the xdp_sq unconditionally. xdp_sq is valid only in case of XDP use cases, i.e priv->xdp_prog is not null. To fix this in this patch we test xdp_sq validity prior to closing it. In addition we now close the xdp_sq.cq as well. Fixes: b5503b994ed5 ("net/mlx5e: XDP TX forwarding support") Signed-off-by: Saeed Mahameed Reported-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f4c687ce4c59..c83619d081d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1512,7 +1512,10 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, return 0; err_close_xdp_sq: - mlx5e_close_sq(&c->xdp_sq); + if (priv->xdp_prog) { + mlx5e_close_sq(&c->xdp_sq); + mlx5e_close_cq(&c->xdp_sq.cq); + } err_close_sqs: mlx5e_close_sqs(c); From d7a0ecab380c62ccd9fbe2e08dd720f7a367d6ee Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 4 Nov 2016 01:48:43 +0200 Subject: [PATCH 085/503] net/mlx5e: Re-arrange XDP SQ/CQ creation In mlx5e_open_channel CQs must be created before napi is enabled. Here we move the XDP CQ creation to satisfy that fact. mlx5e_close_channel is already working according to the right order. Fixes: b5503b994ed5 ("net/mlx5e: XDP TX forwarding support") Signed-off-by: Saeed Mahameed Reported-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c83619d081d8..84e8b250e2af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1445,6 +1445,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->netdev = priv->netdev; c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); c->num_tc = priv->params.num_tc; + c->xdp = !!priv->xdp_prog; if (priv->params.rx_am_enabled) rx_cq_profile = mlx5e_am_get_def_profile(priv->params.rx_cq_period_mode); @@ -1468,6 +1469,12 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, if (err) goto err_close_tx_cqs; + /* XDP SQ CQ params are same as normal TXQ sq CQ params */ + err = c->xdp ? mlx5e_open_cq(c, &cparam->tx_cq, &c->xdp_sq.cq, + priv->params.tx_cq_moderation) : 0; + if (err) + goto err_close_rx_cq; + napi_enable(&c->napi); err = mlx5e_open_sq(c, 0, &cparam->icosq, &c->icosq); @@ -1488,21 +1495,10 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, } } - if (priv->xdp_prog) { - /* XDP SQ CQ params are same as normal TXQ sq CQ params */ - err = mlx5e_open_cq(c, &cparam->tx_cq, &c->xdp_sq.cq, - priv->params.tx_cq_moderation); - if (err) - goto err_close_sqs; + err = c->xdp ? mlx5e_open_sq(c, 0, &cparam->xdp_sq, &c->xdp_sq) : 0; + if (err) + goto err_close_sqs; - err = mlx5e_open_sq(c, 0, &cparam->xdp_sq, &c->xdp_sq); - if (err) { - mlx5e_close_cq(&c->xdp_sq.cq); - goto err_close_sqs; - } - } - - c->xdp = !!priv->xdp_prog; err = mlx5e_open_rq(c, &cparam->rq, &c->rq); if (err) goto err_close_xdp_sq; @@ -1512,10 +1508,8 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, return 0; err_close_xdp_sq: - if (priv->xdp_prog) { + if (c->xdp) mlx5e_close_sq(&c->xdp_sq); - mlx5e_close_cq(&c->xdp_sq.cq); - } err_close_sqs: mlx5e_close_sqs(c); @@ -1525,6 +1519,10 @@ err_close_icosq: err_disable_napi: napi_disable(&c->napi); + if (c->xdp) + mlx5e_close_cq(&c->xdp_sq.cq); + +err_close_rx_cq: mlx5e_close_cq(&c->rq.cq); err_close_tx_cqs: From abd3277287c7743d3999b801c6769e8ad1b381dd Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 4 Nov 2016 01:48:44 +0200 Subject: [PATCH 086/503] net/mlx5e: Disallow changing name-space for VF representors VF reps should be altogether on the same NS as they were created. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 7fe6559e4ab3..bf1c09ca73c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -308,7 +308,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) netdev->switchdev_ops = &mlx5e_rep_switchdev_ops; #endif - netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC; + netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL; netdev->hw_features |= NETIF_F_HW_TC; eth_hw_addr_random(netdev); From 358d79a47e5a8db83925241629252cfe64f225f7 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 4 Nov 2016 01:48:45 +0200 Subject: [PATCH 087/503] net/mlx5e: Handle matching on vlan priority for offloaded TC rules We ignored the vlan priority in offloaded TC rules matching part, fix that. Fixes: 095b6cfd69ce ('net/mlx5e: Add TC vlan match parsing') Signed-off-by: Or Gerlitz Reported-by: Paul Blakey Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index ce8c54d18906..6bb21b31cfeb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -237,12 +237,15 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec skb_flow_dissector_target(f->dissector, FLOW_DISSECTOR_KEY_VLAN, f->mask); - if (mask->vlan_id) { + if (mask->vlan_id || mask->vlan_priority) { MLX5_SET(fte_match_set_lyr_2_4, headers_c, vlan_tag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_v, vlan_tag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id); MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority); } } From ee39fbc4447d5c42640963b559bf68490cb45308 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 4 Nov 2016 01:48:46 +0200 Subject: [PATCH 088/503] net/mlx5: E-Switch, Set the actions for offloaded rules properly As for the current generation of the mlx5 HW (CX4/CX4-Lx) per flow vlan push/pop actions are emulated, we must not program them to the firmware. Fixes: f5f82476090f ('net/mlx5: E-Switch, Support VLAN actions in the offloads mode') Signed-off-by: Or Gerlitz Reported-by: Paul Blakey Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index c55ad8d00c05..d239f5d0ea36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -57,7 +57,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (esw->mode != SRIOV_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); - action = attr->action; + /* per flow vlan pop/push is emulated, don't set that into the firmware */ + action = attr->action & ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; From 0e97a34083a03c931d4e9b34ba7899c29a09dce6 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Fri, 4 Nov 2016 01:48:47 +0200 Subject: [PATCH 089/503] net/mlx5: Fix invalid pointer reference when prof_sel parameter is invalid When prof_sel is invalid, mlx5_core_warn is called but the mlx5_core_dev is not initialized yet. Solution is moving the prof_sel code after dev->pdev assignment Fixes: 2974ab6e8bd8 ('net/mlx5: Improve driver log messages') Signed-off-by: Huy Nguyen Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index d5433c49b2b0..3eb931585b3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1226,6 +1226,9 @@ static int init_one(struct pci_dev *pdev, pci_set_drvdata(pdev, dev); + dev->pdev = pdev; + dev->event = mlx5_core_event; + if (prof_sel < 0 || prof_sel >= ARRAY_SIZE(profile)) { mlx5_core_warn(dev, "selected profile out of range, selecting default (%d)\n", @@ -1233,8 +1236,6 @@ static int init_one(struct pci_dev *pdev, prof_sel = MLX5_DEFAULT_PROF; } dev->profile = &profile[prof_sel]; - dev->pdev = pdev; - dev->event = mlx5_core_event; INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); From 3984903a2e3906d3def220e688040ce93368200a Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 27 Oct 2016 11:27:25 +0530 Subject: [PATCH 090/503] rtc: omap: Fix selecting external osc RTC can be clocked from an external 32KHz oscillator, or from the Peripheral PLL. The RTC has an internal oscillator buffer to support direct operation with a crystal. ---------------------------------------- | Device --------- | | | | | | | RTCSS | | | --------- | | | OSC |<------| RTC | | | | |------>| OSC |--- | | | | -------- | | | | | ----|clk | | | -------- | | | | | | PRCM |--- | | | | -------- -------- | ---------------------------------------- The RTC functional clock is sourced by default from the clock derived from the Peripheral PLL. In order to select source as external osc clk the following changes needs to be done: - Enable the RTC OSC (RTC_OSC_REG[4]OSC32K_GZ = 0) - Enable the clock mux(RTC_OSC_REG[6]K32CLK_EN = 1) - Select the external clock source (RTC_OSC_REG[3]32KCLK_SEL = 1) Fixes: 399cf0f63f6f2 ("rtc: omap: Add external clock enabling support") Signed-off-by: Keerthy Signed-off-by: Lokesh Vutla Signed-off-by: Dave Gerlach Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-omap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index b04ea9b5ae67..dddaa60871b9 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -113,6 +113,7 @@ /* OMAP_RTC_OSC_REG bit fields: */ #define OMAP_RTC_OSC_32KCLK_EN BIT(6) #define OMAP_RTC_OSC_SEL_32KCLK_SRC BIT(3) +#define OMAP_RTC_OSC_OSC32K_GZ_DISABLE BIT(4) /* OMAP_RTC_IRQWAKEEN bit fields: */ #define OMAP_RTC_IRQWAKEEN_ALARM_WAKEEN BIT(1) @@ -786,8 +787,9 @@ static int omap_rtc_probe(struct platform_device *pdev) */ if (rtc->has_ext_clk) { reg = rtc_read(rtc, OMAP_RTC_OSC_REG); - rtc_write(rtc, OMAP_RTC_OSC_REG, - reg | OMAP_RTC_OSC_SEL_32KCLK_SRC); + reg &= ~OMAP_RTC_OSC_OSC32K_GZ_DISABLE; + reg |= OMAP_RTC_OSC_32KCLK_EN | OMAP_RTC_OSC_SEL_32KCLK_SRC; + rtc_writel(rtc, OMAP_RTC_OSC_REG, reg); } rtc->type->lock(rtc); From efce21fc43e00a76aee7b0a1eda73730ed2d5d3a Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Thu, 27 Oct 2016 11:27:26 +0530 Subject: [PATCH 091/503] rtc: omap: prevent disabling of clock/module during suspend If RTC is running from an internal clock source, the RTC module can't be disabled; otherwise it stops ticking completely. Current suspend handler implementation disables the clock/module unconditionally, instead fix this by disabling the clock only if we are running on external clock source, which is not affected by suspend. The prevention of disabling the clock must be done via implementing the runtime_pm handlers for the device, and returning an error code from the runtime suspend handler; otherwise OMAP core PM will disable the clocks for the driver. Signed-off-by: Tero Kristo Signed-off-by: Keerthy Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-omap.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index dddaa60871b9..51e52446eacb 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -147,6 +147,7 @@ struct omap_rtc { u8 interrupts_reg; bool is_pmic_controller; bool has_ext_clk; + bool is_suspending; const struct omap_rtc_device_type *type; struct pinctrl_dev *pctldev; }; @@ -900,8 +901,7 @@ static int omap_rtc_suspend(struct device *dev) rtc_write(rtc, OMAP_RTC_INTERRUPTS_REG, 0); rtc->type->lock(rtc); - /* Disable the clock/module */ - pm_runtime_put_sync(dev); + rtc->is_suspending = true; return 0; } @@ -910,9 +910,6 @@ static int omap_rtc_resume(struct device *dev) { struct omap_rtc *rtc = dev_get_drvdata(dev); - /* Enable the clock/module so that we can access the registers */ - pm_runtime_get_sync(dev); - rtc->type->unlock(rtc); if (device_may_wakeup(dev)) disable_irq_wake(rtc->irq_alarm); @@ -920,11 +917,34 @@ static int omap_rtc_resume(struct device *dev) rtc_write(rtc, OMAP_RTC_INTERRUPTS_REG, rtc->interrupts_reg); rtc->type->lock(rtc); + rtc->is_suspending = false; + return 0; } #endif -static SIMPLE_DEV_PM_OPS(omap_rtc_pm_ops, omap_rtc_suspend, omap_rtc_resume); +#ifdef CONFIG_PM +static int omap_rtc_runtime_suspend(struct device *dev) +{ + struct omap_rtc *rtc = dev_get_drvdata(dev); + + if (rtc->is_suspending && !rtc->has_ext_clk) + return -EBUSY; + + return 0; +} + +static int omap_rtc_runtime_resume(struct device *dev) +{ + return 0; +} +#endif + +static const struct dev_pm_ops omap_rtc_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(omap_rtc_suspend, omap_rtc_resume) + SET_RUNTIME_PM_OPS(omap_rtc_runtime_suspend, + omap_rtc_runtime_resume, NULL) +}; static void omap_rtc_shutdown(struct platform_device *pdev) { From e3c9d9d6ebfeeeee29c6240e1b5978d40d31d21f Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 27 Oct 2016 13:06:44 -0200 Subject: [PATCH 092/503] ARM: dts: imx53-qsb: Fix regulator constraints Since commit fa93fd4ecc9c ("regulator: core: Ensure we are at least in bounds for our constraints") the imx53-qsb board populated with a Dialog DA9053 PMIC fails to boot: LDO3: Bringing 3300000uV into 1800000-1800000uV The LDO3 voltage constraints passed in the device tree do not match the valid range according to the datasheet, so fix this accordingly to allow the board booting again. While at it, fix the other voltage constraints as well. Cc: # 4.7.x Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx53-qsb.dts | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/boot/dts/imx53-qsb.dts b/arch/arm/boot/dts/imx53-qsb.dts index dec4b073ceb1..379939699164 100644 --- a/arch/arm/boot/dts/imx53-qsb.dts +++ b/arch/arm/boot/dts/imx53-qsb.dts @@ -64,8 +64,8 @@ }; ldo3_reg: ldo3 { - regulator-min-microvolt = <600000>; - regulator-max-microvolt = <1800000>; + regulator-min-microvolt = <1725000>; + regulator-max-microvolt = <3300000>; regulator-always-on; }; @@ -76,8 +76,8 @@ }; ldo5_reg: ldo5 { - regulator-min-microvolt = <1725000>; - regulator-max-microvolt = <3300000>; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <3600000>; regulator-always-on; }; @@ -100,14 +100,14 @@ }; ldo9_reg: ldo9 { - regulator-min-microvolt = <1200000>; + regulator-min-microvolt = <1250000>; regulator-max-microvolt = <3600000>; regulator-always-on; }; ldo10_reg: ldo10 { - regulator-min-microvolt = <1250000>; - regulator-max-microvolt = <3650000>; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <3600000>; regulator-always-on; }; }; From d4eccafcaf339de77ec562e96e6b223d447f924a Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 4 Nov 2016 14:45:08 -0700 Subject: [PATCH 093/503] xtensa: clean up printk usage for boot/crash logging Convert printk(KERN_* to pr_* and printk's without level to pr_cont. This fixes torn register dumps, stack dumps, stack traces and timestamps in the middle of 'Calibrating CPU frequency' message. Also drop unused show_code and drop false comment about show_stack. Signed-off-by: Max Filippov --- arch/xtensa/kernel/time.c | 14 ++++---- arch/xtensa/kernel/traps.c | 74 ++++++++++++-------------------------- 2 files changed, 29 insertions(+), 59 deletions(-) diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index 9a5bcd0381a7..be81e69b25bc 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -172,10 +172,11 @@ void __init time_init(void) { of_clk_init(NULL); #ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT - printk("Calibrating CPU frequency "); + pr_info("Calibrating CPU frequency "); calibrate_ccount(); - printk("%d.%02d MHz\n", (int)ccount_freq/1000000, - (int)(ccount_freq/10000)%100); + pr_cont("%d.%02d MHz\n", + (int)ccount_freq / 1000000, + (int)(ccount_freq / 10000) % 100); #else ccount_freq = CONFIG_XTENSA_CPU_CLOCK*1000000UL; #endif @@ -210,9 +211,8 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) void calibrate_delay(void) { loops_per_jiffy = ccount_freq / HZ; - printk("Calibrating delay loop (skipped)... " - "%lu.%02lu BogoMIPS preset\n", - loops_per_jiffy/(1000000/HZ), - (loops_per_jiffy/(10000/HZ)) % 100); + pr_info("Calibrating delay loop (skipped)... %lu.%02lu BogoMIPS preset\n", + loops_per_jiffy / (1000000 / HZ), + (loops_per_jiffy / (10000 / HZ)) % 100); } #endif diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index d02fc304b31c..ce37d5b899fe 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -465,26 +465,25 @@ void show_regs(struct pt_regs * regs) for (i = 0; i < 16; i++) { if ((i % 8) == 0) - printk(KERN_INFO "a%02d:", i); - printk(KERN_CONT " %08lx", regs->areg[i]); + pr_info("a%02d:", i); + pr_cont(" %08lx", regs->areg[i]); } - printk(KERN_CONT "\n"); - - printk("pc: %08lx, ps: %08lx, depc: %08lx, excvaddr: %08lx\n", - regs->pc, regs->ps, regs->depc, regs->excvaddr); - printk("lbeg: %08lx, lend: %08lx lcount: %08lx, sar: %08lx\n", - regs->lbeg, regs->lend, regs->lcount, regs->sar); + pr_cont("\n"); + pr_info("pc: %08lx, ps: %08lx, depc: %08lx, excvaddr: %08lx\n", + regs->pc, regs->ps, regs->depc, regs->excvaddr); + pr_info("lbeg: %08lx, lend: %08lx lcount: %08lx, sar: %08lx\n", + regs->lbeg, regs->lend, regs->lcount, regs->sar); if (user_mode(regs)) - printk("wb: %08lx, ws: %08lx, wmask: %08lx, syscall: %ld\n", - regs->windowbase, regs->windowstart, regs->wmask, - regs->syscall); + pr_cont("wb: %08lx, ws: %08lx, wmask: %08lx, syscall: %ld\n", + regs->windowbase, regs->windowstart, regs->wmask, + regs->syscall); } static int show_trace_cb(struct stackframe *frame, void *data) { if (kernel_text_address(frame->pc)) { - printk(" [<%08lx>] ", frame->pc); - print_symbol("%s\n", frame->pc); + pr_cont(" [<%08lx>]", frame->pc); + print_symbol(" %s\n", frame->pc); } return 0; } @@ -494,19 +493,13 @@ void show_trace(struct task_struct *task, unsigned long *sp) if (!sp) sp = stack_pointer(task); - printk("Call Trace:"); -#ifdef CONFIG_KALLSYMS - printk("\n"); -#endif + pr_info("Call Trace:\n"); walk_stackframe(sp, show_trace_cb, NULL); - printk("\n"); +#ifndef CONFIG_KALLSYMS + pr_cont("\n"); +#endif } -/* - * This routine abuses get_user()/put_user() to reference pointers - * with at least a bit of error checking ... - */ - static int kstack_depth_to_print = 24; void show_stack(struct task_struct *task, unsigned long *sp) @@ -518,52 +511,29 @@ void show_stack(struct task_struct *task, unsigned long *sp) sp = stack_pointer(task); stack = sp; - printk("\nStack: "); + pr_info("Stack:\n"); for (i = 0; i < kstack_depth_to_print; i++) { if (kstack_end(sp)) break; - if (i && ((i % 8) == 0)) - printk("\n "); - printk("%08lx ", *sp++); + pr_cont(" %08lx", *sp++); + if (i % 8 == 7) + pr_cont("\n"); } - printk("\n"); show_trace(task, stack); } -void show_code(unsigned int *pc) -{ - long i; - - printk("\nCode:"); - - for(i = -3 ; i < 6 ; i++) { - unsigned long insn; - if (__get_user(insn, pc + i)) { - printk(" (Bad address in pc)\n"); - break; - } - printk("%c%08lx%c",(i?' ':'<'),insn,(i?' ':'>')); - } -} - DEFINE_SPINLOCK(die_lock); void die(const char * str, struct pt_regs * regs, long err) { static int die_counter; - int nl = 0; console_verbose(); spin_lock_irq(&die_lock); - printk("%s: sig: %ld [#%d]\n", str, err, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); - nl = 1; -#endif - if (nl) - printk("\n"); + pr_info("%s: sig: %ld [#%d]%s\n", str, err, ++die_counter, + IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : ""); show_regs(regs); if (!user_mode(regs)) show_stack(NULL, (unsigned long*)regs->areg[1]); From 9bfef729a3d11f04d12788d749a3ce6b47645734 Mon Sep 17 00:00:00 2001 From: Doug Brown Date: Fri, 4 Nov 2016 21:18:20 -0700 Subject: [PATCH 094/503] USB: serial: ftdi_sio: add support for TI CC3200 LaunchPad This patch adds support for the TI CC3200 LaunchPad board, which uses a custom USB vendor ID and product ID. Channel A is used for JTAG, and channel B is used for a UART. Signed-off-by: Doug Brown Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/ftdi_sio.c | 2 ++ drivers/usb/serial/ftdi_sio_ids.h | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 0ff7f38d7800..6e9fc8bcc285 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1012,6 +1012,8 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(ICPDAS_VID, ICPDAS_I7561U_PID) }, { USB_DEVICE(ICPDAS_VID, ICPDAS_I7563U_PID) }, { USB_DEVICE(WICED_VID, WICED_USB20706V2_PID) }, + { USB_DEVICE(TI_VID, TI_CC3200_LAUNCHPAD_PID), + .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 21011c0a4c64..48ee04c94a75 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -595,6 +595,12 @@ #define ATMEL_VID 0x03eb /* Vendor ID */ #define STK541_PID 0x2109 /* Zigbee Controller */ +/* + * Texas Instruments + */ +#define TI_VID 0x0451 +#define TI_CC3200_LAUNCHPAD_PID 0xC32A /* SimpleLink Wi-Fi CC3200 LaunchPad */ + /* * Blackfin gnICE JTAG * http://docs.blackfin.uclinux.org/doku.php?id=hw:jtag:gnice From e8a6123e9ead1b0d40349809e51de9341312fe08 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 23 Oct 2016 13:55:34 +0200 Subject: [PATCH 095/503] x86/platform/intel-mid: Retrofit pci_platform_pm_ops ->get_state hook Commit cc7cc02bada8 ("PCI: Query platform firmware for device power state") augmented struct pci_platform_pm_ops with a ->get_state hook and implemented it for acpi_pci_platform_pm, the only pci_platform_pm_ops existing till v4.7. However v4.8 introduced another pci_platform_pm_ops for Intel Mobile Internet Devices with commit 5823d0893ec2 ("x86/platform/intel-mid: Add Power Management Unit driver"). It is missing the ->get_state hook, which is fatal since pci_set_platform_pm() enforces its presence. Andy Shevchenko reports that without the present commit, such a device "crashes without even a character printed out on serial console and reboots (since watchdog)". Retrofit mid_pci_platform_pm with the missing callback to fix the breakage. Acked-and-tested-by: Andy Shevchenko Fixes: cc7cc02bada8 ("PCI: Query platform firmware for device power state") Signed-off-by: Lukas Wunner Acked-by: Bjorn Helgaas Cc: linux-pci@vger.kernel.org Cc: Andy Shevchenko Link: http://lkml.kernel.org/r/7c1567d4c49303a4aada94ba16275cbf56b8976b.1477221514.git.lukas@wunner.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/intel-mid.h | 1 + arch/x86/platform/intel-mid/pwr.c | 19 +++++++++++++++++++ drivers/pci/pci-mid.c | 6 ++++++ 3 files changed, 26 insertions(+) diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 5b6753d1f7f4..49da9f497b90 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -17,6 +17,7 @@ extern int intel_mid_pci_init(void); extern int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state); +extern pci_power_t intel_mid_pci_get_power_state(struct pci_dev *pdev); extern void intel_mid_pwr_power_off(void); diff --git a/arch/x86/platform/intel-mid/pwr.c b/arch/x86/platform/intel-mid/pwr.c index 5d3b45ad1c03..67375dda451c 100644 --- a/arch/x86/platform/intel-mid/pwr.c +++ b/arch/x86/platform/intel-mid/pwr.c @@ -272,6 +272,25 @@ int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state) } EXPORT_SYMBOL_GPL(intel_mid_pci_set_power_state); +pci_power_t intel_mid_pci_get_power_state(struct pci_dev *pdev) +{ + struct mid_pwr *pwr = midpwr; + int id, reg, bit; + u32 power; + + if (!pwr || !pwr->available) + return PCI_UNKNOWN; + + id = intel_mid_pwr_get_lss_id(pdev); + if (id < 0) + return PCI_UNKNOWN; + + reg = (id * LSS_PWS_BITS) / 32; + bit = (id * LSS_PWS_BITS) % 32; + power = mid_pwr_get_state(pwr, reg); + return (__force pci_power_t)((power >> bit) & 3); +} + void intel_mid_pwr_power_off(void) { struct mid_pwr *pwr = midpwr; diff --git a/drivers/pci/pci-mid.c b/drivers/pci/pci-mid.c index 55f453de562e..c7f3408e3148 100644 --- a/drivers/pci/pci-mid.c +++ b/drivers/pci/pci-mid.c @@ -29,6 +29,11 @@ static int mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state) return intel_mid_pci_set_power_state(pdev, state); } +static pci_power_t mid_pci_get_power_state(struct pci_dev *pdev) +{ + return intel_mid_pci_get_power_state(pdev); +} + static pci_power_t mid_pci_choose_state(struct pci_dev *pdev) { return PCI_D3hot; @@ -52,6 +57,7 @@ static bool mid_pci_need_resume(struct pci_dev *dev) static struct pci_platform_pm_ops mid_pci_platform_pm = { .is_manageable = mid_pci_power_manageable, .set_state = mid_pci_set_power_state, + .get_state = mid_pci_get_power_state, .choose_state = mid_pci_choose_state, .sleep_wake = mid_pci_sleep_wake, .run_wake = mid_pci_run_wake, From 4db069a2bf990e278ea57ff615dcaa89b85376bd Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 4 Nov 2016 07:13:52 +0100 Subject: [PATCH 096/503] drm/sun4i: Propagate error to the caller If 'sun4i_layers_init()' returns an error, propagate it instead of returning -EINVAL unconditionally. Signed-off-by: Christophe JAILLET Reviewed-by: Gustavo Padovan Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 077f3785439e..70e9fd59c5a2 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -144,7 +144,7 @@ static int sun4i_drv_bind(struct device *dev) drv->layers = sun4i_layers_init(drm); if (IS_ERR(drv->layers)) { dev_err(drm->dev, "Couldn't create the planes\n"); - ret = -EINVAL; + ret = PTR_ERR(drv->layers); goto free_drm; } From 17ae1c650c1ecf8dc8e16d54b0f68a345965f43f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 3 Nov 2016 18:40:19 +0100 Subject: [PATCH 097/503] phy: fix device reference leaks Make sure to drop the reference taken by bus_find_device_by_name() before returning from phy_connect() and phy_attach(). Note that both function still take a reference to the phy device through phy_attach_direct(). Fixes: e13934563db0 ("[PATCH] PHY Layer fixup") Cc: Florian Fainelli Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index e977ba931878..1a4bf8acad78 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -723,6 +723,7 @@ struct phy_device *phy_connect(struct net_device *dev, const char *bus_id, phydev = to_phy_device(d); rc = phy_connect_direct(dev, phydev, handler, interface); + put_device(d); if (rc) return ERR_PTR(rc); @@ -953,6 +954,7 @@ struct phy_device *phy_attach(struct net_device *dev, const char *bus_id, phydev = to_phy_device(d); rc = phy_attach_direct(dev, phydev, phydev->dev_flags, interface); + put_device(d); if (rc) return ERR_PTR(rc); From c7262aaace1b17a650598063e3b9ee1785fde377 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 3 Nov 2016 18:40:20 +0100 Subject: [PATCH 098/503] net: ethernet: ti: cpsw: fix device and of_node leaks Make sure to drop the references taken by of_get_child_by_name() and bus_find_device() before returning from cpsw_phy_sel(). Note that holding a reference to the cpsw-phy-sel device does not prevent the devres-managed private data from going away. Fixes: 5892cd135e16 ("drivers: net: cpsw-phy-sel: Add new driver...") Cc: Mugunthan V N Cc: Grygorii Strashko Cc: linux-omap@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw-phy-sel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw-phy-sel.c b/drivers/net/ethernet/ti/cpsw-phy-sel.c index 054a8dd23dae..ba1e45ff6aae 100644 --- a/drivers/net/ethernet/ti/cpsw-phy-sel.c +++ b/drivers/net/ethernet/ti/cpsw-phy-sel.c @@ -176,9 +176,12 @@ void cpsw_phy_sel(struct device *dev, phy_interface_t phy_mode, int slave) } dev = bus_find_device(&platform_bus_type, NULL, node, match); + of_node_put(node); priv = dev_get_drvdata(dev); priv->cpsw_phy_sel(priv, phy_mode, slave); + + put_device(dev); } EXPORT_SYMBOL_GPL(cpsw_phy_sel); From 6bed0118012ea350acbe606ab3ae0ed3d60ed5f3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 3 Nov 2016 18:40:21 +0100 Subject: [PATCH 099/503] net: ethernet: ti: davinci_emac: fix device reference leak Make sure to drop the references taken by bus_find_device() before returning from emac_dev_open(). Note that phy_connect still takes a reference to the phy device. Fixes: 5d69e0076a72 ("net: davinci_emac: switch to new mdio") Cc: Mugunthan V N Cc: Grygorii Strashko Cc: linux-omap@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/davinci_emac.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 2fd94a5bc1f3..84fbe5714f8b 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1410,6 +1410,7 @@ static int emac_dev_open(struct net_device *ndev) int i = 0; struct emac_priv *priv = netdev_priv(ndev); struct phy_device *phydev = NULL; + struct device *phy = NULL; ret = pm_runtime_get_sync(&priv->pdev->dev); if (ret < 0) { @@ -1488,19 +1489,20 @@ static int emac_dev_open(struct net_device *ndev) /* use the first phy on the bus if pdata did not give us a phy id */ if (!phydev && !priv->phy_id) { - struct device *phy; - phy = bus_find_device(&mdio_bus_type, NULL, NULL, match_first_device); - if (phy) + if (phy) { priv->phy_id = dev_name(phy); + if (!priv->phy_id || !*priv->phy_id) + put_device(phy); + } } if (!phydev && priv->phy_id && *priv->phy_id) { phydev = phy_connect(ndev, priv->phy_id, &emac_adjust_link, PHY_INTERFACE_MODE_MII); - + put_device(phy); /* reference taken by bus_find_device */ if (IS_ERR(phydev)) { dev_err(emac_dev, "could not connect to phy %s\n", priv->phy_id); From 2271150bfb814b72ec57ae2fdf66e39da2eafafd Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 3 Nov 2016 18:40:22 +0100 Subject: [PATCH 100/503] net: hns: fix device reference leaks Make sure to drop the reference taken by class_find_device() in hnae_get_handle() on errors and when later releasing the handle. Fixes: 6fe6611ff275 ("net: add Hisilicon Network Subsystem...") Cc: Yisen Zhuang Cc: Salil Mehta Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hnae.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c index c54c6fac0d1d..b6ed818f78ff 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.c +++ b/drivers/net/ethernet/hisilicon/hns/hnae.c @@ -332,8 +332,10 @@ struct hnae_handle *hnae_get_handle(struct device *owner_dev, return ERR_PTR(-ENODEV); handle = dev->ops->get_handle(dev, port_id); - if (IS_ERR(handle)) + if (IS_ERR(handle)) { + put_device(&dev->cls_dev); return handle; + } handle->dev = dev; handle->owner_dev = owner_dev; @@ -356,6 +358,8 @@ out_when_init_queue: for (j = i - 1; j >= 0; j--) hnae_fini_queue(handle->qs[j]); + put_device(&dev->cls_dev); + return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL(hnae_get_handle); @@ -377,6 +381,8 @@ void hnae_put_handle(struct hnae_handle *h) dev->ops->put_handle(h); module_put(dev->owner); + + put_device(&dev->cls_dev); } EXPORT_SYMBOL(hnae_put_handle); From 7233bc84a3aeda835d334499dc00448373caf5c0 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 3 Nov 2016 17:03:41 -0200 Subject: [PATCH 101/503] sctp: assign assoc_id earlier in __sctp_connect sctp_wait_for_connect() currently already holds the asoc to keep it alive during the sleep, in case another thread release it. But Andrey Konovalov and Dmitry Vyukov reported an use-after-free in such situation. Problem is that __sctp_connect() doesn't get a ref on the asoc and will do a read on the asoc after calling sctp_wait_for_connect(), but by then another thread may have closed it and the _put on sctp_wait_for_connect will actually release it, causing the use-after-free. Fix is, instead of doing the read after waiting for the connect, do it before so, and avoid this issue as the socket is still locked by then. There should be no issue on returning the asoc id in case of failure as the application shouldn't trust on that number in such situations anyway. This issue doesn't exist in sctp_sendmsg() path. Reported-by: Dmitry Vyukov Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Marcelo Ricardo Leitner Reviewed-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/socket.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 71b75f9d9c1b..faa48ff5cf4b 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1214,9 +1214,12 @@ static int __sctp_connect(struct sock *sk, timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK); - err = sctp_wait_for_connect(asoc, &timeo); - if ((err == 0 || err == -EINPROGRESS) && assoc_id) + if (assoc_id) *assoc_id = asoc->assoc_id; + err = sctp_wait_for_connect(asoc, &timeo); + /* Note: the asoc may be freed after the return of + * sctp_wait_for_connect. + */ /* Don't free association on exit. */ asoc = NULL; From 483bed2b0ddd12ec33fc9407e0c6e1088e77a97c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 4 Nov 2016 00:01:19 +0100 Subject: [PATCH 102/503] bpf: fix htab map destruction when extra reserve is in use Commit a6ed3ea65d98 ("bpf: restore behavior of bpf_map_update_elem") added an extra per-cpu reserve to the hash table map to restore old behaviour from pre prealloc times. When non-prealloc is in use for a map, then problem is that once a hash table extra element has been linked into the hash-table, and the hash table is destroyed due to refcount dropping to zero, then htab_map_free() -> delete_all_elements() will walk the whole hash table and drop all elements via htab_elem_free(). The problem is that the element from the extra reserve is first fed to the wrong backend allocator and eventually freed twice. Fixes: a6ed3ea65d98 ("bpf: restore behavior of bpf_map_update_elem") Reported-by: Dmitry Vyukov Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/hashtab.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 570eeca7bdfa..ad1bc67aff1b 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -687,7 +687,8 @@ static void delete_all_elements(struct bpf_htab *htab) hlist_for_each_entry_safe(l, n, head, hash_node) { hlist_del_rcu(&l->hash_node); - htab_elem_free(htab, l); + if (l->state != HTAB_EXTRA_ELEM_USED) + htab_elem_free(htab, l); } } } From 20b2b24f91f70e7d3f0918c077546cb21bd73a87 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 4 Nov 2016 00:56:31 +0100 Subject: [PATCH 103/503] bpf: fix map not being uncharged during map creation failure In map_create(), we first find and create the map, then once that suceeded, we charge it to the user's RLIMIT_MEMLOCK, and then fetch a new anon fd through anon_inode_getfd(). The problem is, once the latter fails f.e. due to RLIMIT_NOFILE limit, then we only destruct the map via map->ops->map_free(), but without uncharging the previously locked memory first. That means that the user_struct allocation is leaked as well as the accounted RLIMIT_MEMLOCK memory not released. Make the label names in the fix consistent with bpf_prog_load(). Fixes: aaac3ba95e4c ("bpf: charge user for creation of BPF maps and programs") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/syscall.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 228f962447a5..237f3d6a7ddc 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -194,7 +194,7 @@ static int map_create(union bpf_attr *attr) err = bpf_map_charge_memlock(map); if (err) - goto free_map; + goto free_map_nouncharge; err = bpf_map_new_fd(map); if (err < 0) @@ -204,6 +204,8 @@ static int map_create(union bpf_attr *attr) return err; free_map: + bpf_map_uncharge_memlock(map); +free_map_nouncharge: map->ops->map_free(map); return err; } From 85566ca6b6d7e131837cd197a441d98e83146fae Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 7 Nov 2016 14:52:39 -0700 Subject: [PATCH 104/503] ARM: OMAP3: Fix formatting of features printed With the printk cleanups merged into v4.9-rc1, we now get the omap revision printed on multiple lines. Let's fix that and also remove the extra empty space at the end of the features. And let's update things to use scnprintf as suggested by Ivaylo Dimitrov . Reported-by: Adam Ford Cc: Ivaylo Dimitrov Reviewed-by: Sebastian Reichel Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/id.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c index 2abd53ae3e7a..cc6d9fa60924 100644 --- a/arch/arm/mach-omap2/id.c +++ b/arch/arm/mach-omap2/id.c @@ -205,11 +205,15 @@ void __init omap2xxx_check_revision(void) #define OMAP3_SHOW_FEATURE(feat) \ if (omap3_has_ ##feat()) \ - printk(#feat" "); + n += scnprintf(buf + n, sizeof(buf) - n, #feat " "); static void __init omap3_cpuinfo(void) { const char *cpu_name; + char buf[64]; + int n = 0; + + memset(buf, 0, sizeof(buf)); /* * OMAP3430 and OMAP3530 are assumed to be same. @@ -241,10 +245,10 @@ static void __init omap3_cpuinfo(void) cpu_name = "OMAP3503"; } - sprintf(soc_name, "%s", cpu_name); + scnprintf(soc_name, sizeof(soc_name), "%s", cpu_name); /* Print verbose information */ - pr_info("%s %s (", soc_name, soc_rev); + n += scnprintf(buf, sizeof(buf) - n, "%s %s (", soc_name, soc_rev); OMAP3_SHOW_FEATURE(l2cache); OMAP3_SHOW_FEATURE(iva); @@ -252,8 +256,10 @@ static void __init omap3_cpuinfo(void) OMAP3_SHOW_FEATURE(neon); OMAP3_SHOW_FEATURE(isp); OMAP3_SHOW_FEATURE(192mhz_clk); - - printk(")\n"); + if (*(buf + n - 1) == ' ') + n--; + n += scnprintf(buf + n, sizeof(buf) - n, ")\n"); + pr_info("%s", buf); } #define OMAP3_CHECK_FEATURE(status,feat) \ From 72bb40b8b7620f1390c84c10309a40e886bf449e Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Wed, 19 Oct 2016 15:44:12 -0500 Subject: [PATCH 105/503] ARM: AM43XX: Select OMAP_INTERCONNECT in Kconfig AM437x makes use of the omap_l3_noc driver so explicitly select OMAP_INTERCONNECT in the Kconfig for SOC_AM43XX to ensure it gets enabled for AM43XX only builds. Signed-off-by: Dave Gerlach Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index a9afeebd59f2..0465338183c7 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -71,6 +71,7 @@ config SOC_AM43XX select HAVE_ARM_TWD select ARM_ERRATA_754322 select ARM_ERRATA_775420 + select OMAP_INTERCONNECT config SOC_DRA7XX bool "TI DRA7XX" From 271a3024db1f32ca34f504178fade6ef95cd6c9b Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Fri, 21 Oct 2016 09:12:31 -0500 Subject: [PATCH 106/503] ARM: dts: omap3: Fix memory node in Torpedo board Commit ("766a1fe78fc3 ARM: omap3: Add missing memory node") added the memory node, but the patch didn't have the correct starting address. This patch fixes the correct starting address. Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/logicpd-torpedo-som.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi index 731ec37aed5b..8f9a69ca818c 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi +++ b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi @@ -13,9 +13,9 @@ }; }; - memory@0 { + memory@80000000 { device_type = "memory"; - reg = <0 0>; + reg = <0x80000000 0>; }; leds { From 4ae46efcff19445afbf49fe7038de6020f37fefe Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 24 Oct 2016 12:00:21 +0100 Subject: [PATCH 107/503] ARM: OMAP2+: PRM: initialize en_uart4_mask and grpsel_uart4_mask In the case where has_uart4 is false, en_uart4_mask and grpsel_uart4_mask are not initialized and so any garbage value is being logically or'd into the write of PM_WKEN and OMAP3430_PM_MPUGRPSEL. Fix this by initializing these masks to zero. Signed-off-by: Colin Ian King Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/prm3xxx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mach-omap2/prm3xxx.c b/arch/arm/mach-omap2/prm3xxx.c index 62680aad2126..718981bb80cd 100644 --- a/arch/arm/mach-omap2/prm3xxx.c +++ b/arch/arm/mach-omap2/prm3xxx.c @@ -319,6 +319,9 @@ void __init omap3_prm_init_pm(bool has_uart4, bool has_iva) if (has_uart4) { en_uart4_mask = OMAP3630_EN_UART4_MASK; grpsel_uart4_mask = OMAP3630_GRPSEL_UART4_MASK; + } else { + en_uart4_mask = 0; + grpsel_uart4_mask = 0; } /* Enable wakeups in PER */ From 0ab11d8ea46fd6faf67df4461c795091429a1496 Mon Sep 17 00:00:00 2001 From: Nicolae Rosia Date: Tue, 1 Nov 2016 11:49:25 +0200 Subject: [PATCH 108/503] ARM: OMAP2+: avoid NULL pointer dereference For OMAP4, volt_data is set in omap44xx_voltagedomains_init. If the SoC is neither OMAP443X or OMAP446X, we end up with a NULL in volt_data which causes a kernel oops. This is the case when booting OMAP4470. Signed-off-by: Nicolae Rosia Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/voltage.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/mach-omap2/voltage.c b/arch/arm/mach-omap2/voltage.c index cba8cada8c81..cd15dbd62671 100644 --- a/arch/arm/mach-omap2/voltage.c +++ b/arch/arm/mach-omap2/voltage.c @@ -87,6 +87,12 @@ int voltdm_scale(struct voltagedomain *voltdm, return -ENODATA; } + if (!voltdm->volt_data) { + pr_err("%s: No voltage data defined for vdd_%s\n", + __func__, voltdm->name); + return -ENODATA; + } + /* Adjust voltage to the exact voltage from the OPP table */ for (i = 0; voltdm->volt_data[i].volt_nominal != 0; i++) { if (voltdm->volt_data[i].volt_nominal >= target_volt) { From 725ed2238cdb3807c19e7edcb20fde8d0f91597f Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Tue, 25 Oct 2016 19:38:09 +0200 Subject: [PATCH 109/503] dts: omap5: board-common: add phandle to reference Palmas gpadc Will be needed for iio based drivers. Signed-off-by: H. Nikolaus Schaller Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5-board-common.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index 6365635fea5c..7cd1c674ceec 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -415,7 +415,7 @@ ti,backup-battery-charge-high-current; }; - gpadc { + gpadc: gpadc { compatible = "ti,palmas-gpadc"; interrupts = <18 0 16 0 From 0b68f1beea9ed2b31ff7873d5ed0cfbd087da0eb Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Tue, 25 Oct 2016 19:38:10 +0200 Subject: [PATCH 110/503] dts: omap5: board-common: enable twl6040 headset jack detection Signed-off-by: H. Nikolaus Schaller Reviewed-by: Peter Ujfalusi Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5-board-common.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index 7cd1c674ceec..60a33c4b7b82 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -124,6 +124,7 @@ compatible = "ti,abe-twl6040"; ti,model = "omap5-uevm"; + ti,jack-detection; ti,mclk-freq = <19200000>; ti,mcpdm = <&mcpdm>; From 1219e3db7ecb59ab269e9c8b1a199d82b8d088bb Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Tue, 25 Oct 2016 19:38:11 +0200 Subject: [PATCH 111/503] ASoC: omap-abe-twl6040: fix typo in bindings documentation Signed-off-by: H. Nikolaus Schaller Acked-by: Peter Ujfalusi Acked-by: Rob Herring Signed-off-by: Tony Lindgren --- Documentation/devicetree/bindings/sound/omap-abe-twl6040.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/omap-abe-twl6040.txt b/Documentation/devicetree/bindings/sound/omap-abe-twl6040.txt index fd40c852d7c7..462b04e8209f 100644 --- a/Documentation/devicetree/bindings/sound/omap-abe-twl6040.txt +++ b/Documentation/devicetree/bindings/sound/omap-abe-twl6040.txt @@ -12,7 +12,7 @@ Required properties: Optional properties: - ti,dmic: phandle for the OMAP dmic node if the machine have it connected -- ti,jack_detection: Need to be present if the board capable to detect jack +- ti,jack-detection: Need to be present if the board capable to detect jack insertion, removal. Available audio endpoints for the audio-routing table: From 5d41ce29e3b91ef305f88d23f72b3359de329cec Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 3 Nov 2016 16:17:26 -0700 Subject: [PATCH 112/503] net: icmp6_send should use dst dev to determine L3 domain icmp6_send is called in response to some event. The skb may not have the device set (skb->dev is NULL), but it is expected to have a dst set. Update icmp6_send to use the dst on the skb to determine L3 domain. Fixes: ca254490c8dfd ("net: Add VRF support to IPv6 stack") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index bd59c343d35f..7370ad2e693a 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -448,7 +448,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, if (__ipv6_addr_needs_scope_id(addr_type)) iif = skb->dev->ifindex; else - iif = l3mdev_master_ifindex(skb->dev); + iif = l3mdev_master_ifindex(skb_dst(skb)->dev); /* * Must not send error if the source does not uniquely From f3358507c11999c91abf54744658bccd49b5879c Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 4 Nov 2016 12:55:36 +0200 Subject: [PATCH 113/503] virtio-net: drop legacy features in virtio 1 mode Virtio 1.0 spec says VIRTIO_F_ANY_LAYOUT and VIRTIO_NET_F_GSO are legacy-only feature bits. Do not negotiate them in virtio 1 mode. Note this is a spec violation so we need to backport it to stable/downstream kernels. Cc: stable@vger.kernel.org Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index fad84f3f4109..fd8b1e62301f 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2038,23 +2038,33 @@ static struct virtio_device_id id_table[] = { { 0 }, }; +#define VIRTNET_FEATURES \ + VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \ + VIRTIO_NET_F_MAC, \ + VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \ + VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ + VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \ + VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \ + VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ + VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ + VIRTIO_NET_F_CTRL_MAC_ADDR, \ + VIRTIO_NET_F_MTU + static unsigned int features[] = { - VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, - VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC, - VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, - VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, - VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, - VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, - VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, - VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, - VIRTIO_NET_F_CTRL_MAC_ADDR, + VIRTNET_FEATURES, +}; + +static unsigned int features_legacy[] = { + VIRTNET_FEATURES, + VIRTIO_NET_F_GSO, VIRTIO_F_ANY_LAYOUT, - VIRTIO_NET_F_MTU, }; static struct virtio_driver virtio_net_driver = { .feature_table = features, .feature_table_size = ARRAY_SIZE(features), + .feature_table_legacy = features_legacy, + .feature_table_size_legacy = ARRAY_SIZE(features_legacy), .driver.name = KBUILD_MODNAME, .driver.owner = THIS_MODULE, .id_table = id_table, From 8e0140a2d7c9f55b794a5fce22e05350a435b965 Mon Sep 17 00:00:00 2001 From: Fabian Mewes Date: Fri, 4 Nov 2016 13:16:14 +0100 Subject: [PATCH 114/503] Documentation: networking: dsa: Update tagging protocols Add Qualcomm QCA tagging introduced in cafdc45c9 to the list of supported protocols. Signed-off-by: Fabian Mewes Reviewed-by: Andrew Lunn Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/networking/dsa/dsa.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt index 6d6c07cf1a9a..63912ef34606 100644 --- a/Documentation/networking/dsa/dsa.txt +++ b/Documentation/networking/dsa/dsa.txt @@ -67,13 +67,14 @@ Note that DSA does not currently create network interfaces for the "cpu" and Switch tagging protocols ------------------------ -DSA currently supports 4 different tagging protocols, and a tag-less mode as +DSA currently supports 5 different tagging protocols, and a tag-less mode as well. The different protocols are implemented in: net/dsa/tag_trailer.c: Marvell's 4 trailer tag mode (legacy) net/dsa/tag_dsa.c: Marvell's original DSA tag net/dsa/tag_edsa.c: Marvell's enhanced DSA tag net/dsa/tag_brcm.c: Broadcom's 4 bytes tag +net/dsa/tag_qca.c: Qualcomm's 2 bytes tag The exact format of the tag protocol is vendor specific, but in general, they all contain something which: From fd0285a39b1cb496f60210a9a00ad33a815603e7 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 4 Nov 2016 15:11:57 -0400 Subject: [PATCH 115/503] fib_trie: Correct /proc/net/route off by one error The display of /proc/net/route has had a couple issues due to the fact that when I originally rewrote most of fib_trie I made it so that the iterator was tracking the next value to use instead of the current. In addition it had an off by 1 error where I was tracking the first piece of data as position 0, even though in reality that belonged to the SEQ_START_TOKEN. This patch updates the code so the iterator tracks the last reported position and key instead of the next expected position and key. In addition it shifts things so that all of the leaves start at 1 instead of trying to report leaves starting with offset 0 as being valid. With these two issues addressed this should resolve any off by one errors that were present in the display of /proc/net/route. Fixes: 25b97c016b26 ("ipv4: off-by-one in continuation handling in /proc/net/route") Cc: Andy Whitcroft Reported-by: Jason Baron Tested-by: Jason Baron Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 31cef3602585..4cff74d4133f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2413,22 +2413,19 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, struct key_vector *l, **tp = &iter->tnode; t_key key; - /* use cache location of next-to-find key */ + /* use cached location of previously found key */ if (iter->pos > 0 && pos >= iter->pos) { - pos -= iter->pos; key = iter->key; } else { - iter->pos = 0; + iter->pos = 1; key = 0; } - while ((l = leaf_walk_rcu(tp, key)) != NULL) { + pos -= iter->pos; + + while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) { key = l->key + 1; iter->pos++; - - if (--pos <= 0) - break; - l = NULL; /* handle unlikely case of a key wrap */ @@ -2437,7 +2434,7 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, } if (l) - iter->key = key; /* remember it */ + iter->key = l->key; /* remember it */ else iter->pos = 0; /* forget it */ @@ -2465,7 +2462,7 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) return fib_route_get_idx(iter, *pos); iter->pos = 0; - iter->key = 0; + iter->key = KEY_MAX; return SEQ_START_TOKEN; } @@ -2474,7 +2471,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct fib_route_iter *iter = seq->private; struct key_vector *l = NULL; - t_key key = iter->key; + t_key key = iter->key + 1; ++*pos; @@ -2483,7 +2480,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) l = leaf_walk_rcu(&iter->tnode, key); if (l) { - iter->key = l->key + 1; + iter->key = l->key; iter->pos++; } else { iter->pos = 0; From 53f8d322234649b4d6f1515b20c127a577efd164 Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Tue, 8 Nov 2016 14:00:45 +0800 Subject: [PATCH 116/503] gpio: pca953x: Fix corruption of other gpios in set_multiple. gpiod_set_array_value_complex does not clear the bits field. Therefore when the drivers set_multiple funciton is called bits outside the mask are undefined and can be either set or not. So bank_val needs to be masked with bank_mask before or with the reg_val cache. Cc: stable@vger.kernel.org Fixes: b4818afeacbd ("gpio: pca953x: Add set_multiple to allow multiple") Signed-off-by: Phil Reid Signed-off-by: Linus Walleij --- drivers/gpio/gpio-pca953x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index e422568e14ad..4a8d0fe60e0c 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -380,6 +380,7 @@ static void pca953x_gpio_set_multiple(struct gpio_chip *gc, if (bank_mask) { bank_val = bits[bank / sizeof(*bits)] >> ((bank % sizeof(*bits)) * 8); + bank_val &= bank_mask; reg_val[bank] = (reg_val[bank] & ~bank_mask) | bank_val; } } From 386377b5473043c09b2de40bfe5abfb0fc87e1b4 Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Tue, 8 Nov 2016 13:18:11 +0800 Subject: [PATCH 117/503] gpio: pca953x: Move memcpy into mutex lock for set multiple Need to ensure that reg_output is not updated while setting multiple bits. This makes the mutex locking behaviour for the set_multiple call consistent with that of the set_value call. Cc: stable@vger.kernel.org Fixes: b4818afeacbd ("gpio: pca953x: Add set_multiple to allow multiple") Signed-off-by: Phil Reid Signed-off-by: Linus Walleij --- drivers/gpio/gpio-pca953x.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 4a8d0fe60e0c..fe731f094257 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -372,8 +372,8 @@ static void pca953x_gpio_set_multiple(struct gpio_chip *gc, bank_shift = fls((chip->gpio_chip.ngpio - 1) / BANK_SZ); - memcpy(reg_val, chip->reg_output, NBANK(chip)); mutex_lock(&chip->i2c_lock); + memcpy(reg_val, chip->reg_output, NBANK(chip)); for (bank = 0; bank < NBANK(chip); bank++) { bank_mask = mask[bank / sizeof(*mask)] >> ((bank % sizeof(*mask)) * 8); @@ -608,7 +608,6 @@ static int pca953x_irq_setup(struct pca953x_chip *chip, if (client->irq && irq_base != -1 && (chip->driver_data & PCA_INT)) { - ret = pca953x_read_regs(chip, chip->regs->input, chip->irq_stat); if (ret) From 7ee7e87dfb158e79019ea1d5ea1b0e6f2bc93ee4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 7 Nov 2016 19:57:00 +0100 Subject: [PATCH 118/503] genirq: Use irq type from irqdata instead of irqdesc The type flags in the irq descriptor are there for historical reasons and only updated via irq_modify_status() or irq_set_type(). Both functions also update the type flags in irqdata. __setup_irq() is the only left over user of the type flags in the irq descriptor. If __setup_irq() is called with empty irq type flags, then the type flags are retrieved from irqdata. If an interrupt is shared, then the type flags are compared with the type flags stored in the irq descriptor. On x86 the ioapic does not have a irq_set_type() callback because the type is defined in the BIOS tables and cannot be changed. The type is stored in irqdata at setup time without updating the type data in the irq descriptor. As a result the comparison described above fails. There is no point in updating the irq descriptor flags because the only relevant storage is irqdata. Use the type flags from irqdata for both retrieval and comparison in __setup_irq() instead. Aside of that the print out in case of non matching type flags has the old and new type flags arguments flipped. Fix that as well. For correctness sake the flags stored in the irq descriptor should be removed, but this is beyond the scope of this bugfix and will be done in a later patch. Fixes: 4b357daed698 ("genirq: Look-up trigger type if not specified by caller") Reported-and-tested-by: Mika Westerberg Signed-off-by: Thomas Gleixner Cc: Marc Zyngier Cc: Jon Hunter Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1611072020360.3501@nanos Signed-off-by: Thomas Gleixner --- kernel/irq/manage.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 9c4d30483264..6b669593e7eb 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1341,12 +1341,12 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } else if (new->flags & IRQF_TRIGGER_MASK) { unsigned int nmsk = new->flags & IRQF_TRIGGER_MASK; - unsigned int omsk = irq_settings_get_trigger_mask(desc); + unsigned int omsk = irqd_get_trigger_type(&desc->irq_data); if (nmsk != omsk) /* hope the handler works with current trigger mode */ pr_warn("irq %d uses trigger mode %u; requested %u\n", - irq, nmsk, omsk); + irq, omsk, nmsk); } *old_ptr = new; From 8ae94224c9d72fc4d9aaac93b2d7833cf46d7141 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 4 Nov 2016 19:39:38 +0100 Subject: [PATCH 119/503] kbuild: add -fno-PIE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Debian started to build the gcc with -fPIE by default so the kernel build ends before it starts properly with: |kernel/bounds.c:1:0: error: code model kernel does not support PIC mode Also add to KBUILD_AFLAGS due to: |gcc -Wp,-MD,arch/x86/entry/vdso/vdso32/.note.o.d … -mfentry -DCC_USING_FENTRY … vdso/vdso32/note.S |arch/x86/entry/vdso/vdso32/note.S:1:0: sorry, unimplemented: -mfentry isn’t supported for 32-bit in combination with -fpic Tagging it stable so it is possible to compile recent stable kernels as well. Cc: stable@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Michal Marek --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 512e47a53e9a..58fc5d935ce6 100644 --- a/Makefile +++ b/Makefile @@ -622,6 +622,8 @@ include arch/$(SRCARCH)/Makefile KBUILD_CFLAGS += $(call cc-option,-fno-delete-null-pointer-checks,) KBUILD_CFLAGS += $(call cc-disable-warning,maybe-uninitialized,) KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,) +KBUILD_CFLAGS += $(call cc-option,-fno-PIE) +KBUILD_AFLAGS += $(call cc-option,-fno-PIE) ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION KBUILD_CFLAGS += $(call cc-option,-ffunction-sections,) From 8fbfef7f505bba60fb57078b7621270ee57cd1c4 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 3 Nov 2016 17:14:03 -0700 Subject: [PATCH 120/503] ipvs: use IPVS_CMD_ATTR_MAX for family.maxattr family.maxattr is the max index for policy[], the size of ops[] is determined with ARRAY_SIZE(). Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Cc: Pablo Neira Ayuso Signed-off-by: Cong Wang Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c3c809b2e712..a6e44ef2ec9a 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2845,7 +2845,7 @@ static struct genl_family ip_vs_genl_family = { .hdrsize = 0, .name = IPVS_GENL_NAME, .version = IPVS_GENL_VERSION, - .maxattr = IPVS_CMD_MAX, + .maxattr = IPVS_CMD_ATTR_MAX, .netnsok = true, /* Make ipvsadm to work on netns */ }; From fb9c9649a1d0a65a8f94f784aa18252a0dd584c1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 29 Oct 2016 03:01:50 +0200 Subject: [PATCH 121/503] netfilter: connmark: ignore skbs with magic untracked conntrack objects The (percpu) untracked conntrack entries can end up with nonzero connmarks. The 'untracked' conntrack objects are merely a way to distinguish INVALID (i.e. protocol connection tracker says payload doesn't meet some requirements or packet was never seen by the connection tracking code) from packets that are intentionally not tracked (some icmpv6 types such as neigh solicitation, or by using 'iptables -j CT --notrack' option). Untracked conntrack objects are implementation detail, we might as well use invalid magic address instead to tell INVALID and UNTRACKED apart. Check skb->nfct for untracked dummy and behave as if skb->nfct is NULL. Reported-by: XU Tianwen Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_connmark.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 69f78e96fdb4..b83e158e116a 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -44,7 +44,7 @@ connmark_tg(struct sk_buff *skb, const struct xt_action_param *par) u_int32_t newmark; ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL) + if (ct == NULL || nf_ct_is_untracked(ct)) return XT_CONTINUE; switch (info->mode) { @@ -97,7 +97,7 @@ connmark_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL) + if (ct == NULL || nf_ct_is_untracked(ct)) return false; return ((ct->mark & info->mask) == info->mark) ^ info->invert; From 6114cc516dcc0d311badb83ad7db5aa4b611bea6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 3 Nov 2016 14:44:42 +0100 Subject: [PATCH 122/503] netfilter: conntrack: fix CT target for UNSPEC helpers Thomas reports its not possible to attach the H.245 helper: iptables -t raw -A PREROUTING -p udp -j CT --helper H.245 iptables: No chain/target/match by that name. xt_CT: No such helper "H.245" This is because H.245 registers as NFPROTO_UNSPEC, but the CT target passes NFPROTO_IPV4/IPV6 to nf_conntrack_helper_try_module_get. We should treat UNSPEC as wildcard and ignore the l3num instead. Reported-by: Thomas Woerner Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_helper.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 336e21559e01..7341adf7059d 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -138,9 +138,14 @@ __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum) for (i = 0; i < nf_ct_helper_hsize; i++) { hlist_for_each_entry_rcu(h, &nf_ct_helper_hash[i], hnode) { - if (!strcmp(h->name, name) && - h->tuple.src.l3num == l3num && - h->tuple.dst.protonum == protonum) + if (strcmp(h->name, name)) + continue; + + if (h->tuple.src.l3num != NFPROTO_UNSPEC && + h->tuple.src.l3num != l3num) + continue; + + if (h->tuple.dst.protonum == protonum) return h; } } From e0df8cae6c16b9ba66a005079aa754b9eedc6efa Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 4 Nov 2016 16:54:58 +0100 Subject: [PATCH 123/503] netfilter: conntrack: refine gc worker heuristics Nicolas Dichtel says: After commit b87a2f9199ea ("netfilter: conntrack: add gc worker to remove timed-out entries"), netlink conntrack deletion events may be sent with a huge delay. Nicolas further points at this line: goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS); and indeed, this isn't optimal at all. Rationale here was to ensure that we don't block other work items for too long, even if nf_conntrack_htable_size is huge. But in order to have some guarantee about maximum time period where a scan of the full conntrack table completes we should always use a fixed slice size, so that once every N scans the full table has been examined at least once. We also need to balance this vs. the case where the system is either idle (i.e., conntrack table (almost) empty) or very busy (i.e. eviction happens from packet path). So, after some discussion with Nicolas: 1. want hard guarantee that we scan entire table at least once every X s -> need to scan fraction of table (get rid of upper bound) 2. don't want to eat cycles on idle or very busy system -> increase interval if we did not evict any entries 3. don't want to block other worker items for too long -> make fraction really small, and prefer small scan interval instead 4. Want reasonable short time where we detect timed-out entry when system went idle after a burst of traffic, while not doing scans all the time. -> Store next gc scan in worker, increasing delays when no eviction happened and shrinking delay when we see timed out entries. The old gc interval is turned into a max number, scans can now happen every jiffy if stale entries are present. Longest possible time period until an entry is evicted is now 2 minutes in worst case (entry expires right after it was deemed 'not expired'). Reported-by: Nicolas Dichtel Signed-off-by: Florian Westphal Acked-by: Nicolas Dichtel Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 49 ++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index df2f5a3901df..0f87e5d21be7 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -76,6 +76,7 @@ struct conntrack_gc_work { struct delayed_work dwork; u32 last_bucket; bool exiting; + long next_gc_run; }; static __read_mostly struct kmem_cache *nf_conntrack_cachep; @@ -83,9 +84,11 @@ static __read_mostly spinlock_t nf_conntrack_locks_all_lock; static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; +/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */ #define GC_MAX_BUCKETS_DIV 64u -#define GC_MAX_BUCKETS 8192u -#define GC_INTERVAL (5 * HZ) +/* upper bound of scan intervals */ +#define GC_INTERVAL_MAX (2 * HZ) +/* maximum conntracks to evict per gc run */ #define GC_MAX_EVICTS 256u static struct conntrack_gc_work conntrack_gc_work; @@ -936,13 +939,13 @@ static noinline int early_drop(struct net *net, unsigned int _hash) static void gc_worker(struct work_struct *work) { unsigned int i, goal, buckets = 0, expired_count = 0; - unsigned long next_run = GC_INTERVAL; - unsigned int ratio, scanned = 0; struct conntrack_gc_work *gc_work; + unsigned int ratio, scanned = 0; + unsigned long next_run; gc_work = container_of(work, struct conntrack_gc_work, dwork.work); - goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS); + goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV; i = gc_work->last_bucket; do { @@ -982,17 +985,47 @@ static void gc_worker(struct work_struct *work) if (gc_work->exiting) return; + /* + * Eviction will normally happen from the packet path, and not + * from this gc worker. + * + * This worker is only here to reap expired entries when system went + * idle after a busy period. + * + * The heuristics below are supposed to balance conflicting goals: + * + * 1. Minimize time until we notice a stale entry + * 2. Maximize scan intervals to not waste cycles + * + * Normally, expired_count will be 0, this increases the next_run time + * to priorize 2) above. + * + * As soon as a timed-out entry is found, move towards 1) and increase + * the scan frequency. + * In case we have lots of evictions next scan is done immediately. + */ ratio = scanned ? expired_count * 100 / scanned : 0; - if (ratio >= 90 || expired_count == GC_MAX_EVICTS) + if (ratio >= 90 || expired_count == GC_MAX_EVICTS) { + gc_work->next_gc_run = 0; next_run = 0; + } else if (expired_count) { + gc_work->next_gc_run /= 2U; + next_run = msecs_to_jiffies(1); + } else { + if (gc_work->next_gc_run < GC_INTERVAL_MAX) + gc_work->next_gc_run += msecs_to_jiffies(1); + + next_run = gc_work->next_gc_run; + } gc_work->last_bucket = i; - schedule_delayed_work(&gc_work->dwork, next_run); + queue_delayed_work(system_long_wq, &gc_work->dwork, next_run); } static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) { INIT_DELAYED_WORK(&gc_work->dwork, gc_worker); + gc_work->next_gc_run = GC_INTERVAL_MAX; gc_work->exiting = false; } @@ -1885,7 +1918,7 @@ int nf_conntrack_init_start(void) nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); conntrack_gc_work_init(&conntrack_gc_work); - schedule_delayed_work(&conntrack_gc_work.dwork, GC_INTERVAL); + queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, GC_INTERVAL_MAX); return 0; From 58c78e104d937c1f560fb10ed9bb2dcde0db4fcf Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 6 Nov 2016 14:40:01 +0800 Subject: [PATCH 124/503] netfilter: nf_tables: fix oops when inserting an element into a verdict map Dalegaard says: The following ruleset, when loaded with 'nft -f bad.txt' ----snip---- flush ruleset table ip inlinenat { map sourcemap { type ipv4_addr : verdict; } chain postrouting { ip saddr vmap @sourcemap accept } } add chain inlinenat test add element inlinenat sourcemap { 100.123.10.2 : jump test } ----snip---- results in a kernel oops: BUG: unable to handle kernel paging request at 0000000000001344 IP: [] nf_tables_check_loops+0x114/0x1f0 [nf_tables] [...] Call Trace: [] ? nft_data_init+0x13e/0x1a0 [nf_tables] [] nft_validate_register_store+0x60/0xb0 [nf_tables] [] nft_add_set_elem+0x545/0x5e0 [nf_tables] [] ? nft_table_lookup+0x30/0x60 [nf_tables] [] ? nla_strcmp+0x40/0x50 [] nf_tables_newsetelem+0x11e/0x210 [nf_tables] [] ? nla_validate+0x60/0x80 [] nfnetlink_rcv+0x354/0x5a7 [nfnetlink] Because we forget to fill the net pointer in bind_ctx, so dereferencing it may cause kernel crash. Reported-by: Dalegaard Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7d6a626b08f1..026581b04ea8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3568,6 +3568,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, dreg = nft_type_to_reg(set->dtype); list_for_each_entry(binding, &set->bindings, list) { struct nft_ctx bind_ctx = { + .net = ctx->net, .afi = ctx->afi, .table = ctx->table, .chain = (struct nft_chain *)binding->chain, From 34a515d27c6573b6f550877b30dd5e0f440c3d8f Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 4 Jan 2016 16:34:22 -0800 Subject: [PATCH 125/503] drm/fsl-dcu: do not update when modifying irq registers The IRQ status and mask registers are not "double buffered" according to the reference manual. Hence, there is no extra transfer/update write needed when modifying these registers. Signed-off-by: Stefan Agner --- drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c index e04efbed1a54..cc2fde2ae5ef 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c @@ -59,8 +59,6 @@ static int fsl_dcu_drm_irq_init(struct drm_device *dev) regmap_write(fsl_dev->regmap, DCU_INT_STATUS, 0); regmap_write(fsl_dev->regmap, DCU_INT_MASK, ~0); - regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE, - DCU_UPDATE_MODE_READREG); return ret; } @@ -139,8 +137,6 @@ static irqreturn_t fsl_dcu_drm_irq(int irq, void *arg) drm_handle_vblank(dev, 0); regmap_write(fsl_dev->regmap, DCU_INT_STATUS, int_status); - regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE, - DCU_UPDATE_MODE_READREG); return IRQ_HANDLED; } From 93daeeca2c9472a47d419884a64f6ca2b7f006e4 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 5 Sep 2016 19:05:12 -0700 Subject: [PATCH 126/503] drm/fsl-dcu: update all registers on flush Use the UPDATE_MODE READREG bit to initiate a register transfer on flush. This makes sure that we flush all registers only once for all planes. Signed-off-by: Stefan Agner --- drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c | 5 +++++ drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c index b2d5e188b1b8..2ea9dbd9be30 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c @@ -25,8 +25,13 @@ static void fsl_dcu_drm_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_crtc_state *old_crtc_state) { + struct drm_device *dev = crtc->dev; + struct fsl_dcu_drm_device *fsl_dev = dev->dev_private; struct drm_pending_vblank_event *event = crtc->state->event; + regmap_write(fsl_dev->regmap, + DCU_UPDATE_MODE, DCU_UPDATE_MODE_READREG); + if (event) { crtc->state->event = NULL; diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c index 9e6f7d8112b3..a99f48847420 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c @@ -160,11 +160,6 @@ static void fsl_dcu_drm_plane_atomic_update(struct drm_plane *plane, DCU_LAYER_POST_SKIP(0) | DCU_LAYER_PRE_SKIP(0)); } - regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE, - DCU_MODE_DCU_MODE_MASK, - DCU_MODE_DCU_MODE(DCU_MODE_NORMAL)); - regmap_write(fsl_dev->regmap, - DCU_UPDATE_MODE, DCU_UPDATE_MODE_READREG); return; } From 3d6f37102bd6e4b55a7f336d44974c0bd1c22a15 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 31 Oct 2016 09:51:19 -0700 Subject: [PATCH 127/503] drm/fsl-dcu: disable planes before disabling CRTC After disabling and reenabling the CRTC the DCU sometimes got stuck displaying the whole screen with a solid color. Disabling and reenabling the CRTC did not recover from the situation. This was often reproducable by just restarting the X-Server. The disabling sequence is not explicitly documented. But it turns out that disabling the planes before disabling the CRTC seems to prevent the above situation from happening. Use the callback ->atomic_disable instead of ->disable which allows to use the drm_atomic_helper_disable_planes_on_crtc() helper to disable planes before disabling the controller. Signed-off-by: Stefan Agner --- drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c index 2ea9dbd9be30..deb57435cc89 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c @@ -44,11 +44,15 @@ static void fsl_dcu_drm_crtc_atomic_flush(struct drm_crtc *crtc, } } -static void fsl_dcu_drm_disable_crtc(struct drm_crtc *crtc) +static void fsl_dcu_drm_crtc_atomic_disable(struct drm_crtc *crtc, + struct drm_crtc_state *old_crtc_state) { struct drm_device *dev = crtc->dev; struct fsl_dcu_drm_device *fsl_dev = dev->dev_private; + /* always disable planes on the CRTC */ + drm_atomic_helper_disable_planes_on_crtc(old_crtc_state, true); + drm_crtc_vblank_off(crtc); regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE, @@ -127,8 +131,8 @@ static void fsl_dcu_drm_crtc_mode_set_nofb(struct drm_crtc *crtc) } static const struct drm_crtc_helper_funcs fsl_dcu_drm_crtc_helper_funcs = { + .atomic_disable = fsl_dcu_drm_crtc_atomic_disable, .atomic_flush = fsl_dcu_drm_crtc_atomic_flush, - .disable = fsl_dcu_drm_disable_crtc, .enable = fsl_dcu_drm_crtc_enable, .mode_set_nofb = fsl_dcu_drm_crtc_mode_set_nofb, }; From 29f0c9edbdd98a977a4c629f411260f6e0356c67 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Tue, 8 Nov 2016 17:28:02 +0100 Subject: [PATCH 128/503] arm64: dts: marvell: Fix typo in label name on Armada 37xx The label names of the peripheral clocks have a typo. Fix it before it is more widely used. Reported-by: Thomas Petazzoni Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-37xx.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi index c4762538ec01..e9bd58793464 100644 --- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi @@ -105,7 +105,7 @@ status = "disabled"; }; - nb_perih_clk: nb-periph-clk@13000{ + nb_periph_clk: nb-periph-clk@13000 { compatible = "marvell,armada-3700-periph-clock-nb"; reg = <0x13000 0x100>; clocks = <&tbg 0>, <&tbg 1>, <&tbg 2>, @@ -113,7 +113,7 @@ #clock-cells = <1>; }; - sb_perih_clk: sb-periph-clk@18000{ + sb_periph_clk: sb-periph-clk@18000 { compatible = "marvell,armada-3700-periph-clock-sb"; reg = <0x18000 0x100>; clocks = <&tbg 0>, <&tbg 1>, <&tbg 2>, From 2ec27be33898effa47fcb2cd45abb552a97fac89 Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Tue, 6 Sep 2016 19:41:12 +0200 Subject: [PATCH 129/503] arm64: dts: marvell: fix clocksource for CP110 slave SPI0 I2C and SPI interfaces share common clock trees within the CP110 HW block. It occurred that SPI0 interface has wrong clock assignment in the device tree, which is fixed in this commit to a proper value. Fixes: c749b8d9de32 ("arm64: dts: marvell: add description for the ...") Signed-off-by: Marcin Wojtas Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi index 842fb333285c..565b3cb3d7ff 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi @@ -131,7 +131,7 @@ #address-cells = <0x1>; #size-cells = <0x0>; cell-index = <1>; - clocks = <&cps_syscon0 0 3>; + clocks = <&cps_syscon0 1 21>; status = "disabled"; }; From 8d897006fe9206d64cbe353310be26d7c911e69d Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Tue, 8 Nov 2016 17:31:32 +0100 Subject: [PATCH 130/503] arm64: dts: marvell: add unique identifiers for Armada A8k SPI controllers Enabling SPI controllers, which are attached to different busses inside an SoC, may result in overlapping enumeration and cause sysfs registration failure. Example log after enabling two controllers on Armada 8040 SoC with same identifiers: [ 3.740415] sysfs: cannot create duplicate filename '/class/spi_master/spi0' [ 3.747510] ------------[ cut here ]------------ [ 3.752145] WARNING: at fs/sysfs/dir.c:31 [...] [ 4.002299] orion_spi: probe of f4700600.spi failed with error -17 spi-orion driver offers dedicated DT property ('cell-index'), that allow setting unique identifiers. Recently added support for CP110-slave HW block introduced two new SPI controllers' nodes with same ID as ones from CP110-master. This commit fixes the issue by assigning different 'cell-index' values for CP110-slave SPI controllers. Fixes: 4eef78a0091b ("arm64: dts: marvell: add description for the slave CP110 in Armada 8K") Signed-off-by: Marcin Wojtas Acked-by: Thomas Petazzoni Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi index 565b3cb3d7ff..6bf9e241179b 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi @@ -130,7 +130,7 @@ reg = <0x700600 0x50>; #address-cells = <0x1>; #size-cells = <0x0>; - cell-index = <1>; + cell-index = <3>; clocks = <&cps_syscon0 1 21>; status = "disabled"; }; @@ -140,7 +140,7 @@ reg = <0x700680 0x50>; #address-cells = <1>; #size-cells = <0>; - cell-index = <2>; + cell-index = <4>; clocks = <&cps_syscon0 1 21>; status = "disabled"; }; From 9cba9844547731d2f14d79485c43192ffaa37b76 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 25 Oct 2016 01:21:10 +0900 Subject: [PATCH 131/503] perf hist browser: Fix hierarchy column counts The perf report/top on TUI supports horizontal scrolling using LEFT and RIGHT keys. But it calculate the number of columns incorrectly when hierarchy mode is enabled so that keep pressing RIGHT key can make the output disappeared. In the hierarchy mode, all sort keys are collapsed into a single column, so it needs to be applied when calculating column numbers. Reported-and-Tested-by: Markus Trippelsdorf Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161024162110.17918-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 4ffff7be9299..5adedc1a09d3 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2076,8 +2076,21 @@ void hist_browser__init(struct hist_browser *browser, browser->b.use_navkeypressed = true; browser->show_headers = symbol_conf.show_hist_headers; - hists__for_each_format(hists, fmt) + if (symbol_conf.report_hierarchy) { + struct perf_hpp_list_node *fmt_node; + + /* count overhead columns (in the first node) */ + fmt_node = list_first_entry(&hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) + ++browser->b.columns; + + /* add a single column for whole hierarchy sort keys*/ ++browser->b.columns; + } else { + hists__for_each_format(hists, fmt) + ++browser->b.columns; + } hists__reset_column_width(hists); } From 3d9f4683929a968dc9b9493f4e608b109ad292a2 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 8 Nov 2016 22:08:30 +0900 Subject: [PATCH 132/503] perf hists browser: Fix indentation of folded sign on --hierarchy It should indent 2 spaces for folded sign and a whitespace. Signed-off-by: Namhyung Kim Tested-by: Markus Trippelsdorf Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161108130833.9263-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 5adedc1a09d3..225ef2a15a13 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1337,8 +1337,8 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser, } if (first) { - ui_browser__printf(&browser->b, "%c", folded_sign); - width--; + ui_browser__printf(&browser->b, "%c ", folded_sign); + width -= 2; first = false; } else { ui_browser__printf(&browser->b, " "); @@ -1555,7 +1555,7 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows int indent = hists->nr_hpp_node - 2; bool first_node, first_col; - ret = scnprintf(buf, size, " "); + ret = scnprintf(buf, size, " "); if (advance_hpp_check(&dummy_hpp, ret)) return ret; From 131d51eb1d17aac3a604cf929fd99ff4dd34f495 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 8 Nov 2016 22:08:31 +0900 Subject: [PATCH 133/503] perf hists browser: Show folded sign properly on --hierarchy When horizontal scrolling is used in hierarchy mode, the folded signed disappears at the right most column. Committer note: To test it, run 'perf top --hierarchy, see the '+' symbol at the first column, then press the right arrow key, the '+' symbol will disappear, this patch fixes that. Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Tested-by: Markus Trippelsdorf Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161108130833.9263-3-namhyung@kernel.org [ Move 'width -= 2' invariant to right after the if/else ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 225ef2a15a13..e767fbd17ad2 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1381,7 +1381,13 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser, } perf_hpp_list__for_each_format(entry->hpp_list, fmt) { - ui_browser__write_nstring(&browser->b, "", 2); + if (first) { + ui_browser__printf(&browser->b, "%c ", folded_sign); + first = false; + } else { + ui_browser__write_nstring(&browser->b, "", 2); + } + width -= 2; /* From b9bf911e990a189f89147ee6b66660a153ed0125 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 8 Nov 2016 22:08:32 +0900 Subject: [PATCH 134/503] perf hists browser: Fix column indentation on --hierarchy When horizontall scrolling is used in hierarchy mode, the the right most column has unnecessary indentation. Actually it's needed only if some of left (overhead) columns were shown. Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Tested-by: Markus Trippelsdorf Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161108130833.9263-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index e767fbd17ad2..a53fef0c673b 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1361,8 +1361,10 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser, width -= hpp.buf - s; } - ui_browser__write_nstring(&browser->b, "", hierarchy_indent); - width -= hierarchy_indent; + if (!first) { + ui_browser__write_nstring(&browser->b, "", hierarchy_indent); + width -= hierarchy_indent; + } if (column >= browser->b.horiz_scroll) { char s[2048]; @@ -1565,6 +1567,7 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows if (advance_hpp_check(&dummy_hpp, ret)) return ret; + first_node = true; /* the first hpp_list_node is for overhead columns */ fmt_node = list_first_entry(&hists->hpp_formats, struct perf_hpp_list_node, list); @@ -1579,12 +1582,16 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, " "); if (advance_hpp_check(&dummy_hpp, ret)) break; + + first_node = false; } - ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "%*s", - indent * HIERARCHY_INDENT, ""); - if (advance_hpp_check(&dummy_hpp, ret)) - return ret; + if (!first_node) { + ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "%*s", + indent * HIERARCHY_INDENT, ""); + if (advance_hpp_check(&dummy_hpp, ret)) + return ret; + } first_node = true; list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) { From c72ab446cac1d6c9551fd26c4cfef1b2fc5041fd Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 8 Nov 2016 22:08:33 +0900 Subject: [PATCH 135/503] perf hists: Fix column length on --hierarchy Markus reported that there's a weird behavior on perf top --hierarchy regarding the column length. Looking at the code, I found a dubious code which affects the symptoms. When --hierarchy option is used, the last column length might be inaccurate since it skips to update the length on leaf entries. I cannot remember why it did and looks like a leftover from previous version during the development. Anyway, updating the column length often is not harmful. So let's move the code out. Reported-and-Tested-by: Markus Trippelsdorf Signed-off-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Fixes: 1a3906a7e6b9 ("perf hists: Resort hist entries with hierarchy") Link: http://lkml.kernel.org/r/20161108130833.9263-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index b02992efb513..a69f027368ef 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1600,18 +1600,18 @@ static void hists__hierarchy_output_resort(struct hists *hists, if (prog) ui_progress__update(prog, 1); + hists->nr_entries++; + if (!he->filtered) { + hists->nr_non_filtered_entries++; + hists__calc_col_len(hists, he); + } + if (!he->leaf) { hists__hierarchy_output_resort(hists, prog, &he->hroot_in, &he->hroot_out, min_callchain_hits, use_callchain); - hists->nr_entries++; - if (!he->filtered) { - hists->nr_non_filtered_entries++; - hists__calc_col_len(hists, he); - } - continue; } From b0b6e86846093c5f8820386bc01515f857dd8faa Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 8 Nov 2016 09:35:06 +0100 Subject: [PATCH 136/503] x86/cpu/AMD: Fix cpu_llc_id for AMD Fam17h systems cpu_llc_id (Last Level Cache ID) derivation on AMD Fam17h has an underflow bug when extracting the socket_id value. It starts from 0 so subtracting 1 from it will result in an invalid value. This breaks scheduling topology later on since the cpu_llc_id will be incorrect. For example, the the cpu_llc_id of the *other* CPU in the loops in set_cpu_sibling_map() underflows and we're generating the funniest thread_siblings masks and then when I run 8 threads of nbench, they get spread around the LLC domains in a very strange pattern which doesn't give you the normal scheduling spread one would expect for performance. Other things like EDAC use cpu_llc_id so they will be b0rked too. So, the APIC ID is preset in APICx020 for bits 3 and above: they contain the core complex, node and socket IDs. The LLC is at the core complex level so we can find a unique cpu_llc_id by right shifting the APICID by 3 because then the least significant bit will be the Core Complex ID. Tested-by: Borislav Petkov Signed-off-by: Yazen Ghannam [ Cleaned up and extended the commit message. ] Signed-off-by: Borislav Petkov Acked-by: Thomas Gleixner Cc: # v4.4.. Cc: Aravind Gopalakrishnan Cc: Linus Torvalds Cc: Peter Zijlstra Fixes: 3849e91f571d ("x86/AMD: Fix last level cache topology for AMD Fam17h systems") Link: http://lkml.kernel.org/r/20161108083506.rvqb5h4chrcptj7d@pd.tnic Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index b81fe2d63e15..1e81a37c034e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -347,7 +347,6 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) #ifdef CONFIG_SMP unsigned bits; int cpu = smp_processor_id(); - unsigned int socket_id, core_complex_id; bits = c->x86_coreid_bits; /* Low order bits define the core id (index of core in socket) */ @@ -365,10 +364,7 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) if (c->x86 != 0x17 || !cpuid_edx(0x80000006)) return; - socket_id = (c->apicid >> bits) - 1; - core_complex_id = (c->apicid & ((1 << bits) - 1)) >> 3; - - per_cpu(cpu_llc_id, cpu) = (socket_id << 3) | core_complex_id; + per_cpu(cpu_llc_id, cpu) = c->apicid >> 3; #endif } From aa5fd0fb77486b8a6764ead8627baa14790e4280 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Fri, 4 Nov 2016 10:28:49 +0800 Subject: [PATCH 137/503] driver: macvlan: Destroy new macvlan port if macvlan_common_newlink failed. When there is no existing macvlan port in lowdev, one new macvlan port would be created. But it doesn't be destoried when something failed later. It casues some memleak. Now add one flag to indicate if new macvlan port is created. Signed-off-by: Gao Feng Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 3234fcdea317..d2d6f12a112f 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1278,6 +1278,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, struct net_device *lowerdev; int err; int macmode; + bool create = false; if (!tb[IFLA_LINK]) return -EINVAL; @@ -1304,12 +1305,18 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, err = macvlan_port_create(lowerdev); if (err < 0) return err; + create = true; } port = macvlan_port_get_rtnl(lowerdev); /* Only 1 macvlan device can be created in passthru mode */ - if (port->passthru) - return -EINVAL; + if (port->passthru) { + /* The macvlan port must be not created this time, + * still goto destroy_macvlan_port for readability. + */ + err = -EINVAL; + goto destroy_macvlan_port; + } vlan->lowerdev = lowerdev; vlan->dev = dev; @@ -1325,24 +1332,28 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, vlan->flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]); if (vlan->mode == MACVLAN_MODE_PASSTHRU) { - if (port->count) - return -EINVAL; + if (port->count) { + err = -EINVAL; + goto destroy_macvlan_port; + } port->passthru = true; eth_hw_addr_inherit(dev, lowerdev); } if (data && data[IFLA_MACVLAN_MACADDR_MODE]) { - if (vlan->mode != MACVLAN_MODE_SOURCE) - return -EINVAL; + if (vlan->mode != MACVLAN_MODE_SOURCE) { + err = -EINVAL; + goto destroy_macvlan_port; + } macmode = nla_get_u32(data[IFLA_MACVLAN_MACADDR_MODE]); err = macvlan_changelink_sources(vlan, macmode, data); if (err) - return err; + goto destroy_macvlan_port; } err = register_netdevice(dev); if (err < 0) - return err; + goto destroy_macvlan_port; dev->priv_flags |= IFF_MACVLAN; err = netdev_upper_dev_link(lowerdev, dev); @@ -1357,7 +1368,9 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, unregister_netdev: unregister_netdevice(dev); - +destroy_macvlan_port: + if (create) + macvlan_port_destroy(port->dev); return err; } EXPORT_SYMBOL_GPL(macvlan_common_newlink); From 3023898b7d4aac65987bd2f485cc22390aae6f78 Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Fri, 4 Nov 2016 15:36:49 -0400 Subject: [PATCH 138/503] sock: fix sendmmsg for partial sendmsg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do not send the next message in sendmmsg for partial sendmsg invocations. sendmmsg assumes that it can continue sending the next message when the return value of the individual sendmsg invocations is positive. It results in corrupting the data for TCP, SCTP, and UNIX streams. For example, sendmmsg([["abcd"], ["efgh"]]) can result in a stream of "aefgh" if the first sendmsg invocation sends only the first byte while the second sendmsg goes through. Datagram sockets either send the entire datagram or fail, so this patch affects only sockets of type SOCK_STREAM and SOCK_SEQPACKET. Fixes: 228e548e6020 ("net: Add sendmmsg socket system call") Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Eric Dumazet Signed-off-by: Willem de Bruijn Signed-off-by: Neal Cardwell Acked-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- net/socket.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/socket.c b/net/socket.c index 5a9bf5ee2464..272518b087c8 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2038,6 +2038,8 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, if (err) break; ++datagrams; + if (msg_data_left(&msg_sys)) + break; cond_resched(); } From fb56be83e43d0bb0cc9e8c35a6a9cac853231ba2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Fri, 4 Nov 2016 14:51:54 -0700 Subject: [PATCH 139/503] net-ipv6: on device mtu change do not add mtu to mtu-less routes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Routes can specify an mtu explicitly or inherit the mtu from the underlying device - this inheritance is implemented in dst->ops->mtu handlers ip6_mtu() and ip6_blackhole_mtu(). Currently changing the mtu of a device adds mtu explicitly to routes using that device. ie. # ip link set dev lo mtu 65536 # ip -6 route add local 2000::1 dev lo # ip -6 route get 2000::1 local 2000::1 dev lo table local src ... metric 1024 pref medium # ip link set dev lo mtu 65535 # ip -6 route get 2000::1 local 2000::1 dev lo table local src ... metric 1024 mtu 65535 pref medium # ip link set dev lo mtu 65536 # ip -6 route get 2000::1 local 2000::1 dev lo table local src ... metric 1024 mtu 65536 pref medium # ip -6 route del local 2000::1 After this patch the route entry no longer changes unless it already has an mtu. There is no need: this inheritance is already done in ip6_mtu() # ip link set dev lo mtu 65536 # ip -6 route add local 2000::1 dev lo # ip -6 route add local 2000::2 dev lo mtu 2000 # ip -6 route get 2000::1; ip -6 route get 2000::2 local 2000::1 dev lo table local src ... metric 1024 pref medium local 2000::2 dev lo table local src ... metric 1024 mtu 2000 pref medium # ip link set dev lo mtu 65535 # ip -6 route get 2000::1; ip -6 route get 2000::2 local 2000::1 dev lo table local src ... metric 1024 pref medium local 2000::2 dev lo table local src ... metric 1024 mtu 2000 pref medium # ip link set dev lo mtu 1501 # ip -6 route get 2000::1; ip -6 route get 2000::2 local 2000::1 dev lo table local src ... metric 1024 pref medium local 2000::2 dev lo table local src ... metric 1024 mtu 1501 pref medium # ip link set dev lo mtu 65536 # ip -6 route get 2000::1; ip -6 route get 2000::2 local 2000::1 dev lo table local src ... metric 1024 pref medium local 2000::2 dev lo table local src ... metric 1024 mtu 65536 pref medium # ip -6 route del local 2000::1 # ip -6 route del local 2000::2 This is desirable because changing device mtu and then resetting it to the previous value shouldn't change the user visible routing table. Signed-off-by: Maciej Żenczykowski CC: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/route.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7403d90dcb38..1b57e11e6e0d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2761,6 +2761,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) PMTU discouvery. */ if (rt->dst.dev == arg->dev && + dst_metric_raw(&rt->dst, RTAX_MTU) && !dst_metric_locked(&rt->dst, RTAX_MTU)) { if (rt->rt6i_flags & RTF_CACHE) { /* For RTF_CACHE with rt6i_pmtu == 0 From f91d718156fe93d0cf684cacf5f247c35a825d79 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 6 Nov 2016 18:05:06 +0200 Subject: [PATCH 140/503] Revert "net/mlx4_en: Fix panic during reboot" This reverts commit 9d2afba058722d40cc02f430229c91611c0e8d16. The original issue would possibly exist if an external module tried calling our "ethtool_ops" without checking if it still exists. The right way of solving it is by simply doing the check in the caller side. Currently, no action is required as there's no such use case. Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 12c99a2655f2..3a47e83d3e07 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2202,7 +2202,6 @@ void mlx4_en_destroy_netdev(struct net_device *dev) if (!shutdown) free_netdev(dev); - dev->ethtool_ops = NULL; } static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) From d667f78514c656a6a8bf0b3d6134a7fe5cd4d317 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Mon, 7 Nov 2016 17:57:56 +0800 Subject: [PATCH 141/503] bna: Add synchronization for tx ring. We received two reports of BUG_ON in bnad_txcmpl_process() where hw_consumer_index appeared to be ahead of producer_index. Out of order write/read of these variables could explain these reports. bnad_start_xmit(), as a producer of tx descriptors, has a few memory barriers sprinkled around writes to producer_index and the device's doorbell but they're not paired with anything in bnad_txcmpl_process(), a consumer. Since we are synchronizing with a device, we must use mandatory barriers, not smp_*. Also, I didn't see the purpose of the last smp_mb() in bnad_start_xmit(). Signed-off-by: Benjamin Poirier Signed-off-by: David S. Miller --- drivers/net/ethernet/brocade/bna/bnad.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index f9df4b5ae90e..f42f672b0e7e 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -177,6 +177,7 @@ bnad_txcmpl_process(struct bnad *bnad, struct bna_tcb *tcb) return 0; hw_cons = *(tcb->hw_consumer_index); + rmb(); cons = tcb->consumer_index; q_depth = tcb->q_depth; @@ -3094,7 +3095,7 @@ bnad_start_xmit(struct sk_buff *skb, struct net_device *netdev) BNA_QE_INDX_INC(prod, q_depth); tcb->producer_index = prod; - smp_mb(); + wmb(); if (unlikely(!test_bit(BNAD_TXQ_TX_STARTED, &tcb->flags))) return NETDEV_TX_OK; @@ -3102,7 +3103,6 @@ bnad_start_xmit(struct sk_buff *skb, struct net_device *netdev) skb_tx_timestamp(skb); bna_txq_prod_indx_doorbell(tcb); - smp_mb(); return NETDEV_TX_OK; } From cdb26d3387f0cdf7b2a2eea581385173547ef21f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Mon, 7 Nov 2016 13:53:27 +0100 Subject: [PATCH 142/503] net: bgmac: fix reversed checks for clock control flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes regression introduced by patch adding feature flags. It was already reported and patch followed (it got accepted) but it appears it was incorrect. Instead of fixing reversed condition it broke a good one. This patch was verified to actually fix SoC hanges caused by bgmac on BCM47186B0. Fixes: db791eb2970b ("net: ethernet: bgmac: convert to feature flags") Fixes: 4af1474e6198 ("net: bgmac: Fix errant feature flag check") Cc: Jon Mason Signed-off-by: Rafał Miłecki Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bgmac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 91cbf92de971..49f4cafe5438 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1049,9 +1049,9 @@ static void bgmac_enable(struct bgmac *bgmac) mode = (bgmac_read(bgmac, BGMAC_DEV_STATUS) & BGMAC_DS_MM_MASK) >> BGMAC_DS_MM_SHIFT; - if (!(bgmac->feature_flags & BGMAC_FEAT_CLKCTLST) || mode != 0) + if (bgmac->feature_flags & BGMAC_FEAT_CLKCTLST || mode != 0) bgmac_set(bgmac, BCMA_CLKCTLST, BCMA_CLKCTLST_FORCEHT); - if (bgmac->feature_flags & BGMAC_FEAT_CLKCTLST && mode == 2) + if (!(bgmac->feature_flags & BGMAC_FEAT_CLKCTLST) && mode == 2) bgmac_cco_ctl_maskset(bgmac, 1, ~0, BGMAC_CHIPCTL_1_RXC_DLL_BYPASS); From d49597fd3bc7d9534de55e9256767f073be1b33a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 9 Nov 2016 16:35:51 +0100 Subject: [PATCH 143/503] x86/cpu: Deal with broken firmware (VMWare/XEN) Both ACPI and MP specifications require that the APIC id in the respective tables must be the same as the APIC id in CPUID. The kernel retrieves the physical package id from the APIC id during the ACPI/MP table scan and builds the physical to logical package map. The physical package id which is used after a CPU comes up is retrieved from CPUID. So we rely on ACPI/MP tables and CPUID agreeing in that respect. There exist VMware and XEN implementations which violate the spec. As a result the physical to logical package map, which relies on the ACPI/MP tables does not work on those systems, because the CPUID initialized physical package id does not match the firmware id. This causes system crashes and malfunction due to invalid package mappings. The only way to cure this is to sanitize the physical package id after the CPUID enumeration and yell when the APIC ids are different. Fix up the initial APIC id, which is fine as it is only used printout purposes. If the physical package IDs differ yell and use the package information from the ACPI/MP tables so the existing logical package map just works. Chas provided the resulting dmesg output for his affected 4 virtual sockets, 1 core per socket VM: [Firmware Bug]: CPU1: APIC id mismatch. Firmware: 1 CPUID: 2 [Firmware Bug]: CPU1: Using firmware package id 1 instead of 2 .... Reported-and-tested-by: "Charles (Chas) Williams" , Reported-by: M. Vefa Bicakci Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Borislav Petkov Cc: Alok Kataria Cc: Boris Ostrovsky Cc: #4.6+ Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1611091613540.3501@nanos Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/common.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9bd910a7dd0a..cc9e980c68ec 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -978,6 +978,35 @@ static void x86_init_cache_qos(struct cpuinfo_x86 *c) } } +/* + * The physical to logical package id mapping is initialized from the + * acpi/mptables information. Make sure that CPUID actually agrees with + * that. + */ +static void sanitize_package_id(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + unsigned int pkg, apicid, cpu = smp_processor_id(); + + apicid = apic->cpu_present_to_apicid(cpu); + pkg = apicid >> boot_cpu_data.x86_coreid_bits; + + if (apicid != c->initial_apicid) { + pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x CPUID: %x\n", + cpu, apicid, c->initial_apicid); + c->initial_apicid = apicid; + } + if (pkg != c->phys_proc_id) { + pr_err(FW_BUG "CPU%u: Using firmware package id %u instead of %u\n", + cpu, pkg, c->phys_proc_id); + c->phys_proc_id = pkg; + } + c->logical_proc_id = topology_phys_to_logical_pkg(pkg); +#else + c->logical_proc_id = 0; +#endif +} + /* * This does the hard work of actually picking apart the CPU stuff... */ @@ -1103,8 +1132,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_NUMA numa_add_cpu(smp_processor_id()); #endif - /* The boot/hotplug time assigment got cleared, restore it */ - c->logical_proc_id = topology_phys_to_logical_pkg(c->phys_proc_id); + sanitize_package_id(c); } /* From 82031ea29e454b574bc6f49a33683a693ca5d907 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 4 Nov 2016 19:39:39 +0100 Subject: [PATCH 144/503] scripts/has-stack-protector: add -fno-PIE Adding -no-PIE to the fstack protector check. -no-PIE was introduced before -fstack-protector so there is no need for a runtime check. Without it the build stops: |Cannot use CONFIG_CC_STACKPROTECTOR_STRONG: -fstack-protector-strong available but compiler is broken due to -mcmodel=kernel + -fPIE if -fPIE is enabled by default. Tagging it stable so it is possible to compile recent stable kernels as well. Cc: stable@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Michal Marek --- scripts/gcc-x86_64-has-stack-protector.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gcc-x86_64-has-stack-protector.sh b/scripts/gcc-x86_64-has-stack-protector.sh index 973e8c141567..17867e723a51 100755 --- a/scripts/gcc-x86_64-has-stack-protector.sh +++ b/scripts/gcc-x86_64-has-stack-protector.sh @@ -1,6 +1,6 @@ #!/bin/sh -echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs" +echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -c -O0 -mcmodel=kernel -fno-PIE -fstack-protector - -o - 2> /dev/null | grep -q "%gs" if [ "$?" -eq "0" ] ; then echo y else From 90944e40ba1838de4b2a9290cf273f9d76bd3bdd Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 4 Nov 2016 19:39:40 +0100 Subject: [PATCH 145/503] x86/kexec: add -fno-PIE If the gcc is configured to do -fPIE by default then the build aborts later with: | Unsupported relocation type: unknown type rel type name (29) Tagging it stable so it is possible to compile recent stable kernels as well. Cc: stable@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Michal Marek --- arch/x86/purgatory/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index ac58c1616408..555b9fa0ad43 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -16,6 +16,7 @@ KCOV_INSTRUMENT := n KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -MD -Os -mcmodel=large KBUILD_CFLAGS += -m$(BITS) +KBUILD_CFLAGS += $(call cc-option,-fno-PIE) $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE $(call if_changed,ld) From cc6acc11cad1eb1ae39707a3a6e4a97fafbeeabd Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 9 Nov 2016 15:34:05 +1100 Subject: [PATCH 146/503] kbuild: be more careful about matching preprocessed asm ___EXPORT_SYMBOL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CRC code for asm exports grabs the preprocessed asm, finds the ___EXPORT_SYMBOL and turns those into EXPORT_SYMBOL in a C program that can be preprocessed and parsed to create the CRC signatures from the type. The existing regex matching and replacement is too strict, and doesn't deal well with whitespace among other things. The line " EXPORT_SYMBOL(sym)" in a .S file would not match due to initial whitespace, for example, which resulted in x86's ___preempt_schedule failing to get CRCs. Reported-by: Philip Müller Signed-off-by: Nicholas Piggin Signed-off-by: Michal Marek --- scripts/Makefile.build | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 3e223c264469..7675d11ee65e 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -332,8 +332,8 @@ cmd_gensymtypes_S = \ (echo "\#include " ; \ echo "\#include " ; \ $(CPP) $(a_flags) $< | \ - grep ^___EXPORT_SYMBOL | \ - sed 's/___EXPORT_SYMBOL \([a-zA-Z0-9_]*\),.*/EXPORT_SYMBOL(\1);/' ) | \ + grep "\<___EXPORT_SYMBOL\>" | \ + sed 's/.*___EXPORT_SYMBOL[[:space:]]*\([a-zA-Z0-9_]*\)[[:space:]]*,.*/EXPORT_SYMBOL(\1);/' ) | \ $(CPP) -D__GENKSYMS__ $(c_flags) -xc - | \ $(GENKSYMS) $(if $(1), -T $(2)) \ $(patsubst y,-s _,$(CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX)) \ From 80513a2b9f0448eadd10ae81a42229b33ef451fb Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Fri, 21 Oct 2016 08:34:59 -0500 Subject: [PATCH 147/503] ARM: omap3: Add missing memory node in SOM-LV The skeleton.dtsi file was removed in ARM64 for different reasons as explained in commit ("3ebee5a2e141 arm64: dts: kill skeleton.dtsi"). commit ("766a1fe78fc3 ARM: omap3: Add missing memory node") had fixes for Torpedo and Overo boards, but this SOM-LV was missed. This should help prevent the DTC warning: "Node /memory has a reg or ranges property, but no unit name" Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/logicpd-som-lv.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi index 0ff1c2de95bf..26cce4d18405 100644 --- a/arch/arm/boot/dts/logicpd-som-lv.dtsi +++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi @@ -13,6 +13,11 @@ }; }; + memory@80000000 { + device_type = "memory"; + reg = <0x80000000 0>; + }; + wl12xx_vmmc: wl12xx_vmmc { compatible = "regulator-fixed"; regulator-name = "vwl1271"; From 3e884493448131179a5b7cae1ddca1028ffaecc8 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Mon, 7 Nov 2016 10:51:40 -0600 Subject: [PATCH 148/503] net: qcom/emac: configure the external phy to allow pause frames Pause frames are used to enable flow control. A MAC can send and receive pause frames in order to throttle traffic. However, the PHY must be configured to allow those frames to pass through. Reviewed-by: Florian Fainelli Signed-off-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac-mac.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index 6fb3bee904d3..70a55dcc431d 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -1003,6 +1003,12 @@ int emac_mac_up(struct emac_adapter *adpt) writel((u32)~DIS_INT, adpt->base + EMAC_INT_STATUS); writel(adpt->irq.mask, adpt->base + EMAC_INT_MASK); + /* Enable pause frames. Without this feature, the EMAC has been shown + * to receive (and drop) frames with FCS errors at gigabit connections. + */ + adpt->phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + adpt->phydev->advertising |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + adpt->phydev->irq = PHY_IGNORE_INTERRUPT; phy_start(adpt->phydev); From df63022e182de4041b65ae22df1950d3416b577e Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Mon, 7 Nov 2016 10:51:41 -0600 Subject: [PATCH 149/503] net: qcom/emac: enable flow control if requested If the PHY has been configured to allow pause frames, then the MAC should be configured to generate and/or accept those frames. Signed-off-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac-mac.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index 70a55dcc431d..0b4deb31e742 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -575,10 +575,11 @@ void emac_mac_start(struct emac_adapter *adpt) mac |= TXEN | RXEN; /* enable RX/TX */ - /* We don't have ethtool support yet, so force flow-control mode - * to 'full' always. - */ - mac |= TXFC | RXFC; + /* Configure MAC flow control to match the PHY's settings. */ + if (phydev->pause) + mac |= RXFC; + if (phydev->pause != phydev->asym_pause) + mac |= TXFC; /* setup link speed */ mac &= ~SPEED_MASK; From 9d1a6c4ea43e48c7880c85971c17939b56832d8a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 7 Nov 2016 12:03:09 -0800 Subject: [PATCH 150/503] net: icmp_route_lookup should use rt dev to determine L3 domain icmp_send is called in response to some event. The skb may not have the device set (skb->dev is NULL), but it is expected to have an rt. Update icmp_route_lookup to use the rt on the skb to determine L3 domain. Fixes: 613d09b30f8b ("net: Use VRF device index for lookups on TX") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 38abe70e595f..48734ee6293f 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -477,7 +477,7 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; fl4->fl4_icmp_code = code; - fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev); + fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev); security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); rt = __ip_route_output_key_hash(net, fl4, @@ -502,7 +502,7 @@ static struct rtable *icmp_route_lookup(struct net *net, if (err) goto relookup_failed; - if (inet_addr_type_dev_table(net, skb_in->dev, + if (inet_addr_type_dev_table(net, skb_dst(skb_in)->dev, fl4_dec.saddr) == RTN_LOCAL) { rt2 = __ip_route_output_key(net, &fl4_dec); if (IS_ERR(rt2)) From 6dbcd8fb5968fda3a5fba019dfb0c80c3139627b Mon Sep 17 00:00:00 2001 From: John Allen Date: Mon, 7 Nov 2016 14:27:28 -0600 Subject: [PATCH 151/503] ibmvnic: Start completion queue negotiation at server-provided optimum values Use the opt_* fields to determine the starting point for negotiating the number of tx/rx completion queues with the vnic server. These contain the number of queues that the vnic server estimates that it will be able to allocate. While renegotiation may still occur, using the opt_* fields will reduce the number of times this needs to happen and will prevent driver probe timeout on systems using large numbers of ibmvnic client devices per vnic port. Signed-off-by: John Allen Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 5f44c5520fbc..f6c9b6d38ac7 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1505,9 +1505,8 @@ static void init_sub_crqs(struct ibmvnic_adapter *adapter, int retry) adapter->max_rx_add_entries_per_subcrq > entries_page ? entries_page : adapter->max_rx_add_entries_per_subcrq; - /* Choosing the maximum number of queues supported by firmware*/ - adapter->req_tx_queues = adapter->max_tx_queues; - adapter->req_rx_queues = adapter->max_rx_queues; + adapter->req_tx_queues = adapter->opt_tx_comp_sub_queues; + adapter->req_rx_queues = adapter->opt_rx_comp_queues; adapter->req_rx_add_queues = adapter->max_rx_add_queues; adapter->req_mtu = adapter->max_mtu; From 4053ab1bf98dd128344b9e67ef139f931a967ae1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 7 Nov 2016 22:09:07 +0100 Subject: [PATCH 152/503] vxlan: hide unused local variable A bugfix introduced a harmless warning in v4.9-rc4: drivers/net/vxlan.c: In function 'vxlan_group_used': drivers/net/vxlan.c:947:21: error: unused variable 'sock6' [-Werror=unused-variable] This hides the variable inside of the same #ifdef that is around its user. The extraneous initialization is removed at the same time, it was accidentally introduced in the same commit. Fixes: c6fcc4fc5f8b ("vxlan: avoid using stale vxlan socket.") Signed-off-by: Arnd Bergmann Acked-by: Jiri Benc Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index f3c2fa3ab0d5..24532cdebb00 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -944,7 +944,9 @@ static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev) { struct vxlan_dev *vxlan; struct vxlan_sock *sock4; - struct vxlan_sock *sock6 = NULL; +#if IS_ENABLED(CONFIG_IPV6) + struct vxlan_sock *sock6; +#endif unsigned short family = dev->default_dst.remote_ip.sa.sa_family; sock4 = rtnl_dereference(dev->vn4_sock); From f567e950bf51290755a2539ff2aaef4c26f735d3 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 7 Nov 2016 23:22:19 +0100 Subject: [PATCH 153/503] rtnl: reset calcit fptr in rtnl_unregister() To avoid having dangling function pointers left behind, reset calcit in rtnl_unregister(), too. This is no issue so far, as only the rtnl core registers a netlink handler with a calcit hook which won't be unregistered, but may become one if new code makes use of the calcit hook. Fixes: c7ac8679bec9 ("rtnetlink: Compute and store minimum ifinfo...") Cc: Jeff Kirsher Cc: Greg Rose Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fb7348f13501..db313ec7af32 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -275,6 +275,7 @@ int rtnl_unregister(int protocol, int msgtype) rtnl_msg_handlers[protocol][msgindex].doit = NULL; rtnl_msg_handlers[protocol][msgindex].dumpit = NULL; + rtnl_msg_handlers[protocol][msgindex].calcit = NULL; return 0; } From 8da3cf2a49a6d0ca5e620c6a5eee49b99a3f0880 Mon Sep 17 00:00:00 2001 From: Allan Chou Date: Tue, 8 Nov 2016 16:08:01 -0600 Subject: [PATCH 154/503] Net Driver: Add Cypress GX3 VID=04b4 PID=3610. Add support for Cypress GX3 SuperSpeed to Gigabit Ethernet Bridge Controller (Vendor=04b4 ProdID=3610). Patch verified on x64 linux kernel 4.7.4, 4.8.6, 4.9-rc4 systems with the Kensington SD4600P USB-C Universal Dock with Power, which uses the Cypress GX3 SuperSpeed to Gigabit Ethernet Bridge Controller. A similar patch was signed-off and tested-by Allan Chou on 2015-12-01. Allan verified his similar patch on x86 Linux kernel 4.1.6 system with Cypress GX3 SuperSpeed to Gigabit Ethernet Bridge Controller. Tested-by: Allan Chou Tested-by: Chris Roth Tested-by: Artjom Simon Signed-off-by: Allan Chou Signed-off-by: Chris Roth Signed-off-by: David S. Miller --- drivers/net/usb/ax88179_178a.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index e6338c16081a..8a6675d92b98 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1656,6 +1656,19 @@ static const struct driver_info ax88178a_info = { .tx_fixup = ax88179_tx_fixup, }; +static const struct driver_info cypress_GX3_info = { + .description = "Cypress GX3 SuperSpeed to Gigabit Ethernet Controller", + .bind = ax88179_bind, + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, + .reset = ax88179_reset, + .stop = ax88179_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, + .tx_fixup = ax88179_tx_fixup, +}; + static const struct driver_info dlink_dub1312_info = { .description = "D-Link DUB-1312 USB 3.0 to Gigabit Ethernet Adapter", .bind = ax88179_bind, @@ -1717,6 +1730,10 @@ static const struct usb_device_id products[] = { /* ASIX AX88178A 10/100/1000 */ USB_DEVICE(0x0b95, 0x178a), .driver_info = (unsigned long)&ax88178a_info, +}, { + /* Cypress GX3 SuperSpeed to Gigabit Ethernet Bridge Controller */ + USB_DEVICE(0x04b4, 0x3610), + .driver_info = (unsigned long)&cypress_GX3_info, }, { /* D-Link DUB-1312 USB 3.0 to Gigabit Ethernet Adapter */ USB_DEVICE(0x2001, 0x4a00), From 9b6c14d51bd2304b92f842e96172a9cc822fc77c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 9 Nov 2016 09:07:26 -0800 Subject: [PATCH 155/503] net: tcp response should set oif only if it is L3 master Lorenzo noted an Android unit test failed due to e0d56fdd7342: "The expectation in the test was that the RST replying to a SYN sent to a closed port should be generated with oif=0. In other words it should not prefer the interface where the SYN came in on, but instead should follow whatever the routing table says it should do." Revert the change to ip_send_unicast_reply and tcp_v6_send_response such that the oif in the flow is set to the skb_iif only if skb_iif is an L3 master. Fixes: e0d56fdd7342 ("net: l3mdev: remove redundant calls") Reported-by: Lorenzo Colitti Signed-off-by: David Ahern Tested-by: Lorenzo Colitti Acked-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 3 ++- net/ipv6/tcp_ipv6.c | 8 ++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 49714010ac2e..9403fa3850be 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1577,7 +1577,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, } oif = arg->bound_dev_if; - oif = oif ? : skb->skb_iif; + if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) + oif = skb->skb_iif; flowi4_init_output(&fl4, oif, IP4_REPLY_MARK(net, skb->mark), diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5a27ab4eab39..6ca23c2e76f7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -818,8 +818,12 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.flowi6_proto = IPPROTO_TCP; if (rt6_need_strict(&fl6.daddr) && !oif) fl6.flowi6_oif = tcp_v6_iif(skb); - else - fl6.flowi6_oif = oif ? : skb->skb_iif; + else { + if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) + oif = skb->skb_iif; + + fl6.flowi6_oif = oif; + } fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; From 2ecb704a1290edb5e3d53a75529192e7ed2a1a28 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Thu, 10 Nov 2016 13:20:05 +0800 Subject: [PATCH 156/503] ALSA: hda - add a new condition to check if it is thinkpad Latest Thinkpad laptops use the HKEY_HID LEN0268 instead of the LEN0068, as a result neither audio mute led nor mic mute led can work any more. After adding the new HKEY_HID into the is_thinkpad(), both of them works well as before. Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/thinkpad_helper.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/thinkpad_helper.c b/sound/pci/hda/thinkpad_helper.c index 6a23302297c9..4d9d320a7971 100644 --- a/sound/pci/hda/thinkpad_helper.c +++ b/sound/pci/hda/thinkpad_helper.c @@ -13,7 +13,8 @@ static void (*old_vmaster_hook)(void *, int); static bool is_thinkpad(struct hda_codec *codec) { return (codec->core.subsystem_id >> 16 == 0x17aa) && - (acpi_dev_found("LEN0068") || acpi_dev_found("IBM0068")); + (acpi_dev_found("LEN0068") || acpi_dev_found("LEN0268") || + acpi_dev_found("IBM0068")); } static void update_tpacpi_mute_led(void *private_data, int enabled) From 0ace81ec7192201af48528c309ee0b4103021f55 Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Wed, 9 Nov 2016 15:04:39 -0500 Subject: [PATCH 157/503] ipv4: update comment to document GSO fragmentation cases. This is a follow-up to commit 9ee6c5dc816a ("ipv4: allow local fragmentation in ip_finish_output_gso()"), updating the comment documenting cases in which fragmentation is needed for egress GSO packets. Suggested-by: Shmulik Ladkani Reviewed-by: Shmulik Ladkani Signed-off-by: Lance Richardson Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 9403fa3850be..105908d841a3 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -244,12 +244,18 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk, if (skb_gso_validate_mtu(skb, mtu)) return ip_finish_output2(net, sk, skb); - /* Slowpath - GSO segment length is exceeding the dst MTU. + /* Slowpath - GSO segment length exceeds the egress MTU. * - * This can happen in two cases: - * 1) TCP GRO packet, DF bit not set - * 2) skb arrived via virtio-net, we thus get TSO/GSO skbs directly - * from host network stack. + * This can happen in several cases: + * - Forwarding of a TCP GRO skb, when DF flag is not set. + * - Forwarding of an skb that arrived on a virtualization interface + * (virtio-net/vhost/tap) with TSO/GSO size set by other network + * stack. + * - Local GSO skb transmitted on an NETIF_F_TSO tunnel stacked over an + * interface with a smaller MTU. + * - Arriving GRO skb (or GSO skb in a virtualized environment) that is + * bridged to a NETIF_F_TSO tunnel stacked over an interface with an + * insufficent MTU. */ features = netif_skb_features(skb); BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET); From 8d1d8fcb21cfc4a65731760c3100920f929e8f3d Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Wed, 9 Nov 2016 22:48:43 +0200 Subject: [PATCH 158/503] qed: configure ll2 RoCE v1/v2 flavor correctly Currently RoCE v2 won't operate with RDMA CM due to missing setting of the roce-flavour in the ll2 configuration. This patch properly sets the flavour, and deletes incorrect HSI that doesn't [yet] exist. Fixes: abd49676c707 ("qed: Add RoCE ll2 & GSI support") Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_hsi.h | 3 --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 72eee29c677f..2777d5bb4380 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -727,9 +727,6 @@ struct core_tx_bd_flags { #define CORE_TX_BD_FLAGS_L4_PROTOCOL_SHIFT 6 #define CORE_TX_BD_FLAGS_L4_PSEUDO_CSUM_MODE_MASK 0x1 #define CORE_TX_BD_FLAGS_L4_PSEUDO_CSUM_MODE_SHIFT 7 -#define CORE_TX_BD_FLAGS_ROCE_FLAV_MASK 0x1 -#define CORE_TX_BD_FLAGS_ROCE_FLAV_SHIFT 12 - }; struct core_tx_bd { diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 63e1a1b0ef8e..f95385cbbd40 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -1119,6 +1119,7 @@ static void qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn, start_bd->bd_flags.as_bitfield |= CORE_TX_BD_FLAGS_START_BD_MASK << CORE_TX_BD_FLAGS_START_BD_SHIFT; SET_FIELD(start_bd->bitfield0, CORE_TX_BD_NBDS, num_of_bds); + SET_FIELD(start_bd->bitfield0, CORE_TX_BD_ROCE_FLAV, type); DMA_REGPAIR_LE(start_bd->addr, first_frag); start_bd->nbytes = cpu_to_le16(first_frag_len); From 5c5f26090840951b4102d9a1e6db9aac41101e5a Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Wed, 9 Nov 2016 22:48:44 +0200 Subject: [PATCH 159/503] qed: Correct rdma params configuration Previous fix has broken RoCE support as the rdma_pf_params are now being set into the parameters only after the params are alrady assigned into the hw-function. Fixes: 0189efb8f4f8 ("qed*: Fix Kconfig dependencies with INFINIBAND_QEDR") Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_main.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index c418360ba02a..333c7442e48a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -839,20 +839,19 @@ static void qed_update_pf_params(struct qed_dev *cdev, { int i; + if (IS_ENABLED(CONFIG_QED_RDMA)) { + params->rdma_pf_params.num_qps = QED_ROCE_QPS; + params->rdma_pf_params.min_dpis = QED_ROCE_DPIS; + /* divide by 3 the MRs to avoid MF ILT overflow */ + params->rdma_pf_params.num_mrs = RDMA_MAX_TIDS; + params->rdma_pf_params.gl_pi = QED_ROCE_PROTOCOL_INDEX; + } + for (i = 0; i < cdev->num_hwfns; i++) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; p_hwfn->pf_params = *params; } - - if (!IS_ENABLED(CONFIG_QED_RDMA)) - return; - - params->rdma_pf_params.num_qps = QED_ROCE_QPS; - params->rdma_pf_params.min_dpis = QED_ROCE_DPIS; - /* divide by 3 the MRs to avoid MF ILT overflow */ - params->rdma_pf_params.num_mrs = RDMA_MAX_TIDS; - params->rdma_pf_params.gl_pi = QED_ROCE_PROTOCOL_INDEX; } static int qed_slowpath_start(struct qed_dev *cdev, From 33b1341cd1bf5c89e7ef332aa8ac3ed614a3d942 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 10 Nov 2016 12:31:04 +0100 Subject: [PATCH 160/503] mlxsw: spectrum_router: Fix handling of neighbour structure __neigh_create function works in a different way than assumed. It passes "n" as a parameter to ndo_neigh_construct. But this "n" might be destroyed right away before __neigh_create() returns in case there is already another neighbour struct in the hashtable with the same dev and primary key. That is not expected by mlxsw_sp_router_neigh_construct() and the stored "n" points to freed memory, eventually leading to crash. Fix this by doing tight 1:1 coupling between neighbour struct and internal driver neigh_entry. That allows to narrow down the key in internal driver hashtable to do lookups by "n" only. Fixes: 6cf3c971dc84 ("mlxsw: spectrum_router: Add private neigh table") Signed-off-by: Jiri Pirko Acked-by: Ido Schimmel Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 95 +++++++------------ 1 file changed, 34 insertions(+), 61 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 4573da2c5560..28630129065d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -600,15 +600,13 @@ static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp) } struct mlxsw_sp_neigh_key { - unsigned char addr[sizeof(struct in6_addr)]; - struct net_device *dev; + struct neighbour *n; }; struct mlxsw_sp_neigh_entry { struct rhash_head ht_node; struct mlxsw_sp_neigh_key key; u16 rif; - struct neighbour *n; bool offloaded; struct delayed_work dw; struct mlxsw_sp_port *mlxsw_sp_port; @@ -646,19 +644,15 @@ mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work); static struct mlxsw_sp_neigh_entry * -mlxsw_sp_neigh_entry_create(const void *addr, size_t addr_len, - struct net_device *dev, u16 rif, - struct neighbour *n) +mlxsw_sp_neigh_entry_create(struct neighbour *n, u16 rif) { struct mlxsw_sp_neigh_entry *neigh_entry; neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_ATOMIC); if (!neigh_entry) return NULL; - memcpy(neigh_entry->key.addr, addr, addr_len); - neigh_entry->key.dev = dev; + neigh_entry->key.n = n; neigh_entry->rif = rif; - neigh_entry->n = n; INIT_DELAYED_WORK(&neigh_entry->dw, mlxsw_sp_router_neigh_update_hw); INIT_LIST_HEAD(&neigh_entry->nexthop_list); return neigh_entry; @@ -671,13 +665,11 @@ mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp_neigh_entry *neigh_entry) } static struct mlxsw_sp_neigh_entry * -mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, const void *addr, - size_t addr_len, struct net_device *dev) +mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) { - struct mlxsw_sp_neigh_key key = {{ 0 } }; + struct mlxsw_sp_neigh_key key; - memcpy(key.addr, addr, addr_len); - key.dev = dev; + key.n = n; return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht, &key, mlxsw_sp_neigh_ht_params); } @@ -689,26 +681,20 @@ int mlxsw_sp_router_neigh_construct(struct net_device *dev, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_neigh_entry *neigh_entry; struct mlxsw_sp_rif *r; - u32 dip; int err; if (n->tbl != &arp_tbl) return 0; - dip = ntohl(*((__be32 *) n->primary_key)); - neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip), - n->dev); - if (neigh_entry) { - WARN_ON(neigh_entry->n != n); + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); + if (neigh_entry) return 0; - } r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev); if (WARN_ON(!r)) return -EINVAL; - neigh_entry = mlxsw_sp_neigh_entry_create(&dip, sizeof(dip), n->dev, - r->rif, n); + neigh_entry = mlxsw_sp_neigh_entry_create(n, r->rif); if (!neigh_entry) return -ENOMEM; err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry); @@ -727,14 +713,11 @@ void mlxsw_sp_router_neigh_destroy(struct net_device *dev, struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_neigh_entry *neigh_entry; - u32 dip; if (n->tbl != &arp_tbl) return; - dip = ntohl(*((__be32 *) n->primary_key)); - neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip), - n->dev); + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); if (!neigh_entry) return; mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry); @@ -862,7 +845,7 @@ static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp) * is active regardless of the traffic. */ if (!list_empty(&neigh_entry->nexthop_list)) - neigh_event_send(neigh_entry->n, NULL); + neigh_event_send(neigh_entry->key.n, NULL); } rtnl_unlock(); } @@ -908,9 +891,9 @@ static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work) rtnl_lock(); list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list, nexthop_neighs_list_node) { - if (!(neigh_entry->n->nud_state & NUD_VALID) && + if (!(neigh_entry->key.n->nud_state & NUD_VALID) && !list_empty(&neigh_entry->nexthop_list)) - neigh_event_send(neigh_entry->n, NULL); + neigh_event_send(neigh_entry->key.n, NULL); } rtnl_unlock(); @@ -927,7 +910,7 @@ static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work) { struct mlxsw_sp_neigh_entry *neigh_entry = container_of(work, struct mlxsw_sp_neigh_entry, dw.work); - struct neighbour *n = neigh_entry->n; + struct neighbour *n = neigh_entry->key.n; struct mlxsw_sp_port *mlxsw_sp_port = neigh_entry->mlxsw_sp_port; struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char rauht_pl[MLXSW_REG_RAUHT_LEN]; @@ -1030,11 +1013,8 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused, mlxsw_sp = mlxsw_sp_port->mlxsw_sp; dip = ntohl(*((__be32 *) n->primary_key)); - neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, - &dip, - sizeof(__be32), - dev); - if (WARN_ON(!neigh_entry) || WARN_ON(neigh_entry->n != n)) { + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); + if (WARN_ON(!neigh_entry)) { mlxsw_sp_port_dev_put(mlxsw_sp_port); return NOTIFY_DONE; } @@ -1343,33 +1323,26 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, struct fib_nh *fib_nh) { struct mlxsw_sp_neigh_entry *neigh_entry; - u32 gwip = ntohl(fib_nh->nh_gw); struct net_device *dev = fib_nh->nh_dev; struct neighbour *n; u8 nud_state; - neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip, - sizeof(gwip), dev); - if (!neigh_entry) { - __be32 gwipn = htonl(gwip); - - n = neigh_create(&arp_tbl, &gwipn, dev); + /* Take a reference of neigh here ensuring that neigh would + * not be detructed before the nexthop entry is finished. + * The reference is taken either in neigh_lookup() or + * in neith_create() in case n is not found. + */ + n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, dev); + if (!n) { + n = neigh_create(&arp_tbl, &fib_nh->nh_gw, dev); if (IS_ERR(n)) return PTR_ERR(n); neigh_event_send(n, NULL); - neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip, - sizeof(gwip), dev); - if (!neigh_entry) { - neigh_release(n); - return -EINVAL; - } - } else { - /* Take a reference of neigh here ensuring that neigh would - * not be detructed before the nexthop entry is finished. - * The second branch takes the reference in neith_create() - */ - n = neigh_entry->n; - neigh_clone(n); + } + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); + if (!neigh_entry) { + neigh_release(n); + return -EINVAL; } /* If that is the first nexthop connected to that neigh, add to @@ -1403,7 +1376,7 @@ static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp, if (list_empty(&nh->neigh_entry->nexthop_list)) list_del(&nh->neigh_entry->nexthop_neighs_list_node); - neigh_release(neigh_entry->n); + neigh_release(neigh_entry->key.n); } static struct mlxsw_sp_nexthop_group * @@ -1463,11 +1436,11 @@ static bool mlxsw_sp_nexthop_match(struct mlxsw_sp_nexthop *nh, for (i = 0; i < fi->fib_nhs; i++) { struct fib_nh *fib_nh = &fi->fib_nh[i]; - u32 gwip = ntohl(fib_nh->nh_gw); + struct neighbour *n = nh->neigh_entry->key.n; - if (memcmp(nh->neigh_entry->key.addr, - &gwip, sizeof(u32)) == 0 && - nh->neigh_entry->key.dev == fib_nh->nh_dev) + if (memcmp(n->primary_key, &fib_nh->nh_gw, + sizeof(fib_nh->nh_gw)) == 0 && + n->dev == fib_nh->nh_dev) return true; } return false; From 0e3715c9c250747280b1757ea267c577e7591e31 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 10 Nov 2016 12:31:05 +0100 Subject: [PATCH 161/503] mlxsw: spectrum_router: Ignore FIB notification events for non-init namespaces Since now, the table with same id in multiple netnamespaces were squashed to a single virtual router. That is not only incorrect, it also causes error messages when trying to use RALUE register to do double remove of FIB entries, like this one: mlxsw_spectrum 0000:03:00.0: EMAD reg access failed (tid=facb831c00007b20,reg_id=8013(ralue),type=write,status=7(bad parameter)) Since we don't allow ports to change namespaces (NETIF_F_NETNS_LOCAL), and the infrastructure is not yet prepared to handle netnamespaces, just ignore FIB notification events for non-init namespaces. That is clear to do since we don't need to offload them. Fixes: b45f64d16d45 ("mlxsw: spectrum_router: Use FIB notifications instead of switchdev calls") Signed-off-by: Jiri Pirko Acked-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 28630129065d..040737e14a3f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1931,6 +1931,9 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, struct fib_entry_notifier_info *fen_info = ptr; int err; + if (!net_eq(fen_info->info.net, &init_net)) + return NOTIFY_DONE; + switch (event) { case FIB_EVENT_ENTRY_ADD: err = mlxsw_sp_router_fib4_add(mlxsw_sp, fen_info); From d052db11c153cfb469f13a4121966f30ecb57c66 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 6 Nov 2016 21:20:32 +0100 Subject: [PATCH 162/503] i2c: mux: demux-pinctrl: make drivers with no pinctrl work again Some drivers like i2c-gpio do not have dedicated pinctrl states. They broke when error checking for pinctrl was added. Detect them now, and in their case, simply skip over pinctrl configuration. Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- drivers/i2c/muxes/i2c-demux-pinctrl.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/muxes/i2c-demux-pinctrl.c b/drivers/i2c/muxes/i2c-demux-pinctrl.c index b3893f6282ba..3e6fe1760d82 100644 --- a/drivers/i2c/muxes/i2c-demux-pinctrl.c +++ b/drivers/i2c/muxes/i2c-demux-pinctrl.c @@ -69,10 +69,28 @@ static int i2c_demux_activate_master(struct i2c_demux_pinctrl_priv *priv, u32 ne goto err_with_revert; } - p = devm_pinctrl_get_select(adap->dev.parent, priv->bus_name); + /* + * Check if there are pinctrl states at all. Note: we cant' use + * devm_pinctrl_get_select() because we need to distinguish between + * the -ENODEV from devm_pinctrl_get() and pinctrl_lookup_state(). + */ + p = devm_pinctrl_get(adap->dev.parent); if (IS_ERR(p)) { ret = PTR_ERR(p); - goto err_with_put; + /* continue if just no pinctrl states (e.g. i2c-gpio), otherwise exit */ + if (ret != -ENODEV) + goto err_with_put; + } else { + /* there are states. check and use them */ + struct pinctrl_state *s = pinctrl_lookup_state(p, priv->bus_name); + + if (IS_ERR(s)) { + ret = PTR_ERR(s); + goto err_with_put; + } + ret = pinctrl_select_state(p, s); + if (ret < 0) + goto err_with_put; } priv->chan[new_chan].parent_adap = adap; From f10a59eb8c1087f0ce03cf0392cd483922187066 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Thu, 10 Nov 2016 15:03:21 +0100 Subject: [PATCH 163/503] i2c: Documentation: i2c-topology: fix minor whitespace nit Signed-off-by: Peter Rosin Signed-off-by: Wolfram Sang --- Documentation/i2c/i2c-topology | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/i2c/i2c-topology b/Documentation/i2c/i2c-topology index e0aefeece551..1a014fede0b7 100644 --- a/Documentation/i2c/i2c-topology +++ b/Documentation/i2c/i2c-topology @@ -326,7 +326,7 @@ Two parent-locked sibling muxes This is a good topology. - .--------. + .--------. .----------. .--| dev D1 | | parent- |--' '--------' .--| locked | .--------. @@ -350,7 +350,7 @@ Mux-locked and parent-locked sibling muxes This is a good topology. - .--------. + .--------. .----------. .--| dev D1 | | mux- |--' '--------' .--| locked | .--------. From 74a5ed5c4f692df2ff0a2313ea71e81243525519 Mon Sep 17 00:00:00 2001 From: Thomas Tai Date: Thu, 3 Nov 2016 09:19:01 -0700 Subject: [PATCH 164/503] sparc64: Fix find_node warning if numa node cannot be found When booting up LDOM, find_node() warns that a physical address doesn't match a NUMA node. WARNING: CPU: 0 PID: 0 at arch/sparc/mm/init_64.c:835 find_node+0xf4/0x120 find_node: A physical address doesn't match a NUMA node rule. Some physical memory will be owned by node 0.Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 4.9.0-rc3 #4 Call Trace: [0000000000468ba0] __warn+0xc0/0xe0 [0000000000468c74] warn_slowpath_fmt+0x34/0x60 [00000000004592f4] find_node+0xf4/0x120 [0000000000dd0774] add_node_ranges+0x38/0xe4 [0000000000dd0b1c] numa_parse_mdesc+0x268/0x2e4 [0000000000dd0e9c] bootmem_init+0xb8/0x160 [0000000000dd174c] paging_init+0x808/0x8fc [0000000000dcb0d0] setup_arch+0x2c8/0x2f0 [0000000000dc68a0] start_kernel+0x48/0x424 [0000000000dcb374] start_early_boot+0x27c/0x28c [0000000000a32c08] tlb_fixup_done+0x4c/0x64 [0000000000027f08] 0x27f08 It is because linux use an internal structure node_masks[] to keep the best memory latency node only. However, LDOM mdesc can contain single latency-group with multiple memory latency nodes. If the address doesn't match the best latency node within node_masks[], it should check for an alternative via mdesc. The warning message should only be printed if the address doesn't match any node_masks[] nor within mdesc. To minimize the impact of searching mdesc every time, the last matched mask and index is stored in a variable. Signed-off-by: Thomas Tai Reviewed-by: Chris Hyser Reviewed-by: Liam Merwick Signed-off-by: David S. Miller --- arch/sparc/mm/init_64.c | 65 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 61 insertions(+), 4 deletions(-) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 439784b7b7ac..068eb3dcbcb5 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -802,6 +802,8 @@ struct mdesc_mblock { }; static struct mdesc_mblock *mblocks; static int num_mblocks; +static int find_numa_node_for_addr(unsigned long pa, + struct node_mem_mask *pnode_mask); static unsigned long ra_to_pa(unsigned long addr) { @@ -821,6 +823,9 @@ static unsigned long ra_to_pa(unsigned long addr) static int find_node(unsigned long addr) { + static bool search_mdesc = true; + static struct node_mem_mask last_mem_mask = { ~0UL, ~0UL }; + static int last_index; int i; addr = ra_to_pa(addr); @@ -830,10 +835,27 @@ static int find_node(unsigned long addr) if ((addr & p->mask) == p->val) return i; } - /* The following condition has been observed on LDOM guests.*/ - WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node" - " rule. Some physical memory will be owned by node 0."); - return 0; + /* The following condition has been observed on LDOM guests because + * node_masks only contains the best latency mask and value. + * LDOM guest's mdesc can contain a single latency group to + * cover multiple address range. Print warning message only if the + * address cannot be found in node_masks nor mdesc. + */ + if ((search_mdesc) && + ((addr & last_mem_mask.mask) != last_mem_mask.val)) { + /* find the available node in the mdesc */ + last_index = find_numa_node_for_addr(addr, &last_mem_mask); + numadbg("find_node: latency group for address 0x%lx is %d\n", + addr, last_index); + if ((last_index < 0) || (last_index >= num_node_masks)) { + /* WARN_ONCE() and use default group 0 */ + WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node rule. Some physical memory will be owned by node 0."); + search_mdesc = false; + last_index = 0; + } + } + + return last_index; } static u64 memblock_nid_range(u64 start, u64 end, int *nid) @@ -1160,6 +1182,41 @@ int __node_distance(int from, int to) return numa_latency[from][to]; } +static int find_numa_node_for_addr(unsigned long pa, + struct node_mem_mask *pnode_mask) +{ + struct mdesc_handle *md = mdesc_grab(); + u64 node, arc; + int i = 0; + + node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups"); + if (node == MDESC_NODE_NULL) + goto out; + + mdesc_for_each_node_by_name(md, node, "group") { + mdesc_for_each_arc(arc, md, node, MDESC_ARC_TYPE_FWD) { + u64 target = mdesc_arc_target(md, arc); + struct mdesc_mlgroup *m = find_mlgroup(target); + + if (!m) + continue; + if ((pa & m->mask) == m->match) { + if (pnode_mask) { + pnode_mask->mask = m->mask; + pnode_mask->val = m->match; + } + mdesc_release(md); + return i; + } + } + i++; + } + +out: + mdesc_release(md); + return -1; +} + static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) { int i; From 07b5ab3f71d318e52c18cc3b73c1d44c908aacfa Mon Sep 17 00:00:00 2001 From: Andreas Larsson Date: Wed, 9 Nov 2016 10:43:05 +0100 Subject: [PATCH 165/503] sparc32: Fix inverted invalid_frame_pointer checks on sigreturns Signed-off-by: Andreas Larsson Signed-off-by: David S. Miller --- arch/sparc/kernel/signal_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index c3c12efe0bc0..9c0c8fd0b292 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -89,7 +89,7 @@ asmlinkage void do_sigreturn(struct pt_regs *regs) sf = (struct signal_frame __user *) regs->u_regs[UREG_FP]; /* 1. Make sure we are not getting garbage from the user */ - if (!invalid_frame_pointer(sf, sizeof(*sf))) + if (invalid_frame_pointer(sf, sizeof(*sf))) goto segv_and_exit; if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP])) @@ -150,7 +150,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs) synchronize_user_stack(); sf = (struct rt_signal_frame __user *) regs->u_regs[UREG_FP]; - if (!invalid_frame_pointer(sf, sizeof(*sf))) + if (invalid_frame_pointer(sf, sizeof(*sf))) goto segv; if (get_user(ufp, &sf->regs.u_regs[UREG_FP])) From 7bc61cc5df808008b77a3b72cf814960c675518b Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 19 Oct 2016 10:46:18 +0300 Subject: [PATCH 166/503] drm/arcpgu: Accommodate adv7511 switch to DRM bridge ARC PGU driver starts crashing on initialization after 'commit e12c2f645557 ("drm/i2c: adv7511: Convert to drm_bridge")' This happenes because in "arcpgu_drm_hdmi_init" function we get pointer of "drm_i2c_encoder_driver" structure, which doesn't exist after adv7511 hdmi encoder interface changed from slave encoder to drm bridge. So, when we call "encoder_init" function from this structure driver crashes. Bootlog: ------------------------------------->8-------------------------------- [drm] Initialized drm 1.1.0 20060810 arcpgu e0017000.pgu: arc_pgu ID: 0xabbabaab arcpgu e0017000.pgu: assigned reserved memory node frame_buffer@9e000000 Path: (null) CPU: 0 PID: 1 Comm: swapper Not tainted 4.8.0-00001-gb5642252fa01-dirty #8 task: 9a058000 task.stack: 9a032000 [ECR ]: 0x00220100 => Invalid Read @ 0x00000004 by insn @ 0x803934e8 [EFA ]: 0x00000004 [BLINK ]: drm_atomic_helper_connector_dpms+0xa6/0x230 [ERET ]: drm_atomic_helper_connector_dpms+0xa4/0x230 [STAT32]: 0x00000846 : K DE E2 E1 BTA: 0x8016d949 SP: 0x9a033e34 FP: 0x00000000 LPS: 0x8036f6fc LPE: 0x8036f700 LPC: 0x00000000 r00: 0x8063c118 r01: 0x805b98ac r02: 0x00000b11 r03: 0x00000000 r04: 0x9a010f54 r05: 0x00000000 r06: 0x00000001 r07: 0x00000000 r08: 0x00000028 r09: 0x00000001 r10: 0x00000007 r11: 0x00000054 r12: 0x720a3033 Stack Trace: drm_atomic_helper_connector_dpms+0xa4/0x230 arcpgu_drm_hdmi_init+0xbc/0x228 arcpgu_probe+0x168/0x244 platform_drv_probe+0x26/0x64 really_probe+0x1f0/0x32c __driver_attach+0xa8/0xd0 bus_for_each_dev+0x3c/0x74 bus_add_driver+0xc2/0x184 driver_register+0x50/0xec do_one_initcall+0x3a/0x120 kernel_init_freeable+0x108/0x1a0 ------------------------------------->8-------------------------------- Fix ARC PGU driver to be able work with drm bridge hdmi encoder interface. The hdmi connector code isn't needed anymore as we expect the adv7511 bridge driver to create/manage the connector. Signed-off-by: Eugeniy Paltsev Reviewed-by: Archit Taneja Signed-off-by: Alexey Brodkin --- drivers/gpu/drm/arc/arcpgu_hdmi.c | 159 ++++-------------------------- 1 file changed, 17 insertions(+), 142 deletions(-) diff --git a/drivers/gpu/drm/arc/arcpgu_hdmi.c b/drivers/gpu/drm/arc/arcpgu_hdmi.c index b7a8b2ac4055..b69c66b4897e 100644 --- a/drivers/gpu/drm/arc/arcpgu_hdmi.c +++ b/drivers/gpu/drm/arc/arcpgu_hdmi.c @@ -14,170 +14,45 @@ * */ -#include +#include #include -#include #include "arcpgu.h" -struct arcpgu_drm_connector { - struct drm_connector connector; - struct drm_encoder_slave *encoder_slave; -}; - -static int arcpgu_drm_connector_get_modes(struct drm_connector *connector) -{ - const struct drm_encoder_slave_funcs *sfuncs; - struct drm_encoder_slave *slave; - struct arcpgu_drm_connector *con = - container_of(connector, struct arcpgu_drm_connector, connector); - - slave = con->encoder_slave; - if (slave == NULL) { - dev_err(connector->dev->dev, - "connector_get_modes: cannot find slave encoder for connector\n"); - return 0; - } - - sfuncs = slave->slave_funcs; - if (sfuncs->get_modes == NULL) - return 0; - - return sfuncs->get_modes(&slave->base, connector); -} - -static enum drm_connector_status -arcpgu_drm_connector_detect(struct drm_connector *connector, bool force) -{ - enum drm_connector_status status = connector_status_unknown; - const struct drm_encoder_slave_funcs *sfuncs; - struct drm_encoder_slave *slave; - - struct arcpgu_drm_connector *con = - container_of(connector, struct arcpgu_drm_connector, connector); - - slave = con->encoder_slave; - if (slave == NULL) { - dev_err(connector->dev->dev, - "connector_detect: cannot find slave encoder for connector\n"); - return status; - } - - sfuncs = slave->slave_funcs; - if (sfuncs && sfuncs->detect) - return sfuncs->detect(&slave->base, connector); - - dev_err(connector->dev->dev, "connector_detect: could not detect slave funcs\n"); - return status; -} - -static void arcpgu_drm_connector_destroy(struct drm_connector *connector) -{ - drm_connector_unregister(connector); - drm_connector_cleanup(connector); -} - -static const struct drm_connector_helper_funcs -arcpgu_drm_connector_helper_funcs = { - .get_modes = arcpgu_drm_connector_get_modes, -}; - -static const struct drm_connector_funcs arcpgu_drm_connector_funcs = { - .dpms = drm_helper_connector_dpms, - .reset = drm_atomic_helper_connector_reset, - .detect = arcpgu_drm_connector_detect, - .fill_modes = drm_helper_probe_single_connector_modes, - .destroy = arcpgu_drm_connector_destroy, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, -}; - -static struct drm_encoder_helper_funcs arcpgu_drm_encoder_helper_funcs = { - .dpms = drm_i2c_encoder_dpms, - .mode_fixup = drm_i2c_encoder_mode_fixup, - .mode_set = drm_i2c_encoder_mode_set, - .prepare = drm_i2c_encoder_prepare, - .commit = drm_i2c_encoder_commit, - .detect = drm_i2c_encoder_detect, -}; - static struct drm_encoder_funcs arcpgu_drm_encoder_funcs = { .destroy = drm_encoder_cleanup, }; int arcpgu_drm_hdmi_init(struct drm_device *drm, struct device_node *np) { - struct arcpgu_drm_connector *arcpgu_connector; - struct drm_i2c_encoder_driver *driver; - struct drm_encoder_slave *encoder; - struct drm_connector *connector; - struct i2c_client *i2c_slave; - int ret; + struct drm_encoder *encoder; + struct drm_bridge *bridge; + + int ret = 0; encoder = devm_kzalloc(drm->dev, sizeof(*encoder), GFP_KERNEL); if (encoder == NULL) return -ENOMEM; - i2c_slave = of_find_i2c_device_by_node(np); - if (!i2c_slave || !i2c_get_clientdata(i2c_slave)) { - dev_err(drm->dev, "failed to find i2c slave encoder\n"); + /* Locate drm bridge from the hdmi encoder DT node */ + bridge = of_drm_find_bridge(np); + if (!bridge) return -EPROBE_DEFER; - } - if (i2c_slave->dev.driver == NULL) { - dev_err(drm->dev, "failed to find i2c slave driver\n"); - return -EPROBE_DEFER; - } - - driver = - to_drm_i2c_encoder_driver(to_i2c_driver(i2c_slave->dev.driver)); - ret = driver->encoder_init(i2c_slave, drm, encoder); - if (ret) { - dev_err(drm->dev, "failed to initialize i2c encoder slave\n"); - return ret; - } - - encoder->base.possible_crtcs = 1; - encoder->base.possible_clones = 0; - ret = drm_encoder_init(drm, &encoder->base, &arcpgu_drm_encoder_funcs, + encoder->possible_crtcs = 1; + encoder->possible_clones = 0; + ret = drm_encoder_init(drm, encoder, &arcpgu_drm_encoder_funcs, DRM_MODE_ENCODER_TMDS, NULL); if (ret) return ret; - drm_encoder_helper_add(&encoder->base, - &arcpgu_drm_encoder_helper_funcs); + /* Link drm_bridge to encoder */ + bridge->encoder = encoder; + encoder->bridge = bridge; - arcpgu_connector = devm_kzalloc(drm->dev, sizeof(*arcpgu_connector), - GFP_KERNEL); - if (!arcpgu_connector) { - ret = -ENOMEM; - goto error_encoder_cleanup; - } + ret = drm_bridge_attach(drm, bridge); + if (ret) + drm_encoder_cleanup(encoder); - connector = &arcpgu_connector->connector; - drm_connector_helper_add(connector, &arcpgu_drm_connector_helper_funcs); - ret = drm_connector_init(drm, connector, &arcpgu_drm_connector_funcs, - DRM_MODE_CONNECTOR_HDMIA); - if (ret < 0) { - dev_err(drm->dev, "failed to initialize drm connector\n"); - goto error_encoder_cleanup; - } - - ret = drm_mode_connector_attach_encoder(connector, &encoder->base); - if (ret < 0) { - dev_err(drm->dev, "could not attach connector to encoder\n"); - drm_connector_unregister(connector); - goto error_connector_cleanup; - } - - arcpgu_connector->encoder_slave = encoder; - - return 0; - -error_connector_cleanup: - drm_connector_cleanup(connector); - -error_encoder_cleanup: - drm_encoder_cleanup(&encoder->base); return ret; } From d786810b2f896854506e7b698a137f074942e410 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 8 Nov 2016 13:54:41 -0500 Subject: [PATCH 167/503] perf/x86/intel/uncore: Add more Intel uncore IMC PCI IDs for SkyLake Several uncore IMC PCI IDs are missed for Intel SkyLake. Add the PCI IDs for SkyLake Y, U, H and S platforms. Rename the ID macros for 0x191f and 0x190c. The corresponding bug: https://bugzilla.kernel.org/show_bug.cgi?id=187301 The related datasheets are also attached in the bug entry for permanent reference. Reported-by: Ben Widawsky Tested-by: Ben Widawsky Signed-off-by: Kan Liang Reviewed-by: Ben Widawsky Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1478631281-5061-1-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore_snb.c | 32 ++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 5f845eef9a4d..81195cca7eae 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -8,8 +8,12 @@ #define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 #define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604 -#define PCI_DEVICE_ID_INTEL_SKL_IMC 0x191f -#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x190c +#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x1904 +#define PCI_DEVICE_ID_INTEL_SKL_Y_IMC 0x190c +#define PCI_DEVICE_ID_INTEL_SKL_HD_IMC 0x1900 +#define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 +#define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f +#define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -616,13 +620,29 @@ static const struct pci_device_id bdw_uncore_pci_ids[] = { static const struct pci_device_id skl_uncore_pci_ids[] = { { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_Y_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, { /* IMC */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_U_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HD_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SD_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -666,8 +686,12 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */ IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */ - IMC_DEV(SKL_IMC, &skl_uncore_pci_driver), /* 6th Gen Core */ + IMC_DEV(SKL_Y_IMC, &skl_uncore_pci_driver), /* 6th Gen Core Y */ IMC_DEV(SKL_U_IMC, &skl_uncore_pci_driver), /* 6th Gen Core U */ + IMC_DEV(SKL_HD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Dual Core */ + IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Quad Core */ + IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Dual Core */ + IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Quad Core */ { /* end marker */ } }; From 48004881f6935704e5e4ffaf9e0ec921a25db243 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 7 Nov 2016 16:52:04 +0000 Subject: [PATCH 168/503] drm/i915: Mark CPU cache as dirty when used for rendering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On LLC, or even snooped, machines rendering via the GPU ends up in the CPU cache. This cacheline dirt also needs to be flushed to main memory when moving to an incoherent domain, such as the display's scanout engine. Mostly, this happens because either the object is marked as dirty from its first use or is avoided by setting the object into the display domain from the start. v2: Treat WT as not requiring a clflush prior to use on the display engine as well. Fixes: 0f71979ab7fb ("drm/i915: Performed deferred clflush inside set-cache-level") References: https://bugs.freedesktop.org/show_bug.cgi?id=95414 Signed-off-by: Chris Wilson Cc: Jani Nikula Cc: Ville Syrjälä Cc: # v4.0+ Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20161107165204.7008-1-chris@chris-wilson.co.uk (cherry picked from commit 7aa6ca61ee5546d74b76610894924cdb0d4a1af0) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 7adb4c77cc7f..a218c2e395e7 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1281,6 +1281,12 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, return ctx; } +static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) +{ + return !(obj->cache_level == I915_CACHE_NONE || + obj->cache_level == I915_CACHE_WT); +} + void i915_vma_move_to_active(struct i915_vma *vma, struct drm_i915_gem_request *req, unsigned int flags) @@ -1311,6 +1317,8 @@ void i915_vma_move_to_active(struct i915_vma *vma, /* update for the implicit flush after a batch */ obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; + if (!obj->cache_dirty && gpu_write_needs_clflush(obj)) + obj->cache_dirty = true; } if (flags & EXEC_OBJECT_NEEDS_FENCE) From 9f1a7ab260300c670608a9db861187069f8b179a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 7 Nov 2016 22:20:55 +0200 Subject: [PATCH 169/503] drm/i915: Grab the rotation from the passed plane state for VLV sprites MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the passed in plane_state instead of plane->state in vlv_update_plane(). Currently the two are one and the same, but if we start queuing up multiple plane updates they might not be. Looks like this was rebase fail on my part. Cc: Daniel Vetter Fixes: 8d0deca8c6e0 ("drm/i915: Pass 90/270 vs. 0/180 rotation info for intel_gen4_compute_page_offset()") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1478550057-24864-4-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson (cherry picked from commit 11df4d95b3ad9e6a6a6e0907bb200610a4d24887) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_sprite.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 73a521fdf1bd..dbed12c484c9 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -358,7 +358,7 @@ vlv_update_plane(struct drm_plane *dplane, int plane = intel_plane->plane; u32 sprctl; u32 sprsurf_offset, linear_offset; - unsigned int rotation = dplane->state->rotation; + unsigned int rotation = plane_state->base.rotation; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; int crtc_x = plane_state->base.dst.x1; int crtc_y = plane_state->base.dst.y1; From fc22b787890f9f9067fd130feec42297a4ee62ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Oct 2016 16:44:38 +0300 Subject: [PATCH 170/503] drm/i915: Refresh that status of MST capable connectors in ->detect() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Once we've determined that the sink is MST capable we never end up running through the full detect cycle again, despite getting HPDs. Fix tht by ripping out the incorrect piece of code responsible. This got broken when I moved the long HPD handling to the ->detect() hook, but failed to remove the leftover code. Cc: Ander Conselvan de Oliveira Cc: drm-intel-fixes@lists.freedesktop.org Cc: Rui Tiago Matos Tested-by: Rui Tiago Matos Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98323 Cc: Kirill A. Shutemov Tested-by: Kirill A. Shutemov References: https://bugs.freedesktop.org/show_bug.cgi?id=98306 Fixes: 1015811609c0 ("drm/i915: Move long hpd handling into the hotplug work") Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477057478-29328-1-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson (cherry picked from commit 1aab956c7b8872fb6976328316bfad62c6e67cf8) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_dp.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 3581b5a7f716..bf344d08356a 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4463,21 +4463,11 @@ static enum drm_connector_status intel_dp_detect(struct drm_connector *connector, bool force) { struct intel_dp *intel_dp = intel_attached_dp(connector); - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *intel_encoder = &intel_dig_port->base; enum drm_connector_status status = connector->status; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); - if (intel_dp->is_mst) { - /* MST devices are disconnected from a monitor POV */ - intel_dp_unset_edid(intel_dp); - if (intel_encoder->type != INTEL_OUTPUT_EDP) - intel_encoder->type = INTEL_OUTPUT_DP; - return connector_status_disconnected; - } - /* If full detect is not performed yet, do a full detect */ if (!intel_dp->detect_done) status = intel_dp_long_pulse(intel_dp->attached_connector); From 9a2541910dc7eaaa6859eea8a0ffda673059a623 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 11 Nov 2016 12:33:20 +0100 Subject: [PATCH 171/503] ALSA: hda - Fix mic regression by ASRock mobo fixup The commit [1a3f099101b8: ALSA: hda - Fix surround output pins for ASRock B150M mobo] introduced a fixup of pin configs for ASRock mobos to fix the surround outputs. However, this overrides the pin configs of the mic pins as if they are outputs-only, effectively disabling the mic inputs. Of course, it's a regression wrt mic functionality. Actually the pins 0x18 and 0x1a don't need to be changed; we just need to disable the bogus pins 0x14 and 0x15. Then the auto-parser will pick up mic pins as switchable and assign the surround outputs there. This patch removes the incorrect pin overrides of NID 0x18 and 0x1a from the ASRock fixup. Fixes: 1a3f099101b8 ('ALSA: hda - Fix surround output pins for ASRock...') Reported-and-tested-by: Vitor Antunes Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=187431 Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 2f909dd8b7b8..ea81c08ddc7a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6907,8 +6907,6 @@ static const struct hda_fixup alc662_fixups[] = { .v.pins = (const struct hda_pintbl[]) { { 0x15, 0x40f000f0 }, /* disabled */ { 0x16, 0x40f000f0 }, /* disabled */ - { 0x18, 0x01014011 }, /* LO */ - { 0x1a, 0x01014012 }, /* LO */ { } } }, From 8e94a46c1770884166b31adc99eba7da65a446a7 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Wed, 9 Nov 2016 02:25:15 +0100 Subject: [PATCH 172/503] drm/amdgpu: Attach exclusive fence to prime exported bo's. (v5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit External clients which import our bo's wait only for exclusive dmabuf-fences, not on shared ones, ditto for bo's which we import from external providers and write to. Therefore attach exclusive fences on prime shared buffers if our exported buffer gets imported by an external client, or if we import a buffer from an external exporter. See discussion in thread: https://lists.freedesktop.org/archives/dri-devel/2016-October/122370.html Prime export tested on Intel iGPU + AMD Tonga dGPU as DRI3/Present Prime render offload, and with the Tonga standalone as primary gpu. v2: Add a wait for all shared fences before prime export, as suggested by Christian Koenig. v3: - Mark buffer prime_exported in amdgpu_gem_prime_pin, so we only use the exclusive fence when exporting a bo to external clients like a separate iGPU, but not when exporting/importing from/to ourselves as part of regular DRI3 fd passing. - Propagate failure of reservation_object_wait_rcu back to caller. v4: - Switch to a prime_shared_count counter instead of a flag, which gets in/decremented on prime_pin/unpin, so we can switch back to shared fences if all clients detach from our exported bo. - Also switch to exclusive fence for prime imported bo's. v5: - Drop lret, instead use int ret -> long ret, as proposed by Christian. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95472 Tested-by: Mike Lothian (v1) Signed-off-by: Mario Kleiner Reviewed-by: Christian König . Cc: Christian König Cc: Michel Dänzer Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 20 +++++++++++++++++++- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 039b57e4644c..496f72b134eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -459,6 +459,7 @@ struct amdgpu_bo { u64 metadata_flags; void *metadata; u32 metadata_size; + unsigned prime_shared_count; /* list of all virtual address to which this bo * is associated to */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 651115dcce12..c02db01f6583 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -132,7 +132,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, entry->priority = min(info[i].bo_priority, AMDGPU_BO_LIST_MAX_PRIORITY); entry->tv.bo = &entry->robj->tbo; - entry->tv.shared = true; + entry->tv.shared = !entry->robj->prime_shared_count; if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GDS) gds_obj = entry->robj; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 7700dc22f243..3826d5aea0a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -74,20 +74,36 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev, if (ret) return ERR_PTR(ret); + bo->prime_shared_count = 1; return &bo->gem_base; } int amdgpu_gem_prime_pin(struct drm_gem_object *obj) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - int ret = 0; + long ret = 0; ret = amdgpu_bo_reserve(bo, false); if (unlikely(ret != 0)) return ret; + /* + * Wait for all shared fences to complete before we switch to future + * use of exclusive fence on this prime shared bo. + */ + ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false, + MAX_SCHEDULE_TIMEOUT); + if (unlikely(ret < 0)) { + DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret); + amdgpu_bo_unreserve(bo); + return ret; + } + /* pin buffer into GTT */ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL); + if (likely(ret == 0)) + bo->prime_shared_count++; + amdgpu_bo_unreserve(bo); return ret; } @@ -102,6 +118,8 @@ void amdgpu_gem_prime_unpin(struct drm_gem_object *obj) return; amdgpu_bo_unpin(bo); + if (bo->prime_shared_count) + bo->prime_shared_count--; amdgpu_bo_unreserve(bo); } From f23ed166f283b1a6f0a1f0b0c889e8df9a10ff85 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 2 Nov 2016 17:57:01 +1100 Subject: [PATCH 173/503] powerpc/64s: Fix system reset interrupt winkle wakeups Wakeups from winkle set the low bit of the HSPRG0 register, to distinguish it from other sleep states. This is also the PACA pointer. The system reset exception handler fails to mask this bit away before using this value before using it as the PACA pointer. Fix this by adding a new type of exception prolog macro where we already have the PACA set in r13, and have the system reset vector mask it out. The winkle wakeup handler will store the masked value back into HSPRG0. Fixes: fb479e44a9e2 ("powerpc/64s: relocation, register save fixes for system reset interrupt") Cc: stable@vger.kernel.org # v3.0+ Signed-off-by: Nicholas Piggin Reviewed-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 13 +++++++++++-- arch/powerpc/kernel/exceptions-64s.S | 11 ++++++++--- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 84d49b197c32..3ce43664eadf 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -158,14 +158,17 @@ BEGIN_FTR_SECTION_NESTED(943) \ std ra,offset(r13); \ END_FTR_SECTION_NESTED(ftr,ftr,943) -#define EXCEPTION_PROLOG_0(area) \ - GET_PACA(r13); \ +#define EXCEPTION_PROLOG_0_PACA(area) \ std r9,area+EX_R9(r13); /* save r9 */ \ OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); \ HMT_MEDIUM; \ std r10,area+EX_R10(r13); /* save r10 - r12 */ \ OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR) +#define EXCEPTION_PROLOG_0(area) \ + GET_PACA(r13); \ + EXCEPTION_PROLOG_0_PACA(area) + #define __EXCEPTION_PROLOG_1(area, extra, vec) \ OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \ OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \ @@ -196,6 +199,12 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) EXCEPTION_PROLOG_1(area, extra, vec); \ EXCEPTION_PROLOG_PSERIES_1(label, h); +/* Have the PACA in r13 already */ +#define EXCEPTION_PROLOG_PSERIES_PACA(area, label, h, extra, vec) \ + EXCEPTION_PROLOG_0_PACA(area); \ + EXCEPTION_PROLOG_1(area, extra, vec); \ + EXCEPTION_PROLOG_PSERIES_1(label, h); + #define __KVMTEST(h, n) \ lbz r10,HSTATE_IN_GUEST(r13); \ cmpwi r10,0; \ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 08ba447a4b3d..1ba82ea90230 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -116,7 +116,9 @@ EXC_VIRT_NONE(0x4000, 0x4100) EXC_REAL_BEGIN(system_reset, 0x100, 0x200) SET_SCRATCH0(r13) - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, + GET_PACA(r13) + clrrdi r13,r13,1 /* Last bit of HSPRG0 is set if waking from winkle */ + EXCEPTION_PROLOG_PSERIES_PACA(PACA_EXGEN, system_reset_common, EXC_STD, IDLETEST, 0x100) EXC_REAL_END(system_reset, 0x100, 0x200) @@ -124,6 +126,9 @@ EXC_VIRT_NONE(0x4100, 0x4200) #ifdef CONFIG_PPC_P7_NAP EXC_COMMON_BEGIN(system_reset_idle_common) +BEGIN_FTR_SECTION + GET_PACA(r13) /* Restore HSPRG0 to get the winkle bit in r13 */ +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) bl pnv_restore_hyp_resource li r0,PNV_THREAD_RUNNING @@ -169,7 +174,7 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x300) SET_SCRATCH0(r13) /* save r13 */ /* * Running native on arch 2.06 or later, we may wakeup from winkle - * inside machine check. If yes, then last bit of HSPGR0 would be set + * inside machine check. If yes, then last bit of HSPRG0 would be set * to 1. Hence clear it unconditionally. */ GET_PACA(r13) @@ -388,7 +393,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early) /* * Go back to winkle. Please note that this thread was woken up in * machine check from winkle and have not restored the per-subcore - * state. Hence before going back to winkle, set last bit of HSPGR0 + * state. Hence before going back to winkle, set last bit of HSPRG0 * to 1. This will make sure that if this thread gets woken up * again at reset vector 0x100 then it will get chance to restore * the subcore state. From e6740ae631db02e4f3a6742e2a38ea63718d8d17 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 7 Nov 2016 22:28:21 -0800 Subject: [PATCH 174/503] powerpc: Fix exception vector build with 2.23 era binutils The changes to use gas sections for constructing the exception vectors causes a build break when using binutils 2.23: arch/powerpc/kernel/exceptions-64s.S:770: Error: operand out of range (0xffffffffffff8100 is not between 0x0000000000000000 and 0x000000000000ffff) And so on. Reported by Hugh with binutils-2.23.2-8.1.4.ppc64 from openSUSE 13.1 and also Naveen & Denis using 2.23.52.0.1-26.el7 from RHEL 7. Strangely binutils 2.22 (what I test with) is not affected. This is caused by the use of @l in LOAD_HANDLER(). The @l was only recently added in commit a24553dd02dc ("powerpc/pseries: Remove unnecessary syscall trampoline"). Luckily the gas section changes split out the LOAD_SYSCALL_HANDLER() macro, which means we actually *don't* need to use @l in LOAD_HANDLER() any more, only in LOAD_SYSCALL_HANDLER(). So drop the @l from LOAD_HANDLER(). Fixes: 57f266497d81 ("powerpc: Use gas sections for arranging exception vectors") Signed-off-by: Hugh Dickins [mpe: Add gory details to change log] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 3ce43664eadf..9a3eee661297 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -91,7 +91,7 @@ */ #define LOAD_HANDLER(reg, label) \ ld reg,PACAKBASE(r13); /* get high part of &label */ \ - ori reg,reg,(FIXED_SYMBOL_ABS_ADDR(label))@l; + ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label); #define __LOAD_HANDLER(reg, label) \ ld reg,PACAKBASE(r13); \ From 9a1f490f358e44a1cf463ba8124ca39fcc042992 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 2 Nov 2016 22:20:46 +1100 Subject: [PATCH 175/503] powerpc/oops: Fix missing pr_cont()s in show_stack() Previously we got away with printing the stack trace in multiple pieces and it usually looked right. But since commit 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines"), KERN_CONT is now required when printing continuation lines. Use pr_cont() as appropriate. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ce6dc61b15b2..621d9b23df72 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1900,14 +1900,14 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if ((ip == rth) && curr_frame >= 0) { - printk(" (%pS)", + pr_cont(" (%pS)", (void *)current->ret_stack[curr_frame].ret); curr_frame--; } #endif if (firstframe) - printk(" (unreliable)"); - printk("\n"); + pr_cont(" (unreliable)"); + pr_cont("\n"); } firstframe = 0; From db5ba5ae6e8d5374429212de8e20933a8a0ce52e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 2 Nov 2016 22:20:47 +1100 Subject: [PATCH 176/503] powerpc/oops: Fix missing pr_cont()s in print_msr_bits() et. al. Since the KERN_CONT changes these are being horribly split across lines, for example: MSR: 8000000000009033 < SF,EE ,ME,IR ,DR,RI ,LE> So fix it by using pr_cont() where appropriate. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 621d9b23df72..38f85d7a1e06 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1282,7 +1282,7 @@ static void print_bits(unsigned long val, struct regbit *bits, const char *sep) for (; bits->bit; ++bits) if (val & bits->bit) { - printk("%s%s", s, bits->name); + pr_cont("%s%s", s, bits->name); s = sep; } } @@ -1305,9 +1305,9 @@ static void print_tm_bits(unsigned long val) * T: Transactional (bit 34) */ if (val & (MSR_TM | MSR_TS_S | MSR_TS_T)) { - printk(",TM["); + pr_cont(",TM["); print_bits(val, msr_tm_bits, ""); - printk("]"); + pr_cont("]"); } } #else @@ -1316,10 +1316,10 @@ static void print_tm_bits(unsigned long val) {} static void print_msr_bits(unsigned long val) { - printk("<"); + pr_cont("<"); print_bits(val, msr_bits, ","); print_tm_bits(val); - printk(">"); + pr_cont(">"); } #ifdef CONFIG_PPC64 From 7dae865f5878fc0c2edfb3b9165712ef33ce03df Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 3 Nov 2016 20:45:26 +1100 Subject: [PATCH 177/503] powerpc/oops: Fix missing pr_cont()s in show_regs() Fix up our oops output by converting continuation lines to use pr_cont(). Some of these are dubious, eg. printing a continuation line which starts with a newline, but seem to work OK for now. This whole function needs a rewrite in the next release. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 38f85d7a1e06..6fe8fa481f8a 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1347,29 +1347,29 @@ void show_regs(struct pt_regs * regs) printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); trap = TRAP(regs); if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) - printk("CFAR: "REG" ", regs->orig_gpr3); + pr_cont("CFAR: "REG" ", regs->orig_gpr3); if (trap == 0x200 || trap == 0x300 || trap == 0x600) #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - printk("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr); + pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr); #else - printk("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); + pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); #endif #ifdef CONFIG_PPC64 - printk("SOFTE: %ld ", regs->softe); + pr_cont("SOFTE: %ld ", regs->softe); #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (MSR_TM_ACTIVE(regs->msr)) - printk("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch); + pr_cont("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch); #endif for (i = 0; i < 32; i++) { if ((i % REGS_PER_LINE) == 0) - printk("\nGPR%02d: ", i); - printk(REG " ", regs->gpr[i]); + pr_cont("\nGPR%02d: ", i); + pr_cont(REG " ", regs->gpr[i]); if (i == LAST_VOLATILE && !FULL_REGS(regs)) break; } - printk("\n"); + pr_cont("\n"); #ifdef CONFIG_KALLSYMS /* * Lookup NIP late so we have the best change of getting the From 2ffd04dee0dacff36c03a02434965a96da032bcd Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Fri, 4 Nov 2016 17:20:40 +1100 Subject: [PATCH 178/503] powerpc/oops: Fix missing pr_cont()s in instruction dump Since the KERN_CONT changes, the current code in show_instructions() prints out a whole bunch of unnecessary newlines. Change occurrences of printk("\n") to pr_cont("\n"). While we're here, change all the other cases of printk(KERN_CONT ...) to pr_cont() as well. Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 6fe8fa481f8a..49a680d5ae37 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1215,7 +1215,7 @@ static void show_instructions(struct pt_regs *regs) int instr; if (!(i % 8)) - printk("\n"); + pr_cont("\n"); #if !defined(CONFIG_BOOKE) /* If executing with the IMMU off, adjust pc rather @@ -1227,18 +1227,18 @@ static void show_instructions(struct pt_regs *regs) if (!__kernel_text_address(pc) || probe_kernel_address((unsigned int __user *)pc, instr)) { - printk(KERN_CONT "XXXXXXXX "); + pr_cont("XXXXXXXX "); } else { if (regs->nip == pc) - printk(KERN_CONT "<%08x> ", instr); + pr_cont("<%08x> ", instr); else - printk(KERN_CONT "%08x ", instr); + pr_cont("%08x ", instr); } pc += sizeof(int); } - printk("\n"); + pr_cont("\n"); } struct regbit { From 18f6084a989ba1b38702f9af37a2e4049a924be6 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Thu, 10 Nov 2016 09:35:27 -0500 Subject: [PATCH 179/503] scsi: mpt3sas: Fix secure erase premature termination This is a work around for a bug with LSI Fusion MPT SAS2 when perfoming secure erase. Due to the very long time the operation takes, commands issued during the erase will time out and will trigger execution of the abort hook. Even though the abort hook is called for the specific command which timed out, this leads to entire device halt (scsi_state terminated) and premature termination of the secure erase. Set device state to busy while ATA passthrough commands are in progress. [mkp: hand applied to 4.9/scsi-fixes, tweaked patch description] Signed-off-by: Andrey Grodzovsky Acked-by: Sreekanth Reddy Cc: Cc: Sathya Prakash Cc: Chaitra P B Cc: Suganath Prabu Subramani Cc: Sreekanth Reddy Cc: Hannes Reinecke Cc: Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 8aa769a2d919..91b70bc46e7f 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -4010,7 +4010,10 @@ _scsih_eedp_error_handling(struct scsi_cmnd *scmd, u16 ioc_status) SAM_STAT_CHECK_CONDITION; } - +static inline bool ata_12_16_cmd(struct scsi_cmnd *scmd) +{ + return (scmd->cmnd[0] == ATA_12 || scmd->cmnd[0] == ATA_16); +} /** * scsih_qcmd - main scsi request entry point @@ -4038,6 +4041,13 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd) if (ioc->logging_level & MPT_DEBUG_SCSI) scsi_print_command(scmd); + /* + * Lock the device for any subsequent command until command is + * done. + */ + if (ata_12_16_cmd(scmd)) + scsi_internal_device_block(scmd->device); + sas_device_priv_data = scmd->device->hostdata; if (!sas_device_priv_data || !sas_device_priv_data->sas_target) { scmd->result = DID_NO_CONNECT << 16; @@ -4613,6 +4623,9 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply) if (scmd == NULL) return 1; + if (ata_12_16_cmd(scmd)) + scsi_internal_device_unblock(scmd->device, SDEV_RUNNING); + mpi_request = mpt3sas_base_get_msg_frame(ioc, smid); if (mpi_reply == NULL) { From 4e3264d21b90984c2165e8fe5a7b64cf25bc2c2d Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 9 Nov 2016 15:36:33 -0800 Subject: [PATCH 180/503] bpf: Fix bpf_redirect to an ipip/ip6tnl dev If the bpf program calls bpf_redirect(dev, 0) and dev is an ipip/ip6tnl, it currently includes the mac header. e.g. If dev is ipip, the end result is IP-EthHdr-IP instead of IP-IP. The fix is to pull the mac header. At ingress, skb_postpull_rcsum() is not needed because the ethhdr should have been pulled once already and then got pushed back just before calling the bpf_prog. At egress, this patch calls skb_postpull_rcsum(). If bpf_redirect(dev, BPF_F_INGRESS) is called, it also fails now because it calls dev_forward_skb() which eventually calls eth_type_trans(skb, dev). The eth_type_trans() will set skb->type = PACKET_OTHERHOST because the mac address does not match the redirecting dev->dev_addr. The PACKET_OTHERHOST will eventually cause the ip_rcv() errors out. To fix this, ____dev_forward_skb() is added. Joint work with Daniel Borkmann. Fixes: cfc7381b3002 ("ip_tunnel: add collect_md mode to IPIP tunnel") Fixes: 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnels") Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/linux/netdevice.h | 15 +++++++++ net/core/dev.c | 17 ++++------ net/core/filter.c | 68 ++++++++++++++++++++++++++++++++++----- 3 files changed, 81 insertions(+), 19 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 91ee3643ccc8..bf04a46f6d5b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3354,6 +3354,21 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb); +static __always_inline int ____dev_forward_skb(struct net_device *dev, + struct sk_buff *skb) +{ + if (skb_orphan_frags(skb, GFP_ATOMIC) || + unlikely(!is_skb_forwardable(dev, skb))) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + + skb_scrub_packet(skb, true); + skb->priority = 0; + return 0; +} + void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); extern int netdev_budget; diff --git a/net/core/dev.c b/net/core/dev.c index eaad4c28069f..6666b28b6815 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1766,19 +1766,14 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { - if (skb_orphan_frags(skb, GFP_ATOMIC) || - unlikely(!is_skb_forwardable(dev, skb))) { - atomic_long_inc(&dev->rx_dropped); - kfree_skb(skb); - return NET_RX_DROP; + int ret = ____dev_forward_skb(dev, skb); + + if (likely(!ret)) { + skb->protocol = eth_type_trans(skb, dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } - skb_scrub_packet(skb, true); - skb->priority = 0; - skb->protocol = eth_type_trans(skb, dev); - skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); - - return 0; + return ret; } EXPORT_SYMBOL_GPL(__dev_forward_skb); diff --git a/net/core/filter.c b/net/core/filter.c index 00351cdf7d0c..b391209838ef 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1628,6 +1628,19 @@ static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb) return dev_forward_skb(dev, skb); } +static inline int __bpf_rx_skb_no_mac(struct net_device *dev, + struct sk_buff *skb) +{ + int ret = ____dev_forward_skb(dev, skb); + + if (likely(!ret)) { + skb->dev = dev; + ret = netif_rx(skb); + } + + return ret; +} + static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) { int ret; @@ -1647,6 +1660,51 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) return ret; } +static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev, + u32 flags) +{ + /* skb->mac_len is not set on normal egress */ + unsigned int mlen = skb->network_header - skb->mac_header; + + __skb_pull(skb, mlen); + + /* At ingress, the mac header has already been pulled once. + * At egress, skb_pospull_rcsum has to be done in case that + * the skb is originated from ingress (i.e. a forwarded skb) + * to ensure that rcsum starts at net header. + */ + if (!skb_at_tc_ingress(skb)) + skb_postpull_rcsum(skb, skb_mac_header(skb), mlen); + skb_pop_mac_header(skb); + skb_reset_mac_len(skb); + return flags & BPF_F_INGRESS ? + __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb); +} + +static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev, + u32 flags) +{ + bpf_push_mac_rcsum(skb); + return flags & BPF_F_INGRESS ? + __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); +} + +static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev, + u32 flags) +{ + switch (dev->type) { + case ARPHRD_TUNNEL: + case ARPHRD_TUNNEL6: + case ARPHRD_SIT: + case ARPHRD_IPGRE: + case ARPHRD_VOID: + case ARPHRD_NONE: + return __bpf_redirect_no_mac(skb, dev, flags); + default: + return __bpf_redirect_common(skb, dev, flags); + } +} + BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) { struct net_device *dev; @@ -1675,10 +1733,7 @@ BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) return -ENOMEM; } - bpf_push_mac_rcsum(clone); - - return flags & BPF_F_INGRESS ? - __bpf_rx_skb(dev, clone) : __bpf_tx_skb(dev, clone); + return __bpf_redirect(clone, dev, flags); } static const struct bpf_func_proto bpf_clone_redirect_proto = { @@ -1722,10 +1777,7 @@ int skb_do_redirect(struct sk_buff *skb) return -EINVAL; } - bpf_push_mac_rcsum(skb); - - return ri->flags & BPF_F_INGRESS ? - __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); + return __bpf_redirect(skb, dev, ri->flags); } static const struct bpf_func_proto bpf_redirect_proto = { From 90e02896f1a4627b14624245fbcbc19f8fd916cb Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 9 Nov 2016 15:36:34 -0800 Subject: [PATCH 181/503] bpf: Add test for bpf_redirect to ipip/ip6tnl The test creates two netns, ns1 and ns2. The host (the default netns) has an ipip or ip6tnl dev configured for tunneling traffic to the ns2. ping VIPS from ns1 <----> host <--tunnel--> ns2 (VIPs at loopback) The test is to have ns1 pinging VIPs configured at the loopback interface in ns2. The VIPs are 10.10.1.102 and 2401:face::66 (which are configured at lo@ns2). [Note: 0x66 => 102]. At ns1, the VIPs are routed _via_ the host. At the host, bpf programs are installed at the veth to redirect packets from a veth to the ipip/ip6tnl. The test is configured in a way so that both ingress and egress can be tested. At ns2, the ipip/ip6tnl dev is configured with the local and remote address specified. The return path is routed to the dev ipip/ip6tnl. During egress test, the host also locally tests pinging the VIPs to ensure that bpf_redirect at egress also works for the direct egress (i.e. not forwarding from dev ve1 to ve2). Acked-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- samples/bpf/Makefile | 4 + samples/bpf/tc_l2_redirect.sh | 173 ++++++++++++++++++++++ samples/bpf/tc_l2_redirect_kern.c | 236 ++++++++++++++++++++++++++++++ samples/bpf/tc_l2_redirect_user.c | 73 +++++++++ 4 files changed, 486 insertions(+) create mode 100755 samples/bpf/tc_l2_redirect.sh create mode 100644 samples/bpf/tc_l2_redirect_kern.c create mode 100644 samples/bpf/tc_l2_redirect_user.c diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 12b7304d55dc..72c58675973e 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -27,6 +27,7 @@ hostprogs-y += xdp2 hostprogs-y += test_current_task_under_cgroup hostprogs-y += trace_event hostprogs-y += sampleip +hostprogs-y += tc_l2_redirect test_verifier-objs := test_verifier.o libbpf.o test_maps-objs := test_maps.o libbpf.o @@ -56,6 +57,7 @@ test_current_task_under_cgroup-objs := bpf_load.o libbpf.o \ test_current_task_under_cgroup_user.o trace_event-objs := bpf_load.o libbpf.o trace_event_user.o sampleip-objs := bpf_load.o libbpf.o sampleip_user.o +tc_l2_redirect-objs := bpf_load.o libbpf.o tc_l2_redirect_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -72,6 +74,7 @@ always += test_probe_write_user_kern.o always += trace_output_kern.o always += tcbpf1_kern.o always += tcbpf2_kern.o +always += tc_l2_redirect_kern.o always += lathist_kern.o always += offwaketime_kern.o always += spintest_kern.o @@ -111,6 +114,7 @@ HOSTLOADLIBES_xdp2 += -lelf HOSTLOADLIBES_test_current_task_under_cgroup += -lelf HOSTLOADLIBES_trace_event += -lelf HOSTLOADLIBES_sampleip += -lelf +HOSTLOADLIBES_tc_l2_redirect += -l elf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/tc_l2_redirect.sh b/samples/bpf/tc_l2_redirect.sh new file mode 100755 index 000000000000..80a05591a140 --- /dev/null +++ b/samples/bpf/tc_l2_redirect.sh @@ -0,0 +1,173 @@ +#!/bin/bash + +[[ -z $TC ]] && TC='tc' +[[ -z $IP ]] && IP='ip' + +REDIRECT_USER='./tc_l2_redirect' +REDIRECT_BPF='./tc_l2_redirect_kern.o' + +RP_FILTER=$(< /proc/sys/net/ipv4/conf/all/rp_filter) +IPV6_FORWARDING=$(< /proc/sys/net/ipv6/conf/all/forwarding) + +function config_common { + local tun_type=$1 + + $IP netns add ns1 + $IP netns add ns2 + $IP link add ve1 type veth peer name vens1 + $IP link add ve2 type veth peer name vens2 + $IP link set dev ve1 up + $IP link set dev ve2 up + $IP link set dev ve1 mtu 1500 + $IP link set dev ve2 mtu 1500 + $IP link set dev vens1 netns ns1 + $IP link set dev vens2 netns ns2 + + $IP -n ns1 link set dev lo up + $IP -n ns1 link set dev vens1 up + $IP -n ns1 addr add 10.1.1.101/24 dev vens1 + $IP -n ns1 addr add 2401:db01::65/64 dev vens1 nodad + $IP -n ns1 route add default via 10.1.1.1 dev vens1 + $IP -n ns1 route add default via 2401:db01::1 dev vens1 + + $IP -n ns2 link set dev lo up + $IP -n ns2 link set dev vens2 up + $IP -n ns2 addr add 10.2.1.102/24 dev vens2 + $IP -n ns2 addr add 2401:db02::66/64 dev vens2 nodad + $IP -n ns2 addr add 10.10.1.102 dev lo + $IP -n ns2 addr add 2401:face::66/64 dev lo nodad + $IP -n ns2 link add ipt2 type ipip local 10.2.1.102 remote 10.2.1.1 + $IP -n ns2 link add ip6t2 type ip6tnl mode any local 2401:db02::66 remote 2401:db02::1 + $IP -n ns2 link set dev ipt2 up + $IP -n ns2 link set dev ip6t2 up + $IP netns exec ns2 $TC qdisc add dev vens2 clsact + $IP netns exec ns2 $TC filter add dev vens2 ingress bpf da obj $REDIRECT_BPF sec drop_non_tun_vip + if [[ $tun_type == "ipip" ]]; then + $IP -n ns2 route add 10.1.1.0/24 dev ipt2 + $IP netns exec ns2 sysctl -q -w net.ipv4.conf.all.rp_filter=0 + $IP netns exec ns2 sysctl -q -w net.ipv4.conf.ipt2.rp_filter=0 + else + $IP -n ns2 route add 10.1.1.0/24 dev ip6t2 + $IP -n ns2 route add 2401:db01::/64 dev ip6t2 + $IP netns exec ns2 sysctl -q -w net.ipv4.conf.all.rp_filter=0 + $IP netns exec ns2 sysctl -q -w net.ipv4.conf.ip6t2.rp_filter=0 + fi + + $IP addr add 10.1.1.1/24 dev ve1 + $IP addr add 2401:db01::1/64 dev ve1 nodad + $IP addr add 10.2.1.1/24 dev ve2 + $IP addr add 2401:db02::1/64 dev ve2 nodad + + $TC qdisc add dev ve2 clsact + $TC filter add dev ve2 ingress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_forward + + sysctl -q -w net.ipv4.conf.all.rp_filter=0 + sysctl -q -w net.ipv6.conf.all.forwarding=1 +} + +function cleanup { + set +e + [[ -z $DEBUG ]] || set +x + $IP netns delete ns1 >& /dev/null + $IP netns delete ns2 >& /dev/null + $IP link del ve1 >& /dev/null + $IP link del ve2 >& /dev/null + $IP link del ipt >& /dev/null + $IP link del ip6t >& /dev/null + sysctl -q -w net.ipv4.conf.all.rp_filter=$RP_FILTER + sysctl -q -w net.ipv6.conf.all.forwarding=$IPV6_FORWARDING + rm -f /sys/fs/bpf/tc/globals/tun_iface + [[ -z $DEBUG ]] || set -x + set -e +} + +function l2_to_ipip { + echo -n "l2_to_ipip $1: " + + local dir=$1 + + config_common ipip + + $IP link add ipt type ipip external + $IP link set dev ipt up + sysctl -q -w net.ipv4.conf.ipt.rp_filter=0 + sysctl -q -w net.ipv4.conf.ipt.forwarding=1 + + if [[ $dir == "egress" ]]; then + $IP route add 10.10.1.0/24 via 10.2.1.102 dev ve2 + $TC filter add dev ve2 egress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_redirect + sysctl -q -w net.ipv4.conf.ve1.forwarding=1 + else + $TC qdisc add dev ve1 clsact + $TC filter add dev ve1 ingress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_redirect + fi + + $REDIRECT_USER -U /sys/fs/bpf/tc/globals/tun_iface -i $(< /sys/class/net/ipt/ifindex) + + $IP netns exec ns1 ping -c1 10.10.1.102 >& /dev/null + + if [[ $dir == "egress" ]]; then + # test direct egress to ve2 (i.e. not forwarding from + # ve1 to ve2). + ping -c1 10.10.1.102 >& /dev/null + fi + + cleanup + + echo "OK" +} + +function l2_to_ip6tnl { + echo -n "l2_to_ip6tnl $1: " + + local dir=$1 + + config_common ip6tnl + + $IP link add ip6t type ip6tnl mode any external + $IP link set dev ip6t up + sysctl -q -w net.ipv4.conf.ip6t.rp_filter=0 + sysctl -q -w net.ipv4.conf.ip6t.forwarding=1 + + if [[ $dir == "egress" ]]; then + $IP route add 10.10.1.0/24 via 10.2.1.102 dev ve2 + $IP route add 2401:face::/64 via 2401:db02::66 dev ve2 + $TC filter add dev ve2 egress bpf da obj $REDIRECT_BPF sec l2_to_ip6tun_ingress_redirect + sysctl -q -w net.ipv4.conf.ve1.forwarding=1 + else + $TC qdisc add dev ve1 clsact + $TC filter add dev ve1 ingress bpf da obj $REDIRECT_BPF sec l2_to_ip6tun_ingress_redirect + fi + + $REDIRECT_USER -U /sys/fs/bpf/tc/globals/tun_iface -i $(< /sys/class/net/ip6t/ifindex) + + $IP netns exec ns1 ping -c1 10.10.1.102 >& /dev/null + $IP netns exec ns1 ping -6 -c1 2401:face::66 >& /dev/null + + if [[ $dir == "egress" ]]; then + # test direct egress to ve2 (i.e. not forwarding from + # ve1 to ve2). + ping -c1 10.10.1.102 >& /dev/null + ping -6 -c1 2401:face::66 >& /dev/null + fi + + cleanup + + echo "OK" +} + +cleanup +test_names="l2_to_ipip l2_to_ip6tnl" +test_dirs="ingress egress" +if [[ $# -ge 2 ]]; then + test_names=$1 + test_dirs=$2 +elif [[ $# -ge 1 ]]; then + test_names=$1 +fi + +for t in $test_names; do + for d in $test_dirs; do + $t $d + done +done diff --git a/samples/bpf/tc_l2_redirect_kern.c b/samples/bpf/tc_l2_redirect_kern.c new file mode 100644 index 000000000000..92a44729dbe4 --- /dev/null +++ b/samples/bpf/tc_l2_redirect_kern.c @@ -0,0 +1,236 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" + +#define _htonl __builtin_bswap32 + +#define PIN_GLOBAL_NS 2 +struct bpf_elf_map { + __u32 type; + __u32 size_key; + __u32 size_value; + __u32 max_elem; + __u32 flags; + __u32 id; + __u32 pinning; +}; + +/* copy of 'struct ethhdr' without __packed */ +struct eth_hdr { + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + unsigned short h_proto; +}; + +struct bpf_elf_map SEC("maps") tun_iface = { + .type = BPF_MAP_TYPE_ARRAY, + .size_key = sizeof(int), + .size_value = sizeof(int), + .pinning = PIN_GLOBAL_NS, + .max_elem = 1, +}; + +static __always_inline bool is_vip_addr(__be16 eth_proto, __be32 daddr) +{ + if (eth_proto == htons(ETH_P_IP)) + return (_htonl(0xffffff00) & daddr) == _htonl(0x0a0a0100); + else if (eth_proto == htons(ETH_P_IPV6)) + return (daddr == _htonl(0x2401face)); + + return false; +} + +SEC("l2_to_iptun_ingress_forward") +int _l2_to_iptun_ingress_forward(struct __sk_buff *skb) +{ + struct bpf_tunnel_key tkey = {}; + void *data = (void *)(long)skb->data; + struct eth_hdr *eth = data; + void *data_end = (void *)(long)skb->data_end; + int key = 0, *ifindex; + + int ret; + + if (data + sizeof(*eth) > data_end) + return TC_ACT_OK; + + ifindex = bpf_map_lookup_elem(&tun_iface, &key); + if (!ifindex) + return TC_ACT_OK; + + if (eth->h_proto == htons(ETH_P_IP)) { + char fmt4[] = "ingress forward to ifindex:%d daddr4:%x\n"; + struct iphdr *iph = data + sizeof(*eth); + + if (data + sizeof(*eth) + sizeof(*iph) > data_end) + return TC_ACT_OK; + + if (iph->protocol != IPPROTO_IPIP) + return TC_ACT_OK; + + bpf_trace_printk(fmt4, sizeof(fmt4), *ifindex, + _htonl(iph->daddr)); + return bpf_redirect(*ifindex, BPF_F_INGRESS); + } else if (eth->h_proto == htons(ETH_P_IPV6)) { + char fmt6[] = "ingress forward to ifindex:%d daddr6:%x::%x\n"; + struct ipv6hdr *ip6h = data + sizeof(*eth); + + if (data + sizeof(*eth) + sizeof(*ip6h) > data_end) + return TC_ACT_OK; + + if (ip6h->nexthdr != IPPROTO_IPIP && + ip6h->nexthdr != IPPROTO_IPV6) + return TC_ACT_OK; + + bpf_trace_printk(fmt6, sizeof(fmt6), *ifindex, + _htonl(ip6h->daddr.s6_addr32[0]), + _htonl(ip6h->daddr.s6_addr32[3])); + return bpf_redirect(*ifindex, BPF_F_INGRESS); + } + + return TC_ACT_OK; +} + +SEC("l2_to_iptun_ingress_redirect") +int _l2_to_iptun_ingress_redirect(struct __sk_buff *skb) +{ + struct bpf_tunnel_key tkey = {}; + void *data = (void *)(long)skb->data; + struct eth_hdr *eth = data; + void *data_end = (void *)(long)skb->data_end; + int key = 0, *ifindex; + + int ret; + + if (data + sizeof(*eth) > data_end) + return TC_ACT_OK; + + ifindex = bpf_map_lookup_elem(&tun_iface, &key); + if (!ifindex) + return TC_ACT_OK; + + if (eth->h_proto == htons(ETH_P_IP)) { + char fmt4[] = "e/ingress redirect daddr4:%x to ifindex:%d\n"; + struct iphdr *iph = data + sizeof(*eth); + __be32 daddr = iph->daddr; + + if (data + sizeof(*eth) + sizeof(*iph) > data_end) + return TC_ACT_OK; + + if (!is_vip_addr(eth->h_proto, daddr)) + return TC_ACT_OK; + + bpf_trace_printk(fmt4, sizeof(fmt4), _htonl(daddr), *ifindex); + } else { + return TC_ACT_OK; + } + + tkey.tunnel_id = 10000; + tkey.tunnel_ttl = 64; + tkey.remote_ipv4 = 0x0a020166; /* 10.2.1.102 */ + bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), 0); + return bpf_redirect(*ifindex, 0); +} + +SEC("l2_to_ip6tun_ingress_redirect") +int _l2_to_ip6tun_ingress_redirect(struct __sk_buff *skb) +{ + struct bpf_tunnel_key tkey = {}; + void *data = (void *)(long)skb->data; + struct eth_hdr *eth = data; + void *data_end = (void *)(long)skb->data_end; + int key = 0, *ifindex; + + if (data + sizeof(*eth) > data_end) + return TC_ACT_OK; + + ifindex = bpf_map_lookup_elem(&tun_iface, &key); + if (!ifindex) + return TC_ACT_OK; + + if (eth->h_proto == htons(ETH_P_IP)) { + char fmt4[] = "e/ingress redirect daddr4:%x to ifindex:%d\n"; + struct iphdr *iph = data + sizeof(*eth); + + if (data + sizeof(*eth) + sizeof(*iph) > data_end) + return TC_ACT_OK; + + if (!is_vip_addr(eth->h_proto, iph->daddr)) + return TC_ACT_OK; + + bpf_trace_printk(fmt4, sizeof(fmt4), _htonl(iph->daddr), + *ifindex); + } else if (eth->h_proto == htons(ETH_P_IPV6)) { + char fmt6[] = "e/ingress redirect daddr6:%x to ifindex:%d\n"; + struct ipv6hdr *ip6h = data + sizeof(*eth); + + if (data + sizeof(*eth) + sizeof(*ip6h) > data_end) + return TC_ACT_OK; + + if (!is_vip_addr(eth->h_proto, ip6h->daddr.s6_addr32[0])) + return TC_ACT_OK; + + bpf_trace_printk(fmt6, sizeof(fmt6), + _htonl(ip6h->daddr.s6_addr32[0]), *ifindex); + } else { + return TC_ACT_OK; + } + + tkey.tunnel_id = 10000; + tkey.tunnel_ttl = 64; + /* 2401:db02:0:0:0:0:0:66 */ + tkey.remote_ipv6[0] = _htonl(0x2401db02); + tkey.remote_ipv6[1] = 0; + tkey.remote_ipv6[2] = 0; + tkey.remote_ipv6[3] = _htonl(0x00000066); + bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), BPF_F_TUNINFO_IPV6); + return bpf_redirect(*ifindex, 0); +} + +SEC("drop_non_tun_vip") +int _drop_non_tun_vip(struct __sk_buff *skb) +{ + struct bpf_tunnel_key tkey = {}; + void *data = (void *)(long)skb->data; + struct eth_hdr *eth = data; + void *data_end = (void *)(long)skb->data_end; + + if (data + sizeof(*eth) > data_end) + return TC_ACT_OK; + + if (eth->h_proto == htons(ETH_P_IP)) { + struct iphdr *iph = data + sizeof(*eth); + + if (data + sizeof(*eth) + sizeof(*iph) > data_end) + return TC_ACT_OK; + + if (is_vip_addr(eth->h_proto, iph->daddr)) + return TC_ACT_SHOT; + } else if (eth->h_proto == htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6h = data + sizeof(*eth); + + if (data + sizeof(*eth) + sizeof(*ip6h) > data_end) + return TC_ACT_OK; + + if (is_vip_addr(eth->h_proto, ip6h->daddr.s6_addr32[0])) + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tc_l2_redirect_user.c b/samples/bpf/tc_l2_redirect_user.c new file mode 100644 index 000000000000..4013c5337b91 --- /dev/null +++ b/samples/bpf/tc_l2_redirect_user.c @@ -0,0 +1,73 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include + +#include +#include +#include +#include +#include + +#include "libbpf.h" + +static void usage(void) +{ + printf("Usage: tc_l2_ipip_redirect [...]\n"); + printf(" -U Update an already pinned BPF array\n"); + printf(" -i Interface index\n"); + printf(" -h Display this help\n"); +} + +int main(int argc, char **argv) +{ + const char *pinned_file = NULL; + int ifindex = -1; + int array_key = 0; + int array_fd = -1; + int ret = -1; + int opt; + + while ((opt = getopt(argc, argv, "F:U:i:")) != -1) { + switch (opt) { + /* General args */ + case 'U': + pinned_file = optarg; + break; + case 'i': + ifindex = atoi(optarg); + break; + default: + usage(); + goto out; + } + } + + if (ifindex < 0 || !pinned_file) { + usage(); + goto out; + } + + array_fd = bpf_obj_get(pinned_file); + if (array_fd < 0) { + fprintf(stderr, "bpf_obj_get(%s): %s(%d)\n", + pinned_file, strerror(errno), errno); + goto out; + } + + /* bpf_tunnel_key.remote_ipv4 expects host byte orders */ + ret = bpf_update_elem(array_fd, &array_key, &ifindex, 0); + if (ret) { + perror("bpf_update_elem"); + goto out; + } + +out: + if (array_fd != -1) + close(array_fd); + return ret; +} From 34fad54c2537f7c99d07375e50cb30aa3c23bd83 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Nov 2016 16:04:46 -0800 Subject: [PATCH 182/503] net: __skb_flow_dissect() must cap its return value After Tom patch, thoff field could point past the end of the buffer, this could fool some callers. If an skb was provided, skb->len should be the upper limit. If not, hlen is supposed to be the upper limit. Fixes: a6e544b0a88b ("flow_dissector: Jump to exit code in __skb_flow_dissect") Signed-off-by: Eric Dumazet Reported-by: Yibin Yang Acked-by: Willem de Bruijn Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index ab193e5def07..69e4463a4b1b 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -122,7 +122,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector_key_keyid *key_keyid; bool skip_vlan = false; u8 ip_proto = 0; - bool ret = false; + bool ret; if (!data) { data = skb->data; @@ -549,12 +549,17 @@ ip_proto_again: out_good: ret = true; -out_bad: + key_control->thoff = (u16)nhoff; +out: key_basic->n_proto = proto; key_basic->ip_proto = ip_proto; - key_control->thoff = (u16)nhoff; return ret; + +out_bad: + ret = false; + key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen); + goto out; } EXPORT_SYMBOL(__skb_flow_dissect); From 10b217681ddec4fa3ddb375bb188fec504523da4 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Thu, 10 Nov 2016 13:21:42 +0200 Subject: [PATCH 183/503] net: bpqether.h: remove if_ether.h guard __LINUX_IF_ETHER_H is not defined anywhere, and if_ether.h can keep itself from double inclusion, though it uses a single underscore prefix. Signed-off-by: Baruch Siach Signed-off-by: David S. Miller --- include/uapi/linux/bpqether.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/uapi/linux/bpqether.h b/include/uapi/linux/bpqether.h index a6c35e1a89ad..05865edaefda 100644 --- a/include/uapi/linux/bpqether.h +++ b/include/uapi/linux/bpqether.h @@ -5,9 +5,7 @@ * Defines for the BPQETHER pseudo device driver */ -#ifndef __LINUX_IF_ETHER_H #include -#endif #define SIOCSBPQETHOPT (SIOCDEVPRIVATE+0) /* reserved */ #define SIOCSBPQETHADDR (SIOCDEVPRIVATE+1) From 02e56902e40e4c1ff57590c717e46377b72d5966 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 12 Nov 2016 21:04:23 +0000 Subject: [PATCH 184/503] x86/efi: Fix EFI memmap pointer size warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix this when building on 32-bit: arch/x86/platform/efi/efi.c: In function ‘__efi_enter_virtual_mode’: arch/x86/platform/efi/efi.c:911:5: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] (efi_memory_desc_t *)pa); ^ arch/x86/platform/efi/efi.c:918:5: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] (efi_memory_desc_t *)pa); ^ The @pa local variable is declared as phys_addr_t and that is a u64 when CONFIG_PHYS_ADDR_T_64BIT=y. (The last is enabled on 32-bit on a PAE build.) However, its value comes from __pa() which is basically doing pointer arithmetic and checking, and returns unsigned long as it is the native pointer width. So let's use an unsigned long too. It should be fine to do so because the later users cast it to a pointer too. Signed-off-by: Borislav Petkov Signed-off-by: Matt Fleming Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20161112210424.5157-2-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index bf99aa7005eb..936a488d6cf6 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -861,7 +861,7 @@ static void __init __efi_enter_virtual_mode(void) int count = 0, pg_shift = 0; void *new_memmap = NULL; efi_status_t status; - phys_addr_t pa; + unsigned long pa; efi.systab = NULL; From f6697df36bdf0bf7fce984605c2918d4a7b4269f Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sat, 12 Nov 2016 21:04:24 +0000 Subject: [PATCH 185/503] x86/efi: Prevent mixed mode boot corruption with CONFIG_VMAP_STACK=y Booting an EFI mixed mode kernel has been crashing since commit: e37e43a497d5 ("x86/mm/64: Enable vmapped stacks (CONFIG_HAVE_ARCH_VMAP_STACK=y)") The user-visible effect in my test setup was the kernel being unable to find the root file system ramdisk. This was likely caused by silent memory or page table corruption. Enabling CONFIG_DEBUG_VIRTUAL=y immediately flagged the thunking code as abusing virt_to_phys() because it was passing addresses that were not part of the kernel direct mapping. Use the slow version instead, which correctly handles all memory regions by performing a page table walk. Suggested-by: Andy Lutomirski Signed-off-by: Matt Fleming Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20161112210424.5157-3-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi_64.c | 80 ++++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 23 deletions(-) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 58b0f801f66f..319148bd4b05 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -211,6 +212,35 @@ void efi_sync_low_kernel_mappings(void) memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); } +/* + * Wrapper for slow_virt_to_phys() that handles NULL addresses. + */ +static inline phys_addr_t +virt_to_phys_or_null_size(void *va, unsigned long size) +{ + bool bad_size; + + if (!va) + return 0; + + if (virt_addr_valid(va)) + return virt_to_phys(va); + + /* + * A fully aligned variable on the stack is guaranteed not to + * cross a page bounary. Try to catch strings on the stack by + * checking that 'size' is a power of two. + */ + bad_size = size > PAGE_SIZE || !is_power_of_2(size); + + WARN_ON(!IS_ALIGNED((unsigned long)va, size) || bad_size); + + return slow_virt_to_phys(va); +} + +#define virt_to_phys_or_null(addr) \ + virt_to_phys_or_null_size((addr), sizeof(*(addr))) + int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { unsigned long pfn, text; @@ -494,8 +524,8 @@ static efi_status_t efi_thunk_get_time(efi_time_t *tm, efi_time_cap_t *tc) spin_lock(&rtc_lock); - phys_tm = virt_to_phys(tm); - phys_tc = virt_to_phys(tc); + phys_tm = virt_to_phys_or_null(tm); + phys_tc = virt_to_phys_or_null(tc); status = efi_thunk(get_time, phys_tm, phys_tc); @@ -511,7 +541,7 @@ static efi_status_t efi_thunk_set_time(efi_time_t *tm) spin_lock(&rtc_lock); - phys_tm = virt_to_phys(tm); + phys_tm = virt_to_phys_or_null(tm); status = efi_thunk(set_time, phys_tm); @@ -529,9 +559,9 @@ efi_thunk_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending, spin_lock(&rtc_lock); - phys_enabled = virt_to_phys(enabled); - phys_pending = virt_to_phys(pending); - phys_tm = virt_to_phys(tm); + phys_enabled = virt_to_phys_or_null(enabled); + phys_pending = virt_to_phys_or_null(pending); + phys_tm = virt_to_phys_or_null(tm); status = efi_thunk(get_wakeup_time, phys_enabled, phys_pending, phys_tm); @@ -549,7 +579,7 @@ efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) spin_lock(&rtc_lock); - phys_tm = virt_to_phys(tm); + phys_tm = virt_to_phys_or_null(tm); status = efi_thunk(set_wakeup_time, enabled, phys_tm); @@ -558,6 +588,10 @@ efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) return status; } +static unsigned long efi_name_size(efi_char16_t *name) +{ + return ucs2_strsize(name, EFI_VAR_NAME_LEN) + 1; +} static efi_status_t efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor, @@ -567,11 +601,11 @@ efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor, u32 phys_name, phys_vendor, phys_attr; u32 phys_data_size, phys_data; - phys_data_size = virt_to_phys(data_size); - phys_vendor = virt_to_phys(vendor); - phys_name = virt_to_phys(name); - phys_attr = virt_to_phys(attr); - phys_data = virt_to_phys(data); + phys_data_size = virt_to_phys_or_null(data_size); + phys_vendor = virt_to_phys_or_null(vendor); + phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); + phys_attr = virt_to_phys_or_null(attr); + phys_data = virt_to_phys_or_null_size(data, *data_size); status = efi_thunk(get_variable, phys_name, phys_vendor, phys_attr, phys_data_size, phys_data); @@ -586,9 +620,9 @@ efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor, u32 phys_name, phys_vendor, phys_data; efi_status_t status; - phys_name = virt_to_phys(name); - phys_vendor = virt_to_phys(vendor); - phys_data = virt_to_phys(data); + phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); + phys_vendor = virt_to_phys_or_null(vendor); + phys_data = virt_to_phys_or_null_size(data, data_size); /* If data_size is > sizeof(u32) we've got problems */ status = efi_thunk(set_variable, phys_name, phys_vendor, @@ -605,9 +639,9 @@ efi_thunk_get_next_variable(unsigned long *name_size, efi_status_t status; u32 phys_name_size, phys_name, phys_vendor; - phys_name_size = virt_to_phys(name_size); - phys_vendor = virt_to_phys(vendor); - phys_name = virt_to_phys(name); + phys_name_size = virt_to_phys_or_null(name_size); + phys_vendor = virt_to_phys_or_null(vendor); + phys_name = virt_to_phys_or_null_size(name, *name_size); status = efi_thunk(get_next_variable, phys_name_size, phys_name, phys_vendor); @@ -621,7 +655,7 @@ efi_thunk_get_next_high_mono_count(u32 *count) efi_status_t status; u32 phys_count; - phys_count = virt_to_phys(count); + phys_count = virt_to_phys_or_null(count); status = efi_thunk(get_next_high_mono_count, phys_count); return status; @@ -633,7 +667,7 @@ efi_thunk_reset_system(int reset_type, efi_status_t status, { u32 phys_data; - phys_data = virt_to_phys(data); + phys_data = virt_to_phys_or_null_size(data, data_size); efi_thunk(reset_system, reset_type, status, data_size, phys_data); } @@ -661,9 +695,9 @@ efi_thunk_query_variable_info(u32 attr, u64 *storage_space, if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) return EFI_UNSUPPORTED; - phys_storage = virt_to_phys(storage_space); - phys_remaining = virt_to_phys(remaining_space); - phys_max = virt_to_phys(max_variable_size); + phys_storage = virt_to_phys_or_null(storage_space); + phys_remaining = virt_to_phys_or_null(remaining_space); + phys_max = virt_to_phys_or_null(max_variable_size); status = efi_thunk(query_variable_info, attr, phys_storage, phys_remaining, phys_max); From 83d2c9a9c17b1e9f23a3a0c24c03cd18e4b02520 Mon Sep 17 00:00:00 2001 From: Sven Ebenfeld Date: Mon, 7 Nov 2016 18:51:34 +0100 Subject: [PATCH 186/503] crypto: caam - do not register AES-XTS mode on LP units MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using AES-XTS on a Wandboard, we receive a Mode error: caam_jr 2102000.jr1: 20001311: CCB: desc idx 19: AES: Mode error. According to the Security Reference Manual, the Low Power AES units of the i.MX6 do not support the XTS mode. Therefore we must not register XTS implementations in the Crypto API. Signed-off-by: Sven Ebenfeld Reviewed-by: Horia Geantă Cc: # 4.4+ Fixes: c6415a6016bf "crypto: caam - add support for acipher xts(aes)" Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 156aad167cd6..f5a63ba97023 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -4583,6 +4583,15 @@ static int __init caam_algapi_init(void) if (!aes_inst && (alg_sel == OP_ALG_ALGSEL_AES)) continue; + /* + * Check support for AES modes not available + * on LP devices. + */ + if ((cha_vid & CHA_ID_LS_AES_MASK) == CHA_ID_LS_AES_LP) + if ((alg->class1_alg_type & OP_ALG_AAI_MASK) == + OP_ALG_AAI_XTS) + continue; + t_alg = caam_alg_alloc(alg); if (IS_ERR(t_alg)) { err = PTR_ERR(t_alg); From ca0a75316dc62af92943761b1cc049e15c92eb09 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 9 Nov 2016 19:51:25 -0800 Subject: [PATCH 187/503] r8152: Fix error path in open function If usb_submit_urb() called from the open function fails, the following crash may be observed. r8152 8-1:1.0 eth0: intr_urb submit failed: -19 ... r8152 8-1:1.0 eth0: v1.08.3 Unable to handle kernel paging request at virtual address 6b6b6b6b6b6b6b7b pgd = ffffffc0e7305000 [6b6b6b6b6b6b6b7b] *pgd=0000000000000000, *pud=0000000000000000 Internal error: Oops: 96000004 [#1] PREEMPT SMP ... PC is at notifier_chain_register+0x2c/0x58 LR is at blocking_notifier_chain_register+0x54/0x70 ... Call trace: [] notifier_chain_register+0x2c/0x58 [] blocking_notifier_chain_register+0x54/0x70 [] register_pm_notifier+0x24/0x2c [] rtl8152_open+0x3dc/0x3f8 [r8152] [] __dev_open+0xac/0x104 [] __dev_change_flags+0xb0/0x148 [] dev_change_flags+0x34/0x70 [] do_setlink+0x2c8/0x888 [] rtnl_newlink+0x328/0x644 [] rtnetlink_rcv_msg+0x1a8/0x1d4 [] netlink_rcv_skb+0x68/0xd0 [] rtnetlink_rcv+0x2c/0x3c [] netlink_unicast+0x16c/0x234 [] netlink_sendmsg+0x340/0x364 [] sock_sendmsg+0x48/0x60 [] SyS_sendto+0xe0/0x120 [] SyS_send+0x40/0x4c [] el0_svc_naked+0x24/0x28 Clean up error handling to avoid registering the notifier if the open function is going to fail. Signed-off-by: Guenter Roeck Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 75c516889645..efb84f092492 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3266,10 +3266,8 @@ static int rtl8152_open(struct net_device *netdev) goto out; res = usb_autopm_get_interface(tp->intf); - if (res < 0) { - free_all_mem(tp); - goto out; - } + if (res < 0) + goto out_free; mutex_lock(&tp->control); @@ -3285,10 +3283,9 @@ static int rtl8152_open(struct net_device *netdev) netif_device_detach(tp->netdev); netif_warn(tp, ifup, netdev, "intr_urb submit failed: %d\n", res); - free_all_mem(tp); - } else { - napi_enable(&tp->napi); + goto out_unlock; } + napi_enable(&tp->napi); mutex_unlock(&tp->control); @@ -3297,7 +3294,13 @@ static int rtl8152_open(struct net_device *netdev) tp->pm_notifier.notifier_call = rtl_notifier; register_pm_notifier(&tp->pm_notifier); #endif + return 0; +out_unlock: + mutex_unlock(&tp->control); + usb_autopm_put_interface(tp->intf); +out_free: + free_all_mem(tp); out: return res; } From 969447f226b451c453ddc83cac6144eaeac6f2e3 Mon Sep 17 00:00:00 2001 From: Stephen Suryaputra Lin Date: Thu, 10 Nov 2016 11:16:15 -0500 Subject: [PATCH 188/503] ipv4: use new_gw for redirect neigh lookup In v2.6, ip_rt_redirect() calls arp_bind_neighbour() which returns 0 and then the state of the neigh for the new_gw is checked. If the state isn't valid then the redirected route is deleted. This behavior is maintained up to v3.5.7 by check_peer_redirect() because rt->rt_gateway is assigned to peer->redirect_learned.a4 before calling ipv4_neigh_lookup(). After commit 5943634fc559 ("ipv4: Maintain redirect and PMTU info in struct rtable again."), ipv4_neigh_lookup() is performed without the rt_gateway assigned to the new_gw. In the case when rt_gateway (old_gw) isn't zero, the function uses it as the key. The neigh is most likely valid since the old_gw is the one that sends the ICMP redirect message. Then the new_gw is assigned to fib_nh_exception. The problem is: the new_gw ARP may never gets resolved and the traffic is blackholed. So, use the new_gw for neigh lookup. Changes from v1: - use __ipv4_neigh_lookup instead (per Eric Dumazet). Fixes: 5943634fc559 ("ipv4: Maintain redirect and PMTU info in struct rtable again.") Signed-off-by: Stephen Suryaputra Lin Signed-off-by: David S. Miller --- net/ipv4/route.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 62d4d90c1389..2a57566e6e91 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -753,7 +753,9 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow goto reject_redirect; } - n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw); + n = __ipv4_neigh_lookup(rt->dst.dev, new_gw); + if (!n) + n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev); if (!IS_ERR(n)) { if (!(n->nud_state & NUD_VALID)) { neigh_event_send(n, NULL); From ac6e780070e30e4c35bd395acfe9191e6268bdd3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Nov 2016 13:12:35 -0800 Subject: [PATCH 189/503] tcp: take care of truncations done by sk_filter() With syzkaller help, Marco Grassi found a bug in TCP stack, crashing in tcp_collapse() Root cause is that sk_filter() can truncate the incoming skb, but TCP stack was not really expecting this to happen. It probably was expecting a simple DROP or ACCEPT behavior. We first need to make sure no part of TCP header could be removed. Then we need to adjust TCP_SKB_CB(skb)->end_seq Many thanks to syzkaller team and Marco for giving us a reproducer. Signed-off-by: Eric Dumazet Reported-by: Marco Grassi Reported-by: Vladis Dronov Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/tcp_ipv4.c | 19 ++++++++++++++++++- net/ipv6/tcp_ipv6.c | 6 ++++-- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 304a8e17bc87..123979fe12bf 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1220,6 +1220,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp) bool tcp_prequeue(struct sock *sk, struct sk_buff *skb); bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb); +int tcp_filter(struct sock *sk, struct sk_buff *skb); #undef STATE_TRACE diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 61b7be303eec..2259114c7242 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1564,6 +1564,21 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(tcp_add_backlog); +int tcp_filter(struct sock *sk, struct sk_buff *skb) +{ + struct tcphdr *th = (struct tcphdr *)skb->data; + unsigned int eaten = skb->len; + int err; + + err = sk_filter_trim_cap(sk, skb, th->doff * 4); + if (!err) { + eaten -= skb->len; + TCP_SKB_CB(skb)->end_seq -= eaten; + } + return err; +} +EXPORT_SYMBOL(tcp_filter); + /* * From tcp_input.c */ @@ -1676,8 +1691,10 @@ process: nf_reset(skb); - if (sk_filter(sk, skb)) + if (tcp_filter(sk, skb)) goto discard_and_relse; + th = (const struct tcphdr *)skb->data; + iph = ip_hdr(skb); skb->dev = NULL; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6ca23c2e76f7..b9f1fee9a886 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1229,7 +1229,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_do_rcv(sk, skb); - if (sk_filter(sk, skb)) + if (tcp_filter(sk, skb)) goto discard; /* @@ -1457,8 +1457,10 @@ process: if (tcp_v6_inbound_md5_hash(sk, skb)) goto discard_and_relse; - if (sk_filter(sk, skb)) + if (tcp_filter(sk, skb)) goto discard_and_relse; + th = (const struct tcphdr *)skb->data; + hdr = ipv6_hdr(skb); skb->dev = NULL; From 7b5b74efcca00f15c2aec1dc7175bfe34b6ec643 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Thu, 10 Nov 2016 19:08:39 -0500 Subject: [PATCH 190/503] Revert "include/uapi/linux/atm_zatm.h: include linux/time.h" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit cf00713a655d ("include/uapi/linux/atm_zatm.h: include linux/time.h"). This attempted to fix userspace breakage that no longer existed when the patch was merged. Almost one year earlier, commit 70ba07b675b5 ("atm: remove 'struct zatm_t_hist'") deleted the struct in question. After this patch was merged, we now have to deal with people being unable to include this header in conjunction with standard C library headers like stdlib.h (which linux-atm does). Example breakage: x86_64-pc-linux-gnu-gcc -DHAVE_CONFIG_H -I. -I../.. -I./../q2931 -I./../saal \ -I. -DCPPFLAGS_TEST -I../../src/include -O2 -march=native -pipe -g \ -frecord-gcc-switches -freport-bug -Wimplicit-function-declaration \ -Wnonnull -Wstrict-aliasing -Wparentheses -Warray-bounds \ -Wfree-nonheap-object -Wreturn-local-addr -fno-strict-aliasing -Wall \ -Wshadow -Wpointer-arith -Wwrite-strings -Wstrict-prototypes -c zntune.c In file included from /usr/include/linux/atm_zatm.h:17:0, from zntune.c:17: /usr/include/linux/time.h:9:8: error: redefinition of ‘struct timespec’ struct timespec { ^ In file included from /usr/include/sys/select.h:43:0, from /usr/include/sys/types.h:219, from /usr/include/stdlib.h:314, from zntune.c:9: /usr/include/time.h:120:8: note: originally defined here struct timespec ^ Signed-off-by: Mike Frysinger Acked-by: Mikko Rapeli Signed-off-by: David S. Miller --- include/uapi/linux/atm_zatm.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/uapi/linux/atm_zatm.h b/include/uapi/linux/atm_zatm.h index 5cd4d4d2dd1d..9c9c6ad55f14 100644 --- a/include/uapi/linux/atm_zatm.h +++ b/include/uapi/linux/atm_zatm.h @@ -14,7 +14,6 @@ #include #include -#include #define ZATM_GETPOOL _IOW('a',ATMIOC_SARPRV+1,struct atmif_sioc) /* get pool statistics */ From 3ffb6a39b751b635a0c50b650064c38b8d371ef2 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 11 Nov 2016 00:11:42 -0500 Subject: [PATCH 191/503] bnxt_en: Fix ring arithmetic in bnxt_setup_tc(). The logic is missing the check on whether the tx and rx rings are sharing completion rings or not. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index a9f9f3738022..c6909660e097 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6309,6 +6309,7 @@ static int bnxt_setup_tc(struct net_device *dev, u32 handle, __be16 proto, struct tc_to_netdev *ntc) { struct bnxt *bp = netdev_priv(dev); + bool sh = false; u8 tc; if (ntc->type != TC_SETUP_MQPRIO) @@ -6325,12 +6326,11 @@ static int bnxt_setup_tc(struct net_device *dev, u32 handle, __be16 proto, if (netdev_get_num_tc(dev) == tc) return 0; + if (bp->flags & BNXT_FLAG_SHARED_RINGS) + sh = true; + if (tc) { int max_rx_rings, max_tx_rings, rc; - bool sh = false; - - if (bp->flags & BNXT_FLAG_SHARED_RINGS) - sh = true; rc = bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, sh); if (rc || bp->tx_nr_rings_per_tc * tc > max_tx_rings) @@ -6348,7 +6348,8 @@ static int bnxt_setup_tc(struct net_device *dev, u32 handle, __be16 proto, bp->tx_nr_rings = bp->tx_nr_rings_per_tc; netdev_reset_tc(dev); } - bp->cp_nr_rings = max_t(int, bp->tx_nr_rings, bp->rx_nr_rings); + bp->cp_nr_rings = sh ? max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) : + bp->tx_nr_rings + bp->rx_nr_rings; bp->num_stat_ctxs = bp->cp_nr_rings; if (netif_running(bp->dev)) From 73b9bad63ae3c902ce64221d10a0d371d059748d Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 11 Nov 2016 00:11:43 -0500 Subject: [PATCH 192/503] bnxt_en: Fix VF virtual link state. If the physical link is down and the VF virtual link is set to "enable", the current code does not always work. If the link is down but the cable is attached, the firmware returns LINK_SIGNAL instead of NO_LINK. The current code is treating LINK_SIGNAL as link up. The fix is to treat link as down when the link_status != LINK. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index ec6cd18842c3..60e2af8678bd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -774,8 +774,8 @@ static int bnxt_vf_set_link(struct bnxt *bp, struct bnxt_vf_info *vf) if (vf->flags & BNXT_VF_LINK_UP) { /* if physical link is down, force link up on VF */ - if (phy_qcfg_resp.link == - PORT_PHY_QCFG_RESP_LINK_NO_LINK) { + if (phy_qcfg_resp.link != + PORT_PHY_QCFG_RESP_LINK_LINK) { phy_qcfg_resp.link = PORT_PHY_QCFG_RESP_LINK_LINK; phy_qcfg_resp.link_speed = cpu_to_le16( From 2d644d4c7506646f9c4a2afceb7fd5f030bc0c9f Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Fri, 11 Nov 2016 16:34:25 +0100 Subject: [PATCH 193/503] mlxsw: spectrum: Fix refcount bug on span entries When binding port to a newly created span entry, its refcount is initialized to zero even though it has a bound port. That leads to unexpected behaviour when the user tries to delete that port from the span entry. Fix this by initializing the reference count to 1. Also add a warning to put function. Fixes: 763b4b70afcd ("mlxsw: spectrum: Add support in matchall mirror TC offloading") Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 1ec0a4ce3c46..dda5761e91bc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -231,7 +231,7 @@ mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port) span_entry->used = true; span_entry->id = index; - span_entry->ref_count = 0; + span_entry->ref_count = 1; span_entry->local_port = local_port; return span_entry; } @@ -270,6 +270,7 @@ static struct mlxsw_sp_span_entry span_entry = mlxsw_sp_span_entry_find(port); if (span_entry) { + /* Already exists, just take a reference */ span_entry->ref_count++; return span_entry; } @@ -280,6 +281,7 @@ static struct mlxsw_sp_span_entry static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_span_entry *span_entry) { + WARN_ON(!span_entry->ref_count); if (--span_entry->ref_count == 0) mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry); return 0; From 42cdb338f40a98e6558bae35456fe86b6e90e1ef Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Fri, 11 Nov 2016 16:34:26 +0100 Subject: [PATCH 194/503] mlxsw: spectrum_router: Correctly dump neighbour activity The device's neighbour table is periodically dumped in order to update the kernel about active neighbours. A single dump session may span multiple queries, until the response carries less records than requested or when a record (can contain up to four neighbour entries) is not full. Current code stops the session when the number of returned records is zero, which can result in infinite loop in case of high packet rate. Fix this by stopping the session according to the above logic. Fixes: c723c735fa6b ("mlxsw: spectrum_router: Periodically update the kernel's neigh table") Signed-off-by: Arkadi Sharshevsky Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 040737e14a3f..cbeeddd70c5a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -800,6 +800,26 @@ static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp, } } +static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl) +{ + u8 num_rec, last_rec_index, num_entries; + + num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl); + last_rec_index = num_rec - 1; + + if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM) + return false; + if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) == + MLXSW_REG_RAUHTD_TYPE_IPV6) + return true; + + num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl, + last_rec_index); + if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC) + return true; + return false; +} + static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) { char *rauhtd_pl; @@ -826,7 +846,7 @@ static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) for (i = 0; i < num_rec; i++) mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl, i); - } while (num_rec); + } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl)); rtnl_unlock(); kfree(rauhtd_pl); From 46d054f8f540612f09987a53154aa39ae15f2e4c Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Fri, 11 Nov 2016 15:56:51 +0000 Subject: [PATCH 195/503] sfc: clear napi_hash state when copying channels efx_copy_channel() doesn't correctly clear the napi_hash related state. This means that when napi_hash_add is called for that channel nothing is done, and we are left with a copy of the napi_hash_node from the old channel. When we later call napi_hash_del() on this channel we have a stale napi_hash_node. Corruption is only seen when there are multiple entries in one of the napi_hash lists. This is made more likely by having a very large number of channels. Testing was carried out with 512 channels - 32 channels on each of 16 ports. This failure typically appears as protection faults within napi_by_id() or napi_hash_add(). efx_copy_channel() is only used when tx or rx ring sizes are changed (ethtool -G). Fixes: 36763266bbe8 ("sfc: Add support for busy polling") Signed-off-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 3cf3557106c2..6b89e4a7b164 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -485,6 +485,9 @@ efx_copy_channel(const struct efx_channel *old_channel) *channel = *old_channel; channel->napi_dev = NULL; + INIT_HLIST_NODE(&channel->napi_str.napi_hash_node); + channel->napi_str.napi_id = 0; + channel->napi_str.state = 0; memset(&channel->eventq, 0, sizeof(channel->eventq)); for (j = 0; j < EFX_TXQ_TYPES; j++) { From b7f193da17fb18b752bef77ce52eb49723299bd8 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Fri, 11 Nov 2016 11:00:45 -0600 Subject: [PATCH 196/503] ibmvnic: Unmap ibmvnic_statistics structure This structure was mapped but never subsequently unmapped. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index f6c9b6d38ac7..921c40fad1c3 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3844,6 +3844,9 @@ static int ibmvnic_remove(struct vio_dev *dev) if (adapter->debugfs_dir && !IS_ERR(adapter->debugfs_dir)) debugfs_remove_recursive(adapter->debugfs_dir); + dma_unmap_single(&dev->dev, adapter->stats_token, + sizeof(struct ibmvnic_statistics), DMA_FROM_DEVICE); + if (adapter->ras_comps) dma_free_coherent(&dev->dev, adapter->ras_comp_num * From e1fac0adf0f9b2c1eb49e658e6ed070a744bbaef Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Fri, 11 Nov 2016 11:00:46 -0600 Subject: [PATCH 197/503] ibmvnic: Fix size of debugfs name buffer This mistake was causing debugfs directory creation failures when multiple ibmvnic devices were probed. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 921c40fad1c3..4f3281a03e7e 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3705,7 +3705,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) struct net_device *netdev; unsigned char *mac_addr_p; struct dentry *ent; - char buf[16]; /* debugfs name buf */ + char buf[17]; /* debugfs name buf */ int rc; dev_dbg(&dev->dev, "entering ibmvnic_probe for UA 0x%x\n", From 7774d46b2037b98d3f7e414bffb1d53082dc139b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 12 Nov 2016 17:44:06 +0000 Subject: [PATCH 198/503] net: ethernet: ixp4xx_eth: fix spelling mistake in debug message Trivial fix to spelling mistake "successed" to "succeeded" in debug message. Also unwrap multi-line literal string. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/xscale/ixp4xx_eth.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index 7f127dc1b7ba..fa32391720fe 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -708,8 +708,7 @@ static int eth_poll(struct napi_struct *napi, int budget) if (!qmgr_stat_below_low_watermark(rxq) && napi_reschedule(napi)) { /* not empty again */ #if DEBUG_RX - printk(KERN_DEBUG "%s: eth_poll" - " napi_reschedule successed\n", + printk(KERN_DEBUG "%s: eth_poll napi_reschedule succeeded\n", dev->name); #endif qmgr_disable_irq(rxq); From cedecbc5e0f39d2987b8e1004908e90459a82e78 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 8 Aug 2016 09:48:00 +0000 Subject: [PATCH 199/503] ntb_pingpong: Fix db_init parameter description Fix 'db_init' parameter description. Signed-off-by: Wei Yongjun Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_pingpong.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/test/ntb_pingpong.c b/drivers/ntb/test/ntb_pingpong.c index 7d311799fca1..435861189d97 100644 --- a/drivers/ntb/test/ntb_pingpong.c +++ b/drivers/ntb/test/ntb_pingpong.c @@ -88,7 +88,7 @@ MODULE_PARM_DESC(delay_ms, "Milliseconds to delay the response to peer"); static unsigned long db_init = 0x7; module_param(db_init, ulong, 0644); -MODULE_PARM_DESC(delay_ms, "Initial doorbell bits to ring on the peer"); +MODULE_PARM_DESC(db_init, "Initial doorbell bits to ring on the peer"); struct pp_ctx { struct ntb_dev *ntb; From 49b89de41f8d97eb13a60c1865ed61fbebed0d15 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 8 Aug 2016 09:48:42 +0000 Subject: [PATCH 200/503] NTB: ntb_hw_intel: Fix typo in module parameter descriptions Fix typo in module parameter descriptions. Signed-off-by: Wei Yongjun Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/hw/intel/ntb_hw_intel.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index 0d5c29ae51de..1ee61d92c54b 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -112,17 +112,17 @@ MODULE_PARM_DESC(xeon_b2b_usd_bar2_addr64, module_param_named(xeon_b2b_usd_bar4_addr64, xeon_b2b_usd_addr.bar4_addr64, ullong, 0644); -MODULE_PARM_DESC(xeon_b2b_usd_bar2_addr64, +MODULE_PARM_DESC(xeon_b2b_usd_bar4_addr64, "XEON B2B USD BAR 4 64-bit address"); module_param_named(xeon_b2b_usd_bar4_addr32, xeon_b2b_usd_addr.bar4_addr32, ullong, 0644); -MODULE_PARM_DESC(xeon_b2b_usd_bar2_addr64, +MODULE_PARM_DESC(xeon_b2b_usd_bar4_addr32, "XEON B2B USD split-BAR 4 32-bit address"); module_param_named(xeon_b2b_usd_bar5_addr32, xeon_b2b_usd_addr.bar5_addr32, ullong, 0644); -MODULE_PARM_DESC(xeon_b2b_usd_bar2_addr64, +MODULE_PARM_DESC(xeon_b2b_usd_bar5_addr32, "XEON B2B USD split-BAR 5 32-bit address"); module_param_named(xeon_b2b_dsd_bar2_addr64, @@ -132,17 +132,17 @@ MODULE_PARM_DESC(xeon_b2b_dsd_bar2_addr64, module_param_named(xeon_b2b_dsd_bar4_addr64, xeon_b2b_dsd_addr.bar4_addr64, ullong, 0644); -MODULE_PARM_DESC(xeon_b2b_dsd_bar2_addr64, +MODULE_PARM_DESC(xeon_b2b_dsd_bar4_addr64, "XEON B2B DSD BAR 4 64-bit address"); module_param_named(xeon_b2b_dsd_bar4_addr32, xeon_b2b_dsd_addr.bar4_addr32, ullong, 0644); -MODULE_PARM_DESC(xeon_b2b_dsd_bar2_addr64, +MODULE_PARM_DESC(xeon_b2b_dsd_bar4_addr32, "XEON B2B DSD split-BAR 4 32-bit address"); module_param_named(xeon_b2b_dsd_bar5_addr32, xeon_b2b_dsd_addr.bar5_addr32, ullong, 0644); -MODULE_PARM_DESC(xeon_b2b_dsd_bar2_addr64, +MODULE_PARM_DESC(xeon_b2b_dsd_bar5_addr32, "XEON B2B DSD split-BAR 5 32-bit address"); #ifndef ioread64 From c0a88032ef8e6814d4dd84551e5f333c1de639b3 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Mon, 22 Aug 2016 18:51:35 +0200 Subject: [PATCH 201/503] ntb_transport: make DMA_OUT_RESOURCE_TO HZ independent schedule_timeout_* takes a timeout in jiffies but the code currently is passing in a constant which makes this timeout HZ dependent, so pass it through msecs_to_jiffies() to fix this up. Signed-off-by: Nicholas Mc Guire Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 8601c10acf74..4eb8adb34508 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -257,7 +257,7 @@ enum { #define NTB_QP_DEF_NUM_ENTRIES 100 #define NTB_LINK_DOWN_TIMEOUT 10 #define DMA_RETRIES 20 -#define DMA_OUT_RESOURCE_TO 50 +#define DMA_OUT_RESOURCE_TO msecs_to_jiffies(50) static void ntb_transport_rxc_db(unsigned long data); static const struct ntb_ctx_ops ntb_transport_ops; From cdc08982a5f334cecc15d802464588115512cc36 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Mon, 22 Aug 2016 18:51:36 +0200 Subject: [PATCH 202/503] ntb: make DMA_OUT_RESOURCE_TO HZ independent schedule_timeout_* takes a timeout in jiffies but the code currently is passing in a constant which makes this timeout HZ dependent, so pass it through msecs_to_jiffies() to fix this up. Signed-off-by: Nicholas Mc Guire Acked-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_perf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 6a50f20bf1cd..e065b695200d 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -72,7 +72,7 @@ #define MAX_THREADS 32 #define MAX_TEST_SIZE SZ_1M #define MAX_SRCS 32 -#define DMA_OUT_RESOURCE_TO 50 +#define DMA_OUT_RESOURCE_TO msecs_to_jiffies(50) #define DMA_RETRIES 20 #define SZ_4G (1ULL << 32) #define MAX_SEG_ORDER 20 /* no larger than 1M for kmalloc buffer */ From 25ea9f2bf5f76082da919f2a91ea8d920932c1da Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 27 Oct 2016 11:06:44 -0700 Subject: [PATCH 203/503] ntb: ntb_hw_intel: init peer_addr in struct intel_ntb_dev The peer_addr member of intel_ntb_dev is not set, therefore when acquiring ntb_peer_db and ntb_peer_spad we only get the offset rather than the actual physical address. Adding fix to correct that. Signed-off-by: Dave Jiang Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/hw/intel/ntb_hw_intel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index 1ee61d92c54b..7310a261c858 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -1755,6 +1755,8 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev, XEON_B2B_MIN_SIZE); if (!ndev->peer_mmio) return -EIO; + + ndev->peer_addr = pci_resource_start(pdev, b2b_bar); } return 0; @@ -2019,6 +2021,7 @@ static int intel_ntb_init_pci(struct intel_ntb_dev *ndev, struct pci_dev *pdev) goto err_mmio; } ndev->peer_mmio = ndev->self_mmio; + ndev->peer_addr = pci_resource_start(pdev, 0); return 0; From 819baf885953b588b63bef28e5598daf9ed4ddf9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 14 Oct 2016 10:34:18 +0300 Subject: [PATCH 204/503] ntb_perf: potential info leak in debugfs This is a static checker warning, not something I'm desperately concerned about. But snprintf() returns the number of bytes that would have been copied if there were space. We really care about the number of bytes that actually were copied so we should use scnprintf() instead. It probably won't overrun, and in that case we may as well just use sprintf() but these sorts of things make static checkers and code reviewers happier. Signed-off-by: Dan Carpenter Acked-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_perf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index e065b695200d..e75d4fdc0866 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -589,7 +589,7 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf, return -ENOMEM; if (mutex_is_locked(&perf->run_mutex)) { - out_off = snprintf(buf, 64, "running\n"); + out_off = scnprintf(buf, 64, "running\n"); goto read_from_buf; } @@ -600,14 +600,14 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf, break; if (pctx->status) { - out_off += snprintf(buf + out_off, 1024 - out_off, + out_off += scnprintf(buf + out_off, 1024 - out_off, "%d: error %d\n", i, pctx->status); continue; } rate = div64_u64(pctx->copied, pctx->diff_us); - out_off += snprintf(buf + out_off, 1024 - out_off, + out_off += scnprintf(buf + out_off, 1024 - out_off, "%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n", i, pctx->copied, pctx->diff_us, rate); } From fa14a0acea1ffe67913ba384a2897130a36dfe03 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 1 Nov 2016 18:36:46 +0200 Subject: [PATCH 205/503] nvmet-rdma: Fix possible NULL deref when handling rdma cm events When we initiate queue teardown sequence we call rdma_destroy_qp which clears cm_id->qp, afterwards we call rdma_destroy_id, but we might see a rdma_cm event in between with a cleared cm_id->qp so watch out for that and silently ignore the event because this means that the queue teardown sequence is in progress. Signed-off-by: Bart Van Assche Signed-off-by: Sagi Grimberg --- drivers/nvme/target/rdma.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index f8d23999e0f2..cf60759cc169 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1352,7 +1352,13 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id, case RDMA_CM_EVENT_ADDR_CHANGE: case RDMA_CM_EVENT_DISCONNECTED: case RDMA_CM_EVENT_TIMEWAIT_EXIT: - nvmet_rdma_queue_disconnect(queue); + /* + * We might end up here when we already freed the qp + * which means queue release sequence is in progress, + * so don't get in the way... + */ + if (queue) + nvmet_rdma_queue_disconnect(queue); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: ret = nvmet_rdma_device_removal(cm_id, queue); From 553cd9ef82edd811948782a8f73ae73c4bfeedd3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Nov 2016 08:49:18 -0600 Subject: [PATCH 206/503] nvme-rdma: reject non-connect commands before the queue is live If we reconncect we might have command queue up that get resent as soon as the queue is restarted. But until the connect command succeeded we can't send other command. Add a new flag that marks a queue as live when connect finishes, and delay any non-connect command until the queue is live based on it. Signed-off-by: Christoph Hellwig Reported-by: Steve Wise Tested-by: Steve Wise [sagig: fixes admin queue LIVE setting] Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 5a8388177959..438d6948895f 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -83,6 +83,7 @@ enum nvme_rdma_queue_flags { NVME_RDMA_Q_CONNECTED = (1 << 0), NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1), NVME_RDMA_Q_DELETING = (1 << 2), + NVME_RDMA_Q_LIVE = (1 << 3), }; struct nvme_rdma_queue { @@ -626,6 +627,7 @@ static int nvme_rdma_connect_io_queues(struct nvme_rdma_ctrl *ctrl) ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) break; + set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags); } return ret; @@ -712,6 +714,8 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) if (ret) goto stop_admin_q; + set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags); + ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); if (ret) goto stop_admin_q; @@ -761,8 +765,10 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) nvme_stop_keep_alive(&ctrl->ctrl); - for (i = 0; i < ctrl->queue_count; i++) + for (i = 0; i < ctrl->queue_count; i++) { clear_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[i].flags); + clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags); + } if (ctrl->queue_count > 1) nvme_stop_queues(&ctrl->ctrl); @@ -1378,6 +1384,24 @@ nvme_rdma_timeout(struct request *rq, bool reserved) return BLK_EH_HANDLED; } +/* + * We cannot accept any other command until the Connect command has completed. + */ +static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, + struct request *rq) +{ + if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) { + struct nvme_command *cmd = (struct nvme_command *)rq->cmd; + + if (rq->cmd_type != REQ_TYPE_DRV_PRIV || + cmd->common.opcode != nvme_fabrics_command || + cmd->fabrics.fctype != nvme_fabrics_type_connect) + return false; + } + + return true; +} + static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -1394,6 +1418,9 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, WARN_ON_ONCE(rq->tag < 0); + if (!nvme_rdma_queue_is_ready(queue, rq)) + return BLK_MQ_RQ_QUEUE_BUSY; + dev = queue->device->dev; ib_dma_sync_single_for_cpu(dev, sqe->dma, sizeof(struct nvme_command), DMA_TO_DEVICE); @@ -1544,6 +1571,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) if (error) goto out_cleanup_queue; + set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags); + error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap); if (error) { dev_err(ctrl->ctrl.device, From 8242ddac1bfcf6eb8873b4d0a4e7a172c2b5b625 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 6 Nov 2016 11:03:30 +0200 Subject: [PATCH 207/503] nvmet: Don't queue fatal error work if csts.cfs is set In the transport, in case of an interal queue error like error completion in rdma we trigger a fatal error. However, multiple queues in the same controller can serr error completions and we don't want to trigger fatal error work more than once. Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/target/core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index b4cacb6f0258..a21437a33adb 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -838,9 +838,13 @@ static void nvmet_fatal_error_handler(struct work_struct *work) void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl) { - ctrl->csts |= NVME_CSTS_CFS; - INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler); - schedule_work(&ctrl->fatal_err_work); + mutex_lock(&ctrl->lock); + if (!(ctrl->csts & NVME_CSTS_CFS)) { + ctrl->csts |= NVME_CSTS_CFS; + INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler); + schedule_work(&ctrl->fatal_err_work); + } + mutex_unlock(&ctrl->lock); } EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error); From 766dbb179d41d6337fed2b3ca00caa5845d298ce Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 6 Nov 2016 11:09:49 +0200 Subject: [PATCH 208/503] nvmet-rdma: don't forget to delete a queue from the list of connection failed In case we accepted a queue connection and it failed, we might not remove the queue from the list until we unload and clean it up. We should delete it from the queue list on the relevant handler. Signed-off-by: Sagi Grimberg --- drivers/nvme/target/rdma.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index cf60759cc169..8c06675c2305 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1066,6 +1066,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev, spin_lock_init(&queue->rsp_wr_wait_lock); INIT_LIST_HEAD(&queue->free_rsps); spin_lock_init(&queue->rsps_lock); + INIT_LIST_HEAD(&queue->queue_list); queue->idx = ida_simple_get(&nvmet_rdma_queue_ida, 0, 0, GFP_KERNEL); if (queue->idx < 0) { @@ -1269,7 +1270,12 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, { WARN_ON_ONCE(queue->state != NVMET_RDMA_Q_CONNECTING); - pr_err("failed to connect queue\n"); + mutex_lock(&nvmet_rdma_queue_mutex); + if (!list_empty(&queue->queue_list)) + list_del_init(&queue->queue_list); + mutex_unlock(&nvmet_rdma_queue_mutex); + + pr_err("failed to connect queue %d\n", queue->idx); schedule_work(&queue->release_work); } From c8dbc37cd81d4705fce51123f5d81ea3267a5b88 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 8 Nov 2016 09:16:02 -0800 Subject: [PATCH 209/503] nvme-rdma: stop and free io queues on connect failure While testing nvme-rdma with the spdk nvmf target over iw_cxgb4, I configured the target (mistakenly) to generate an error creating the NVMF IO queues. This resulted a "Invalid SQE Parameter" error sent back to the host on the first IO queue connect: [ 9610.928182] nvme nvme1: queue_size 128 > ctrl maxcmd 120, clamping down [ 9610.938745] nvme nvme1: creating 32 I/O queues. So nvmf_connect_io_queue() returns an error to nvmf_connect_io_queue() / nvmf_connect_io_queues(), and that is returned to nvme_rdma_create_io_queues(). In the error path, nvmf_rdma_create_io_queues() frees the queue tagset memory _before_ stopping and freeing the IB queues, which causes yet another touch-after-free crash due to SQ CQEs being flushed after the ib_cqe structs pointed-to by the flushed WRs have been freed (since they are part of the nvme_rdma_request struct). The fix is to stop and free the queues in nvmf_connect_io_queues() if there is an error connecting any of the queues. Signed-off-by: Steve Wise Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 438d6948895f..3d25add36d91 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -625,11 +625,18 @@ static int nvme_rdma_connect_io_queues(struct nvme_rdma_ctrl *ctrl) for (i = 1; i < ctrl->queue_count; i++) { ret = nvmf_connect_io_queue(&ctrl->ctrl, i); - if (ret) - break; + if (ret) { + dev_info(ctrl->ctrl.device, + "failed to connect i/o queue: %d\n", ret); + goto out_free_queues; + } set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags); } + return 0; + +out_free_queues: + nvme_rdma_free_io_queues(ctrl); return ret; } From 14c862dbb0a0e0a9baec20480d441e32cb54b2b9 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 6 Nov 2016 11:03:59 +0200 Subject: [PATCH 210/503] nvmet-rdma: drain the queue-pair just before freeing it draining the qp right after disconnect might not suffice because the nvmet sq is not fully drained (in nvmet_sq_destroy) and we might see completions after the drain. Instead, drain right before the qp destroy which comes after the sq destruction and we can be sure that no posts come after the drain. Tested-by: Steve Wise Signed-off-by: Sagi Grimberg --- drivers/nvme/target/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 8c06675c2305..005ef5d17a19 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -951,6 +951,7 @@ err_destroy_cq: static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue) { + ib_drain_qp(queue->cm_id->qp); rdma_destroy_qp(queue->cm_id); ib_free_cq(queue->cq); } @@ -1245,7 +1246,6 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) if (disconnect) { rdma_disconnect(queue->cm_id); - ib_drain_qp(queue->cm_id->qp); schedule_work(&queue->release_work); } } From f732c5b7c734cfc2c563c918fe2842175c7eb073 Mon Sep 17 00:00:00 2001 From: Axl-zhang Date: Wed, 2 Nov 2016 13:31:12 +0800 Subject: [PATCH 211/503] dmaengine: sun6i: fix the uninitialized value for v_lli dma_pool_alloc does not initialize the value of the newly allocated block for the v_lli, and the uninitilize value make the tests failed which is on pine64 with dmatest. we can fix it just change the "|=" to "=" for the v_lli->cfg. Signed-off-by: Hao Zhang Acked-by: Maxime Ripard Signed-off-by: Vinod Koul --- drivers/dma/sun6i-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c index 83461994e418..a2358780ab2c 100644 --- a/drivers/dma/sun6i-dma.c +++ b/drivers/dma/sun6i-dma.c @@ -578,7 +578,7 @@ static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_memcpy( burst = convert_burst(8); width = convert_buswidth(DMA_SLAVE_BUSWIDTH_4_BYTES); - v_lli->cfg |= DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) | + v_lli->cfg = DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) | DMA_CHAN_CFG_DST_DRQ(DRQ_SDRAM) | DMA_CHAN_CFG_DST_LINEAR_MODE | DMA_CHAN_CFG_SRC_LINEAR_MODE | From 12f5908080bdccca2cb2f7ad850cb360c92f481a Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 9 Nov 2016 09:47:58 -0700 Subject: [PATCH 212/503] dmaengine: cppi41: Fix list not empty warning on module removal If musb controller is configured with USB peripherals and we have enumerated with a USB host, we can get warnings on removal of the modules: WARNING: CPU: 0 PID: 1269 at drivers/dma/cppi41.c:391 cppi41_dma_free_chan_resources Fix the issue by adding the missing pm_runtime_get to cppi41_dma_free_chan_resources to make sure the pending work list is cleared on removal. Fixes: fdea2d09b997 ("dmaengine: cppi41: Add basic PM runtime support") Signed-off-by: Tony Lindgren Signed-off-by: Vinod Koul --- drivers/dma/cppi41.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c index bac5f023013b..6ed99d926358 100644 --- a/drivers/dma/cppi41.c +++ b/drivers/dma/cppi41.c @@ -1072,7 +1072,12 @@ err_get_sync: static int cppi41_dma_remove(struct platform_device *pdev) { struct cppi41_dd *cdd = platform_get_drvdata(pdev); + int error; + error = pm_runtime_get_sync(&pdev->dev); + if (error < 0) + dev_err(&pdev->dev, "%s could not pm_runtime_get: %i\n", + __func__, error); of_dma_controller_free(pdev->dev.of_node); dma_async_device_unregister(&cdd->ddev); From 098de42ad6708866501a00155ba85350bc0b29e5 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 9 Nov 2016 09:47:59 -0700 Subject: [PATCH 213/503] dmaengine: cppi41: Fix unpaired pm runtime when only a USB hub is connected On am335x with musb host we can end up with unpaired pm runtime calls if a hub with no devices is connected and disconnected. This is because of the conditional pm runtime calls which are always a bad idea. Let's fix the issue by making them unconditional and paired in each function. Fixes: fdea2d09b997 ("dmaengine: cppi41: Add basic PM runtime support") Signed-off-by: Tony Lindgren Signed-off-by: Vinod Koul --- drivers/dma/cppi41.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c index 6ed99d926358..3d2d8b5a6c91 100644 --- a/drivers/dma/cppi41.c +++ b/drivers/dma/cppi41.c @@ -318,6 +318,11 @@ static irqreturn_t cppi41_irq(int irq, void *data) while (val) { u32 desc, len; + status = pm_runtime_get(cdd->ddev.dev); + if (status < 0) + dev_err(cdd->ddev.dev, "%s pm runtime get: %i\n", + __func__, status); + q_num = __fls(val); val &= ~(1 << q_num); q_num += 32 * i; @@ -338,7 +343,6 @@ static irqreturn_t cppi41_irq(int irq, void *data) dma_cookie_complete(&c->txd); dmaengine_desc_get_callback_invoke(&c->txd, NULL); - /* Paired with cppi41_dma_issue_pending */ pm_runtime_mark_last_busy(cdd->ddev.dev); pm_runtime_put_autosuspend(cdd->ddev.dev); } @@ -460,7 +464,6 @@ static void cppi41_dma_issue_pending(struct dma_chan *chan) struct cppi41_dd *cdd = c->cdd; int error; - /* PM runtime paired with dmaengine_desc_get_callback_invoke */ error = pm_runtime_get(cdd->ddev.dev); if ((error != -EINPROGRESS) && error < 0) { dev_err(cdd->ddev.dev, "Failed to pm_runtime_get: %i\n", @@ -473,6 +476,9 @@ static void cppi41_dma_issue_pending(struct dma_chan *chan) push_desc_queue(c); else pending_desc(c); + + pm_runtime_mark_last_busy(cdd->ddev.dev); + pm_runtime_put_autosuspend(cdd->ddev.dev); } static u32 get_host_pd0(u32 length) From 740b4be3f742100ea66f0f9ee9715b10ee304a90 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 11 Nov 2016 11:28:52 -0800 Subject: [PATCH 214/503] dmaengine: cpp41: Fix handling of error path If we return early on pm_runtime_get() error, we need to also call pm_runtime_put_noidle() as pointed out in a musb related thread by Johan Hovold . This is to keep the PM runtime use counts happy. Fixes: fdea2d09b997 ("dmaengine: cppi41: Add basic PM runtime support") Cc: Johan Hovold Signed-off-by: Tony Lindgren Signed-off-by: Vinod Koul --- drivers/dma/cppi41.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c index 3d2d8b5a6c91..4b52126c13cf 100644 --- a/drivers/dma/cppi41.c +++ b/drivers/dma/cppi41.c @@ -366,8 +366,11 @@ static int cppi41_dma_alloc_chan_resources(struct dma_chan *chan) int error; error = pm_runtime_get_sync(cdd->ddev.dev); - if (error < 0) + if (error < 0) { + pm_runtime_put_noidle(cdd->ddev.dev); + return error; + } dma_cookie_init(chan); dma_async_tx_descriptor_init(&c->txd, chan); @@ -389,8 +392,11 @@ static void cppi41_dma_free_chan_resources(struct dma_chan *chan) int error; error = pm_runtime_get_sync(cdd->ddev.dev); - if (error < 0) + if (error < 0) { + pm_runtime_put_noidle(cdd->ddev.dev); + return; + } WARN_ON(!list_empty(&cdd->pending)); @@ -466,6 +472,7 @@ static void cppi41_dma_issue_pending(struct dma_chan *chan) error = pm_runtime_get(cdd->ddev.dev); if ((error != -EINPROGRESS) && error < 0) { + pm_runtime_put_noidle(cdd->ddev.dev); dev_err(cdd->ddev.dev, "Failed to pm_runtime_get: %i\n", error); From ea08e39230e898844d9de5b60cdbb30067cebfe7 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Fri, 11 Nov 2016 13:16:22 -0500 Subject: [PATCH 215/503] sunrpc: svc_age_temp_xprts_now should not call setsockopt non-tcp transports This fixes the following panic that can occur with NFSoRDMA. general protection fault: 0000 [#1] SMP Modules linked in: rpcrdma ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp scsi_tgt ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm mlx5_ib ib_core intel_powerclamp coretemp kvm_intel kvm sg ioatdma ipmi_devintf ipmi_ssif dcdbas iTCO_wdt iTCO_vendor_support pcspkr irqbypass sb_edac shpchp dca crc32_pclmul ghash_clmulni_intel edac_core lpc_ich aesni_intel lrw gf128mul glue_helper ablk_helper mei_me mei ipmi_si cryptd wmi ipmi_msghandler acpi_pad acpi_power_meter nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sd_mod crc_t10dif crct10dif_generic mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt ahci fb_sys_fops ttm libahci mlx5_core tg3 crct10dif_pclmul drm crct10dif_common ptp i2c_core libata crc32c_intel pps_core fjes dm_mirror dm_region_hash dm_log dm_mod CPU: 1 PID: 120 Comm: kworker/1:1 Not tainted 3.10.0-514.el7.x86_64 #1 Hardware name: Dell Inc. PowerEdge R320/0KM5PX, BIOS 2.4.2 01/29/2015 Workqueue: events check_lifetime task: ffff88031f506dd0 ti: ffff88031f584000 task.ti: ffff88031f584000 RIP: 0010:[] [] _raw_spin_lock_bh+0x17/0x50 RSP: 0018:ffff88031f587ba8 EFLAGS: 00010206 RAX: 0000000000020000 RBX: 20041fac02080072 RCX: ffff88031f587fd8 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 20041fac02080072 RBP: ffff88031f587bb0 R08: 0000000000000008 R09: ffffffff8155be77 R10: ffff880322a59b00 R11: ffffea000bf39f00 R12: 20041fac02080072 R13: 000000000000000d R14: ffff8800c4fbd800 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff880322a40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f3c52d4547e CR3: 00000000019ba000 CR4: 00000000001407e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: 20041fac02080002 ffff88031f587bd0 ffffffff81557830 20041fac02080002 ffff88031f587c78 ffff88031f587c40 ffffffff8155ae08 000000010157df32 0000000800000001 ffff88031f587c20 ffffffff81096acb ffffffff81aa37d0 Call Trace: [] lock_sock_nested+0x20/0x50 [] sock_setsockopt+0x78/0x940 [] ? lock_timer_base.isra.33+0x2b/0x50 [] kernel_setsockopt+0x4d/0x50 [] svc_age_temp_xprts_now+0x174/0x1e0 [sunrpc] [] nfsd_inetaddr_event+0x9d/0xd0 [nfsd] [] notifier_call_chain+0x4c/0x70 [] __blocking_notifier_call_chain+0x4d/0x70 [] blocking_notifier_call_chain+0x16/0x20 [] __inet_del_ifa+0x168/0x2d0 [] check_lifetime+0x25f/0x270 [] process_one_work+0x17b/0x470 [] worker_thread+0x126/0x410 [] ? rescuer_thread+0x460/0x460 [] kthread+0xcf/0xe0 [] ? kthread_create_on_node+0x140/0x140 [] ret_from_fork+0x58/0x90 [] ? kthread_create_on_node+0x140/0x140 Code: ca 75 f1 5d c3 0f 1f 80 00 00 00 00 eb d9 66 0f 1f 44 00 00 0f 1f 44 00 00 55 48 89 e5 53 48 89 fb e8 7e 04 a0 ff b8 00 00 02 00 0f c1 03 89 c2 c1 ea 10 66 39 c2 75 03 5b 5d c3 83 e2 fe 0f RIP [] _raw_spin_lock_bh+0x17/0x50 RSP Signed-off-by: Scott Mayhew Fixes: c3d4879e ("sunrpc: Add a function to close temporary transports immediately") Reviewed-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 1 + net/sunrpc/svc_xprt.c | 11 +---------- net/sunrpc/svcsock.c | 21 +++++++++++++++++++++ net/sunrpc/xprtrdma/svc_rdma_transport.c | 6 ++++++ 4 files changed, 29 insertions(+), 10 deletions(-) diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index ab02a457da1f..e5d193440374 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -25,6 +25,7 @@ struct svc_xprt_ops { void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); int (*xpo_secure_port)(struct svc_rqst *); + void (*xpo_kill_temp_xprt)(struct svc_xprt *); }; struct svc_xprt_class { diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c3f652395a80..3bc1d61694cb 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1002,14 +1002,8 @@ static void svc_age_temp_xprts(unsigned long closure) void svc_age_temp_xprts_now(struct svc_serv *serv, struct sockaddr *server_addr) { struct svc_xprt *xprt; - struct svc_sock *svsk; - struct socket *sock; struct list_head *le, *next; LIST_HEAD(to_be_closed); - struct linger no_linger = { - .l_onoff = 1, - .l_linger = 0, - }; spin_lock_bh(&serv->sv_lock); list_for_each_safe(le, next, &serv->sv_tempsocks) { @@ -1027,10 +1021,7 @@ void svc_age_temp_xprts_now(struct svc_serv *serv, struct sockaddr *server_addr) list_del_init(le); xprt = list_entry(le, struct svc_xprt, xpt_list); dprintk("svc_age_temp_xprts_now: closing %p\n", xprt); - svsk = container_of(xprt, struct svc_sock, sk_xprt); - sock = svsk->sk_sock; - kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, - (char *)&no_linger, sizeof(no_linger)); + xprt->xpt_ops->xpo_kill_temp_xprt(xprt); svc_close_xprt(xprt); } } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 57625f64efd5..a4bc98265d88 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -438,6 +438,21 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt) return !test_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); } +static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt) +{ + struct svc_sock *svsk; + struct socket *sock; + struct linger no_linger = { + .l_onoff = 1, + .l_linger = 0, + }; + + svsk = container_of(xprt, struct svc_sock, sk_xprt); + sock = svsk->sk_sock; + kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, + (char *)&no_linger, sizeof(no_linger)); +} + /* * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo */ @@ -648,6 +663,10 @@ static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt) return NULL; } +static void svc_udp_kill_temp_xprt(struct svc_xprt *xprt) +{ +} + static struct svc_xprt *svc_udp_create(struct svc_serv *serv, struct net *net, struct sockaddr *sa, int salen, @@ -667,6 +686,7 @@ static struct svc_xprt_ops svc_udp_ops = { .xpo_has_wspace = svc_udp_has_wspace, .xpo_accept = svc_udp_accept, .xpo_secure_port = svc_sock_secure_port, + .xpo_kill_temp_xprt = svc_udp_kill_temp_xprt, }; static struct svc_xprt_class svc_udp_class = { @@ -1242,6 +1262,7 @@ static struct svc_xprt_ops svc_tcp_ops = { .xpo_has_wspace = svc_tcp_has_wspace, .xpo_accept = svc_tcp_accept, .xpo_secure_port = svc_sock_secure_port, + .xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt, }; static struct svc_xprt_class svc_tcp_class = { diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 6864fb967038..1334de2715c2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -67,6 +67,7 @@ static void svc_rdma_detach(struct svc_xprt *xprt); static void svc_rdma_free(struct svc_xprt *xprt); static int svc_rdma_has_wspace(struct svc_xprt *xprt); static int svc_rdma_secure_port(struct svc_rqst *); +static void svc_rdma_kill_temp_xprt(struct svc_xprt *); static struct svc_xprt_ops svc_rdma_ops = { .xpo_create = svc_rdma_create, @@ -79,6 +80,7 @@ static struct svc_xprt_ops svc_rdma_ops = { .xpo_has_wspace = svc_rdma_has_wspace, .xpo_accept = svc_rdma_accept, .xpo_secure_port = svc_rdma_secure_port, + .xpo_kill_temp_xprt = svc_rdma_kill_temp_xprt, }; struct svc_xprt_class svc_rdma_class = { @@ -1317,6 +1319,10 @@ static int svc_rdma_secure_port(struct svc_rqst *rqstp) return 1; } +static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt) +{ +} + int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) { struct ib_send_wr *bad_wr, *n_wr; From b15efc38626f20f3fc8b831b826b50740d90dab9 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 14 Nov 2016 11:14:37 -0200 Subject: [PATCH 216/503] gp8psk-fe: add missing MODULE_foo() macros This file was converted to a separate module at commit 7a0786c19d65 ("gp8psk: Fix DVB frontend attach"), because the DVB attach routines require it to work. However, I forgot to copy the MODULE_foo() macros from the original module, causing this warning: WARNING: modpost: missing MODULE_LICENSE() in drivers/media/dvb-frontends/gp8psk-fe.o Reported-by: Stephen Rothwell Fixes: 7a0786c19d65 ("gp8psk: Fix DVB frontend attach") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Linus Torvalds --- drivers/media/dvb-frontends/gp8psk-fe.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/media/dvb-frontends/gp8psk-fe.c b/drivers/media/dvb-frontends/gp8psk-fe.c index be19afeed7a9..93f59bfea092 100644 --- a/drivers/media/dvb-frontends/gp8psk-fe.c +++ b/drivers/media/dvb-frontends/gp8psk-fe.c @@ -1,5 +1,5 @@ -/* DVB USB compliant Linux driver for the - * - GENPIX 8pks/qpsk/DCII USB2.0 DVB-S module +/* + * Frontend driver for the GENPIX 8pks/qpsk/DCII USB2.0 DVB-S module * * Copyright (C) 2006,2007 Alan Nisota (alannisota@gmail.com) * Copyright (C) 2006,2007 Genpix Electronics (genpix@genpix-electronics.com) @@ -8,11 +8,9 @@ * * This module is based off the vp7045 and vp702x modules * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation, version 2. - * - * see Documentation/dvb/README.dvb-usb for more information + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation, version 2. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -395,3 +393,8 @@ static struct dvb_frontend_ops gp8psk_fe_ops = { .dishnetwork_send_legacy_command = gp8psk_fe_send_legacy_dish_cmd, .enable_high_lnb_voltage = gp8psk_fe_enable_high_lnb_voltage }; + +MODULE_AUTHOR("Alan Nisota "); +MODULE_DESCRIPTION("Frontend Driver for Genpix DVB-S"); +MODULE_VERSION("1.1"); +MODULE_LICENSE("GPL"); From 93d710a65ef02fb7fd48ae207e78f460bd7a6089 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 14 Nov 2016 15:34:17 +0100 Subject: [PATCH 217/503] i2c: mux: fix up dependencies We get the following build error from UM Linux after adding an entry to drivers/iio/gyro/Kconfig that issues "select I2C_MUX": ERROR: "devm_ioremap_resource" [drivers/i2c/muxes/i2c-mux-reg.ko] undefined! ERROR: "of_address_to_resource" [drivers/i2c/muxes/i2c-mux-reg.ko] undefined! It appears that the I2C mux core code depends on HAS_IOMEM for historical reasons, while CONFIG_I2C_MUX_REG does *not* have a direct dependency on HAS_IOMEM. This creates a situation where a allyesconfig or allmodconfig for UM Linux will select I2C_MUX, and will implicitly enable I2C_MUX_REG as well, and the compilation will fail for the register driver. Fix this up by making I2C_MUX_REG depend on HAS_IOMEM and removing the dependency from I2C_MUX. Reported-by: kbuild test robot Reported-by: Jonathan Cameron Signed-off-by: Linus Walleij Acked-by: Jonathan Cameron Acked-by: Peter Rosin Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/Kconfig | 1 - drivers/i2c/muxes/Kconfig | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/Kconfig b/drivers/i2c/Kconfig index d223650a97e4..11edabf425ae 100644 --- a/drivers/i2c/Kconfig +++ b/drivers/i2c/Kconfig @@ -59,7 +59,6 @@ config I2C_CHARDEV config I2C_MUX tristate "I2C bus multiplexing support" - depends on HAS_IOMEM help Say Y here if you want the I2C core to support the ability to handle multiplexed I2C bus topologies, by presenting each diff --git a/drivers/i2c/muxes/Kconfig b/drivers/i2c/muxes/Kconfig index e280c8ecc0b5..96de9ce5669b 100644 --- a/drivers/i2c/muxes/Kconfig +++ b/drivers/i2c/muxes/Kconfig @@ -63,6 +63,7 @@ config I2C_MUX_PINCTRL config I2C_MUX_REG tristate "Register-based I2C multiplexer" + depends on HAS_IOMEM help If you say yes to this option, support will be included for a register based I2C multiplexer. This driver provides access to From f5c9f9c72395c3291c2e35c905dedae2b98475a4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 14 Nov 2016 09:31:52 -0800 Subject: [PATCH 218/503] Revert "printk: make reading the kernel log flush pending lines" This reverts commit bfd8d3f23b51018388be0411ccbc2d56277fe294. It turns out that this flushes things much too aggressiverly, and causes lines to break up when the system logger races with new continuation lines being printed. There's a pending patch to make printk() flushing much more straightforward, but it's too invasive for 4.9, so in the meantime let's just not make the system message logging flush continuation lines. They'll be flushed by the final newline anyway. Suggested-by: Petr Mladek Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 5028f4fd504a..f7a55e9ff2f7 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -783,8 +783,6 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) return ret; } -static void cont_flush(void); - static ssize_t devkmsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -800,7 +798,6 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, if (ret) return ret; raw_spin_lock_irq(&logbuf_lock); - cont_flush(); while (user->seq == log_next_seq) { if (file->f_flags & O_NONBLOCK) { ret = -EAGAIN; @@ -863,7 +860,6 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) return -ESPIPE; raw_spin_lock_irq(&logbuf_lock); - cont_flush(); switch (whence) { case SEEK_SET: /* the first record */ @@ -902,7 +898,6 @@ static unsigned int devkmsg_poll(struct file *file, poll_table *wait) poll_wait(file, &log_wait, wait); raw_spin_lock_irq(&logbuf_lock); - cont_flush(); if (user->seq < log_next_seq) { /* return error when data has vanished underneath us */ if (user->seq < log_first_seq) @@ -1289,7 +1284,6 @@ static int syslog_print(char __user *buf, int size) size_t skip; raw_spin_lock_irq(&logbuf_lock); - cont_flush(); if (syslog_seq < log_first_seq) { /* messages are gone, move to first one */ syslog_seq = log_first_seq; @@ -1349,7 +1343,6 @@ static int syslog_print_all(char __user *buf, int size, bool clear) return -ENOMEM; raw_spin_lock_irq(&logbuf_lock); - cont_flush(); if (buf) { u64 next_seq; u64 seq; @@ -1511,7 +1504,6 @@ int do_syslog(int type, char __user *buf, int len, int source) /* Number of chars in the log buffer */ case SYSLOG_ACTION_SIZE_UNREAD: raw_spin_lock_irq(&logbuf_lock); - cont_flush(); if (syslog_seq < log_first_seq) { /* messages are gone, move to first one */ syslog_seq = log_first_seq; @@ -3028,7 +3020,6 @@ void kmsg_dump(enum kmsg_dump_reason reason) dumper->active = true; raw_spin_lock_irqsave(&logbuf_lock, flags); - cont_flush(); dumper->cur_seq = clear_seq; dumper->cur_idx = clear_idx; dumper->next_seq = log_next_seq; @@ -3119,7 +3110,6 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, bool ret; raw_spin_lock_irqsave(&logbuf_lock, flags); - cont_flush(); ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len); raw_spin_unlock_irqrestore(&logbuf_lock, flags); @@ -3162,7 +3152,6 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, goto out; raw_spin_lock_irqsave(&logbuf_lock, flags); - cont_flush(); if (dumper->cur_seq < log_first_seq) { /* messages are gone, move to first available one */ dumper->cur_seq = log_first_seq; From ee2bd216e1fa9fa980e6ac702e5973d157c40c48 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 14 Nov 2016 09:46:08 -0800 Subject: [PATCH 219/503] ASoC: lpass-platform: fix uninitialized variable In commit 022d00ee0b55 ("ASoC: lpass-platform: Fix broken pcm data usage") the stream specific information initialization was broken, with the dma channel information not being initialized if there was no alloc_dma_channel() helper function. Before that, the DMA channel number was implicitly initialized to zero because the backing store was allocated with devm_kzalloc(). When the init code was rewritten, that implicit initialization was lost, and gcc rightfully complains about an uninitialized variable being used. Cc: Srinivas Kandagatla Cc: Mark Brown Signed-off-by: Linus Torvalds --- sound/soc/qcom/lpass-platform.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/qcom/lpass-platform.c b/sound/soc/qcom/lpass-platform.c index 07000f53db44..b392e51de94d 100644 --- a/sound/soc/qcom/lpass-platform.c +++ b/sound/soc/qcom/lpass-platform.c @@ -75,6 +75,7 @@ static int lpass_platform_pcmops_open(struct snd_pcm_substream *substream) data->i2s_port = cpu_dai->driver->id; runtime->private_data = data; + dma_ch = 0; if (v->alloc_dma_channel) dma_ch = v->alloc_dma_channel(drvdata, dir); if (dma_ch < 0) From 7020637bdf59589a403e01aca128bef643404317 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 12 Nov 2016 17:20:30 +0000 Subject: [PATCH 220/503] ps3_gelic: fix spelling mistake in debug message Trivial fix to spelling mistake "unmached" to "unmatched" in debug message. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/toshiba/ps3_gelic_wireless.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c index 446ea580ad42..928c1dca2673 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c @@ -1694,7 +1694,7 @@ struct gelic_wl_scan_info *gelic_wl_find_best_bss(struct gelic_wl_info *wl) pr_debug("%s: bssid matched\n", __func__); break; } else { - pr_debug("%s: bssid unmached\n", __func__); + pr_debug("%s: bssid unmatched\n", __func__); continue; } } From 87a349f9cc0908bc0cfac0c9ece3179f650ae95a Mon Sep 17 00:00:00 2001 From: Thomas Tai Date: Fri, 11 Nov 2016 16:41:00 -0800 Subject: [PATCH 221/503] sparc64: fix compile warning section mismatch in find_node() A compile warning is introduced by a commit to fix the find_node(). This patch fix the compile warning by moving find_node() into __init section. Because find_node() is only used by memblock_nid_range() which is only used by a __init add_node_ranges(). find_node() and memblock_nid_range() should also be inside __init section. Signed-off-by: Thomas Tai Signed-off-by: David S. Miller --- arch/sparc/mm/init_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 068eb3dcbcb5..37aa537b3ad8 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -805,7 +805,7 @@ static int num_mblocks; static int find_numa_node_for_addr(unsigned long pa, struct node_mem_mask *pnode_mask); -static unsigned long ra_to_pa(unsigned long addr) +static unsigned long __init ra_to_pa(unsigned long addr) { int i; @@ -821,7 +821,7 @@ static unsigned long ra_to_pa(unsigned long addr) return addr; } -static int find_node(unsigned long addr) +static int __init find_node(unsigned long addr) { static bool search_mdesc = true; static struct node_mem_mask last_mem_mask = { ~0UL, ~0UL }; @@ -858,7 +858,7 @@ static int find_node(unsigned long addr) return last_index; } -static u64 memblock_nid_range(u64 start, u64 end, int *nid) +static u64 __init memblock_nid_range(u64 start, u64 end, int *nid) { *nid = find_node(start); start += PAGE_SIZE; From e2174b0c24caca170ca61eda2ae49c9561ff8896 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 14 Nov 2016 20:56:17 +0100 Subject: [PATCH 222/503] Revert "ACPICA: FADT support cleanup" Pavel Machek reports that commit 6ea8c546f365 (ACPICA: FADT support cleanup) breaks thermal management on his Thinkpad X60 and T40p, so revert it. Link: https://bugzilla.kernel.org/show_bug.cgi?id=187311 Fixes: 6ea8c546f365 (ACPICA: FADT support cleanup) Reported-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/tbfadt.c | 10 +-- include/acpi/actbl.h | 164 +++++++++++++++-------------------- 2 files changed, 74 insertions(+), 100 deletions(-) diff --git a/drivers/acpi/acpica/tbfadt.c b/drivers/acpi/acpica/tbfadt.c index 046c4d0394ee..5fb838e592dc 100644 --- a/drivers/acpi/acpica/tbfadt.c +++ b/drivers/acpi/acpica/tbfadt.c @@ -480,19 +480,17 @@ static void acpi_tb_convert_fadt(void) u32 i; /* - * For ACPI 1.0 FADTs (revision 1), ensure that reserved fields which + * For ACPI 1.0 FADTs (revision 1 or 2), ensure that reserved fields which * should be zero are indeed zero. This will workaround BIOSs that * inadvertently place values in these fields. * * The ACPI 1.0 reserved fields that will be zeroed are the bytes located * at offset 45, 55, 95, and the word located at offset 109, 110. * - * Note: The FADT revision value is unreliable because of BIOS errors. - * The table length is instead used as the final word on the version. - * - * Note: FADT revision 3 is the ACPI 2.0 version of the FADT. + * Note: The FADT revision value is unreliable. Only the length can be + * trusted. */ - if (acpi_gbl_FADT.header.length <= ACPI_FADT_V3_SIZE) { + if (acpi_gbl_FADT.header.length <= ACPI_FADT_V2_SIZE) { acpi_gbl_FADT.preferred_profile = 0; acpi_gbl_FADT.pstate_control = 0; acpi_gbl_FADT.cst_control = 0; diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 1b949e08015c..c19700e2a2fe 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -230,72 +230,62 @@ struct acpi_table_facs { /* Fields common to all versions of the FADT */ struct acpi_table_fadt { - struct acpi_table_header header; /* [V1] Common ACPI table header */ - u32 facs; /* [V1] 32-bit physical address of FACS */ - u32 dsdt; /* [V1] 32-bit physical address of DSDT */ - u8 model; /* [V1] System Interrupt Model (ACPI 1.0) - not used in ACPI 2.0+ */ - u8 preferred_profile; /* [V1] Conveys preferred power management profile to OSPM. */ - u16 sci_interrupt; /* [V1] System vector of SCI interrupt */ - u32 smi_command; /* [V1] 32-bit Port address of SMI command port */ - u8 acpi_enable; /* [V1] Value to write to SMI_CMD to enable ACPI */ - u8 acpi_disable; /* [V1] Value to write to SMI_CMD to disable ACPI */ - u8 s4_bios_request; /* [V1] Value to write to SMI_CMD to enter S4BIOS state */ - u8 pstate_control; /* [V1] Processor performance state control */ - u32 pm1a_event_block; /* [V1] 32-bit port address of Power Mgt 1a Event Reg Blk */ - u32 pm1b_event_block; /* [V1] 32-bit port address of Power Mgt 1b Event Reg Blk */ - u32 pm1a_control_block; /* [V1] 32-bit port address of Power Mgt 1a Control Reg Blk */ - u32 pm1b_control_block; /* [V1] 32-bit port address of Power Mgt 1b Control Reg Blk */ - u32 pm2_control_block; /* [V1] 32-bit port address of Power Mgt 2 Control Reg Blk */ - u32 pm_timer_block; /* [V1] 32-bit port address of Power Mgt Timer Ctrl Reg Blk */ - u32 gpe0_block; /* [V1] 32-bit port address of General Purpose Event 0 Reg Blk */ - u32 gpe1_block; /* [V1] 32-bit port address of General Purpose Event 1 Reg Blk */ - u8 pm1_event_length; /* [V1] Byte Length of ports at pm1x_event_block */ - u8 pm1_control_length; /* [V1] Byte Length of ports at pm1x_control_block */ - u8 pm2_control_length; /* [V1] Byte Length of ports at pm2_control_block */ - u8 pm_timer_length; /* [V1] Byte Length of ports at pm_timer_block */ - u8 gpe0_block_length; /* [V1] Byte Length of ports at gpe0_block */ - u8 gpe1_block_length; /* [V1] Byte Length of ports at gpe1_block */ - u8 gpe1_base; /* [V1] Offset in GPE number space where GPE1 events start */ - u8 cst_control; /* [V1] Support for the _CST object and C-States change notification */ - u16 c2_latency; /* [V1] Worst case HW latency to enter/exit C2 state */ - u16 c3_latency; /* [V1] Worst case HW latency to enter/exit C3 state */ - u16 flush_size; /* [V1] Processor memory cache line width, in bytes */ - u16 flush_stride; /* [V1] Number of flush strides that need to be read */ - u8 duty_offset; /* [V1] Processor duty cycle index in processor P_CNT reg */ - u8 duty_width; /* [V1] Processor duty cycle value bit width in P_CNT register */ - u8 day_alarm; /* [V1] Index to day-of-month alarm in RTC CMOS RAM */ - u8 month_alarm; /* [V1] Index to month-of-year alarm in RTC CMOS RAM */ - u8 century; /* [V1] Index to century in RTC CMOS RAM */ - u16 boot_flags; /* [V3] IA-PC Boot Architecture Flags (see below for individual flags) */ - u8 reserved; /* [V1] Reserved, must be zero */ - u32 flags; /* [V1] Miscellaneous flag bits (see below for individual flags) */ - /* End of Version 1 FADT fields (ACPI 1.0) */ - - struct acpi_generic_address reset_register; /* [V3] 64-bit address of the Reset register */ - u8 reset_value; /* [V3] Value to write to the reset_register port to reset the system */ - u16 arm_boot_flags; /* [V5] ARM-Specific Boot Flags (see below for individual flags) (ACPI 5.1) */ - u8 minor_revision; /* [V5] FADT Minor Revision (ACPI 5.1) */ - u64 Xfacs; /* [V3] 64-bit physical address of FACS */ - u64 Xdsdt; /* [V3] 64-bit physical address of DSDT */ - struct acpi_generic_address xpm1a_event_block; /* [V3] 64-bit Extended Power Mgt 1a Event Reg Blk address */ - struct acpi_generic_address xpm1b_event_block; /* [V3] 64-bit Extended Power Mgt 1b Event Reg Blk address */ - struct acpi_generic_address xpm1a_control_block; /* [V3] 64-bit Extended Power Mgt 1a Control Reg Blk address */ - struct acpi_generic_address xpm1b_control_block; /* [V3] 64-bit Extended Power Mgt 1b Control Reg Blk address */ - struct acpi_generic_address xpm2_control_block; /* [V3] 64-bit Extended Power Mgt 2 Control Reg Blk address */ - struct acpi_generic_address xpm_timer_block; /* [V3] 64-bit Extended Power Mgt Timer Ctrl Reg Blk address */ - struct acpi_generic_address xgpe0_block; /* [V3] 64-bit Extended General Purpose Event 0 Reg Blk address */ - struct acpi_generic_address xgpe1_block; /* [V3] 64-bit Extended General Purpose Event 1 Reg Blk address */ - /* End of Version 3 FADT fields (ACPI 2.0) */ - - struct acpi_generic_address sleep_control; /* [V4] 64-bit Sleep Control register (ACPI 5.0) */ - /* End of Version 4 FADT fields (ACPI 3.0 and ACPI 4.0) (Field was originally reserved in ACPI 3.0) */ - - struct acpi_generic_address sleep_status; /* [V5] 64-bit Sleep Status register (ACPI 5.0) */ - /* End of Version 5 FADT fields (ACPI 5.0) */ - - u64 hypervisor_id; /* [V6] Hypervisor Vendor ID (ACPI 6.0) */ - /* End of Version 6 FADT fields (ACPI 6.0) */ - + struct acpi_table_header header; /* Common ACPI table header */ + u32 facs; /* 32-bit physical address of FACS */ + u32 dsdt; /* 32-bit physical address of DSDT */ + u8 model; /* System Interrupt Model (ACPI 1.0) - not used in ACPI 2.0+ */ + u8 preferred_profile; /* Conveys preferred power management profile to OSPM. */ + u16 sci_interrupt; /* System vector of SCI interrupt */ + u32 smi_command; /* 32-bit Port address of SMI command port */ + u8 acpi_enable; /* Value to write to SMI_CMD to enable ACPI */ + u8 acpi_disable; /* Value to write to SMI_CMD to disable ACPI */ + u8 s4_bios_request; /* Value to write to SMI_CMD to enter S4BIOS state */ + u8 pstate_control; /* Processor performance state control */ + u32 pm1a_event_block; /* 32-bit port address of Power Mgt 1a Event Reg Blk */ + u32 pm1b_event_block; /* 32-bit port address of Power Mgt 1b Event Reg Blk */ + u32 pm1a_control_block; /* 32-bit port address of Power Mgt 1a Control Reg Blk */ + u32 pm1b_control_block; /* 32-bit port address of Power Mgt 1b Control Reg Blk */ + u32 pm2_control_block; /* 32-bit port address of Power Mgt 2 Control Reg Blk */ + u32 pm_timer_block; /* 32-bit port address of Power Mgt Timer Ctrl Reg Blk */ + u32 gpe0_block; /* 32-bit port address of General Purpose Event 0 Reg Blk */ + u32 gpe1_block; /* 32-bit port address of General Purpose Event 1 Reg Blk */ + u8 pm1_event_length; /* Byte Length of ports at pm1x_event_block */ + u8 pm1_control_length; /* Byte Length of ports at pm1x_control_block */ + u8 pm2_control_length; /* Byte Length of ports at pm2_control_block */ + u8 pm_timer_length; /* Byte Length of ports at pm_timer_block */ + u8 gpe0_block_length; /* Byte Length of ports at gpe0_block */ + u8 gpe1_block_length; /* Byte Length of ports at gpe1_block */ + u8 gpe1_base; /* Offset in GPE number space where GPE1 events start */ + u8 cst_control; /* Support for the _CST object and C-States change notification */ + u16 c2_latency; /* Worst case HW latency to enter/exit C2 state */ + u16 c3_latency; /* Worst case HW latency to enter/exit C3 state */ + u16 flush_size; /* Processor memory cache line width, in bytes */ + u16 flush_stride; /* Number of flush strides that need to be read */ + u8 duty_offset; /* Processor duty cycle index in processor P_CNT reg */ + u8 duty_width; /* Processor duty cycle value bit width in P_CNT register */ + u8 day_alarm; /* Index to day-of-month alarm in RTC CMOS RAM */ + u8 month_alarm; /* Index to month-of-year alarm in RTC CMOS RAM */ + u8 century; /* Index to century in RTC CMOS RAM */ + u16 boot_flags; /* IA-PC Boot Architecture Flags (see below for individual flags) */ + u8 reserved; /* Reserved, must be zero */ + u32 flags; /* Miscellaneous flag bits (see below for individual flags) */ + struct acpi_generic_address reset_register; /* 64-bit address of the Reset register */ + u8 reset_value; /* Value to write to the reset_register port to reset the system */ + u16 arm_boot_flags; /* ARM-Specific Boot Flags (see below for individual flags) (ACPI 5.1) */ + u8 minor_revision; /* FADT Minor Revision (ACPI 5.1) */ + u64 Xfacs; /* 64-bit physical address of FACS */ + u64 Xdsdt; /* 64-bit physical address of DSDT */ + struct acpi_generic_address xpm1a_event_block; /* 64-bit Extended Power Mgt 1a Event Reg Blk address */ + struct acpi_generic_address xpm1b_event_block; /* 64-bit Extended Power Mgt 1b Event Reg Blk address */ + struct acpi_generic_address xpm1a_control_block; /* 64-bit Extended Power Mgt 1a Control Reg Blk address */ + struct acpi_generic_address xpm1b_control_block; /* 64-bit Extended Power Mgt 1b Control Reg Blk address */ + struct acpi_generic_address xpm2_control_block; /* 64-bit Extended Power Mgt 2 Control Reg Blk address */ + struct acpi_generic_address xpm_timer_block; /* 64-bit Extended Power Mgt Timer Ctrl Reg Blk address */ + struct acpi_generic_address xgpe0_block; /* 64-bit Extended General Purpose Event 0 Reg Blk address */ + struct acpi_generic_address xgpe1_block; /* 64-bit Extended General Purpose Event 1 Reg Blk address */ + struct acpi_generic_address sleep_control; /* 64-bit Sleep Control register (ACPI 5.0) */ + struct acpi_generic_address sleep_status; /* 64-bit Sleep Status register (ACPI 5.0) */ + u64 hypervisor_id; /* Hypervisor Vendor ID (ACPI 6.0) */ }; /* Masks for FADT IA-PC Boot Architecture Flags (boot_flags) [Vx]=Introduced in this FADT revision */ @@ -311,8 +301,8 @@ struct acpi_table_fadt { /* Masks for FADT ARM Boot Architecture Flags (arm_boot_flags) ACPI 5.1 */ -#define ACPI_FADT_PSCI_COMPLIANT (1) /* 00: [V5] PSCI 0.2+ is implemented */ -#define ACPI_FADT_PSCI_USE_HVC (1<<1) /* 01: [V5] HVC must be used instead of SMC as the PSCI conduit */ +#define ACPI_FADT_PSCI_COMPLIANT (1) /* 00: [V5+] PSCI 0.2+ is implemented */ +#define ACPI_FADT_PSCI_USE_HVC (1<<1) /* 01: [V5+] HVC must be used instead of SMC as the PSCI conduit */ /* Masks for FADT flags */ @@ -409,34 +399,20 @@ struct acpi_table_desc { * match the expected length. In other words, the length of the * FADT is the bottom line as to what the version really is. * - * NOTE: There is no officialy released V2 of the FADT. This - * version was used only for prototyping and testing during the - * 32-bit to 64-bit transition. V3 was the first official 64-bit - * version of the FADT. - * - * Update this list of defines when a new version of the FADT is - * added to the ACPI specification. Note that the FADT version is - * only incremented when new fields are appended to the existing - * version. Therefore, the FADT version is competely independent - * from the version of the ACPI specification where it is - * defined. - * - * For reference, the various FADT lengths are as follows: - * FADT V1 size: 0x074 ACPI 1.0 - * FADT V3 size: 0x0F4 ACPI 2.0 - * FADT V4 size: 0x100 ACPI 3.0 and ACPI 4.0 - * FADT V5 size: 0x10C ACPI 5.0 - * FADT V6 size: 0x114 ACPI 6.0 + * For reference, the values below are as follows: + * FADT V1 size: 0x074 + * FADT V2 size: 0x084 + * FADT V3 size: 0x0F4 + * FADT V4 size: 0x0F4 + * FADT V5 size: 0x10C + * FADT V6 size: 0x114 */ -#define ACPI_FADT_V1_SIZE (u32) (ACPI_FADT_OFFSET (flags) + 4) /* ACPI 1.0 */ -#define ACPI_FADT_V3_SIZE (u32) (ACPI_FADT_OFFSET (sleep_control)) /* ACPI 2.0 */ -#define ACPI_FADT_V4_SIZE (u32) (ACPI_FADT_OFFSET (sleep_status)) /* ACPI 3.0 and ACPI 4.0 */ -#define ACPI_FADT_V5_SIZE (u32) (ACPI_FADT_OFFSET (hypervisor_id)) /* ACPI 5.0 */ -#define ACPI_FADT_V6_SIZE (u32) (sizeof (struct acpi_table_fadt)) /* ACPI 6.0 */ +#define ACPI_FADT_V1_SIZE (u32) (ACPI_FADT_OFFSET (flags) + 4) +#define ACPI_FADT_V2_SIZE (u32) (ACPI_FADT_OFFSET (minor_revision) + 1) +#define ACPI_FADT_V3_SIZE (u32) (ACPI_FADT_OFFSET (sleep_control)) +#define ACPI_FADT_V5_SIZE (u32) (ACPI_FADT_OFFSET (hypervisor_id)) +#define ACPI_FADT_V6_SIZE (u32) (sizeof (struct acpi_table_fadt)) -/* Update these when new FADT versions are added */ - -#define ACPI_FADT_MAX_VERSION 6 #define ACPI_FADT_CONFORMANCE "ACPI 6.1 (FADT version 6)" #endif /* __ACTBL_H__ */ From 709fb1f961ea5c287107c3f903e81c9529224c8b Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 14 Nov 2016 12:31:49 -0800 Subject: [PATCH 223/503] xtensa: wire up new pkey_{mprotect,alloc,free} syscalls Signed-off-by: Max Filippov --- arch/xtensa/include/uapi/asm/unistd.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/include/uapi/asm/unistd.h b/arch/xtensa/include/uapi/asm/unistd.h index de9b14b2d348..cd400af4a6b2 100644 --- a/arch/xtensa/include/uapi/asm/unistd.h +++ b/arch/xtensa/include/uapi/asm/unistd.h @@ -767,7 +767,14 @@ __SYSCALL(346, sys_preadv2, 6) #define __NR_pwritev2 347 __SYSCALL(347, sys_pwritev2, 6) -#define __NR_syscall_count 348 +#define __NR_pkey_mprotect 348 +__SYSCALL(348, sys_pkey_mprotect, 4) +#define __NR_pkey_alloc 349 +__SYSCALL(349, sys_pkey_alloc, 2) +#define __NR_pkey_free 350 +__SYSCALL(350, sys_pkey_free, 1) + +#define __NR_syscall_count 351 /* * sysxtensa syscall handler From 1bc2f5fac34535aeb3878ce32a762a221be7a851 Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Mon, 14 Nov 2016 12:55:15 +0100 Subject: [PATCH 224/503] ARM: dts: omap5: board-common: fix wrong SMPS6 (VDD-DDR3) voltage DDR3L is usually specified as JEDEC standard 1.35V(1.28V~1.45V) & 1.5V(1.425V~1.575V) Therefore setting smps6 regulator to 1.2V is definitively below minimum. It appears that real world chips are more forgiving than data sheets indicate, but let's set the regulator right. Note: a board that uses other voltages (DDR with 1.5V) can overwrite by referencing &smps6_reg. Signed-off-by: H. Nikolaus Schaller Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5-board-common.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index 60a33c4b7b82..4caadb253249 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -476,8 +476,8 @@ smps6_reg: smps6 { /* VDD_DDR3 - over VDD_SMPS6 */ regulator-name = "smps6"; - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <1200000>; + regulator-min-microvolt = <1350000>; + regulator-max-microvolt = <1350000>; regulator-always-on; regulator-boot-on; }; From 6ff1a25318ebf688ef9593fe09cd449f6fb4ad31 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 14 Nov 2016 21:46:47 +0100 Subject: [PATCH 225/503] ALSA: usb-audio: Fix use-after-free of usb_device at disconnect The usb-audio driver implements the deferred device disconnection for the device in use. In this mode, the disconnection callback returns immediately while the actual ALSA card object removal happens later when all files get closed. As Shuah reported, this code flow, however, leads to a use-after-free, detected by KASAN: BUG: KASAN: use-after-free in snd_usb_audio_free+0x134/0x160 [snd_usb_audio] at addr ffff8801c863ce10 Write of size 8 by task pulseaudio/2244 Call Trace: [] dump_stack+0x67/0x94 [] kasan_object_err+0x21/0x70 [] kasan_report_error+0x1fa/0x4e0 [] ? kasan_slab_free+0x87/0xb0 [] __asan_report_store8_noabort+0x43/0x50 [] ? snd_usb_audio_free+0x134/0x160 [snd_usb_audio] [] snd_usb_audio_free+0x134/0x160 [snd_usb_audio] [] snd_usb_audio_dev_free+0x31/0x40 [snd_usb_audio] [] __snd_device_free+0x12a/0x210 [] snd_device_free_all+0x85/0xd0 [] release_card_device+0x34/0x130 [] device_release+0x76/0x1e0 [] kobject_release+0x107/0x370 ..... Object at ffff8801c863cc80, in cache kmalloc-2048 size: 2048 Allocated: [] save_stack_trace+0x2b/0x50 [] save_stack+0x46/0xd0 [] kasan_kmalloc+0xad/0xe0 [] kmem_cache_alloc_trace+0xfa/0x240 [] usb_alloc_dev+0x57/0xc90 [] hub_event+0xf1d/0x35f0 .... Freed: [] save_stack_trace+0x2b/0x50 [] save_stack+0x46/0xd0 [] kasan_slab_free+0x71/0xb0 [] kfree+0xd9/0x280 [] usb_release_dev+0xde/0x110 [] device_release+0x76/0x1e0 .... It's the code trying to clear drvdata of the assigned usb_device where the usb_device itself was already released in usb_release_dev() after the disconnect callback. This patch fixes it by checking whether the code path is via the disconnect callback, i.e. chip->shutdown flag is set. Fixes: 79289e24194a ('ALSA: usb-audio: Refer to chip->usb_id for quirks...') Reported-and-tested-by: Shuah Khan Cc: # v4.6+ Signed-off-by: Takashi Iwai --- sound/usb/card.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/usb/card.c b/sound/usb/card.c index 9e5276d6dda0..2ddc034673a8 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -315,7 +315,8 @@ static int snd_usb_audio_free(struct snd_usb_audio *chip) snd_usb_endpoint_free(ep); mutex_destroy(&chip->mutex); - dev_set_drvdata(&chip->dev->dev, NULL); + if (!atomic_read(&chip->shutdown)) + dev_set_drvdata(&chip->dev->dev, NULL); kfree(chip); return 0; } From 6ca595a70bc46e1a0eea3ee0681360f41555bfd9 Mon Sep 17 00:00:00 2001 From: Hoan Tran Date: Mon, 14 Nov 2016 11:19:02 -0800 Subject: [PATCH 226/503] mailbox: PCC: Fix lockdep warning when request PCC channel This patch fixes the lockdep warning below DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags)) ------------[ cut here ]------------ WARNING: CPU: 1 PID: 1 at linux-next/kernel/locking/lockdep.c:2876 lockdep_trace_alloc+0xe0/0xf0 Modules linked in: CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.8.0-11756-g86c5152 #46 ... Call trace: Exception stack(0xffff8007da837890 to 0xffff8007da8379c0) 7880: ffff8007da834000 0001000000000000 78a0: ffff8007da837a70 ffff0000081111a0 00000000600000c5 000000000000003d 78c0: 9374bc6a7f3c7832 0000000000381878 ffff000009db7ab8 000000000000002f 78e0: ffff00000811aabc ffff000008be2548 ffff8007da837990 ffff00000811adf8 7900: ffff8007da834000 00000000024080c0 00000000000000c0 ffff000009021000 7920: 0000000000000000 0000000000000000 ffff000008c8f7c8 ffff8007da579810 7940: 000000000000002f ffff8007da858000 0000000000000000 0000000000000001 7960: 0000000000000001 0000000000000000 ffff00000811a468 0000000000000002 7980: 656c62617369645f 0000000000038187 00000000000000ee ffff8007da837850 79a0: ffff000009db50c0 ffff000009db569d 0000000000000006 ffff000089db568f [] lockdep_trace_alloc+0xe0/0xf0 [] __kmalloc_track_caller+0x50/0x250 [] devres_alloc_node+0x28/0x60 [] devm_request_threaded_irq+0x50/0xe0 [] pcc_mbox_request_channel+0x110/0x170 [] acpi_cppc_processor_probe+0x264/0x414 [] __acpi_processor_start+0x28/0xa0 [] acpi_processor_start+0x44/0x54 [] driver_probe_device+0x1fc/0x2b0 [] __driver_attach+0xb4/0xc0 [] bus_for_each_dev+0x5c/0xa0 [] driver_attach+0x20/0x30 [] bus_add_driver+0x110/0x230 [] driver_register+0x60/0x100 [] acpi_processor_driver_init+0x2c/0xb0 [] do_one_initcall+0x38/0x130 [] kernel_init_freeable+0x210/0x2b4 [] kernel_init+0x10/0x110 [] ret_from_fork+0x10/0x50 It's because the spinlock inside pcc_mbox_request_channel() is kept too long. This patch releases spinlock before request_irq() and free_irq() to fix this issue as spinlock is only needed to protect the channel data. Signed-off-by: Hoan Tran Reviewed-by: Prashanth Prakash Signed-off-by: Rafael J. Wysocki --- drivers/mailbox/pcc.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c index 08c87fadca8c..1f32688c312d 100644 --- a/drivers/mailbox/pcc.c +++ b/drivers/mailbox/pcc.c @@ -65,6 +65,7 @@ #include #include #include +#include #include "mailbox.h" @@ -267,6 +268,8 @@ struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl, if (chan->txdone_method == TXDONE_BY_POLL && cl->knows_txdone) chan->txdone_method |= TXDONE_BY_ACK; + spin_unlock_irqrestore(&chan->lock, flags); + if (pcc_doorbell_irq[subspace_id] > 0) { int rc; @@ -275,12 +278,11 @@ struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl, if (unlikely(rc)) { dev_err(dev, "failed to register PCC interrupt %d\n", pcc_doorbell_irq[subspace_id]); + pcc_mbox_free_channel(chan); chan = ERR_PTR(rc); } } - spin_unlock_irqrestore(&chan->lock, flags); - return chan; } EXPORT_SYMBOL_GPL(pcc_mbox_request_channel); @@ -304,20 +306,19 @@ void pcc_mbox_free_channel(struct mbox_chan *chan) return; } + if (pcc_doorbell_irq[id] > 0) + devm_free_irq(chan->mbox->dev, pcc_doorbell_irq[id], chan); + spin_lock_irqsave(&chan->lock, flags); chan->cl = NULL; chan->active_req = NULL; if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK)) chan->txdone_method = TXDONE_BY_POLL; - if (pcc_doorbell_irq[id] > 0) - devm_free_irq(chan->mbox->dev, pcc_doorbell_irq[id], chan); - spin_unlock_irqrestore(&chan->lock, flags); } EXPORT_SYMBOL_GPL(pcc_mbox_free_channel); - /** * pcc_send_data - Called from Mailbox Controller code. Used * here only to ring the channel doorbell. The PCC client From 5d0d4b91bf627f14f95167b738d524156c9d440b Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Sun, 13 Nov 2016 13:01:32 +0800 Subject: [PATCH 227/503] Revert "bnx2: Reset device during driver initialization" This reverts commit 3e1be7ad2d38c6bd6aeef96df9bd0a7822f4e51c. When people build bnx2 driver into kernel, it will fail to detect and load firmware because firmware is contained in initramfs and initramfs has not been uncompressed yet during do_initcalls. So revert commit 3e1be7a and work out a new way in the later patch. Signed-off-by: Baoquan He Acked-by: Paul Menzel Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index b3791b394715..c55797291b57 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -6361,6 +6361,10 @@ bnx2_open(struct net_device *dev) struct bnx2 *bp = netdev_priv(dev); int rc; + rc = bnx2_request_firmware(bp); + if (rc < 0) + goto out; + netif_carrier_off(dev); bnx2_disable_int(bp); @@ -6429,6 +6433,7 @@ open_err: bnx2_free_irq(bp); bnx2_free_mem(bp); bnx2_del_napi(bp); + bnx2_release_firmware(bp); goto out; } @@ -8575,12 +8580,6 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, dev); - rc = bnx2_request_firmware(bp); - if (rc < 0) - goto error; - - - bnx2_reset_chip(bp, BNX2_DRV_MSG_CODE_RESET); memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN); dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | @@ -8613,7 +8612,6 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; error: - bnx2_release_firmware(bp); pci_iounmap(pdev, bp->regview); pci_release_regions(pdev); pci_disable_device(pdev); From 6df77862f63f389df3b1ad879738e04440d7385d Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Sun, 13 Nov 2016 13:01:33 +0800 Subject: [PATCH 228/503] bnx2: Wait for in-flight DMA to complete at probe stage In-flight DMA from 1st kernel could continue going in kdump kernel. New io-page table has been created before bnx2 does reset at open stage. We have to wait for the in-flight DMA to complete to avoid it look up into the newly created io-page table at probe stage. Suggested-by: Michael Chan Signed-off-by: Baoquan He Acked-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2.c | 38 +++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index c55797291b57..1f7034d739b0 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -49,6 +49,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_CNIC) #define BCM_CNIC 1 @@ -4764,15 +4765,16 @@ bnx2_setup_msix_tbl(struct bnx2 *bp) BNX2_WR(bp, BNX2_PCI_GRC_WINDOW3_ADDR, BNX2_MSIX_PBA_ADDR); } -static int -bnx2_reset_chip(struct bnx2 *bp, u32 reset_code) +static void +bnx2_wait_dma_complete(struct bnx2 *bp) { u32 val; - int i, rc = 0; - u8 old_port; + int i; - /* Wait for the current PCI transaction to complete before - * issuing a reset. */ + /* + * Wait for the current PCI transaction to complete before + * issuing a reset. + */ if ((BNX2_CHIP(bp) == BNX2_CHIP_5706) || (BNX2_CHIP(bp) == BNX2_CHIP_5708)) { BNX2_WR(bp, BNX2_MISC_ENABLE_CLR_BITS, @@ -4796,6 +4798,21 @@ bnx2_reset_chip(struct bnx2 *bp, u32 reset_code) } } + return; +} + + +static int +bnx2_reset_chip(struct bnx2 *bp, u32 reset_code) +{ + u32 val; + int i, rc = 0; + u8 old_port; + + /* Wait for the current PCI transaction to complete before + * issuing a reset. */ + bnx2_wait_dma_complete(bp); + /* Wait for the firmware to tell us it is ok to issue a reset. */ bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT0 | reset_code, 1, 1); @@ -8580,6 +8597,15 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, dev); + /* + * In-flight DMA from 1st kernel could continue going in kdump kernel. + * New io-page table has been created before bnx2 does reset at open stage. + * We have to wait for the in-flight DMA to complete to avoid it look up + * into the newly created io-page table. + */ + if (is_kdump_kernel()) + bnx2_wait_dma_complete(bp); + memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN); dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | From 5bf35ddfee052d44f39ebaa395d87101c8918405 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 13 Nov 2016 21:44:37 +0800 Subject: [PATCH 229/503] sctp: change sk state only when it has assocs in sctp_shutdown Now when users shutdown a sock with SEND_SHUTDOWN in sctp, even if this sock has no connection (assoc), sk state would be changed to SCTP_SS_CLOSING, which is not as we expect. Besides, after that if users try to listen on this sock, kernel could even panic when it dereference sctp_sk(sk)->bind_hash in sctp_inet_listen, as bind_hash is null when sock has no assoc. This patch is to move sk state change after checking sk assocs is not empty, and also merge these two if() conditions and reduce indent level. Fixes: d46e416c11c8 ("sctp: sctp should change socket state when shutdown is received") Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/socket.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index faa48ff5cf4b..f23ad913dc7a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4285,19 +4285,18 @@ static void sctp_shutdown(struct sock *sk, int how) { struct net *net = sock_net(sk); struct sctp_endpoint *ep; - struct sctp_association *asoc; if (!sctp_style(sk, TCP)) return; - if (how & SEND_SHUTDOWN) { + ep = sctp_sk(sk)->ep; + if (how & SEND_SHUTDOWN && !list_empty(&ep->asocs)) { + struct sctp_association *asoc; + sk->sk_state = SCTP_SS_CLOSING; - ep = sctp_sk(sk)->ep; - if (!list_empty(&ep->asocs)) { - asoc = list_entry(ep->asocs.next, - struct sctp_association, asocs); - sctp_primitive_SHUTDOWN(net, asoc, NULL); - } + asoc = list_entry(ep->asocs.next, + struct sctp_association, asocs); + sctp_primitive_SHUTDOWN(net, asoc, NULL); } } From 977c1f9c8c022d0173181766b34a0db3705265a4 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 7 Nov 2016 15:14:20 -0800 Subject: [PATCH 230/503] ftrace: Ignore FTRACE_FL_DISABLED while walking dyn_ftrace records ftrace_shutdown() checks for sanity of ftrace records and if dyn_ftrace->flags is not zero, it will warn. It can happen that 'flags' are set to FTRACE_FL_DISABLED at this point, since some module was loaded, but before ftrace_module_enable() cleared the flags for this module. In other words the module.c is doing: ftrace_module_init(mod); // calls ftrace_update_code() that sets flags=FTRACE_FL_DISABLED ... // here ftrace_shutdown() is called that warns, since err = prepare_coming_module(mod); // didn't have a chance to clear FTRACE_FL_DISABLED Fix it by ignoring disabled records. It's similar to what __ftrace_hash_rec_update() is already doing. Link: http://lkml.kernel.org/r/1478560460-3818619-1-git-send-email-ast@fb.com Cc: stable@vger.kernel.org Fixes: b7ffffbb46f2 "ftrace: Add infrastructure for delayed enabling of module functions" Signed-off-by: Alexei Starovoitov Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 2050a7652a86..326498baab83 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2763,7 +2763,7 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) struct dyn_ftrace *rec; do_for_each_ftrace_rec(pg, rec) { - if (FTRACE_WARN_ON_ONCE(rec->flags)) + if (FTRACE_WARN_ON_ONCE(rec->flags & ~FTRACE_FL_DISABLED)) pr_warn(" %pS flags:%lx\n", (void *)rec->ip, rec->flags); } while_for_each_ftrace_rec(); From 546fece4eae871f033925ccf0ff2b740725ae915 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 14 Nov 2016 16:31:49 -0500 Subject: [PATCH 231/503] ftrace: Add more checks for FTRACE_FL_DISABLED in processing ip records When a module is first loaded and its function ip records are added to the ftrace list of functions to modify, they are set to DISABLED, as their text is still in a read only state. When the module is fully loaded, and can be updated, the flag is cleared, and if their's any functions that should be tracing them, it is updated at that moment. But there's several locations that do record accounting and should ignore records that are marked as disabled, or they can cause issues. Alexei already fixed one location, but others need to be addressed. Cc: stable@vger.kernel.org Fixes: b7ffffbb46f2 "ftrace: Add infrastructure for delayed enabling of module functions" Reported-by: Alexei Starovoitov Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 326498baab83..da87b3cba5b3 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1862,6 +1862,10 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, /* Update rec->flags */ do_for_each_ftrace_rec(pg, rec) { + + if (rec->flags & FTRACE_FL_DISABLED) + continue; + /* We need to update only differences of filter_hash */ in_old = !!ftrace_lookup_ip(old_hash, rec->ip); in_new = !!ftrace_lookup_ip(new_hash, rec->ip); @@ -1884,6 +1888,10 @@ rollback: /* Roll back what we did above */ do_for_each_ftrace_rec(pg, rec) { + + if (rec->flags & FTRACE_FL_DISABLED) + continue; + if (rec == end) goto err_out; @@ -2397,6 +2405,10 @@ void __weak ftrace_replace_code(int enable) return; do_for_each_ftrace_rec(pg, rec) { + + if (rec->flags & FTRACE_FL_DISABLED) + continue; + failed = __ftrace_replace_code(rec, enable); if (failed) { ftrace_bug(failed, rec); @@ -3598,6 +3610,10 @@ match_records(struct ftrace_hash *hash, char *func, int len, char *mod) goto out_unlock; do_for_each_ftrace_rec(pg, rec) { + + if (rec->flags & FTRACE_FL_DISABLED) + continue; + if (ftrace_match_record(rec, &func_g, mod_match, exclude_mod)) { ret = enter_record(hash, rec, clear_filter); if (ret < 0) { @@ -3793,6 +3809,9 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, do_for_each_ftrace_rec(pg, rec) { + if (rec->flags & FTRACE_FL_DISABLED) + continue; + if (!ftrace_match_record(rec, &func_g, NULL, 0)) continue; @@ -4685,6 +4704,9 @@ ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer) do_for_each_ftrace_rec(pg, rec) { + if (rec->flags & FTRACE_FL_DISABLED) + continue; + if (ftrace_match_record(rec, &func_g, NULL, 0)) { /* if it is in the array */ exists = false; From c51e424dc79e1428afc4d697cdb6a07f7af70cbf Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 13 Nov 2016 17:50:35 -0800 Subject: [PATCH 232/503] net: stmmac: Fix lack of link transition for fixed PHYs Commit 52f95bbfcf72 ("stmmac: fix adjust link call in case of a switch is attached") added some logic to avoid polling the fixed PHY and therefore invoking the adjust_link callback more than once, since this is a fixed PHY and link events won't be generated. This works fine the first time, because we start with phydev->irq = PHY_POLL, so we call adjust_link, then we set phydev->irq = PHY_IGNORE_INTERRUPT and we stop polling the PHY. Now, if we called ndo_close(), which calls both phy_stop() and does an explicit netif_carrier_off(), we end up with a link down. Upon calling ndo_open() again, despite starting the PHY state machine, we have PHY_IGNORE_INTERRUPT set, and we generate no link event at all, so the link is permanently down. Fixes: 52f95bbfcf72 ("stmmac: fix adjust link call in case of a switch is attached") Signed-off-by: Florian Fainelli Acked-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 48e71fad4210..e2c94ec4edd0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -880,6 +880,13 @@ static int stmmac_init_phy(struct net_device *dev) return -ENODEV; } + /* stmmac_adjust_link will change this to PHY_IGNORE_INTERRUPT to avoid + * subsequent PHY polling, make sure we force a link transition if + * we have a UP/DOWN/UP transition + */ + if (phydev->is_pseudo_fixed_link) + phydev->irq = PHY_POLL; + pr_debug("stmmac_init_phy: %s: attached to PHY (UID 0x%x)" " Link = %d\n", dev->name, phydev->phy_id, phydev->link); From ac571de999e14b87890cb960ad6f03fbdde6abc8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 14 Nov 2016 11:26:32 +0100 Subject: [PATCH 233/503] mlxsw: spectrum_router: Flush FIB tables during fini Since commit b45f64d16d45 ("mlxsw: spectrum_router: Use FIB notifications instead of switchdev calls") we reflect to the device the entire FIB table and not only FIBs that point to netdevs created by the driver. During module removal, FIBs of the second type are removed following NETDEV_UNREGISTER events sent. The other FIBs are still present in both the driver's cache and the device's table. Fix this by iterating over all the FIB tables in the device and flush them. There's no need to take locks, as we're the only writer. Fixes: b45f64d16d45 ("mlxsw: spectrum_router: Use FIB notifications instead of switchdev calls") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index cbeeddd70c5a..e83072da6272 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -594,8 +594,11 @@ static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp) return 0; } +static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp); + static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp) { + mlxsw_sp_router_fib_flush(mlxsw_sp); kfree(mlxsw_sp->router.vrs); } @@ -1867,18 +1870,18 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } -static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp) +static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_resources *resources; struct mlxsw_sp_fib_entry *fib_entry; struct mlxsw_sp_fib_entry *tmp; struct mlxsw_sp_vr *vr; int i; - int err; resources = mlxsw_core_resources_get(mlxsw_sp->core); for (i = 0; i < resources->max_virtual_routers; i++) { vr = &mlxsw_sp->router.vrs[i]; + if (!vr->used) continue; @@ -1894,6 +1897,13 @@ static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp) break; } } +} + +static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + mlxsw_sp_router_fib_flush(mlxsw_sp); mlxsw_sp->router.aborted = true; err = mlxsw_sp_router_set_abort_trap(mlxsw_sp); if (err) From e123386bc31bbf467dc558f2f919de0b8b4ba58c Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 7 Nov 2016 14:32:02 -0500 Subject: [PATCH 234/503] tile: handle __ro_after_init like parisc does The tile architecture already marks RO_DATA as read-only in the kernel, so grouping RO_AFTER_INIT_DATA with RO_DATA, as is done by default, means the kernel faults in init when it tries to write to RO_AFTER_INIT_DATA. For now, just arrange that __ro_after_init is handled like __write_once, i.e. __read_mostly. Reviewed-by: Kees Cook Signed-off-by: Chris Metcalf --- arch/tile/include/asm/cache.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/tile/include/asm/cache.h b/arch/tile/include/asm/cache.h index 6160761d5f61..4810e48dbbbf 100644 --- a/arch/tile/include/asm/cache.h +++ b/arch/tile/include/asm/cache.h @@ -61,4 +61,7 @@ */ #define __write_once __read_mostly +/* __ro_after_init is the generic name for the tile arch __write_once. */ +#define __ro_after_init __read_mostly + #endif /* _ASM_TILE_CACHE_H */ From c733ab3512431436a26e0381829b45794cb13fb0 Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Mon, 14 Nov 2016 19:26:22 -0200 Subject: [PATCH 235/503] scsi: qla2xxx: do not abort all commands in the adapter during EEH recovery The previous commit 1535aa75a3d8 ("qla2xxx: fix invalid DMA access after command aborts in PCI device remove") introduced a regression during an EEH recovery, since the change to the qla2x00_abort_all_cmds() function calls qla2xxx_eh_abort(), which verifies the EEH recovery condition but handles it heavy-handed. (commit a465537ad1a4 "qla2xxx: Disable the adapter and skip error recovery in case of register disconnect.") This problem warrants a more general/optimistic solution right into qla2xxx_eh_abort() (eg in case a real command abort arrives during EEH recovery, or if it takes long enough to trigger command aborts); but it's still worth to add a check to ensure the code added by the previous commit is correct and contained within its owner function. This commit just adds a 'if (!ha->flags.eeh_busy)' check around it. (ahem; a trivial fix for this -rc series; sorry for this oversight.) With it applied, both PCI device remove and EEH recovery works fine. Fixes: 1535aa75a3d8 ("scsi: qla2xxx: fix invalid DMA access after command aborts in PCI device remove") Signed-off-by: Mauricio Faria de Oliveira Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 567fa080e261..56d6142852a5 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1456,15 +1456,20 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res) for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) { sp = req->outstanding_cmds[cnt]; if (sp) { - /* Get a reference to the sp and drop the lock. - * The reference ensures this sp->done() call - * - and not the call in qla2xxx_eh_abort() - - * ends the SCSI command (with result 'res'). + /* Don't abort commands in adapter during EEH + * recovery as it's not accessible/responding. */ - sp_get(sp); - spin_unlock_irqrestore(&ha->hardware_lock, flags); - qla2xxx_eh_abort(GET_CMD_SP(sp)); - spin_lock_irqsave(&ha->hardware_lock, flags); + if (!ha->flags.eeh_busy) { + /* Get a reference to the sp and drop the lock. + * The reference ensures this sp->done() call + * - and not the call in qla2xxx_eh_abort() - + * ends the SCSI command (with result 'res'). + */ + sp_get(sp); + spin_unlock_irqrestore(&ha->hardware_lock, flags); + qla2xxx_eh_abort(GET_CMD_SP(sp)); + spin_lock_irqsave(&ha->hardware_lock, flags); + } req->outstanding_cmds[cnt] = NULL; sp->done(vha, sp, res); } From 60f8339eb388df8a46f8eb4282ff0e15f08f218c Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 12 Nov 2016 15:01:09 +0100 Subject: [PATCH 236/503] gpio: do not double-check direction on sleeping chips When locking a GPIO line as IRQ, we go to lengths to double-check that the line is really set as input before marking it as used for IRQ. This is not good on GPIO chips that can sleep, because this function is called in IRQ-safe context. Just skip this if it can't be checked quickly. Currently this happens on sleeping expanders such as STMPE or TC3589x: BUG: scheduling while atomic: swapper/1/0x00000002 Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 4.9.0-rc1+ #38 Hardware name: Nomadik STn8815 [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (__schedule_bug+0x54/0x80) [] (__schedule_bug) from [] (__schedule+0x3a0/0x460) [] (__schedule) from [] (schedule+0x54/0xb8) (...) This patch fixes that problem and relies on the direction read from the chip when it was added. Cc: stable@vger.kernel.org Fixes: 9c10280d85c1 ("gpio: flush direction status in gpiochip_lock_as_irq()") Cc: Patrice Chotard Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 93ed0e00c578..868128a676ba 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2737,8 +2737,11 @@ int gpiochip_lock_as_irq(struct gpio_chip *chip, unsigned int offset) if (IS_ERR(desc)) return PTR_ERR(desc); - /* Flush direction if something changed behind our back */ - if (chip->get_direction) { + /* + * If it's fast: flush the direction setting if something changed + * behind our back + */ + if (!chip->can_sleep && chip->get_direction) { int dir = chip->get_direction(chip, offset); if (dir) From 220a04f0e53276eb3da666174bcf97489fd8644e Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 14 Nov 2016 15:10:29 +0100 Subject: [PATCH 237/503] gpio: tc3589x: fix up .get_direction() The bit in the TC3589x direction register is 0 for input and 1 for output, but the gpiolib expects the reverse. Fix up the logic. Cc: stable@vger.kernel.org Fixes: 14063d71e5e6 ("gpio: tc3589x: add .get_direction() and small cleanup") Signed-off-by: Linus Walleij --- drivers/gpio/gpio-tc3589x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-tc3589x.c b/drivers/gpio/gpio-tc3589x.c index 5a5a6cb00eea..d6e21f1a70a9 100644 --- a/drivers/gpio/gpio-tc3589x.c +++ b/drivers/gpio/gpio-tc3589x.c @@ -97,7 +97,7 @@ static int tc3589x_gpio_get_direction(struct gpio_chip *chip, if (ret < 0) return ret; - return !!(ret & BIT(pos)); + return !(ret & BIT(pos)); } static int tc3589x_gpio_set_single_ended(struct gpio_chip *chip, From c0a36013639b06760f7c2c21a8387eac855432e1 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 15 Nov 2016 15:28:33 +1100 Subject: [PATCH 238/503] powerpc/64: Fix setting of AIL in hypervisor mode Commit d3cbff1b5 "powerpc: Put exception configuration in a common place" broke the setting of the AIL bit (which enables taking exceptions with the MMU still on) on all processors, moving it incorrectly to a function called only on the boot CPU. This was correct for the guest case but not when running in hypervisor mode. This fixes it by partially reverting that commit, putting the setting back in cpu_ready_for_interrupts() Fixes: d3cbff1b5a90 ("powerpc: Put exception configuration in a common place") Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 7ac8e6eaab5b..8d586cff8a41 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -226,17 +226,25 @@ static void __init configure_exceptions(void) if (firmware_has_feature(FW_FEATURE_OPAL)) opal_configure_cores(); - /* Enable AIL if supported, and we are in hypervisor mode */ - if (early_cpu_has_feature(CPU_FTR_HVMODE) && - early_cpu_has_feature(CPU_FTR_ARCH_207S)) { - unsigned long lpcr = mfspr(SPRN_LPCR); - mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); - } + /* AIL on native is done in cpu_ready_for_interrupts() */ } } static void cpu_ready_for_interrupts(void) { + /* + * Enable AIL if supported, and we are in hypervisor mode. This + * is called once for every processor. + * + * If we are not in hypervisor mode the job is done once for + * the whole partition in configure_exceptions(). + */ + if (early_cpu_has_feature(CPU_FTR_HVMODE) && + early_cpu_has_feature(CPU_FTR_ARCH_207S)) { + unsigned long lpcr = mfspr(SPRN_LPCR); + mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); + } + /* Set IR and DR in PACA MSR */ get_paca()->kernel_msr = MSR_KERNEL; } From 2ce9d2272b98743b911196c49e7af5841381c206 Mon Sep 17 00:00:00 2001 From: Petr Vandrovec Date: Thu, 10 Nov 2016 13:57:14 -0800 Subject: [PATCH 239/503] Fix USB CB/CBI storage devices with CONFIG_VMAP_STACK=y Some code (all error handling) submits CDBs that are allocated on the stack. This breaks with CB/CBI code that tries to create URB directly from SCSI command buffer - which happens to be in vmalloced memory with vmalloced kernel stacks. Let's make copy of the command in usb_stor_CB_transport. Signed-off-by: Petr Vandrovec Cc: stable Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/transport.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c index ffd086733421..1a59f335b063 100644 --- a/drivers/usb/storage/transport.c +++ b/drivers/usb/storage/transport.c @@ -954,10 +954,15 @@ int usb_stor_CB_transport(struct scsi_cmnd *srb, struct us_data *us) /* COMMAND STAGE */ /* let's send the command via the control pipe */ + /* + * Command is sometime (f.e. after scsi_eh_prep_cmnd) on the stack. + * Stack may be vmallocated. So no DMA for us. Make a copy. + */ + memcpy(us->iobuf, srb->cmnd, srb->cmd_len); result = usb_stor_ctrl_transfer(us, us->send_ctrl_pipe, US_CBI_ADSC, USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, - us->ifnum, srb->cmnd, srb->cmd_len); + us->ifnum, us->iobuf, srb->cmd_len); /* check the return code for the command */ usb_stor_dbg(us, "Call to usb_stor_ctrl_transfer() returned %d\n", From 5bf7b6e86f29f064979d7b3e6dd21c5dd1feb855 Mon Sep 17 00:00:00 2001 From: Loic Pallardy Date: Tue, 15 Nov 2016 09:47:00 +0100 Subject: [PATCH 240/503] ARM: dts: STiH410-b2260: Fix typo in spi0 chipselect definition Change cs-gpio to cs-gpios. Signed-off-by: Loic Pallardy Acked-by: Patrice Chotard --- arch/arm/boot/dts/stih410-b2260.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/stih410-b2260.dts b/arch/arm/boot/dts/stih410-b2260.dts index ef2ff2f518f6..7fb507fcba7e 100644 --- a/arch/arm/boot/dts/stih410-b2260.dts +++ b/arch/arm/boot/dts/stih410-b2260.dts @@ -74,7 +74,7 @@ /* Low speed expansion connector */ spi0: spi@9844000 { label = "LS-SPI0"; - cs-gpio = <&pio30 3 0>; + cs-gpios = <&pio30 3 0>; status = "okay"; }; From 59c3b76cc61d1d676f965c192cc7969aa5cb2744 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 18 Aug 2016 09:10:44 +0200 Subject: [PATCH 241/503] fuse: fix fuse_write_end() if zero bytes were copied If pos is at the beginning of a page and copied is zero then page is not zeroed but is marked uptodate. Fix by skipping everything except unlock/put of page if zero bytes were copied. Reported-by: Al Viro Fixes: 6b12c1b37e55 ("fuse: Implement write_begin/write_end callbacks") Cc: # v3.15+ Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index abc66a6237fd..2401c5dabb2a 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1985,6 +1985,10 @@ static int fuse_write_end(struct file *file, struct address_space *mapping, { struct inode *inode = page->mapping->host; + /* Haven't copied anything? Skip zeroing, size extending, dirtying. */ + if (!copied) + goto unlock; + if (!PageUptodate(page)) { /* Zero any unwritten bytes at the end of the page */ size_t endoff = (pos + copied) & ~PAGE_MASK; @@ -1995,6 +1999,8 @@ static int fuse_write_end(struct file *file, struct address_space *mapping, fuse_write_update_size(inode, pos + copied); set_page_dirty(page); + +unlock: unlock_page(page); put_page(page); From 864c2357ca898c6171fe5284f5ecc795c8ce27a8 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Tue, 1 Nov 2016 11:52:58 -0700 Subject: [PATCH 242/503] perf/core: Do not set cpuctx->cgrp for unscheduled cgroups Commit: db4a835601b7 ("perf/core: Set cgroup in CPU contexts for new cgroup events") failed to verify that event->cgrp is actually the scheduled cgroup in a CPU before setting cpuctx->cgrp. This patch fixes that. Now that there is a different path for scheduled and unscheduled cgroup, add a warning to catch when cpuctx->cgrp is still set after the last cgroup event has been unsheduled. To verify the bug: # Create 2 cgroups. mkdir /dev/cgroups/devices/g1 mkdir /dev/cgroups/devices/g2 # launch a task, bind it to a cpu and move it to g1 CPU=2 while :; do : ; done & P=$! taskset -pc $CPU $P echo $P > /dev/cgroups/devices/g1/tasks # monitor g2 (it runs no tasks) and observe output perf stat -e cycles -I 1000 -C $CPU -G g2 # time counts unit events 1.000091408 7,579,527 cycles g2 2.000350111 cycles g2 3.000589181 cycles g2 4.000771428 cycles g2 # note first line that displays that a task run in g2, despite # g2 having no tasks. This is because cpuctx->cgrp was wrongly # set when context of new event was installed. # After applying the fix we obtain the right output: perf stat -e cycles -I 1000 -C $CPU -G g2 # time counts unit events 1.000119615 cycles g2 2.000389430 cycles g2 3.000590962 cycles g2 Signed-off-by: David Carrillo-Cisneros Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Nilay Vaish Cc: Paul Turner Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vegard Nossum Link: http://lkml.kernel.org/r/1478026378-86083-1-git-send-email-davidcc@google.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 0e292132efac..ff230bb4a02e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -902,6 +902,17 @@ list_update_cgroup_event(struct perf_event *event, * this will always be called from the right CPU. */ cpuctx = __get_cpu_context(ctx); + + /* Only set/clear cpuctx->cgrp if current task uses event->cgrp. */ + if (perf_cgroup_from_task(current, ctx) != event->cgrp) { + /* + * We are removing the last cpu event in this context. + * If that event is not active in this cpu, cpuctx->cgrp + * should've been cleared by perf_cgroup_switch. + */ + WARN_ON_ONCE(!add && cpuctx->cgrp); + return; + } cpuctx->cgrp = add ? event->cgrp : NULL; } From c8eaf3479e521e973eb2d4111b8ee8f5b7b564ab Mon Sep 17 00:00:00 2001 From: Filip Matusiak Date: Wed, 2 Nov 2016 10:04:26 +0100 Subject: [PATCH 243/503] mac80211: Ignore VHT IE from peer with wrong rx_mcs_map This is a workaround for VHT-enabled STAs which break the spec and have the VHT-MCS Rx map filled in with value 3 for all eight spacial streams, an example is AR9462 in AP mode. As per spec, in section 22.1.1 Introduction to the VHT PHY A VHT STA shall support at least single spactial stream VHT-MCSs 0 to 7 (transmit and receive) in all supported channel widths. Some devices in STA mode will get firmware assert when trying to associate, examples are QCA9377 & QCA6174. Packet example of broken VHT Cap IE of AR9462: Tag: VHT Capabilities (IEEE Std 802.11ac/D3.1) Tag Number: VHT Capabilities (IEEE Std 802.11ac/D3.1) (191) Tag length: 12 VHT Capabilities Info: 0x00000000 VHT Supported MCS Set Rx MCS Map: 0xffff .... .... .... ..11 = Rx 1 SS: Not Supported (0x0003) .... .... .... 11.. = Rx 2 SS: Not Supported (0x0003) .... .... ..11 .... = Rx 3 SS: Not Supported (0x0003) .... .... 11.. .... = Rx 4 SS: Not Supported (0x0003) .... ..11 .... .... = Rx 5 SS: Not Supported (0x0003) .... 11.. .... .... = Rx 6 SS: Not Supported (0x0003) ..11 .... .... .... = Rx 7 SS: Not Supported (0x0003) 11.. .... .... .... = Rx 8 SS: Not Supported (0x0003) ...0 0000 0000 0000 = Rx Highest Long GI Data Rate (in Mb/s, 0 = subfield not in use): 0x0000 Tx MCS Map: 0xffff ...0 0000 0000 0000 = Tx Highest Long GI Data Rate (in Mb/s, 0 = subfield not in use): 0x0000 Signed-off-by: Filip Matusiak Signed-off-by: Johannes Berg --- net/mac80211/vht.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index ee715764a828..6832bf6ab69f 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -270,6 +270,22 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, vht_cap->vht_mcs.tx_mcs_map |= cpu_to_le16(peer_tx << i * 2); } + /* + * This is a workaround for VHT-enabled STAs which break the spec + * and have the VHT-MCS Rx map filled in with value 3 for all eight + * spacial streams, an example is AR9462. + * + * As per spec, in section 22.1.1 Introduction to the VHT PHY + * A VHT STA shall support at least single spactial stream VHT-MCSs + * 0 to 7 (transmit and receive) in all supported channel widths. + */ + if (vht_cap->vht_mcs.rx_mcs_map == cpu_to_le16(0xFFFF)) { + vht_cap->vht_supported = false; + sdata_info(sdata, "Ignoring VHT IE from %pM due to invalid rx_mcs_map\n", + sta->addr); + return; + } + /* finally set up the bandwidth */ switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: From 6c18a6b4e79953ba38bc110e1e42ac45a951b25f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 3 Nov 2016 12:12:47 +0100 Subject: [PATCH 244/503] Revert "mac80211: allow using AP_LINK_PS with mac80211-generated TIM IE" This reverts commit c68df2e7be0c1238ea3c281fd744a204ef3b15a0. __sta_info_recalc_tim turns into a no-op if local->ops->set_tim is not set. This prevents the beacon TIM bit from being set for all drivers that do not implement this op (almost all of them), thus thoroughly essential AP mode powersave functionality. Cc: Emmanuel Grumbach Fixes: c68df2e7be0c ("mac80211: allow using AP_LINK_PS with mac80211-generated TIM IE") Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 78e9ecbc96e6..8e05032689f0 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -688,7 +688,7 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending) } /* No need to do anything if the driver does all */ - if (!local->ops->set_tim) + if (ieee80211_hw_check(&local->hw, AP_LINK_PS)) return; if (sta->dead) From 8fdd136f2200e6b7237e7e48453f4a591d768e3e Mon Sep 17 00:00:00 2001 From: "Pedersen, Thomas" Date: Mon, 31 Oct 2016 11:28:40 -0700 Subject: [PATCH 245/503] cfg80211: add bitrate for 20MHz MCS 9 Some drivers (ath10k) report MCS 9 @ 20MHz, which technically isn't defined. To get more meaningful value than 0 out of this however, just extrapolate a bitrate from ratio of MCS 7 and 9 in channels where it is allowed. Signed-off-by: Thomas Pedersen [add a comment about it in the code] Signed-off-by: Johannes Berg --- net/wireless/util.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index 5ea12afc7706..659b507b347d 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1158,7 +1158,8 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate) 58500000, 65000000, 78000000, - 0, + /* not in the spec, but some devices use this: */ + 86500000, }, { 13500000, 27000000, From c1f4c9ede3c799da9f920c1df9ce524145781637 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 4 Nov 2016 10:27:52 +0100 Subject: [PATCH 246/503] mac80211: update A-MPDU flag on tx dequeue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sequence number counter is used to derive the starting sequence number. Since that counter is updated on tx dequeue, the A-MPDU flag needs to be up to date at the tme of dequeue as well. This patch prevents sending more A-MPDU frames after the session has been terminated and also ensures that aggregation starts right after the session has been established Fixes: bb42f2d13ffc ("mac80211: Move reorder-sensitive TX handlers to after TXQ dequeue") Signed-off-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 1c56abc49627..d08a8492a846 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3426,6 +3426,11 @@ begin: goto begin; } + if (test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags)) + info->flags |= IEEE80211_TX_CTL_AMPDU; + else + info->flags &= ~IEEE80211_TX_CTL_AMPDU; + if (info->control.flags & IEEE80211_TX_CTRL_FAST_XMIT) { struct sta_info *sta = container_of(txq->sta, struct sta_info, sta); From fff712cbe38b6d4e211df9c22aabcfd9739c1c2a Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 4 Nov 2016 10:27:53 +0100 Subject: [PATCH 247/503] mac80211: remove bogus skb vif assignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The call to ieee80211_txq_enqueue overwrites the vif pointer with the codel enqueue time, so setting it just before that call makes no sense. Signed-off-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index d08a8492a846..fb73e86bdf41 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1501,7 +1501,6 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local, struct sta_info *sta, struct sk_buff *skb) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct fq *fq = &local->fq; struct ieee80211_vif *vif; struct txq_info *txqi; @@ -1526,8 +1525,6 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local, if (!txqi) return false; - info->control.vif = vif; - spin_lock_bh(&fq->lock); ieee80211_txq_enqueue(local, txqi, skb); spin_unlock_bh(&fq->lock); From a786f96da0d657bf8bd56d8eebb3f31cc45605bb Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 4 Nov 2016 10:27:54 +0100 Subject: [PATCH 248/503] mac80211: fix A-MSDU aggregation with fast-xmit + txq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A-MSDU aggregation alters the QoS header after a frame has been enqueued, so it needs to be ready before enqueue and not overwritten again afterwards Fixes: bb42f2d13ffc ("mac80211: Move reorder-sensitive TX handlers to after TXQ dequeue") Signed-off-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index fb73e86bdf41..bd5f4be89435 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3210,7 +3210,6 @@ static void ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata, if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; - *ieee80211_get_qos_ctl(hdr) = tid; hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid); } else { info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; @@ -3335,6 +3334,11 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, (tid_tx ? IEEE80211_TX_CTL_AMPDU : 0); info->control.flags = IEEE80211_TX_CTRL_FAST_XMIT; + if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { + tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; + *ieee80211_get_qos_ctl(hdr) = tid; + } + __skb_queue_head_init(&tx.skbs); tx.flags = IEEE80211_TX_UNICAST; From 4fb7f8af1f4c14a2a6cee7c9ff0cf999d918c72d Mon Sep 17 00:00:00 2001 From: Benjamin Beichler Date: Fri, 11 Nov 2016 17:37:56 +0100 Subject: [PATCH 249/503] mac80211_hwsim: fix beacon delta calculation Due to the cast from uint32_t to int64_t, a wrong next beacon timing is calculated and effectively the beacon timer stops working. This is especially bad for 802.11s mesh networks, because discovery breaks without beacons. Signed-off-by: Benjamin Beichler Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 431f13b4faf6..d3bad5779376 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -826,7 +826,7 @@ static void mac80211_hwsim_set_tsf(struct ieee80211_hw *hw, data->bcn_delta = do_div(delta, bcn_int); } else { data->tsf_offset -= delta; - data->bcn_delta = -do_div(delta, bcn_int); + data->bcn_delta = -(s64)do_div(delta, bcn_int); } } From 24c66dfd569c4744fc43aea638155ad2dc1499d8 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 15 Nov 2016 13:55:59 +0000 Subject: [PATCH 250/503] ARM: fix backtrace Recent kernels have changed their behaviour to be more inconsistent when handling printk continuations. With todays kernels, the output looks sane on the console, but dmesg splits individual printk()s which do not have the KERN_CONT prefix into separate lines. Since the assembly code is not trivial to add the KERN_CONT, and we ideally want to avoid using KERN_CONT (as multiple printk()s can race between different threads), convert the assembly dumping the register values to C code, and have the C code build the output a line at a time before dumping to the console. This avoids the KERN_CONT issue, and also avoids situations where the output is intermixed with other console activity. Signed-off-by: Russell King --- arch/arm/kernel/traps.c | 20 ++++++++++++++++++++ arch/arm/lib/backtrace.S | 37 +++---------------------------------- 2 files changed, 23 insertions(+), 34 deletions(-) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index bc698383e822..9688ec0c6ef4 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -74,6 +74,26 @@ void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long dump_mem("", "Exception stack", frame + 4, frame + 4 + sizeof(struct pt_regs)); } +void dump_backtrace_stm(u32 *stack, u32 instruction) +{ + char str[80], *p; + unsigned int x; + int reg; + + for (reg = 10, x = 0, p = str; reg >= 0; reg--) { + if (instruction & BIT(reg)) { + p += sprintf(p, " r%d:%08x", reg, *stack--); + if (++x == 6) { + x = 0; + p = str; + printk("%s\n", str); + } + } + } + if (p != str) + printk("%s\n", str); +} + #ifndef CONFIG_ARM_UNWIND /* * Stack pointers should always be within the kernels view of diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S index fab5a50503ae..7d7952e5a3b1 100644 --- a/arch/arm/lib/backtrace.S +++ b/arch/arm/lib/backtrace.S @@ -10,6 +10,7 @@ * 27/03/03 Ian Molton Clean up CONFIG_CPU * */ +#include #include #include .text @@ -83,13 +84,13 @@ for_each_frame: tst frame, mask @ Check for address exceptions teq r3, r1, lsr #11 ldreq r0, [frame, #-8] @ get sp subeq r0, r0, #4 @ point at the last arg - bleq .Ldumpstm @ dump saved registers + bleq dump_backtrace_stm @ dump saved registers 1004: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., fp, ip, lr, pc} ldr r3, .Ldsi @ instruction exists, teq r3, r1, lsr #11 subeq r0, frame, #16 - bleq .Ldumpstm @ dump saved registers + bleq dump_backtrace_stm @ dump saved registers teq sv_fp, #0 @ zero saved fp means beq no_frame @ no further frames @@ -112,38 +113,6 @@ ENDPROC(c_backtrace) .long 1004b, 1006b .popsection -#define instr r4 -#define reg r5 -#define stack r6 - -.Ldumpstm: stmfd sp!, {instr, reg, stack, r7, lr} - mov stack, r0 - mov instr, r1 - mov reg, #10 - mov r7, #0 -1: mov r3, #1 - ARM( tst instr, r3, lsl reg ) - THUMB( lsl r3, reg ) - THUMB( tst instr, r3 ) - beq 2f - add r7, r7, #1 - teq r7, #6 - moveq r7, #0 - adr r3, .Lcr - addne r3, r3, #1 @ skip newline - ldr r2, [stack], #-4 - mov r1, reg - adr r0, .Lfp - bl printk -2: subs reg, reg, #1 - bpl 1b - teq r7, #0 - adrne r0, .Lcr - blne printk - ldmfd sp!, {instr, reg, stack, r7, pc} - -.Lfp: .asciz " r%d:%08x%s" -.Lcr: .asciz "\n" .Lbad: .asciz "Backtrace aborted due to bad frame pointer <%p>\n" .align .Ldsi: .word 0xe92dd800 >> 11 @ stmfd sp!, {... fp, ip, lr, pc} From 544457fa278216c5fcea6a16e9b2ee8aadaca0ca Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 1 Nov 2016 21:58:36 +0100 Subject: [PATCH 251/503] ARM: 8624/1: proc-v7m.S: fix init section name There is no .text.init sections. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/mm/proc-v7m.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index f6d333f09bfe..8dea61640cc1 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -96,7 +96,7 @@ ENTRY(cpu_cm7_proc_fin) ret lr ENDPROC(cpu_cm7_proc_fin) - .section ".text.init", #alloc, #execinstr + .section ".init.text", #alloc, #execinstr __v7m_cm7_setup: mov r8, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC| V7M_SCB_CCR_BP) From 256ff1cf6b44cba9c9c2059f4516259e9319a808 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 15 Nov 2016 14:00:53 +0100 Subject: [PATCH 252/503] ARM: 8628/1: dma-mapping: preallocate DMA-debug hash tables in core_initcall fs_initcall is definitely too late to initialize DMA-debug hash tables, because some drivers might get probed and use DMA mapping framework already in core_initcall. Late initialization of DMA-debug results in false warning about accessing memory, that was not allocated, like this one: ------------[ cut here ]------------ WARNING: CPU: 5 PID: 1 at lib/dma-debug.c:1104 check_unmap+0xa1c/0xe50 exynos-sysmmu 10a60000.sysmmu: DMA-API: device driver tries to free DMA memory it has not allocated [device address=0x000000006ebd0000] [size=16384 bytes] Modules linked in: CPU: 5 PID: 1 Comm: swapper/0 Not tainted 4.9.0-rc5-00028-g39dde3d-dirty #44 Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x20/0x24) [] (show_stack) from [] (dump_stack+0x84/0xa0) [] (dump_stack) from [] (__warn+0x14c/0x180) [] (__warn) from [] (warn_slowpath_fmt+0x48/0x50) [] (warn_slowpath_fmt) from [] (check_unmap+0xa1c/0xe50) [] (check_unmap) from [] (debug_dma_unmap_page+0x98/0xc8) [] (debug_dma_unmap_page) from [] (exynos_iommu_domain_free+0x158/0x380) [] (exynos_iommu_domain_free) from [] (iommu_domain_free+0x34/0x60) [] (iommu_domain_free) from [] (release_iommu_mapping+0x30/0xb8) [] (release_iommu_mapping) from [] (arm_iommu_release_mapping+0x4c/0x50) [] (arm_iommu_release_mapping) from [] (s5p_mfc_probe+0x640/0x80c) [] (s5p_mfc_probe) from [] (platform_drv_probe+0x70/0x148) [] (platform_drv_probe) from [] (driver_probe_device+0x12c/0x6b0) [] (driver_probe_device) from [] (__driver_attach+0x128/0x17c) [] (__driver_attach) from [] (bus_for_each_dev+0x88/0xc8) [] (bus_for_each_dev) from [] (driver_attach+0x34/0x58) [] (driver_attach) from [] (bus_add_driver+0x18c/0x32c) [] (bus_add_driver) from [] (driver_register+0x98/0x148) [] (driver_register) from [] (__platform_driver_register+0x58/0x74) [] (__platform_driver_register) from [] (s5p_mfc_driver_init+0x1c/0x20) [] (s5p_mfc_driver_init) from [] (do_one_initcall+0x64/0x258) [] (do_one_initcall) from [] (kernel_init_freeable+0x3d0/0x4d0) [] (kernel_init_freeable) from [] (kernel_init+0x18/0x134) [] (kernel_init) from [] (ret_from_fork+0x14/0x3c) ---[ end trace dc54c54bd3581296 ]--- This patch moves initialization of DMA-debug to core_initcall. This is safe from the initialization perspective. dma_debug_do_init() internally calls debugfs functions and debugfs also gets initialised at core_initcall(), and that is earlier than arch code in the link order, so it will get initialized just before the DMA-debug. Reported-by: Seung-Woo Kim Signed-off-by: Marek Szyprowski Signed-off-by: Russell King --- arch/arm/mm/dma-mapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ab4f74536057..ab7710002ba6 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1167,7 +1167,7 @@ static int __init dma_debug_do_init(void) dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); return 0; } -fs_initcall(dma_debug_do_init); +core_initcall(dma_debug_do_init); #ifdef CONFIG_ARM_DMA_USE_IOMMU From c6a385539175ebc603da53aafb7753d39089f32e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 14 Nov 2016 19:41:31 +0100 Subject: [PATCH 253/503] kbuild: Steal gcc's pie from the very beginning So Sebastian turned off the PIE for kernel builds but that was too late - Kbuild.include already uses KBUILD_CFLAGS and trying to disable gcc options with, say cc-disable-warning, fails: gcc -D__KERNEL__ -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs ... -Wno-sign-compare -fno-asynchronous-unwind-tables -Wframe-address -c -x c /dev/null -o .31392.tmp /dev/null:1:0: error: code model kernel does not support PIC mode because that returns an error and we can't disable the warning. For example in this case: KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,) which leads to gcc issuing all those warnings again. So let's turn off PIE/PIC at the earliest possible moment, when we declare KBUILD_CFLAGS so that cc-disable-warning picks it up too. Also, we need the $(call cc-option ...) because -fno-PIE is supported since gcc v3.4 and our lowest supported gcc version is 3.2 right now. Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Cc: Ben Hutchings Cc: Sebastian Andrzej Siewior Signed-off-by: Michal Marek --- Makefile | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 58fc5d935ce6..77ac3f88ec37 100644 --- a/Makefile +++ b/Makefile @@ -399,11 +399,12 @@ KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -fno-common \ -Werror-implicit-function-declaration \ -Wno-format-security \ - -std=gnu89 + -std=gnu89 $(call cc-option,-fno-PIE) + KBUILD_AFLAGS_KERNEL := KBUILD_CFLAGS_KERNEL := -KBUILD_AFLAGS := -D__ASSEMBLY__ +KBUILD_AFLAGS := -D__ASSEMBLY__ $(call cc-option,-fno-PIE) KBUILD_AFLAGS_MODULE := -DMODULE KBUILD_CFLAGS_MODULE := -DMODULE KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds @@ -622,8 +623,6 @@ include arch/$(SRCARCH)/Makefile KBUILD_CFLAGS += $(call cc-option,-fno-delete-null-pointer-checks,) KBUILD_CFLAGS += $(call cc-disable-warning,maybe-uninitialized,) KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,) -KBUILD_CFLAGS += $(call cc-option,-fno-PIE) -KBUILD_AFLAGS += $(call cc-option,-fno-PIE) ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION KBUILD_CFLAGS += $(call cc-option,-ffunction-sections,) From f6c365fad1034c66f9969d1435ffad9102f966bb Mon Sep 17 00:00:00 2001 From: Jia Jie Ho Date: Mon, 14 Nov 2016 17:06:49 +0800 Subject: [PATCH 254/503] net: ethernet: Fix SGMII unable to switch speed and autonego failure TSE PCS SGMII ethernet has an issue where switching speed doesn't work caused by a faulty register macro offset. This fixes the issue. Signed-off-by: Jia Jie Ho Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c index 2920e2ee3864..489ef146201e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c +++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c @@ -63,8 +63,8 @@ #define TSE_PCS_SGMII_LINK_TIMER_0 0x0D40 #define TSE_PCS_SGMII_LINK_TIMER_1 0x0003 #define TSE_PCS_SW_RESET_TIMEOUT 100 -#define TSE_PCS_USE_SGMII_AN_MASK BIT(2) -#define TSE_PCS_USE_SGMII_ENA BIT(1) +#define TSE_PCS_USE_SGMII_AN_MASK BIT(1) +#define TSE_PCS_USE_SGMII_ENA BIT(0) #define SGMII_ADAPTER_CTRL_REG 0x00 #define SGMII_ADAPTER_DISABLE 0x0001 From a5d906bb261cde5f881a949d3b0fbaa285dcc574 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 15 Nov 2016 18:05:33 +0800 Subject: [PATCH 255/503] usb: chipidea: move the lock initialization to core file This can fix below dump when the lock is accessed at host mode due to it is not initialized. [ 46.119638] INFO: trying to register non-static key. [ 46.124643] the code is fine but needs lockdep annotation. [ 46.130144] turning off the locking correctness validator. [ 46.135659] CPU: 0 PID: 690 Comm: cat Not tainted 4.9.0-rc3-00079-g4b75f1d #1210 [ 46.143075] Hardware name: Freescale i.MX6 SoloX (Device Tree) [ 46.148923] Backtrace: [ 46.151448] [] (dump_backtrace) from [] (show_stack+0x18/0x1c) [ 46.159038] r7:edf52000 [ 46.161412] r6:60000193 [ 46.163967] r5:00000000 [ 46.165035] r4:c0e25c2c [ 46.169109] [] (show_stack) from [] (dump_stack+0xb4/0xe8) [ 46.176362] [] (dump_stack) from [] (register_lock_class+0x4fc/0x56c) [ 46.184554] r10:c0e25d24 [ 46.187014] r9:edf53e70 [ 46.189569] r8:c1642444 [ 46.190637] r7:ee9da024 [ 46.193191] r6:00000000 [ 46.194258] r5:00000000 [ 46.196812] r4:00000000 [ 46.199185] r3:00000001 [ 46.203259] [] (register_lock_class) from [] (__lock_acquire+0x80/0x10f0) [ 46.211797] r10:c0e25d24 [ 46.214257] r9:edf53e70 [ 46.216813] r8:ee9da024 [ 46.217880] r7:c1642444 [ 46.220435] r6:edcd1800 [ 46.221502] r5:60000193 [ 46.224057] r4:00000000 [ 46.227953] [] (__lock_acquire) from [] (lock_acquire+0x74/0x94) [ 46.235710] r10:00000001 [ 46.238169] r9:edf53e70 [ 46.240723] r8:edf53f80 [ 46.241790] r7:00000001 [ 46.244344] r6:00000001 [ 46.245412] r5:60000193 [ 46.247966] r4:00000000 [ 46.251866] [] (lock_acquire) from [] (_raw_spin_lock_irqsave+0x40/0x54) [ 46.260319] r7:ee1c6a00 [ 46.262691] r6:c062a570 [ 46.265247] r5:20000113 [ 46.266314] r4:ee9da014 [ 46.270393] [] (_raw_spin_lock_irqsave) from [] (ci_port_test_show+0x2c/0x70) [ 46.279280] r6:eebd2000 [ 46.281652] r5:ee9da010 [ 46.284207] r4:ee9da014 [ 46.286810] [] (ci_port_test_show) from [] (seq_read+0x1ac/0x4f8) [ 46.294655] r9:edf53e70 [ 46.297028] r8:edf53f80 [ 46.299583] r7:ee1c6a00 [ 46.300650] r6:00000001 [ 46.303205] r5:00000000 [ 46.304273] r4:eebd2000 [ 46.306850] [] (seq_read) from [] (full_proxy_read+0x54/0x6c) [ 46.314348] r10:00000000 [ 46.316808] r9:c0a6ad30 [ 46.319363] r8:edf53f80 [ 46.320430] r7:00020000 [ 46.322986] r6:b6de3000 [ 46.324053] r5:ee1c6a00 [ 46.326607] r4:c0248b58 [ 46.330505] [] (full_proxy_read) from [] (__vfs_read+0x34/0x118) [ 46.338262] r9:edf52000 [ 46.340635] r8:c0107fc4 [ 46.343190] r7:00020000 [ 46.344257] r6:edf53f80 [ 46.346812] r5:c039e810 [ 46.347879] r4:ee1c6a00 [ 46.350447] [] (__vfs_read) from [] (vfs_read+0x8c/0x11c) [ 46.357597] r9:edf52000 [ 46.359969] r8:c0107fc4 [ 46.362524] r7:edf53f80 [ 46.363592] r6:b6de3000 [ 46.366147] r5:ee1c6a00 [ 46.367214] r4:00020000 [ 46.369782] [] (vfs_read) from [] (SyS_read+0x4c/0xa8) [ 46.376672] r8:c0107fc4 [ 46.379045] r7:00020000 [ 46.381600] r6:b6de3000 [ 46.382667] r5:ee1c6a00 [ 46.385222] r4:ee1c6a00 [ 46.387817] [] (SyS_read) from [] (ret_fast_syscall+0x0/0x1c) [ 46.395314] r7:00000003 [ 46.397687] r6:b6de3000 [ 46.400243] r5:00020000 [ 46.401310] r4:00020000 Cc: Fixes: 26c696c678c4 ("USB: Chipidea: rename struct ci13xxx variables from udc to ci") Signed-off-by: Peter Chen Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/core.c | 1 + drivers/usb/chipidea/udc.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index 69426e644d17..3dbb4a21ab44 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -914,6 +914,7 @@ static int ci_hdrc_probe(struct platform_device *pdev) if (!ci) return -ENOMEM; + spin_lock_init(&ci->lock); ci->dev = dev; ci->platdata = dev_get_platdata(dev); ci->imx28_write_fix = !!(ci->platdata->flags & diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 661f43fe0f9e..c9e80ad48fdc 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -1889,8 +1889,6 @@ static int udc_start(struct ci_hdrc *ci) struct usb_otg_caps *otg_caps = &ci->platdata->ci_otg_caps; int retval = 0; - spin_lock_init(&ci->lock); - ci->gadget.ops = &usb_gadget_ops; ci->gadget.speed = USB_SPEED_UNKNOWN; ci->gadget.max_speed = USB_SPEED_HIGH; From cb434658a8ff151c221a9ac1d44fb6788100cd0d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 15 Nov 2016 11:39:08 -0500 Subject: [PATCH 256/503] drm/amdgpu/powerplay: drop a redundant NULL check Left over from an earlier rev of the patch. Acked-by: Colin Ian King Cc: Dan Carpenter Cc: Colin King Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index b0c929dd8beb..13f2b705ea49 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -1469,8 +1469,6 @@ static int smu7_get_evv_voltages(struct pp_hwmgr *hwmgr) table_info->vddgfx_lookup_table, vv_id, &sclk)) { if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_ClockStretcher)) { - if (table_info == NULL) - return -EINVAL; sclk_table = table_info->vdd_dep_on_sclk; for (j = 1; j < sclk_table->count; j++) { From 1da2c326e43b0834105993d13610647337bbad67 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Fri, 11 Nov 2016 11:24:29 +0800 Subject: [PATCH 257/503] drm/amdgpu:fix vpost_needed routine 1,cleanup description/comments 2,for FIJI & passthrough, force post when smc fw version below 22.15 3,for other cases, follow regular rules Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 27 ++++++---------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 7ca07e7b25c1..3161d77bf299 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -658,12 +658,10 @@ static bool amdgpu_vpost_needed(struct amdgpu_device *adev) return false; if (amdgpu_passthrough(adev)) { - /* for FIJI: In whole GPU pass-through virtualization case - * old smc fw won't clear some registers (e.g. MEM_SIZE, BIOS_SCRATCH) - * so amdgpu_card_posted return false and driver will incorrectly skip vPost. - * but if we force vPost do in pass-through case, the driver reload will hang. - * whether doing vPost depends on amdgpu_card_posted if smc version is above - * 00160e00 for FIJI. + /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot + * some old smc fw still need driver do vPost otherwise gpu hang, while + * those smc fw version above 22.15 doesn't have this flaw, so we force + * vpost executed for smc version below 22.15 */ if (adev->asic_type == CHIP_FIJI) { int err; @@ -674,22 +672,11 @@ static bool amdgpu_vpost_needed(struct amdgpu_device *adev) return true; fw_ver = *((uint32_t *)adev->pm.fw->data + 69); - if (fw_ver >= 0x00160e00) - return !amdgpu_card_posted(adev); + if (fw_ver < 0x00160e00) + return true; } - } else { - /* in bare-metal case, amdgpu_card_posted return false - * after system reboot/boot, and return true if driver - * reloaded. - * we shouldn't do vPost after driver reload otherwise GPU - * could hang. - */ - if (amdgpu_card_posted(adev)) - return false; } - - /* we assume vPost is neede for all other cases */ - return true; + return !amdgpu_card_posted(adev); } /** From e1fafdcbe0e3e769c6a83317dd845bc99b4fe61d Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 10 Oct 2016 06:14:45 -0700 Subject: [PATCH 258/503] IB/rdmavt: rdmavt can handle non aligned page maps The initial code for rdmavt carried with it a restriction that was a vestige from the qib driver, that to dma map a page it had to be less than a page size. This is not the case on modern hardware, both qib and hfi1 will be just fine with unaligned map requests. This fixes a 4.8 regression where by an IPoIB transfer of > PAGE_SIZE will hang because the dma map page call always fails. This was introduced after commit 5faba5469522 ("IB/ipoib: Report SG feature regardless of HW UD CSUM capability") added the capability to use SG by default. Rather than override this, the HW supports it, so allow SG. Cc: Stable # 4.8 Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/dma.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/dma.c b/drivers/infiniband/sw/rdmavt/dma.c index 01f71caa3ac4..f2cefb0d9180 100644 --- a/drivers/infiniband/sw/rdmavt/dma.c +++ b/drivers/infiniband/sw/rdmavt/dma.c @@ -90,9 +90,6 @@ static u64 rvt_dma_map_page(struct ib_device *dev, struct page *page, if (WARN_ON(!valid_dma_direction(direction))) return BAD_DMA_ADDRESS; - if (offset + size > PAGE_SIZE) - return BAD_DMA_ADDRESS; - addr = (u64)page_address(page); if (addr) addr += offset; From 39eb2795f19233330bc14a8450b4042d784b15a7 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Mon, 10 Oct 2016 06:14:50 -0700 Subject: [PATCH 259/503] IB/hfi1: Remove redundant sysfs irq affinity entry The IRQ affinity entry is not needed after the irq notifier patch has been added to the hfi1 driver. The irq affinity settings for SDMA engine should be set using the standard /proc/irq// interface. Reviewed-by: Jianxin Xiong Signed-off-by: Tadeusz Struk Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/affinity.c | 72 --------------------------- drivers/infiniband/hw/hfi1/affinity.h | 4 -- drivers/infiniband/hw/hfi1/sysfs.c | 25 ---------- 3 files changed, 101 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index a26a9a0bfc41..67ea85a56945 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -775,75 +775,3 @@ void hfi1_put_proc_affinity(int cpu) } mutex_unlock(&affinity->lock); } - -int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, - size_t count) -{ - struct hfi1_affinity_node *entry; - cpumask_var_t mask; - int ret, i; - - mutex_lock(&node_affinity.lock); - entry = node_affinity_lookup(dd->node); - - if (!entry) { - ret = -EINVAL; - goto unlock; - } - - ret = zalloc_cpumask_var(&mask, GFP_KERNEL); - if (!ret) { - ret = -ENOMEM; - goto unlock; - } - - ret = cpulist_parse(buf, mask); - if (ret) - goto out; - - if (!cpumask_subset(mask, cpu_online_mask) || cpumask_empty(mask)) { - dd_dev_warn(dd, "Invalid CPU mask\n"); - ret = -EINVAL; - goto out; - } - - /* reset the SDMA interrupt affinity details */ - init_cpu_mask_set(&entry->def_intr); - cpumask_copy(&entry->def_intr.mask, mask); - - /* Reassign the affinity for each SDMA interrupt. */ - for (i = 0; i < dd->num_msix_entries; i++) { - struct hfi1_msix_entry *msix; - - msix = &dd->msix_entries[i]; - if (msix->type != IRQ_SDMA) - continue; - - ret = get_irq_affinity(dd, msix); - - if (ret) - break; - } -out: - free_cpumask_var(mask); -unlock: - mutex_unlock(&node_affinity.lock); - return ret ? ret : strnlen(buf, PAGE_SIZE); -} - -int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf) -{ - struct hfi1_affinity_node *entry; - - mutex_lock(&node_affinity.lock); - entry = node_affinity_lookup(dd->node); - - if (!entry) { - mutex_unlock(&node_affinity.lock); - return -EINVAL; - } - - cpumap_print_to_pagebuf(true, buf, &entry->def_intr.mask); - mutex_unlock(&node_affinity.lock); - return strnlen(buf, PAGE_SIZE); -} diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index b89ea3c0ee1a..42e63316afd1 100644 --- a/drivers/infiniband/hw/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h @@ -102,10 +102,6 @@ int hfi1_get_proc_affinity(int); /* Release a CPU used by a user process. */ void hfi1_put_proc_affinity(int); -int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf); -int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, - size_t count); - struct hfi1_affinity_node { int node; struct cpu_mask_set def_intr; diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index edba22461a9c..919a5474e651 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -49,7 +49,6 @@ #include "hfi.h" #include "mad.h" #include "trace.h" -#include "affinity.h" /* * Start of per-port congestion control structures and support code @@ -623,27 +622,6 @@ static ssize_t show_tempsense(struct device *device, return ret; } -static ssize_t show_sdma_affinity(struct device *device, - struct device_attribute *attr, char *buf) -{ - struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); - struct hfi1_devdata *dd = dd_from_dev(dev); - - return hfi1_get_sdma_affinity(dd, buf); -} - -static ssize_t store_sdma_affinity(struct device *device, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); - struct hfi1_devdata *dd = dd_from_dev(dev); - - return hfi1_set_sdma_affinity(dd, buf, count); -} - /* * end of per-unit (or driver, in some cases, but replicated * per unit) functions @@ -658,8 +636,6 @@ static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL); static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL); static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset); -static DEVICE_ATTR(sdma_affinity, S_IWUSR | S_IRUGO, show_sdma_affinity, - store_sdma_affinity); static struct device_attribute *hfi1_attributes[] = { &dev_attr_hw_rev, @@ -670,7 +646,6 @@ static struct device_attribute *hfi1_attributes[] = { &dev_attr_boardversion, &dev_attr_tempsense, &dev_attr_chip_reset, - &dev_attr_sdma_affinity, }; int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, From d9ac4555fb2bcd6b794aaa0b39acad81111d9f42 Mon Sep 17 00:00:00 2001 From: Jakub Pawlak Date: Mon, 10 Oct 2016 06:14:56 -0700 Subject: [PATCH 260/503] IB/hfi1: Fix integrity check flags default values Prevent setting up integrity check flags when module is loaded with NO_INTEGRITY capability. Reviewed-by: Dean Luick Signed-off-by: Jakub Pawlak Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/hfi.h | 40 +++++++++++++++++++++---------- drivers/infiniband/hw/hfi1/pio.c | 13 +++------- drivers/infiniband/hw/hfi1/sdma.c | 19 ++------------- 3 files changed, 32 insertions(+), 40 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 7eef11b316ff..3c06d204bafd 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1848,7 +1848,13 @@ extern struct mutex hfi1_mutex; static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd, u16 ctxt_type) { - u64 base_sc_integrity = + u64 base_sc_integrity; + + /* No integrity checks if HFI1_CAP_NO_INTEGRITY is set */ + if (HFI1_CAP_IS_KSET(NO_INTEGRITY)) + return 0; + + base_sc_integrity = SEND_CTXT_CHECK_ENABLE_DISALLOW_BYPASS_BAD_PKT_LEN_SMASK | SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK | SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK @@ -1863,7 +1869,6 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd, | SEND_CTXT_CHECK_ENABLE_CHECK_VL_MAPPING_SMASK | SEND_CTXT_CHECK_ENABLE_CHECK_OPCODE_SMASK | SEND_CTXT_CHECK_ENABLE_CHECK_SLID_SMASK - | SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK | SEND_CTXT_CHECK_ENABLE_CHECK_VL_SMASK | SEND_CTXT_CHECK_ENABLE_CHECK_ENABLE_SMASK; @@ -1872,18 +1877,23 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd, else base_sc_integrity |= HFI1_PKT_KERNEL_SC_INTEGRITY; - if (is_ax(dd)) - /* turn off send-side job key checks - A0 */ - return base_sc_integrity & - ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK; + /* turn on send-side job key checks if !A0 */ + if (!is_ax(dd)) + base_sc_integrity |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK; + return base_sc_integrity; } static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd) { - u64 base_sdma_integrity = + u64 base_sdma_integrity; + + /* No integrity checks if HFI1_CAP_NO_INTEGRITY is set */ + if (HFI1_CAP_IS_KSET(NO_INTEGRITY)) + return 0; + + base_sdma_integrity = SEND_DMA_CHECK_ENABLE_DISALLOW_BYPASS_BAD_PKT_LEN_SMASK - | SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK | SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK | SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_LONG_IB_PACKETS_SMASK | SEND_DMA_CHECK_ENABLE_DISALLOW_BAD_PKT_LEN_SMASK @@ -1895,14 +1905,18 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd) | SEND_DMA_CHECK_ENABLE_CHECK_VL_MAPPING_SMASK | SEND_DMA_CHECK_ENABLE_CHECK_OPCODE_SMASK | SEND_DMA_CHECK_ENABLE_CHECK_SLID_SMASK - | SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK | SEND_DMA_CHECK_ENABLE_CHECK_VL_SMASK | SEND_DMA_CHECK_ENABLE_CHECK_ENABLE_SMASK; - if (is_ax(dd)) - /* turn off send-side job key checks - A0 */ - return base_sdma_integrity & - ~SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK; + if (!HFI1_CAP_IS_KSET(STATIC_RATE_CTRL)) + base_sdma_integrity |= + SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK; + + /* turn on send-side job key checks if !A0 */ + if (!is_ax(dd)) + base_sdma_integrity |= + SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK; + return base_sdma_integrity; } diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 50a3a36d9363..d89b8745d4c1 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -668,19 +668,12 @@ void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold) void set_pio_integrity(struct send_context *sc) { struct hfi1_devdata *dd = sc->dd; - u64 reg = 0; u32 hw_context = sc->hw_context; int type = sc->type; - /* - * No integrity checks if HFI1_CAP_NO_INTEGRITY is set, or if - * we're snooping. - */ - if (likely(!HFI1_CAP_IS_KSET(NO_INTEGRITY)) && - dd->hfi1_snoop.mode_flag != HFI1_PORT_SNOOP_MODE) - reg = hfi1_pkt_default_send_ctxt_mask(dd, type); - - write_kctxt_csr(dd, hw_context, SC(CHECK_ENABLE), reg); + write_kctxt_csr(dd, hw_context, + SC(CHECK_ENABLE), + hfi1_pkt_default_send_ctxt_mask(dd, type)); } static u32 get_buffers_allocated(struct send_context *sc) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index fd39bcaa062d..9cbe52d21077 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -2009,11 +2009,6 @@ static void sdma_hw_start_up(struct sdma_engine *sde) write_sde_csr(sde, SD(ENG_ERR_CLEAR), reg); } -#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \ -(r &= ~SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK) - -#define SET_STATIC_RATE_CONTROL_SMASK(r) \ -(r |= SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK) /* * set_sdma_integrity * @@ -2022,19 +2017,9 @@ static void sdma_hw_start_up(struct sdma_engine *sde) static void set_sdma_integrity(struct sdma_engine *sde) { struct hfi1_devdata *dd = sde->dd; - u64 reg; - if (unlikely(HFI1_CAP_IS_KSET(NO_INTEGRITY))) - return; - - reg = hfi1_pkt_base_sdma_integrity(dd); - - if (HFI1_CAP_IS_KSET(STATIC_RATE_CTRL)) - CLEAR_STATIC_RATE_CONTROL_SMASK(reg); - else - SET_STATIC_RATE_CONTROL_SMASK(reg); - - write_sde_csr(sde, SD(CHECK_ENABLE), reg); + write_sde_csr(sde, SD(CHECK_ENABLE), + hfi1_pkt_base_sdma_integrity(dd)); } static void init_sdma_regs( From acd7c8fe14938a315f0ac1b92a92375f7226c2fd Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Tue, 25 Oct 2016 08:57:55 -0700 Subject: [PATCH 261/503] IB/hfi1: Fix an Oops on pci device force remove This patch fixes an Oops on device unbind, when the device is used by a PSM user process. PSM processes access device resources which are freed on device removal. Similar protection exists in uverbs in ib_core for Verbs clients, but PSM doesn't use ib_uverbs hence a separate protection is required for PSM clients. Cc: Jason Gunthorpe Reviewed-by: Ira Weiny Reviewed-by: Dean Luick Reviewed-by: Dennis Dalessandro Signed-off-by: Tadeusz Struk Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 5 +++++ drivers/infiniband/hw/hfi1/file_ops.c | 19 ++++++++++++++++--- drivers/infiniband/hw/hfi1/hfi.h | 4 ++++ drivers/infiniband/hw/hfi1/init.c | 21 +++++++++++++++++++-- 4 files changed, 44 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 9bf5f23544d4..799215255b49 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -14691,6 +14691,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret) goto bail_free_cntrs; + init_completion(&dd->user_comp); + + /* The user refcount starts with one to inidicate an active device */ + atomic_set(&dd->user_refcount, 1); + goto bail; bail_free_rcverr: diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 677efa0e8cd6..bd786b7bd30b 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -172,6 +172,9 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) struct hfi1_devdata, user_cdev); + if (!atomic_inc_not_zero(&dd->user_refcount)) + return -ENXIO; + /* Just take a ref now. Not all opens result in a context assign */ kobject_get(&dd->kobj); @@ -183,11 +186,17 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) fd->rec_cpu_num = -1; /* no cpu affinity by default */ fd->mm = current->mm; atomic_inc(&fd->mm->mm_count); + fp->private_data = fd; + } else { + fp->private_data = NULL; + + if (atomic_dec_and_test(&dd->user_refcount)) + complete(&dd->user_comp); + + return -ENOMEM; } - fp->private_data = fd; - - return fd ? 0 : -ENOMEM; + return 0; } static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, @@ -798,6 +807,10 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) done: mmdrop(fdata->mm); kobject_put(&dd->kobj); + + if (atomic_dec_and_test(&dd->user_refcount)) + complete(&dd->user_comp); + kfree(fdata); return 0; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 3c06d204bafd..368e96c109a5 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1174,6 +1174,10 @@ struct hfi1_devdata { spinlock_t aspm_lock; /* Number of verbs contexts which have disabled ASPM */ atomic_t aspm_disabled_cnt; + /* Keeps track of user space clients */ + atomic_t user_refcount; + /* Used to wait for outstanding user space clients before dev removal */ + struct completion user_comp; struct hfi1_affinity *affinity; struct rhashtable sdma_rht; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 60db61536fed..e28a6b633ea9 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1538,12 +1538,31 @@ bail: return ret; } +static void wait_for_clients(struct hfi1_devdata *dd) +{ + /* + * Remove the device init value and complete the device if there is + * no clients or wait for active clients to finish. + */ + if (atomic_dec_and_test(&dd->user_refcount)) + complete(&dd->user_comp); + + wait_for_completion(&dd->user_comp); +} + static void remove_one(struct pci_dev *pdev) { struct hfi1_devdata *dd = pci_get_drvdata(pdev); /* close debugfs files before ib unregister */ hfi1_dbg_ibdev_exit(&dd->verbs_dev); + + /* remove the /dev hfi1 interface */ + hfi1_device_remove(dd); + + /* wait for existing user space clients to finish */ + wait_for_clients(dd); + /* unregister from IB core */ hfi1_unregister_ib_device(dd); @@ -1558,8 +1577,6 @@ static void remove_one(struct pci_dev *pdev) /* wait until all of our (qsfp) queue_work() calls complete */ flush_workqueue(ib_wq); - hfi1_device_remove(dd); - postinit_cleanup(dd); } From 83fb4af6800deb4f3d19b297df6148cda5c016de Mon Sep 17 00:00:00 2001 From: Krzysztof Blaszkowski Date: Mon, 17 Oct 2016 04:19:24 -0700 Subject: [PATCH 262/503] IB/hfi1: Return ENODEV for unsupported PCI device ids. Clean up device type checking. Reviewed-by: Dean Luick Signed-off-by: Krzysztof Blaszkowski Signed-off-by: Tymoteusz Kielan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/init.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index e28a6b633ea9..baea53f862f9 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1402,7 +1402,7 @@ static void postinit_cleanup(struct hfi1_devdata *dd) static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int ret = 0, j, pidx, initfail; - struct hfi1_devdata *dd = ERR_PTR(-EINVAL); + struct hfi1_devdata *dd; struct hfi1_pportdata *ppd; /* First, lock the non-writable module parameters */ @@ -1461,26 +1461,25 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto bail; - /* - * Do device-specific initialization, function table setup, dd - * allocation, etc. - */ - switch (ent->device) { - case PCI_DEVICE_ID_INTEL0: - case PCI_DEVICE_ID_INTEL1: - dd = hfi1_init_dd(pdev, ent); - break; - default: + if (!(ent->device == PCI_DEVICE_ID_INTEL0 || + ent->device == PCI_DEVICE_ID_INTEL1)) { hfi1_early_err(&pdev->dev, "Failing on unknown Intel deviceid 0x%x\n", ent->device); ret = -ENODEV; + goto clean_bail; } - if (IS_ERR(dd)) + /* + * Do device-specific initialization, function table setup, dd + * allocation, etc. + */ + dd = hfi1_init_dd(pdev, ent); + + if (IS_ERR(dd)) { ret = PTR_ERR(dd); - if (ret) goto clean_bail; /* error already printed */ + } ret = create_workqueues(dd); if (ret) From 4dfe7cceb2bfd98783b4966d7c881a7552932d31 Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Mon, 17 Oct 2016 04:19:41 -0700 Subject: [PATCH 263/503] IB/hfi1: Fix a potential memory leak in hfi1_create_ctxts() In the function hfi1_create_ctxts the array "dd->rcd" is allocated and then populated with allocated resources in a loop. Previously, if error happened during the loop, only resource allocated in the current iteration would be freed. The array itself would then be freed, leaving the resources that were allocated in previous iterations and referenced by the array elements in limbo. This patch makes sure all allocated resources are freed before freeing the array "dd->rcd". Also the resource allocation now takes account of the numa node the device is attached to. Reviewed-by: Tadeusz Struk Signed-off-by: Jianxin Xiong Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/init.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index baea53f862f9..e27b65dbe293 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -144,6 +144,8 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) struct hfi1_ctxtdata *rcd; ppd = dd->pport + (i % dd->num_pports); + + /* dd->rcd[i] gets assigned inside the callee */ rcd = hfi1_create_ctxtdata(ppd, i, dd->node); if (!rcd) { dd_dev_err(dd, @@ -169,8 +171,6 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) if (!rcd->sc) { dd_dev_err(dd, "Unable to allocate kernel send context, failing\n"); - dd->rcd[rcd->ctxt] = NULL; - hfi1_free_ctxtdata(dd, rcd); goto nomem; } @@ -178,9 +178,6 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) if (ret < 0) { dd_dev_err(dd, "Failed to setup kernel receive context, failing\n"); - sc_free(rcd->sc); - dd->rcd[rcd->ctxt] = NULL; - hfi1_free_ctxtdata(dd, rcd); ret = -EFAULT; goto bail; } @@ -196,6 +193,10 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) nomem: ret = -ENOMEM; bail: + if (dd->rcd) { + for (i = 0; i < dd->num_rcv_contexts; ++i) + hfi1_free_ctxtdata(dd, dd->rcd[i]); + } kfree(dd->rcd); dd->rcd = NULL; return ret; @@ -216,7 +217,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, dd->num_rcv_contexts - dd->first_user_ctxt) kctxt_ngroups = (dd->rcv_entries.nctxt_extra - (dd->num_rcv_contexts - dd->first_user_ctxt)); - rcd = kzalloc(sizeof(*rcd), GFP_KERNEL); + rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, numa); if (rcd) { u32 rcvtids, max_entries; From eacc830f95c0d8c5cbbda1bdba2ddc8f14bc248d Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 17 Oct 2016 04:19:52 -0700 Subject: [PATCH 264/503] IB/hfi1: Remove leftover snoop references A few snoop related variables were missed in the snoop/capture removal to get out of staging. Go back and clean those up too. Reviewed-by: Dean Luick Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 18 +------- drivers/infiniband/hw/hfi1/hfi.h | 40 +----------------- drivers/infiniband/hw/hfi1/trace_rx.h | 60 --------------------------- 3 files changed, 4 insertions(+), 114 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 799215255b49..859341a25e56 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -6301,19 +6301,8 @@ void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf) /* leave shared count at zero for both global and VL15 */ write_global_credit(dd, vau, vl15buf, 0); - /* We may need some credits for another VL when sending packets - * with the snoop interface. Dividing it down the middle for VL15 - * and VL0 should suffice. - */ - if (unlikely(dd->hfi1_snoop.mode_flag == HFI1_PORT_SNOOP_MODE)) { - write_csr(dd, SEND_CM_CREDIT_VL15, (u64)(vl15buf >> 1) - << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT); - write_csr(dd, SEND_CM_CREDIT_VL, (u64)(vl15buf >> 1) - << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT); - } else { - write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf - << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT); - } + write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf + << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT); } /* @@ -9915,9 +9904,6 @@ static void set_lidlmc(struct hfi1_pportdata *ppd) u32 mask = ~((1U << ppd->lmc) - 1); u64 c1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG1); - if (dd->hfi1_snoop.mode_flag) - dd_dev_info(dd, "Set lid/lmc while snooping"); - c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK | DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK); c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 368e96c109a5..93e2fcebd083 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -367,26 +367,6 @@ struct hfi1_packet { u8 etype; }; -/* - * Private data for snoop/capture support. - */ -struct hfi1_snoop_data { - int mode_flag; - struct cdev cdev; - struct device *class_dev; - /* protect snoop data */ - spinlock_t snoop_lock; - struct list_head queue; - wait_queue_head_t waitq; - void *filter_value; - int (*filter_callback)(void *hdr, void *data, void *value); - u64 dcc_cfg; /* saved value of DCC Cfg register */ -}; - -/* snoop mode_flag values */ -#define HFI1_PORT_SNOOP_MODE 1U -#define HFI1_PORT_CAPTURE_MODE 2U - struct rvt_sge_state; /* @@ -1104,8 +1084,6 @@ struct hfi1_devdata { char *portcntrnames; size_t portcntrnameslen; - struct hfi1_snoop_data hfi1_snoop; - struct err_info_rcvport err_info_rcvport; struct err_info_constraint err_info_rcv_constraint; struct err_info_constraint err_info_xmit_constraint; @@ -1141,8 +1119,8 @@ struct hfi1_devdata { rhf_rcv_function_ptr normal_rhf_rcv_functions[8]; /* - * Handlers for outgoing data so that snoop/capture does not - * have to have its hooks in the send path + * Capability to have different send engines simply by changing a + * pointer value. */ send_routine process_pio_send; send_routine process_dma_send; @@ -1225,8 +1203,6 @@ struct hfi1_devdata *hfi1_lookup(int unit); extern u32 hfi1_cpulist_count; extern unsigned long *hfi1_cpulist; -extern unsigned int snoop_drop_send; -extern unsigned int snoop_force_capture; int hfi1_init(struct hfi1_devdata *, int); int hfi1_count_units(int *npresentp, int *nupp); int hfi1_count_active_units(void); @@ -1561,13 +1537,6 @@ void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf); void reset_link_credits(struct hfi1_devdata *dd); void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu); -int snoop_recv_handler(struct hfi1_packet *packet); -int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, - u64 pbc); -int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, - u64 pbc); -void snoop_inline_pio_send(struct hfi1_devdata *dd, struct pio_buf *pbuf, - u64 pbc, const void *from, size_t count); int set_buffer_control(struct hfi1_pportdata *ppd, struct buffer_control *bc); static inline struct hfi1_devdata *dd_from_ppd(struct hfi1_pportdata *ppd) @@ -1803,8 +1772,6 @@ int kdeth_process_expected(struct hfi1_packet *packet); int kdeth_process_eager(struct hfi1_packet *packet); int process_receive_invalid(struct hfi1_packet *packet); -extern rhf_rcv_function_ptr snoop_rhf_rcv_functions[8]; - void update_sge(struct rvt_sge_state *ss, u32 length); /* global module parameter variables */ @@ -1831,9 +1798,6 @@ extern struct mutex hfi1_mutex; #define DRIVER_NAME "hfi1" #define HFI1_USER_MINOR_BASE 0 #define HFI1_TRACE_MINOR 127 -#define HFI1_DIAGPKT_MINOR 128 -#define HFI1_DIAG_MINOR_BASE 129 -#define HFI1_SNOOP_CAPTURE_BASE 200 #define HFI1_NMINORS 255 #define PCI_VENDOR_ID_INTEL 0x8086 diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index 11e02b228922..f77e59fb43fe 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -253,66 +253,6 @@ TRACE_EVENT(hfi1_mmu_invalidate, ) ); -#define SNOOP_PRN \ - "slid %.4x dlid %.4x qpn 0x%.6x opcode 0x%.2x,%s " \ - "svc lvl %d pkey 0x%.4x [header = %d bytes] [data = %d bytes]" - -TRACE_EVENT(snoop_capture, - TP_PROTO(struct hfi1_devdata *dd, - int hdr_len, - struct ib_header *hdr, - int data_len, - void *data), - TP_ARGS(dd, hdr_len, hdr, data_len, data), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd) - __field(u16, slid) - __field(u16, dlid) - __field(u32, qpn) - __field(u8, opcode) - __field(u8, sl) - __field(u16, pkey) - __field(u32, hdr_len) - __field(u32, data_len) - __field(u8, lnh) - __dynamic_array(u8, raw_hdr, hdr_len) - __dynamic_array(u8, raw_pkt, data_len) - ), - TP_fast_assign( - struct ib_other_headers *ohdr; - - __entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3); - if (__entry->lnh == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else - ohdr = &hdr->u.l.oth; - DD_DEV_ASSIGN(dd); - __entry->slid = be16_to_cpu(hdr->lrh[3]); - __entry->dlid = be16_to_cpu(hdr->lrh[1]); - __entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; - __entry->opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; - __entry->sl = (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; - __entry->pkey = be32_to_cpu(ohdr->bth[0]) & 0xffff; - __entry->hdr_len = hdr_len; - __entry->data_len = data_len; - memcpy(__get_dynamic_array(raw_hdr), hdr, hdr_len); - memcpy(__get_dynamic_array(raw_pkt), data, data_len); - ), - TP_printk( - "[%s] " SNOOP_PRN, - __get_str(dev), - __entry->slid, - __entry->dlid, - __entry->qpn, - __entry->opcode, - show_ib_opcode(__entry->opcode), - __entry->sl, - __entry->pkey, - __entry->hdr_len, - __entry->data_len - ) -); - #endif /* __HFI1_TRACE_RX_H */ #undef TRACE_INCLUDE_PATH From 26ea2544ddbe8855cb251e41ff3641c61655a15f Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Mon, 17 Oct 2016 04:19:58 -0700 Subject: [PATCH 265/503] IB/hfi1: Clean up unused argument hfi1_pcie_ddinit takes the PCI device id as an argument but never uses it. Clean it up. Reviewed-by: Dennis Dalessandro Signed-off-by: Easwar Hariharan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 2 +- drivers/infiniband/hw/hfi1/hfi.h | 3 +-- drivers/infiniband/hw/hfi1/pcie.c | 3 +-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 859341a25e56..156ddf8f3dca 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -14449,7 +14449,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, * Any error printing is already done by the init code. * On return, we have the chip mapped. */ - ret = hfi1_pcie_ddinit(dd, pdev, ent); + ret = hfi1_pcie_ddinit(dd, pdev); if (ret < 0) goto bail_free; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 93e2fcebd083..d906cf08504f 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1736,8 +1736,7 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len); int hfi1_pcie_init(struct pci_dev *, const struct pci_device_id *); void hfi1_pcie_cleanup(struct pci_dev *); -int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *, - const struct pci_device_id *); +int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *); void hfi1_pcie_ddcleanup(struct hfi1_devdata *); void hfi1_pcie_flr(struct hfi1_devdata *); int pcie_speeds(struct hfi1_devdata *); diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 89c68da1c273..4ac8f330c5cb 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -157,8 +157,7 @@ void hfi1_pcie_cleanup(struct pci_dev *pdev) * fields required to re-initialize after a chip reset, or for * various other purposes */ -int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev, - const struct pci_device_id *ent) +int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) { unsigned long len; resource_size_t addr; From f0f98f74c91c68502e97e0d5526aa4e81b40b28a Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Mon, 17 Oct 2016 04:20:04 -0700 Subject: [PATCH 266/503] IB/hfi1: Delete unused lock The lock is an unused vestige from qib. Remove it. Reviewed-by: Mike Marciniszyn Signed-off-by: Easwar Hariharan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/hfi.h | 2 -- drivers/infiniband/hw/hfi1/init.c | 1 - 2 files changed, 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index d906cf08504f..cc87fd4e534b 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -593,8 +593,6 @@ struct hfi1_pportdata { struct mutex hls_lock; u32 host_link_state; - spinlock_t sdma_alllock ____cacheline_aligned_in_smp; - u32 lstate; /* logical link state */ /* these are the "32 bit" regs */ diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index e27b65dbe293..0f82eebc4b9e 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -507,7 +507,6 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event); mutex_init(&ppd->hls_lock); - spin_lock_init(&ppd->sdma_alllock); spin_lock_init(&ppd->qsfp_info.qsfp_lock); ppd->qsfp_info.ppd = ppd; From 458ed666fe14a54dfb6690a1a7f541782d1342c9 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Mon, 17 Oct 2016 04:20:09 -0700 Subject: [PATCH 267/503] IB/hfi1: Fix rnr_timer addition The new s_rnr_timeout was not properly being set and the code was incorrectly setting a different timer. Found by code inspection. Cc: # 4.7.x Fixes: 08279d5c9424 ("staging/rdma/hfi1: use new RNR timer") Reviewed-by: Mike Marciniszyn Signed-off-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/rc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 8bc5013f39a1..83198a8a8797 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -89,7 +89,7 @@ void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to) lockdep_assert_held(&qp->s_lock); qp->s_flags |= RVT_S_WAIT_RNR; - qp->s_timer.expires = jiffies + usecs_to_jiffies(to); + priv->s_rnr_timer.expires = jiffies + usecs_to_jiffies(to); add_timer(&priv->s_rnr_timer); } From 11501ab9df687c6f0852719a5165e16cd3eb3c10 Mon Sep 17 00:00:00 2001 From: Krzysztof Blaszkowski Date: Tue, 25 Oct 2016 13:12:11 -0700 Subject: [PATCH 268/503] IB/hfi1: Relocate rcvhdrcnt module parameter check. Validate the rcvhdrcnt module parameter in a single function at module load time. This allows proper error reporting. Reviewed-by: Dean Luick Signed-off-by: Krzysztof Blaszkowski Signed-off-by: Tymoteusz Kielan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/init.c | 44 ++++++++++++++++++------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 0f82eebc4b9e..e3b5bc93bc70 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -262,13 +262,6 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, } rcd->eager_base = base * dd->rcv_entries.group_size; - /* Validate and initialize Rcv Hdr Q variables */ - if (rcvhdrcnt % HDRQ_INCREMENT) { - dd_dev_err(dd, - "ctxt%u: header queue count %d must be divisible by %lu\n", - rcd->ctxt, rcvhdrcnt, HDRQ_INCREMENT); - goto bail; - } rcd->rcvhdrq_cnt = rcvhdrcnt; rcd->rcvhdrqentsize = hfi1_hdrq_entsize; /* @@ -1399,6 +1392,29 @@ static void postinit_cleanup(struct hfi1_devdata *dd) hfi1_free_devdata(dd); } +static int init_validate_rcvhdrcnt(struct device *dev, uint thecnt) +{ + if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) { + hfi1_early_err(dev, "Receive header queue count too small\n"); + return -EINVAL; + } + + if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) { + hfi1_early_err(dev, + "Receive header queue count cannot be greater than %u\n", + HFI1_MAX_HDRQ_EGRBUF_CNT); + return -EINVAL; + } + + if (thecnt % HDRQ_INCREMENT) { + hfi1_early_err(dev, "Receive header queue count %d must be divisible by %lu\n", + thecnt, HDRQ_INCREMENT); + return -EINVAL; + } + + return 0; +} + static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int ret = 0, j, pidx, initfail; @@ -1409,18 +1425,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) HFI1_CAP_LOCK(); /* Validate some global module parameters */ - if (rcvhdrcnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) { - hfi1_early_err(&pdev->dev, "Header queue count too small\n"); - ret = -EINVAL; + ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt); + if (ret) goto bail; - } - if (rcvhdrcnt > HFI1_MAX_HDRQ_EGRBUF_CNT) { - hfi1_early_err(&pdev->dev, - "Receive header queue count cannot be greater than %u\n", - HFI1_MAX_HDRQ_EGRBUF_CNT); - ret = -EINVAL; - goto bail; - } + /* use the encoding function as a sanitization check */ if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) { hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n", From 505efe3e46d5eaab726295cd023fb86d5b789d00 Mon Sep 17 00:00:00 2001 From: Jakub Pawlak Date: Tue, 25 Oct 2016 13:12:17 -0700 Subject: [PATCH 269/503] IB/hfi1: Fix status error code for unsupported packets Set the status code BAD_L2 when unsupported type of packet is received and dropped. Reviewed-by: Dennis Dalessandro Signed-off-by: Jakub Pawlak Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.h | 3 +++ drivers/infiniband/hw/hfi1/driver.c | 17 +++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 92345259a8f4..043fd21dc5f3 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -320,6 +320,9 @@ /* DC_DC8051_CFG_MODE.GENERAL bits */ #define DISABLE_SELF_GUID_CHECK 0x2 +/* Bad L2 frame error code */ +#define BAD_L2_ERR 0x6 + /* * Eager buffer minimum and maximum sizes supported by the hardware. * All power-of-two sizes in between are supported as well. diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 6563e4d38b80..dadd35eedc01 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1360,12 +1360,25 @@ int process_receive_ib(struct hfi1_packet *packet) int process_receive_bypass(struct hfi1_packet *packet) { + struct hfi1_devdata *dd = packet->rcd->dd; + if (unlikely(rhf_err_flags(packet->rhf))) handle_eflags(packet); - dd_dev_err(packet->rcd->dd, + dd_dev_err(dd, "Bypass packets are not supported in normal operation. Dropping\n"); - incr_cntr64(&packet->rcd->dd->sw_rcv_bypass_packet_errors); + incr_cntr64(&dd->sw_rcv_bypass_packet_errors); + if (!(dd->err_info_rcvport.status_and_code & OPA_EI_STATUS_SMASK)) { + u64 *flits = packet->ebuf; + + if (flits && !(packet->rhf & RHF_LEN_ERR)) { + dd->err_info_rcvport.packet_flit1 = flits[0]; + dd->err_info_rcvport.packet_flit2 = + packet->tlen > sizeof(flits[0]) ? flits[1] : 0; + } + dd->err_info_rcvport.status_and_code |= + (OPA_EI_STATUS_SMASK | BAD_L2_ERR); + } return RHF_RCV_CONTINUE; } From f2d8a0b367e735ab157222ce74a5f2481216c878 Mon Sep 17 00:00:00 2001 From: Dasaratharaman Chandramouli Date: Tue, 25 Oct 2016 13:12:23 -0700 Subject: [PATCH 270/503] IB/hfi1: Fix ECN processing in prescan_rxq When processing ECN via the prescan_rxq path, some fields in the packet structure are passed uninitialized. This can potentially cause NULL pointer exceptions during ECN handling. Reviewed-by: Ira Weiny Reviewed-by: Dennis Dalessandro Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/driver.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index dadd35eedc01..c5efff29c147 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -599,7 +599,6 @@ static void __prescan_rxq(struct hfi1_packet *packet) dd->rhf_offset; struct rvt_qp *qp; struct ib_header *hdr; - struct ib_other_headers *ohdr; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; u64 rhf = rhf_to_cpu(rhf_addr); u32 etype = rhf_rcv_type(rhf), qpn, bth1; @@ -615,18 +614,21 @@ static void __prescan_rxq(struct hfi1_packet *packet) if (etype != RHF_RCV_TYPE_IB) goto next; - hdr = hfi1_get_msgheader(dd, rhf_addr); + packet->hdr = hfi1_get_msgheader(dd, rhf_addr); + hdr = packet->hdr; lnh = be16_to_cpu(hdr->lrh[0]) & 3; - if (lnh == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else if (lnh == HFI1_LRH_GRH) - ohdr = &hdr->u.l.oth; - else + if (lnh == HFI1_LRH_BTH) { + packet->ohdr = &hdr->u.oth; + } else if (lnh == HFI1_LRH_GRH) { + packet->ohdr = &hdr->u.l.oth; + packet->rcv_flags |= HFI1_HAS_GRH; + } else { goto next; /* just in case */ + } - bth1 = be32_to_cpu(ohdr->bth[1]); + bth1 = be32_to_cpu(packet->ohdr->bth[1]); is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK)); if (!is_ecn) @@ -646,7 +648,7 @@ static void __prescan_rxq(struct hfi1_packet *packet) /* turn off BECN, FECN */ bth1 &= ~(HFI1_FECN_SMASK | HFI1_BECN_SMASK); - ohdr->bth[1] = cpu_to_be32(bth1); + packet->ohdr->bth[1] = cpu_to_be32(bth1); next: update_ps_mdata(&mdata, rcd); } From 09a7908b1ba616eed349d49058ee909907ee0885 Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Tue, 25 Oct 2016 13:12:40 -0700 Subject: [PATCH 271/503] IB/hfi1: Prevent hardware counter names from being cut off Increase the size of the buffer that is used to construct per-VL and per-SDMA counter names. Reviewed-by: Dennis Dalessandro Signed-off-by: Jianxin Xiong Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 156ddf8f3dca..24d0820873cf 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -12098,7 +12098,7 @@ static void update_synth_timer(unsigned long opaque) mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME); } -#define C_MAX_NAME 13 /* 12 chars + one for /0 */ +#define C_MAX_NAME 16 /* 15 chars + one for /0 */ static int init_cntrs(struct hfi1_devdata *dd) { int i, rcv_ctxts, j; From 2b16056f845207967a32497f41cf92b57849f934 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 25 Oct 2016 13:12:46 -0700 Subject: [PATCH 272/503] IB/hfi1: Remove incorrect IS_ERR check Remove IS_ERR check from caching code as the function being called does not actually return error pointers. Fixes: f19bd643dbde: "IB/hfi1: Prevent NULL pointer deferences in caching code" Reported-by: Dan Carpenter Reviewed-by: Dean Luick Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index a761f804111e..77697d690f3e 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -1144,7 +1144,7 @@ static int pin_vector_pages(struct user_sdma_request *req, rb_node = hfi1_mmu_rb_extract(pq->handler, (unsigned long)iovec->iov.iov_base, iovec->iov.iov_len); - if (rb_node && !IS_ERR(rb_node)) + if (rb_node) node = container_of(rb_node, struct sdma_mmu_node, rb); else rb_node = NULL; From 24803f38a5c0b6c57ed800b47e695f9ce474bc3a Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 14 Nov 2016 16:16:28 +0800 Subject: [PATCH 273/503] igmp: do not remove igmp souce list info when set link down In commit 24cf3af3fed5 ("igmp: call ip_mc_clear_src..."), we forgot to remove igmpv3_clear_delrec() in ip_mc_down(), which also called ip_mc_clear_src(). This make us clear all IGMPv3 source filter info after NETDEV_DOWN. Move igmpv3_clear_delrec() to ip_mc_destroy_dev() and then no need ip_mc_clear_src() in ip_mc_destroy_dev(). On the other hand, we should restore back instead of free all source filter info in igmpv3_del_delrec(). Or we will not able to restore IGMPv3 source filter info after NETDEV_UP and NETDEV_POST_TYPE_CHANGE. Fixes: 24cf3af3fed5 ("igmp: call ip_mc_clear_src() only when ...") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 50 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 606cc3e85d2b..15db786d50ed 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -162,7 +162,7 @@ static int unsolicited_report_interval(struct in_device *in_dev) } static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im); -static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr); +static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im); static void igmpv3_clear_delrec(struct in_device *in_dev); static int sf_setstate(struct ip_mc_list *pmc); static void sf_markstate(struct ip_mc_list *pmc); @@ -1130,10 +1130,15 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) spin_unlock_bh(&in_dev->mc_tomb_lock); } -static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr) +/* + * restore ip_mc_list deleted records + */ +static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im) { struct ip_mc_list *pmc, *pmc_prev; - struct ip_sf_list *psf, *psf_next; + struct ip_sf_list *psf; + struct net *net = dev_net(in_dev->dev); + __be32 multiaddr = im->multiaddr; spin_lock_bh(&in_dev->mc_tomb_lock); pmc_prev = NULL; @@ -1149,16 +1154,26 @@ static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr) in_dev->mc_tomb = pmc->next; } spin_unlock_bh(&in_dev->mc_tomb_lock); + + spin_lock_bh(&im->lock); if (pmc) { - for (psf = pmc->tomb; psf; psf = psf_next) { - psf_next = psf->sf_next; - kfree(psf); + im->interface = pmc->interface; + im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + im->sfmode = pmc->sfmode; + if (pmc->sfmode == MCAST_INCLUDE) { + im->tomb = pmc->tomb; + im->sources = pmc->sources; + for (psf = im->sources; psf; psf = psf->sf_next) + psf->sf_crcount = im->crcount; } in_dev_put(pmc->interface); - kfree(pmc); } + spin_unlock_bh(&im->lock); } +/* + * flush ip_mc_list deleted records + */ static void igmpv3_clear_delrec(struct in_device *in_dev) { struct ip_mc_list *pmc, *nextpmc; @@ -1366,7 +1381,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) ip_mc_hash_add(in_dev, im); #ifdef CONFIG_IP_MULTICAST - igmpv3_del_delrec(in_dev, im->multiaddr); + igmpv3_del_delrec(in_dev, im); #endif igmp_group_added(im); if (!in_dev->dead) @@ -1626,8 +1641,12 @@ void ip_mc_remap(struct in_device *in_dev) ASSERT_RTNL(); - for_each_pmc_rtnl(in_dev, pmc) + for_each_pmc_rtnl(in_dev, pmc) { +#ifdef CONFIG_IP_MULTICAST + igmpv3_del_delrec(in_dev, pmc); +#endif igmp_group_added(pmc); + } } /* Device going down */ @@ -1648,7 +1667,6 @@ void ip_mc_down(struct in_device *in_dev) in_dev->mr_gq_running = 0; if (del_timer(&in_dev->mr_gq_timer)) __in_dev_put(in_dev); - igmpv3_clear_delrec(in_dev); #endif ip_mc_dec_group(in_dev, IGMP_ALL_HOSTS); @@ -1688,8 +1706,12 @@ void ip_mc_up(struct in_device *in_dev) #endif ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); - for_each_pmc_rtnl(in_dev, pmc) + for_each_pmc_rtnl(in_dev, pmc) { +#ifdef CONFIG_IP_MULTICAST + igmpv3_del_delrec(in_dev, pmc); +#endif igmp_group_added(pmc); + } } /* @@ -1704,13 +1726,13 @@ void ip_mc_destroy_dev(struct in_device *in_dev) /* Deactivate timers */ ip_mc_down(in_dev); +#ifdef CONFIG_IP_MULTICAST + igmpv3_clear_delrec(in_dev); +#endif while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) { in_dev->mc_list = i->next_rcu; in_dev->mc_count--; - - /* We've dropped the groups in ip_mc_down already */ - ip_mc_clear_src(i); ip_ma_put(i); } } From d2042052a0aa6a54f01a0c9e14243ec040b100e2 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 14 Nov 2016 09:27:28 +0100 Subject: [PATCH 274/503] stmmac: update the PTP header file This patch is to update this file by using BIT macros, removing not used defines and fixes some typos. Signed-off-by: Giuseppe Cavallaro Acked-by: Rayagond Kokatanur Acked-by: Alexandre TORGUE Acked-by: Richard Cochran Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_ptp.h | 72 ++++++++++--------- 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h index 4535df37c227..c06938c47af5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h @@ -22,51 +22,53 @@ Author: Rayagond Kokatanur ******************************************************************************/ -#ifndef __STMMAC_PTP_H__ -#define __STMMAC_PTP_H__ +#ifndef __STMMAC_PTP_H__ +#define __STMMAC_PTP_H__ + +#define PTP_GMAC4_OFFSET 0xb00 +#define PTP_GMAC3_X_OFFSET 0x700 /* IEEE 1588 PTP register offsets */ -#define PTP_TCR 0x0700 /* Timestamp Control Reg */ -#define PTP_SSIR 0x0704 /* Sub-Second Increment Reg */ -#define PTP_STSR 0x0708 /* System Time – Seconds Regr */ -#define PTP_STNSR 0x070C /* System Time – Nanoseconds Reg */ -#define PTP_STSUR 0x0710 /* System Time – Seconds Update Reg */ -#define PTP_STNSUR 0x0714 /* System Time – Nanoseconds Update Reg */ -#define PTP_TAR 0x0718 /* Timestamp Addend Reg */ -#define PTP_TTSR 0x071C /* Target Time Seconds Reg */ -#define PTP_TTNSR 0x0720 /* Target Time Nanoseconds Reg */ -#define PTP_STHWSR 0x0724 /* System Time - Higher Word Seconds Reg */ -#define PTP_TSR 0x0728 /* Timestamp Status */ +#define PTP_TCR 0x00 /* Timestamp Control Reg */ +#define PTP_SSIR 0x04 /* Sub-Second Increment Reg */ +#define PTP_STSR 0x08 /* System Time – Seconds Regr */ +#define PTP_STNSR 0x0c /* System Time – Nanoseconds Reg */ +#define PTP_STSUR 0x10 /* System Time – Seconds Update Reg */ +#define PTP_STNSUR 0x14 /* System Time – Nanoseconds Update Reg */ +#define PTP_TAR 0x18 /* Timestamp Addend Reg */ -#define PTP_STNSUR_ADDSUB_SHIFT 31 - -/* PTP TCR defines */ -#define PTP_TCR_TSENA 0x00000001 /* Timestamp Enable */ -#define PTP_TCR_TSCFUPDT 0x00000002 /* Timestamp Fine/Coarse Update */ -#define PTP_TCR_TSINIT 0x00000004 /* Timestamp Initialize */ -#define PTP_TCR_TSUPDT 0x00000008 /* Timestamp Update */ -/* Timestamp Interrupt Trigger Enable */ -#define PTP_TCR_TSTRIG 0x00000010 -#define PTP_TCR_TSADDREG 0x00000020 /* Addend Reg Update */ -#define PTP_TCR_TSENALL 0x00000100 /* Enable Timestamp for All Frames */ -/* Timestamp Digital or Binary Rollover Control */ -#define PTP_TCR_TSCTRLSSR 0x00000200 +#define PTP_STNSUR_ADDSUB_SHIFT 31 +#define PTP_DIGITAL_ROLLOVER_MODE 0x3B9ACA00 /* 10e9-1 ns */ +#define PTP_BINARY_ROLLOVER_MODE 0x80000000 /* ~0.466 ns */ +/* PTP Timestamp control register defines */ +#define PTP_TCR_TSENA BIT(0) /* Timestamp Enable */ +#define PTP_TCR_TSCFUPDT BIT(1) /* Timestamp Fine/Coarse Update */ +#define PTP_TCR_TSINIT BIT(2) /* Timestamp Initialize */ +#define PTP_TCR_TSUPDT BIT(3) /* Timestamp Update */ +#define PTP_TCR_TSTRIG BIT(4) /* Timestamp Interrupt Trigger Enable */ +#define PTP_TCR_TSADDREG BIT(5) /* Addend Reg Update */ +#define PTP_TCR_TSENALL BIT(8) /* Enable Timestamp for All Frames */ +#define PTP_TCR_TSCTRLSSR BIT(9) /* Digital or Binary Rollover Control */ /* Enable PTP packet Processing for Version 2 Format */ -#define PTP_TCR_TSVER2ENA 0x00000400 +#define PTP_TCR_TSVER2ENA BIT(10) /* Enable Processing of PTP over Ethernet Frames */ -#define PTP_TCR_TSIPENA 0x00000800 +#define PTP_TCR_TSIPENA BIT(11) /* Enable Processing of PTP Frames Sent over IPv6-UDP */ -#define PTP_TCR_TSIPV6ENA 0x00001000 +#define PTP_TCR_TSIPV6ENA BIT(12) /* Enable Processing of PTP Frames Sent over IPv4-UDP */ -#define PTP_TCR_TSIPV4ENA 0x00002000 +#define PTP_TCR_TSIPV4ENA BIT(13) /* Enable Timestamp Snapshot for Event Messages */ -#define PTP_TCR_TSEVNTENA 0x00004000 +#define PTP_TCR_TSEVNTENA BIT(14) /* Enable Snapshot for Messages Relevant to Master */ -#define PTP_TCR_TSMSTRENA 0x00008000 +#define PTP_TCR_TSMSTRENA BIT(15) /* Select PTP packets for Taking Snapshots */ -#define PTP_TCR_SNAPTYPSEL_1 0x00010000 +#define PTP_TCR_SNAPTYPSEL_1 GENMASK(17, 16) /* Enable MAC address for PTP Frame Filtering */ -#define PTP_TCR_TSENMACADDR 0x00040000 +#define PTP_TCR_TSENMACADDR BIT(18) -#endif /* __STMMAC_PTP_H__ */ +/* SSIR defines */ +#define PTP_SSIR_SSINC_MASK 0xff +#define GMAC4_PTP_SSIR_SSINC_SHIFT 16 + +#endif /* __STMMAC_PTP_H__ */ From ba1ffd74df74a9efa5290f87632a0ed55f1aa387 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 14 Nov 2016 09:27:29 +0100 Subject: [PATCH 275/503] stmmac: fix PTP support for GMAC4 Due to bad management of the descriptors, when use ptp4l, kernel panics as shown below: ----------------------------------------------------------- Unable to handle kernel NULL pointer dereference at virtual address 000001ac ... Internal error: Oops: 17 [#1] SMP ARM ... Hardware name: STi SoC with Flattened Device Tree task: c0c05e80 task.stack: c0c00000 PC is at dwmac4_wrback_get_tx_timestamp_status+0x0/0xc LR is at stmmac_tx_clean+0x2f8/0x4d4 ----------------------------------------------------------- In case of GMAC4 the extended descriptor pointers were used for getting the timestamp. These are NULL for this HW, and the normal ones must be used. The PTP also had problems on this chip due to the bad register management and issues on the algo adopted to setup the PTP and getting the timestamp values from the descriptors. Signed-off-by: Giuseppe Cavallaro Acked-by: Rayagond Kokatanur Acked-by: Alexandre TORGUE Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/common.h | 5 +- .../ethernet/stmicro/stmmac/dwmac4_descs.c | 68 ++++++++++--- .../ethernet/stmicro/stmmac/dwmac4_descs.h | 4 + drivers/net/ethernet/stmicro/stmmac/stmmac.h | 1 + .../ethernet/stmicro/stmmac/stmmac_hwtstamp.c | 43 ++++++-- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 97 ++++++++++--------- .../net/ethernet/stmicro/stmmac/stmmac_ptp.c | 9 +- 7 files changed, 154 insertions(+), 73 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index d3292c4a6eda..6fc214ce2958 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -482,11 +482,12 @@ struct stmmac_ops { /* PTP and HW Timer helpers */ struct stmmac_hwtimestamp { void (*config_hw_tstamping) (void __iomem *ioaddr, u32 data); - u32 (*config_sub_second_increment) (void __iomem *ioaddr, u32 clk_rate); + u32 (*config_sub_second_increment)(void __iomem *ioaddr, u32 ptp_clock, + int gmac4); int (*init_systime) (void __iomem *ioaddr, u32 sec, u32 nsec); int (*config_addend) (void __iomem *ioaddr, u32 addend); int (*adjust_systime) (void __iomem *ioaddr, u32 sec, u32 nsec, - int add_sub); + int add_sub, int gmac4); u64(*get_systime) (void __iomem *ioaddr); }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index a1b17cd7886b..2ef2f0c03e76 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -204,14 +204,18 @@ static void dwmac4_rd_enable_tx_timestamp(struct dma_desc *p) static int dwmac4_wrback_get_tx_timestamp_status(struct dma_desc *p) { - return (p->des3 & TDES3_TIMESTAMP_STATUS) - >> TDES3_TIMESTAMP_STATUS_SHIFT; + /* Context type from W/B descriptor must be zero */ + if (p->des3 & TDES3_CONTEXT_TYPE) + return -EINVAL; + + /* Tx Timestamp Status is 1 so des0 and des1'll have valid values */ + if (p->des3 & TDES3_TIMESTAMP_STATUS) + return 0; + + return 1; } -/* NOTE: For RX CTX bit has to be checked before - * HAVE a specific function for TX and another one for RX - */ -static u64 dwmac4_wrback_get_timestamp(void *desc, u32 ats) +static inline u64 dwmac4_get_timestamp(void *desc, u32 ats) { struct dma_desc *p = (struct dma_desc *)desc; u64 ns; @@ -223,12 +227,54 @@ static u64 dwmac4_wrback_get_timestamp(void *desc, u32 ats) return ns; } -static int dwmac4_context_get_rx_timestamp_status(void *desc, u32 ats) +static int dwmac4_rx_check_timestamp(void *desc) { struct dma_desc *p = (struct dma_desc *)desc; + u32 own, ctxt; + int ret = 1; - return (p->des1 & RDES1_TIMESTAMP_AVAILABLE) - >> RDES1_TIMESTAMP_AVAILABLE_SHIFT; + own = p->des3 & RDES3_OWN; + ctxt = ((p->des3 & RDES3_CONTEXT_DESCRIPTOR) + >> RDES3_CONTEXT_DESCRIPTOR_SHIFT); + + if (likely(!own && ctxt)) { + if ((p->des0 == 0xffffffff) && (p->des1 == 0xffffffff)) + /* Corrupted value */ + ret = -EINVAL; + else + /* A valid Timestamp is ready to be read */ + ret = 0; + } + + /* Timestamp not ready */ + return ret; +} + +static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats) +{ + struct dma_desc *p = (struct dma_desc *)desc; + int ret = -EINVAL; + + /* Get the status from normal w/b descriptor */ + if (likely(p->des3 & TDES3_RS1V)) { + if (likely(p->des1 & RDES1_TIMESTAMP_AVAILABLE)) { + int i = 0; + + /* Check if timestamp is OK from context descriptor */ + do { + ret = dwmac4_rx_check_timestamp(desc); + if (ret < 0) + goto exit; + i++; + + } while ((ret == 1) || (i < 10)); + + if (i == 10) + ret = -EBUSY; + } + } +exit: + return ret; } static void dwmac4_rd_init_rx_desc(struct dma_desc *p, int disable_rx_ic, @@ -373,8 +419,8 @@ const struct stmmac_desc_ops dwmac4_desc_ops = { .get_rx_frame_len = dwmac4_wrback_get_rx_frame_len, .enable_tx_timestamp = dwmac4_rd_enable_tx_timestamp, .get_tx_timestamp_status = dwmac4_wrback_get_tx_timestamp_status, - .get_timestamp = dwmac4_wrback_get_timestamp, - .get_rx_timestamp_status = dwmac4_context_get_rx_timestamp_status, + .get_rx_timestamp_status = dwmac4_wrback_get_rx_timestamp_status, + .get_timestamp = dwmac4_get_timestamp, .set_tx_ic = dwmac4_rd_set_tx_ic, .prepare_tx_desc = dwmac4_rd_prepare_tx_desc, .prepare_tso_tx_desc = dwmac4_rd_prepare_tso_tx_desc, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h index 0902a2edeaa9..9736c505211a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h @@ -59,10 +59,13 @@ #define TDES3_CTXT_TCMSSV BIT(26) /* TDES3 Common */ +#define TDES3_RS1V BIT(26) +#define TDES3_RS1V_SHIFT 26 #define TDES3_LAST_DESCRIPTOR BIT(28) #define TDES3_LAST_DESCRIPTOR_SHIFT 28 #define TDES3_FIRST_DESCRIPTOR BIT(29) #define TDES3_CONTEXT_TYPE BIT(30) +#define TDES3_CONTEXT_TYPE_SHIFT 30 /* TDS3 use for both format (read and write back) */ #define TDES3_OWN BIT(31) @@ -117,6 +120,7 @@ #define RDES3_LAST_DESCRIPTOR BIT(28) #define RDES3_FIRST_DESCRIPTOR BIT(29) #define RDES3_CONTEXT_DESCRIPTOR BIT(30) +#define RDES3_CONTEXT_DESCRIPTOR_SHIFT 30 /* RDES3 (read format) */ #define RDES3_BUFFER1_VALID_ADDR BIT(24) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index b15fc55f1b96..4d2a759b8465 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -129,6 +129,7 @@ struct stmmac_priv { int irq_wake; spinlock_t ptp_lock; void __iomem *mmcaddr; + void __iomem *ptpaddr; u32 rx_tail_addr; u32 tx_tail_addr; u32 mss; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index a77f68918010..10d6059b2f26 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -34,21 +34,29 @@ static void stmmac_config_hw_tstamping(void __iomem *ioaddr, u32 data) } static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr, - u32 ptp_clock) + u32 ptp_clock, int gmac4) { u32 value = readl(ioaddr + PTP_TCR); unsigned long data; - /* Convert the ptp_clock to nano second - * formula = (2/ptp_clock) * 1000000000 - * where, ptp_clock = 50MHz. + /* For GMAC3.x, 4.x versions, convert the ptp_clock to nano second + * formula = (1/ptp_clock) * 1000000000 + * where ptp_clock is 50MHz if fine method is used to update system */ - data = (2000000000ULL / ptp_clock); + if (value & PTP_TCR_TSCFUPDT) + data = (1000000000ULL / 50000000); + else + data = (1000000000ULL / ptp_clock); /* 0.465ns accuracy */ if (!(value & PTP_TCR_TSCTRLSSR)) data = (data * 1000) / 465; + data &= PTP_SSIR_SSINC_MASK; + + if (gmac4) + data = data << GMAC4_PTP_SSIR_SSINC_SHIFT; + writel(data, ioaddr + PTP_SSIR); return data; @@ -104,14 +112,30 @@ static int stmmac_config_addend(void __iomem *ioaddr, u32 addend) } static int stmmac_adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec, - int add_sub) + int add_sub, int gmac4) { u32 value; int limit; + if (add_sub) { + /* If the new sec value needs to be subtracted with + * the system time, then MAC_STSUR reg should be + * programmed with (2^32 – ) + */ + if (gmac4) + sec = (100000000ULL - sec); + + value = readl(ioaddr + PTP_TCR); + if (value & PTP_TCR_TSCTRLSSR) + nsec = (PTP_DIGITAL_ROLLOVER_MODE - nsec); + else + nsec = (PTP_BINARY_ROLLOVER_MODE - nsec); + } + writel(sec, ioaddr + PTP_STSUR); - writel(((add_sub << PTP_STNSUR_ADDSUB_SHIFT) | nsec), - ioaddr + PTP_STNSUR); + value = (add_sub << PTP_STNSUR_ADDSUB_SHIFT) | nsec; + writel(value, ioaddr + PTP_STNSUR); + /* issue command to initialize the system time value */ value = readl(ioaddr + PTP_TCR); value |= PTP_TCR_TSUPDT; @@ -134,8 +158,9 @@ static u64 stmmac_get_systime(void __iomem *ioaddr) { u64 ns; + /* Get the TSSS value */ ns = readl(ioaddr + PTP_STNSR); - /* convert sec time value to nanosecond */ + /* Get the TSS and convert sec time value to nanosecond */ ns += readl(ioaddr + PTP_STSR) * 1000000000ULL; return ns; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index e2c94ec4edd0..1f9ec02fa7f8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -340,18 +340,17 @@ out: /* stmmac_get_tx_hwtstamp - get HW TX timestamps * @priv: driver private structure - * @entry : descriptor index to be used. + * @p : descriptor pointer * @skb : the socket buffer * Description : * This function will read timestamp from the descriptor & pass it to stack. * and also perform some sanity checks. */ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv, - unsigned int entry, struct sk_buff *skb) + struct dma_desc *p, struct sk_buff *skb) { struct skb_shared_hwtstamps shhwtstamp; u64 ns; - void *desc = NULL; if (!priv->hwts_tx_en) return; @@ -360,58 +359,55 @@ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv, if (likely(!skb || !(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))) return; - if (priv->adv_ts) - desc = (priv->dma_etx + entry); - else - desc = (priv->dma_tx + entry); - /* check tx tstamp status */ - if (!priv->hw->desc->get_tx_timestamp_status((struct dma_desc *)desc)) - return; + if (!priv->hw->desc->get_tx_timestamp_status(p)) { + /* get the valid tstamp */ + ns = priv->hw->desc->get_timestamp(p, priv->adv_ts); - /* get the valid tstamp */ - ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts); + memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps)); + shhwtstamp.hwtstamp = ns_to_ktime(ns); - memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps)); - shhwtstamp.hwtstamp = ns_to_ktime(ns); - /* pass tstamp to stack */ - skb_tstamp_tx(skb, &shhwtstamp); + netdev_info(priv->dev, "get valid TX hw timestamp %llu\n", ns); + /* pass tstamp to stack */ + skb_tstamp_tx(skb, &shhwtstamp); + } return; } /* stmmac_get_rx_hwtstamp - get HW RX timestamps * @priv: driver private structure - * @entry : descriptor index to be used. + * @p : descriptor pointer + * @np : next descriptor pointer * @skb : the socket buffer * Description : * This function will read received packet's timestamp from the descriptor * and pass it to stack. It also perform some sanity checks. */ -static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, - unsigned int entry, struct sk_buff *skb) +static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p, + struct dma_desc *np, struct sk_buff *skb) { struct skb_shared_hwtstamps *shhwtstamp = NULL; u64 ns; - void *desc = NULL; if (!priv->hwts_rx_en) return; - if (priv->adv_ts) - desc = (priv->dma_erx + entry); - else - desc = (priv->dma_rx + entry); + /* Check if timestamp is available */ + if (!priv->hw->desc->get_rx_timestamp_status(p, priv->adv_ts)) { + /* For GMAC4, the valid timestamp is from CTX next desc. */ + if (priv->plat->has_gmac4) + ns = priv->hw->desc->get_timestamp(np, priv->adv_ts); + else + ns = priv->hw->desc->get_timestamp(p, priv->adv_ts); - /* exit if rx tstamp is not valid */ - if (!priv->hw->desc->get_rx_timestamp_status(desc, priv->adv_ts)) - return; - - /* get valid tstamp */ - ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts); - shhwtstamp = skb_hwtstamps(skb); - memset(shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps)); - shhwtstamp->hwtstamp = ns_to_ktime(ns); + netdev_info(priv->dev, "get valid RX hw timestamp %llu\n", ns); + shhwtstamp = skb_hwtstamps(skb); + memset(shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps)); + shhwtstamp->hwtstamp = ns_to_ktime(ns); + } else { + netdev_err(priv->dev, "cannot get RX hw timestamp\n"); + } } /** @@ -598,17 +594,18 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr) priv->hwts_tx_en = config.tx_type == HWTSTAMP_TX_ON; if (!priv->hwts_tx_en && !priv->hwts_rx_en) - priv->hw->ptp->config_hw_tstamping(priv->ioaddr, 0); + priv->hw->ptp->config_hw_tstamping(priv->ptpaddr, 0); else { value = (PTP_TCR_TSENA | PTP_TCR_TSCFUPDT | PTP_TCR_TSCTRLSSR | tstamp_all | ptp_v2 | ptp_over_ethernet | ptp_over_ipv6_udp | ptp_over_ipv4_udp | ts_event_en | ts_master_en | snap_type_sel); - priv->hw->ptp->config_hw_tstamping(priv->ioaddr, value); + priv->hw->ptp->config_hw_tstamping(priv->ptpaddr, value); /* program Sub Second Increment reg */ sec_inc = priv->hw->ptp->config_sub_second_increment( - priv->ioaddr, priv->clk_ptp_rate); + priv->ptpaddr, priv->clk_ptp_rate, + priv->plat->has_gmac4); temp = div_u64(1000000000ULL, sec_inc); /* calculate default added value: @@ -618,14 +615,14 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr) */ temp = (u64)(temp << 32); priv->default_addend = div_u64(temp, priv->clk_ptp_rate); - priv->hw->ptp->config_addend(priv->ioaddr, + priv->hw->ptp->config_addend(priv->ptpaddr, priv->default_addend); /* initialize system time */ ktime_get_real_ts64(&now); /* lower 32 bits of tv_sec are safe until y2106 */ - priv->hw->ptp->init_systime(priv->ioaddr, (u32)now.tv_sec, + priv->hw->ptp->init_systime(priv->ptpaddr, (u32)now.tv_sec, now.tv_nsec); } @@ -1340,7 +1337,7 @@ static void stmmac_tx_clean(struct stmmac_priv *priv) priv->dev->stats.tx_packets++; priv->xstats.tx_pkt_n++; } - stmmac_get_tx_hwtstamp(priv, entry, skb); + stmmac_get_tx_hwtstamp(priv, p, skb); } if (likely(priv->tx_skbuff_dma[entry].buf)) { @@ -1486,10 +1483,13 @@ static void stmmac_mmc_setup(struct stmmac_priv *priv) unsigned int mode = MMC_CNTRL_RESET_ON_READ | MMC_CNTRL_COUNTER_RESET | MMC_CNTRL_PRESET | MMC_CNTRL_FULL_HALF_PRESET; - if (priv->synopsys_id >= DWMAC_CORE_4_00) + if (priv->synopsys_id >= DWMAC_CORE_4_00) { + priv->ptpaddr = priv->ioaddr + PTP_GMAC4_OFFSET; priv->mmcaddr = priv->ioaddr + MMC_GMAC4_OFFSET; - else + } else { + priv->ptpaddr = priv->ioaddr + PTP_GMAC3_X_OFFSET; priv->mmcaddr = priv->ioaddr + MMC_GMAC3_X_OFFSET; + } dwmac_mmc_intr_all_mask(priv->mmcaddr); @@ -2484,7 +2484,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit) if (netif_msg_rx_status(priv)) { void *rx_head; - pr_debug("%s: descriptor ring:\n", __func__); + pr_info(">>>>>> %s: descriptor ring:\n", __func__); if (priv->extend_desc) rx_head = (void *)priv->dma_erx; else @@ -2495,6 +2495,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit) while (count < limit) { int status; struct dma_desc *p; + struct dma_desc *np; if (priv->extend_desc) p = (struct dma_desc *)(priv->dma_erx + entry); @@ -2514,9 +2515,11 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit) next_entry = priv->cur_rx; if (priv->extend_desc) - prefetch(priv->dma_erx + next_entry); + np = (struct dma_desc *)(priv->dma_erx + next_entry); else - prefetch(priv->dma_rx + next_entry); + np = priv->dma_rx + next_entry; + + prefetch(np); if ((priv->extend_desc) && (priv->hw->desc->rx_extended_status)) priv->hw->desc->rx_extended_status(&priv->dev->stats, @@ -2568,7 +2571,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit) frame_len -= ETH_FCS_LEN; if (netif_msg_rx_status(priv)) { - pr_debug("\tdesc: %p [entry %d] buff=0x%x\n", + pr_info("\tdesc: %p [entry %d] buff=0x%x\n", p, entry, des); if (frame_len > ETH_FRAME_LEN) pr_debug("\tframe size %d, COE: %d\n", @@ -2625,13 +2628,13 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit) DMA_FROM_DEVICE); } - stmmac_get_rx_hwtstamp(priv, entry, skb); - if (netif_msg_pktdata(priv)) { pr_debug("frame received (%dbytes)", frame_len); print_pkt(skb->data, frame_len); } + stmmac_get_rx_hwtstamp(priv, p, np, skb); + stmmac_rx_vlan(priv->dev, skb); skb->protocol = eth_type_trans(skb, priv->dev); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 1477471f8d44..3eb281d1db08 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -54,7 +54,7 @@ static int stmmac_adjust_freq(struct ptp_clock_info *ptp, s32 ppb) spin_lock_irqsave(&priv->ptp_lock, flags); - priv->hw->ptp->config_addend(priv->ioaddr, addend); + priv->hw->ptp->config_addend(priv->ptpaddr, addend); spin_unlock_irqrestore(&priv->ptp_lock, flags); @@ -89,7 +89,8 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta) spin_lock_irqsave(&priv->ptp_lock, flags); - priv->hw->ptp->adjust_systime(priv->ioaddr, sec, nsec, neg_adj); + priv->hw->ptp->adjust_systime(priv->ptpaddr, sec, nsec, neg_adj, + priv->plat->has_gmac4); spin_unlock_irqrestore(&priv->ptp_lock, flags); @@ -114,7 +115,7 @@ static int stmmac_get_time(struct ptp_clock_info *ptp, struct timespec64 *ts) spin_lock_irqsave(&priv->ptp_lock, flags); - ns = priv->hw->ptp->get_systime(priv->ioaddr); + ns = priv->hw->ptp->get_systime(priv->ptpaddr); spin_unlock_irqrestore(&priv->ptp_lock, flags); @@ -141,7 +142,7 @@ static int stmmac_set_time(struct ptp_clock_info *ptp, spin_lock_irqsave(&priv->ptp_lock, flags); - priv->hw->ptp->init_systime(priv->ioaddr, ts->tv_sec, ts->tv_nsec); + priv->hw->ptp->init_systime(priv->ptpaddr, ts->tv_sec, ts->tv_nsec); spin_unlock_irqrestore(&priv->ptp_lock, flags); From ee112c12ebd22baca85812175008ef584250e415 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 14 Nov 2016 09:27:30 +0100 Subject: [PATCH 276/503] stmmac: fix PTP type ethtool stats This patch fixes the ethtool stats for PTP frames; previous version does not take care about some message types: i.e. announce, management and signaling. It also provided a broken statistic in case of "No PTP message received". Signed-off-by: Giuseppe Cavallaro Acked-by: Rayagond Kokatanur Acked-by: Alexandre TORGUE Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/common.h | 19 +++++++------ drivers/net/ethernet/stmicro/stmmac/descs.h | 20 +++++++------ .../ethernet/stmicro/stmmac/dwmac4_descs.c | 27 +++++++++++------- .../net/ethernet/stmicro/stmmac/enh_desc.c | 28 ++++++++++++------- .../ethernet/stmicro/stmmac/stmmac_ethtool.c | 19 +++++++------ 5 files changed, 69 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 6fc214ce2958..6d2de4e01f6d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -120,14 +120,17 @@ struct stmmac_extra_stats { unsigned long ip_csum_bypassed; unsigned long ipv4_pkt_rcvd; unsigned long ipv6_pkt_rcvd; - unsigned long rx_msg_type_ext_no_ptp; - unsigned long rx_msg_type_sync; - unsigned long rx_msg_type_follow_up; - unsigned long rx_msg_type_delay_req; - unsigned long rx_msg_type_delay_resp; - unsigned long rx_msg_type_pdelay_req; - unsigned long rx_msg_type_pdelay_resp; - unsigned long rx_msg_type_pdelay_follow_up; + unsigned long no_ptp_rx_msg_type_ext; + unsigned long ptp_rx_msg_type_sync; + unsigned long ptp_rx_msg_type_follow_up; + unsigned long ptp_rx_msg_type_delay_req; + unsigned long ptp_rx_msg_type_delay_resp; + unsigned long ptp_rx_msg_type_pdelay_req; + unsigned long ptp_rx_msg_type_pdelay_resp; + unsigned long ptp_rx_msg_type_pdelay_follow_up; + unsigned long ptp_rx_msg_type_announce; + unsigned long ptp_rx_msg_type_management; + unsigned long ptp_rx_msg_pkt_reserved_type; unsigned long ptp_frame_type; unsigned long ptp_ver; unsigned long timestamp_dropped; diff --git a/drivers/net/ethernet/stmicro/stmmac/descs.h b/drivers/net/ethernet/stmicro/stmmac/descs.h index 2e4c171a2b41..e3c86d422109 100644 --- a/drivers/net/ethernet/stmicro/stmmac/descs.h +++ b/drivers/net/ethernet/stmicro/stmmac/descs.h @@ -155,14 +155,18 @@ #define ERDES4_L3_L4_FILT_NO_MATCH_MASK GENMASK(27, 26) /* Extended RDES4 message type definitions */ -#define RDES_EXT_NO_PTP 0 -#define RDES_EXT_SYNC 1 -#define RDES_EXT_FOLLOW_UP 2 -#define RDES_EXT_DELAY_REQ 3 -#define RDES_EXT_DELAY_RESP 4 -#define RDES_EXT_PDELAY_REQ 5 -#define RDES_EXT_PDELAY_RESP 6 -#define RDES_EXT_PDELAY_FOLLOW_UP 7 +#define RDES_EXT_NO_PTP 0x0 +#define RDES_EXT_SYNC 0x1 +#define RDES_EXT_FOLLOW_UP 0x2 +#define RDES_EXT_DELAY_REQ 0x3 +#define RDES_EXT_DELAY_RESP 0x4 +#define RDES_EXT_PDELAY_REQ 0x5 +#define RDES_EXT_PDELAY_RESP 0x6 +#define RDES_EXT_PDELAY_FOLLOW_UP 0x7 +#define RDES_PTP_ANNOUNCE 0x8 +#define RDES_PTP_MANAGEMENT 0x9 +#define RDES_PTP_SIGNALING 0xa +#define RDES_PTP_PKT_RESERVED_TYPE 0xf /* Basic descriptor structure for normal and alternate descriptors */ struct dma_desc { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 2ef2f0c03e76..a601f8d43b75 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -123,22 +123,29 @@ static int dwmac4_wrback_get_rx_status(void *data, struct stmmac_extra_stats *x, x->ipv4_pkt_rcvd++; if (rdes1 & RDES1_IPV6_HEADER) x->ipv6_pkt_rcvd++; - if (message_type == RDES_EXT_SYNC) - x->rx_msg_type_sync++; + + if (message_type == RDES_EXT_NO_PTP) + x->no_ptp_rx_msg_type_ext++; + else if (message_type == RDES_EXT_SYNC) + x->ptp_rx_msg_type_sync++; else if (message_type == RDES_EXT_FOLLOW_UP) - x->rx_msg_type_follow_up++; + x->ptp_rx_msg_type_follow_up++; else if (message_type == RDES_EXT_DELAY_REQ) - x->rx_msg_type_delay_req++; + x->ptp_rx_msg_type_delay_req++; else if (message_type == RDES_EXT_DELAY_RESP) - x->rx_msg_type_delay_resp++; + x->ptp_rx_msg_type_delay_resp++; else if (message_type == RDES_EXT_PDELAY_REQ) - x->rx_msg_type_pdelay_req++; + x->ptp_rx_msg_type_pdelay_req++; else if (message_type == RDES_EXT_PDELAY_RESP) - x->rx_msg_type_pdelay_resp++; + x->ptp_rx_msg_type_pdelay_resp++; else if (message_type == RDES_EXT_PDELAY_FOLLOW_UP) - x->rx_msg_type_pdelay_follow_up++; - else - x->rx_msg_type_ext_no_ptp++; + x->ptp_rx_msg_type_pdelay_follow_up++; + else if (message_type == RDES_PTP_ANNOUNCE) + x->ptp_rx_msg_type_announce++; + else if (message_type == RDES_PTP_MANAGEMENT) + x->ptp_rx_msg_type_management++; + else if (message_type == RDES_PTP_PKT_RESERVED_TYPE) + x->ptp_rx_msg_pkt_reserved_type++; if (rdes1 & RDES1_PTP_PACKET_TYPE) x->ptp_frame_type++; diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index 38f19c99cf59..e75549327c34 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -150,22 +150,30 @@ static void enh_desc_get_ext_status(void *data, struct stmmac_extra_stats *x, x->ipv4_pkt_rcvd++; if (rdes4 & ERDES4_IPV6_PKT_RCVD) x->ipv6_pkt_rcvd++; - if (message_type == RDES_EXT_SYNC) - x->rx_msg_type_sync++; + + if (message_type == RDES_EXT_NO_PTP) + x->no_ptp_rx_msg_type_ext++; + else if (message_type == RDES_EXT_SYNC) + x->ptp_rx_msg_type_sync++; else if (message_type == RDES_EXT_FOLLOW_UP) - x->rx_msg_type_follow_up++; + x->ptp_rx_msg_type_follow_up++; else if (message_type == RDES_EXT_DELAY_REQ) - x->rx_msg_type_delay_req++; + x->ptp_rx_msg_type_delay_req++; else if (message_type == RDES_EXT_DELAY_RESP) - x->rx_msg_type_delay_resp++; + x->ptp_rx_msg_type_delay_resp++; else if (message_type == RDES_EXT_PDELAY_REQ) - x->rx_msg_type_pdelay_req++; + x->ptp_rx_msg_type_pdelay_req++; else if (message_type == RDES_EXT_PDELAY_RESP) - x->rx_msg_type_pdelay_resp++; + x->ptp_rx_msg_type_pdelay_resp++; else if (message_type == RDES_EXT_PDELAY_FOLLOW_UP) - x->rx_msg_type_pdelay_follow_up++; - else - x->rx_msg_type_ext_no_ptp++; + x->ptp_rx_msg_type_pdelay_follow_up++; + else if (message_type == RDES_PTP_ANNOUNCE) + x->ptp_rx_msg_type_announce++; + else if (message_type == RDES_PTP_MANAGEMENT) + x->ptp_rx_msg_type_management++; + else if (message_type == RDES_PTP_PKT_RESERVED_TYPE) + x->ptp_rx_msg_pkt_reserved_type++; + if (rdes4 & ERDES4_PTP_FRAME_TYPE) x->ptp_frame_type++; if (rdes4 & ERDES4_PTP_VER) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 1e06173fc9d7..c5d0142adda2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -115,14 +115,17 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = { STMMAC_STAT(ip_csum_bypassed), STMMAC_STAT(ipv4_pkt_rcvd), STMMAC_STAT(ipv6_pkt_rcvd), - STMMAC_STAT(rx_msg_type_ext_no_ptp), - STMMAC_STAT(rx_msg_type_sync), - STMMAC_STAT(rx_msg_type_follow_up), - STMMAC_STAT(rx_msg_type_delay_req), - STMMAC_STAT(rx_msg_type_delay_resp), - STMMAC_STAT(rx_msg_type_pdelay_req), - STMMAC_STAT(rx_msg_type_pdelay_resp), - STMMAC_STAT(rx_msg_type_pdelay_follow_up), + STMMAC_STAT(no_ptp_rx_msg_type_ext), + STMMAC_STAT(ptp_rx_msg_type_sync), + STMMAC_STAT(ptp_rx_msg_type_follow_up), + STMMAC_STAT(ptp_rx_msg_type_delay_req), + STMMAC_STAT(ptp_rx_msg_type_delay_resp), + STMMAC_STAT(ptp_rx_msg_type_pdelay_req), + STMMAC_STAT(ptp_rx_msg_type_pdelay_resp), + STMMAC_STAT(ptp_rx_msg_type_pdelay_follow_up), + STMMAC_STAT(ptp_rx_msg_type_announce), + STMMAC_STAT(ptp_rx_msg_type_management), + STMMAC_STAT(ptp_rx_msg_pkt_reserved_type), STMMAC_STAT(ptp_frame_type), STMMAC_STAT(ptp_ver), STMMAC_STAT(timestamp_dropped), From c7a4e3d8c0d43a4f31f8b2ccf476e5a26eb85142 Mon Sep 17 00:00:00 2001 From: Alexander Kochetkov Date: Mon, 14 Nov 2016 16:32:52 +0300 Subject: [PATCH 277/503] net: arc_emac: annonce IFF_MULTICAST support Multicast support was implemented by commit 775dd682e2b0ec7 ('arc_emac: implement promiscuous mode and multicast filtering'). It can be enabled explicity using 'ifconfig eth0 multicast'. The patch is needed in order to remove explicit configuration as most devices has multicast mode enabled by default. Signed-off-by: Alexander Kochetkov Signed-off-by: David S. Miller --- drivers/net/ethernet/arc/emac_main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index b0da9693f28a..2e4ee86a7e51 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -764,8 +764,6 @@ int arc_emac_probe(struct net_device *ndev, int interface) ndev->netdev_ops = &arc_emac_netdev_ops; ndev->ethtool_ops = &arc_emac_ethtool_ops; ndev->watchdog_timeo = TX_TIMEOUT; - /* FIXME :: no multicast support yet */ - ndev->flags &= ~IFF_MULTICAST; priv = netdev_priv(ndev); priv->dev = dev; From d0e3f65b34c528ec2b7d1ba9a620b483f71788d3 Mon Sep 17 00:00:00 2001 From: Alexander Kochetkov Date: Mon, 14 Nov 2016 16:32:53 +0300 Subject: [PATCH 278/503] net: arc_emac: don't pass multicast packets to kernel in non-multicast mode The patch disable capturing multicast packets when multicast mode disabled for ethernet ('ifconfig eth0 -multicast'). In that case no multicast packet will be passed to kernel. Signed-off-by: Alexander Kochetkov Signed-off-by: David S. Miller --- drivers/net/ethernet/arc/emac_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index 2e4ee86a7e51..be865b4dada2 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -460,7 +460,7 @@ static void arc_emac_set_rx_mode(struct net_device *ndev) if (ndev->flags & IFF_ALLMULTI) { arc_reg_set(priv, R_LAFL, ~0); arc_reg_set(priv, R_LAFH, ~0); - } else { + } else if (ndev->flags & IFF_MULTICAST) { struct netdev_hw_addr *ha; unsigned int filter[2] = { 0, 0 }; int bit; @@ -472,6 +472,9 @@ static void arc_emac_set_rx_mode(struct net_device *ndev) arc_reg_set(priv, R_LAFL, filter[0]); arc_reg_set(priv, R_LAFH, filter[1]); + } else { + arc_reg_set(priv, R_LAFL, 0); + arc_reg_set(priv, R_LAFH, 0); } } } From 73e2d5e34b6cdd1080038daf3d6d6d744a9eefe6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Mon, 14 Nov 2016 23:40:30 +0100 Subject: [PATCH 279/503] udp: restore UDPlite many-cast delivery Honor udptable parameter that is passed to __udp*_lib_mcast_deliver(), otherwise udplite broadcast/multicast use the wrong table and it breaks. Fixes: 2dc41cff7545 ("udp: Use hash2 for long hash1 chains in __udp*_lib_mcast_deliver.") Signed-off-by: Pablo Neira Ayuso Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 6 +++--- net/ipv6/udp.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d123d68f4d1d..0de9d5d2b9ae 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1652,10 +1652,10 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, if (use_hash2) { hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) & - udp_table.mask; - hash2 = udp4_portaddr_hash(net, daddr, hnum) & udp_table.mask; + udptable->mask; + hash2 = udp4_portaddr_hash(net, daddr, hnum) & udptable->mask; start_lookup: - hslot = &udp_table.hash2[hash2]; + hslot = &udptable->hash2[hash2]; offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b2ef061e6836..e5056d4873d1 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -706,10 +706,10 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, if (use_hash2) { hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) & - udp_table.mask; - hash2 = udp6_portaddr_hash(net, daddr, hnum) & udp_table.mask; + udptable->mask; + hash2 = udp6_portaddr_hash(net, daddr, hnum) & udptable->mask; start_lookup: - hslot = &udp_table.hash2[hash2]; + hslot = &udptable->hash2[hash2]; offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); } From e88a2766143a27bfe6704b4493b214de4094cf29 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Nov 2016 16:28:42 -0800 Subject: [PATCH 280/503] gro_cells: mark napi struct as not busy poll candidates Rolf Neugebauer reported very long delays at netns dismantle. Eric W. Biederman was kind enough to look at this problem and noticed synchronize_net() occurring from netif_napi_del() that was added in linux-4.5 Busy polling makes no sense for tunnels NAPI. If busy poll is used for sessions over tunnels, the poller will need to poll the physical device queue anyway. netif_tx_napi_add() could be used here, but function name is misleading, and renaming it is not stable material, so set NAPI_STATE_NO_BUSY_POLL bit directly. This will avoid inserting gro_cells napi structures in napi_hash[] and avoid the problematic synchronize_net() (per possible cpu) that Rolf reported. Fixes: 93d05d4a320c ("net: provide generic busy polling to all NAPI drivers") Signed-off-by: Eric Dumazet Reported-by: Rolf Neugebauer Reported-by: Eric W. Biederman Acked-by: Cong Wang Tested-by: Rolf Neugebauer Signed-off-by: David S. Miller --- include/net/gro_cells.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/net/gro_cells.h b/include/net/gro_cells.h index d15214d673b2..2a1abbf8da74 100644 --- a/include/net/gro_cells.h +++ b/include/net/gro_cells.h @@ -68,6 +68,9 @@ static inline int gro_cells_init(struct gro_cells *gcells, struct net_device *de struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); __skb_queue_head_init(&cell->napi_skbs); + + set_bit(NAPI_STATE_NO_BUSY_POLL, &cell->napi.state); + netif_napi_add(dev, &cell->napi, gro_cell_poll, 64); napi_enable(&cell->napi); } From 7e75f74a171a8146cc3ee92d5562878b40c25fb5 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 15 Nov 2016 10:39:03 +0100 Subject: [PATCH 281/503] rtnetlink: fix rtnl_vfinfo_size The size reported by rtnl_vfinfo_size doesn't match the space used by rtnl_fill_vfinfo. rtnl_vfinfo_size currently doesn't account for the nest attributes used by statistics (added in commit 3b766cd83232), nor for struct ifla_vf_tx_rate (since commit ed616689a3d9, which added ifla_vf_rate to the dump without removing ifla_vf_tx_rate, but replaced ifla_vf_tx_rate with ifla_vf_rate in the size computation). Fixes: 3b766cd83232 ("net/core: Add reading VF statistics through the PF netdevice") Fixes: ed616689a3d9 ("net-next:v4: Add support to configure SR-IOV VF minimum and maximum Tx rate through ip tool") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index db313ec7af32..96f4bf274e30 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -840,18 +840,20 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, if (dev->dev.parent && dev_is_pci(dev->dev.parent) && (ext_filter_mask & RTEXT_FILTER_VF)) { int num_vfs = dev_num_vf(dev->dev.parent); - size_t size = nla_total_size(sizeof(struct nlattr)); - size += nla_total_size(num_vfs * sizeof(struct nlattr)); + size_t size = nla_total_size(0); size += num_vfs * - (nla_total_size(sizeof(struct ifla_vf_mac)) + - nla_total_size(MAX_VLAN_LIST_LEN * - sizeof(struct nlattr)) + + (nla_total_size(0) + + nla_total_size(sizeof(struct ifla_vf_mac)) + + nla_total_size(sizeof(struct ifla_vf_vlan)) + + nla_total_size(0) + /* nest IFLA_VF_VLAN_LIST */ nla_total_size(MAX_VLAN_LIST_LEN * sizeof(struct ifla_vf_vlan_info)) + nla_total_size(sizeof(struct ifla_vf_spoofchk)) + + nla_total_size(sizeof(struct ifla_vf_tx_rate)) + nla_total_size(sizeof(struct ifla_vf_rate)) + nla_total_size(sizeof(struct ifla_vf_link_state)) + nla_total_size(sizeof(struct ifla_vf_rss_query_en)) + + nla_total_size(0) + /* nest IFLA_VF_STATS */ /* IFLA_VF_STATS_RX_PACKETS */ nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_TX_PACKETS */ From b3cfaa31e3851c743d3f9d3441710f7ff6f7e868 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 15 Nov 2016 11:16:35 +0100 Subject: [PATCH 282/503] rtnetlink: fix rtnl message size computation for XDP rtnl_xdp_size() only considers the size of the actual payload attribute, and misses the space taken by the attribute used for nesting (IFLA_XDP). Fixes: d1fdd9138682 ("rtnl: add option for setting link xdp prog") Signed-off-by: Sabrina Dubroca Reviewed-by: Brenden Blanco Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 96f4bf274e30..a6529c55ffb7 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -901,7 +901,8 @@ static size_t rtnl_port_size(const struct net_device *dev, static size_t rtnl_xdp_size(const struct net_device *dev) { - size_t xdp_size = nla_total_size(1); /* XDP_ATTACHED */ + size_t xdp_size = nla_total_size(0) + /* nest IFLA_XDP */ + nla_total_size(1); /* XDP_ATTACHED */ if (!dev->netdev_ops->ndo_xdp) return 0; From bc9db5ad3253c8e17969bd802c47b73e63f125ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 11 Nov 2016 19:14:24 +0200 Subject: [PATCH 283/503] drm/i915: Assume non-DP++ port if dvo_port is HDMI and there's no AUX ch specified in the VBT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My heuristic for detecting type 1 DVI DP++ adaptors based on the VBT port information apparently didn't survive the reality of buggy VBTs. In this particular case we have a machine with a natice HDMI port, but the VBT tells us it's a DP++ port based on its capabilities. The dvo_port information in VBT does claim that we're dealing with a HDMI port though, but we have other machines which do the same even when they actually have DP++ ports. So that piece of information alone isn't sufficient to tell the two apart. After staring at a bunch of VBTs from various machines, I have to conclude that the only other semi-reliable clue we can use is the presence of the AUX channel in the VBT. On this particular machine AUX channel is specified as zero, whereas on some of the other machines which listed the DP++ port as HDMI have a non-zero AUX channel. I've also seen VBTs which have dvo_port a DP but have a zero AUX channel. I believe those we need to treat as DP ports, so we'll limit the AUX channel check to just the cases where dvo_port is HDMI. If we encounter any more serious failures with this heuristic I think we'll have to have to throw it out entirely. But that could mean that there is a risk of type 1 DVI dongle users getting greeted by a black screen, so I'd rather not go there unless absolutely necessary. v2: Remove the duplicate PORT_A check (Daniel) Fix some typos in the commit message Cc: Daniel Otero Cc: stable@vger.kernel.org Tested-by: Daniel Otero Fixes: d61992565bd3 ("drm/i915: Determine DP++ type 1 DVI adaptor presence based on VBT") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97994 Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1478884464-14251-1-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Daniel Vetter (cherry picked from commit 7a17995a3dc8613f778a9e2fd20e870f17789544) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_bios.c | 30 ++++++++++++++++++++------- drivers/gpu/drm/i915/intel_vbt_defs.h | 3 ++- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 1f8af87c6294..cf2560708e03 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1143,7 +1143,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, if (!child) return; - aux_channel = child->raw[25]; + aux_channel = child->common.aux_channel; ddc_pin = child->common.ddc_pin; is_dvi = child->common.device_type & DEVICE_TYPE_TMDS_DVI_SIGNALING; @@ -1673,7 +1673,8 @@ bool intel_bios_is_port_edp(struct drm_i915_private *dev_priv, enum port port) return false; } -bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, enum port port) +static bool child_dev_is_dp_dual_mode(const union child_device_config *p_child, + enum port port) { static const struct { u16 dp, hdmi; @@ -1687,22 +1688,35 @@ bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, enum por [PORT_D] = { DVO_PORT_DPD, DVO_PORT_HDMID, }, [PORT_E] = { DVO_PORT_DPE, DVO_PORT_HDMIE, }, }; - int i; if (port == PORT_A || port >= ARRAY_SIZE(port_mapping)) return false; - if (!dev_priv->vbt.child_dev_num) + if ((p_child->common.device_type & DEVICE_TYPE_DP_DUAL_MODE_BITS) != + (DEVICE_TYPE_DP_DUAL_MODE & DEVICE_TYPE_DP_DUAL_MODE_BITS)) return false; + if (p_child->common.dvo_port == port_mapping[port].dp) + return true; + + /* Only accept a HDMI dvo_port as DP++ if it has an AUX channel */ + if (p_child->common.dvo_port == port_mapping[port].hdmi && + p_child->common.aux_channel != 0) + return true; + + return false; +} + +bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, + enum port port) +{ + int i; + for (i = 0; i < dev_priv->vbt.child_dev_num; i++) { const union child_device_config *p_child = &dev_priv->vbt.child_dev[i]; - if ((p_child->common.dvo_port == port_mapping[port].dp || - p_child->common.dvo_port == port_mapping[port].hdmi) && - (p_child->common.device_type & DEVICE_TYPE_DP_DUAL_MODE_BITS) == - (DEVICE_TYPE_DP_DUAL_MODE & DEVICE_TYPE_DP_DUAL_MODE_BITS)) + if (child_dev_is_dp_dual_mode(p_child, port)) return true; } diff --git a/drivers/gpu/drm/i915/intel_vbt_defs.h b/drivers/gpu/drm/i915/intel_vbt_defs.h index 68db9621f1f0..8886cab19f98 100644 --- a/drivers/gpu/drm/i915/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/intel_vbt_defs.h @@ -280,7 +280,8 @@ struct common_child_dev_config { u8 dp_support:1; u8 tmds_support:1; u8 support_reserved:5; - u8 not_common3[12]; + u8 aux_channel; + u8 not_common3[11]; u8 iboost_level; } __packed; From 9164b4ceb7b492a77c7fe770a4b9d1375c9cd45a Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Tue, 15 Nov 2016 13:01:57 +0100 Subject: [PATCH 284/503] x86/sysfb: Add support for 64bit EFI lfb_base The screen_info object was extended to support 64-bit lfb_base addresses in: ae2ee627dc87 ("efifb: Add support for 64-bit frame buffer addresses") However, the x86 simple-framebuffer setup code never made use of it. Fix it to properly assemble and verify the lfb_base before advertising simple-framebuffer devices. In particular, this means if VIDEO_CAPABILITY_64BIT_BASE is set, the screen_info->ext_lfb_base field will contain the upper 32bit of the actual lfb_base. Make sure the address is not 0 (i.e., unset), as well as does not overflow the physical address type. Signed-off-by: David Herrmann Acked-by: Thomas Gleixner Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Tom Gundersen Link: http://lkml.kernel.org/r/20161115120158.15388-2-dh.herrmann@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/sysfb_simplefb.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c index 764a29f84de7..35b86415871f 100644 --- a/arch/x86/kernel/sysfb_simplefb.c +++ b/arch/x86/kernel/sysfb_simplefb.c @@ -67,6 +67,20 @@ __init int create_simplefb(const struct screen_info *si, struct platform_device *pd; struct resource res; unsigned long len; + u64 base; + + /* + * If the 64BIT_BASE capability is set, ext_lfb_base will contain the + * upper half of the base address. Assemble the address, then make sure + * it is valid and we can actually access it. + */ + base = si->lfb_base; + if (si->capabilities & VIDEO_CAPABILITY_64BIT_BASE) + base |= (u64)si->ext_lfb_base << 32; + if (!base || (u64)(resource_size_t)base != base) { + printk(KERN_DEBUG "sysfb: inaccessible VRAM base\n"); + return -EINVAL; + } /* don't use lfb_size as it may contain the whole VMEM instead of only * the part that is occupied by the framebuffer */ @@ -81,8 +95,8 @@ __init int create_simplefb(const struct screen_info *si, memset(&res, 0, sizeof(res)); res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; res.name = simplefb_resname; - res.start = si->lfb_base; - res.end = si->lfb_base + len - 1; + res.start = base; + res.end = res.start + len - 1; if (res.end <= res.start) return -EINVAL; From f96acec8c8020807429d21324547f4b904c37177 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Tue, 15 Nov 2016 13:01:58 +0100 Subject: [PATCH 285/503] x86/sysfb: Fix lfb_size calculation The screen_info.lfb_size field is shifted by 16 bits *only* in case of VBE. This has historical reasons since VBE advertised it similarly. However, in case of EFI framebuffers, the size is no longer shifted. Fix the x86 simple-framebuffer setup code to use the correct size in the non-VBE case. While at it, avoid variable abbreviations and rename 'len' to 'length', and use the correct types matching the screen_info definition. Signed-off-by: David Herrmann Acked-by: Thomas Gleixner Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Tom Gundersen Link: http://lkml.kernel.org/r/20161115120158.15388-3-dh.herrmann@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/sysfb_simplefb.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c index 35b86415871f..85195d447a92 100644 --- a/arch/x86/kernel/sysfb_simplefb.c +++ b/arch/x86/kernel/sysfb_simplefb.c @@ -66,8 +66,8 @@ __init int create_simplefb(const struct screen_info *si, { struct platform_device *pd; struct resource res; - unsigned long len; - u64 base; + u64 base, size; + u32 length; /* * If the 64BIT_BASE capability is set, ext_lfb_base will contain the @@ -82,11 +82,20 @@ __init int create_simplefb(const struct screen_info *si, return -EINVAL; } - /* don't use lfb_size as it may contain the whole VMEM instead of only - * the part that is occupied by the framebuffer */ - len = mode->height * mode->stride; - len = PAGE_ALIGN(len); - if (len > (u64)si->lfb_size << 16) { + /* + * Don't use lfb_size as IORESOURCE size, since it may contain the + * entire VMEM, and thus require huge mappings. Use just the part we + * need, that is, the part where the framebuffer is located. But verify + * that it does not exceed the advertised VMEM. + * Note that in case of VBE, the lfb_size is shifted by 16 bits for + * historical reasons. + */ + size = si->lfb_size; + if (si->orig_video_isVGA == VIDEO_TYPE_VLFB) + size <<= 16; + length = mode->height * mode->stride; + length = PAGE_ALIGN(length); + if (length > size) { printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n"); return -EINVAL; } @@ -96,7 +105,7 @@ __init int create_simplefb(const struct screen_info *si, res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; res.name = simplefb_resname; res.start = base; - res.end = res.start + len - 1; + res.end = res.start + length - 1; if (res.end <= res.start) return -EINVAL; From c499336cea8bbe15554c6fcea2138658c5395bfe Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 15 Nov 2016 13:40:10 -0500 Subject: [PATCH 286/503] perf/x86/uncore: Fix crash by removing bogus event_list[] handling for SNB client uncore IMC Vince Weaver reported the following bug when KASAN is enabled: [ 205.748005] BUG: KASAN: slab-out-of-bounds in snb_uncore_imc_event_del+0x6c/0xa0 at addr ffff8800caa43768 [ 205.758324] Read of size 8 by task perf_fuzzer/6618 It's caused by accessing box->event_list. For client IMC, there are no generic counters. It defines its own fixed free running counters. So event_list and n_events are unused. They can be removed safely, which fixes the bug. ( There's still the separate question of how uninitialized state snuck into this data structure - but that's a separate fix. ) Reported-by: Vince Weaver Tested-by: Vince Weaver Signed-off-by: Kan Liang Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Stephane Eranian Cc: Vince Weaver Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Alexander Shishkin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@kernel.org Cc: davej@codemonkey.org.uk Cc: dvyukov@google.com Cc: eranian@gmail.com Link: http://lkml.kernel.org/r/1479235210-29090-1-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore_snb.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 81195cca7eae..a3dcc12bef4a 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -490,24 +490,12 @@ static int snb_uncore_imc_event_add(struct perf_event *event, int flags) snb_uncore_imc_event_start(event, 0); - box->n_events++; - return 0; } static void snb_uncore_imc_event_del(struct perf_event *event, int flags) { - struct intel_uncore_box *box = uncore_event_to_box(event); - int i; - snb_uncore_imc_event_stop(event, PERF_EF_UPDATE); - - for (i = 0; i < box->n_events; i++) { - if (event == box->event_list[i]) { - --box->n_events; - break; - } - } } int snb_pci2phy_map_init(int devid) From 2c8c34167c987e463d62a55384fcec7fa8d03a54 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 29 Sep 2016 12:59:39 +0300 Subject: [PATCH 287/503] mfd: lpss: Fix Intel Kaby Lake PCH-H properties There are a few issues on Intel Kaby Lake PCH-H properties added by commit a6a576b78e09 ("mfd: lpss: Add Intel Kaby Lake PCH-H PCI IDs"): - Input clock of I2C controller on Intel Kaby Lake PCH-H is 120 MHz not 133 MHz. This was probably copy-paste error from Intel Broxton I2C properties. - There is no default I2C SDA hold time specified which is used when ACPI doesn't provide it. I got information from Windows driver team that Kaby Lake PCH-H can use the same configuration than Intel Sunrisepoint PCH. - Common HS-UART properties are not used. Fix these by reusing the Sunrisepoint properties on Kaby Lake PCH-H. Fixes: a6a576b78e09 ("mfd: lpss: Add Intel Kaby Lake PCH-H PCI IDs") Reported-by: Xiang A Wang Signed-off-by: Jarkko Nikula Acked-by: Mika Westerberg Signed-off-by: Lee Jones --- drivers/mfd/intel-lpss-pci.c | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/drivers/mfd/intel-lpss-pci.c b/drivers/mfd/intel-lpss-pci.c index 3228fd182a99..9ff243970e93 100644 --- a/drivers/mfd/intel-lpss-pci.c +++ b/drivers/mfd/intel-lpss-pci.c @@ -123,19 +123,6 @@ static const struct intel_lpss_platform_info apl_i2c_info = { .properties = apl_i2c_properties, }; -static const struct intel_lpss_platform_info kbl_info = { - .clk_rate = 120000000, -}; - -static const struct intel_lpss_platform_info kbl_uart_info = { - .clk_rate = 120000000, - .clk_con_id = "baudclk", -}; - -static const struct intel_lpss_platform_info kbl_i2c_info = { - .clk_rate = 133000000, -}; - static const struct pci_device_id intel_lpss_pci_ids[] = { /* BXT A-Step */ { PCI_VDEVICE(INTEL, 0x0aac), (kernel_ulong_t)&bxt_i2c_info }, @@ -207,15 +194,15 @@ static const struct pci_device_id intel_lpss_pci_ids[] = { { PCI_VDEVICE(INTEL, 0xa161), (kernel_ulong_t)&spt_i2c_info }, { PCI_VDEVICE(INTEL, 0xa166), (kernel_ulong_t)&spt_uart_info }, /* KBL-H */ - { PCI_VDEVICE(INTEL, 0xa2a7), (kernel_ulong_t)&kbl_uart_info }, - { PCI_VDEVICE(INTEL, 0xa2a8), (kernel_ulong_t)&kbl_uart_info }, - { PCI_VDEVICE(INTEL, 0xa2a9), (kernel_ulong_t)&kbl_info }, - { PCI_VDEVICE(INTEL, 0xa2aa), (kernel_ulong_t)&kbl_info }, - { PCI_VDEVICE(INTEL, 0xa2e0), (kernel_ulong_t)&kbl_i2c_info }, - { PCI_VDEVICE(INTEL, 0xa2e1), (kernel_ulong_t)&kbl_i2c_info }, - { PCI_VDEVICE(INTEL, 0xa2e2), (kernel_ulong_t)&kbl_i2c_info }, - { PCI_VDEVICE(INTEL, 0xa2e3), (kernel_ulong_t)&kbl_i2c_info }, - { PCI_VDEVICE(INTEL, 0xa2e6), (kernel_ulong_t)&kbl_uart_info }, + { PCI_VDEVICE(INTEL, 0xa2a7), (kernel_ulong_t)&spt_uart_info }, + { PCI_VDEVICE(INTEL, 0xa2a8), (kernel_ulong_t)&spt_uart_info }, + { PCI_VDEVICE(INTEL, 0xa2a9), (kernel_ulong_t)&spt_info }, + { PCI_VDEVICE(INTEL, 0xa2aa), (kernel_ulong_t)&spt_info }, + { PCI_VDEVICE(INTEL, 0xa2e0), (kernel_ulong_t)&spt_i2c_info }, + { PCI_VDEVICE(INTEL, 0xa2e1), (kernel_ulong_t)&spt_i2c_info }, + { PCI_VDEVICE(INTEL, 0xa2e2), (kernel_ulong_t)&spt_i2c_info }, + { PCI_VDEVICE(INTEL, 0xa2e3), (kernel_ulong_t)&spt_i2c_info }, + { PCI_VDEVICE(INTEL, 0xa2e6), (kernel_ulong_t)&spt_uart_info }, { } }; MODULE_DEVICE_TABLE(pci, intel_lpss_pci_ids); From 274e43edcda6f709aa67e436b3123e45a6270923 Mon Sep 17 00:00:00 2001 From: Azhar Shaikh Date: Wed, 12 Oct 2016 10:12:20 -0700 Subject: [PATCH 288/503] mfd: intel-lpss: Do not put device in reset state on suspend Commit 41a3da2b8e163 ("mfd: intel-lpss: Save register context on suspend") saved the register context while going to suspend and also put the device in reset state. Due to the resetting of device, system cannot enter S3/S0ix states when no_console_suspend flag is enabled. The system and serial console both hang. The resetting of device is not needed while going to suspend. Hence remove this code. Cc: stable@vger.kernel.org Fixes: 41a3da2b8e163 ("mfd: intel-lpss: Save register context on suspend") Signed-off-by: Azhar Shaikh Acked-by: Mika Westerberg Reviewed-by: Andy Shevchenko Signed-off-by: Lee Jones --- drivers/mfd/intel-lpss.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/mfd/intel-lpss.c b/drivers/mfd/intel-lpss.c index 41b113875d64..70c646b0097d 100644 --- a/drivers/mfd/intel-lpss.c +++ b/drivers/mfd/intel-lpss.c @@ -502,9 +502,6 @@ int intel_lpss_suspend(struct device *dev) for (i = 0; i < LPSS_PRIV_REG_COUNT; i++) lpss->priv_ctx[i] = readl(lpss->priv + i * 4); - /* Put the device into reset state */ - writel(0, lpss->priv + LPSS_PRIV_RESETS); - return 0; } EXPORT_SYMBOL_GPL(intel_lpss_suspend); From 9600702082b29fd3f8a6d744df74ad4c48d4a432 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 17 Oct 2016 10:32:13 +0300 Subject: [PATCH 289/503] mfd: intel_soc_pmic_bxtwc: Fix usbc interrupt The wcove USB Type-C driver is currently being flooded with interrupts that are not targeted to it. The reason for that is because all CHRG first level interrupts are mapped to it. This fixes the issue by introducing separate irq for the usbc device, and mapping only USB Type-C PHY interrupts to it. Fixes: 9c6235c86332 ("mfd: intel_soc_pmic_bxtwc: Add bxt_wcove_usbc device") Signed-off-by: Heikki Krogerus Signed-off-by: Lee Jones --- drivers/mfd/intel_soc_pmic_bxtwc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/intel_soc_pmic_bxtwc.c b/drivers/mfd/intel_soc_pmic_bxtwc.c index 43e54b7e908f..f9a8c5203873 100644 --- a/drivers/mfd/intel_soc_pmic_bxtwc.c +++ b/drivers/mfd/intel_soc_pmic_bxtwc.c @@ -86,6 +86,7 @@ enum bxtwc_irqs_level2 { BXTWC_THRM2_IRQ, BXTWC_BCU_IRQ, BXTWC_ADC_IRQ, + BXTWC_USBC_IRQ, BXTWC_CHGR0_IRQ, BXTWC_CHGR1_IRQ, BXTWC_GPIO0_IRQ, @@ -111,7 +112,8 @@ static const struct regmap_irq bxtwc_regmap_irqs_level2[] = { REGMAP_IRQ_REG(BXTWC_THRM2_IRQ, 2, 0xff), REGMAP_IRQ_REG(BXTWC_BCU_IRQ, 3, 0x1f), REGMAP_IRQ_REG(BXTWC_ADC_IRQ, 4, 0xff), - REGMAP_IRQ_REG(BXTWC_CHGR0_IRQ, 5, 0x3f), + REGMAP_IRQ_REG(BXTWC_USBC_IRQ, 5, BIT(5)), + REGMAP_IRQ_REG(BXTWC_CHGR0_IRQ, 5, 0x1f), REGMAP_IRQ_REG(BXTWC_CHGR1_IRQ, 6, 0x1f), REGMAP_IRQ_REG(BXTWC_GPIO0_IRQ, 7, 0xff), REGMAP_IRQ_REG(BXTWC_GPIO1_IRQ, 8, 0x3f), @@ -146,7 +148,7 @@ static struct resource adc_resources[] = { }; static struct resource usbc_resources[] = { - DEFINE_RES_IRQ_NAMED(BXTWC_CHGR0_IRQ, "USBC"), + DEFINE_RES_IRQ(BXTWC_USBC_IRQ), }; static struct resource charger_resources[] = { From f40584200bc4af7aa4399635b9ac213c62a13ae7 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 1 Nov 2016 10:22:53 +0100 Subject: [PATCH 290/503] mfd: stmpe: Fix RESET regression on STMPE2401 Since commit c4dd1ba355aae2bc3d1213da6c66c53e3c31e028 ("mfd: stmpe: Add reset support for all STMPE variant") we're resetting the STMPE expanders before use. This caused a regression on the STMP2401 on the Nomadik NHK8815: stmpe-i2c 0-0043: stmpe2401 detected, chip id: 0x101 nmk-i2c 101f8000.i2c0: write to slave 0x43 timed out nmk-i2c 101f8000.i2c0: no ack received after address transmission stmpe-i2c 0-0044: stmpe2401 detected, chip id: 0x101 nmk-i2c 101f8000.i2c0: write to slave 0x44 timed out nmk-i2c 101f8000.i2c0: no ack received after address transmission It turns out that we start to poll for the reset bit to go low again too quickly: the STMPE2401 is not yet online and ready to be asked for the status of the RESET bit. By introducing a 10ms delay before starting to hammer the register for information, we get back to normal: stmpe-i2c 0-0043: stmpe2401 detected, chip id: 0x101 stmpe-i2c 0-0044: stmpe2401 detected, chip id: 0x101 Cc: stable@vger.kernel.org Cc: Amelie Delaunay Fixes: c4dd1ba355aa ("mfd: stmpe: Add reset support for all STMPE variant") Signed-off-by: Linus Walleij Acked-by: Patrice Chotard Signed-off-by: Lee Jones --- drivers/mfd/stmpe.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c index cfdae8a3d779..b0c7bcdaf5df 100644 --- a/drivers/mfd/stmpe.c +++ b/drivers/mfd/stmpe.c @@ -851,6 +851,8 @@ static int stmpe_reset(struct stmpe *stmpe) if (ret < 0) return ret; + msleep(10); + timeout = jiffies + msecs_to_jiffies(100); while (time_before(jiffies, timeout)) { ret = __stmpe_reg_read(stmpe, stmpe->regs[STMPE_IDX_SYS_CTRL]); From 722f191080de641f023feaa7d5648caf377844f5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 1 Nov 2016 11:38:18 +0100 Subject: [PATCH 291/503] mfd: core: Fix device reference leak in mfd_clone_cell Make sure to drop the reference taken by bus_find_device_by_name() before returning from mfd_clone_cell(). Fixes: a9bbba996302 ("mfd: add platform_device sharing support for mfd") Signed-off-by: Johan Hovold Signed-off-by: Lee Jones --- drivers/mfd/mfd-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c index 3ac486a597f3..c57e407020f1 100644 --- a/drivers/mfd/mfd-core.c +++ b/drivers/mfd/mfd-core.c @@ -399,6 +399,8 @@ int mfd_clone_cell(const char *cell, const char **clones, size_t n_clones) clones[i]); } + put_device(dev); + return 0; } EXPORT_SYMBOL(mfd_clone_cell); From 2ab13292d7a314fa45de0acc808e41aaad31989c Mon Sep 17 00:00:00 2001 From: Paul Jakma Date: Wed, 16 Nov 2016 10:13:49 +0000 Subject: [PATCH 292/503] USB: serial: cp210x: add ID for the Zone DPMX The BRIM Brothers Zone DPMX is a bicycle powermeter. This ID is for the USB serial interface in its charging dock for the control pods, via which some settings for the pods can be modified. Signed-off-by: Paul Jakma Cc: Barry Redmond Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index f61477bed3a8..243ac5ebe46a 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -131,6 +131,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */ { USB_DEVICE(0x10C4, 0x88A5) }, /* Planet Innovation Ingeni ZigBee USB Device */ { USB_DEVICE(0x10C4, 0x8946) }, /* Ketra N1 Wireless Interface */ + { USB_DEVICE(0x10C4, 0x8962) }, /* Brim Brothers charging dock */ { USB_DEVICE(0x10C4, 0x8977) }, /* CEL MeshWorks DevKit Device */ { USB_DEVICE(0x10C4, 0x8998) }, /* KCF Technologies PRN */ { USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */ From 19ff7fcc76e6911a955742b40f85ba1030ccba5e Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Wed, 16 Nov 2016 11:52:19 -0500 Subject: [PATCH 293/503] orangefs: add .owner to debugfs file_operations Without ".owner = THIS_MODULE" it is possible to crash the kernel by unloading the Orangefs module while someone is reading debugfs files. Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-debugfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index d484068ca716..38887cc5577f 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -114,6 +114,7 @@ static const struct seq_operations help_debug_ops = { }; const struct file_operations debug_help_fops = { + .owner = THIS_MODULE, .open = orangefs_debug_help_open, .read = seq_read, .release = seq_release, @@ -121,6 +122,7 @@ const struct file_operations debug_help_fops = { }; static const struct file_operations kernel_debug_fops = { + .owner = THIS_MODULE, .open = orangefs_debug_open, .read = orangefs_debug_read, .write = orangefs_debug_write, From 4cb19355ea19995941ccaad115dbfac6b75215ca Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 16 Nov 2016 09:00:38 -0800 Subject: [PATCH 294/503] device-dax: fail all private mapping attempts The device-dax implementation originally tried to be tricky and allow private read-only mappings, but in the process allowed writable MAP_PRIVATE + MAP_NORESERVE mappings. For simplicity and predictability just fail all private mapping attempts since device-dax memory is statically allocated and will never support overcommit. Cc: Cc: Dave Hansen Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap") Reported-by: Pawel Lebioda Signed-off-by: Dan Williams --- drivers/dax/dax.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c index 0e499bfca41c..3d94ff20fdca 100644 --- a/drivers/dax/dax.c +++ b/drivers/dax/dax.c @@ -270,8 +270,8 @@ static int check_vma(struct dax_dev *dax_dev, struct vm_area_struct *vma, if (!dax_dev->alive) return -ENXIO; - /* prevent private / writable mappings from being established */ - if ((vma->vm_flags & (VM_NORESERVE|VM_SHARED|VM_WRITE)) == VM_WRITE) { + /* prevent private mappings from being established */ + if ((vma->vm_flags & VM_SHARED) != VM_SHARED) { dev_info(dev, "%s: %s: fail, attempted private mapping\n", current->comm, func); return -EINVAL; From f23cc643f9baec7f71f2b74692da3cf03abbbfda Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 14 Nov 2016 15:45:36 -0500 Subject: [PATCH 295/503] bpf: fix range arithmetic for bpf map access I made some invalid assumptions with BPF_AND and BPF_MOD that could result in invalid accesses to bpf map entries. Fix this up by doing a few things 1) Kill BPF_MOD support. This doesn't actually get used by the compiler in real life and just adds extra complexity. 2) Fix the logic for BPF_AND, don't allow AND of negative numbers and set the minimum value to 0 for positive AND's. 3) Don't do operations on the ranges if they are set to the limits, as they are by definition undefined, and allowing arithmetic operations on those values could make them appear valid when they really aren't. This fixes the testcase provided by Jann as well as a few other theoretical problems. Reported-by: Jann Horn Signed-off-by: Josef Bacik Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 5 +-- kernel/bpf/verifier.c | 70 ++++++++++++++++++++++++------------ 2 files changed, 50 insertions(+), 25 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7035b997aaa5..6aaf425cebc3 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -14,7 +14,7 @@ * are obviously wrong for any sort of memory access. */ #define BPF_REGISTER_MAX_RANGE (1024 * 1024 * 1024) -#define BPF_REGISTER_MIN_RANGE -(1024 * 1024 * 1024) +#define BPF_REGISTER_MIN_RANGE -1 struct bpf_reg_state { enum bpf_reg_type type; @@ -22,7 +22,8 @@ struct bpf_reg_state { * Used to determine if any memory access using this register will * result in a bad access. */ - u64 min_value, max_value; + s64 min_value; + u64 max_value; union { /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ s64 imm; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 99a7e5b388f2..6a936159c6e0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -216,8 +216,8 @@ static void print_verifier_state(struct bpf_verifier_state *state) reg->map_ptr->key_size, reg->map_ptr->value_size); if (reg->min_value != BPF_REGISTER_MIN_RANGE) - verbose(",min_value=%llu", - (unsigned long long)reg->min_value); + verbose(",min_value=%lld", + (long long)reg->min_value); if (reg->max_value != BPF_REGISTER_MAX_RANGE) verbose(",max_value=%llu", (unsigned long long)reg->max_value); @@ -758,7 +758,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, * index'es we need to make sure that whatever we use * will have a set floor within our range. */ - if ((s64)reg->min_value < 0) { + if (reg->min_value < 0) { verbose("R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", regno); return -EACCES; @@ -1468,7 +1468,8 @@ static void check_reg_overflow(struct bpf_reg_state *reg) { if (reg->max_value > BPF_REGISTER_MAX_RANGE) reg->max_value = BPF_REGISTER_MAX_RANGE; - if ((s64)reg->min_value < BPF_REGISTER_MIN_RANGE) + if (reg->min_value < BPF_REGISTER_MIN_RANGE || + reg->min_value > BPF_REGISTER_MAX_RANGE) reg->min_value = BPF_REGISTER_MIN_RANGE; } @@ -1476,7 +1477,8 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, struct bpf_insn *insn) { struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg; - u64 min_val = BPF_REGISTER_MIN_RANGE, max_val = BPF_REGISTER_MAX_RANGE; + s64 min_val = BPF_REGISTER_MIN_RANGE; + u64 max_val = BPF_REGISTER_MAX_RANGE; bool min_set = false, max_set = false; u8 opcode = BPF_OP(insn->code); @@ -1512,22 +1514,43 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, return; } + /* If one of our values was at the end of our ranges then we can't just + * do our normal operations to the register, we need to set the values + * to the min/max since they are undefined. + */ + if (min_val == BPF_REGISTER_MIN_RANGE) + dst_reg->min_value = BPF_REGISTER_MIN_RANGE; + if (max_val == BPF_REGISTER_MAX_RANGE) + dst_reg->max_value = BPF_REGISTER_MAX_RANGE; + switch (opcode) { case BPF_ADD: - dst_reg->min_value += min_val; - dst_reg->max_value += max_val; + if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE) + dst_reg->min_value += min_val; + if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) + dst_reg->max_value += max_val; break; case BPF_SUB: - dst_reg->min_value -= min_val; - dst_reg->max_value -= max_val; + if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE) + dst_reg->min_value -= min_val; + if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) + dst_reg->max_value -= max_val; break; case BPF_MUL: - dst_reg->min_value *= min_val; - dst_reg->max_value *= max_val; + if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE) + dst_reg->min_value *= min_val; + if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) + dst_reg->max_value *= max_val; break; case BPF_AND: - /* & is special since it could end up with 0 bits set. */ - dst_reg->min_value &= min_val; + /* Disallow AND'ing of negative numbers, ain't nobody got time + * for that. Otherwise the minimum is 0 and the max is the max + * value we could AND against. + */ + if (min_val < 0) + dst_reg->min_value = BPF_REGISTER_MIN_RANGE; + else + dst_reg->min_value = 0; dst_reg->max_value = max_val; break; case BPF_LSH: @@ -1537,24 +1560,25 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, */ if (min_val > ilog2(BPF_REGISTER_MAX_RANGE)) dst_reg->min_value = BPF_REGISTER_MIN_RANGE; - else + else if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE) dst_reg->min_value <<= min_val; if (max_val > ilog2(BPF_REGISTER_MAX_RANGE)) dst_reg->max_value = BPF_REGISTER_MAX_RANGE; - else + else if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) dst_reg->max_value <<= max_val; break; case BPF_RSH: - dst_reg->min_value >>= min_val; - dst_reg->max_value >>= max_val; - break; - case BPF_MOD: - /* % is special since it is an unsigned modulus, so the floor - * will always be 0. + /* RSH by a negative number is undefined, and the BPF_RSH is an + * unsigned shift, so make the appropriate casts. */ - dst_reg->min_value = 0; - dst_reg->max_value = max_val - 1; + if (min_val < 0 || dst_reg->min_value < 0) + dst_reg->min_value = BPF_REGISTER_MIN_RANGE; + else + dst_reg->min_value = + (u64)(dst_reg->min_value) >> min_val; + if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) + dst_reg->max_value >>= max_val; break; default: reset_reg_range_values(regs, insn->dst_reg); From 3b7093346b326e5d3590c7d49f6aefe6fa5b2c9a Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 15 Nov 2016 05:46:06 -0500 Subject: [PATCH 296/503] ipv4: Restore fib_trie_flush_external function and fix call ordering The patch that removed the FIB offload infrastructure was a bit too aggressive and also removed code needed to clean up us splitting the table if additional rules were added. Specifically the function fib_trie_flush_external was called at the end of a new rule being added to flush the foreign trie entries from the main trie. I updated the code so that we only call fib_trie_flush_external on the main table so that we flush the entries for local from main. This way we don't call it for every rule change which is what was happening previously. Fixes: 347e3b28c1ba2 ("switchdev: remove FIB offload infrastructure") Reported-by: Eric Dumazet Cc: Jiri Pirko Signed-off-by: Alexander Duyck Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 + net/ipv4/fib_frontend.c | 20 +++++++++---- net/ipv4/fib_trie.c | 65 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 5 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index b9314b48e39f..f390c3bb05c5 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -243,6 +243,7 @@ int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb); int fib_table_flush(struct net *net, struct fib_table *table); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); +void fib_table_flush_external(struct fib_table *table); void fib_free_table(struct fib_table *tb); #ifndef CONFIG_IP_MULTIPLE_TABLES diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c3b80478226e..161fc0f0d752 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -151,7 +151,7 @@ static void fib_replace_table(struct net *net, struct fib_table *old, int fib_unmerge(struct net *net) { - struct fib_table *old, *new; + struct fib_table *old, *new, *main_table; /* attempt to fetch local table if it has been allocated */ old = fib_get_table(net, RT_TABLE_LOCAL); @@ -162,11 +162,21 @@ int fib_unmerge(struct net *net) if (!new) return -ENOMEM; + /* table is already unmerged */ + if (new == old) + return 0; + /* replace merged table with clean table */ - if (new != old) { - fib_replace_table(net, old, new); - fib_free_table(old); - } + fib_replace_table(net, old, new); + fib_free_table(old); + + /* attempt to fetch main table if it has been allocated */ + main_table = fib_get_table(net, RT_TABLE_MAIN); + if (!main_table) + return 0; + + /* flush local entries from main table */ + fib_table_flush_external(main_table); return 0; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 4cff74d4133f..735edc9d41a2 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1760,6 +1760,71 @@ out: return NULL; } +/* Caller must hold RTNL */ +void fib_table_flush_external(struct fib_table *tb) +{ + struct trie *t = (struct trie *)tb->tb_data; + struct key_vector *pn = t->kv; + unsigned long cindex = 1; + struct hlist_node *tmp; + struct fib_alias *fa; + + /* walk trie in reverse order */ + for (;;) { + unsigned char slen = 0; + struct key_vector *n; + + if (!(cindex--)) { + t_key pkey = pn->key; + + /* cannot resize the trie vector */ + if (IS_TRIE(pn)) + break; + + /* resize completed node */ + pn = resize(t, pn); + cindex = get_index(pkey, pn); + + continue; + } + + /* grab the next available node */ + n = get_child(pn, cindex); + if (!n) + continue; + + if (IS_TNODE(n)) { + /* record pn and cindex for leaf walking */ + pn = n; + cindex = 1ul << n->bits; + + continue; + } + + hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { + /* if alias was cloned to local then we just + * need to remove the local copy from main + */ + if (tb->tb_id != fa->tb_id) { + hlist_del_rcu(&fa->fa_list); + alias_free_mem_rcu(fa); + continue; + } + + /* record local slen */ + slen = fa->fa_slen; + } + + /* update leaf slen */ + n->slen = slen; + + if (hlist_empty(&n->leaf)) { + put_child_root(pn, n->key, NULL); + node_free(n); + } + } +} + /* Caller must hold RTNL. */ int fib_table_flush(struct net *net, struct fib_table *tb) { From 3114cdfe66c156345b0ae34e2990472f277e0c1b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 15 Nov 2016 05:46:12 -0500 Subject: [PATCH 297/503] ipv4: Fix memory leak in exception case for splitting tries Fix a small memory leak that can occur where we leak a fib_alias in the event of us not being able to insert it into the local table. Fixes: 0ddcf43d5d4a0 ("ipv4: FIB Local/MAIN table collapse") Reported-by: Eric Dumazet Signed-off-by: Alexander Duyck Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 735edc9d41a2..026f309c51e9 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1743,8 +1743,10 @@ struct fib_table *fib_trie_unmerge(struct fib_table *oldtb) local_l = fib_find_node(lt, &local_tp, l->key); if (fib_insert_alias(lt, local_tp, local_l, new_fa, - NULL, l->key)) + NULL, l->key)) { + kmem_cache_free(fn_alias_kmem, new_fa); goto out; + } } /* stop loop if key wrapped back to 0 */ From 612e94bd99912f3b2ac616c00c3dc7f166a98005 Mon Sep 17 00:00:00 2001 From: Radha Mohan Chintakuntla Date: Tue, 15 Nov 2016 17:37:16 +0530 Subject: [PATCH 298/503] net: thunderx: Introduce BGX_ID_MASK macro to extract bgx_id This patch fixes the 'bgx_id' determination on 83xx where there are 4 BGX blocks instead of 2 on other platforms. Signed-off-by: Radha Mohan Chintakuntla Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 4 ++-- drivers/net/ethernet/cavium/thunder/thunder_bgx.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 8bbaedbb7b94..050e21fbb147 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1242,8 +1242,8 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_read_config_word(pdev, PCI_DEVICE_ID, &sdevid); if (sdevid != PCI_DEVICE_ID_THUNDER_RGX) { - bgx->bgx_id = - (pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM) >> 24) & 1; + bgx->bgx_id = (pci_resource_start(pdev, + PCI_CFG_REG_BAR_NUM) >> 24) & BGX_ID_MASK; bgx->bgx_id += nic_get_node_id(pdev) * MAX_BGX_PER_NODE; bgx->max_lmac = MAX_LMAC_PER_BGX; bgx_vnic[bgx->bgx_id] = bgx; diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index d59c71e4a000..01cc7c859131 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -28,6 +28,8 @@ #define MAX_DMAC_PER_LMAC 8 #define MAX_FRAME_SIZE 9216 +#define BGX_ID_MASK 0x3 + #define MAX_DMAC_PER_LMAC_TNS_BYPASS_MODE 2 /* Registers */ From 712c3185344050c591d78584542bd945e4f6f778 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 15 Nov 2016 17:37:36 +0530 Subject: [PATCH 299/503] net: thunderx: Program LMAC credits based on MTU Programming LMAC credits taking 9K frame size by default is incorrect as for an interface which is one of the many on the same BGX/QLM no of credits available will be less as Tx FIFO will be divided across all interfaces. So let's say a BGX with 40G interface and another BGX with multiple 10G, bandwidth of 10G interfaces will be effected when traffic is running on both 40G and 10G interfaces simultaneously. This patch fixes this issue by programming credits based on netdev's MTU. Also fixed configuring MTU to HW and added CQE counter for pkts which exceed this value. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic.h | 3 +- .../net/ethernet/cavium/thunder/nic_main.c | 38 +++++++++++++------ drivers/net/ethernet/cavium/thunder/nic_reg.h | 1 + .../net/ethernet/cavium/thunder/nicvf_main.c | 38 ++++++++++--------- .../ethernet/cavium/thunder/nicvf_queues.c | 3 ++ .../ethernet/cavium/thunder/nicvf_queues.h | 2 + 6 files changed, 54 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index 30426109711c..cd2d379df5c5 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -47,7 +47,7 @@ /* Min/Max packet size */ #define NIC_HW_MIN_FRS 64 -#define NIC_HW_MAX_FRS 9200 /* 9216 max packet including FCS */ +#define NIC_HW_MAX_FRS 9190 /* Excluding L2 header and FCS */ /* Max pkinds */ #define NIC_MAX_PKIND 16 @@ -282,7 +282,6 @@ struct nicvf { u8 node; u8 cpi_alg; - u16 mtu; bool link_up; u8 duplex; u32 speed; diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 2bbf4cbf08b2..85c9e6201e8b 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "nic_reg.h" #include "nic.h" @@ -260,18 +261,31 @@ static void nic_get_bgx_stats(struct nicpf *nic, struct bgx_stats_msg *bgx) /* Update hardware min/max frame size */ static int nic_update_hw_frs(struct nicpf *nic, int new_frs, int vf) { - if ((new_frs > NIC_HW_MAX_FRS) || (new_frs < NIC_HW_MIN_FRS)) { - dev_err(&nic->pdev->dev, - "Invalid MTU setting from VF%d rejected, should be between %d and %d\n", - vf, NIC_HW_MIN_FRS, NIC_HW_MAX_FRS); - return 1; - } - new_frs += ETH_HLEN; - if (new_frs <= nic->pkind.maxlen) - return 0; + int bgx, lmac, lmac_cnt; + u64 lmac_credits; - nic->pkind.maxlen = new_frs; - nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG, *(u64 *)&nic->pkind); + if ((new_frs > NIC_HW_MAX_FRS) || (new_frs < NIC_HW_MIN_FRS)) + return 1; + + bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + lmac += bgx * MAX_LMAC_PER_BGX; + + new_frs += VLAN_ETH_HLEN + ETH_FCS_LEN + 4; + + /* Update corresponding LMAC credits */ + lmac_cnt = bgx_get_lmac_count(nic->node, bgx); + lmac_credits = nic_reg_read(nic, NIC_PF_LMAC_0_7_CREDIT + (lmac * 8)); + lmac_credits &= ~(0xFFFFFULL << 12); + lmac_credits |= (((((48 * 1024) / lmac_cnt) - new_frs) / 16) << 12); + nic_reg_write(nic, NIC_PF_LMAC_0_7_CREDIT + (lmac * 8), lmac_credits); + + /* Enforce MTU in HW + * This config is supported only from 88xx pass 2.0 onwards. + */ + if (!pass1_silicon(nic->pdev)) + nic_reg_write(nic, + NIC_PF_LMAC_0_7_CFG2 + (lmac * 8), new_frs); return 0; } @@ -464,7 +478,7 @@ static int nic_init_hw(struct nicpf *nic) /* PKIND configuration */ nic->pkind.minlen = 0; - nic->pkind.maxlen = NIC_HW_MAX_FRS + ETH_HLEN; + nic->pkind.maxlen = NIC_HW_MAX_FRS + VLAN_ETH_HLEN + ETH_FCS_LEN + 4; nic->pkind.lenerr_en = 1; nic->pkind.rx_hdr = 0; nic->pkind.hdr_sl = 0; diff --git a/drivers/net/ethernet/cavium/thunder/nic_reg.h b/drivers/net/ethernet/cavium/thunder/nic_reg.h index edf779f5a227..80d46337cf29 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_reg.h +++ b/drivers/net/ethernet/cavium/thunder/nic_reg.h @@ -106,6 +106,7 @@ #define NIC_PF_MPI_0_2047_CFG (0x210000) #define NIC_PF_RSSI_0_4097_RQ (0x220000) #define NIC_PF_LMAC_0_7_CFG (0x240000) +#define NIC_PF_LMAC_0_7_CFG2 (0x240100) #define NIC_PF_LMAC_0_7_SW_XOFF (0x242000) #define NIC_PF_LMAC_0_7_CREDIT (0x244000) #define NIC_PF_CHAN_0_255_TX_CFG (0x400000) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 45a13f718863..8f833612da77 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1189,6 +1189,17 @@ int nicvf_stop(struct net_device *netdev) return 0; } +static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu) +{ + union nic_mbx mbx = {}; + + mbx.frs.msg = NIC_MBOX_MSG_SET_MAX_FRS; + mbx.frs.max_frs = mtu; + mbx.frs.vf_id = nic->vf_id; + + return nicvf_send_msg_to_pf(nic, &mbx); +} + int nicvf_open(struct net_device *netdev) { int err, qidx; @@ -1196,8 +1207,6 @@ int nicvf_open(struct net_device *netdev) struct queue_set *qs = nic->qs; struct nicvf_cq_poll *cq_poll = NULL; - nic->mtu = netdev->mtu; - netif_carrier_off(netdev); err = nicvf_register_misc_interrupt(nic); @@ -1248,9 +1257,12 @@ int nicvf_open(struct net_device *netdev) if (nic->sqs_mode) nicvf_get_primary_vf_struct(nic); - /* Configure receive side scaling */ - if (!nic->sqs_mode) + /* Configure receive side scaling and MTU */ + if (!nic->sqs_mode) { nicvf_rss_init(nic); + if (nicvf_update_hw_max_frs(nic, netdev->mtu)) + goto cleanup; + } err = nicvf_register_interrupts(nic); if (err) @@ -1297,17 +1309,6 @@ napi_del: return err; } -static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu) -{ - union nic_mbx mbx = {}; - - mbx.frs.msg = NIC_MBOX_MSG_SET_MAX_FRS; - mbx.frs.max_frs = mtu; - mbx.frs.vf_id = nic->vf_id; - - return nicvf_send_msg_to_pf(nic, &mbx); -} - static int nicvf_change_mtu(struct net_device *netdev, int new_mtu) { struct nicvf *nic = netdev_priv(netdev); @@ -1318,10 +1319,13 @@ static int nicvf_change_mtu(struct net_device *netdev, int new_mtu) if (new_mtu < NIC_HW_MIN_FRS) return -EINVAL; + netdev->mtu = new_mtu; + + if (!netif_running(netdev)) + return 0; + if (nicvf_update_hw_max_frs(nic, new_mtu)) return -EINVAL; - netdev->mtu = new_mtu; - nic->mtu = new_mtu; return 0; } diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index a4fc50155881..f0e0ca61438e 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -1530,6 +1530,9 @@ int nicvf_check_cqe_tx_errs(struct nicvf *nic, case CQ_TX_ERROP_SUBDC_ERR: stats->tx.subdesc_err++; break; + case CQ_TX_ERROP_MAX_SIZE_VIOL: + stats->tx.max_size_exceeded++; + break; case CQ_TX_ERROP_IMM_SIZE_OFLOW: stats->tx.imm_size_oflow++; break; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h index 869f3386028b..8f4718edc0fe 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -158,6 +158,7 @@ enum CQ_TX_ERROP_E { CQ_TX_ERROP_DESC_FAULT = 0x10, CQ_TX_ERROP_HDR_CONS_ERR = 0x11, CQ_TX_ERROP_SUBDC_ERR = 0x12, + CQ_TX_ERROP_MAX_SIZE_VIOL = 0x13, CQ_TX_ERROP_IMM_SIZE_OFLOW = 0x80, CQ_TX_ERROP_DATA_SEQUENCE_ERR = 0x81, CQ_TX_ERROP_MEM_SEQUENCE_ERR = 0x82, @@ -177,6 +178,7 @@ struct cmp_queue_stats { u64 desc_fault; u64 hdr_cons_err; u64 subdesc_err; + u64 max_size_exceeded; u64 imm_size_oflow; u64 data_seq_err; u64 mem_seq_err; From cadcf95a4f70362c96a8fe39ff5d5df830d4db7f Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 15 Nov 2016 17:37:54 +0530 Subject: [PATCH 300/503] net: thunderx: Fix configuration of L3/L4 length checking This patch fixes enabling of HW verification of L3/L4 length and TCP/UDP checksum which is currently being cleared. Also fixed VLAN stripping config which is being cleared when multiqset is enabled. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index f0e0ca61438e..f914eef6573a 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -538,9 +538,12 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs, mbx.rq.cfg = (1ULL << 62) | (RQ_CQ_DROP << 8); nicvf_send_msg_to_pf(nic, &mbx); - nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, 0x00); - if (!nic->sqs_mode) + if (!nic->sqs_mode && (qidx == 0)) { + /* Enable checking L3/L4 length and TCP/UDP checksums */ + nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, + (BIT(24) | BIT(23) | BIT(21))); nicvf_config_vlan_stripping(nic, nic->netdev->features); + } /* Enable Receive queue */ memset(&rq_cfg, 0, sizeof(struct rq_cfg)); From 964cb69bdc9db255f7c3a80f6e1bed8a25e4c60e Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 15 Nov 2016 17:38:16 +0530 Subject: [PATCH 301/503] net: thunderx: Fix VF driver's interface statistics This patch fixes multiple issues 1. Convert all driver statistics to percpu counters for accuracy. 2. To avoid multiple CQEs posted by a TSO packet appended to HW, TSO pkt's SQE has 'post_cqe' not set but a dummy SQE is added for getting HW transmit completion notification. This dummy SQE has 'dont_send' set and HW drops the pkt pointed to in this thus Tx drop counter increases. This patch fixes this by subtracting SW tx tso counter from HW Tx drop counter for actual packet drop counter. 3. Reset all individual queue's and VNIC HW stats when interface is going down. 4. Getrid off unnecessary counters in hot path. 5. Bringout all CQE error stats i.e both Rx and Tx. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic.h | 59 +++++----- .../net/ethernet/cavium/thunder/nic_main.c | 1 + .../ethernet/cavium/thunder/nicvf_ethtool.c | 105 +++++++++-------- .../net/ethernet/cavium/thunder/nicvf_main.c | 106 +++++++++--------- .../ethernet/cavium/thunder/nicvf_queues.c | 96 ++++++++-------- .../ethernet/cavium/thunder/nicvf_queues.h | 24 +--- 6 files changed, 196 insertions(+), 195 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index cd2d379df5c5..86bd93ce2ea3 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -178,11 +178,11 @@ enum tx_stats_reg_offset { struct nicvf_hw_stats { u64 rx_bytes; + u64 rx_frames; u64 rx_ucast_frames; u64 rx_bcast_frames; u64 rx_mcast_frames; - u64 rx_fcs_errors; - u64 rx_l2_errors; + u64 rx_drops; u64 rx_drop_red; u64 rx_drop_red_bytes; u64 rx_drop_overrun; @@ -191,6 +191,19 @@ struct nicvf_hw_stats { u64 rx_drop_mcast; u64 rx_drop_l3_bcast; u64 rx_drop_l3_mcast; + u64 rx_fcs_errors; + u64 rx_l2_errors; + + u64 tx_bytes; + u64 tx_frames; + u64 tx_ucast_frames; + u64 tx_bcast_frames; + u64 tx_mcast_frames; + u64 tx_drops; +}; + +struct nicvf_drv_stats { + /* CQE Rx errs */ u64 rx_bgx_truncated_pkts; u64 rx_jabber_errs; u64 rx_fcs_errs; @@ -216,34 +229,30 @@ struct nicvf_hw_stats { u64 rx_l4_pclp; u64 rx_truncated_pkts; - u64 tx_bytes_ok; - u64 tx_ucast_frames_ok; - u64 tx_bcast_frames_ok; - u64 tx_mcast_frames_ok; - u64 tx_drops; -}; - -struct nicvf_drv_stats { - /* Rx */ - u64 rx_frames_ok; - u64 rx_frames_64; - u64 rx_frames_127; - u64 rx_frames_255; - u64 rx_frames_511; - u64 rx_frames_1023; - u64 rx_frames_1518; - u64 rx_frames_jumbo; - u64 rx_drops; + /* CQE Tx errs */ + u64 tx_desc_fault; + u64 tx_hdr_cons_err; + u64 tx_subdesc_err; + u64 tx_max_size_exceeded; + u64 tx_imm_size_oflow; + u64 tx_data_seq_err; + u64 tx_mem_seq_err; + u64 tx_lock_viol; + u64 tx_data_fault; + u64 tx_tstmp_conflict; + u64 tx_tstmp_timeout; + u64 tx_mem_fault; + u64 tx_csum_overlap; + u64 tx_csum_overflow; + /* driver debug stats */ u64 rcv_buffer_alloc_failures; - - /* Tx */ - u64 tx_frames_ok; - u64 tx_drops; u64 tx_tso; u64 tx_timeout; u64 txq_stop; u64 txq_wake; + + struct u64_stats_sync syncp; }; struct nicvf { @@ -297,7 +306,7 @@ struct nicvf { /* Stats */ struct nicvf_hw_stats hw_stats; - struct nicvf_drv_stats drv_stats; + struct nicvf_drv_stats __percpu *drv_stats; struct bgx_stats bgx_stats; /* MSI-X */ diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 85c9e6201e8b..6677b96e1f3f 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -851,6 +851,7 @@ static int nic_reset_stat_counters(struct nicpf *nic, nic_reg_write(nic, reg_addr, 0); } } + return 0; } diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c index ad4fddb55421..432bf6be57cb 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c @@ -36,11 +36,11 @@ struct nicvf_stat { static const struct nicvf_stat nicvf_hw_stats[] = { NICVF_HW_STAT(rx_bytes), + NICVF_HW_STAT(rx_frames), NICVF_HW_STAT(rx_ucast_frames), NICVF_HW_STAT(rx_bcast_frames), NICVF_HW_STAT(rx_mcast_frames), - NICVF_HW_STAT(rx_fcs_errors), - NICVF_HW_STAT(rx_l2_errors), + NICVF_HW_STAT(rx_drops), NICVF_HW_STAT(rx_drop_red), NICVF_HW_STAT(rx_drop_red_bytes), NICVF_HW_STAT(rx_drop_overrun), @@ -49,50 +49,59 @@ static const struct nicvf_stat nicvf_hw_stats[] = { NICVF_HW_STAT(rx_drop_mcast), NICVF_HW_STAT(rx_drop_l3_bcast), NICVF_HW_STAT(rx_drop_l3_mcast), - NICVF_HW_STAT(rx_bgx_truncated_pkts), - NICVF_HW_STAT(rx_jabber_errs), - NICVF_HW_STAT(rx_fcs_errs), - NICVF_HW_STAT(rx_bgx_errs), - NICVF_HW_STAT(rx_prel2_errs), - NICVF_HW_STAT(rx_l2_hdr_malformed), - NICVF_HW_STAT(rx_oversize), - NICVF_HW_STAT(rx_undersize), - NICVF_HW_STAT(rx_l2_len_mismatch), - NICVF_HW_STAT(rx_l2_pclp), - NICVF_HW_STAT(rx_ip_ver_errs), - NICVF_HW_STAT(rx_ip_csum_errs), - NICVF_HW_STAT(rx_ip_hdr_malformed), - NICVF_HW_STAT(rx_ip_payload_malformed), - NICVF_HW_STAT(rx_ip_ttl_errs), - NICVF_HW_STAT(rx_l3_pclp), - NICVF_HW_STAT(rx_l4_malformed), - NICVF_HW_STAT(rx_l4_csum_errs), - NICVF_HW_STAT(rx_udp_len_errs), - NICVF_HW_STAT(rx_l4_port_errs), - NICVF_HW_STAT(rx_tcp_flag_errs), - NICVF_HW_STAT(rx_tcp_offset_errs), - NICVF_HW_STAT(rx_l4_pclp), - NICVF_HW_STAT(rx_truncated_pkts), - NICVF_HW_STAT(tx_bytes_ok), - NICVF_HW_STAT(tx_ucast_frames_ok), - NICVF_HW_STAT(tx_bcast_frames_ok), - NICVF_HW_STAT(tx_mcast_frames_ok), + NICVF_HW_STAT(rx_fcs_errors), + NICVF_HW_STAT(rx_l2_errors), + NICVF_HW_STAT(tx_bytes), + NICVF_HW_STAT(tx_frames), + NICVF_HW_STAT(tx_ucast_frames), + NICVF_HW_STAT(tx_bcast_frames), + NICVF_HW_STAT(tx_mcast_frames), + NICVF_HW_STAT(tx_drops), }; static const struct nicvf_stat nicvf_drv_stats[] = { - NICVF_DRV_STAT(rx_frames_ok), - NICVF_DRV_STAT(rx_frames_64), - NICVF_DRV_STAT(rx_frames_127), - NICVF_DRV_STAT(rx_frames_255), - NICVF_DRV_STAT(rx_frames_511), - NICVF_DRV_STAT(rx_frames_1023), - NICVF_DRV_STAT(rx_frames_1518), - NICVF_DRV_STAT(rx_frames_jumbo), - NICVF_DRV_STAT(rx_drops), + NICVF_DRV_STAT(rx_bgx_truncated_pkts), + NICVF_DRV_STAT(rx_jabber_errs), + NICVF_DRV_STAT(rx_fcs_errs), + NICVF_DRV_STAT(rx_bgx_errs), + NICVF_DRV_STAT(rx_prel2_errs), + NICVF_DRV_STAT(rx_l2_hdr_malformed), + NICVF_DRV_STAT(rx_oversize), + NICVF_DRV_STAT(rx_undersize), + NICVF_DRV_STAT(rx_l2_len_mismatch), + NICVF_DRV_STAT(rx_l2_pclp), + NICVF_DRV_STAT(rx_ip_ver_errs), + NICVF_DRV_STAT(rx_ip_csum_errs), + NICVF_DRV_STAT(rx_ip_hdr_malformed), + NICVF_DRV_STAT(rx_ip_payload_malformed), + NICVF_DRV_STAT(rx_ip_ttl_errs), + NICVF_DRV_STAT(rx_l3_pclp), + NICVF_DRV_STAT(rx_l4_malformed), + NICVF_DRV_STAT(rx_l4_csum_errs), + NICVF_DRV_STAT(rx_udp_len_errs), + NICVF_DRV_STAT(rx_l4_port_errs), + NICVF_DRV_STAT(rx_tcp_flag_errs), + NICVF_DRV_STAT(rx_tcp_offset_errs), + NICVF_DRV_STAT(rx_l4_pclp), + NICVF_DRV_STAT(rx_truncated_pkts), + + NICVF_DRV_STAT(tx_desc_fault), + NICVF_DRV_STAT(tx_hdr_cons_err), + NICVF_DRV_STAT(tx_subdesc_err), + NICVF_DRV_STAT(tx_max_size_exceeded), + NICVF_DRV_STAT(tx_imm_size_oflow), + NICVF_DRV_STAT(tx_data_seq_err), + NICVF_DRV_STAT(tx_mem_seq_err), + NICVF_DRV_STAT(tx_lock_viol), + NICVF_DRV_STAT(tx_data_fault), + NICVF_DRV_STAT(tx_tstmp_conflict), + NICVF_DRV_STAT(tx_tstmp_timeout), + NICVF_DRV_STAT(tx_mem_fault), + NICVF_DRV_STAT(tx_csum_overlap), + NICVF_DRV_STAT(tx_csum_overflow), + NICVF_DRV_STAT(rcv_buffer_alloc_failures), - NICVF_DRV_STAT(tx_frames_ok), NICVF_DRV_STAT(tx_tso), - NICVF_DRV_STAT(tx_drops), NICVF_DRV_STAT(tx_timeout), NICVF_DRV_STAT(txq_stop), NICVF_DRV_STAT(txq_wake), @@ -278,8 +287,8 @@ static void nicvf_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, u64 *data) { struct nicvf *nic = netdev_priv(netdev); - int stat; - int sqs; + int stat, tmp_stats; + int sqs, cpu; nicvf_update_stats(nic); @@ -289,9 +298,13 @@ static void nicvf_get_ethtool_stats(struct net_device *netdev, for (stat = 0; stat < nicvf_n_hw_stats; stat++) *(data++) = ((u64 *)&nic->hw_stats) [nicvf_hw_stats[stat].index]; - for (stat = 0; stat < nicvf_n_drv_stats; stat++) - *(data++) = ((u64 *)&nic->drv_stats) - [nicvf_drv_stats[stat].index]; + for (stat = 0; stat < nicvf_n_drv_stats; stat++) { + tmp_stats = 0; + for_each_possible_cpu(cpu) + tmp_stats += ((u64 *)per_cpu_ptr(nic->drv_stats, cpu)) + [nicvf_drv_stats[stat].index]; + *(data++) = tmp_stats; + } nicvf_get_qset_stats(nic, stats, &data); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 8f833612da77..9dc79c0578d8 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -69,25 +69,6 @@ static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx) return qidx; } -static inline void nicvf_set_rx_frame_cnt(struct nicvf *nic, - struct sk_buff *skb) -{ - if (skb->len <= 64) - nic->drv_stats.rx_frames_64++; - else if (skb->len <= 127) - nic->drv_stats.rx_frames_127++; - else if (skb->len <= 255) - nic->drv_stats.rx_frames_255++; - else if (skb->len <= 511) - nic->drv_stats.rx_frames_511++; - else if (skb->len <= 1023) - nic->drv_stats.rx_frames_1023++; - else if (skb->len <= 1518) - nic->drv_stats.rx_frames_1518++; - else - nic->drv_stats.rx_frames_jumbo++; -} - /* The Cavium ThunderX network controller can *only* be found in SoCs * containing the ThunderX ARM64 CPU implementation. All accesses to the device * registers on this platform are implicitly strongly ordered with respect @@ -514,7 +495,6 @@ static int nicvf_init_resources(struct nicvf *nic) } static void nicvf_snd_pkt_handler(struct net_device *netdev, - struct cmp_queue *cq, struct cqe_send_t *cqe_tx, int cqe_type, int budget, unsigned int *tx_pkts, unsigned int *tx_bytes) @@ -536,7 +516,7 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, __func__, cqe_tx->sq_qs, cqe_tx->sq_idx, cqe_tx->sqe_ptr, hdr->subdesc_cnt); - nicvf_check_cqe_tx_errs(nic, cq, cqe_tx); + nicvf_check_cqe_tx_errs(nic, cqe_tx); skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr]; if (skb) { /* Check for dummy descriptor used for HW TSO offload on 88xx */ @@ -630,8 +610,6 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev, return; } - nicvf_set_rx_frame_cnt(nic, skb); - nicvf_set_rxhash(netdev, cqe_rx, skb); skb_record_rx_queue(skb, rq_idx); @@ -703,7 +681,7 @@ loop: work_done++; break; case CQE_TYPE_SEND: - nicvf_snd_pkt_handler(netdev, cq, + nicvf_snd_pkt_handler(netdev, (void *)cq_desc, CQE_TYPE_SEND, budget, &tx_pkts, &tx_bytes); tx_done++; @@ -740,7 +718,7 @@ done: nic = nic->pnicvf; if (netif_tx_queue_stopped(txq) && netif_carrier_ok(netdev)) { netif_tx_start_queue(txq); - nic->drv_stats.txq_wake++; + this_cpu_inc(nic->drv_stats->txq_wake); if (netif_msg_tx_err(nic)) netdev_warn(netdev, "%s: Transmit queue wakeup SQ%d\n", @@ -1084,7 +1062,7 @@ static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev) if (!netif_tx_queue_stopped(txq) && !nicvf_sq_append_skb(nic, skb)) { netif_tx_stop_queue(txq); - nic->drv_stats.txq_stop++; + this_cpu_inc(nic->drv_stats->txq_stop); if (netif_msg_tx_err(nic)) netdev_warn(netdev, "%s: Transmit ring full, stopping SQ%d\n", @@ -1202,7 +1180,7 @@ static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu) int nicvf_open(struct net_device *netdev) { - int err, qidx; + int cpu, err, qidx; struct nicvf *nic = netdev_priv(netdev); struct queue_set *qs = nic->qs; struct nicvf_cq_poll *cq_poll = NULL; @@ -1262,6 +1240,11 @@ int nicvf_open(struct net_device *netdev) nicvf_rss_init(nic); if (nicvf_update_hw_max_frs(nic, netdev->mtu)) goto cleanup; + + /* Clear percpu stats */ + for_each_possible_cpu(cpu) + memset(per_cpu_ptr(nic->drv_stats, cpu), 0, + sizeof(struct nicvf_drv_stats)); } err = nicvf_register_interrupts(nic); @@ -1288,9 +1271,6 @@ int nicvf_open(struct net_device *netdev) for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx); - nic->drv_stats.txq_stop = 0; - nic->drv_stats.txq_wake = 0; - return 0; cleanup: nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0); @@ -1383,9 +1363,10 @@ void nicvf_update_lmac_stats(struct nicvf *nic) void nicvf_update_stats(struct nicvf *nic) { - int qidx; + int qidx, cpu; + u64 tmp_stats = 0; struct nicvf_hw_stats *stats = &nic->hw_stats; - struct nicvf_drv_stats *drv_stats = &nic->drv_stats; + struct nicvf_drv_stats *drv_stats; struct queue_set *qs = nic->qs; #define GET_RX_STATS(reg) \ @@ -1408,21 +1389,33 @@ void nicvf_update_stats(struct nicvf *nic) stats->rx_drop_l3_bcast = GET_RX_STATS(RX_DRP_L3BCAST); stats->rx_drop_l3_mcast = GET_RX_STATS(RX_DRP_L3MCAST); - stats->tx_bytes_ok = GET_TX_STATS(TX_OCTS); - stats->tx_ucast_frames_ok = GET_TX_STATS(TX_UCAST); - stats->tx_bcast_frames_ok = GET_TX_STATS(TX_BCAST); - stats->tx_mcast_frames_ok = GET_TX_STATS(TX_MCAST); + stats->tx_bytes = GET_TX_STATS(TX_OCTS); + stats->tx_ucast_frames = GET_TX_STATS(TX_UCAST); + stats->tx_bcast_frames = GET_TX_STATS(TX_BCAST); + stats->tx_mcast_frames = GET_TX_STATS(TX_MCAST); stats->tx_drops = GET_TX_STATS(TX_DROP); - drv_stats->tx_frames_ok = stats->tx_ucast_frames_ok + - stats->tx_bcast_frames_ok + - stats->tx_mcast_frames_ok; - drv_stats->rx_frames_ok = stats->rx_ucast_frames + - stats->rx_bcast_frames + - stats->rx_mcast_frames; - drv_stats->rx_drops = stats->rx_drop_red + - stats->rx_drop_overrun; - drv_stats->tx_drops = stats->tx_drops; + /* On T88 pass 2.0, the dummy SQE added for TSO notification + * via CQE has 'dont_send' set. Hence HW drops the pkt pointed + * pointed by dummy SQE and results in tx_drops counter being + * incremented. Subtracting it from tx_tso counter will give + * exact tx_drops counter. + */ + if (nic->t88 && nic->hw_tso) { + for_each_possible_cpu(cpu) { + drv_stats = per_cpu_ptr(nic->drv_stats, cpu); + tmp_stats += drv_stats->tx_tso; + } + stats->tx_drops = tmp_stats - stats->tx_drops; + } + stats->tx_frames = stats->tx_ucast_frames + + stats->tx_bcast_frames + + stats->tx_mcast_frames; + stats->rx_frames = stats->rx_ucast_frames + + stats->rx_bcast_frames + + stats->rx_mcast_frames; + stats->rx_drops = stats->rx_drop_red + + stats->rx_drop_overrun; /* Update RQ and SQ stats */ for (qidx = 0; qidx < qs->rq_cnt; qidx++) @@ -1436,18 +1429,17 @@ static struct rtnl_link_stats64 *nicvf_get_stats64(struct net_device *netdev, { struct nicvf *nic = netdev_priv(netdev); struct nicvf_hw_stats *hw_stats = &nic->hw_stats; - struct nicvf_drv_stats *drv_stats = &nic->drv_stats; nicvf_update_stats(nic); stats->rx_bytes = hw_stats->rx_bytes; - stats->rx_packets = drv_stats->rx_frames_ok; - stats->rx_dropped = drv_stats->rx_drops; + stats->rx_packets = hw_stats->rx_frames; + stats->rx_dropped = hw_stats->rx_drops; stats->multicast = hw_stats->rx_mcast_frames; - stats->tx_bytes = hw_stats->tx_bytes_ok; - stats->tx_packets = drv_stats->tx_frames_ok; - stats->tx_dropped = drv_stats->tx_drops; + stats->tx_bytes = hw_stats->tx_bytes; + stats->tx_packets = hw_stats->tx_frames; + stats->tx_dropped = hw_stats->tx_drops; return stats; } @@ -1460,7 +1452,7 @@ static void nicvf_tx_timeout(struct net_device *dev) netdev_warn(dev, "%s: Transmit timed out, resetting\n", dev->name); - nic->drv_stats.tx_timeout++; + this_cpu_inc(nic->drv_stats->tx_timeout); schedule_work(&nic->reset_task); } @@ -1594,6 +1586,12 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_free_netdev; } + nic->drv_stats = netdev_alloc_pcpu_stats(struct nicvf_drv_stats); + if (!nic->drv_stats) { + err = -ENOMEM; + goto err_free_netdev; + } + err = nicvf_set_qset_resources(nic); if (err) goto err_free_netdev; @@ -1652,6 +1650,8 @@ err_unregister_interrupts: nicvf_unregister_interrupts(nic); err_free_netdev: pci_set_drvdata(pdev, NULL); + if (nic->drv_stats) + free_percpu(nic->drv_stats); free_netdev(netdev); err_release_regions: pci_release_regions(pdev); @@ -1679,6 +1679,8 @@ static void nicvf_remove(struct pci_dev *pdev) unregister_netdev(pnetdev); nicvf_unregister_interrupts(nic); pci_set_drvdata(pdev, NULL); + if (nic->drv_stats) + free_percpu(nic->drv_stats); free_netdev(netdev); pci_release_regions(pdev); pci_disable_device(pdev); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index f914eef6573a..bdce5915baae 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -104,7 +104,8 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp, nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, order); if (!nic->rb_page) { - nic->drv_stats.rcv_buffer_alloc_failures++; + this_cpu_inc(nic->pnicvf->drv_stats-> + rcv_buffer_alloc_failures); return -ENOMEM; } nic->rb_page_offset = 0; @@ -483,9 +484,12 @@ static void nicvf_reset_rcv_queue_stats(struct nicvf *nic) { union nic_mbx mbx = {}; - /* Reset all RXQ's stats */ + /* Reset all RQ/SQ and VF stats */ mbx.reset_stat.msg = NIC_MBOX_MSG_RESET_STAT_COUNTER; + mbx.reset_stat.rx_stat_mask = 0x3FFF; + mbx.reset_stat.tx_stat_mask = 0x1F; mbx.reset_stat.rq_stat_mask = 0xFFFF; + mbx.reset_stat.sq_stat_mask = 0xFFFF; nicvf_send_msg_to_pf(nic, &mbx); } @@ -1032,7 +1036,7 @@ nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry, hdr->tso_max_paysize = skb_shinfo(skb)->gso_size; /* For non-tunneled pkts, point this to L2 ethertype */ hdr->inner_l3_offset = skb_network_offset(skb) - 2; - nic->drv_stats.tx_tso++; + this_cpu_inc(nic->pnicvf->drv_stats->tx_tso); } } @@ -1164,7 +1168,7 @@ static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq, nicvf_sq_doorbell(nic, skb, sq_num, desc_cnt); - nic->drv_stats.tx_tso++; + this_cpu_inc(nic->pnicvf->drv_stats->tx_tso); return 1; } @@ -1425,8 +1429,6 @@ void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx) /* Check for errors in the receive cmp.queue entry */ int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx) { - struct nicvf_hw_stats *stats = &nic->hw_stats; - if (!cqe_rx->err_level && !cqe_rx->err_opcode) return 0; @@ -1438,76 +1440,76 @@ int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx) switch (cqe_rx->err_opcode) { case CQ_RX_ERROP_RE_PARTIAL: - stats->rx_bgx_truncated_pkts++; + this_cpu_inc(nic->drv_stats->rx_bgx_truncated_pkts); break; case CQ_RX_ERROP_RE_JABBER: - stats->rx_jabber_errs++; + this_cpu_inc(nic->drv_stats->rx_jabber_errs); break; case CQ_RX_ERROP_RE_FCS: - stats->rx_fcs_errs++; + this_cpu_inc(nic->drv_stats->rx_fcs_errs); break; case CQ_RX_ERROP_RE_RX_CTL: - stats->rx_bgx_errs++; + this_cpu_inc(nic->drv_stats->rx_bgx_errs); break; case CQ_RX_ERROP_PREL2_ERR: - stats->rx_prel2_errs++; + this_cpu_inc(nic->drv_stats->rx_prel2_errs); break; case CQ_RX_ERROP_L2_MAL: - stats->rx_l2_hdr_malformed++; + this_cpu_inc(nic->drv_stats->rx_l2_hdr_malformed); break; case CQ_RX_ERROP_L2_OVERSIZE: - stats->rx_oversize++; + this_cpu_inc(nic->drv_stats->rx_oversize); break; case CQ_RX_ERROP_L2_UNDERSIZE: - stats->rx_undersize++; + this_cpu_inc(nic->drv_stats->rx_undersize); break; case CQ_RX_ERROP_L2_LENMISM: - stats->rx_l2_len_mismatch++; + this_cpu_inc(nic->drv_stats->rx_l2_len_mismatch); break; case CQ_RX_ERROP_L2_PCLP: - stats->rx_l2_pclp++; + this_cpu_inc(nic->drv_stats->rx_l2_pclp); break; case CQ_RX_ERROP_IP_NOT: - stats->rx_ip_ver_errs++; + this_cpu_inc(nic->drv_stats->rx_ip_ver_errs); break; case CQ_RX_ERROP_IP_CSUM_ERR: - stats->rx_ip_csum_errs++; + this_cpu_inc(nic->drv_stats->rx_ip_csum_errs); break; case CQ_RX_ERROP_IP_MAL: - stats->rx_ip_hdr_malformed++; + this_cpu_inc(nic->drv_stats->rx_ip_hdr_malformed); break; case CQ_RX_ERROP_IP_MALD: - stats->rx_ip_payload_malformed++; + this_cpu_inc(nic->drv_stats->rx_ip_payload_malformed); break; case CQ_RX_ERROP_IP_HOP: - stats->rx_ip_ttl_errs++; + this_cpu_inc(nic->drv_stats->rx_ip_ttl_errs); break; case CQ_RX_ERROP_L3_PCLP: - stats->rx_l3_pclp++; + this_cpu_inc(nic->drv_stats->rx_l3_pclp); break; case CQ_RX_ERROP_L4_MAL: - stats->rx_l4_malformed++; + this_cpu_inc(nic->drv_stats->rx_l4_malformed); break; case CQ_RX_ERROP_L4_CHK: - stats->rx_l4_csum_errs++; + this_cpu_inc(nic->drv_stats->rx_l4_csum_errs); break; case CQ_RX_ERROP_UDP_LEN: - stats->rx_udp_len_errs++; + this_cpu_inc(nic->drv_stats->rx_udp_len_errs); break; case CQ_RX_ERROP_L4_PORT: - stats->rx_l4_port_errs++; + this_cpu_inc(nic->drv_stats->rx_l4_port_errs); break; case CQ_RX_ERROP_TCP_FLAG: - stats->rx_tcp_flag_errs++; + this_cpu_inc(nic->drv_stats->rx_tcp_flag_errs); break; case CQ_RX_ERROP_TCP_OFFSET: - stats->rx_tcp_offset_errs++; + this_cpu_inc(nic->drv_stats->rx_tcp_offset_errs); break; case CQ_RX_ERROP_L4_PCLP: - stats->rx_l4_pclp++; + this_cpu_inc(nic->drv_stats->rx_l4_pclp); break; case CQ_RX_ERROP_RBDR_TRUNC: - stats->rx_truncated_pkts++; + this_cpu_inc(nic->drv_stats->rx_truncated_pkts); break; } @@ -1515,56 +1517,52 @@ int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx) } /* Check for errors in the send cmp.queue entry */ -int nicvf_check_cqe_tx_errs(struct nicvf *nic, - struct cmp_queue *cq, struct cqe_send_t *cqe_tx) +int nicvf_check_cqe_tx_errs(struct nicvf *nic, struct cqe_send_t *cqe_tx) { - struct cmp_queue_stats *stats = &cq->stats; - switch (cqe_tx->send_status) { case CQ_TX_ERROP_GOOD: - stats->tx.good++; return 0; case CQ_TX_ERROP_DESC_FAULT: - stats->tx.desc_fault++; + this_cpu_inc(nic->drv_stats->tx_desc_fault); break; case CQ_TX_ERROP_HDR_CONS_ERR: - stats->tx.hdr_cons_err++; + this_cpu_inc(nic->drv_stats->tx_hdr_cons_err); break; case CQ_TX_ERROP_SUBDC_ERR: - stats->tx.subdesc_err++; + this_cpu_inc(nic->drv_stats->tx_subdesc_err); break; case CQ_TX_ERROP_MAX_SIZE_VIOL: - stats->tx.max_size_exceeded++; + this_cpu_inc(nic->drv_stats->tx_max_size_exceeded); break; case CQ_TX_ERROP_IMM_SIZE_OFLOW: - stats->tx.imm_size_oflow++; + this_cpu_inc(nic->drv_stats->tx_imm_size_oflow); break; case CQ_TX_ERROP_DATA_SEQUENCE_ERR: - stats->tx.data_seq_err++; + this_cpu_inc(nic->drv_stats->tx_data_seq_err); break; case CQ_TX_ERROP_MEM_SEQUENCE_ERR: - stats->tx.mem_seq_err++; + this_cpu_inc(nic->drv_stats->tx_mem_seq_err); break; case CQ_TX_ERROP_LOCK_VIOL: - stats->tx.lock_viol++; + this_cpu_inc(nic->drv_stats->tx_lock_viol); break; case CQ_TX_ERROP_DATA_FAULT: - stats->tx.data_fault++; + this_cpu_inc(nic->drv_stats->tx_data_fault); break; case CQ_TX_ERROP_TSTMP_CONFLICT: - stats->tx.tstmp_conflict++; + this_cpu_inc(nic->drv_stats->tx_tstmp_conflict); break; case CQ_TX_ERROP_TSTMP_TIMEOUT: - stats->tx.tstmp_timeout++; + this_cpu_inc(nic->drv_stats->tx_tstmp_timeout); break; case CQ_TX_ERROP_MEM_FAULT: - stats->tx.mem_fault++; + this_cpu_inc(nic->drv_stats->tx_mem_fault); break; case CQ_TX_ERROP_CK_OVERLAP: - stats->tx.csum_overlap++; + this_cpu_inc(nic->drv_stats->tx_csum_overlap); break; case CQ_TX_ERROP_CK_OFLOW: - stats->tx.csum_overflow++; + this_cpu_inc(nic->drv_stats->tx_csum_overflow); break; } diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h index 8f4718edc0fe..2e3c940c1093 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -172,26 +172,6 @@ enum CQ_TX_ERROP_E { CQ_TX_ERROP_ENUM_LAST = 0x8a, }; -struct cmp_queue_stats { - struct tx_stats { - u64 good; - u64 desc_fault; - u64 hdr_cons_err; - u64 subdesc_err; - u64 max_size_exceeded; - u64 imm_size_oflow; - u64 data_seq_err; - u64 mem_seq_err; - u64 lock_viol; - u64 data_fault; - u64 tstmp_conflict; - u64 tstmp_timeout; - u64 mem_fault; - u64 csum_overlap; - u64 csum_overflow; - } tx; -} ____cacheline_aligned_in_smp; - enum RQ_SQ_STATS { RQ_SQ_STATS_OCTS, RQ_SQ_STATS_PKTS, @@ -243,7 +223,6 @@ struct cmp_queue { spinlock_t lock; /* lock to serialize processing CQEs */ void *desc; struct q_desc_mem dmem; - struct cmp_queue_stats stats; int irq; } ____cacheline_aligned_in_smp; @@ -338,6 +317,5 @@ u64 nicvf_queue_reg_read(struct nicvf *nic, void nicvf_update_rq_stats(struct nicvf *nic, int rq_idx); void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx); int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx); -int nicvf_check_cqe_tx_errs(struct nicvf *nic, - struct cmp_queue *cq, struct cqe_send_t *cqe_tx); +int nicvf_check_cqe_tx_errs(struct nicvf *nic, struct cqe_send_t *cqe_tx); #endif /* NICVF_QUEUES_H */ From c94acf805d93e7beb5898ac97ff327ae0b6f04dd Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 15 Nov 2016 17:38:29 +0530 Subject: [PATCH 302/503] net: thunderx: Fix memory leak and other issues upon interface toggle This patch fixes the following 1. When interface is being teardown and queues are being cleaned up, free pending SKBs that are in SQ which are either not transmitted or freed as NAPI is disabled by that time. 2. While interface initialization, delay CFG_DONE notification till the end to avoid corner cases where TXQs are enabled but CQ interrupts are not which results blocking transmission and kicking off watchdog. 3. Check for IFF_UP while re-enabling RBDR interrupts from tasklet. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 11 +++++------ drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 14 +++++++++++++- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 9dc79c0578d8..8a37012c9c89 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -473,9 +473,6 @@ int nicvf_set_real_num_queues(struct net_device *netdev, static int nicvf_init_resources(struct nicvf *nic) { int err; - union nic_mbx mbx = {}; - - mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE; /* Enable Qset */ nicvf_qset_config(nic, true); @@ -488,9 +485,6 @@ static int nicvf_init_resources(struct nicvf *nic) return err; } - /* Send VF config done msg to PF */ - nicvf_write_to_mbx(nic, &mbx); - return 0; } @@ -1184,6 +1178,7 @@ int nicvf_open(struct net_device *netdev) struct nicvf *nic = netdev_priv(netdev); struct queue_set *qs = nic->qs; struct nicvf_cq_poll *cq_poll = NULL; + union nic_mbx mbx = {}; netif_carrier_off(netdev); @@ -1271,6 +1266,10 @@ int nicvf_open(struct net_device *netdev) for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx); + /* Send VF config done msg to PF */ + mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE; + nicvf_write_to_mbx(nic, &mbx); + return 0; cleanup: nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index bdce5915baae..747ef0882976 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -271,7 +271,8 @@ refill: rbdr_idx, new_rb); next_rbdr: /* Re-enable RBDR interrupts only if buffer allocation is success */ - if (!nic->rb_alloc_fail && rbdr->enable) + if (!nic->rb_alloc_fail && rbdr->enable && + netif_running(nic->pnicvf->netdev)) nicvf_enable_intr(nic, NICVF_INTR_RBDR, rbdr_idx); if (rbdr_idx) @@ -362,6 +363,8 @@ static int nicvf_init_snd_queue(struct nicvf *nic, static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) { + struct sk_buff *skb; + if (!sq) return; if (!sq->dmem.base) @@ -372,6 +375,15 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) sq->dmem.q_len * TSO_HEADER_SIZE, sq->tso_hdrs, sq->tso_hdrs_phys); + /* Free pending skbs in the queue */ + smp_rmb(); + while (sq->head != sq->tail) { + skb = (struct sk_buff *)sq->skbuff[sq->head]; + if (skb) + dev_kfree_skb_any(skb); + sq->head++; + sq->head &= (sq->dmem.q_len - 1); + } kfree(sq->skbuff); nicvf_free_q_desc_mem(nic, &sq->dmem); } From 3ca0b51decf780ce6277b088a9f28cd6fb71e372 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 16 Nov 2016 11:02:00 -0800 Subject: [PATCH 303/503] clk: berlin: Pass correct type to hw provider registration Dan Carpenter reports that we're passing a pointer to a pointer here when we should just be passing a pointer. Pass the right pointer so that the of_clk_hw_onecell_get() sees the appropriate data pointer on its end. Reported-by: Dan Carpenter Cc: Jisheng Zhang Cc: Alexandre Belloni Cc: Sebastian Hesselbarth Cc: Stephen Boyd Fixes: f6475e298297 ("clk: berlin: Migrate to clk_hw based registration and OF APIs") Signed-off-by: Stephen Boyd --- drivers/clk/berlin/bg2.c | 2 +- drivers/clk/berlin/bg2q.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/berlin/bg2.c b/drivers/clk/berlin/bg2.c index edf3b96b3b73..1d99292e2039 100644 --- a/drivers/clk/berlin/bg2.c +++ b/drivers/clk/berlin/bg2.c @@ -685,7 +685,7 @@ static void __init berlin2_clock_setup(struct device_node *np) } /* register clk-provider */ - of_clk_add_hw_provider(np, of_clk_hw_onecell_get, &clk_data); + of_clk_add_hw_provider(np, of_clk_hw_onecell_get, clk_data); return; diff --git a/drivers/clk/berlin/bg2q.c b/drivers/clk/berlin/bg2q.c index 0718e831475f..3b784b593afd 100644 --- a/drivers/clk/berlin/bg2q.c +++ b/drivers/clk/berlin/bg2q.c @@ -382,7 +382,7 @@ static void __init berlin2q_clock_setup(struct device_node *np) } /* register clk-provider */ - of_clk_add_hw_provider(np, of_clk_hw_onecell_get, &clk_data); + of_clk_add_hw_provider(np, of_clk_hw_onecell_get, clk_data); return; From bdfdabfedc30c9574dde6198a1739d2be03bf934 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 16 Nov 2016 11:02:00 -0800 Subject: [PATCH 304/503] clk: efm32gg: Pass correct type to hw provider registration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dan Carpenter reports that we're passing a pointer to a pointer here when we should just be passing a pointer. Pass the right pointer so that the of_clk_hw_onecell_get() sees the appropriate data pointer on its end. Reported-by: Dan Carpenter Cc: Stephen Boyd Cc: Uwe Kleine-König Fixes: 9337631f52a8 ("clk: efm32gg: Migrate to clk_hw based OF and registration APIs") Signed-off-by: Stephen Boyd --- drivers/clk/clk-efm32gg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/clk-efm32gg.c b/drivers/clk/clk-efm32gg.c index 8802a2dd56ac..f674778fb3ac 100644 --- a/drivers/clk/clk-efm32gg.c +++ b/drivers/clk/clk-efm32gg.c @@ -82,6 +82,6 @@ static void __init efm32gg_cmu_init(struct device_node *np) hws[clk_HFPERCLKDAC0] = clk_hw_register_gate(NULL, "HFPERCLK.DAC0", "HFXO", 0, base + CMU_HFPERCLKEN0, 17, 0, NULL); - of_clk_add_hw_provider(np, of_clk_hw_onecell_get, &clk_data); + of_clk_add_hw_provider(np, of_clk_hw_onecell_get, clk_data); } CLK_OF_DECLARE(efm32ggcmu, "efm32gg,cmu", efm32gg_cmu_init); From da7800a88c5a3b798f763d6f9f343e9a49860c4f Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Mon, 14 Nov 2016 16:36:08 +0800 Subject: [PATCH 305/503] drm/amd/powerplay: avoid out of bounds access on array ps. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit check array index first and then visit the array. Signed-off-by: Rex Zhu Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 13f2b705ea49..08cd0bd3ebe5 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -2984,19 +2984,19 @@ static int smu7_get_pp_table_entry_callback_func_v0(struct pp_hwmgr *hwmgr, if (!(data->mc_micro_code_feature & DISABLE_MC_LOADMICROCODE) && memory_clock > data->highest_mclk) data->highest_mclk = memory_clock; - performance_level = &(ps->performance_levels - [ps->performance_level_count++]); - PP_ASSERT_WITH_CODE( (ps->performance_level_count < smum_get_mac_definition(hwmgr->smumgr, SMU_MAX_LEVELS_GRAPHICS)), "Performance levels exceeds SMC limit!", return -EINVAL); PP_ASSERT_WITH_CODE( - (ps->performance_level_count <= + (ps->performance_level_count < hwmgr->platform_descriptor.hardwareActivityPerformanceLevels), - "Performance levels exceeds Driver limit!", - return -EINVAL); + "Performance levels exceeds Driver limit, Skip!", + return 0); + + performance_level = &(ps->performance_levels + [ps->performance_level_count++]); /* Performance levels are arranged from low to high. */ performance_level->memory_clock = memory_clock; From d48756228ee9161ac8836b346589a43fabdc9f3c Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 15 Nov 2016 15:56:26 -0500 Subject: [PATCH 306/503] nvme/pci: Don't free queues on error The nvme_remove function tears down all allocated resources in the correct order, so no need to free queues on error during initialization. This fixes possible use-after-free errors when queues are still associated with a blk-mq hctx. Reported-by: Scott Bauer Tested-by: Scott Bauer Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 0248d0e21fee..5e52034ab010 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1242,20 +1242,16 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) result = nvme_enable_ctrl(&dev->ctrl, cap); if (result) - goto free_nvmeq; + return result; nvmeq->cq_vector = 0; result = queue_request_irq(nvmeq); if (result) { nvmeq->cq_vector = -1; - goto free_nvmeq; + return result; } return result; - - free_nvmeq: - nvme_free_queues(dev, 0); - return result; } static bool nvme_should_reset(struct nvme_dev *dev, u32 csts) @@ -1317,10 +1313,8 @@ static int nvme_create_io_queues(struct nvme_dev *dev) max = min(dev->max_qid, dev->queue_count - 1); for (i = dev->online_queues; i <= max; i++) { ret = nvme_create_queue(dev->queues[i], i); - if (ret) { - nvme_free_queues(dev, i); + if (ret) break; - } } /* @@ -1460,13 +1454,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) result = queue_request_irq(adminq); if (result) { adminq->cq_vector = -1; - goto free_queues; + return result; } return nvme_create_io_queues(dev); - - free_queues: - nvme_free_queues(dev, 1); - return result; } static void nvme_del_queue_end(struct request *req, int error) From f9c22ec6c1c511285dc539b83aabdabdb6baf245 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Tue, 15 Nov 2016 17:39:02 -0500 Subject: [PATCH 307/503] gpio: Remove GPIO_DEVRES option This option was added in 6a89a314ab107a12af08c71420c19a37a30fc2d3 to allow use of the devm_gpio_* functions without CONFIG_GPIOLIB. However, only a few months later in b69ac52449c658b7ac40034dc3c5f5f4a71a723d, CONFIG_GPIOLIB was added as a dependency, defeating the original purpose of this option. Instead of that patch, the original commit could have just been reverted (and in fact was partially so in 403c1d0be5ccbd750d25c59d8358843a81e52e3b). Further, since this option has a dependency on HAS_IOMEM, even though it does not require it, it causes build failures when !HAS_IOMEM (e.g. in a uml build). Fix that by completely removing the option, in essence completing the reversion of the original commit. Signed-off-by: Keno Fischer Signed-off-by: Linus Walleij --- drivers/gpio/Kconfig | 4 ---- drivers/gpio/Makefile | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index d011cb89d25e..ed37e5908b91 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -22,10 +22,6 @@ menuconfig GPIOLIB if GPIOLIB -config GPIO_DEVRES - def_bool y - depends on HAS_IOMEM - config OF_GPIO def_bool y depends on OF diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index ab28a2daeacc..d074c2299393 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -2,7 +2,7 @@ ccflags-$(CONFIG_DEBUG_GPIO) += -DDEBUG -obj-$(CONFIG_GPIO_DEVRES) += devres.o +obj-$(CONFIG_GPIOLIB) += devres.o obj-$(CONFIG_GPIOLIB) += gpiolib.o obj-$(CONFIG_GPIOLIB) += gpiolib-legacy.o obj-$(CONFIG_OF_GPIO) += gpiolib-of.o From 963abe5c8a0273a1cf5913556da1b1189de0e57a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Nov 2016 22:24:12 -0800 Subject: [PATCH 308/503] virtio-net: add a missing synchronize_net() It seems many drivers do not respect napi_hash_del() contract. When napi_hash_del() is used before netif_napi_del(), an RCU grace period is needed before freeing NAPI object. Fixes: 91815639d880 ("virtio-net: rx busy polling support") Signed-off-by: Eric Dumazet Cc: Jason Wang Cc: Michael S. Tsirkin Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index fd8b1e62301f..7276d5a95bd0 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1497,6 +1497,11 @@ static void virtnet_free_queues(struct virtnet_info *vi) netif_napi_del(&vi->rq[i].napi); } + /* We called napi_hash_del() before netif_napi_del(), + * we need to respect an RCU grace period before freeing vi->rq + */ + synchronize_net(); + kfree(vi->rq); kfree(vi->sq); } From d5a4b1a540b8a9a44888b383472a80b84765aaa0 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 16 Nov 2016 17:27:34 +0800 Subject: [PATCH 309/503] tools/power/acpi: Remove direct kernel source include reference Avoid breaking cross-compiled ACPI tools builds by rearranging the handling of kernel header files. This patch also contains OUTPUT/srctree cleanups in order to make above fix working for various build environments. Fixes: e323c02dee59 (ACPICA: MSVC9: Fix inclusion order issue) Reported-and-tested-by: Yisheng Xie Reported-by: Andy Shevchenko Signed-off-by: Lv Zheng [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- include/acpi/platform/aclinux.h | 3 ++ tools/power/acpi/Makefile.config | 23 +++++++------- tools/power/acpi/Makefile.rules | 40 ++++++++++++++++-------- tools/power/acpi/tools/acpidbg/Makefile | 4 +-- tools/power/acpi/tools/acpidbg/acpidbg.c | 8 ++++- tools/power/acpi/tools/acpidump/Makefile | 12 +++---- 6 files changed, 56 insertions(+), 34 deletions(-) diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h index a5d98d171866..e861a24f06f2 100644 --- a/include/acpi/platform/aclinux.h +++ b/include/acpi/platform/aclinux.h @@ -191,6 +191,9 @@ #ifndef __init #define __init #endif +#ifndef __iomem +#define __iomem +#endif /* Host-dependent types and defines for user-space ACPICA */ diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config index a538ff44b108..a1883bbb0144 100644 --- a/tools/power/acpi/Makefile.config +++ b/tools/power/acpi/Makefile.config @@ -8,18 +8,19 @@ # as published by the Free Software Foundation; version 2 # of the License. -include ../../../../scripts/Makefile.include +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif -OUTPUT=./ +include $(srctree)/../../scripts/Makefile.include + +OUTPUT=$(srctree)/ ifeq ("$(origin O)", "command line") - OUTPUT := $(O)/ -endif - -ifneq ($(OUTPUT),) -# check that the output directory actually exists -OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) -$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) + OUTPUT := $(O)/power/acpi/ endif +#$(info Determined 'OUTPUT' to be $(OUTPUT)) # --- CONFIGURATION BEGIN --- @@ -70,8 +71,8 @@ WARNINGS := -Wall WARNINGS += $(call cc-supports,-Wstrict-prototypes) WARNINGS += $(call cc-supports,-Wdeclaration-after-statement) -KERNEL_INCLUDE := ../../../include -ACPICA_INCLUDE := ../../../drivers/acpi/acpica +KERNEL_INCLUDE := $(OUTPUT)include +ACPICA_INCLUDE := $(srctree)/../../../drivers/acpi/acpica CFLAGS += -D_LINUX -I$(KERNEL_INCLUDE) -I$(ACPICA_INCLUDE) CFLAGS += $(WARNINGS) diff --git a/tools/power/acpi/Makefile.rules b/tools/power/acpi/Makefile.rules index ec87a9e562c0..373738338f51 100644 --- a/tools/power/acpi/Makefile.rules +++ b/tools/power/acpi/Makefile.rules @@ -8,28 +8,42 @@ # as published by the Free Software Foundation; version 2 # of the License. -$(OUTPUT)$(TOOL): $(TOOL_OBJS) FORCE - $(ECHO) " LD " $@ - $(QUIET) $(LD) $(CFLAGS) $(LDFLAGS) $(TOOL_OBJS) -L$(OUTPUT) -o $@ +objdir := $(OUTPUT)tools/$(TOOL)/ +toolobjs := $(addprefix $(objdir),$(TOOL_OBJS)) +$(OUTPUT)$(TOOL): $(toolobjs) FORCE + $(ECHO) " LD " $(subst $(OUTPUT),,$@) + $(QUIET) $(LD) $(CFLAGS) $(LDFLAGS) $(toolobjs) -L$(OUTPUT) -o $@ + $(ECHO) " STRIP " $(subst $(OUTPUT),,$@) $(QUIET) $(STRIPCMD) $@ -$(OUTPUT)%.o: %.c - $(ECHO) " CC " $@ +$(KERNEL_INCLUDE): + $(ECHO) " MKDIR " $(subst $(OUTPUT),,$@) + $(QUIET) mkdir -p $(KERNEL_INCLUDE) + $(ECHO) " CP " $(subst $(OUTPUT),,$@) + $(QUIET) cp -rf $(srctree)/../../../include/acpi $(KERNEL_INCLUDE)/ + +$(objdir)%.o: %.c $(KERNEL_INCLUDE) + $(ECHO) " CC " $(subst $(OUTPUT),,$@) $(QUIET) $(CC) -c $(CFLAGS) -o $@ $< all: $(OUTPUT)$(TOOL) clean: - -find $(OUTPUT) \( -not -type d \) \ - -and \( -name '*~' -o -name '*.[oas]' \) \ - -type f -print \ - | xargs rm -f - -rm -f $(OUTPUT)$(TOOL) + $(ECHO) " RMOBJ " $(subst $(OUTPUT),,$(objdir)) + $(QUIET) find $(objdir) \( -not -type d \)\ + -and \( -name '*~' -o -name '*.[oas]' \)\ + -type f -print | xargs rm -f + $(ECHO) " RM " $(TOOL) + $(QUIET) rm -f $(OUTPUT)$(TOOL) + $(ECHO) " RMINC " $(subst $(OUTPUT),,$(KERNEL_INCLUDE)) + $(QUIET) rm -rf $(KERNEL_INCLUDE) install-tools: - $(INSTALL) -d $(DESTDIR)${sbindir} - $(INSTALL_PROGRAM) $(OUTPUT)$(TOOL) $(DESTDIR)${sbindir} + $(ECHO) " INST " $(TOOL) + $(QUIET) $(INSTALL) -d $(DESTDIR)$(sbindir) + $(QUIET) $(INSTALL_PROGRAM) $(OUTPUT)$(TOOL) $(DESTDIR)$(sbindir) uninstall-tools: - - rm -f $(DESTDIR)${sbindir}/$(TOOL) + $(ECHO) " UNINST " $(TOOL) + $(QUIET) rm -f $(DESTDIR)$(sbindir)/$(TOOL) install: all install-tools $(EXTRA_INSTALL) uninstall: uninstall-tools $(EXTRA_UNINSTALL) diff --git a/tools/power/acpi/tools/acpidbg/Makefile b/tools/power/acpi/tools/acpidbg/Makefile index 352df4b41ae9..f2d06e773eb4 100644 --- a/tools/power/acpi/tools/acpidbg/Makefile +++ b/tools/power/acpi/tools/acpidbg/Makefile @@ -17,9 +17,7 @@ vpath %.c \ ../../os_specific/service_layers\ . CFLAGS += -DACPI_APPLICATION -DACPI_SINGLE_THREAD -DACPI_DEBUGGER\ - -I.\ - -I../../../../../drivers/acpi/acpica\ - -I../../../../../include + -I. LDFLAGS += -lpthread TOOL_OBJS = \ acpidbg.o diff --git a/tools/power/acpi/tools/acpidbg/acpidbg.c b/tools/power/acpi/tools/acpidbg/acpidbg.c index a88ac45b7756..4308362d7068 100644 --- a/tools/power/acpi/tools/acpidbg/acpidbg.c +++ b/tools/power/acpi/tools/acpidbg/acpidbg.c @@ -12,10 +12,16 @@ #include /* Headers not included by include/acpi/platform/aclinux.h */ +#include +#include +#include +#include +#include #include #include #include -#include +#include +#include "../../../../../include/linux/circ_buf.h" #define ACPI_AML_FILE "/sys/kernel/debug/acpi/acpidbg" #define ACPI_AML_SEC_TICK 1 diff --git a/tools/power/acpi/tools/acpidump/Makefile b/tools/power/acpi/tools/acpidump/Makefile index 04b5db7c7c0b..f7c7af1f9258 100644 --- a/tools/power/acpi/tools/acpidump/Makefile +++ b/tools/power/acpi/tools/acpidump/Makefile @@ -19,9 +19,7 @@ vpath %.c \ ./\ ../../common\ ../../os_specific/service_layers -CFLAGS += -DACPI_DUMP_APP -I.\ - -I../../../../../drivers/acpi/acpica\ - -I../../../../../include +CFLAGS += -DACPI_DUMP_APP -I. TOOL_OBJS = \ apdump.o\ apfiles.o\ @@ -49,7 +47,9 @@ TOOL_OBJS = \ include ../../Makefile.rules -install-man: ../../man/acpidump.8 - $(INSTALL_DATA) -D $< $(DESTDIR)${mandir}/man8/acpidump.8 +install-man: $(srctree)/man/acpidump.8 + $(ECHO) " INST " acpidump.8 + $(QUIET) $(INSTALL_DATA) -D $< $(DESTDIR)$(mandir)/man8/acpidump.8 uninstall-man: - - rm -f $(DESTDIR)${mandir}/man8/acpidump.8 + $(ECHO) " UNINST " acpidump.8 + $(QUIET) rm -f $(DESTDIR)$(mandir)/man8/acpidump.8 From ea339343d64a14594d882ccb52e8619d42defe5e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Nov 2016 06:12:42 -0800 Subject: [PATCH 310/503] be2net: do not call napi_hash_del() Calling napi_hash_del() before netif_napi_del() is dangerous if a synchronize_rcu() is not enforced before NAPI struct freeing. Lets leave this detail to core networking stack and feel more comfortable. Signed-off-by: Eric Dumazet Cc: Sathya Perla Cc: Ajit Khaparde Cc: Sriharsha Basavapatna Cc: Somnath Kotur Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index cece8a08edca..93aa2939142a 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2813,7 +2813,6 @@ static void be_evt_queues_destroy(struct be_adapter *adapter) if (eqo->q.created) { be_eq_clean(eqo); be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ); - napi_hash_del(&eqo->napi); netif_napi_del(&eqo->napi); free_cpumask_var(eqo->affinity_mask); } From 5f00a8d8a2c2fd99528ab1a3632f0e77f4d25202 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Nov 2016 06:19:02 -0800 Subject: [PATCH 311/503] cxgb4: do not call napi_hash_del() Calling napi_hash_del() before netif_napi_del() is dangerous if a synchronize_rcu() is not enforced before NAPI struct freeing. Lets leave this detail to core networking stack and feel more comfortable. Signed-off-by: Eric Dumazet Cc: Hariprasad S Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/sge.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 1e74fd6085df..e19a0ca8e5dd 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2951,7 +2951,6 @@ void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, rq->cntxt_id, fl_id, 0xffff); dma_free_coherent(adap->pdev_dev, (rq->size + 1) * rq->iqe_len, rq->desc, rq->phys_addr); - napi_hash_del(&rq->napi); netif_napi_del(&rq->napi); rq->netdev = NULL; rq->cntxt_id = rq->abs_id = 0; From 955e16026d08a601d02b961d13b6db9d6c13c8c9 Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 16 Nov 2016 01:02:33 -0800 Subject: [PATCH 312/503] net/phy/vitesse: Configure RGMII skew on VSC8601, if needed With RGMII, we need a 1.5 to 2ns skew between clock and data lines. The VSC8601 can handle this internally. While the VSC8601 can set more fine-grained delays, the standard skew settings work out of the box. The same heuristic is used to determine when this skew should be enabled as in vsc824x_config_init(). Tested on custom board with AM3352 SOC and VSC801 PHY. Signed-off-by: Alexandru Gagniuc Signed-off-by: David S. Miller --- drivers/net/phy/vitesse.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c index 2e37eb337d48..24b4a09468dd 100644 --- a/drivers/net/phy/vitesse.c +++ b/drivers/net/phy/vitesse.c @@ -62,6 +62,10 @@ /* Vitesse Extended Page Access Register */ #define MII_VSC82X4_EXT_PAGE_ACCESS 0x1f +/* Vitesse VSC8601 Extended PHY Control Register 1 */ +#define MII_VSC8601_EPHY_CTL 0x17 +#define MII_VSC8601_EPHY_CTL_RGMII_SKEW (1 << 8) + #define PHY_ID_VSC8234 0x000fc620 #define PHY_ID_VSC8244 0x000fc6c0 #define PHY_ID_VSC8514 0x00070670 @@ -111,6 +115,34 @@ static int vsc824x_config_init(struct phy_device *phydev) return err; } +/* This adds a skew for both TX and RX clocks, so the skew should only be + * applied to "rgmii-id" interfaces. It may not work as expected + * on "rgmii-txid", "rgmii-rxid" or "rgmii" interfaces. */ +static int vsc8601_add_skew(struct phy_device *phydev) +{ + int ret; + + ret = phy_read(phydev, MII_VSC8601_EPHY_CTL); + if (ret < 0) + return ret; + + ret |= MII_VSC8601_EPHY_CTL_RGMII_SKEW; + return phy_write(phydev, MII_VSC8601_EPHY_CTL, ret); +} + +static int vsc8601_config_init(struct phy_device *phydev) +{ + int ret = 0; + + if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID) + ret = vsc8601_add_skew(phydev); + + if (ret < 0) + return ret; + + return genphy_config_init(phydev); +} + static int vsc824x_ack_interrupt(struct phy_device *phydev) { int err = 0; @@ -275,7 +307,7 @@ static struct phy_driver vsc82xx_driver[] = { .phy_id_mask = 0x000ffff0, .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, - .config_init = &genphy_config_init, + .config_init = &vsc8601_config_init, .config_aneg = &genphy_config_aneg, .read_status = &genphy_read_status, .ack_interrupt = &vsc824x_ack_interrupt, From 2a3811068fbc6bf09bb09d166b65394b091c1085 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 16 Nov 2016 23:51:19 +0000 Subject: [PATCH 313/503] ARM: Fix XIP kernels Commit 7619751f8c90 ("ARM: 8595/2: apply more __ro_after_init") caused a regression with XIP kernels by moving the __ro_after_init data into the read-only section. With XIP kernels, the read-only section is located in read-only memory from the very beginning. Work around this by moving the __ro_after_init data back into the .data section, which will be in RAM, and hence will be writable. It should be noted that in doing so, this remains writable after init. Fixes: 7619751f8c90 ("ARM: 8595/2: apply more __ro_after_init") Reported-by: Andrea Merello Tested-by: Andrea Merello [ XIP stm32 ] Tested-by: Alexandre Torgue Signed-off-by: Russell King --- arch/arm/kernel/vmlinux-xip.lds.S | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index 7fa487ef7e2f..37b2a11af345 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -3,6 +3,9 @@ * Written by Martin Mares */ +/* No __ro_after_init data in the .rodata section - which will always be ro */ +#define RO_AFTER_INIT_DATA + #include #include #include @@ -223,6 +226,8 @@ SECTIONS . = ALIGN(PAGE_SIZE); __init_end = .; + *(.data..ro_after_init) + NOSAVE_DATA CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES) READ_MOSTLY_DATA(L1_CACHE_BYTES) From 5b810a242c28e1d8d64d718cebe75b79d86a0b2d Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 27 Oct 2016 16:36:26 +0300 Subject: [PATCH 314/503] IB/uverbs: Fix leak of XRC target QPs The real QP is destroyed in case of the ref count reaches zero, but for XRC target QPs this call was missed and caused to QP leaks. Let's call to destroy for all flows. Fixes: 0e0ec7e0638e ('RDMA/core: Export ib_open_qp() to share XRC...') Signed-off-by: Tariq Toukan Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_main.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 0012fa58c105..44b1104eb168 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -262,12 +262,9 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, container_of(uobj, struct ib_uqp_object, uevent.uobject); idr_remove_uobj(&ib_uverbs_qp_idr, uobj); - if (qp != qp->real_qp) { - ib_close_qp(qp); - } else { + if (qp == qp->real_qp) ib_uverbs_detach_umcast(qp, uqp); - ib_destroy_qp(qp); - } + ib_destroy_qp(qp); ib_uverbs_release_uevent(file, &uqp->uevent); kfree(uqp); } From 9db0ff53cb9b43ed75bacd42a89c1a0ab048b2b0 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 27 Oct 2016 16:36:27 +0300 Subject: [PATCH 315/503] IB/cm: Mark stale CM id's whenever the mad agent was unregistered When there is a CM id object that has port assigned to it, it means that the cm-id asked for the specific port that it should go by it, but if that port was removed (hot-unplug event) the cm-id was not updated. In order to fix that the port keeps a list of all the cm-id's that are planning to go by it, whenever the port is removed it marks all of them as invalid. This commit fixes a kernel panic which happens when running traffic between guests and we force reboot a guest mid traffic, it triggers a kernel panic: Call Trace: [] ? panic+0xa7/0x16f [] ? oops_end+0xe4/0x100 [] ? no_context+0xfb/0x260 [] ? del_timer_sync+0x22/0x30 [] ? __bad_area_nosemaphore+0x125/0x1e0 [] ? process_timeout+0x0/0x10 [] ? bad_area_nosemaphore+0x13/0x20 [] ? __do_page_fault+0x31f/0x480 [] ? default_wake_function+0x0/0x20 [] ? free_msg+0x55/0x70 [mlx5_core] [] ? cmd_exec+0x124/0x840 [mlx5_core] [] ? find_busiest_group+0x244/0x9f0 [] ? do_page_fault+0x3e/0xa0 [] ? page_fault+0x25/0x30 [] ? cm_alloc_msg+0x35/0xc0 [ib_cm] [] ? ib_send_cm_dreq+0xb1/0x1e0 [ib_cm] [] ? cm_destroy_id+0x176/0x320 [ib_cm] [] ? ib_destroy_cm_id+0x10/0x20 [ib_cm] [] ? ipoib_cm_free_rx_reap_list+0xa7/0x110 [ib_ipoib] [] ? ipoib_cm_rx_reap+0x0/0x20 [ib_ipoib] [] ? ipoib_cm_rx_reap+0x15/0x20 [ib_ipoib] [] ? worker_thread+0x170/0x2a0 [] ? autoremove_wake_function+0x0/0x40 [] ? worker_thread+0x0/0x2a0 [] ? kthread+0x96/0xa0 [] ? child_rip+0xa/0x20 [] ? kthread+0x0/0xa0 [] ? child_rip+0x0/0x20 Fixes: a977049dacde ("[PATCH] IB: Add the kernel CM implementation") Signed-off-by: Mark Bloch Signed-off-by: Erez Shitrit Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 126 ++++++++++++++++++++++++++++++----- 1 file changed, 110 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index c99525512b34..71c7c4c328ef 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -80,6 +80,8 @@ static struct ib_cm { __be32 random_id_operand; struct list_head timewait_list; struct workqueue_struct *wq; + /* Sync on cm change port state */ + spinlock_t state_lock; } cm; /* Counter indexes ordered by attribute ID */ @@ -161,6 +163,8 @@ struct cm_port { struct ib_mad_agent *mad_agent; struct kobject port_obj; u8 port_num; + struct list_head cm_priv_prim_list; + struct list_head cm_priv_altr_list; struct cm_counter_group counter_group[CM_COUNTER_GROUPS]; }; @@ -241,6 +245,12 @@ struct cm_id_private { u8 service_timeout; u8 target_ack_delay; + struct list_head prim_list; + struct list_head altr_list; + /* Indicates that the send port mad is registered and av is set */ + int prim_send_port_not_ready; + int altr_send_port_not_ready; + struct list_head work_list; atomic_t work_count; }; @@ -259,20 +269,47 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, struct ib_mad_agent *mad_agent; struct ib_mad_send_buf *m; struct ib_ah *ah; + struct cm_av *av; + unsigned long flags, flags2; + int ret = 0; + /* don't let the port to be released till the agent is down */ + spin_lock_irqsave(&cm.state_lock, flags2); + spin_lock_irqsave(&cm.lock, flags); + if (!cm_id_priv->prim_send_port_not_ready) + av = &cm_id_priv->av; + else if (!cm_id_priv->altr_send_port_not_ready && + (cm_id_priv->alt_av.port)) + av = &cm_id_priv->alt_av; + else { + pr_info("%s: not valid CM id\n", __func__); + ret = -ENODEV; + spin_unlock_irqrestore(&cm.lock, flags); + goto out; + } + spin_unlock_irqrestore(&cm.lock, flags); + /* Make sure the port haven't released the mad yet */ mad_agent = cm_id_priv->av.port->mad_agent; - ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr); - if (IS_ERR(ah)) - return PTR_ERR(ah); + if (!mad_agent) { + pr_info("%s: not a valid MAD agent\n", __func__); + ret = -ENODEV; + goto out; + } + ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr); + if (IS_ERR(ah)) { + ret = PTR_ERR(ah); + goto out; + } m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, - cm_id_priv->av.pkey_index, + av->pkey_index, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_ATOMIC, IB_MGMT_BASE_VERSION); if (IS_ERR(m)) { ib_destroy_ah(ah); - return PTR_ERR(m); + ret = PTR_ERR(m); + goto out; } /* Timeout set by caller if response is expected. */ @@ -282,7 +319,10 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, atomic_inc(&cm_id_priv->refcount); m->context[0] = cm_id_priv; *msg = m; - return 0; + +out: + spin_unlock_irqrestore(&cm.state_lock, flags2); + return ret; } static int cm_alloc_response_msg(struct cm_port *port, @@ -352,7 +392,8 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, grh, &av->ah_attr); } -static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) +static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av, + struct cm_id_private *cm_id_priv) { struct cm_device *cm_dev; struct cm_port *port = NULL; @@ -387,7 +428,17 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) &av->ah_attr); av->timeout = path->packet_life_time + 1; - return 0; + spin_lock_irqsave(&cm.lock, flags); + if (&cm_id_priv->av == av) + list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list); + else if (&cm_id_priv->alt_av == av) + list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list); + else + ret = -EINVAL; + + spin_unlock_irqrestore(&cm.lock, flags); + + return ret; } static int cm_alloc_id(struct cm_id_private *cm_id_priv) @@ -677,6 +728,8 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device, spin_lock_init(&cm_id_priv->lock); init_completion(&cm_id_priv->comp); INIT_LIST_HEAD(&cm_id_priv->work_list); + INIT_LIST_HEAD(&cm_id_priv->prim_list); + INIT_LIST_HEAD(&cm_id_priv->altr_list); atomic_set(&cm_id_priv->work_count, -1); atomic_set(&cm_id_priv->refcount, 1); return &cm_id_priv->id; @@ -892,6 +945,15 @@ retest: break; } + spin_lock_irq(&cm.lock); + if (!list_empty(&cm_id_priv->altr_list) && + (!cm_id_priv->altr_send_port_not_ready)) + list_del(&cm_id_priv->altr_list); + if (!list_empty(&cm_id_priv->prim_list) && + (!cm_id_priv->prim_send_port_not_ready)) + list_del(&cm_id_priv->prim_list); + spin_unlock_irq(&cm.lock); + cm_free_id(cm_id->local_id); cm_deref_id(cm_id_priv); wait_for_completion(&cm_id_priv->comp); @@ -1192,12 +1254,13 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, goto out; } - ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av); + ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av, + cm_id_priv); if (ret) goto error1; if (param->alternate_path) { ret = cm_init_av_by_path(param->alternate_path, - &cm_id_priv->alt_av); + &cm_id_priv->alt_av, cm_id_priv); if (ret) goto error1; } @@ -1653,7 +1716,8 @@ static int cm_req_handler(struct cm_work *work) dev_put(gid_attr.ndev); } work->path[0].gid_type = gid_attr.gid_type; - ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av); + ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av, + cm_id_priv); } if (ret) { int err = ib_get_cached_gid(work->port->cm_dev->ib_device, @@ -1672,7 +1736,8 @@ static int cm_req_handler(struct cm_work *work) goto rejected; } if (req_msg->alt_local_lid) { - ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av); + ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av, + cm_id_priv); if (ret) { ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID, &work->path[0].sgid, @@ -2727,7 +2792,8 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id, goto out; } - ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av); + ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av, + cm_id_priv); if (ret) goto out; cm_id_priv->alt_av.timeout = @@ -2839,7 +2905,8 @@ static int cm_lap_handler(struct cm_work *work) cm_init_av_for_response(work->port, work->mad_recv_wc->wc, work->mad_recv_wc->recv_buf.grh, &cm_id_priv->av); - cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av); + cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av, + cm_id_priv); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); @@ -3031,7 +3098,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); - ret = cm_init_av_by_path(param->path, &cm_id_priv->av); + ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv); if (ret) goto out; @@ -3468,7 +3535,9 @@ out: static int cm_migrate(struct ib_cm_id *cm_id) { struct cm_id_private *cm_id_priv; + struct cm_av tmp_av; unsigned long flags; + int tmp_send_port_not_ready; int ret = 0; cm_id_priv = container_of(cm_id, struct cm_id_private, id); @@ -3477,7 +3546,14 @@ static int cm_migrate(struct ib_cm_id *cm_id) (cm_id->lap_state == IB_CM_LAP_UNINIT || cm_id->lap_state == IB_CM_LAP_IDLE)) { cm_id->lap_state = IB_CM_LAP_IDLE; + /* Swap address vector */ + tmp_av = cm_id_priv->av; cm_id_priv->av = cm_id_priv->alt_av; + cm_id_priv->alt_av = tmp_av; + /* Swap port send ready state */ + tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready; + cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready; + cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready; } else ret = -EINVAL; spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -3888,6 +3964,9 @@ static void cm_add_one(struct ib_device *ib_device) port->cm_dev = cm_dev; port->port_num = i; + INIT_LIST_HEAD(&port->cm_priv_prim_list); + INIT_LIST_HEAD(&port->cm_priv_altr_list); + ret = cm_create_port_fs(port); if (ret) goto error1; @@ -3945,6 +4024,8 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) { struct cm_device *cm_dev = client_data; struct cm_port *port; + struct cm_id_private *cm_id_priv; + struct ib_mad_agent *cur_mad_agent; struct ib_port_modify port_modify = { .clr_port_cap_mask = IB_PORT_CM_SUP }; @@ -3968,15 +4049,27 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) port = cm_dev->port[i-1]; ib_modify_port(ib_device, port->port_num, 0, &port_modify); + /* Mark all the cm_id's as not valid */ + spin_lock_irq(&cm.lock); + list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list) + cm_id_priv->altr_send_port_not_ready = 1; + list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list) + cm_id_priv->prim_send_port_not_ready = 1; + spin_unlock_irq(&cm.lock); /* * We flush the queue here after the going_down set, this * verify that no new works will be queued in the recv handler, * after that we can call the unregister_mad_agent */ flush_workqueue(cm.wq); - ib_unregister_mad_agent(port->mad_agent); + spin_lock_irq(&cm.state_lock); + cur_mad_agent = port->mad_agent; + port->mad_agent = NULL; + spin_unlock_irq(&cm.state_lock); + ib_unregister_mad_agent(cur_mad_agent); cm_remove_port_fs(port); } + device_unregister(cm_dev->device); kfree(cm_dev); } @@ -3989,6 +4082,7 @@ static int __init ib_cm_init(void) INIT_LIST_HEAD(&cm.device_list); rwlock_init(&cm.device_lock); spin_lock_init(&cm.lock); + spin_lock_init(&cm.state_lock); cm.listen_service_table = RB_ROOT; cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID); cm.remote_id_table = RB_ROOT; From aeb76df46d1158d5f7f3d30f993a1bb6ee9c67a0 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 31 Oct 2016 07:50:56 +0200 Subject: [PATCH 316/503] IB/core: Set routable RoCE gid type for ipv4/ipv6 networks On Thu, Oct 27, 2016 at 04:36:28PM +0300, Leon Romanovsky wrote: > From: Mark Bloch > > If the underlying netowrk type is ipv4 or ipv6 and the device supports > routable RoCE, prefer it so the traffic could cross subnets. > > Signed-off-by: Mark Bloch > Signed-off-by: Maor Gottlieb > Signed-off-by: Leon Romanovsky > --- Hi Doug, Please take the following v1 of this patch where I fixed spelling error from "netowrk" to be "network". Thanks. >From 09f96ba3e9b4442cfb44dca04c6726e55525c9c3 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Sun, 11 Sep 2016 06:25:10 +0000 Subject: [PATCH rdma-rc v1 3/6] IB/core: Set routable RoCE gid type for ipv4/ipv6 networks If the underlying network type is ipv4 or ipv6 and the device supports routable RoCE, prefer it so the traffic could cross subnets. Signed-off-by: Mark Bloch Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 36bf50ebb187..9ca0da0a37c4 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2436,6 +2436,18 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos) return 0; } +static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type, + unsigned long supported_gids, + enum ib_gid_type default_gid) +{ + if ((network_type == RDMA_NETWORK_IPV4 || + network_type == RDMA_NETWORK_IPV6) && + test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids)) + return IB_GID_TYPE_ROCE_UDP_ENCAP; + + return default_gid; +} + static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) { struct rdma_route *route = &id_priv->id.route; @@ -2461,6 +2473,8 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) route->num_paths = 1; if (addr->dev_addr.bound_dev_if) { + unsigned long supported_gids; + ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if); if (!ndev) { ret = -ENODEV; @@ -2484,7 +2498,12 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) route->path_rec->net = &init_net; route->path_rec->ifindex = ndev->ifindex; - route->path_rec->gid_type = id_priv->gid_type; + supported_gids = roce_gid_type_mask_support(id_priv->id.device, + id_priv->id.port_num); + route->path_rec->gid_type = + cma_route_gid_type(addr->dev_addr.network, + supported_gids, + id_priv->gid_type); } if (!ndev) { ret = -ENODEV; From 61c3702863be9e9f1ef12ed5a5b17bae6cdfac0b Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 27 Oct 2016 16:36:29 +0300 Subject: [PATCH 317/503] IB/core: Add missing check for addr_resolve callback return value When calling rdma_resolve_ip inside rdma_addr_find_l2_eth_by_grh, the return status of the request was ignored in the callback function causing a successful return and an empty dmac. Signed-off-by: Mark Bloch Signed-off-by: Alex Vesker Reviewed-by: Or Gerlitz Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/addr.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index b136d3acc5bd..0f58f46dbad7 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -699,13 +699,16 @@ EXPORT_SYMBOL(rdma_addr_cancel); struct resolve_cb_context { struct rdma_dev_addr *addr; struct completion comp; + int status; }; static void resolve_cb(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context) { - memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct - rdma_dev_addr)); + if (!status) + memcpy(((struct resolve_cb_context *)context)->addr, + addr, sizeof(struct rdma_dev_addr)); + ((struct resolve_cb_context *)context)->status = status; complete(&((struct resolve_cb_context *)context)->comp); } @@ -743,6 +746,10 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, wait_for_completion(&ctx.comp); + ret = ctx.status; + if (ret) + return ret; + memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN); dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if); if (!dev) From 3c7ba5760ab8eedec01159b267bb9bfcffe522ac Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 27 Oct 2016 16:36:31 +0300 Subject: [PATCH 318/503] IB/core: Avoid unsigned int overflow in sg_alloc_table sg_alloc_table gets unsigned int as parameter while the driver returns it as size_t. Check npages isn't greater than maximum unsigned int. Fixes: eeb8461e36c9 ("IB: Refactor umem to use linear SG table") Signed-off-by: Mark Bloch Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/umem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 224ad274ea0b..84b4eff90395 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -175,7 +175,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, cur_base = addr & PAGE_MASK; - if (npages == 0) { + if (npages == 0 || npages > UINT_MAX) { ret = -EINVAL; goto out; } From 90be7c8ab72853ff9fc407f01518a898df1f3045 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 27 Oct 2016 16:36:39 +0300 Subject: [PATCH 319/503] IB/mlx5: Fix memory leak in query device We need to free dev->port when we fail to enable RoCE or initialize node data. Fixes: 0837e86a7a34 ('IB/mlx5: Add per port counters') Signed-off-by: Majd Dibbiny Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 22174774dbb8..bb61487861cc 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3115,7 +3115,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) } err = init_node_data(dev); if (err) - goto err_dealloc; + goto err_free_port; mutex_init(&dev->flow_db.lock); mutex_init(&dev->cap_mask_mutex); @@ -3125,7 +3125,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (ll == IB_LINK_LAYER_ETHERNET) { err = mlx5_enable_roce(dev); if (err) - goto err_dealloc; + goto err_free_port; } err = create_dev_resources(&dev->devr); From efd7f40082a0dfd112eb87ff2124467a5739216f Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 27 Oct 2016 16:36:40 +0300 Subject: [PATCH 320/503] IB/mlx5: Validate requested RQT size Validate that the requested size of RQT is supported by firmware. Fixes: c5f9092936fe ('IB/mlx5: Add Receive Work Queue Indirection table operations') Signed-off-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 41f4c2afbcdd..2be0d06b27dc 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4815,6 +4815,14 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, udata->inlen)) return ERR_PTR(-EOPNOTSUPP); + if (init_attr->log_ind_tbl_size > + MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)) { + mlx5_ib_dbg(dev, "log_ind_tbl_size = %d is bigger than supported = %d\n", + init_attr->log_ind_tbl_size, + MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)); + return ERR_PTR(-EINVAL); + } + min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); if (udata->outlen && udata->outlen < min_resp_len) return ERR_PTR(-EINVAL); From 16b0e0695a73b68d8ca40288c8f9614ef208917b Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Thu, 27 Oct 2016 16:36:41 +0300 Subject: [PATCH 321/503] IB/mlx5: Use cache line size to select CQE stride When creating kernel CQs use 128B CQE stride if the cache line size is 128B, 64B otherwise. This prevents multiple CQEs from residing in a 128B cache line, which can cause retries when there are concurrent read and writes in one cache line. Tested with IPoIB on PPC64, saw ~5% throughput improvement. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Daniel Jurgens Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 79d017baf6f4..fcd04b881ec1 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -932,8 +932,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, if (err) goto err_create; } else { - /* for now choose 64 bytes till we have a proper interface */ - cqe_size = 64; + cqe_size = cache_line_size() == 128 ? 128 : 64; err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb, &index, &inlen); if (err) From 6bc1a656ab9f57f0112823b4a36930c9a29d1f89 Mon Sep 17 00:00:00 2001 From: Moshe Lazer Date: Thu, 27 Oct 2016 16:36:42 +0300 Subject: [PATCH 322/503] IB/mlx5: Resolve soft lock on massive reg MRs When calling reg_mr of large MRs (e.g. 4GB) from multiple processes and MR caches can't supply the required amount of MRs the slow-path of MR allocation may be used. In this case we need to serialize the slow-path between the processes to avoid soft lock. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Moshe Lazer Signed-off-by: Maor Gottlieb Reviewed-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 ++ drivers/infiniband/hw/mlx5/mr.c | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index dcdcd195fe53..7d689903c87c 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -626,6 +626,8 @@ struct mlx5_ib_dev { struct mlx5_ib_resources devr; struct mlx5_mr_cache cache; struct timer_list delay_timer; + /* Prevents soft lock on massive reg MRs */ + struct mutex slow_path_mutex; int fill_delay; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING struct ib_odp_caps odp_caps; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index d4ad672b905b..4e9012463c37 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -610,6 +610,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) int err; int i; + mutex_init(&dev->slow_path_mutex); cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM); if (!cache->wq) { mlx5_ib_warn(dev, "failed to create work queue\n"); @@ -1182,9 +1183,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto error; } - if (!mr) + if (!mr) { + mutex_lock(&dev->slow_path_mutex); mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, page_shift, access_flags); + mutex_unlock(&dev->slow_path_mutex); + } if (IS_ERR(mr)) { err = PTR_ERR(mr); From dbaaff2a2caa03d472b5cc53a3fbfd415c97dc26 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 27 Oct 2016 16:36:44 +0300 Subject: [PATCH 323/503] IB/mlx5: Fix fatal error dispatching When an internal error condition is detected, make sure to set the device inactive after dispatching the event so ULPs can get a notification of this event. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Eli Cohen Signed-off-by: Maor Gottlieb Reviewed-by: Mohamad Haj Yahia Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index bb61487861cc..a014ad38d889 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2311,14 +2311,14 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, { struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context; struct ib_event ibev; - + bool fatal = false; u8 port = 0; switch (event) { case MLX5_DEV_EVENT_SYS_ERROR: - ibdev->ib_active = false; ibev.event = IB_EVENT_DEVICE_FATAL; mlx5_ib_handle_internal_error(ibdev); + fatal = true; break; case MLX5_DEV_EVENT_PORT_UP: @@ -2370,6 +2370,9 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, if (ibdev->ib_active) ib_dispatch_event(&ibev); + + if (fatal) + ibdev->ib_active = false; } static void get_ext_port_caps(struct mlx5_ib_dev *dev) From a1ab8402d15d2305d2315d96ec3294bfdf16587e Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 27 Oct 2016 16:36:46 +0300 Subject: [PATCH 324/503] IB/mlx5: Fix NULL pointer dereference on debug print For XRC QP CQs may not exist. Check before attempting dereference. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Eli Cohen Signed-off-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 2be0d06b27dc..59c4c89460d1 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2052,8 +2052,8 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n", qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn, - to_mcq(init_attr->recv_cq)->mcq.cqn, - to_mcq(init_attr->send_cq)->mcq.cqn); + init_attr->recv_cq ? to_mcq(init_attr->recv_cq)->mcq.cqn : -1, + init_attr->send_cq ? to_mcq(init_attr->send_cq)->mcq.cqn : -1); qp->trans_qp.xrcdn = xrcdn; From 37995116fecfce2b61ee3da6e73b3e394c6818f9 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Thu, 10 Nov 2016 11:30:54 +0200 Subject: [PATCH 325/503] IB/mlx4: Check gid_index return value Check the returned GID index value and return an error if it is invalid. Fixes: 5070cd2239bd ('IB/mlx4: Replace mechanism for RoCE GID management') Signed-off-by: Daniel Jurgens Reviewed-by: Mark Bloch Reviewed-by: Yuval Shaia Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/ah.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 5fc623362731..b9bf0759f10a 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -102,7 +102,10 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr if (vlan_tag < 0x1000) vlan_tag |= (ah_attr->sl & 7) << 13; ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); - ah->av.eth.gid_index = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index); + ret = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index); + if (ret < 0) + return ERR_PTR(ret); + ah->av.eth.gid_index = ret; ah->av.eth.vlan = cpu_to_be16(vlan_tag); ah->av.eth.hop_limit = ah_attr->grh.hop_limit; if (ah_attr->static_rate) { From 593ff73bcfdc79f79a8a0df55504f75ad3e5d1a9 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 10 Nov 2016 11:30:55 +0200 Subject: [PATCH 326/503] IB/mlx4: Fix create CQ error flow Currently, if ib_copy_to_udata fails, the CQ won't be deleted from the radix tree and the HW (HW2SW). Fixes: 225c7b1feef1 ('IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters') Signed-off-by: Matan Barak Signed-off-by: Daniel Jurgens Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/cq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 1ea686b9e0f9..6a0fec357dae 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -253,11 +253,14 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, if (context) if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) { err = -EFAULT; - goto err_dbmap; + goto err_cq_free; } return &cq->ibcq; +err_cq_free: + mlx4_cq_free(dev->dev, &cq->mcq); + err_dbmap: if (context) mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db); From 1454ca3a97e147bb91e98b087446c39cf6692a48 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 16 Nov 2016 10:39:14 +0200 Subject: [PATCH 327/503] IB/rxe: Fix kernel panic in UDP tunnel with GRO and RX checksum Missing initialization of udp_tunnel_sock_cfg causes to following kernel panic, while kernel tries to execute gro_receive(). While being there, we converted udp_port_cfg to use the same initialization scheme as udp_tunnel_sock_cfg. ------------[ cut here ]------------ kernel tried to execute NX-protected page - exploit attempt? (uid: 0) BUG: unable to handle kernel paging request at ffffffffa0588c50 IP: [] __this_module+0x50/0xffffffffffff8400 [ib_rxe] PGD 1c09067 PUD 1c0a063 PMD bb394067 PTE 80000000ad5e8163 Oops: 0011 [#1] SMP Modules linked in: ib_rxe ip6_udp_tunnel udp_tunnel CPU: 5 PID: 0 Comm: swapper/5 Not tainted 4.7.0-rc3+ #2 Hardware name: Red Hat KVM, BIOS Bochs 01/01/2011 task: ffff880235e4e680 ti: ffff880235e68000 task.ti: ffff880235e68000 RIP: 0010:[] [] __this_module+0x50/0xffffffffffff8400 [ib_rxe] RSP: 0018:ffff880237343c80 EFLAGS: 00010282 RAX: 00000000dffe482d RBX: ffff8800ae330900 RCX: 000000002001b712 RDX: ffff8800ae330900 RSI: ffff8800ae102578 RDI: ffff880235589c00 RBP: ffff880237343cb0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800ae33e262 R13: ffff880235589c00 R14: 0000000000000014 R15: ffff8800ae102578 FS: 0000000000000000(0000) GS:ffff880237340000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffa0588c50 CR3: 0000000001c06000 CR4: 00000000000006e0 Stack: ffffffff8160860e ffff8800ae330900 ffff8800ae102578 0000000000000014 000000000000004e ffff8800ae102578 ffff880237343ce0 ffffffff816088fb 0000000000000000 ffff8800ae330900 0000000000000000 00000000ffad0000 Call Trace: [] ? udp_gro_receive+0xde/0x130 [] udp4_gro_receive+0x10b/0x2d0 [] inet_gro_receive+0x1d3/0x270 [] dev_gro_receive+0x269/0x3b0 [] napi_gro_receive+0x38/0x120 [] mlx5e_handle_rx_cqe+0x27e/0x340 [mlx5_core] [] mlx5e_poll_rx_cq+0x66/0x6d0 [mlx5_core] [] mlx5e_napi_poll+0x8e/0x400 [mlx5_core] [] net_rx_action+0x160/0x380 [] __do_softirq+0xd7/0x2c5 [] irq_exit+0xf5/0x100 [] do_IRQ+0x56/0xd0 [] common_interrupt+0x8c/0x8c [] ? native_safe_halt+0x6/0x10 [] default_idle+0x1e/0xd0 [] arch_cpu_idle+0xf/0x20 [] default_idle_call+0x3c/0x50 [] cpu_startup_entry+0x323/0x3c0 [] start_secondary+0x15c/0x1a0 RIP [] __this_module+0x50/0xffffffffffff8400 [ib_rxe] RSP CR2: ffffffffa0588c50 ---[ end trace 489ee31fa7614ac5 ]--- Kernel panic - not syncing: Fatal exception in interrupt Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception in interrupt ------------[ cut here ]------------ Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_net.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index b8258e4f0aea..ffff5a54cb34 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -243,10 +243,8 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port, { int err; struct socket *sock; - struct udp_port_cfg udp_cfg; - struct udp_tunnel_sock_cfg tnl_cfg; - - memset(&udp_cfg, 0, sizeof(udp_cfg)); + struct udp_port_cfg udp_cfg = {0}; + struct udp_tunnel_sock_cfg tnl_cfg = {0}; if (ipv6) { udp_cfg.family = AF_INET6; @@ -264,10 +262,8 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port, return ERR_PTR(err); } - tnl_cfg.sk_user_data = NULL; tnl_cfg.encap_type = 1; tnl_cfg.encap_rcv = rxe_udp_encap_recv; - tnl_cfg.encap_destroy = NULL; /* Setup UDP tunnel */ setup_udp_tunnel_sock(net, sock, &tnl_cfg); From 002e062e13db10973adb8302f231e48b477c7ccf Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 16 Nov 2016 10:39:15 +0200 Subject: [PATCH 328/503] IB/rxe: Fix handling of erroneous WR To correctly handle a erroneous WR this fix does the following 1. Make sure the bad WQE causes a user completion event. 2. Call rxe_completer to handle the erred WQE. Before the fix, when rxe_requester found a bad WQE, it changed its status to IB_WC_LOC_PROT_ERR and exit with 0 for non RC QPs. If this was the 1st WQE then there would be no ACK to invoke the completer and this bad WQE would be stuck in the QP's send-q. On top of that the requester exiting with 0 caused rxe_do_task to endlessly invoke rxe_requester, resulting in a soft-lockup attached below. In case the WQE was not the 1st and rxe_completer did get a chance to handle the bad WQE, it did not cause a complete event since the WQE's IB_SEND_SIGNALED flag was not set. Setting WQE status to IB_SEND_SIGNALED is subject to IBA spec version 1.2.1, section 10.7.3.1 Signaled Completions. NMI watchdog: BUG: soft lockup - CPU#7 stuck for 22s! [] ? rxe_pool_get_index+0x35/0xb0 [rdma_rxe] [] lookup_mem+0x3c/0xc0 [rdma_rxe] [] copy_data+0x1c4/0x230 [rdma_rxe] [] rxe_requester+0x9d0/0x1100 [rdma_rxe] [] ? kfree_skbmem+0x5a/0x60 [] rxe_do_task+0x89/0xf0 [rdma_rxe] [] rxe_run_task+0x12/0x30 [rdma_rxe] [] rxe_post_send+0x41a/0x550 [rdma_rxe] [] ? __kmalloc+0x182/0x200 [] ? down_read+0x12/0x40 [] ib_uverbs_post_send+0x532/0x540 [ib_uverbs] [] ? tcp_sendmsg+0x402/0xb80 [] ib_uverbs_write+0x18c/0x3f0 [ib_uverbs] [] ? inet_recvmsg+0x7e/0xb0 [] ? sock_recvmsg+0x3d/0x50 [] __vfs_write+0x37/0x140 [] vfs_write+0xb2/0x1b0 [] SyS_write+0x55/0xc0 [] entry_SYSCALL_64_fastpath+0x1a/0xa Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_req.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 832846b73ea0..22bd9630dcd9 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -696,7 +696,8 @@ next_wqe: qp->req.wqe_index); wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; - goto complete; + __rxe_do_task(&qp->comp.task); + return 0; } payload = mtu; } @@ -745,13 +746,17 @@ err: wqe->status = IB_WC_LOC_PROT_ERR; wqe->state = wqe_state_error; -complete: - if (qp_type(qp) != IB_QPT_RC) { - while (rxe_completer(qp) == 0) - ; - } - - return 0; + /* + * IBA Spec. Section 10.7.3.1 SIGNALED COMPLETIONS + * ---------8<---------8<------------- + * ...Note that if a completion error occurs, a Work Completion + * will always be generated, even if the signaling + * indicator requests an Unsignaled Completion. + * ---------8<---------8<------------- + */ + wqe->wr.send_flags |= IB_SEND_SIGNALED; + __rxe_do_task(&qp->comp.task); + return -EAGAIN; exit: return -EAGAIN; From aa75b07b478a774b1432e2df1be5cd8ae834de0f Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 16 Nov 2016 10:39:17 +0200 Subject: [PATCH 329/503] IB/rxe: Clear queue buffer when modifying QP to reset RXE resets the send-q only once in rxe_qp_init_req() when QP is created, but when the QP is reused after QP reset, the send-q holds previous garbage data. This garbage data wrongly fails CQEs that otherwise should have completed successfully. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_qp.c | 1 + drivers/infiniband/sw/rxe/rxe_queue.c | 9 +++++++++ drivers/infiniband/sw/rxe/rxe_queue.h | 2 ++ 3 files changed, 12 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index b8036cfbce04..95aaaa282a04 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -522,6 +522,7 @@ static void rxe_qp_reset(struct rxe_qp *qp) if (qp->sq.queue) { __rxe_do_task(&qp->comp.task); __rxe_do_task(&qp->req.task); + rxe_queue_reset(qp->sq.queue); } /* cleanup attributes */ diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c index 08274254eb88..d14bf496d62d 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.c +++ b/drivers/infiniband/sw/rxe/rxe_queue.c @@ -84,6 +84,15 @@ err1: return -EINVAL; } +inline void rxe_queue_reset(struct rxe_queue *q) +{ + /* queue is comprised from header and the memory + * of the actual queue. See "struct rxe_queue_buf" in rxe_queue.h + * reset only the queue itself and not the management header + */ + memset(q->buf->data, 0, q->buf_size - sizeof(struct rxe_queue_buf)); +} + struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, int *num_elem, unsigned int elem_size) diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h index 239fd609c31e..8c8641c87817 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.h +++ b/drivers/infiniband/sw/rxe/rxe_queue.h @@ -84,6 +84,8 @@ int do_mmap_info(struct rxe_dev *rxe, size_t buf_size, struct rxe_mmap_info **ip_p); +void rxe_queue_reset(struct rxe_queue *q); + struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, int *num_elem, unsigned int elem_size); From 6d931308f55faaef3f30bd0346c47f99528b229d Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 16 Nov 2016 10:39:18 +0200 Subject: [PATCH 330/503] IB/rxe: Update qp state for user query The method rxe_qp_error() transitions QP to error state and make sure the QP is drained. It did not though update the QP state for user's query. This patch fixes this. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_qp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 95aaaa282a04..c3e60e4bde6e 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -574,6 +574,7 @@ void rxe_qp_error(struct rxe_qp *qp) { qp->req.state = QP_STATE_ERROR; qp->resp.state = QP_STATE_ERROR; + qp->attr.qp_state = IB_QPS_ERR; /* drain work and packet queues */ rxe_run_task(&qp->resp.task, 1); From 4ff522ea47944ffd3d4d27023ace8bc6a722c834 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 18 Oct 2016 14:04:39 -0700 Subject: [PATCH 331/503] iw_cxgb4: set *bad_wr for post_send/post_recv errors There are a few cases in c4iw_post_send() and c4iw_post_receive() where *bad_wr is not set when an error is returned. This can cause a crash if the application tries to use bad_wr. Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/qp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index f57deba6717c..5790e1dbd618 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -797,11 +797,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, spin_lock_irqsave(&qhp->lock, flag); if (t4_wq_in_error(&qhp->wq)) { spin_unlock_irqrestore(&qhp->lock, flag); + *bad_wr = wr; return -EINVAL; } num_wrs = t4_sq_avail(&qhp->wq); if (num_wrs == 0) { spin_unlock_irqrestore(&qhp->lock, flag); + *bad_wr = wr; return -ENOMEM; } while (wr) { @@ -934,11 +936,13 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, spin_lock_irqsave(&qhp->lock, flag); if (t4_wq_in_error(&qhp->wq)) { spin_unlock_irqrestore(&qhp->lock, flag); + *bad_wr = wr; return -EINVAL; } num_wrs = t4_rq_avail(&qhp->wq); if (num_wrs == 0) { spin_unlock_irqrestore(&qhp->lock, flag); + *bad_wr = wr; return -ENOMEM; } while (wr) { From 5c6b2aaf9316fd0983c0c999d920306ddc65bd2d Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 3 Nov 2016 12:09:38 -0700 Subject: [PATCH 332/503] iw_cxgb4: invalidate the mr when posting a read_w_inv wr Also, rearrange things a bit to have a common c4iw_invalidate_mr() function used everywhere that we need to invalidate. Fixes: 49b53a93a64a ("iw_cxgb4: add fast-path for small REG_MR operations") Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cq.c | 17 +++-------------- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 2 +- drivers/infiniband/hw/cxgb4/mem.c | 12 ++++++++++++ drivers/infiniband/hw/cxgb4/qp.c | 16 ++++++++-------- 4 files changed, 24 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 867b8cf82be8..19c6477af19f 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -666,18 +666,6 @@ skip_cqe: return ret; } -static void invalidate_mr(struct c4iw_dev *rhp, u32 rkey) -{ - struct c4iw_mr *mhp; - unsigned long flags; - - spin_lock_irqsave(&rhp->lock, flags); - mhp = get_mhp(rhp, rkey >> 8); - if (mhp) - mhp->attr.state = 0; - spin_unlock_irqrestore(&rhp->lock, flags); -} - /* * Get one cq entry from c4iw and map it to openib. * @@ -733,7 +721,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) { wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe); wc->wc_flags |= IB_WC_WITH_INVALIDATE; - invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey); + c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey); } } else { switch (CQE_OPCODE(&cqe)) { @@ -762,7 +750,8 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) /* Invalidate the MR if the fastreg failed */ if (CQE_STATUS(&cqe) != T4_ERR_SUCCESS) - invalidate_mr(qhp->rhp, CQE_WRID_FR_STAG(&cqe)); + c4iw_invalidate_mr(qhp->rhp, + CQE_WRID_FR_STAG(&cqe)); break; default: printk(KERN_ERR MOD "Unexpected opcode %d " diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 7e7f79e55006..4788e1a46fde 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -999,6 +999,6 @@ extern int db_coalescing_threshold; extern int use_dsgl; void c4iw_drain_rq(struct ib_qp *qp); void c4iw_drain_sq(struct ib_qp *qp); - +void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey); #endif diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 80e27749420a..410408f886c1 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -770,3 +770,15 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr) kfree(mhp); return 0; } + +void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey) +{ + struct c4iw_mr *mhp; + unsigned long flags; + + spin_lock_irqsave(&rhp->lock, flags); + mhp = get_mhp(rhp, rkey >> 8); + if (mhp) + mhp->attr.state = 0; + spin_unlock_irqrestore(&rhp->lock, flags); +} diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 5790e1dbd618..b7ac97b27c88 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -706,12 +706,8 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, return 0; } -static int build_inv_stag(struct c4iw_dev *dev, union t4_wr *wqe, - struct ib_send_wr *wr, u8 *len16) +static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) { - struct c4iw_mr *mhp = get_mhp(dev, wr->ex.invalidate_rkey >> 8); - - mhp->attr.state = 0; wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey); wqe->inv.r2 = 0; *len16 = DIV_ROUND_UP(sizeof wqe->inv, 16); @@ -842,10 +838,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_READ_WITH_INV: fw_opcode = FW_RI_RDMA_READ_WR; swsqe->opcode = FW_RI_READ_REQ; - if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) + if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) { + c4iw_invalidate_mr(qhp->rhp, + wr->sg_list[0].lkey); fw_flags = FW_RI_RDMA_READ_INVALIDATE; - else + } else { fw_flags = 0; + } err = build_rdma_read(wqe, wr, &len16); if (err) break; @@ -878,7 +877,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, fw_flags |= FW_RI_LOCAL_FENCE_FLAG; fw_opcode = FW_RI_INV_LSTAG_WR; swsqe->opcode = FW_RI_LOCAL_INV; - err = build_inv_stag(qhp->rhp, wqe, wr, &len16); + err = build_inv_stag(wqe, wr, &len16); + c4iw_invalidate_mr(qhp->rhp, wr->ex.invalidate_rkey); break; default: PDBG("%s post of type=%d TBD!\n", __func__, From e47112d9d6009bf6b7438cedc0270316d6b0370d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 15 Nov 2016 15:58:15 -0800 Subject: [PATCH 333/503] net: dsa: b53: Fix VLAN usage and how we treat CPU port We currently have a fundamental problem in how we treat the CPU port and its VLAN membership. As soon as a second VLAN is configured to be untagged, the CPU automatically becomes untagged for that VLAN as well, and yet, we don't gracefully make sure that the CPU becomes tagged in the other VLANs it could be a member of. This results in only one VLAN being effectively usable from the CPU's perspective. Instead of having some pretty complex logic which tries to maintain the CPU port's default VLAN and its untagged properties, just do something very simple which consists in neither altering the CPU port's PVID settings, nor its untagged settings: - whenever a VLAN is added, the CPU is automatically a member of this VLAN group, as a tagged member - PVID settings for downstream ports do not alter the CPU port's PVID since it now is part of all VLANs in the system This means that a typical example where e.g: LAN ports are in VLAN1, and WAN port is in VLAN2, now require having two VLAN interfaces for the host to properly terminate and send traffic from/to. Fixes: Fixes: a2482d2ce349 ("net: dsa: b53: Plug in VLAN support") Reported-by: Hartmut Knaack Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 7717b19dc806..947adda3397d 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -962,9 +962,10 @@ static void b53_vlan_add(struct dsa_switch *ds, int port, vl->members |= BIT(port) | BIT(cpu_port); if (untagged) - vl->untag |= BIT(port) | BIT(cpu_port); + vl->untag |= BIT(port); else - vl->untag &= ~(BIT(port) | BIT(cpu_port)); + vl->untag &= ~BIT(port); + vl->untag &= ~BIT(cpu_port); b53_set_vlan_entry(dev, vid, vl); b53_fast_age_vlan(dev, vid); @@ -973,8 +974,6 @@ static void b53_vlan_add(struct dsa_switch *ds, int port, if (pvid) { b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), vlan->vid_end); - b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(cpu_port), - vlan->vid_end); b53_fast_age_vlan(dev, vid); } } @@ -984,7 +983,6 @@ static int b53_vlan_del(struct dsa_switch *ds, int port, { struct b53_device *dev = ds->priv; bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; - unsigned int cpu_port = dev->cpu_port; struct b53_vlan *vl; u16 vid; u16 pvid; @@ -997,8 +995,6 @@ static int b53_vlan_del(struct dsa_switch *ds, int port, b53_get_vlan_entry(dev, vid, vl); vl->members &= ~BIT(port); - if ((vl->members & BIT(cpu_port)) == BIT(cpu_port)) - vl->members = 0; if (pvid == vid) { if (is5325(dev) || is5365(dev)) @@ -1007,18 +1003,14 @@ static int b53_vlan_del(struct dsa_switch *ds, int port, pvid = 0; } - if (untagged) { + if (untagged) vl->untag &= ~(BIT(port)); - if ((vl->untag & BIT(cpu_port)) == BIT(cpu_port)) - vl->untag = 0; - } b53_set_vlan_entry(dev, vid, vl); b53_fast_age_vlan(dev, vid); } b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), pvid); - b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(cpu_port), pvid); b53_fast_age_vlan(dev, pvid); return 0; From e5f6f564fd191d365fcd775c06a732a488205588 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Nov 2016 06:31:52 -0800 Subject: [PATCH 334/503] bnxt: add a missing rcu synchronization Add a missing synchronize_net() call to avoid potential use after free, since we explicitly call napi_hash_del() to factorize the RCU grace period. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Signed-off-by: Eric Dumazet Cc: Michael Chan Acked-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c6909660e097..e18635b2a002 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4934,6 +4934,10 @@ static void bnxt_del_napi(struct bnxt *bp) napi_hash_del(&bnapi->napi); netif_napi_del(&bnapi->napi); } + /* We called napi_hash_del() before netif_napi_del(), we need + * to respect an RCU grace period before freeing napi structures. + */ + synchronize_net(); } static void bnxt_init_napi(struct bnxt *bp) From 4a59015372840a6fc35d7fd40638a9d5dc3ec958 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 13 Nov 2016 21:23:34 +0100 Subject: [PATCH 335/503] xattr: Fix setting security xattrs on sockfs The IOP_XATTR flag is set on sockfs because sockfs supports getting the "system.sockprotoname" xattr. Since commit 6c6ef9f2, this flag is checked for setxattr support as well. This is wrong on sockfs because security xattr support there is supposed to be provided by security_inode_setsecurity. The smack security module relies on socket labels (xattrs). Fix this by adding a security xattr handler on sockfs that returns -EAGAIN, and by checking for -EAGAIN in setxattr. We cannot simply check for -EOPNOTSUPP in setxattr because there are filesystems that neither have direct security xattr support nor support via security_inode_setsecurity. A more proper fix might be to move the call to security_inode_setsecurity into sockfs, but it's not clear to me if that is safe: we would end up calling security_inode_post_setxattr after that as well. Signed-off-by: Andreas Gruenbacher Signed-off-by: Al Viro --- fs/xattr.c | 22 ++++++++++++++-------- net/socket.c | 15 +++++++++++++++ 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/fs/xattr.c b/fs/xattr.c index 3368659c471e..2d13b4e62fae 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -170,7 +170,7 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { struct inode *inode = dentry->d_inode; - int error = -EOPNOTSUPP; + int error = -EAGAIN; int issec = !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); @@ -183,15 +183,21 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, security_inode_post_setxattr(dentry, name, value, size, flags); } - } else if (issec) { - const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; - + } else { if (unlikely(is_bad_inode(inode))) return -EIO; - error = security_inode_setsecurity(inode, suffix, value, - size, flags); - if (!error) - fsnotify_xattr(dentry); + } + if (error == -EAGAIN) { + error = -EOPNOTSUPP; + + if (issec) { + const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; + + error = security_inode_setsecurity(inode, suffix, value, + size, flags); + if (!error) + fsnotify_xattr(dentry); + } } return error; diff --git a/net/socket.c b/net/socket.c index 272518b087c8..73dc69f9681e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -341,8 +341,23 @@ static const struct xattr_handler sockfs_xattr_handler = { .get = sockfs_xattr_get, }; +static int sockfs_security_xattr_set(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *suffix, const void *value, + size_t size, int flags) +{ + /* Handled by LSM. */ + return -EAGAIN; +} + +static const struct xattr_handler sockfs_security_xattr_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .set = sockfs_security_xattr_set, +}; + static const struct xattr_handler *sockfs_xattr_handlers[] = { &sockfs_xattr_handler, + &sockfs_security_xattr_handler, NULL }; From 680bb946a1ae04fe0ff369a4965f76b48c07dc54 Mon Sep 17 00:00:00 2001 From: Abhi Das Date: Wed, 16 Nov 2016 21:44:23 -0600 Subject: [PATCH 336/503] fix iov_iter_advance() for ITER_PIPE iov_iter_advance() needs to decrement iter->count by the number of bytes we'd moved beyond. Normal flavours do that, but ITER_PIPE doesn't and ITER_PIPE generic_file_read_iter() for O_DIRECT files ends up with a bogus fallback to page cache read, resulting in incorrect values for file offset and bytes read. Signed-off-by: Abhi Das Signed-off-by: Al Viro --- lib/iov_iter.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index f0c7f1481bae..f2bd21b93dfc 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -683,10 +683,11 @@ static void pipe_advance(struct iov_iter *i, size_t size) struct pipe_inode_info *pipe = i->pipe; struct pipe_buffer *buf; int idx = i->idx; - size_t off = i->iov_offset; + size_t off = i->iov_offset, orig_sz; if (unlikely(i->count < size)) size = i->count; + orig_sz = size; if (size) { if (off) /* make it relative to the beginning of buffer */ @@ -713,6 +714,7 @@ static void pipe_advance(struct iov_iter *i, size_t size) pipe->nrbufs--; } } + i->count -= orig_sz; } void iov_iter_advance(struct iov_iter *i, size_t size) From 553bbc11aa6c1f9e0f529a06aeeca15fbe4a3985 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 16 Nov 2016 15:17:09 +0100 Subject: [PATCH 337/503] x86/boot: Avoid warning for zero-filling .bss The latest binutils are warning about a .fill directive with an explicit value in a .bss section: arch/x86/kernel/head_32.S: Assembler messages: arch/x86/kernel/head_32.S:677: Warning: ignoring fill value in section `.bss..page_aligned' arch/x86/kernel/head_32.S:679: Warning: ignoring fill value in section `.bss..page_aligned' This comes from the 'ENTRY()' macro padding the space between the symbols with 'nop' via: .align 4,0x90 Open-coding the .globl directive without the padding avoids that warning, as all the symbols are already page aligned. Signed-off-by: Arnd Bergmann Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20161116141726.2013389-1-arnd@arndb.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_32.S | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index b6b2f0264af3..2dabea46f039 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -665,14 +665,17 @@ __PAGE_ALIGNED_BSS initial_pg_pmd: .fill 1024*KPMDS,4,0 #else -ENTRY(initial_page_table) +.globl initial_page_table +initial_page_table: .fill 1024,4,0 #endif initial_pg_fixmap: .fill 1024,4,0 -ENTRY(empty_zero_page) +.globl empty_zero_page +empty_zero_page: .fill 4096,1,0 -ENTRY(swapper_pg_dir) +.globl swapper_pg_dir +swapper_pg_dir: .fill 1024,4,0 EXPORT_SYMBOL(empty_zero_page) From d5afc1b68a6ddc27746d31f775025afe75ec8122 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 16 Nov 2016 10:24:15 -0800 Subject: [PATCH 338/503] dmaengine: cppi41: More PM runtime fixes Fix use of u32 instead of int for checking for negative errors values as pointed out by Dan Carpenter . And while testing the PM runtime error path by randomly returning failed values in runtime resume, I noticed two more places that need fixing: - If pm_runtime_get_sync() fails in probe, we still need to do pm_runtime_put_sync() to keep the use count happy. We could call pm_runtime_put_noidle() on the error path, but we're just going to call pm_runtime_disable() after that so pm_runtime_put_sync() will do what we want - We should print an error if pm_runtime_get_sync() fails in cppi41_dma_alloc_chan_resources() so we know where it happens Reported-by: Dan Carpenter Fixes: 740b4be3f742 ("dmaengine: cpp41: Fix handling of error path") Signed-off-by: Tony Lindgren Signed-off-by: Vinod Koul --- drivers/dma/cppi41.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c index 4b52126c13cf..d5ba43a87a68 100644 --- a/drivers/dma/cppi41.c +++ b/drivers/dma/cppi41.c @@ -317,11 +317,12 @@ static irqreturn_t cppi41_irq(int irq, void *data) while (val) { u32 desc, len; + int error; - status = pm_runtime_get(cdd->ddev.dev); - if (status < 0) + error = pm_runtime_get(cdd->ddev.dev); + if (error < 0) dev_err(cdd->ddev.dev, "%s pm runtime get: %i\n", - __func__, status); + __func__, error); q_num = __fls(val); val &= ~(1 << q_num); @@ -367,6 +368,8 @@ static int cppi41_dma_alloc_chan_resources(struct dma_chan *chan) error = pm_runtime_get_sync(cdd->ddev.dev); if (error < 0) { + dev_err(cdd->ddev.dev, "%s pm runtime get: %i\n", + __func__, error); pm_runtime_put_noidle(cdd->ddev.dev); return error; @@ -1072,8 +1075,8 @@ err_chans: deinit_cppi41(dev, cdd); err_init_cppi: pm_runtime_dont_use_autosuspend(dev); - pm_runtime_put_sync(dev); err_get_sync: + pm_runtime_put_sync(dev); pm_runtime_disable(dev); iounmap(cdd->usbss_mem); iounmap(cdd->ctrl_mem); From a5a40d4624cd2328c69768f6eb41716fc249d7be Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 Oct 2016 23:29:10 +0200 Subject: [PATCH 339/503] crypto: caam - fix type mismatch warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building the caam driver on arm64 produces a harmless warning: drivers/crypto/caam/caamalg.c:140:139: warning: comparison of distinct pointer types lacks a cast We can use min_t to tell the compiler which type we want it to use here. Fixes: 5ecf8ef9103c ("crypto: caam - fix sg dump") Signed-off-by: Arnd Bergmann Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index f5a63ba97023..954a64c7757b 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -137,7 +137,7 @@ static void dbg_dump_sg(const char *level, const char *prefix_str, } buf = it_page + it->offset; - len = min(tlen, it->length); + len = min_t(size_t, tlen, it->length); print_hex_dump(level, prefix_str, prefix_type, rowsize, groupsize, buf, len, ascii); tlen -= len; From c723bd6ec2b50e7c8b3424d9cb8febd8ffa3da1f Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 16 Nov 2016 13:21:22 -0600 Subject: [PATCH 340/503] usb: musb: Fix broken use of static variable for multiple instances We can't use static variable first for checking when musb is initialized when we have multiple musb instances like on am335x. Tested-by: Ladislav Michl Reviewed-by: Johan Hovold Tested-by: Laurent Pinchart Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 9 +++++---- drivers/usb/musb/musb_core.h | 2 ++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index e01116e4c067..f1ea4494dcb2 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -2291,6 +2291,7 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl) if (status) goto fail5; + musb->is_initialized = 1; pm_runtime_mark_last_busy(musb->controller); pm_runtime_put_autosuspend(musb->controller); @@ -2629,7 +2630,6 @@ static int musb_runtime_suspend(struct device *dev) static int musb_runtime_resume(struct device *dev) { struct musb *musb = dev_to_musb(dev); - static int first = 1; /* * When pm_runtime_get_sync called for the first time in driver @@ -2640,9 +2640,10 @@ static int musb_runtime_resume(struct device *dev) * Also context restore without save does not make * any sense */ - if (!first) - musb_restore_context(musb); - first = 0; + if (!musb->is_initialized) + return 0; + + musb_restore_context(musb); if (musb->need_finish_resume) { musb->need_finish_resume = 0; diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h index 2cb88a498f8a..c04abf424c5c 100644 --- a/drivers/usb/musb/musb_core.h +++ b/drivers/usb/musb/musb_core.h @@ -385,6 +385,8 @@ struct musb { int a_wait_bcon; /* VBUS timeout in msecs */ unsigned long idle_timeout; /* Next timeout in jiffies */ + unsigned is_initialized:1; + /* active means connected and not suspended */ unsigned is_active:1; From ea2f35c01d5ea72b43b9b4fb4c5b9417a9eb2fb8 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 16 Nov 2016 13:21:23 -0600 Subject: [PATCH 341/503] usb: musb: Fix sleeping function called from invalid context for hdrc glue Commit 65b3f50ed6fa ("usb: musb: Add PM runtime support for MUSB DSPS glue layer") wrongly added a call for pm_runtime_get_sync to otg_timer that runs in softirq context. That causes a "BUG: sleeping function called from invalid context" every time when polling the cable status: [] (__might_sleep) from [] (__pm_runtime_resume+0x9c/0xa0) [] (__pm_runtime_resume) from [] (otg_timer+0x3c/0x254) [] (otg_timer) from [] (call_timer_fn+0xfc/0x41c) [] (call_timer_fn) from [] (expire_timers+0x120/0x210) [] (expire_timers) from [] (run_timer_softirq+0xa4/0xdc) [] (run_timer_softirq) from [] (__do_softirq+0x12c/0x594) I did not notice that as I did not have CONFIG_DEBUG_ATOMIC_SLEEP enabled. And looks like also musb_gadget_queue() suffers from the same problem. Let's fix the issue by using a list of delayed work then call it on resume. Note that we want to do this only when musb core and it's parent devices are awake, and we need to make sure the DSPS glue timer is stopped as noted by Johan Hovold . Note that we already are re-enabling the timer with mod_timer() in dsps_musb_enable(). Later on we may be able to remove other delayed work in the musb driver and just do it from pending_resume_work. But this should be done only for delayed work that does not have other timing requirements beyond just being run on resume. Fixes: 65b3f50ed6fa ("usb: musb: Add PM runtime support for MUSB DSPS glue layer") Reported-by: Johan Hovold Reviewed-by: Johan Hovold Tested-by: Laurent Pinchart Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 109 +++++++++++++++++++++++++++++++-- drivers/usb/musb/musb_core.h | 7 +++ drivers/usb/musb/musb_dsps.c | 36 ++++++++--- drivers/usb/musb/musb_gadget.c | 33 ++++++++-- 4 files changed, 167 insertions(+), 18 deletions(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index f1ea4494dcb2..384de6cd26f5 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -1969,6 +1969,7 @@ static struct musb *allocate_instance(struct device *dev, INIT_LIST_HEAD(&musb->control); INIT_LIST_HEAD(&musb->in_bulk); INIT_LIST_HEAD(&musb->out_bulk); + INIT_LIST_HEAD(&musb->pending_list); musb->vbuserr_retry = VBUSERR_RETRY_COUNT; musb->a_wait_bcon = OTG_TIME_A_WAIT_BCON; @@ -2018,6 +2019,84 @@ static void musb_free(struct musb *musb) musb_host_free(musb); } +struct musb_pending_work { + int (*callback)(struct musb *musb, void *data); + void *data; + struct list_head node; +}; + +/* + * Called from musb_runtime_resume(), musb_resume(), and + * musb_queue_resume_work(). Callers must take musb->lock. + */ +static int musb_run_resume_work(struct musb *musb) +{ + struct musb_pending_work *w, *_w; + unsigned long flags; + int error = 0; + + spin_lock_irqsave(&musb->list_lock, flags); + list_for_each_entry_safe(w, _w, &musb->pending_list, node) { + if (w->callback) { + error = w->callback(musb, w->data); + if (error < 0) { + dev_err(musb->controller, + "resume callback %p failed: %i\n", + w->callback, error); + } + } + list_del(&w->node); + devm_kfree(musb->controller, w); + } + spin_unlock_irqrestore(&musb->list_lock, flags); + + return error; +} + +/* + * Called to run work if device is active or else queue the work to happen + * on resume. Caller must take musb->lock and must hold an RPM reference. + * + * Note that we cowardly refuse queuing work after musb PM runtime + * resume is done calling musb_run_resume_work() and return -EINPROGRESS + * instead. + */ +int musb_queue_resume_work(struct musb *musb, + int (*callback)(struct musb *musb, void *data), + void *data) +{ + struct musb_pending_work *w; + unsigned long flags; + int error; + + if (WARN_ON(!callback)) + return -EINVAL; + + if (pm_runtime_active(musb->controller)) + return callback(musb, data); + + w = devm_kzalloc(musb->controller, sizeof(*w), GFP_ATOMIC); + if (!w) + return -ENOMEM; + + w->callback = callback; + w->data = data; + spin_lock_irqsave(&musb->list_lock, flags); + if (musb->is_runtime_suspended) { + list_add_tail(&w->node, &musb->pending_list); + error = 0; + } else { + dev_err(musb->controller, "could not add resume work %p\n", + callback); + devm_kfree(musb->controller, w); + error = -EINPROGRESS; + } + spin_unlock_irqrestore(&musb->list_lock, flags); + + return error; +} +EXPORT_SYMBOL_GPL(musb_queue_resume_work); + static void musb_deassert_reset(struct work_struct *work) { struct musb *musb; @@ -2065,6 +2144,7 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl) } spin_lock_init(&musb->lock); + spin_lock_init(&musb->list_lock); musb->board_set_power = plat->set_power; musb->min_power = plat->min_power; musb->ops = plat->platform_ops; @@ -2558,6 +2638,7 @@ static int musb_suspend(struct device *dev) musb_platform_disable(musb); musb_generic_disable(musb); + WARN_ON(!list_empty(&musb->pending_list)); spin_lock_irqsave(&musb->lock, flags); @@ -2579,9 +2660,11 @@ static int musb_suspend(struct device *dev) static int musb_resume(struct device *dev) { - struct musb *musb = dev_to_musb(dev); - u8 devctl; - u8 mask; + struct musb *musb = dev_to_musb(dev); + unsigned long flags; + int error; + u8 devctl; + u8 mask; /* * For static cmos like DaVinci, register values were preserved @@ -2615,6 +2698,13 @@ static int musb_resume(struct device *dev) musb_start(musb); + spin_lock_irqsave(&musb->lock, flags); + error = musb_run_resume_work(musb); + if (error) + dev_err(musb->controller, "resume work failed with %i\n", + error); + spin_unlock_irqrestore(&musb->lock, flags); + return 0; } @@ -2623,13 +2713,16 @@ static int musb_runtime_suspend(struct device *dev) struct musb *musb = dev_to_musb(dev); musb_save_context(musb); + musb->is_runtime_suspended = 1; return 0; } static int musb_runtime_resume(struct device *dev) { - struct musb *musb = dev_to_musb(dev); + struct musb *musb = dev_to_musb(dev); + unsigned long flags; + int error; /* * When pm_runtime_get_sync called for the first time in driver @@ -2651,6 +2744,14 @@ static int musb_runtime_resume(struct device *dev) msecs_to_jiffies(USB_RESUME_TIMEOUT)); } + spin_lock_irqsave(&musb->lock, flags); + error = musb_run_resume_work(musb); + if (error) + dev_err(musb->controller, "resume work failed with %i\n", + error); + musb->is_runtime_suspended = 0; + spin_unlock_irqrestore(&musb->lock, flags); + return 0; } diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h index c04abf424c5c..15b1f93c7037 100644 --- a/drivers/usb/musb/musb_core.h +++ b/drivers/usb/musb/musb_core.h @@ -303,6 +303,7 @@ struct musb_context_registers { struct musb { /* device lock */ spinlock_t lock; + spinlock_t list_lock; /* resume work list lock */ struct musb_io io; const struct musb_platform_ops *ops; @@ -337,6 +338,7 @@ struct musb { struct list_head control; /* of musb_qh */ struct list_head in_bulk; /* of musb_qh */ struct list_head out_bulk; /* of musb_qh */ + struct list_head pending_list; /* pending work list */ struct timer_list otg_timer; struct notifier_block nb; @@ -386,6 +388,7 @@ struct musb { unsigned long idle_timeout; /* Next timeout in jiffies */ unsigned is_initialized:1; + unsigned is_runtime_suspended:1; /* active means connected and not suspended */ unsigned is_active:1; @@ -542,6 +545,10 @@ extern irqreturn_t musb_interrupt(struct musb *); extern void musb_hnp_stop(struct musb *musb); +int musb_queue_resume_work(struct musb *musb, + int (*callback)(struct musb *musb, void *data), + void *data); + static inline void musb_platform_set_vbus(struct musb *musb, int is_on) { if (musb->ops->set_vbus) diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c index 0f17d2140db6..6096c84ab67a 100644 --- a/drivers/usb/musb/musb_dsps.c +++ b/drivers/usb/musb/musb_dsps.c @@ -185,24 +185,19 @@ static void dsps_musb_disable(struct musb *musb) musb_writel(reg_base, wrp->coreintr_clear, wrp->usb_bitmap); musb_writel(reg_base, wrp->epintr_clear, wrp->txep_bitmap | wrp->rxep_bitmap); + del_timer_sync(&glue->timer); musb_writeb(musb->mregs, MUSB_DEVCTL, 0); } -static void otg_timer(unsigned long _musb) +/* Caller must take musb->lock */ +static int dsps_check_status(struct musb *musb, void *unused) { - struct musb *musb = (void *)_musb; void __iomem *mregs = musb->mregs; struct device *dev = musb->controller; struct dsps_glue *glue = dev_get_drvdata(dev->parent); const struct dsps_musb_wrapper *wrp = glue->wrp; u8 devctl; - unsigned long flags; int skip_session = 0; - int err; - - err = pm_runtime_get_sync(dev); - if (err < 0) - dev_err(dev, "Poll could not pm_runtime_get: %i\n", err); /* * We poll because DSPS IP's won't expose several OTG-critical @@ -212,7 +207,6 @@ static void otg_timer(unsigned long _musb) dev_dbg(musb->controller, "Poll devctl %02x (%s)\n", devctl, usb_otg_state_string(musb->xceiv->otg->state)); - spin_lock_irqsave(&musb->lock, flags); switch (musb->xceiv->otg->state) { case OTG_STATE_A_WAIT_VRISE: mod_timer(&glue->timer, jiffies + @@ -245,8 +239,30 @@ static void otg_timer(unsigned long _musb) default: break; } - spin_unlock_irqrestore(&musb->lock, flags); + return 0; +} + +static void otg_timer(unsigned long _musb) +{ + struct musb *musb = (void *)_musb; + struct device *dev = musb->controller; + unsigned long flags; + int err; + + err = pm_runtime_get(dev); + if ((err != -EINPROGRESS) && err < 0) { + dev_err(dev, "Poll could not pm_runtime_get: %i\n", err); + pm_runtime_put_noidle(dev); + + return; + } + + spin_lock_irqsave(&musb->lock, flags); + err = musb_queue_resume_work(musb, dsps_check_status, NULL); + if (err < 0) + dev_err(dev, "%s resume work: %i\n", __func__, err); + spin_unlock_irqrestore(&musb->lock, flags); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); } diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index 4042ea017985..910f50967627 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1222,13 +1222,22 @@ void musb_ep_restart(struct musb *musb, struct musb_request *req) rxstate(musb, req); } +static int musb_ep_restart_resume_work(struct musb *musb, void *data) +{ + struct musb_request *req = data; + + musb_ep_restart(musb, req); + + return 0; +} + static int musb_gadget_queue(struct usb_ep *ep, struct usb_request *req, gfp_t gfp_flags) { struct musb_ep *musb_ep; struct musb_request *request; struct musb *musb; - int status = 0; + int status; unsigned long lockflags; if (!ep || !req) @@ -1245,6 +1254,17 @@ static int musb_gadget_queue(struct usb_ep *ep, struct usb_request *req, if (request->ep != musb_ep) return -EINVAL; + status = pm_runtime_get(musb->controller); + if ((status != -EINPROGRESS) && status < 0) { + dev_err(musb->controller, + "pm runtime get failed in %s\n", + __func__); + pm_runtime_put_noidle(musb->controller); + + return status; + } + status = 0; + trace_musb_req_enq(request); /* request is mine now... */ @@ -1255,7 +1275,6 @@ static int musb_gadget_queue(struct usb_ep *ep, struct usb_request *req, map_dma_buffer(request, musb, musb_ep); - pm_runtime_get_sync(musb->controller); spin_lock_irqsave(&musb->lock, lockflags); /* don't queue if the ep is down */ @@ -1271,8 +1290,14 @@ static int musb_gadget_queue(struct usb_ep *ep, struct usb_request *req, list_add_tail(&request->list, &musb_ep->req_list); /* it this is the head of the queue, start i/o ... */ - if (!musb_ep->busy && &request->list == musb_ep->req_list.next) - musb_ep_restart(musb, request); + if (!musb_ep->busy && &request->list == musb_ep->req_list.next) { + status = musb_queue_resume_work(musb, + musb_ep_restart_resume_work, + request); + if (status < 0) + dev_err(musb->controller, "%s resume work: %i\n", + __func__, status); + } unlock: spin_unlock_irqrestore(&musb->lock, lockflags); From 2bff3916fda9145587c0312b6f5c43d82504980c Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 16 Nov 2016 13:21:24 -0600 Subject: [PATCH 342/503] usb: musb: Fix PM for hub disconnect With a USB hub disconnected, devctl can be 0x19 for about a second on am335x and will stay forever on at least omap3. And we get no further interrupts when devctl session bit clears. This keeps PM runtime active. Let's fix the issue by polling devctl until the session bit clears or times out. We can do this by making musb->irq_work into delayed_work. And with the polling implemented, we can now also have the quirk for invalid VBUS it to avoid disconnecting too early while VBUS is ramping up. Fixes: 467d5c980709 ("usb: musb: Implement session bit based runtime PM for musb-core") Fixes: 65b3f50ed6fa ("usb: musb: Add PM runtime support for MUSB DSPS Tested-by: Ladislav Michl Tested-by: Laurent Pinchart Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 29 +++++++++++++++++++---------- drivers/usb/musb/musb_core.h | 4 ++-- drivers/usb/musb/musb_gadget.c | 6 +++--- drivers/usb/musb/tusb6010.c | 6 +++--- 4 files changed, 27 insertions(+), 18 deletions(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 384de6cd26f5..c3e172e15ec3 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -986,7 +986,7 @@ b_host: } #endif - schedule_work(&musb->irq_work); + schedule_delayed_work(&musb->irq_work, 0); return handled; } @@ -1855,14 +1855,23 @@ static void musb_pm_runtime_check_session(struct musb *musb) MUSB_DEVCTL_HR; switch (devctl & ~s) { case MUSB_QUIRK_B_INVALID_VBUS_91: - if (!musb->session && !musb->quirk_invalid_vbus) { - musb->quirk_invalid_vbus = true; + if (musb->quirk_retries--) { musb_dbg(musb, - "First invalid vbus, assume no session"); + "Poll devctl on invalid vbus, assume no session"); + schedule_delayed_work(&musb->irq_work, + msecs_to_jiffies(1000)); + return; } - break; case MUSB_QUIRK_A_DISCONNECT_19: + if (musb->quirk_retries--) { + musb_dbg(musb, + "Poll devctl on possible host mode disconnect"); + schedule_delayed_work(&musb->irq_work, + msecs_to_jiffies(1000)); + + return; + } if (!musb->session) break; musb_dbg(musb, "Allow PM on possible host mode disconnect"); @@ -1886,9 +1895,9 @@ static void musb_pm_runtime_check_session(struct musb *musb) if (error < 0) dev_err(musb->controller, "Could not enable: %i\n", error); + musb->quirk_retries = 3; } else { musb_dbg(musb, "Allow PM with no session: %02x", devctl); - musb->quirk_invalid_vbus = false; pm_runtime_mark_last_busy(musb->controller); pm_runtime_put_autosuspend(musb->controller); } @@ -1899,7 +1908,7 @@ static void musb_pm_runtime_check_session(struct musb *musb) /* Only used to provide driver mode change events */ static void musb_irq_work(struct work_struct *data) { - struct musb *musb = container_of(data, struct musb, irq_work); + struct musb *musb = container_of(data, struct musb, irq_work.work); musb_pm_runtime_check_session(musb); @@ -2288,7 +2297,7 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl) musb_generic_disable(musb); /* Init IRQ workqueue before request_irq */ - INIT_WORK(&musb->irq_work, musb_irq_work); + INIT_DELAYED_WORK(&musb->irq_work, musb_irq_work); INIT_DELAYED_WORK(&musb->deassert_reset_work, musb_deassert_reset); INIT_DELAYED_WORK(&musb->finish_resume_work, musb_host_finish_resume); @@ -2385,7 +2394,7 @@ fail4: musb_host_cleanup(musb); fail3: - cancel_work_sync(&musb->irq_work); + cancel_delayed_work_sync(&musb->irq_work); cancel_delayed_work_sync(&musb->finish_resume_work); cancel_delayed_work_sync(&musb->deassert_reset_work); if (musb->dma_controller) @@ -2452,7 +2461,7 @@ static int musb_remove(struct platform_device *pdev) */ musb_exit_debugfs(musb); - cancel_work_sync(&musb->irq_work); + cancel_delayed_work_sync(&musb->irq_work); cancel_delayed_work_sync(&musb->finish_resume_work); cancel_delayed_work_sync(&musb->deassert_reset_work); pm_runtime_get_sync(musb->controller); diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h index 15b1f93c7037..91817d77d59c 100644 --- a/drivers/usb/musb/musb_core.h +++ b/drivers/usb/musb/musb_core.h @@ -310,7 +310,7 @@ struct musb { struct musb_context_registers context; irqreturn_t (*isr)(int, void *); - struct work_struct irq_work; + struct delayed_work irq_work; struct delayed_work deassert_reset_work; struct delayed_work finish_resume_work; struct delayed_work gadget_work; @@ -381,7 +381,7 @@ struct musb { int port_mode; /* MUSB_PORT_MODE_* */ bool session; - bool quirk_invalid_vbus; + unsigned long quirk_retries; bool is_host; int a_wait_bcon; /* VBUS timeout in msecs */ diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index 910f50967627..a55173c9e564 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1114,7 +1114,7 @@ static int musb_gadget_enable(struct usb_ep *ep, musb_ep->dma ? "dma, " : "", musb_ep->packet_sz); - schedule_work(&musb->irq_work); + schedule_delayed_work(&musb->irq_work, 0); fail: spin_unlock_irqrestore(&musb->lock, flags); @@ -1158,7 +1158,7 @@ static int musb_gadget_disable(struct usb_ep *ep) musb_ep->desc = NULL; musb_ep->end_point.desc = NULL; - schedule_work(&musb->irq_work); + schedule_delayed_work(&musb->irq_work, 0); spin_unlock_irqrestore(&(musb->lock), flags); @@ -1994,7 +1994,7 @@ static int musb_gadget_stop(struct usb_gadget *g) */ /* Force check of devctl register for PM runtime */ - schedule_work(&musb->irq_work); + schedule_delayed_work(&musb->irq_work, 0); pm_runtime_mark_last_busy(musb->controller); pm_runtime_put_autosuspend(musb->controller); diff --git a/drivers/usb/musb/tusb6010.c b/drivers/usb/musb/tusb6010.c index df7c9f46be54..e85cc8e4e7a9 100644 --- a/drivers/usb/musb/tusb6010.c +++ b/drivers/usb/musb/tusb6010.c @@ -724,7 +724,7 @@ tusb_otg_ints(struct musb *musb, u32 int_src, void __iomem *tbase) dev_dbg(musb->controller, "vbus change, %s, otg %03x\n", usb_otg_state_string(musb->xceiv->otg->state), otg_stat); idle_timeout = jiffies + (1 * HZ); - schedule_work(&musb->irq_work); + schedule_delayed_work(&musb->irq_work, 0); } else /* A-dev state machine */ { dev_dbg(musb->controller, "vbus change, %s, otg %03x\n", @@ -814,7 +814,7 @@ tusb_otg_ints(struct musb *musb, u32 int_src, void __iomem *tbase) break; } } - schedule_work(&musb->irq_work); + schedule_delayed_work(&musb->irq_work, 0); return idle_timeout; } @@ -864,7 +864,7 @@ static irqreturn_t tusb_musb_interrupt(int irq, void *__hci) musb_writel(tbase, TUSB_PRCM_WAKEUP_CLEAR, reg); if (reg & ~TUSB_PRCM_WNORCS) { musb->is_active = 1; - schedule_work(&musb->irq_work); + schedule_delayed_work(&musb->irq_work, 0); } dev_dbg(musb->controller, "wake %sactive %02x\n", musb->is_active ? "" : "in", reg); From 536d599d4a5104a8f1f771d3a8db97138b0c9ebb Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 16 Nov 2016 13:21:25 -0600 Subject: [PATCH 343/503] usb: musb: Add missing pm_runtime_disable and drop 2430 PM timeout We are missing pm_runtime_disable() in 2430 glue layer. Further, we only need to enable PM runtime and disable it on exit. With musb_core.c doing PM, the glue layer as a parent will always be active when musb_core.c is active. This fixes host enumeration issues with some devices as reported by Ladislav Michl . And holding an RPM reference while deregistering the child would lead to a crash in omap2430_runtime_suspend() which dereferences the now freed child's driver data on put as pointed out by Johan Hovold : Unable to handle kernel paging request at virtual address 6b6b6f17 ... [] (omap2430_runtime_suspend) from [] (pm_generic_runtime_suspend+0x3c/0x48) [] (pm_generic_runtime_suspend) from [] (_od_runtime_suspend+0x1c/0x30) [] (_od_runtime_suspend) from [] (__rpm_callback+0x3c/0x70) [] (__rpm_callback) from [] (rpm_callback+0x30/0x90) [] (rpm_callback) from [] (rpm_suspend+0x118/0x6b4) [] (rpm_suspend) from [] (rpm_idle+0x104/0x440) [] (rpm_idle) from [] (__pm_runtime_idle+0x7c/0xb0) [] (__pm_runtime_idle) from [] (omap2430_remove+0x38/0x58) [] (omap2430_remove) from [] (platform_drv_remove+0x34/0x4c) Note that if changes are needed to the autosuspend timeout, it should be done in musb_core.c. Reported-by: Ladislav Michl Fixes: 87326e858448 ("usb: musb: Remove extra PM runtime calls from 2430 glue layer") Tested-by: Ladislav Michl Reviewed-by: Johan Hovold Tested-by: Laurent Pinchart Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/omap2430.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index cc1225485509..e8be8e39ab8f 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -513,17 +513,18 @@ static int omap2430_probe(struct platform_device *pdev) } pm_runtime_enable(glue->dev); - pm_runtime_use_autosuspend(glue->dev); - pm_runtime_set_autosuspend_delay(glue->dev, 100); ret = platform_device_add(musb); if (ret) { dev_err(&pdev->dev, "failed to register musb device\n"); - goto err2; + goto err3; } return 0; +err3: + pm_runtime_disable(glue->dev); + err2: platform_device_put(musb); @@ -535,10 +536,7 @@ static int omap2430_remove(struct platform_device *pdev) { struct omap2430_glue *glue = platform_get_drvdata(pdev); - pm_runtime_get_sync(glue->dev); platform_device_unregister(glue->musb); - pm_runtime_put_sync(glue->dev); - pm_runtime_dont_use_autosuspend(glue->dev); pm_runtime_disable(glue->dev); return 0; From 247529170d72ee16bbdfc94c3a696c79ea645c3a Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 16 Nov 2016 13:21:26 -0600 Subject: [PATCH 344/503] usb: musb: Drop pointless PM runtime code for dsps glue This already gets done automatically by PM runtime and we have a separate autosuspend timeout in musb_core.c. Reviewed-by: Johan Hovold Tested-by: Laurent Pinchart Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_dsps.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c index 6096c84ab67a..feae1561b9ab 100644 --- a/drivers/usb/musb/musb_dsps.c +++ b/drivers/usb/musb/musb_dsps.c @@ -783,28 +783,13 @@ static int dsps_probe(struct platform_device *pdev) platform_set_drvdata(pdev, glue); pm_runtime_enable(&pdev->dev); - pm_runtime_use_autosuspend(&pdev->dev); - pm_runtime_set_autosuspend_delay(&pdev->dev, 200); - - ret = pm_runtime_get_sync(&pdev->dev); - if (ret < 0) { - dev_err(&pdev->dev, "pm_runtime_get_sync FAILED"); - goto err2; - } - ret = dsps_create_musb_pdev(glue, pdev); if (ret) - goto err3; - - pm_runtime_mark_last_busy(&pdev->dev); - pm_runtime_put_autosuspend(&pdev->dev); + goto err; return 0; -err3: - pm_runtime_put_sync(&pdev->dev); -err2: - pm_runtime_dont_use_autosuspend(&pdev->dev); +err: pm_runtime_disable(&pdev->dev); return ret; } @@ -815,9 +800,6 @@ static int dsps_remove(struct platform_device *pdev) platform_device_unregister(glue->musb); - /* disable usbss clocks */ - pm_runtime_dont_use_autosuspend(&pdev->dev); - pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); return 0; From f7c4a46352b58c04e4d2111df7fe0358ce84546d Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 16 Nov 2016 13:21:27 -0600 Subject: [PATCH 345/503] phy: twl4030-usb: Fix for musb session bit based PM Now with musb driver implementing generic session bit based PM, we need to have the USB PHYs behaving in a sane way for platforms implementing PM. Currently twl4030-usb enables PM in twl4030_phy_power_on() and then disables it in twl4030_phy_power_off(). This will block PM runtime for the SoC when no cable is connected. Fix the issue by moving PM runtime autosuspend call to happen where it gets called in twl4030_phy_power_on(). Note that this patch should not be backported to anything before commit 467d5c980709 ("usb: musb: Implement session bit based runtime PM for musb-core") as before that all the glue layers implemented their own PM. Fixes: 467d5c980709 ("usb: musb: Implement session bit based runtime PM for musb-core") Tested-by: Ladislav Michl Tested-by: Laurent Pinchart Signed-off-by: Tony Lindgren Acked-by: Kishon Vijay Abraham I Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/phy/phy-twl4030-usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/phy-twl4030-usb.c b/drivers/phy/phy-twl4030-usb.c index 87e6334eab93..547ca7b3f098 100644 --- a/drivers/phy/phy-twl4030-usb.c +++ b/drivers/phy/phy-twl4030-usb.c @@ -459,8 +459,6 @@ static int twl4030_phy_power_off(struct phy *phy) struct twl4030_usb *twl = phy_get_drvdata(phy); dev_dbg(twl->dev, "%s\n", __func__); - pm_runtime_mark_last_busy(twl->dev); - pm_runtime_put_autosuspend(twl->dev); return 0; } @@ -472,6 +470,8 @@ static int twl4030_phy_power_on(struct phy *phy) dev_dbg(twl->dev, "%s\n", __func__); pm_runtime_get_sync(twl->dev); schedule_delayed_work(&twl->id_workaround_work, HZ); + pm_runtime_mark_last_busy(twl->dev); + pm_runtime_put_autosuspend(twl->dev); return 0; } From cfc44a4d147ea605d66ccb917cc24467d15ff867 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 16 Nov 2016 10:27:02 -0800 Subject: [PATCH 346/503] net: check dead netns for peernet2id_alloc() Andrei reports we still allocate netns ID from idr after we destroy it in cleanup_net(). cleanup_net(): ... idr_destroy(&net->netns_ids); ... list_for_each_entry_reverse(ops, &pernet_list, list) ops_exit_list(ops, &net_exit_list); -> rollback_registered_many() -> rtmsg_ifinfo_build_skb() -> rtnl_fill_ifinfo() -> peernet2id_alloc() After that point we should not even access net->netns_ids, we should check the death of the current netns as early as we can in peernet2id_alloc(). For net-next we can consider to avoid sending rtmsg totally, it is a good optimization for netns teardown path. Fixes: 0c7aecd4bde4 ("netns: add rtnl cmd to add and get peer netns ids") Reported-by: Andrei Vagin Cc: Nicolas Dichtel Signed-off-by: Cong Wang Acked-by: Andrei Vagin Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/net_namespace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index f61c0e02a413..7001da910c6b 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -219,6 +219,8 @@ int peernet2id_alloc(struct net *net, struct net *peer) bool alloc; int id; + if (atomic_read(&net->count) == 0) + return NETNSA_NSID_NOT_ASSIGNED; spin_lock_irqsave(&net->nsid_lock, flags); alloc = atomic_read(&peer->count) == 0 ? false : true; id = __peernet2id_alloc(net, peer, &alloc); From 48c1699d5335bc045b50989a06b1c526b17a25ff Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 16 Nov 2016 15:20:36 +0100 Subject: [PATCH 347/503] of_mdio: fix node leak in of_phy_register_fixed_link error path Make sure to drop the of_node reference also on failure to parse the speed property in of_phy_register_fixed_link(). Fixes: 3be2a49e5c08 ("of: provide a binding for fixed link PHYs") Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/of/of_mdio.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index b470f7e3521d..8f4648383fb2 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -456,8 +456,11 @@ int of_phy_register_fixed_link(struct device_node *np) status.link = 1; status.duplex = of_property_read_bool(fixed_link_node, "full-duplex"); - if (of_property_read_u32(fixed_link_node, "speed", &status.speed)) + if (of_property_read_u32(fixed_link_node, "speed", + &status.speed)) { + of_node_put(fixed_link_node); return -EINVAL; + } status.pause = of_property_read_bool(fixed_link_node, "pause"); status.asym_pause = of_property_read_bool(fixed_link_node, "asym-pause"); From 3ae30f4ce65e9d4de274b1472169ab3c27f5c666 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 16 Nov 2016 15:20:37 +0100 Subject: [PATCH 348/503] of_mdio: fix device reference leak in of_phy_find_device Make sure to drop the reference taken by bus_find_device() before returning NULL from of_phy_find_device() when the found device is not a PHY. Fixes: 6ed742363b9c ("of: of_mdio: Ensure mdio device is a PHY") Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/of/of_mdio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 8f4648383fb2..5a3145a02547 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -292,6 +292,7 @@ struct phy_device *of_phy_find_device(struct device_node *phy_np) mdiodev = to_mdio_device(d); if (mdiodev->flags & MDIO_DEVICE_FLAG_PHY) return to_phy_device(d); + put_device(d); } return NULL; From 13c9d934a5a1d04f055c20c2253090e9afd9a5d1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 16 Nov 2016 15:20:38 +0100 Subject: [PATCH 349/503] net: phy: fixed_phy: fix of_node leak in fixed_phy_unregister Make sure to drop the of_node reference taken in fixed_phy_register() when deregistering a PHY. Fixes: a75951217472 ("net: phy: extend fixed driver with fixed_phy_register()") Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/phy/fixed_phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index c649c101bbab..eb5167210681 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -279,7 +279,7 @@ EXPORT_SYMBOL_GPL(fixed_phy_register); void fixed_phy_unregister(struct phy_device *phy) { phy_device_remove(phy); - + of_node_put(phy->mdio.dev.of_node); fixed_phy_del(phy->mdio.addr); } EXPORT_SYMBOL_GPL(fixed_phy_unregister); From b5c2d49544e5930c96e2632a7eece3f4325a1888 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 16 Nov 2016 16:26:46 +0100 Subject: [PATCH 350/503] ip6_tunnel: disable caching when the traffic class is inherited If an ip6 tunnel is configured to inherit the traffic class from the inner header, the dst_cache must be disabled or it will foul the policy routing. The issue is apprently there since at leat Linux-2.6.12-rc2. Reported-by: Liam McBirnie Cc: Liam McBirnie Acked-by: Hannes Frederic Sowa Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 87784560dc46..0a4759b89da2 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1034,6 +1034,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, int mtu; unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen; unsigned int max_headroom = psh_hlen; + bool use_cache = false; u8 hop_limit; int err = -1; @@ -1066,7 +1067,15 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); neigh_release(neigh); - } else if (!fl6->flowi6_mark) + } else if (!(t->parms.flags & + (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) { + /* enable the cache only only if the routing decision does + * not depend on the current inner header value + */ + use_cache = true; + } + + if (use_cache) dst = dst_cache_get(&t->dst_cache); if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr)) @@ -1150,7 +1159,7 @@ route_lookup: if (t->encap.type != TUNNEL_ENCAP_NONE) goto tx_err_dst_release; } else { - if (!fl6->flowi6_mark && ndst) + if (use_cache && ndst) dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr); } skb_dst_set(skb, dst); From 5d1904204c99596b50a700f092fe49d78edba400 Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Thu, 10 Nov 2016 17:16:33 +0800 Subject: [PATCH 351/503] mremap: fix race between mremap() and page cleanning Prior to 3.15, there was a race between zap_pte_range() and page_mkclean() where writes to a page could be lost. Dave Hansen discovered by inspection that there is a similar race between move_ptes() and page_mkclean(). We've been able to reproduce the issue by enlarging the race window with a msleep(), but have not been able to hit it without modifying the code. So, we think it's a real issue, but is difficult or impossible to hit in practice. The zap_pte_range() issue is fixed by commit 1cf35d47712d("mm: split 'tlb_flush_mmu()' into tlb flushing and memory freeing parts"). And this patch is to fix the race between page_mkclean() and mremap(). Here is one possible way to hit the race: suppose a process mmapped a file with READ | WRITE and SHARED, it has two threads and they are bound to 2 different CPUs, e.g. CPU1 and CPU2. mmap returned X, then thread 1 did a write to addr X so that CPU1 now has a writable TLB for addr X on it. Thread 2 starts mremaping from addr X to Y while thread 1 cleaned the page and then did another write to the old addr X again. The 2nd write from thread 1 could succeed but the value will get lost. thread 1 thread 2 (bound to CPU1) (bound to CPU2) 1: write 1 to addr X to get a writeable TLB on this CPU 2: mremap starts 3: move_ptes emptied PTE for addr X and setup new PTE for addr Y and then dropped PTL for X and Y 4: page laundering for N by doing fadvise FADV_DONTNEED. When done, pageframe N is deemed clean. 5: *write 2 to addr X 6: tlb flush for addr X 7: munmap (Y, pagesize) to make the page unmapped 8: fadvise with FADV_DONTNEED again to kick the page off the pagecache 9: pread the page from file to verify the value. If 1 is there, it means we have lost the written 2. *the write may or may not cause segmentation fault, it depends on if the TLB is still on the CPU. Please note that this is only one specific way of how the race could occur, it didn't mean that the race could only occur in exact the above config, e.g. more than 2 threads could be involved and fadvise() could be done in another thread, etc. For anonymous pages, they could race between mremap() and page reclaim: THP: a huge PMD is moved by mremap to a new huge PMD, then the new huge PMD gets unmapped/splitted/pagedout before the flush tlb happened for the old huge PMD in move_page_tables() and we could still write data to it. The normal anonymous page has similar situation. To fix this, check for any dirty PTE in move_ptes()/move_huge_pmd() and if any, did the flush before dropping the PTL. If we did the flush for every move_ptes()/move_huge_pmd() call then we do not need to do the flush in move_pages_tables() for the whole range. But if we didn't, we still need to do the whole range flush. Alternatively, we can track which part of the range is flushed in move_ptes()/move_huge_pmd() and which didn't to avoid flushing the whole range in move_page_tables(). But that would require multiple tlb flushes for the different sub-ranges and should be less efficient than the single whole range flush. KBuild test on my Sandybridge desktop doesn't show any noticeable change. v4.9-rc4: real 5m14.048s user 32m19.800s sys 4m50.320s With this commit: real 5m13.888s user 32m19.330s sys 4m51.200s Reported-by: Dave Hansen Signed-off-by: Aaron Lu Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 2 +- mm/huge_memory.c | 9 ++++++++- mm/mremap.c | 30 +++++++++++++++++++++--------- 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 9b9f65d99873..e35e6de633b9 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -22,7 +22,7 @@ extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned char *vec); extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, - pmd_t *old_pmd, pmd_t *new_pmd); + pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush); extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, int prot_numa); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index cdcd25cb30fe..eff3de359d50 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1426,11 +1426,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, - pmd_t *old_pmd, pmd_t *new_pmd) + pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush) { spinlock_t *old_ptl, *new_ptl; pmd_t pmd; struct mm_struct *mm = vma->vm_mm; + bool force_flush = false; if ((old_addr & ~HPAGE_PMD_MASK) || (new_addr & ~HPAGE_PMD_MASK) || @@ -1455,6 +1456,8 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, new_ptl = pmd_lockptr(mm, new_pmd); if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); + if (pmd_present(*old_pmd) && pmd_dirty(*old_pmd)) + force_flush = true; pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd); VM_BUG_ON(!pmd_none(*new_pmd)); @@ -1467,6 +1470,10 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd)); if (new_ptl != old_ptl) spin_unlock(new_ptl); + if (force_flush) + flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE); + else + *need_flush = true; spin_unlock(old_ptl); return true; } diff --git a/mm/mremap.c b/mm/mremap.c index da22ad2a5678..6ccecc03f56a 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -104,11 +104,13 @@ static pte_t move_soft_dirty_pte(pte_t pte) static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, unsigned long old_addr, unsigned long old_end, struct vm_area_struct *new_vma, pmd_t *new_pmd, - unsigned long new_addr, bool need_rmap_locks) + unsigned long new_addr, bool need_rmap_locks, bool *need_flush) { struct mm_struct *mm = vma->vm_mm; pte_t *old_pte, *new_pte, pte; spinlock_t *old_ptl, *new_ptl; + bool force_flush = false; + unsigned long len = old_end - old_addr; /* * When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma @@ -146,6 +148,14 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, new_pte++, new_addr += PAGE_SIZE) { if (pte_none(*old_pte)) continue; + + /* + * We are remapping a dirty PTE, make sure to + * flush TLB before we drop the PTL for the + * old PTE or we may race with page_mkclean(). + */ + if (pte_present(*old_pte) && pte_dirty(*old_pte)) + force_flush = true; pte = ptep_get_and_clear(mm, old_addr, old_pte); pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr); pte = move_soft_dirty_pte(pte); @@ -156,6 +166,10 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, if (new_ptl != old_ptl) spin_unlock(new_ptl); pte_unmap(new_pte - 1); + if (force_flush) + flush_tlb_range(vma, old_end - len, old_end); + else + *need_flush = true; pte_unmap_unlock(old_pte - 1, old_ptl); if (need_rmap_locks) drop_rmap_locks(vma); @@ -201,13 +215,12 @@ unsigned long move_page_tables(struct vm_area_struct *vma, if (need_rmap_locks) take_rmap_locks(vma); moved = move_huge_pmd(vma, old_addr, new_addr, - old_end, old_pmd, new_pmd); + old_end, old_pmd, new_pmd, + &need_flush); if (need_rmap_locks) drop_rmap_locks(vma); - if (moved) { - need_flush = true; + if (moved) continue; - } } split_huge_pmd(vma, old_pmd, old_addr); if (pmd_trans_unstable(old_pmd)) @@ -220,11 +233,10 @@ unsigned long move_page_tables(struct vm_area_struct *vma, extent = next - new_addr; if (extent > LATENCY_LIMIT) extent = LATENCY_LIMIT; - move_ptes(vma, old_pmd, old_addr, old_addr + extent, - new_vma, new_pmd, new_addr, need_rmap_locks); - need_flush = true; + move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma, + new_pmd, new_addr, need_rmap_locks, &need_flush); } - if (likely(need_flush)) + if (need_flush) flush_tlb_range(vma, old_end-len, old_addr); mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end); From 30a391a13ab9215d7569da4e1773c5bb4deed96d Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Wed, 16 Nov 2016 17:16:10 -0500 Subject: [PATCH 352/503] net sched filters: pass netlink message flags in event notification Userland client should be able to read an event, and reflect it back to the kernel, therefore it needs to extract complete set of netlink flags. For example, this will allow "tc monitor" to distinguish Add and Replace operations. Signed-off-by: Roman Mashak Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/cls_api.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2b2a7974e4bb..8e93d4afe5ea 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -112,7 +112,7 @@ static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, for (it_chain = chain; (tp = rtnl_dereference(*it_chain)) != NULL; it_chain = &tp->next) - tfilter_notify(net, oskb, n, tp, 0, event, false); + tfilter_notify(net, oskb, n, tp, n->nlmsg_flags, event, false); } /* Select new prio value from the range, managed by kernel. */ @@ -430,7 +430,8 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, if (!skb) return -ENOBUFS; - if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq, 0, event) <= 0) { + if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq, + n->nlmsg_flags, event) <= 0) { kfree_skb(skb); return -EINVAL; } From e9f01049d1ea4679a3258b8423fe54bae424ee0e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 18 Nov 2016 10:26:39 +1000 Subject: [PATCH 353/503] Revert "drm/mediatek: fix a typo of OD_CFG to OD_RELAYMODE" This reverts commit 83ba62bc700bab710b22be3a1bf6cf973f754273. Signed-off-by: Dave Airlie --- drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c index aa5f20fabd10..df33b3ca6ffd 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c @@ -123,7 +123,7 @@ static void mtk_od_config(struct mtk_ddp_comp *comp, unsigned int w, unsigned int bpc) { writel(w << 16 | h, comp->regs + DISP_OD_SIZE); - writel(OD_RELAYMODE, comp->regs + OD_CFG); + writel(OD_RELAYMODE, comp->regs + OD_RELAYMODE); mtk_dither_set(comp, bpc, DISP_OD_CFG); } From 7d40c2cf080950eab63a0747482027f5f1dae0d3 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 18 Nov 2016 10:27:00 +1000 Subject: [PATCH 354/503] Revert "drm/mediatek: set vblank_disable_allowed to true" This reverts commit f752fff611b99f5679224f3990a1f531ea64b1ec. Signed-off-by: Dave Airlie --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 0b2ae47eb52c..cf83f6507ec8 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -217,7 +217,6 @@ static int mtk_drm_kms_init(struct drm_device *drm) if (ret < 0) goto err_component_unbind; - drm->vblank_disable_allowed = true; drm_kms_helper_poll_init(drm); drm_mode_config_reset(drm); From 1c8018f7a7a60a649260fdd7e8645a356299e920 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Wed, 2 Nov 2016 08:57:04 +0100 Subject: [PATCH 355/503] ipmi/bt-bmc: change compatible node to 'aspeed, ast2400-ibt-bmc' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Aspeed SoCs have two BT interfaces : one is IPMI compliant and the other is H8S/2168 compliant. The current ipmi/bt-bmc driver implements the IPMI version and we should reflect its nature in the compatible node name using 'aspeed,ast2400-ibt-bmc' instead of 'aspeed,ast2400-bt-bmc'. The latter should be used for a H8S interface driver if it is implemented one day. Signed-off-by: Cédric Le Goater Signed-off-by: Olof Johansson --- .../{aspeed,ast2400-bt-bmc.txt => aspeed,ast2400-ibt-bmc.txt} | 4 ++-- drivers/char/ipmi/bt-bmc.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) rename Documentation/devicetree/bindings/ipmi/{aspeed,ast2400-bt-bmc.txt => aspeed,ast2400-ibt-bmc.txt} (85%) diff --git a/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-bt-bmc.txt b/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt similarity index 85% rename from Documentation/devicetree/bindings/ipmi/aspeed,ast2400-bt-bmc.txt rename to Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt index fbbacd958240..6f28969af9dc 100644 --- a/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-bt-bmc.txt +++ b/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt @@ -6,7 +6,7 @@ perform in-band IPMI communication with their host. Required properties: -- compatible : should be "aspeed,ast2400-bt-bmc" +- compatible : should be "aspeed,ast2400-ibt-bmc" - reg: physical address and size of the registers Optional properties: @@ -17,7 +17,7 @@ Optional properties: Example: ibt@1e789140 { - compatible = "aspeed,ast2400-bt-bmc"; + compatible = "aspeed,ast2400-ibt-bmc"; reg = <0x1e789140 0x18>; interrupts = <8>; }; diff --git a/drivers/char/ipmi/bt-bmc.c b/drivers/char/ipmi/bt-bmc.c index b49e61320952..fc9e8891eae3 100644 --- a/drivers/char/ipmi/bt-bmc.c +++ b/drivers/char/ipmi/bt-bmc.c @@ -484,7 +484,7 @@ static int bt_bmc_remove(struct platform_device *pdev) } static const struct of_device_id bt_bmc_match[] = { - { .compatible = "aspeed,ast2400-bt-bmc" }, + { .compatible = "aspeed,ast2400-ibt-bmc" }, { }, }; @@ -502,4 +502,4 @@ module_platform_driver(bt_bmc_driver); MODULE_DEVICE_TABLE(of, bt_bmc_match); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Alistair Popple "); -MODULE_DESCRIPTION("Linux device interface to the BT interface"); +MODULE_DESCRIPTION("Linux device interface to the IPMI BT interface"); From 68d85d0e03eab60c238ebe673c7cea1cf70275d4 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 29 Oct 2016 16:31:17 +0000 Subject: [PATCH 356/503] i2c: digicolor: use clk_disable_unprepare instead of clk_unprepare since clk_prepare_enable() is used to get i2c->clk, we should use clk_disable_unprepare() to release it for the error path. Signed-off-by: Wei Yongjun Acked-by: Baruch Siach Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-digicolor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-digicolor.c b/drivers/i2c/busses/i2c-digicolor.c index 49f2084f7bb5..50813a24c541 100644 --- a/drivers/i2c/busses/i2c-digicolor.c +++ b/drivers/i2c/busses/i2c-digicolor.c @@ -347,7 +347,7 @@ static int dc_i2c_probe(struct platform_device *pdev) ret = i2c_add_adapter(&i2c->adap); if (ret < 0) { - clk_unprepare(i2c->clk); + clk_disable_unprepare(i2c->clk); return ret; } From 96ed1fe511a8b4948e53f3bad431d8737e8f231f Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 18 Nov 2016 14:08:56 +1100 Subject: [PATCH 357/503] powerpc/mm/radix: Invalidate ERAT on tlbiel for POWER9 DD1 On POWER9 DD1, when we do a local TLB invalidate we also need to explicitly invalidate the ERAT. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc-opcode.h | 1 + arch/powerpc/mm/tlb-radix.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 0132831b3081..c56ea8c84abb 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -460,5 +460,6 @@ #define PPC_SLBIA(IH) stringify_in_c(.long PPC_INST_SLBIA | \ ((IH & 0x7) << 21)) +#define PPC_INVALIDATE_ERAT PPC_SLBIA(7) #endif /* _ASM_POWERPC_PPC_OPCODE_H */ diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index bda8c43be78a..3493cf4e0452 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -50,6 +50,8 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) { __tlbiel_pid(pid, set, ric); } + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); return; } @@ -83,6 +85,8 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid, asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("ptesync": : :"memory"); + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); } static inline void _tlbie_va(unsigned long va, unsigned long pid, From 9853a55ef1bb66d7411136046060bbfb69c714fa Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 15 Nov 2016 12:05:11 +0100 Subject: [PATCH 358/503] cfg80211: limit scan results cache size It's possible to make scanning consume almost arbitrary amounts of memory, e.g. by sending beacon frames with random BSSIDs at high rates while somebody is scanning. Limit the number of BSS table entries we're willing to cache to 1000, limiting maximum memory usage to maybe 4-5MB, but lower in practice - that would be the case for having both full-sized beacon and probe response frames for each entry; this seems not possible in practice, so a limit of 1000 entries will likely be closer to 0.5 MB. Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- net/wireless/core.h | 1 + net/wireless/scan.c | 69 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/net/wireless/core.h b/net/wireless/core.h index 08d2e948c9ad..f0c0c8a48c92 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -71,6 +71,7 @@ struct cfg80211_registered_device { struct list_head bss_list; struct rb_root bss_tree; u32 bss_generation; + u32 bss_entries; struct cfg80211_scan_request *scan_req; /* protected by RTNL */ struct sk_buff *scan_msg; struct cfg80211_sched_scan_request __rcu *sched_scan_req; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index b5bd58d0f731..35ad69fd0838 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -57,6 +57,19 @@ * also linked into the probe response struct. */ +/* + * Limit the number of BSS entries stored in mac80211. Each one is + * a bit over 4k at most, so this limits to roughly 4-5M of memory. + * If somebody wants to really attack this though, they'd likely + * use small beacons, and only one type of frame, limiting each of + * the entries to a much smaller size (in order to generate more + * entries in total, so overhead is bigger.) + */ +static int bss_entries_limit = 1000; +module_param(bss_entries_limit, int, 0644); +MODULE_PARM_DESC(bss_entries_limit, + "limit to number of scan BSS entries (per wiphy, default 1000)"); + #define IEEE80211_SCAN_RESULT_EXPIRE (30 * HZ) static void bss_free(struct cfg80211_internal_bss *bss) @@ -137,6 +150,10 @@ static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *rdev, list_del_init(&bss->list); rb_erase(&bss->rbn, &rdev->bss_tree); + rdev->bss_entries--; + WARN_ONCE((rdev->bss_entries == 0) ^ list_empty(&rdev->bss_list), + "rdev bss entries[%d]/list[empty:%d] corruption\n", + rdev->bss_entries, list_empty(&rdev->bss_list)); bss_ref_put(rdev, bss); return true; } @@ -163,6 +180,40 @@ static void __cfg80211_bss_expire(struct cfg80211_registered_device *rdev, rdev->bss_generation++; } +static bool cfg80211_bss_expire_oldest(struct cfg80211_registered_device *rdev) +{ + struct cfg80211_internal_bss *bss, *oldest = NULL; + bool ret; + + lockdep_assert_held(&rdev->bss_lock); + + list_for_each_entry(bss, &rdev->bss_list, list) { + if (atomic_read(&bss->hold)) + continue; + + if (!list_empty(&bss->hidden_list) && + !bss->pub.hidden_beacon_bss) + continue; + + if (oldest && time_before(oldest->ts, bss->ts)) + continue; + oldest = bss; + } + + if (WARN_ON(!oldest)) + return false; + + /* + * The callers make sure to increase rdev->bss_generation if anything + * gets removed (and a new entry added), so there's no need to also do + * it here. + */ + + ret = __cfg80211_unlink_bss(rdev, oldest); + WARN_ON(!ret); + return ret; +} + void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool send_message) { @@ -689,6 +740,7 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev, const u8 *ie; int i, ssidlen; u8 fold = 0; + u32 n_entries = 0; ies = rcu_access_pointer(new->pub.beacon_ies); if (WARN_ON(!ies)) @@ -712,6 +764,12 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev, /* This is the bad part ... */ list_for_each_entry(bss, &rdev->bss_list, list) { + /* + * we're iterating all the entries anyway, so take the + * opportunity to validate the list length accounting + */ + n_entries++; + if (!ether_addr_equal(bss->pub.bssid, new->pub.bssid)) continue; if (bss->pub.channel != new->pub.channel) @@ -740,6 +798,10 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev, new->pub.beacon_ies); } + WARN_ONCE(n_entries != rdev->bss_entries, + "rdev bss entries[%d]/list[len:%d] corruption\n", + rdev->bss_entries, n_entries); + return true; } @@ -894,7 +956,14 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, } } + if (rdev->bss_entries >= bss_entries_limit && + !cfg80211_bss_expire_oldest(rdev)) { + kfree(new); + goto drop; + } + list_add_tail(&new->list, &rdev->bss_list); + rdev->bss_entries++; rb_insert_bss(rdev, new); found = new; } From c2d75e03d6307bda0e14b616818a6f7b09fd623a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 17 Nov 2016 09:57:23 -0600 Subject: [PATCH 359/503] x86/unwind: Prevent KASAN false positive warnings in guess unwinder The guess unwinder scans the entire stack, which can cause KASAN "stack-out-of-bounds" false positive warnings. Tell KASAN to ignore it. Reported-by: Peter Zijlstra Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: davej@codemonkey.org.uk Cc: dvyukov@google.com Link: http://lkml.kernel.org/r/61939c0b2b2d63ce97ba59cba3b00fd47c2962cf.1479398226.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/unwind_guess.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c index 2d721e533cf4..b80e8bf43cc6 100644 --- a/arch/x86/kernel/unwind_guess.c +++ b/arch/x86/kernel/unwind_guess.c @@ -7,11 +7,13 @@ unsigned long unwind_get_return_address(struct unwind_state *state) { + unsigned long addr = READ_ONCE_NOCHECK(*state->sp); + if (unwind_done(state)) return 0; return ftrace_graph_ret_addr(state->task, &state->graph_idx, - *state->sp, state->sp); + addr, state->sp); } EXPORT_SYMBOL_GPL(unwind_get_return_address); @@ -23,8 +25,10 @@ bool unwind_next_frame(struct unwind_state *state) return false; do { + unsigned long addr = READ_ONCE_NOCHECK(*state->sp); + for (state->sp++; state->sp < info->end; state->sp++) - if (__kernel_text_address(*state->sp)) + if (__kernel_text_address(addr)) return true; state->sp = info->next_sp; From 91e08ab0c8515450258d7ad9033bfe69bebad25a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 17 Nov 2016 09:57:24 -0600 Subject: [PATCH 360/503] x86/dumpstack: Prevent KASAN false positive warnings The oops stack dump code scans the entire stack, which can cause KASAN "stack-out-of-bounds" false positive warnings. Tell KASAN to ignore it. Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: davej@codemonkey.org.uk Cc: dvyukov@google.com Link: http://lkml.kernel.org/r/5f6e80c4b0c7f7f0b6211900847a247cdaad753c.1479398226.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 9b7cf5c28f5f..85f854b98a9d 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -112,7 +112,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, for (; stack < stack_info.end; stack++) { unsigned long real_addr; int reliable = 0; - unsigned long addr = *stack; + unsigned long addr = READ_ONCE_NOCHECK(*stack); unsigned long *ret_addr_p = unwind_get_return_address_ptr(&state); From e40ed1542dd779e5037a22c6b534e57127472365 Mon Sep 17 00:00:00 2001 From: Janakarajan Natarajan Date: Thu, 17 Nov 2016 10:15:06 -0600 Subject: [PATCH 361/503] perf/x86: Add perf support for AMD family-17h processors This patch enables perf core PMU support for the new AMD family-17h processors. In family-17h, there is no PMC-event constraint. All events, irrespective of the type, can be measured using any of the six generic performance counters. Signed-off-by: Janakarajan Natarajan Acked-by: Borislav Petkov Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Suravee Suthikulpanit Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1479399306-13375-1-git-send-email-Janakarajan.Natarajan@amd.com Signed-off-by: Ingo Molnar --- arch/x86/events/amd/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index f5f4b3fbbbc2..afb222b63cae 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -662,7 +662,13 @@ static int __init amd_core_pmu_init(void) pr_cont("Fam15h "); x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; break; - + case 0x17: + pr_cont("Fam17h "); + /* + * In family 17h, there are no event constraints in the PMC hardware. + * We fallback to using default amd_get_event_constraints. + */ + break; default: pr_err("core perfctr but no constraints; unknown hardware!\n"); return -ENODEV; From 9e3f7a29694049edd728e2400ab57ad7553e5aa9 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 16 Nov 2016 09:20:57 +0000 Subject: [PATCH 362/503] arm64: KVM: pmu: Fix AArch32 cycle counter access We're missing the handling code for the cycle counter accessed from a 32bit guest, leading to unexpected results. Cc: stable@vger.kernel.org # 4.6+ Signed-off-by: Wei Huang Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f302fdb3a030..87e7e6608cd8 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -597,8 +597,14 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, idx = ARMV8_PMU_CYCLE_IDX; } else { - BUG(); + return false; } + } else if (r->CRn == 0 && r->CRm == 9) { + /* PMCCNTR */ + if (pmu_access_event_counter_el0_disabled(vcpu)) + return false; + + idx = ARMV8_PMU_CYCLE_IDX; } else if (r->CRn == 14 && (r->CRm & 12) == 8) { /* PMEVCNTRn_EL0 */ if (pmu_access_event_counter_el0_disabled(vcpu)) @@ -606,7 +612,7 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, idx = ((r->CRm & 3) << 3) | (r->Op2 & 7); } else { - BUG(); + return false; } if (!pmu_counter_idx_valid(vcpu, idx)) From b112c84a6ff035271d41d548c10215f18443d6a6 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 16 Nov 2016 11:09:20 -0600 Subject: [PATCH 363/503] KVM: arm64: Fix the issues when guest PMCCFILTR is configured KVM calls kvm_pmu_set_counter_event_type() when PMCCFILTR is configured. But this function can't deals with PMCCFILTR correctly because the evtCount bits of PMCCFILTR, which is reserved 0, conflits with the SW_INCR event type of other PMXEVTYPER registers. To fix it, when eventsel == 0, this function shouldn't return immediately; instead it needs to check further if select_idx is ARMV8_PMU_CYCLE_IDX. Another issue is that KVM shouldn't copy the eventsel bits of PMCCFILTER blindly to attr.config. Instead it ought to convert the request to the "cpu cycle" event type (i.e. 0x11). To support this patch and to prevent duplicated definitions, a limited set of ARMv8 perf event types were relocated from perf_event.c to asm/perf_event.h. Cc: stable@vger.kernel.org # 4.6+ Acked-by: Will Deacon Signed-off-by: Wei Huang Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/perf_event.h | 10 +++++++++- arch/arm64/kernel/perf_event.c | 10 +--------- virt/kvm/arm/pmu.c | 8 +++++--- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 2065f46fa740..38b6a2b49d68 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -46,7 +46,15 @@ #define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */ #define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */ -#define ARMV8_PMU_EVTYPE_EVENT_SW_INCR 0 /* Software increment event */ +/* + * PMUv3 event types: required events + */ +#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04 +#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10 +#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11 +#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12 /* * Event filters for PMUv3 diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index a9310a69fffd..57ae9d9ed9bb 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -31,17 +31,9 @@ /* * ARMv8 PMUv3 Performance Events handling code. - * Common event types. + * Common event types (some are defined in asm/perf_event.h). */ -/* Required events. */ -#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04 -#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10 -#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11 -#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12 - /* At least one of the following is required. */ #define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x08 #define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x1B diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 6e9c40eea208..69ccce308458 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -305,7 +305,7 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) continue; type = vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i) & ARMV8_PMU_EVTYPE_EVENT; - if ((type == ARMV8_PMU_EVTYPE_EVENT_SW_INCR) + if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR) && (enable & BIT(i))) { reg = vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; reg = lower_32_bits(reg); @@ -379,7 +379,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, eventsel = data & ARMV8_PMU_EVTYPE_EVENT; /* Software increment event does't need to be backed by a perf event */ - if (eventsel == ARMV8_PMU_EVTYPE_EVENT_SW_INCR) + if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR && + select_idx != ARMV8_PMU_CYCLE_IDX) return; memset(&attr, 0, sizeof(struct perf_event_attr)); @@ -391,7 +392,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0; attr.exclude_hv = 1; /* Don't count EL2 events */ attr.exclude_host = 1; /* Don't count host events */ - attr.config = eventsel; + attr.config = (select_idx == ARMV8_PMU_CYCLE_IDX) ? + ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel; counter = kvm_pmu_get_counter_value(vcpu, select_idx); /* The initial sample period (overflow count) of an event. */ From 05e78c6933d613a7da0d0473f4c19c865af04c2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20H=C3=A4dicke?= Date: Fri, 4 Nov 2016 00:23:26 +0100 Subject: [PATCH 364/503] usb: gadget: f_fs: fix wrong parenthesis in ffs_func_req_match() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Properly check the return code of ffs_func_revmap_intf() and ffs_func_revmap_ep() for a non-negative value. Instead of checking the return code, the comparison was performed for the last parameter of the function calls, because of wrong parenthesis. This also fixes the following static checker warning: drivers/usb/gadget/function/f_fs.c:3152 ffs_func_req_match() warn: always true condition '(((creq->wIndex)) >= 0) => (0-u16max >= 0)' Reported-by: Dan Carpenter Signed-off-by: Felix Hädicke Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index e40d47d47d82..17989b72cdae 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -3225,11 +3225,11 @@ static bool ffs_func_req_match(struct usb_function *f, switch (creq->bRequestType & USB_RECIP_MASK) { case USB_RECIP_INTERFACE: - return ffs_func_revmap_intf(func, - le16_to_cpu(creq->wIndex) >= 0); + return (ffs_func_revmap_intf(func, + le16_to_cpu(creq->wIndex)) >= 0); case USB_RECIP_ENDPOINT: - return ffs_func_revmap_ep(func, - le16_to_cpu(creq->wIndex) >= 0); + return (ffs_func_revmap_ep(func, + le16_to_cpu(creq->wIndex)) >= 0); default: return (bool) (func->ffs->user_flags & FUNCTIONFS_ALL_CTRL_RECIP); From cac4a185405d4415eca269cae976438b44a37ae0 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 17 Nov 2016 15:46:23 +0530 Subject: [PATCH 365/503] powerpc/mm: Fix missing update of HID register on secondary CPUs We need to update on secondaries for the selected MMU mode. Fixes: ad410674f560 ("powerpc/mm: Update the HID bit when switching from radix to hash") Reported-by: Michael Neuling Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 4 ++++ arch/powerpc/mm/pgtable-radix.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 44d3c3a38e3e..5503078090cd 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1029,6 +1029,10 @@ void hash__early_init_mmu_secondary(void) { /* Initialize hash table for that CPU */ if (!firmware_has_feature(FW_FEATURE_LPAR)) { + + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + update_hid_for_hash(); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) mtspr(SPRN_SDR1, _SDR1); else diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index ed7bddc456b7..688b54517655 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -388,6 +388,10 @@ void radix__early_init_mmu_secondary(void) * update partition table control register and UPRT */ if (!firmware_has_feature(FW_FEATURE_LPAR)) { + + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + update_hid_for_radix(); + lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); From b0921d5c9ed6ffa8a4d6afc5ee5f136b87445f14 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Tue, 15 Nov 2016 11:13:16 +0100 Subject: [PATCH 366/503] mmc: sdhci-of-esdhc: fixup PRESENT_STATE read Since commit 87a18a6a5652 ("mmc: mmc: Use ->card_busy() to detect busy cards in __mmc_switch()") the ESDHC driver is broken: mmc0: Card stuck in programming state! __mmc_switch mmc0: error -110 whilst initialising MMC card Since this commit __mmc_switch() uses ->card_busy(), which is sdhci_card_busy() for the esdhc driver. sdhci_card_busy() uses the PRESENT_STATE register, specifically the DAT0 signal level bit. But the ESDHC uses a non-conformant PRESENT_STATE register, thus a read fixup is required to make the driver work again. Signed-off-by: Michael Walle Fixes: 87a18a6a5652 ("mmc: mmc: Use ->card_busy() to detect busy cards in __mmc_switch()") Acked-by: Yangbo Lu Acked-by: Adrian Hunter Cc: # v4.8+ Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-esdhc.c | 14 ++++++++++++++ drivers/mmc/host/sdhci.h | 1 + 2 files changed, 15 insertions(+) diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index fb71c866eacc..1bb11e4a9fe5 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -66,6 +66,20 @@ static u32 esdhc_readl_fixup(struct sdhci_host *host, return ret; } } + /* + * The DAT[3:0] line signal levels and the CMD line signal level are + * not compatible with standard SDHC register. The line signal levels + * DAT[7:0] are at bits 31:24 and the command line signal level is at + * bit 23. All other bits are the same as in the standard SDHC + * register. + */ + if (spec_reg == SDHCI_PRESENT_STATE) { + ret = value & 0x000fffff; + ret |= (value >> 4) & SDHCI_DATA_LVL_MASK; + ret |= (value << 1) & SDHCI_CMD_LVL; + return ret; + } + ret = value; return ret; } diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 766df17fb7eb..2570455b219a 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -73,6 +73,7 @@ #define SDHCI_DATA_LVL_MASK 0x00F00000 #define SDHCI_DATA_LVL_SHIFT 20 #define SDHCI_DATA_0_LVL_MASK 0x00100000 +#define SDHCI_CMD_LVL 0x01000000 #define SDHCI_HOST_CONTROL 0x28 #define SDHCI_CTRL_LED 0x01 From a8348bca2944d397a528772f5c0ccb47a8b58af4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 17 Nov 2016 22:07:58 +0800 Subject: [PATCH 367/503] crypto: algif_hash - Fix NULL hash crash with shash Recently algif_hash has been changed to allow null hashes. This triggers a bug when used with an shash algorithm whereby it will cause a crash during the digest operation. This patch fixes it by avoiding the digest operation and instead doing an init followed by a final which avoids the buggy code in shash. This patch also ensures that the result buffer is freed after an error so that it is not returned as a genuine hash result on the next recv call. The shash/ahash wrapper code will be fixed later to handle this case correctly. Fixes: 493b2ed3f760 ("crypto: algif_hash - Handle NULL hashes correctly") Signed-off-by: Herbert Xu Tested-by: Laura Abbott --- crypto/algif_hash.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 2d8466f9e49b..05e21b464433 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -214,23 +214,26 @@ static int hash_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0); - if (ctx->more) { + if (!result) { + err = af_alg_wait_for_completion( + crypto_ahash_init(&ctx->req), + &ctx->completion); + if (err) + goto unlock; + } + + if (!result || ctx->more) { ctx->more = 0; err = af_alg_wait_for_completion(crypto_ahash_final(&ctx->req), &ctx->completion); if (err) goto unlock; - } else if (!result) { - err = af_alg_wait_for_completion( - crypto_ahash_digest(&ctx->req), - &ctx->completion); } err = memcpy_to_msg(msg, ctx->result, len); - hash_free_result(sk, ctx); - unlock: + hash_free_result(sk, ctx); release_sock(sk); return err ?: len; From 23ea44c2150d14b97518435a65cc74111804fbeb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 10 Nov 2016 16:06:28 -0500 Subject: [PATCH 368/503] NFSv4.1: Fix a regression in DELEGRETURN We don't want to call nfs4_free_revoked_stateid() in the case where the delegreturn was successful. Reported-by: Benjamin Coddington Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 45b38ee4813c..8e25327077e2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5569,6 +5569,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) switch (task->tk_status) { case 0: renew_lease(data->res.server, data->timestamp); + break; case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_EXPIRED: @@ -5579,8 +5580,6 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) case -NFS4ERR_OLD_STATEID: case -NFS4ERR_STALE_STATEID: task->tk_status = 0; - if (data->roc) - pnfs_roc_set_barrier(data->inode, data->roc_barrier); break; default: if (nfs4_async_handle_error(task, data->res.server, @@ -5590,6 +5589,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) } } data->rpc_status = task->tk_status; + if (data->roc && data->rpc_status == 0) + pnfs_roc_set_barrier(data->inode, data->roc_barrier); } static void nfs4_delegreturn_release(void *calldata) From 3e7dfb1659c2888fc0152ec2b02a5e932397bb0a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 14 Nov 2016 11:19:55 -0500 Subject: [PATCH 369/503] NFSv4: Fix CLOSE races with OPEN If the reply to a successful CLOSE call races with an OPEN to the same file, we can end up scribbling over the stateid that represents the new open state. The race looks like: Client Server ====== ====== CLOSE stateid A on file "foo" CLOSE stateid A, return stateid C OPEN file "foo" OPEN "foo", return stateid B Receive reply to OPEN Reset open state for "foo" Associate stateid B to "foo" Receive CLOSE for A Reset open state for "foo" Replace stateid B with C The fix is to examine the argument of the CLOSE, and check for a match with the current stateid "other" field. If the two do not match, then the above race occurred, and we should just ignore the CLOSE. Reported-by: Benjamin Coddington Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4_fs.h | 7 +++++++ fs/nfs/nfs4proc.c | 12 ++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 9b3a82abab07..1452177c822d 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -542,6 +542,13 @@ static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state) return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0; } +static inline bool nfs4_state_match_open_stateid_other(const struct nfs4_state *state, + const nfs4_stateid *stateid) +{ + return test_bit(NFS_OPEN_STATE, &state->flags) && + nfs4_stateid_match_other(&state->open_stateid, stateid); +} + #else #define nfs4_close_state(a, b) do { } while (0) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8e25327077e2..0b3cdf856333 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1451,7 +1451,6 @@ static void nfs_resync_open_stateid_locked(struct nfs4_state *state) } static void nfs_clear_open_stateid_locked(struct nfs4_state *state, - nfs4_stateid *arg_stateid, nfs4_stateid *stateid, fmode_t fmode) { clear_bit(NFS_O_RDWR_STATE, &state->flags); @@ -1469,10 +1468,9 @@ static void nfs_clear_open_stateid_locked(struct nfs4_state *state, } if (stateid == NULL) return; - /* Handle races with OPEN */ - if (!nfs4_stateid_match_other(arg_stateid, &state->open_stateid) || - (nfs4_stateid_match_other(stateid, &state->open_stateid) && - !nfs4_stateid_is_newer(stateid, &state->open_stateid))) { + /* Handle OPEN+OPEN_DOWNGRADE races */ + if (nfs4_stateid_match_other(stateid, &state->open_stateid) && + !nfs4_stateid_is_newer(stateid, &state->open_stateid)) { nfs_resync_open_stateid_locked(state); return; } @@ -1486,7 +1484,9 @@ static void nfs_clear_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) { write_seqlock(&state->seqlock); - nfs_clear_open_stateid_locked(state, arg_stateid, stateid, fmode); + /* Ignore, if the CLOSE argment doesn't match the current stateid */ + if (nfs4_state_match_open_stateid_other(state, arg_stateid)) + nfs_clear_open_stateid_locked(state, stateid, fmode); write_sequnlock(&state->seqlock); if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) nfs4_schedule_state_manager(state->owner->so_server->nfs_client); From 06ba3b2133dc203e1e9bc36cee7f0839b79a9e8b Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Thu, 17 Nov 2016 09:14:25 -0600 Subject: [PATCH 370/503] net: sky2: Fix shutdown crash The sky2 frequently crashes during machine shutdown with: sky2_get_stats+0x60/0x3d8 [sky2] dev_get_stats+0x68/0xd8 rtnl_fill_stats+0x54/0x140 rtnl_fill_ifinfo+0x46c/0xc68 rtmsg_ifinfo_build_skb+0x7c/0xf0 rtmsg_ifinfo.part.22+0x3c/0x70 rtmsg_ifinfo+0x50/0x5c netdev_state_change+0x4c/0x58 linkwatch_do_dev+0x50/0x88 __linkwatch_run_queue+0x104/0x1a4 linkwatch_event+0x30/0x3c process_one_work+0x140/0x3e0 worker_thread+0x60/0x44c kthread+0xdc/0xf0 ret_from_fork+0x10/0x50 This is caused by the sky2 being called after it has been shutdown. A previous thread about this can be found here: https://lkml.org/lkml/2016/4/12/410 An alternative fix is to assure that IFF_UP gets cleared by calling dev_close() during shutdown. This is similar to what the bnx2/tg3/xgene and maybe others are doing to assure that the driver isn't being called following _shutdown(). Signed-off-by: Jeremy Linton Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/sky2.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index f05ea56dcff2..941c8e2c944e 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -5220,6 +5220,19 @@ static SIMPLE_DEV_PM_OPS(sky2_pm_ops, sky2_suspend, sky2_resume); static void sky2_shutdown(struct pci_dev *pdev) { + struct sky2_hw *hw = pci_get_drvdata(pdev); + int port; + + for (port = 0; port < hw->ports; port++) { + struct net_device *ndev = hw->dev[port]; + + rtnl_lock(); + if (netif_running(ndev)) { + dev_close(ndev); + netif_device_detach(ndev); + } + rtnl_unlock(); + } sky2_suspend(&pdev->dev); pci_wake_from_d3(pdev, device_may_wakeup(&pdev->dev)); pci_set_power_state(pdev, PCI_D3hot); From c46ab7e08c79be7400f6d59edbc6f26a91941c5a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 17 Nov 2016 17:39:58 +0100 Subject: [PATCH 371/503] net: ethernet: ti: cpsw: fix bad register access in probe error path Make sure to keep the platform device runtime-resumed throughout probe to avoid accessing the CPSW registers in the error path (e.g. for deferred probe) with clocks disabled: Unhandled fault: external abort on non-linefetch (0x1008) at 0xd0872d08 ... [] (cpsw_ale_control_set) from [] (cpsw_ale_destroy+0x2c/0x44) [] (cpsw_ale_destroy) from [] (cpsw_probe+0xbd0/0x10c4) [] (cpsw_probe) from [] (platform_drv_probe+0x5c/0xc0) Fixes: df828598a755 ("netdev: driver: ethernet: Add TI CPSW driver") Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index c6cff3d2ff05..f60f8ab7c1e3 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2641,13 +2641,12 @@ static int cpsw_probe(struct platform_device *pdev) goto clean_runtime_disable_ret; } cpsw->version = readl(&cpsw->regs->id_ver); - pm_runtime_put_sync(&pdev->dev); res = platform_get_resource(pdev, IORESOURCE_MEM, 1); cpsw->wr_regs = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(cpsw->wr_regs)) { ret = PTR_ERR(cpsw->wr_regs); - goto clean_runtime_disable_ret; + goto clean_pm_runtime_put_ret; } memset(&dma_params, 0, sizeof(dma_params)); @@ -2684,7 +2683,7 @@ static int cpsw_probe(struct platform_device *pdev) default: dev_err(priv->dev, "unknown version 0x%08x\n", cpsw->version); ret = -ENODEV; - goto clean_runtime_disable_ret; + goto clean_pm_runtime_put_ret; } for (i = 0; i < cpsw->data.slaves; i++) { struct cpsw_slave *slave = &cpsw->slaves[i]; @@ -2713,7 +2712,7 @@ static int cpsw_probe(struct platform_device *pdev) if (!cpsw->dma) { dev_err(priv->dev, "error initializing dma\n"); ret = -ENOMEM; - goto clean_runtime_disable_ret; + goto clean_pm_runtime_put_ret; } cpsw->txch[0] = cpdma_chan_create(cpsw->dma, 0, cpsw_tx_handler, 0); @@ -2815,12 +2814,16 @@ static int cpsw_probe(struct platform_device *pdev) } } + pm_runtime_put(&pdev->dev); + return 0; clean_ale_ret: cpsw_ale_destroy(cpsw->ale); clean_dma_ret: cpdma_ctlr_destroy(cpsw->dma); +clean_pm_runtime_put_ret: + pm_runtime_put_sync(&pdev->dev); clean_runtime_disable_ret: pm_runtime_disable(&pdev->dev); clean_ndev_ret: From 86e1d5adcef961eb383ce4eacbe0ef22f06e2045 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 17 Nov 2016 17:39:59 +0100 Subject: [PATCH 372/503] net: ethernet: ti: cpsw: fix mdio device reference leak Make sure to drop the reference taken by of_find_device_by_node() when looking up an mdio device from a phy_id property during probe. Fixes: 549985ee9c72 ("cpsw: simplify the setup of the register pointers") Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index f60f8ab7c1e3..84c5d214557e 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2397,6 +2397,7 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, } snprintf(slave_data->phy_id, sizeof(slave_data->phy_id), PHY_ID_FMT, mdio->name, phyid); + put_device(&mdio->dev); } else { dev_err(&pdev->dev, "No slave[%d] phy_id, phy-handle, or fixed-link property\n", From a4e32b0d0a26ba2f2ba1c65bd403d06ccc1df29c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 17 Nov 2016 17:40:00 +0100 Subject: [PATCH 373/503] net: ethernet: ti: cpsw: fix deferred probe Make sure to deregister all child devices also on probe errors to avoid leaks and to fix probe deferral: cpsw 4a100000.ethernet: omap_device: omap_device_enable() called from invalid state 1 cpsw 4a100000.ethernet: use pm_runtime_put_sync_suspend() in driver? cpsw: probe of 4a100000.ethernet failed with error -22 Add generic helper to undo the effects of cpsw_probe_dt(), which will also be used in a follow-on patch to fix further leaks that have been introduced more recently. Note that the platform device is now runtime-resumed before registering any child devices in order to make sure that it is synchronously suspended after having deregistered the children in the error path. Fixes: 1fb19aa730e4 ("net: cpsw: Add parent<->child relation support between cpsw and mdio") Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 41 ++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 84c5d214557e..5d14abb06486 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2441,6 +2441,11 @@ no_phy_slave: return 0; } +static void cpsw_remove_dt(struct platform_device *pdev) +{ + of_platform_depopulate(&pdev->dev); +} + static int cpsw_probe_dual_emac(struct cpsw_priv *priv) { struct cpsw_common *cpsw = priv->cpsw; @@ -2585,10 +2590,19 @@ static int cpsw_probe(struct platform_device *pdev) /* Select default pin state */ pinctrl_pm_select_default_state(&pdev->dev); + /* Need to enable clocks with runtime PM api to access module + * registers + */ + ret = pm_runtime_get_sync(&pdev->dev); + if (ret < 0) { + pm_runtime_put_noidle(&pdev->dev); + goto clean_runtime_disable_ret; + } + if (cpsw_probe_dt(&cpsw->data, pdev)) { dev_err(&pdev->dev, "cpsw: platform data missing\n"); ret = -ENODEV; - goto clean_runtime_disable_ret; + goto clean_dt_ret; } data = &cpsw->data; cpsw->rx_ch_num = 1; @@ -2609,7 +2623,7 @@ static int cpsw_probe(struct platform_device *pdev) GFP_KERNEL); if (!cpsw->slaves) { ret = -ENOMEM; - goto clean_runtime_disable_ret; + goto clean_dt_ret; } for (i = 0; i < data->slaves; i++) cpsw->slaves[i].slave_num = i; @@ -2621,7 +2635,7 @@ static int cpsw_probe(struct platform_device *pdev) if (IS_ERR(clk)) { dev_err(priv->dev, "fck is not found\n"); ret = -ENODEV; - goto clean_runtime_disable_ret; + goto clean_dt_ret; } cpsw->bus_freq_mhz = clk_get_rate(clk) / 1000000; @@ -2629,25 +2643,17 @@ static int cpsw_probe(struct platform_device *pdev) ss_regs = devm_ioremap_resource(&pdev->dev, ss_res); if (IS_ERR(ss_regs)) { ret = PTR_ERR(ss_regs); - goto clean_runtime_disable_ret; + goto clean_dt_ret; } cpsw->regs = ss_regs; - /* Need to enable clocks with runtime PM api to access module - * registers - */ - ret = pm_runtime_get_sync(&pdev->dev); - if (ret < 0) { - pm_runtime_put_noidle(&pdev->dev); - goto clean_runtime_disable_ret; - } cpsw->version = readl(&cpsw->regs->id_ver); res = platform_get_resource(pdev, IORESOURCE_MEM, 1); cpsw->wr_regs = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(cpsw->wr_regs)) { ret = PTR_ERR(cpsw->wr_regs); - goto clean_pm_runtime_put_ret; + goto clean_dt_ret; } memset(&dma_params, 0, sizeof(dma_params)); @@ -2684,7 +2690,7 @@ static int cpsw_probe(struct platform_device *pdev) default: dev_err(priv->dev, "unknown version 0x%08x\n", cpsw->version); ret = -ENODEV; - goto clean_pm_runtime_put_ret; + goto clean_dt_ret; } for (i = 0; i < cpsw->data.slaves; i++) { struct cpsw_slave *slave = &cpsw->slaves[i]; @@ -2713,7 +2719,7 @@ static int cpsw_probe(struct platform_device *pdev) if (!cpsw->dma) { dev_err(priv->dev, "error initializing dma\n"); ret = -ENOMEM; - goto clean_pm_runtime_put_ret; + goto clean_dt_ret; } cpsw->txch[0] = cpdma_chan_create(cpsw->dma, 0, cpsw_tx_handler, 0); @@ -2823,7 +2829,8 @@ clean_ale_ret: cpsw_ale_destroy(cpsw->ale); clean_dma_ret: cpdma_ctlr_destroy(cpsw->dma); -clean_pm_runtime_put_ret: +clean_dt_ret: + cpsw_remove_dt(pdev); pm_runtime_put_sync(&pdev->dev); clean_runtime_disable_ret: pm_runtime_disable(&pdev->dev); @@ -2850,7 +2857,7 @@ static int cpsw_remove(struct platform_device *pdev) cpsw_ale_destroy(cpsw->ale); cpdma_ctlr_destroy(cpsw->dma); - of_platform_depopulate(&pdev->dev); + cpsw_remove_dt(pdev); pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); if (cpsw->data.dual_emac) From 8cbcc466fd4abd38a14b9d9b76c63a2cb7006554 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 17 Nov 2016 17:40:01 +0100 Subject: [PATCH 374/503] net: ethernet: ti: cpsw: fix of_node and phydev leaks Make sure to drop references taken and deregister devices registered during probe on probe errors (including deferred probe) and driver unbind. Specifically, PHY of-node references were never released and fixed-link PHY devices were never deregistered. Fixes: 9e42f715264f ("drivers: net: cpsw: add phy-handle parsing") Fixes: 1f71e8c96fc6 ("drivers: net: cpsw: Add support for fixed-link PHY") Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 35 ++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 5d14abb06486..c3b78bc4fe58 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2443,6 +2443,41 @@ no_phy_slave: static void cpsw_remove_dt(struct platform_device *pdev) { + struct net_device *ndev = platform_get_drvdata(pdev); + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + struct cpsw_platform_data *data = &cpsw->data; + struct device_node *node = pdev->dev.of_node; + struct device_node *slave_node; + int i = 0; + + for_each_available_child_of_node(node, slave_node) { + struct cpsw_slave_data *slave_data = &data->slave_data[i]; + + if (strcmp(slave_node->name, "slave")) + continue; + + if (of_phy_is_fixed_link(slave_node)) { + struct phy_device *phydev; + + phydev = of_phy_find_device(slave_node); + if (phydev) { + fixed_phy_unregister(phydev); + /* Put references taken by + * of_phy_find_device() and + * of_phy_register_fixed_link(). + */ + phy_device_free(phydev); + phy_device_free(phydev); + } + } + + of_node_put(slave_data->phy_node); + + i++; + if (i == data->slaves) + break; + } + of_platform_depopulate(&pdev->dev); } From a7fe9d466f6a33558a38c7ca9d58bcc83512d577 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 17 Nov 2016 17:40:02 +0100 Subject: [PATCH 375/503] net: ethernet: ti: cpsw: fix secondary-emac probe error path Make sure to deregister the primary device in case the secondary emac fails to probe. kernel BUG at /home/johan/work/omicron/src/linux/net/core/dev.c:7743! ... [] (free_netdev) from [] (cpsw_probe+0x9cc/0xe50) [] (cpsw_probe) from [] (platform_drv_probe+0x5c/0xc0) Fixes: d9ba8f9e6298 ("driver: net: ethernet: cpsw: dual emac interface implementation") Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index c3b78bc4fe58..11b2daef3158 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2852,7 +2852,7 @@ static int cpsw_probe(struct platform_device *pdev) ret = cpsw_probe_dual_emac(priv); if (ret) { cpsw_err(priv, probe, "error probe slave 2 emac interface\n"); - goto clean_ale_ret; + goto clean_unregister_netdev_ret; } } @@ -2860,6 +2860,8 @@ static int cpsw_probe(struct platform_device *pdev) return 0; +clean_unregister_netdev_ret: + unregister_netdev(ndev); clean_ale_ret: cpsw_ale_destroy(cpsw->ale); clean_dma_ret: From 3420ea88509f9d585b39f36e737022faf0286d9a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 17 Nov 2016 17:40:03 +0100 Subject: [PATCH 376/503] net: ethernet: ti: cpsw: add missing sanity check Make sure to check for allocation failures before dereferencing a NULL-pointer during probe. Fixes: 649a1688c960 ("net: ethernet: ti: cpsw: create common struct to hold shared driver data") Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 11b2daef3158..1387299030e4 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2588,6 +2588,9 @@ static int cpsw_probe(struct platform_device *pdev) int irq; cpsw = devm_kzalloc(&pdev->dev, sizeof(struct cpsw_common), GFP_KERNEL); + if (!cpsw) + return -ENOMEM; + cpsw->dev = &pdev->dev; ndev = alloc_etherdev_mq(sizeof(struct cpsw_priv), CPSW_MAX_QUEUES); From 23a09873221c02106cf767a86743a55873f0d05b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 17 Nov 2016 17:40:04 +0100 Subject: [PATCH 377/503] net: ethernet: ti: cpsw: fix fixed-link phy probe deferral Make sure to propagate errors from of_phy_register_fixed_link() which can fail with -EPROBE_DEFER. Fixes: 1f71e8c96fc6 ("drivers: net: cpsw: Add support for fixed-link PHY") Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 1387299030e4..58947aae31c7 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2375,8 +2375,11 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, * to the PHY is the Ethernet MAC DT node. */ ret = of_phy_register_fixed_link(slave_node); - if (ret) + if (ret) { + if (ret != -EPROBE_DEFER) + dev_err(&pdev->dev, "failed to register fixed-link phy: %d\n", ret); return ret; + } slave_data->phy_node = of_node_get(slave_node); } else if (parp) { u32 phyid; @@ -2637,11 +2640,10 @@ static int cpsw_probe(struct platform_device *pdev) goto clean_runtime_disable_ret; } - if (cpsw_probe_dt(&cpsw->data, pdev)) { - dev_err(&pdev->dev, "cpsw: platform data missing\n"); - ret = -ENODEV; + ret = cpsw_probe_dt(&cpsw->data, pdev); + if (ret) goto clean_dt_ret; - } + data = &cpsw->data; cpsw->rx_ch_num = 1; cpsw->tx_ch_num = 1; From 06a77b07e3b44aea2b3c0e64de420ea2cfdcbaa9 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 17 Nov 2016 15:55:26 -0800 Subject: [PATCH 378/503] af_unix: conditionally use freezable blocking calls in read Commit 2b15af6f95 ("af_unix: use freezable blocking calls in read") converts schedule_timeout() to its freezable version, it was probably correct at that time, but later, commit 2b514574f7e8 ("net: af_unix: implement splice for stream af_unix sockets") breaks the strong requirement for a freezable sleep, according to commit 0f9548ca1091: We shouldn't try_to_freeze if locks are held. Holding a lock can cause a deadlock if the lock is later acquired in the suspend or hibernate path (e.g. by dpm). Holding a lock can also cause a deadlock in the case of cgroup_freezer if a lock is held inside a frozen cgroup that is later acquired by a process outside that group. The pipe_lock is still held at that point. So use freezable version only for the recvmsg call path, avoid impact for Android. Fixes: 2b514574f7e8 ("net: af_unix: implement splice for stream af_unix sockets") Reported-by: Dmitry Vyukov Cc: Tejun Heo Cc: Colin Cross Cc: Rafael J. Wysocki Cc: Hannes Frederic Sowa Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/unix/af_unix.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 5d1c14a2f268..2358f2690ec5 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2199,7 +2199,8 @@ out: * Sleep until more data has arrived. But check for races.. */ static long unix_stream_data_wait(struct sock *sk, long timeo, - struct sk_buff *last, unsigned int last_len) + struct sk_buff *last, unsigned int last_len, + bool freezable) { struct sk_buff *tail; DEFINE_WAIT(wait); @@ -2220,7 +2221,10 @@ static long unix_stream_data_wait(struct sock *sk, long timeo, sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); unix_state_unlock(sk); - timeo = freezable_schedule_timeout(timeo); + if (freezable) + timeo = freezable_schedule_timeout(timeo); + else + timeo = schedule_timeout(timeo); unix_state_lock(sk); if (sock_flag(sk, SOCK_DEAD)) @@ -2250,7 +2254,8 @@ struct unix_stream_read_state { unsigned int splice_flags; }; -static int unix_stream_read_generic(struct unix_stream_read_state *state) +static int unix_stream_read_generic(struct unix_stream_read_state *state, + bool freezable) { struct scm_cookie scm; struct socket *sock = state->socket; @@ -2330,7 +2335,7 @@ again: mutex_unlock(&u->iolock); timeo = unix_stream_data_wait(sk, timeo, last, - last_len); + last_len, freezable); if (signal_pending(current)) { err = sock_intr_errno(timeo); @@ -2472,7 +2477,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, .flags = flags }; - return unix_stream_read_generic(&state); + return unix_stream_read_generic(&state, true); } static int unix_stream_splice_actor(struct sk_buff *skb, @@ -2503,7 +2508,7 @@ static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, flags & SPLICE_F_NONBLOCK) state.flags = MSG_DONTWAIT; - return unix_stream_read_generic(&state); + return unix_stream_read_generic(&state, false); } static int unix_shutdown(struct socket *sock, int mode) From 0f5258cd91e9d78a1ee30696314bec3c33321a93 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Fri, 18 Nov 2016 09:41:46 +0000 Subject: [PATCH 379/503] netns: fix get_net_ns_by_fd(int pid) typo The argument to get_net_ns_by_fd() is a /proc/$PID/ns/net file descriptor not a pid. Fix the typo. Signed-off-by: Stefan Hajnoczi Acked-by: Rami Rosen Signed-off-by: David S. Miller --- include/net/net_namespace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index fc4f757107df..0940598c002f 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -170,7 +170,7 @@ static inline struct net *copy_net_ns(unsigned long flags, extern struct list_head net_namespace_list; struct net *get_net_ns_by_pid(pid_t pid); -struct net *get_net_ns_by_fd(int pid); +struct net *get_net_ns_by_fd(int fd); #ifdef CONFIG_SYSCTL void ipx_register_sysctl(void); From f82ef3e10a870acc19fa04f80ef5877eaa26f41e Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 18 Nov 2016 15:50:39 +0100 Subject: [PATCH 380/503] rtnetlink: fix FDB size computation Add missing NDA_VLAN attribute's size. Fixes: 1e53d5bb8878 ("net: Pass VLAN ID to rtnl_fdb_notify.") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a6529c55ffb7..2b9d7d08ed4d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2852,7 +2852,10 @@ nla_put_failure: static inline size_t rtnl_fdb_nlmsg_size(void) { - return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN); + return NLMSG_ALIGN(sizeof(struct ndmsg)) + + nla_total_size(ETH_ALEN) + /* NDA_LLADDR */ + nla_total_size(sizeof(u16)) + /* NDA_VLAN */ + 0; } static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type, From c88c545bf3202ca2cdb45df93eb40e3bcdbb3742 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Fri, 28 Oct 2016 10:12:40 -0700 Subject: [PATCH 381/503] sparc64: Add FORCE_MAX_ZONEORDER and default to 13 This change allows ATU (new IOMMU) in SPARC systems to request large (32M) contiguous memory during boot for creating IOTSB backing store. Signed-off-by: Dave Kleikamp Signed-off-by: Tushar Dave Signed-off-by: David S. Miller --- arch/sparc/Kconfig | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index b23c76b42d6e..5202eb4ba2db 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -89,6 +89,10 @@ config ARCH_DEFCONFIG config ARCH_PROC_KCORE_TEXT def_bool y +config ARCH_ATU + bool + default y if SPARC64 + config IOMMU_HELPER bool default y if SPARC64 @@ -304,6 +308,20 @@ config ARCH_SPARSEMEM_ENABLE config ARCH_SPARSEMEM_DEFAULT def_bool y if SPARC64 +config FORCE_MAX_ZONEORDER + int "Maximum zone order" + default "13" + help + The kernel memory allocator divides physically contiguous memory + blocks into "zones", where each zone is a power of two number of + pages. This option selects the largest power of two that the kernel + keeps in the memory allocator. If you need to allocate very large + blocks of physically contiguous memory, then you may need to + increase this value. + + This config option is actually maximum order plus one. For example, + a value of 13 means that the largest free memory block is 2^12 pages. + source "mm/Kconfig" if SPARC64 From f0248c1524fae654e9746e6843b9657fb3917387 Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Fri, 28 Oct 2016 10:12:41 -0700 Subject: [PATCH 382/503] sparc64: Add ATU (new IOMMU) support ATU (Address Translation Unit) is a new IOMMU in SPARC supported with Hypervisor IOMMU v2 APIs. Current SPARC IOMMU supports only 32bit address ranges and one TSB per PCIe root complex that has a 2GB per root complex DVMA space limit. The limit has become a scalability bottleneck nowadays that a typical 10G/40G NIC can consume 300MB-500MB DVMA space per instance. When DVMA resource is exhausted, devices will not be usable since the driver can't allocate DVMA. ATU removes bottleneck by allowing guest os to create IOTSB of size 32G (or more) with 64bit address ranges available in ATU HW. 32G is more than enough DVMA space to be shared by all PCIe devices under root complex contrast to 2G space provided by legacy IOMMU. ATU allows PCIe devices to use 64bit DMA addressing. Devices which choose to use 32bit DMA mask will continue to work with the existing legacy IOMMU. Signed-off-by: Tushar Dave Reviewed-by: chris hyser Acked-by: Sowmini Varadhan Signed-off-by: David S. Miller --- arch/sparc/include/asm/hypervisor.h | 337 ++++++++++++++++++++++++++++ arch/sparc/include/asm/iommu_64.h | 26 +++ arch/sparc/kernel/hvapi.c | 1 + arch/sparc/kernel/pci_sun4v.c | 140 ++++++++++++ arch/sparc/kernel/pci_sun4v.h | 7 + arch/sparc/kernel/pci_sun4v_asm.S | 18 ++ 6 files changed, 529 insertions(+) diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h index 666d5ba230d2..7b15df8be008 100644 --- a/arch/sparc/include/asm/hypervisor.h +++ b/arch/sparc/include/asm/hypervisor.h @@ -2335,6 +2335,342 @@ unsigned long sun4v_vintr_set_target(unsigned long dev_handle, */ #define HV_FAST_PCI_MSG_SETVALID 0xd3 +/* PCI IOMMU v2 definitions and services + * + * While the PCI IO definitions above is valid IOMMU v2 adds new PCI IO + * definitions and services. + * + * CTE Clump Table Entry. First level table entry in the ATU. + * + * pci_device_list + * A 32-bit aligned list of pci_devices. + * + * pci_device_listp + * real address of a pci_device_list. 32-bit aligned. + * + * iotte IOMMU translation table entry. + * + * iotte_attributes + * IO Attributes for IOMMU v2 mappings. In addition to + * read, write IOMMU v2 supports relax ordering + * + * io_page_list A 64-bit aligned list of real addresses. Each real + * address in an io_page_list must be properly aligned + * to the pagesize of the given IOTSB. + * + * io_page_list_p Real address of an io_page_list, 64-bit aligned. + * + * IOTSB IO Translation Storage Buffer. An aligned table of + * IOTTEs. Each IOTSB has a pagesize, table size, and + * virtual address associated with it that must match + * a pagesize and table size supported by the un-derlying + * hardware implementation. The alignment requirements + * for an IOTSB depend on the pagesize used for that IOTSB. + * Each IOTTE in an IOTSB maps one pagesize-sized page. + * The size of the IOTSB dictates how large of a virtual + * address space the IOTSB is capable of mapping. + * + * iotsb_handle An opaque identifier for an IOTSB. A devhandle plus + * iotsb_handle represents a binding of an IOTSB to a + * PCI root complex. + * + * iotsb_index Zero-based IOTTE number within an IOTSB. + */ + +/* pci_iotsb_conf() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOTSB_CONF + * ARG0: devhandle + * ARG1: r_addr + * ARG2: size + * ARG3: pagesize + * ARG4: iova + * RET0: status + * RET1: iotsb_handle + * ERRORS: EINVAL Invalid devhandle, size, iova, or pagesize + * EBADALIGN r_addr is not properly aligned + * ENORADDR r_addr is not a valid real address + * ETOOMANY No further IOTSBs may be configured + * EBUSY Duplicate devhandle, raddir, iova combination + * + * Create an IOTSB suitable for the PCI root complex identified by devhandle, + * for the DMA virtual address defined by the argument iova. + * + * r_addr is the properly aligned base address of the IOTSB and size is the + * IOTSB (table) size in bytes.The IOTSB is required to be zeroed prior to + * being configured. If it contains any values other than zeros then the + * behavior is undefined. + * + * pagesize is the size of each page in the IOTSB. Note that the combination of + * size (table size) and pagesize must be valid. + * + * virt is the DMA virtual address this IOTSB will map. + * + * If successful, the opaque 64-bit handle iotsb_handle is returned in ret1. + * Once configured, privileged access to the IOTSB memory is prohibited and + * creates undefined behavior. The only permitted access is indirect via these + * services. + */ +#define HV_FAST_PCI_IOTSB_CONF 0x190 + +/* pci_iotsb_info() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOTSB_INFO + * ARG0: devhandle + * ARG1: iotsb_handle + * RET0: status + * RET1: r_addr + * RET2: size + * RET3: pagesize + * RET4: iova + * RET5: #bound + * ERRORS: EINVAL Invalid devhandle or iotsb_handle + * + * This service returns configuration information about an IOTSB previously + * created with pci_iotsb_conf. + * + * iotsb_handle value 0 may be used with this service to inquire about the + * legacy IOTSB that may or may not exist. If the service succeeds, the return + * values describe the legacy IOTSB and I/O virtual addresses mapped by that + * table. However, the table base address r_addr may contain the value -1 which + * indicates a memory range that cannot be accessed or be reclaimed. + * + * The return value #bound contains the number of PCI devices that iotsb_handle + * is currently bound to. + */ +#define HV_FAST_PCI_IOTSB_INFO 0x191 + +/* pci_iotsb_unconf() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOTSB_UNCONF + * ARG0: devhandle + * ARG1: iotsb_handle + * RET0: status + * ERRORS: EINVAL Invalid devhandle or iotsb_handle + * EBUSY The IOTSB is bound and may not be unconfigured + * + * This service unconfigures the IOTSB identified by the devhandle and + * iotsb_handle arguments, previously created with pci_iotsb_conf. + * The IOTSB must not be currently bound to any device or the service will fail + * + * If the call succeeds, iotsb_handle is no longer valid. + */ +#define HV_FAST_PCI_IOTSB_UNCONF 0x192 + +/* pci_iotsb_bind() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOTSB_BIND + * ARG0: devhandle + * ARG1: iotsb_handle + * ARG2: pci_device + * RET0: status + * ERRORS: EINVAL Invalid devhandle, iotsb_handle, or pci_device + * EBUSY A PCI function is already bound to an IOTSB at the same + * address range as specified by devhandle, iotsb_handle. + * + * This service binds the PCI function specified by the argument pci_device to + * the IOTSB specified by the arguments devhandle and iotsb_handle. + * + * The PCI device function is bound to the specified IOTSB with the IOVA range + * specified when the IOTSB was configured via pci_iotsb_conf. If the function + * is already bound then it is unbound first. + */ +#define HV_FAST_PCI_IOTSB_BIND 0x193 + +/* pci_iotsb_unbind() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOTSB_UNBIND + * ARG0: devhandle + * ARG1: iotsb_handle + * ARG2: pci_device + * RET0: status + * ERRORS: EINVAL Invalid devhandle, iotsb_handle, or pci_device + * ENOMAP The PCI function was not bound to the specified IOTSB + * + * This service unbinds the PCI device specified by the argument pci_device + * from the IOTSB identified * by the arguments devhandle and iotsb_handle. + * + * If the PCI device is not bound to the specified IOTSB then this service will + * fail with status ENOMAP + */ +#define HV_FAST_PCI_IOTSB_UNBIND 0x194 + +/* pci_iotsb_get_binding() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOTSB_GET_BINDING + * ARG0: devhandle + * ARG1: iotsb_handle + * ARG2: iova + * RET0: status + * RET1: iotsb_handle + * ERRORS: EINVAL Invalid devhandle, pci_device, or iova + * ENOMAP The PCI function is not bound to an IOTSB at iova + * + * This service returns the IOTSB binding, iotsb_handle, for a given pci_device + * and DMA virtual address, iova. + * + * iova must be the base address of a DMA virtual address range as defined by + * the iommu-address-ranges property in the root complex device node defined + * by the argument devhandle. + */ +#define HV_FAST_PCI_IOTSB_GET_BINDING 0x195 + +/* pci_iotsb_map() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOTSB_MAP + * ARG0: devhandle + * ARG1: iotsb_handle + * ARG2: index_count + * ARG3: iotte_attributes + * ARG4: io_page_list_p + * RET0: status + * RET1: #mapped + * ERRORS: EINVAL Invalid devhandle, iotsb_handle, #iottes, + * iotsb_index or iotte_attributes + * EBADALIGN Improperly aligned io_page_list_p or I/O page + * address in the I/O page list. + * ENORADDR Invalid io_page_list_p or I/O page address in + * the I/O page list. + * + * This service creates and flushes mappings in the IOTSB defined by the + * arguments devhandle, iotsb. + * + * The index_count argument consists of two fields. Bits 63:48 contain #iotte + * and bits 47:0 contain iotsb_index + * + * The first mapping is created in the IOTSB index specified by iotsb_index. + * Subsequent mappings are created at iotsb_index+1 and so on. + * + * The attributes of each mapping are defined by the argument iotte_attributes. + * + * The io_page_list_p specifies the real address of the 64-bit-aligned list of + * #iottes I/O page addresses. Each page address must be a properly aligned + * real address of a page to be mapped in the IOTSB. The first entry in the I/O + * page list contains the real address of the first page, the 2nd entry for the + * 2nd page, and so on. + * + * #iottes must be greater than zero. + * + * The return value #mapped is the actual number of mappings created, which may + * be less than or equal to the argument #iottes. If the function returns + * successfully with a #mapped value less than the requested #iottes then the + * caller should continue to invoke the service with updated iotsb_index, + * #iottes, and io_page_list_p arguments until all pages are mapped. + * + * This service must not be used to demap a mapping. In other words, all + * mappings must be valid and have one or both of the RW attribute bits set. + * + * Note: + * It is implementation-defined whether I/O page real address validity checking + * is done at time mappings are established or deferred until they are + * accessed. + */ +#define HV_FAST_PCI_IOTSB_MAP 0x196 + +/* pci_iotsb_map_one() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOTSB_MAP_ONE + * ARG0: devhandle + * ARG1: iotsb_handle + * ARG2: iotsb_index + * ARG3: iotte_attributes + * ARG4: r_addr + * RET0: status + * ERRORS: EINVAL Invalid devhandle,iotsb_handle, iotsb_index + * or iotte_attributes + * EBADALIGN Improperly aligned r_addr + * ENORADDR Invalid r_addr + * + * This service creates and flushes a single mapping in the IOTSB defined by the + * arguments devhandle, iotsb. + * + * The mapping for the page at r_addr is created at the IOTSB index specified by + * iotsb_index with the attributes iotte_attributes. + * + * This service must not be used to demap a mapping. In other words, the mapping + * must be valid and have one or both of the RW attribute bits set. + * + * Note: + * It is implementation-defined whether I/O page real address validity checking + * is done at time mappings are established or deferred until they are + * accessed. + */ +#define HV_FAST_PCI_IOTSB_MAP_ONE 0x197 + +/* pci_iotsb_demap() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOTSB_DEMAP + * ARG0: devhandle + * ARG1: iotsb_handle + * ARG2: iotsb_index + * ARG3: #iottes + * RET0: status + * RET1: #unmapped + * ERRORS: EINVAL Invalid devhandle, iotsb_handle, iotsb_index or #iottes + * + * This service unmaps and flushes up to #iottes mappings starting at index + * iotsb_index from the IOTSB defined by the arguments devhandle, iotsb. + * + * #iottes must be greater than zero. + * + * The actual number of IOTTEs unmapped is returned in #unmapped and may be less + * than or equal to the requested number of IOTTEs, #iottes. + * + * If #unmapped is less than #iottes, the caller should continue to invoke this + * service with updated iotsb_index and #iottes arguments until all pages are + * demapped. + */ +#define HV_FAST_PCI_IOTSB_DEMAP 0x198 + +/* pci_iotsb_getmap() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOTSB_GETMAP + * ARG0: devhandle + * ARG1: iotsb_handle + * ARG2: iotsb_index + * RET0: status + * RET1: r_addr + * RET2: iotte_attributes + * ERRORS: EINVAL Invalid devhandle, iotsb_handle, or iotsb_index + * ENOMAP No mapping was found + * + * This service returns the mapping specified by index iotsb_index from the + * IOTSB defined by the arguments devhandle, iotsb. + * + * Upon success, the real address of the mapping shall be returned in + * r_addr and thethe IOTTE mapping attributes shall be returned in + * iotte_attributes. + * + * The return value iotte_attributes may not include optional features used in + * the call to create the mapping. + */ +#define HV_FAST_PCI_IOTSB_GETMAP 0x199 + +/* pci_iotsb_sync_mappings() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOTSB_SYNC_MAPPINGS + * ARG0: devhandle + * ARG1: iotsb_handle + * ARG2: iotsb_index + * ARG3: #iottes + * RET0: status + * RET1: #synced + * ERROS: EINVAL Invalid devhandle, iotsb_handle, iotsb_index, or #iottes + * + * This service synchronizes #iottes mappings starting at index iotsb_index in + * the IOTSB defined by the arguments devhandle, iotsb. + * + * #iottes must be greater than zero. + * + * The actual number of IOTTEs synchronized is returned in #synced, which may + * be less than or equal to the requested number, #iottes. + * + * Upon a successful return, #synced is less than #iottes, the caller should + * continue to invoke this service with updated iotsb_index and #iottes + * arguments until all pages are synchronized. + */ +#define HV_FAST_PCI_IOTSB_SYNC_MAPPINGS 0x19a + /* Logical Domain Channel services. */ #define LDC_CHANNEL_DOWN 0 @@ -2993,6 +3329,7 @@ unsigned long sun4v_m7_set_perfreg(unsigned long reg_num, #define HV_GRP_SDIO 0x0108 #define HV_GRP_SDIO_ERR 0x0109 #define HV_GRP_REBOOT_DATA 0x0110 +#define HV_GRP_ATU 0x0111 #define HV_GRP_M7_PERF 0x0114 #define HV_GRP_NIAG_PERF 0x0200 #define HV_GRP_FIRE_PERF 0x0201 diff --git a/arch/sparc/include/asm/iommu_64.h b/arch/sparc/include/asm/iommu_64.h index cd0d69fa7592..93daa5965b3d 100644 --- a/arch/sparc/include/asm/iommu_64.h +++ b/arch/sparc/include/asm/iommu_64.h @@ -24,8 +24,34 @@ struct iommu_arena { unsigned int limit; }; +#define ATU_64_SPACE_SIZE 0x800000000 /* 32G */ + +/* Data structures for SPARC ATU architecture */ +struct atu_iotsb { + void *table; /* IOTSB table base virtual addr*/ + u64 ra; /* IOTSB table real addr */ + u64 dvma_size; /* ranges[3].size or OS slected 32G size */ + u64 dvma_base; /* ranges[3].base */ + u64 table_size; /* IOTSB table size */ + u64 page_size; /* IO PAGE size for IOTSB */ + u32 iotsb_num; /* tsbnum is same as iotsb_handle */ +}; + +struct atu_ranges { + u64 base; + u64 size; +}; + +struct atu { + struct atu_ranges *ranges; + struct atu_iotsb *iotsb; + u64 base; + u64 size; +}; + struct iommu { struct iommu_map_table tbl; + struct atu *atu; spinlock_t lock; u32 dma_addr_mask; iopte_t *page_table; diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c index 662500fa555f..267731234ce8 100644 --- a/arch/sparc/kernel/hvapi.c +++ b/arch/sparc/kernel/hvapi.c @@ -39,6 +39,7 @@ static struct api_info api_table[] = { { .group = HV_GRP_SDIO, }, { .group = HV_GRP_SDIO_ERR, }, { .group = HV_GRP_REBOOT_DATA, }, + { .group = HV_GRP_ATU, .flags = FLAG_PRE_API }, { .group = HV_GRP_NIAG_PERF, .flags = FLAG_PRE_API }, { .group = HV_GRP_FIRE_PERF, }, { .group = HV_GRP_N2_CPU, }, diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index db57d8acdc01..2afb86c73da9 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -44,6 +44,9 @@ static struct vpci_version vpci_versions[] = { { .major = 1, .minor = 1 }, }; +static unsigned long vatu_major = 1; +static unsigned long vatu_minor = 1; + #define PGLIST_NENTS (PAGE_SIZE / sizeof(u64)) struct iommu_batch { @@ -581,6 +584,107 @@ static unsigned long probe_existing_entries(struct pci_pbm_info *pbm, return cnt; } +static int pci_sun4v_atu_alloc_iotsb(struct pci_pbm_info *pbm) +{ + struct atu *atu = pbm->iommu->atu; + struct atu_iotsb *iotsb; + void *table; + u64 table_size; + u64 iotsb_num; + unsigned long order; + unsigned long err; + + iotsb = kzalloc(sizeof(*iotsb), GFP_KERNEL); + if (!iotsb) { + err = -ENOMEM; + goto out_err; + } + atu->iotsb = iotsb; + + /* calculate size of IOTSB */ + table_size = (atu->size / IO_PAGE_SIZE) * 8; + order = get_order(table_size); + table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!table) { + err = -ENOMEM; + goto table_failed; + } + iotsb->table = table; + iotsb->ra = __pa(table); + iotsb->dvma_size = atu->size; + iotsb->dvma_base = atu->base; + iotsb->table_size = table_size; + iotsb->page_size = IO_PAGE_SIZE; + + /* configure and register IOTSB with HV */ + err = pci_sun4v_iotsb_conf(pbm->devhandle, + iotsb->ra, + iotsb->table_size, + iotsb->page_size, + iotsb->dvma_base, + &iotsb_num); + if (err) { + pr_err(PFX "pci_iotsb_conf failed error: %ld\n", err); + goto iotsb_conf_failed; + } + iotsb->iotsb_num = iotsb_num; + + return 0; + +iotsb_conf_failed: + free_pages((unsigned long)table, order); +table_failed: + kfree(iotsb); +out_err: + return err; +} + +static int pci_sun4v_atu_init(struct pci_pbm_info *pbm) +{ + struct atu *atu = pbm->iommu->atu; + unsigned long err; + const u64 *ranges; + const u32 *page_size; + int len; + + ranges = of_get_property(pbm->op->dev.of_node, "iommu-address-ranges", + &len); + if (!ranges) { + pr_err(PFX "No iommu-address-ranges\n"); + return -EINVAL; + } + + page_size = of_get_property(pbm->op->dev.of_node, "iommu-pagesizes", + NULL); + if (!page_size) { + pr_err(PFX "No iommu-pagesizes\n"); + return -EINVAL; + } + + /* There are 4 iommu-address-ranges supported. Each range is pair of + * {base, size}. The ranges[0] and ranges[1] are 32bit address space + * while ranges[2] and ranges[3] are 64bit space. We want to use 64bit + * address ranges to support 64bit addressing. Because 'size' for + * address ranges[2] and ranges[3] are same we can select either of + * ranges[2] or ranges[3] for mapping. However due to 'size' is too + * large for OS to allocate IOTSB we are using fix size 32G + * (ATU_64_SPACE_SIZE) which is more than enough for all PCIe devices + * to share. + */ + atu->ranges = (struct atu_ranges *)ranges; + atu->base = atu->ranges[3].base; + atu->size = ATU_64_SPACE_SIZE; + + /* Create IOTSB */ + err = pci_sun4v_atu_alloc_iotsb(pbm); + if (err) { + pr_err(PFX "Error creating ATU IOTSB\n"); + return err; + } + + return 0; +} + static int pci_sun4v_iommu_init(struct pci_pbm_info *pbm) { static const u32 vdma_default[] = { 0x80000000, 0x80000000 }; @@ -918,6 +1022,18 @@ static int pci_sun4v_pbm_init(struct pci_pbm_info *pbm, pci_sun4v_scan_bus(pbm, &op->dev); + /* if atu_init fails its not complete failure. + * we can still continue using legacy iommu. + */ + if (pbm->iommu->atu) { + err = pci_sun4v_atu_init(pbm); + if (err) { + kfree(pbm->iommu->atu); + pbm->iommu->atu = NULL; + pr_err(PFX "ATU init failed, err=%d\n", err); + } + } + pbm->next = pci_pbm_root; pci_pbm_root = pbm; @@ -931,8 +1047,10 @@ static int pci_sun4v_probe(struct platform_device *op) struct pci_pbm_info *pbm; struct device_node *dp; struct iommu *iommu; + struct atu *atu; u32 devhandle; int i, err = -ENODEV; + static bool hv_atu = true; dp = op->dev.of_node; @@ -954,6 +1072,19 @@ static int pci_sun4v_probe(struct platform_device *op) pr_info(PFX "Registered hvapi major[%lu] minor[%lu]\n", vpci_major, vpci_minor); + err = sun4v_hvapi_register(HV_GRP_ATU, vatu_major, &vatu_minor); + if (err) { + /* don't return an error if we fail to register the + * ATU group, but ATU hcalls won't be available. + */ + hv_atu = false; + pr_err(PFX "Could not register hvapi ATU err=%d\n", + err); + } else { + pr_info(PFX "Registered hvapi ATU major[%lu] minor[%lu]\n", + vatu_major, vatu_minor); + } + dma_ops = &sun4v_dma_ops; } @@ -991,6 +1122,14 @@ static int pci_sun4v_probe(struct platform_device *op) } pbm->iommu = iommu; + iommu->atu = NULL; + if (hv_atu) { + atu = kzalloc(sizeof(*atu), GFP_KERNEL); + if (!atu) + pr_err(PFX "Could not allocate atu\n"); + else + iommu->atu = atu; + } err = pci_sun4v_pbm_init(pbm, op, devhandle); if (err) @@ -1001,6 +1140,7 @@ static int pci_sun4v_probe(struct platform_device *op) return 0; out_free_iommu: + kfree(iommu->atu); kfree(pbm->iommu); out_free_controller: diff --git a/arch/sparc/kernel/pci_sun4v.h b/arch/sparc/kernel/pci_sun4v.h index 5642212390b2..0ef6d1c456e7 100644 --- a/arch/sparc/kernel/pci_sun4v.h +++ b/arch/sparc/kernel/pci_sun4v.h @@ -89,4 +89,11 @@ unsigned long pci_sun4v_msg_setvalid(unsigned long devhandle, unsigned long msinum, unsigned long valid); +/* Sun4v HV IOMMU v2 APIs */ +unsigned long pci_sun4v_iotsb_conf(unsigned long devhandle, + unsigned long ra, + unsigned long table_size, + unsigned long page_size, + unsigned long dvma_base, + u64 *iotsb_num); #endif /* !(_PCI_SUN4V_H) */ diff --git a/arch/sparc/kernel/pci_sun4v_asm.S b/arch/sparc/kernel/pci_sun4v_asm.S index e606d46c6815..fd94d0e4a41d 100644 --- a/arch/sparc/kernel/pci_sun4v_asm.S +++ b/arch/sparc/kernel/pci_sun4v_asm.S @@ -360,3 +360,21 @@ ENTRY(pci_sun4v_msg_setvalid) mov %o0, %o0 ENDPROC(pci_sun4v_msg_setvalid) + /* + * %o0: devhandle + * %o1: r_addr + * %o2: size + * %o3: pagesize + * %o4: virt + * %o5: &iotsb_num/&iotsb_handle + * + * returns %o0: status + * %o1: iotsb_num/iotsb_handle + */ +ENTRY(pci_sun4v_iotsb_conf) + mov %o5, %g1 + mov HV_FAST_PCI_IOTSB_CONF, %o5 + ta HV_FAST_TRAP + retl + stx %o1, [%g1] +ENDPROC(pci_sun4v_iotsb_conf) From 31f077dc7dffd4a444932a9fe7fe84d9c7b90b73 Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Fri, 28 Oct 2016 10:12:42 -0700 Subject: [PATCH 383/503] sparc64: Initialize iommu_map_table and iommu_pool Like legacy IOMMU, use common iommu_map_table and iommu_pool for ATU. This change initializes iommu_map_table and iommu_pool for ATU. Signed-off-by: Tushar Dave Reviewed-by: chris hyser Reviewed-by: Sowmini Varadhan Signed-off-by: David S. Miller --- arch/sparc/include/asm/iommu_64.h | 2 ++ arch/sparc/kernel/pci_sun4v.c | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/arch/sparc/include/asm/iommu_64.h b/arch/sparc/include/asm/iommu_64.h index 93daa5965b3d..f24f356f2503 100644 --- a/arch/sparc/include/asm/iommu_64.h +++ b/arch/sparc/include/asm/iommu_64.h @@ -45,8 +45,10 @@ struct atu_ranges { struct atu { struct atu_ranges *ranges; struct atu_iotsb *iotsb; + struct iommu_map_table tbl; u64 base; u64 size; + u64 dma_addr_mask; }; struct iommu { diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 2afb86c73da9..242477cbfdf2 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -644,6 +644,8 @@ static int pci_sun4v_atu_init(struct pci_pbm_info *pbm) struct atu *atu = pbm->iommu->atu; unsigned long err; const u64 *ranges; + u64 map_size, num_iotte; + u64 dma_mask; const u32 *page_size; int len; @@ -682,6 +684,23 @@ static int pci_sun4v_atu_init(struct pci_pbm_info *pbm) return err; } + /* Create ATU iommu map. + * One bit represents one iotte in IOTSB table. + */ + dma_mask = (roundup_pow_of_two(atu->size) - 1UL); + num_iotte = atu->size / IO_PAGE_SIZE; + map_size = num_iotte / 8; + atu->tbl.table_map_base = atu->base; + atu->dma_addr_mask = dma_mask; + atu->tbl.map = kzalloc(map_size, GFP_KERNEL); + if (!atu->tbl.map) + return -ENOMEM; + + iommu_tbl_pool_init(&atu->tbl, num_iotte, IO_PAGE_SHIFT, + NULL, false /* no large_pool */, + 0 /* default npools */, + false /* want span boundary checking */); + return 0; } From 5116ab4eabed575b7cca61a6e89b7d6fb7440970 Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Fri, 28 Oct 2016 10:12:43 -0700 Subject: [PATCH 384/503] sparc64: Bind PCIe devices to use IOMMU v2 service In order to use Hypervisor (HV) IOMMU v2 API for map/demap, each PCIe device has to be bound to IOTSB using HV API pci_iotsb_bind(). Signed-off-by: Tushar Dave Reviewed-by: chris hyser Acked-by: Sowmini Varadhan Signed-off-by: David S. Miller --- arch/sparc/kernel/pci_sun4v.c | 43 +++++++++++++++++++++++++++++++ arch/sparc/kernel/pci_sun4v.h | 3 +++ arch/sparc/kernel/pci_sun4v_asm.S | 14 ++++++++++ 3 files changed, 60 insertions(+) diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 242477cbfdf2..d4208aa93383 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -216,6 +216,43 @@ range_alloc_fail: return NULL; } +unsigned long dma_4v_iotsb_bind(unsigned long devhandle, + unsigned long iotsb_num, + struct pci_bus *bus_dev) +{ + struct pci_dev *pdev; + unsigned long err; + unsigned int bus; + unsigned int device; + unsigned int fun; + + list_for_each_entry(pdev, &bus_dev->devices, bus_list) { + if (pdev->subordinate) { + /* No need to bind pci bridge */ + dma_4v_iotsb_bind(devhandle, iotsb_num, + pdev->subordinate); + } else { + bus = bus_dev->number; + device = PCI_SLOT(pdev->devfn); + fun = PCI_FUNC(pdev->devfn); + err = pci_sun4v_iotsb_bind(devhandle, iotsb_num, + HV_PCI_DEVICE_BUILD(bus, + device, + fun)); + + /* If bind fails for one device it is going to fail + * for rest of the devices because we are sharing + * IOTSB. So in case of failure simply return with + * error. + */ + if (err) + return err; + } + } + + return 0; +} + static void dma_4v_iommu_demap(void *demap_arg, unsigned long entry, unsigned long npages) { @@ -629,6 +666,12 @@ static int pci_sun4v_atu_alloc_iotsb(struct pci_pbm_info *pbm) } iotsb->iotsb_num = iotsb_num; + err = dma_4v_iotsb_bind(pbm->devhandle, iotsb_num, pbm->pci_bus); + if (err) { + pr_err(PFX "pci_iotsb_bind failed error: %ld\n", err); + goto iotsb_conf_failed; + } + return 0; iotsb_conf_failed: diff --git a/arch/sparc/kernel/pci_sun4v.h b/arch/sparc/kernel/pci_sun4v.h index 0ef6d1c456e7..1019e0fe6e9d 100644 --- a/arch/sparc/kernel/pci_sun4v.h +++ b/arch/sparc/kernel/pci_sun4v.h @@ -96,4 +96,7 @@ unsigned long pci_sun4v_iotsb_conf(unsigned long devhandle, unsigned long page_size, unsigned long dvma_base, u64 *iotsb_num); +unsigned long pci_sun4v_iotsb_bind(unsigned long devhandle, + unsigned long iotsb_num, + unsigned int pci_device); #endif /* !(_PCI_SUN4V_H) */ diff --git a/arch/sparc/kernel/pci_sun4v_asm.S b/arch/sparc/kernel/pci_sun4v_asm.S index fd94d0e4a41d..22024a96c317 100644 --- a/arch/sparc/kernel/pci_sun4v_asm.S +++ b/arch/sparc/kernel/pci_sun4v_asm.S @@ -378,3 +378,17 @@ ENTRY(pci_sun4v_iotsb_conf) retl stx %o1, [%g1] ENDPROC(pci_sun4v_iotsb_conf) + + /* + * %o0: devhandle + * %o1: iotsb_num/iotsb_handle + * %o2: pci_device + * + * returns %o0: status + */ +ENTRY(pci_sun4v_iotsb_bind) + mov HV_FAST_PCI_IOTSB_BIND, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(pci_sun4v_iotsb_bind) From f08978b0fdbf37d3c91efb60a20bdee3ba8f59c6 Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Fri, 28 Oct 2016 10:12:44 -0700 Subject: [PATCH 385/503] sparc64: Enable sun4v dma ops to use IOMMU v2 APIs Add Hypervisor IOMMU v2 APIs pci_iotsb_map(), pci_iotsb_demap() and enable sun4v dma ops to use IOMMU v2 API for all PCIe devices with 64bit DMA mask. Signed-off-by: Tushar Dave Reviewed-by: chris hyser Acked-by: Sowmini Varadhan Signed-off-by: David S. Miller --- arch/sparc/include/asm/hypervisor.h | 6 + arch/sparc/kernel/pci_sun4v.c | 216 ++++++++++++++++++++-------- arch/sparc/kernel/pci_sun4v.h | 11 ++ arch/sparc/kernel/pci_sun4v_asm.S | 36 +++++ 4 files changed, 211 insertions(+), 58 deletions(-) diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h index 7b15df8be008..73cb8978df58 100644 --- a/arch/sparc/include/asm/hypervisor.h +++ b/arch/sparc/include/asm/hypervisor.h @@ -2377,6 +2377,12 @@ unsigned long sun4v_vintr_set_target(unsigned long dev_handle, * iotsb_index Zero-based IOTTE number within an IOTSB. */ +/* The index_count argument consists of two fields: + * bits 63:48 #iottes and bits 47:0 iotsb_index + */ +#define HV_PCI_IOTSB_INDEX_COUNT(__iottes, __iotsb_index) \ + (((u64)(__iottes) << 48UL) | ((u64)(__iotsb_index))) + /* pci_iotsb_conf() * TRAP: HV_FAST_TRAP * FUNCTION: HV_FAST_PCI_IOTSB_CONF diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index d4208aa93383..06981cc716b6 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -72,34 +72,57 @@ static inline void iommu_batch_start(struct device *dev, unsigned long prot, uns } /* Interrupts must be disabled. */ -static long iommu_batch_flush(struct iommu_batch *p) +static long iommu_batch_flush(struct iommu_batch *p, u64 mask) { struct pci_pbm_info *pbm = p->dev->archdata.host_controller; + u64 *pglist = p->pglist; + u64 index_count; unsigned long devhandle = pbm->devhandle; unsigned long prot = p->prot; unsigned long entry = p->entry; - u64 *pglist = p->pglist; unsigned long npages = p->npages; + unsigned long iotsb_num; + unsigned long ret; + long num; /* VPCI maj=1, min=[0,1] only supports read and write */ if (vpci_major < 2) prot &= (HV_PCI_MAP_ATTR_READ | HV_PCI_MAP_ATTR_WRITE); while (npages != 0) { - long num; - - num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry), - npages, prot, __pa(pglist)); - if (unlikely(num < 0)) { - if (printk_ratelimit()) - printk("iommu_batch_flush: IOMMU map of " - "[%08lx:%08llx:%lx:%lx:%lx] failed with " - "status %ld\n", - devhandle, HV_PCI_TSBID(0, entry), - npages, prot, __pa(pglist), num); - return -1; + if (mask <= DMA_BIT_MASK(32)) { + num = pci_sun4v_iommu_map(devhandle, + HV_PCI_TSBID(0, entry), + npages, + prot, + __pa(pglist)); + if (unlikely(num < 0)) { + pr_err_ratelimited("%s: IOMMU map of [%08lx:%08llx:%lx:%lx:%lx] failed with status %ld\n", + __func__, + devhandle, + HV_PCI_TSBID(0, entry), + npages, prot, __pa(pglist), + num); + return -1; + } + } else { + index_count = HV_PCI_IOTSB_INDEX_COUNT(npages, entry), + iotsb_num = pbm->iommu->atu->iotsb->iotsb_num; + ret = pci_sun4v_iotsb_map(devhandle, + iotsb_num, + index_count, + prot, + __pa(pglist), + &num); + if (unlikely(ret != HV_EOK)) { + pr_err_ratelimited("%s: ATU map of [%08lx:%lx:%llx:%lx:%lx] failed with status %ld\n", + __func__, + devhandle, iotsb_num, + index_count, prot, + __pa(pglist), ret); + return -1; + } } - entry += num; npages -= num; pglist += num; @@ -111,19 +134,19 @@ static long iommu_batch_flush(struct iommu_batch *p) return 0; } -static inline void iommu_batch_new_entry(unsigned long entry) +static inline void iommu_batch_new_entry(unsigned long entry, u64 mask) { struct iommu_batch *p = this_cpu_ptr(&iommu_batch); if (p->entry + p->npages == entry) return; if (p->entry != ~0UL) - iommu_batch_flush(p); + iommu_batch_flush(p, mask); p->entry = entry; } /* Interrupts must be disabled. */ -static inline long iommu_batch_add(u64 phys_page) +static inline long iommu_batch_add(u64 phys_page, u64 mask) { struct iommu_batch *p = this_cpu_ptr(&iommu_batch); @@ -131,28 +154,31 @@ static inline long iommu_batch_add(u64 phys_page) p->pglist[p->npages++] = phys_page; if (p->npages == PGLIST_NENTS) - return iommu_batch_flush(p); + return iommu_batch_flush(p, mask); return 0; } /* Interrupts must be disabled. */ -static inline long iommu_batch_end(void) +static inline long iommu_batch_end(u64 mask) { struct iommu_batch *p = this_cpu_ptr(&iommu_batch); BUG_ON(p->npages >= PGLIST_NENTS); - return iommu_batch_flush(p); + return iommu_batch_flush(p, mask); } static void *dma_4v_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addrp, gfp_t gfp, unsigned long attrs) { + u64 mask; unsigned long flags, order, first_page, npages, n; unsigned long prot = 0; struct iommu *iommu; + struct atu *atu; + struct iommu_map_table *tbl; struct page *page; void *ret; long entry; @@ -177,14 +203,21 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, memset((char *)first_page, 0, PAGE_SIZE << order); iommu = dev->archdata.iommu; + atu = iommu->atu; - entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL, + mask = dev->coherent_dma_mask; + if (mask <= DMA_BIT_MASK(32)) + tbl = &iommu->tbl; + else + tbl = &atu->tbl; + + entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL, (unsigned long)(-1), 0); if (unlikely(entry == IOMMU_ERROR_CODE)) goto range_alloc_fail; - *dma_addrp = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT)); + *dma_addrp = (tbl->table_map_base + (entry << IO_PAGE_SHIFT)); ret = (void *) first_page; first_page = __pa(first_page); @@ -196,12 +229,12 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, entry); for (n = 0; n < npages; n++) { - long err = iommu_batch_add(first_page + (n * PAGE_SIZE)); + long err = iommu_batch_add(first_page + (n * PAGE_SIZE), mask); if (unlikely(err < 0L)) goto iommu_map_fail; } - if (unlikely(iommu_batch_end() < 0L)) + if (unlikely(iommu_batch_end(mask) < 0L)) goto iommu_map_fail; local_irq_restore(flags); @@ -209,7 +242,7 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, return ret; iommu_map_fail: - iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, IOMMU_ERROR_CODE); + iommu_tbl_range_free(tbl, *dma_addrp, npages, IOMMU_ERROR_CODE); range_alloc_fail: free_pages(first_page, order); @@ -253,18 +286,27 @@ unsigned long dma_4v_iotsb_bind(unsigned long devhandle, return 0; } -static void dma_4v_iommu_demap(void *demap_arg, unsigned long entry, - unsigned long npages) +static void dma_4v_iommu_demap(struct device *dev, unsigned long devhandle, + dma_addr_t dvma, unsigned long iotsb_num, + unsigned long entry, unsigned long npages) { - u32 devhandle = *(u32 *)demap_arg; unsigned long num, flags; + unsigned long ret; local_irq_save(flags); do { - num = pci_sun4v_iommu_demap(devhandle, - HV_PCI_TSBID(0, entry), - npages); - + if (dvma <= DMA_BIT_MASK(32)) { + num = pci_sun4v_iommu_demap(devhandle, + HV_PCI_TSBID(0, entry), + npages); + } else { + ret = pci_sun4v_iotsb_demap(devhandle, iotsb_num, + entry, npages, &num); + if (unlikely(ret != HV_EOK)) { + pr_err_ratelimited("pci_iotsb_demap() failed with error: %ld\n", + ret); + } + } entry += num; npages -= num; } while (npages != 0); @@ -276,16 +318,28 @@ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu, { struct pci_pbm_info *pbm; struct iommu *iommu; + struct atu *atu; + struct iommu_map_table *tbl; unsigned long order, npages, entry; + unsigned long iotsb_num; u32 devhandle; npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; iommu = dev->archdata.iommu; pbm = dev->archdata.host_controller; + atu = iommu->atu; devhandle = pbm->devhandle; - entry = ((dvma - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT); - dma_4v_iommu_demap(&devhandle, entry, npages); - iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE); + + if (dvma <= DMA_BIT_MASK(32)) { + tbl = &iommu->tbl; + iotsb_num = 0; /* we don't care for legacy iommu */ + } else { + tbl = &atu->tbl; + iotsb_num = atu->iotsb->iotsb_num; + } + entry = ((dvma - tbl->table_map_base) >> IO_PAGE_SHIFT); + dma_4v_iommu_demap(dev, devhandle, dvma, iotsb_num, entry, npages); + iommu_tbl_range_free(tbl, dvma, npages, IOMMU_ERROR_CODE); order = get_order(size); if (order < 10) free_pages((unsigned long)cpu, order); @@ -297,13 +351,17 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page, unsigned long attrs) { struct iommu *iommu; + struct atu *atu; + struct iommu_map_table *tbl; + u64 mask; unsigned long flags, npages, oaddr; unsigned long i, base_paddr; - u32 bus_addr, ret; unsigned long prot; + dma_addr_t bus_addr, ret; long entry; iommu = dev->archdata.iommu; + atu = iommu->atu; if (unlikely(direction == DMA_NONE)) goto bad; @@ -312,13 +370,19 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page, npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK); npages >>= IO_PAGE_SHIFT; - entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL, + mask = *dev->dma_mask; + if (mask <= DMA_BIT_MASK(32)) + tbl = &iommu->tbl; + else + tbl = &atu->tbl; + + entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL, (unsigned long)(-1), 0); if (unlikely(entry == IOMMU_ERROR_CODE)) goto bad; - bus_addr = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT)); + bus_addr = (tbl->table_map_base + (entry << IO_PAGE_SHIFT)); ret = bus_addr | (oaddr & ~IO_PAGE_MASK); base_paddr = __pa(oaddr & IO_PAGE_MASK); prot = HV_PCI_MAP_ATTR_READ; @@ -333,11 +397,11 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page, iommu_batch_start(dev, prot, entry); for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) { - long err = iommu_batch_add(base_paddr); + long err = iommu_batch_add(base_paddr, mask); if (unlikely(err < 0L)) goto iommu_map_fail; } - if (unlikely(iommu_batch_end() < 0L)) + if (unlikely(iommu_batch_end(mask) < 0L)) goto iommu_map_fail; local_irq_restore(flags); @@ -350,7 +414,7 @@ bad: return DMA_ERROR_CODE; iommu_map_fail: - iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE); + iommu_tbl_range_free(tbl, bus_addr, npages, IOMMU_ERROR_CODE); return DMA_ERROR_CODE; } @@ -360,7 +424,10 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr, { struct pci_pbm_info *pbm; struct iommu *iommu; + struct atu *atu; + struct iommu_map_table *tbl; unsigned long npages; + unsigned long iotsb_num; long entry; u32 devhandle; @@ -372,14 +439,23 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr, iommu = dev->archdata.iommu; pbm = dev->archdata.host_controller; + atu = iommu->atu; devhandle = pbm->devhandle; npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK); npages >>= IO_PAGE_SHIFT; bus_addr &= IO_PAGE_MASK; - entry = (bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT; - dma_4v_iommu_demap(&devhandle, entry, npages); - iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE); + + if (bus_addr <= DMA_BIT_MASK(32)) { + iotsb_num = 0; /* we don't care for legacy iommu */ + tbl = &iommu->tbl; + } else { + iotsb_num = atu->iotsb->iotsb_num; + tbl = &atu->tbl; + } + entry = (bus_addr - tbl->table_map_base) >> IO_PAGE_SHIFT; + dma_4v_iommu_demap(dev, devhandle, bus_addr, iotsb_num, entry, npages); + iommu_tbl_range_free(tbl, bus_addr, npages, IOMMU_ERROR_CODE); } static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, @@ -393,12 +469,17 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, unsigned long seg_boundary_size; int outcount, incount, i; struct iommu *iommu; + struct atu *atu; + struct iommu_map_table *tbl; + u64 mask; unsigned long base_shift; long err; BUG_ON(direction == DMA_NONE); iommu = dev->archdata.iommu; + atu = iommu->atu; + if (nelems == 0 || !iommu) return 0; @@ -424,7 +505,15 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, max_seg_size = dma_get_max_seg_size(dev); seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, IO_PAGE_SIZE) >> IO_PAGE_SHIFT; - base_shift = iommu->tbl.table_map_base >> IO_PAGE_SHIFT; + + mask = *dev->dma_mask; + if (mask <= DMA_BIT_MASK(32)) + tbl = &iommu->tbl; + else + tbl = &atu->tbl; + + base_shift = tbl->table_map_base >> IO_PAGE_SHIFT; + for_each_sg(sglist, s, nelems, i) { unsigned long paddr, npages, entry, out_entry = 0, slen; @@ -437,27 +526,26 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, /* Allocate iommu entries for that segment */ paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s); npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE); - entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, + entry = iommu_tbl_range_alloc(dev, tbl, npages, &handle, (unsigned long)(-1), 0); /* Handle failure */ if (unlikely(entry == IOMMU_ERROR_CODE)) { - if (printk_ratelimit()) - printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx" - " npages %lx\n", iommu, paddr, npages); + pr_err_ratelimited("iommu_alloc failed, iommu %p paddr %lx npages %lx\n", + tbl, paddr, npages); goto iommu_map_failed; } - iommu_batch_new_entry(entry); + iommu_batch_new_entry(entry, mask); /* Convert entry to a dma_addr_t */ - dma_addr = iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT); + dma_addr = tbl->table_map_base + (entry << IO_PAGE_SHIFT); dma_addr |= (s->offset & ~IO_PAGE_MASK); /* Insert into HW table */ paddr &= IO_PAGE_MASK; while (npages--) { - err = iommu_batch_add(paddr); + err = iommu_batch_add(paddr, mask); if (unlikely(err < 0L)) goto iommu_map_failed; paddr += IO_PAGE_SIZE; @@ -492,7 +580,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, dma_next = dma_addr + slen; } - err = iommu_batch_end(); + err = iommu_batch_end(mask); if (unlikely(err < 0L)) goto iommu_map_failed; @@ -515,7 +603,7 @@ iommu_map_failed: vaddr = s->dma_address & IO_PAGE_MASK; npages = iommu_num_pages(s->dma_address, s->dma_length, IO_PAGE_SIZE); - iommu_tbl_range_free(&iommu->tbl, vaddr, npages, + iommu_tbl_range_free(tbl, vaddr, npages, IOMMU_ERROR_CODE); /* XXX demap? XXX */ s->dma_address = DMA_ERROR_CODE; @@ -536,13 +624,16 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, struct pci_pbm_info *pbm; struct scatterlist *sg; struct iommu *iommu; + struct atu *atu; unsigned long flags, entry; + unsigned long iotsb_num; u32 devhandle; BUG_ON(direction == DMA_NONE); iommu = dev->archdata.iommu; pbm = dev->archdata.host_controller; + atu = iommu->atu; devhandle = pbm->devhandle; local_irq_save(flags); @@ -552,15 +643,24 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, dma_addr_t dma_handle = sg->dma_address; unsigned int len = sg->dma_length; unsigned long npages; - struct iommu_map_table *tbl = &iommu->tbl; + struct iommu_map_table *tbl; unsigned long shift = IO_PAGE_SHIFT; if (!len) break; npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE); + + if (dma_handle <= DMA_BIT_MASK(32)) { + iotsb_num = 0; /* we don't care for legacy iommu */ + tbl = &iommu->tbl; + } else { + iotsb_num = atu->iotsb->iotsb_num; + tbl = &atu->tbl; + } entry = ((dma_handle - tbl->table_map_base) >> shift); - dma_4v_iommu_demap(&devhandle, entry, npages); - iommu_tbl_range_free(&iommu->tbl, dma_handle, npages, + dma_4v_iommu_demap(dev, devhandle, dma_handle, iotsb_num, + entry, npages); + iommu_tbl_range_free(tbl, dma_handle, npages, IOMMU_ERROR_CODE); sg = sg_next(sg); } diff --git a/arch/sparc/kernel/pci_sun4v.h b/arch/sparc/kernel/pci_sun4v.h index 1019e0fe6e9d..22603a4e48bf 100644 --- a/arch/sparc/kernel/pci_sun4v.h +++ b/arch/sparc/kernel/pci_sun4v.h @@ -99,4 +99,15 @@ unsigned long pci_sun4v_iotsb_conf(unsigned long devhandle, unsigned long pci_sun4v_iotsb_bind(unsigned long devhandle, unsigned long iotsb_num, unsigned int pci_device); +unsigned long pci_sun4v_iotsb_map(unsigned long devhandle, + unsigned long iotsb_num, + unsigned long iotsb_index_iottes, + unsigned long io_attributes, + unsigned long io_page_list_pa, + long *mapped); +unsigned long pci_sun4v_iotsb_demap(unsigned long devhandle, + unsigned long iotsb_num, + unsigned long iotsb_index, + unsigned long iottes, + unsigned long *demapped); #endif /* !(_PCI_SUN4V_H) */ diff --git a/arch/sparc/kernel/pci_sun4v_asm.S b/arch/sparc/kernel/pci_sun4v_asm.S index 22024a96c317..578f09657916 100644 --- a/arch/sparc/kernel/pci_sun4v_asm.S +++ b/arch/sparc/kernel/pci_sun4v_asm.S @@ -392,3 +392,39 @@ ENTRY(pci_sun4v_iotsb_bind) retl nop ENDPROC(pci_sun4v_iotsb_bind) + + /* + * %o0: devhandle + * %o1: iotsb_num/iotsb_handle + * %o2: index_count + * %o3: iotte_attributes + * %o4: io_page_list_p + * %o5: &mapped + * + * returns %o0: status + * %o1: #mapped + */ +ENTRY(pci_sun4v_iotsb_map) + mov %o5, %g1 + mov HV_FAST_PCI_IOTSB_MAP, %o5 + ta HV_FAST_TRAP + retl + stx %o1, [%g1] +ENDPROC(pci_sun4v_iotsb_map) + + /* + * %o0: devhandle + * %o1: iotsb_num/iotsb_handle + * %o2: iotsb_index + * %o3: #iottes + * %o4: &demapped + * + * returns %o0: status + * %o1: #demapped + */ +ENTRY(pci_sun4v_iotsb_demap) + mov HV_FAST_PCI_IOTSB_DEMAP, %o5 + ta HV_FAST_TRAP + retl + stx %o1, [%o4] +ENDPROC(pci_sun4v_iotsb_demap) From d30a6b84df00128e03588564925dc828a53e6865 Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Fri, 28 Oct 2016 10:12:45 -0700 Subject: [PATCH 386/503] sparc64: Enable 64-bit DMA ATU 64bit addressing allows PCIe devices with 64bit DMA capabilities to use ATU for 64bit DMA. Signed-off-by: Tushar Dave Reviewed-by: chris hyser Acked-by: Sowmini Varadhan Signed-off-by: David S. Miller --- arch/sparc/Kconfig | 4 ++++ arch/sparc/kernel/iommu.c | 8 ++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 5202eb4ba2db..60145c9b9f84 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -93,6 +93,10 @@ config ARCH_ATU bool default y if SPARC64 +config ARCH_DMA_ADDR_T_64BIT + bool + default y if ARCH_ATU + config IOMMU_HELPER bool default y if SPARC64 diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index 5c615abff030..852a3291db96 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -760,8 +760,12 @@ int dma_supported(struct device *dev, u64 device_mask) struct iommu *iommu = dev->archdata.iommu; u64 dma_addr_mask = iommu->dma_addr_mask; - if (device_mask >= (1UL << 32UL)) - return 0; + if (device_mask > DMA_BIT_MASK(32)) { + if (iommu->atu) + dma_addr_mask = iommu->atu->dma_addr_mask; + else + return 0; + } if ((device_mask & dma_addr_mask) == dma_addr_mask) return 1; From 5cc7861eb5b425c7a30ff7676a4b9d0ca62d5c76 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 14 Nov 2016 11:19:56 -0500 Subject: [PATCH 387/503] NFSv4: Don't call close if the open stateid has already been cleared Ensure we test to see if the open stateid is actually set, before we send a CLOSE. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0b3cdf856333..2d1481eb1929 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3122,7 +3122,8 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) } else if (is_rdwr) calldata->arg.fmode |= FMODE_READ|FMODE_WRITE; - if (!nfs4_valid_open_stateid(state)) + if (!nfs4_valid_open_stateid(state) || + test_bit(NFS_OPEN_STATE, &state->flags) == 0) call_close = 0; spin_unlock(&state->owner->so_lock); From 266439c94df9e6aee3390c6e1cfdb645e566f704 Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Mon, 17 Oct 2016 13:56:59 -0700 Subject: [PATCH 388/503] sunqe: Fix compiler warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sunqe uses '__u32' for dma handle while invoking kernel DMA APIs, instead of using dma_addr_t. This hasn't caused any 'incompatible pointer type' warning on SPARC because until now dma_addr_t is of type u32. However, recent changes in SPARC ATU (iommu) enables 64bit DMA and therefore dma_addr_t becomes of type u64. This makes 'incompatible pointer type' warnings inevitable. e.g. drivers/net/ethernet/sun/sunqe.c: In function ‘qec_ether_init’: drivers/net/ethernet/sun/sunqe.c:883: warning: passing argument 3 of ‘dma_alloc_coherent’ from incompatible pointer type ./include/linux/dma-mapping.h:445: note: expected ‘dma_addr_t *’ but argument is of type ‘__u32 *’ drivers/net/ethernet/sun/sunqe.c:885: warning: passing argument 3 of ‘dma_alloc_coherent’ from incompatible pointer type ./include/linux/dma-mapping.h:445: note: expected ‘dma_addr_t *’ but argument is of type ‘__u32 *’ This patch resolves above compiler warnings. Signed-off-by: Tushar Dave Reviewed-by: chris hyser Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/sunqe.c | 11 ++++++----- drivers/net/ethernet/sun/sunqe.h | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/sun/sunqe.c b/drivers/net/ethernet/sun/sunqe.c index 9b825780b3be..9582948145c1 100644 --- a/drivers/net/ethernet/sun/sunqe.c +++ b/drivers/net/ethernet/sun/sunqe.c @@ -124,7 +124,7 @@ static void qe_init_rings(struct sunqe *qep) { struct qe_init_block *qb = qep->qe_block; struct sunqe_buffers *qbufs = qep->buffers; - __u32 qbufs_dvma = qep->buffers_dvma; + __u32 qbufs_dvma = (__u32)qep->buffers_dvma; int i; qep->rx_new = qep->rx_old = qep->tx_new = qep->tx_old = 0; @@ -144,6 +144,7 @@ static int qe_init(struct sunqe *qep, int from_irq) void __iomem *mregs = qep->mregs; void __iomem *gregs = qecp->gregs; unsigned char *e = &qep->dev->dev_addr[0]; + __u32 qblk_dvma = (__u32)qep->qblock_dvma; u32 tmp; int i; @@ -152,8 +153,8 @@ static int qe_init(struct sunqe *qep, int from_irq) return -EAGAIN; /* Setup initial rx/tx init block pointers. */ - sbus_writel(qep->qblock_dvma + qib_offset(qe_rxd, 0), cregs + CREG_RXDS); - sbus_writel(qep->qblock_dvma + qib_offset(qe_txd, 0), cregs + CREG_TXDS); + sbus_writel(qblk_dvma + qib_offset(qe_rxd, 0), cregs + CREG_RXDS); + sbus_writel(qblk_dvma + qib_offset(qe_txd, 0), cregs + CREG_TXDS); /* Enable/mask the various irq's. */ sbus_writel(0, cregs + CREG_RIMASK); @@ -413,7 +414,7 @@ static void qe_rx(struct sunqe *qep) struct net_device *dev = qep->dev; struct qe_rxd *this; struct sunqe_buffers *qbufs = qep->buffers; - __u32 qbufs_dvma = qep->buffers_dvma; + __u32 qbufs_dvma = (__u32)qep->buffers_dvma; int elem = qep->rx_new; u32 flags; @@ -572,7 +573,7 @@ static int qe_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct sunqe *qep = netdev_priv(dev); struct sunqe_buffers *qbufs = qep->buffers; - __u32 txbuf_dvma, qbufs_dvma = qep->buffers_dvma; + __u32 txbuf_dvma, qbufs_dvma = (__u32)qep->buffers_dvma; unsigned char *txbuf; int len, entry; diff --git a/drivers/net/ethernet/sun/sunqe.h b/drivers/net/ethernet/sun/sunqe.h index 581781b6b2fa..ae190b77431b 100644 --- a/drivers/net/ethernet/sun/sunqe.h +++ b/drivers/net/ethernet/sun/sunqe.h @@ -334,12 +334,12 @@ struct sunqe { void __iomem *qcregs; /* QEC per-channel Registers */ void __iomem *mregs; /* Per-channel MACE Registers */ struct qe_init_block *qe_block; /* RX and TX descriptors */ - __u32 qblock_dvma; /* RX and TX descriptors */ + dma_addr_t qblock_dvma; /* RX and TX descriptors */ spinlock_t lock; /* Protects txfull state */ int rx_new, rx_old; /* RX ring extents */ int tx_new, tx_old; /* TX ring extents */ struct sunqe_buffers *buffers; /* CPU visible address. */ - __u32 buffers_dvma; /* DVMA visible address. */ + dma_addr_t buffers_dvma; /* DVMA visible address. */ struct sunqec *parent; u8 mconfig; /* Base MACE mconfig value */ struct platform_device *op; /* QE's OF device struct */ From 1a9bbccaf8182da368dae454b57dc1c55074d266 Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Mon, 17 Oct 2016 13:57:00 -0700 Subject: [PATCH 389/503] sunbmac: Fix compiler warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sunbmac uses '__u32' for dma handle while invoking kernel DMA APIs, instead of using dma_addr_t. This hasn't caused any 'incompatible pointer type' warning on SPARC because until now dma_addr_t is of type u32. However, recent changes in SPARC ATU (iommu) enables 64bit DMA and therefore dma_addr_t becomes of type u64. This makes 'incompatible pointer type' warnings inevitable. e.g. drivers/net/ethernet/sun/sunbmac.c: In function ‘bigmac_ether_init’: drivers/net/ethernet/sun/sunbmac.c:1166: warning: passing argument 3 of ‘dma_alloc_coherent’ from incompatible pointer type ./include/linux/dma-mapping.h:445: note: expected ‘dma_addr_t *’ but argument is of type ‘__u32 *’ This patch resolves above compiler warning. Signed-off-by: Tushar Dave Reviewed-by: chris hyser Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/sunbmac.c | 5 +++-- drivers/net/ethernet/sun/sunbmac.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c index aa4f9d2d8fa9..02f452730d52 100644 --- a/drivers/net/ethernet/sun/sunbmac.c +++ b/drivers/net/ethernet/sun/sunbmac.c @@ -623,6 +623,7 @@ static int bigmac_init_hw(struct bigmac *bp, int from_irq) void __iomem *gregs = bp->gregs; void __iomem *cregs = bp->creg; void __iomem *bregs = bp->bregs; + __u32 bblk_dvma = (__u32)bp->bblock_dvma; unsigned char *e = &bp->dev->dev_addr[0]; /* Latch current counters into statistics. */ @@ -671,9 +672,9 @@ static int bigmac_init_hw(struct bigmac *bp, int from_irq) bregs + BMAC_XIFCFG); /* Tell the QEC where the ring descriptors are. */ - sbus_writel(bp->bblock_dvma + bib_offset(be_rxd, 0), + sbus_writel(bblk_dvma + bib_offset(be_rxd, 0), cregs + CREG_RXDS); - sbus_writel(bp->bblock_dvma + bib_offset(be_txd, 0), + sbus_writel(bblk_dvma + bib_offset(be_txd, 0), cregs + CREG_TXDS); /* Setup the FIFO pointers into QEC local memory. */ diff --git a/drivers/net/ethernet/sun/sunbmac.h b/drivers/net/ethernet/sun/sunbmac.h index 06dd21707353..532fc56830cf 100644 --- a/drivers/net/ethernet/sun/sunbmac.h +++ b/drivers/net/ethernet/sun/sunbmac.h @@ -291,7 +291,7 @@ struct bigmac { void __iomem *bregs; /* BigMAC Registers */ void __iomem *tregs; /* BigMAC Transceiver */ struct bmac_init_block *bmac_block; /* RX and TX descriptors */ - __u32 bblock_dvma; /* RX and TX descriptors */ + dma_addr_t bblock_dvma; /* RX and TX descriptors */ spinlock_t lock; From d41cbfc9a64d11835a5b5b90caa7d6f3a88eb1df Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Mon, 14 Nov 2016 11:51:37 -0500 Subject: [PATCH 390/503] NFSv4.1: Handle NFS4ERR_OLD_STATEID in nfs4_reclaim_open_state Now that we're doing TEST_STATEID in nfs4_reclaim_open_state(), we can have a NFS4ERR_OLD_STATEID returned from nfs41_open_expired() . Instead of marking state recovery as failed, mark the state for recovery again. Signed-off-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/nfs4state.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 5f4281ec5f72..0959c9661662 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1547,6 +1547,7 @@ restart: ssleep(1); case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_OLD_STATEID: case -NFS4ERR_BAD_STATEID: case -NFS4ERR_RECLAIM_BAD: case -NFS4ERR_RECLAIM_CONFLICT: From e6b5f1be7afe1657c40c08082c562b1a036a54c1 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Wed, 2 Nov 2016 09:36:32 -0700 Subject: [PATCH 391/503] config: Adding the new config parameter CONFIG_PROVE_LOCKING_SMALL for sparc This new config parameter limits the space used for "Lock debugging: prove locking correctness" by about 4MB. The current sparc systems have the limitation of 32MB size for kernel size including .text, .data and .bss sections. With PROVE_LOCKING feature, the kernel size could grow beyond this limit and causing system boot-up issues. With this option, kernel limits the size of the entries of lock_chains, stack_trace etc., so that kernel fits in required size limit. This is not visible to user and only used for sparc. Signed-off-by: Babu Moger Acked-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/Kconfig | 1 + lib/Kconfig.debug | 3 +++ 2 files changed, 4 insertions(+) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 60145c9b9f84..165ecdd24d22 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -43,6 +43,7 @@ config SPARC select ARCH_HAS_SG_CHAIN select CPU_NO_EFFICIENT_FFS select HAVE_ARCH_HARDENED_USERCOPY + select PROVE_LOCKING_SMALL if PROVE_LOCKING config SPARC32 def_bool !64BIT diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b01e547d4d04..a6c8db1d62f6 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1085,6 +1085,9 @@ config PROVE_LOCKING For more details, see Documentation/locking/lockdep-design.txt. +config PROVE_LOCKING_SMALL + bool + config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT From e245d99e6cc4a0b904b87b46b4f60d46fb405987 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Wed, 2 Nov 2016 09:36:33 -0700 Subject: [PATCH 392/503] lockdep: Limit static allocations if PROVE_LOCKING_SMALL is defined Reduce the size of data structure for lockdep entries by half if PROVE_LOCKING_SMALL if defined. This is used only for sparc. Signed-off-by: Babu Moger Acked-by: Sam Ravnborg Signed-off-by: David S. Miller --- kernel/locking/lockdep_internals.h | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index 51c4b24b6328..c2b88490d857 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -45,6 +45,14 @@ enum { #define LOCKF_USED_IN_IRQ_READ \ (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) +/* + * CONFIG_PROVE_LOCKING_SMALL is defined for sparc. Sparc requires .text, + * .data and .bss to fit in required 32MB limit for the kernel. With + * PROVE_LOCKING we could go over this limit and cause system boot-up problems. + * So, reduce the static allocations for lockdeps related structures so that + * everything fits in current required size limit. + */ +#ifdef CONFIG_PROVE_LOCKING_SMALL /* * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies * we track. @@ -54,18 +62,24 @@ enum { * table (if it's not there yet), and we check it for lock order * conflicts and deadlocks. */ +#define MAX_LOCKDEP_ENTRIES 16384UL +#define MAX_LOCKDEP_CHAINS_BITS 15 +#define MAX_STACK_TRACE_ENTRIES 262144UL +#else #define MAX_LOCKDEP_ENTRIES 32768UL #define MAX_LOCKDEP_CHAINS_BITS 16 -#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) - -#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) /* * Stack-trace: tightly packed array of stack backtrace * addresses. Protected by the hash_lock. */ #define MAX_STACK_TRACE_ENTRIES 524288UL +#endif + +#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) + +#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) extern struct list_head all_lock_classes; extern struct lock_chain lock_chains[]; From dbfa048db97c15ee3fff2ee17b19e61f3ab12d53 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Tue, 15 Nov 2016 11:12:05 +0100 Subject: [PATCH 393/503] MAINTAINERS: Add LED subsystem co-maintainer Mark me as a co-maintainer of LED subsystem. Signed-off-by: Pavel Machek Signed-off-by: Jacek Anaszewski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 411e3b87b8c2..2433e471634f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7084,6 +7084,7 @@ F: drivers/scsi/53c700* LED SUBSYSTEM M: Richard Purdie M: Jacek Anaszewski +M: Pavel Machek L: linux-leds@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/j.anaszewski/linux-leds.git S: Maintained From 178c7ae944444c198a1d9646477ab10d2d51f03e Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 19 Nov 2016 01:40:10 +0300 Subject: [PATCH 394/503] net: macb: add check for dma mapping error in start_xmit() at91ether_start_xmit() does not check for dma mapping errors. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index b32444a3ed79..533653bd7aec 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -2673,6 +2673,12 @@ static int at91ether_start_xmit(struct sk_buff *skb, struct net_device *dev) lp->skb_length = skb->len; lp->skb_physaddr = dma_map_single(NULL, skb->data, skb->len, DMA_TO_DEVICE); + if (dma_mapping_error(NULL, lp->skb_physaddr)) { + dev_kfree_skb_any(skb); + dev->stats.tx_dropped++; + netdev_err(dev, "%s: DMA mapping error\n", __func__); + return NETDEV_TX_OK; + } /* Set address of the data in the Transmit Address register */ macb_writel(lp, TAR, lp->skb_physaddr); From 9dd35d6882a10629b95f2bc41a541740ef24c226 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 18 Nov 2016 22:21:17 +0800 Subject: [PATCH 395/503] sparc: drop duplicate header scatterlist.h Drop duplicate header scatterlist.h from iommu_common.h. Signed-off-by: Geliang Tang Signed-off-by: David S. Miller --- arch/sparc/kernel/iommu_common.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sparc/kernel/iommu_common.h b/arch/sparc/kernel/iommu_common.h index b40cec252905..828493329f68 100644 --- a/arch/sparc/kernel/iommu_common.h +++ b/arch/sparc/kernel/iommu_common.h @@ -13,7 +13,6 @@ #include #include #include -#include #include From 8b9534406456313beb7bf9051150b50c63049ab7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 16 Nov 2016 18:31:30 +0100 Subject: [PATCH 396/503] KVM: x86: do not go through vcpu in __get_kvmclock_ns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Going through the first VCPU is wrong if you follow a KVM_SET_CLOCK with a KVM_GET_CLOCK immediately after, without letting the VCPU run and call kvm_guest_time_update. To fix this, compute the kvmclock value ourselves, using the master clock (tsc, nsec) pair as the base and the host CPU frequency as the scale. Reported-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3017de0431bd..7d3d9d4d6124 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1724,18 +1724,23 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) static u64 __get_kvmclock_ns(struct kvm *kvm) { - struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, 0); struct kvm_arch *ka = &kvm->arch; - s64 ns; + struct pvclock_vcpu_time_info hv_clock; - if (vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT) { - u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc()); - ns = __pvclock_read_cycles(&vcpu->arch.hv_clock, tsc); - } else { - ns = ktime_get_boot_ns() + ka->kvmclock_offset; + spin_lock(&ka->pvclock_gtod_sync_lock); + if (!ka->use_master_clock) { + spin_unlock(&ka->pvclock_gtod_sync_lock); + return ktime_get_boot_ns() + ka->kvmclock_offset; } - return ns; + hv_clock.tsc_timestamp = ka->master_cycle_now; + hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset; + spin_unlock(&ka->pvclock_gtod_sync_lock); + + kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, + &hv_clock.tsc_shift, + &hv_clock.tsc_to_system_mul); + return __pvclock_read_cycles(&hv_clock, rdtsc()); } u64 get_kvmclock_ns(struct kvm *kvm) From 910170442944e1f8674fd5ddbeeb8ccd1877ea98 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 12 Sep 2016 10:49:11 +0800 Subject: [PATCH 397/503] iommu/vt-d: Fix PASID table allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Somehow I ended up with an off-by-three error in calculating the size of the PASID and PASID State tables, which triggers allocations failures as those tables unfortunately have to be physically contiguous. In fact, even the *correct* maximum size of 8MiB is problematic and is wont to lead to allocation failures. Since I have extracted a promise that this *will* be fixed in hardware, I'm happy to limit it on the current hardware to a maximum of 0x20000 PASIDs, which gives us 1MiB tables — still not ideal, but better than before. Reported by Mika Kuoppala and also by Xunlei Pang who submitted a simpler patch to fix only the allocation (and not the free) to the "correct" limit... which was still problematic. Signed-off-by: David Woodhouse Cc: stable@vger.kernel.org --- drivers/iommu/intel-svm.c | 26 ++++++++++++++++---------- include/linux/intel-iommu.h | 1 + 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 8ebb3530afa7..cb72e0011310 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -39,10 +39,18 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) struct page *pages; int order; - order = ecap_pss(iommu->ecap) + 7 - PAGE_SHIFT; - if (order < 0) - order = 0; + /* Start at 2 because it's defined as 2^(1+PSS) */ + iommu->pasid_max = 2 << ecap_pss(iommu->ecap); + /* Eventually I'm promised we will get a multi-level PASID table + * and it won't have to be physically contiguous. Until then, + * limit the size because 8MiB contiguous allocations can be hard + * to come by. The limit of 0x20000, which is 1MiB for each of + * the PASID and PASID-state tables, is somewhat arbitrary. */ + if (iommu->pasid_max > 0x20000) + iommu->pasid_max = 0x20000; + + order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max); pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); if (!pages) { pr_warn("IOMMU: %s: Failed to allocate PASID table\n", @@ -53,6 +61,8 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) pr_info("%s: Allocated order %d PASID table.\n", iommu->name, order); if (ecap_dis(iommu->ecap)) { + /* Just making it explicit... */ + BUILD_BUG_ON(sizeof(struct pasid_entry) != sizeof(struct pasid_state_entry)); pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); if (pages) iommu->pasid_state_table = page_address(pages); @@ -68,11 +78,7 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) int intel_svm_free_pasid_tables(struct intel_iommu *iommu) { - int order; - - order = ecap_pss(iommu->ecap) + 7 - PAGE_SHIFT; - if (order < 0) - order = 0; + int order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max); if (iommu->pasid_table) { free_pages((unsigned long)iommu->pasid_table, order); @@ -371,8 +377,8 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ } svm->iommu = iommu; - if (pasid_max > 2 << ecap_pss(iommu->ecap)) - pasid_max = 2 << ecap_pss(iommu->ecap); + if (pasid_max > iommu->pasid_max) + pasid_max = iommu->pasid_max; /* Do not use PASID 0 in caching mode (virtualised IOMMU) */ ret = idr_alloc(&iommu->pasid_idr, svm, diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 2d9b650047a5..d49e26c6cdc7 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -429,6 +429,7 @@ struct intel_iommu { struct page_req_dsc *prq; unsigned char prq_name[16]; /* Name for PRQ interrupt */ struct idr pasid_idr; + u32 pasid_max; #endif struct q_inval *qi; /* Queued invalidation info */ u32 *iommu_state; /* Store iommu states between suspend and resume.*/ From 1650b4ebc99da4c137bfbfc531be4a2405f951dd Mon Sep 17 00:00:00 2001 From: Ignacio Alvarado Date: Fri, 4 Nov 2016 12:15:55 -0700 Subject: [PATCH 398/503] KVM: Disable irq while unregistering user notifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Function user_notifier_unregister should be called only once for each registered user notifier. Function kvm_arch_hardware_disable can be executed from an IPI context which could cause a race condition with a VCPU returning to user mode and attempting to unregister the notifier. Signed-off-by: Ignacio Alvarado Cc: stable@vger.kernel.org Fixes: 18863bdd60f8 ("KVM: x86 shared msr infrastructure") Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7d3d9d4d6124..2f27af4f312a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -210,7 +210,18 @@ static void kvm_on_user_return(struct user_return_notifier *urn) struct kvm_shared_msrs *locals = container_of(urn, struct kvm_shared_msrs, urn); struct kvm_shared_msr_values *values; + unsigned long flags; + /* + * Disabling irqs at this point since the following code could be + * interrupted and executed through kvm_arch_hardware_disable() + */ + local_irq_save(flags); + if (locals->registered) { + locals->registered = false; + user_return_notifier_unregister(urn); + } + local_irq_restore(flags); for (slot = 0; slot < shared_msrs_global.nr; ++slot) { values = &locals->values[slot]; if (values->host != values->curr) { @@ -218,8 +229,6 @@ static void kvm_on_user_return(struct user_return_notifier *urn) values->curr = values->host; } } - locals->registered = false; - user_return_notifier_unregister(urn); } static void shared_msr_update(unsigned slot, u32 msr) From e3fd9a93a12a1020067a676e826877623cee8e2b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 9 Nov 2016 17:48:15 +0100 Subject: [PATCH 399/503] kvm: kvmclock: let KVM_GET_CLOCK return whether the master clock is in use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Userspace can read the exact value of kvmclock by reading the TSC and fetching the timekeeping parameters out of guest memory. This however is brittle and not necessary anymore with KVM 4.11. Provide a mechanism that lets userspace know if the new KVM_GET_CLOCK semantics are in effect, and---since we are at it---if the clock is stable across all VCPUs. Cc: Radim Krčmář Cc: Marcelo Tosatti Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- Documentation/virtual/kvm/api.txt | 11 +++++++++++ arch/x86/kvm/x86.c | 10 +++++++--- include/uapi/linux/kvm.h | 7 +++++++ 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 739db9ab16b2..6bbceb9a3a19 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -777,6 +777,17 @@ Gets the current timestamp of kvmclock as seen by the current guest. In conjunction with KVM_SET_CLOCK, it is used to ensure monotonicity on scenarios such as migration. +When KVM_CAP_ADJUST_CLOCK is passed to KVM_CHECK_EXTENSION, it returns the +set of bits that KVM can return in struct kvm_clock_data's flag member. + +The only flag defined now is KVM_CLOCK_TSC_STABLE. If set, the returned +value is the exact kvmclock value seen by all VCPUs at the instant +when KVM_GET_CLOCK was called. If clear, the returned value is simply +CLOCK_MONOTONIC plus a constant offset; the offset can be modified +with KVM_SET_CLOCK. KVM will try to make all VCPUs follow this clock, +but the exact value read by each VCPU could differ, because the host +TSC is not stable. + struct kvm_clock_data { __u64 clock; /* kvmclock current value */ __u32 flags; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2f27af4f312a..3320804bb2ac 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2610,7 +2610,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_PIT_STATE2: case KVM_CAP_SET_IDENTITY_MAP_ADDR: case KVM_CAP_XEN_HVM: - case KVM_CAP_ADJUST_CLOCK: case KVM_CAP_VCPU_EVENTS: case KVM_CAP_HYPERV: case KVM_CAP_HYPERV_VAPIC: @@ -2637,6 +2636,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #endif r = 1; break; + case KVM_CAP_ADJUST_CLOCK: + r = KVM_CLOCK_TSC_STABLE; + break; case KVM_CAP_X86_SMM: /* SMBASE is usually relocated above 1M on modern chipsets, * and SMM handlers might indeed rely on 4G segment limits, @@ -4117,9 +4119,11 @@ long kvm_arch_vm_ioctl(struct file *filp, struct kvm_clock_data user_ns; u64 now_ns; - now_ns = get_kvmclock_ns(kvm); + local_irq_disable(); + now_ns = __get_kvmclock_ns(kvm); user_ns.clock = now_ns; - user_ns.flags = 0; + user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0; + local_irq_enable(); memset(&user_ns.pad, 0, sizeof(user_ns.pad)); r = -EFAULT; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 300ef255d1e0..4ee67cb99143 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -972,12 +972,19 @@ struct kvm_irqfd { __u8 pad[16]; }; +/* For KVM_CAP_ADJUST_CLOCK */ + +/* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */ +#define KVM_CLOCK_TSC_STABLE 2 + struct kvm_clock_data { __u64 clock; __u32 flags; __u32 pad[9]; }; +/* For KVM_CAP_SW_TLB */ + #define KVM_MMU_FSL_BOOKE_NOHV 0 #define KVM_MMU_FSL_BOOKE_HV 1 From 22583f0d9c85e60c9860bc8a0ebff59fe08be6d7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 17 Nov 2016 15:55:45 +0100 Subject: [PATCH 400/503] KVM: async_pf: avoid recursive flushing of work items MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was reported by syzkaller: [ INFO: possible recursive locking detected ] 4.9.0-rc4+ #49 Not tainted --------------------------------------------- kworker/2:1/5658 is trying to acquire lock: ([ 1644.769018] (&work->work) [< inline >] list_empty include/linux/compiler.h:243 [] flush_work+0x0/0x660 kernel/workqueue.c:1511 but task is already holding lock: ([ 1644.769018] (&work->work) [] process_one_work+0x94b/0x1900 kernel/workqueue.c:2093 stack backtrace: CPU: 2 PID: 5658 Comm: kworker/2:1 Not tainted 4.9.0-rc4+ #49 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: events async_pf_execute ffff8800676ff630 ffffffff81c2e46b ffffffff8485b930 ffff88006b1fc480 0000000000000000 ffffffff8485b930 ffff8800676ff7e0 ffffffff81339b27 ffff8800676ff7e8 0000000000000046 ffff88006b1fcce8 ffff88006b1fccf0 Call Trace: ... [] flush_work+0x93/0x660 kernel/workqueue.c:2846 [] __cancel_work_timer+0x17a/0x410 kernel/workqueue.c:2916 [] cancel_work_sync+0x17/0x20 kernel/workqueue.c:2951 [] kvm_clear_async_pf_completion_queue+0xd7/0x400 virt/kvm/async_pf.c:126 [< inline >] kvm_free_vcpus arch/x86/kvm/x86.c:7841 [] kvm_arch_destroy_vm+0x23d/0x620 arch/x86/kvm/x86.c:7946 [< inline >] kvm_destroy_vm virt/kvm/kvm_main.c:731 [] kvm_put_kvm+0x40e/0x790 virt/kvm/kvm_main.c:752 [] async_pf_execute+0x23d/0x4f0 virt/kvm/async_pf.c:111 [] process_one_work+0x9fc/0x1900 kernel/workqueue.c:2096 [] worker_thread+0xef/0x1480 kernel/workqueue.c:2230 [] kthread+0x244/0x2d0 kernel/kthread.c:209 [] ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:433 The reason is that kvm_put_kvm is causing the destruction of the VM, but the page fault is still on the ->queue list. The ->queue list is owned by the VCPU, not by the work items, so we cannot just add list_del to the work item. Instead, use work->vcpu to note async page faults that have been resolved and will be processed through the done list. There is no need to flush those. Cc: Dmitry Vyukov Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- virt/kvm/async_pf.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 8035cc1eb955..efeceb0a222d 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -91,6 +91,7 @@ static void async_pf_execute(struct work_struct *work) spin_lock(&vcpu->async_pf.lock); list_add_tail(&apf->link, &vcpu->async_pf.done); + apf->vcpu = NULL; spin_unlock(&vcpu->async_pf.lock); /* @@ -113,6 +114,8 @@ static void async_pf_execute(struct work_struct *work) void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) { + spin_lock(&vcpu->async_pf.lock); + /* cancel outstanding work queue item */ while (!list_empty(&vcpu->async_pf.queue)) { struct kvm_async_pf *work = @@ -120,6 +123,14 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) typeof(*work), queue); list_del(&work->queue); + /* + * We know it's present in vcpu->async_pf.done, do + * nothing here. + */ + if (!work->vcpu) + continue; + + spin_unlock(&vcpu->async_pf.lock); #ifdef CONFIG_KVM_ASYNC_PF_SYNC flush_work(&work->work); #else @@ -129,9 +140,9 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) kmem_cache_free(async_pf_cache, work); } #endif + spin_lock(&vcpu->async_pf.lock); } - spin_lock(&vcpu->async_pf.lock); while (!list_empty(&vcpu->async_pf.done)) { struct kvm_async_pf *work = list_first_entry(&vcpu->async_pf.done, From 7301d6abaea926d685832f7e1f0c37dd206b01f4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 17 Nov 2016 15:55:46 +0100 Subject: [PATCH 401/503] KVM: x86: fix missed SRCU usage in kvm_lapic_set_vapic_addr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by syzkaller: [ INFO: suspicious RCU usage. ] 4.9.0-rc4+ #47 Not tainted ------------------------------- ./include/linux/kvm_host.h:536 suspicious rcu_dereference_check() usage! stack backtrace: CPU: 1 PID: 6679 Comm: syz-executor Not tainted 4.9.0-rc4+ #47 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 ffff880039e2f6d0 ffffffff81c2e46b ffff88003e3a5b40 0000000000000000 0000000000000001 ffffffff83215600 ffff880039e2f700 ffffffff81334ea9 ffffc9000730b000 0000000000000004 ffff88003c4f8420 ffff88003d3f8000 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0xb3/0x118 lib/dump_stack.c:51 [] lockdep_rcu_suspicious+0x139/0x180 kernel/locking/lockdep.c:4445 [< inline >] __kvm_memslots include/linux/kvm_host.h:534 [< inline >] kvm_memslots include/linux/kvm_host.h:541 [] kvm_gfn_to_hva_cache_init+0xa1e/0xce0 virt/kvm/kvm_main.c:1941 [] kvm_lapic_set_vapic_addr+0xed/0x140 arch/x86/kvm/lapic.c:2217 Reported-by: Dmitry Vyukov Fixes: fda4e2e85589191b123d31cdc21fd33ee70f50fd Cc: Andrew Honig Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3320804bb2ac..04c5d96b1d67 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3431,6 +3431,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, }; case KVM_SET_VAPIC_ADDR: { struct kvm_vapic_addr va; + int idx; r = -EINVAL; if (!lapic_in_kernel(vcpu)) @@ -3438,7 +3439,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&va, argp, sizeof va)) goto out; + idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; } case KVM_X86_SETUP_MCE: { From a2b07739ff5ded8ca7e9c7ff0749ed6f0d36aee2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 17 Nov 2016 15:55:47 +0100 Subject: [PATCH 402/503] kvm: x86: merge kvm_arch_set_irq and kvm_arch_set_irq_inatomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kvm_arch_set_irq is unused since commit b97e6de9c96. Merge its functionality with kvm_arch_set_irq_inatomic. Reported-by: Jiang Biao Signed-off-by: Paolo Bonzini Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář --- arch/x86/kvm/irq_comm.c | 58 +++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 25810b144b58..4da03030d5a7 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -156,6 +156,16 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, } +static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + if (!level) + return -1; + + return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint); +} + int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status) @@ -163,18 +173,26 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, struct kvm_lapic_irq irq; int r; - if (unlikely(e->type != KVM_IRQ_ROUTING_MSI)) - return -EWOULDBLOCK; + switch (e->type) { + case KVM_IRQ_ROUTING_HV_SINT: + return kvm_hv_set_sint(e, kvm, irq_source_id, level, + line_status); - if (kvm_msi_route_invalid(kvm, e)) - return -EINVAL; + case KVM_IRQ_ROUTING_MSI: + if (kvm_msi_route_invalid(kvm, e)) + return -EINVAL; - kvm_set_msi_irq(kvm, e, &irq); + kvm_set_msi_irq(kvm, e, &irq); - if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) - return r; - else - return -EWOULDBLOCK; + if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) + return r; + break; + + default: + break; + } + + return -EWOULDBLOCK; } int kvm_request_irq_source_id(struct kvm *kvm) @@ -254,16 +272,6 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, srcu_read_unlock(&kvm->irq_srcu, idx); } -static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int irq_source_id, int level, - bool line_status) -{ - if (!level) - return -1; - - return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint); -} - int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) @@ -423,18 +431,6 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, srcu_read_unlock(&kvm->irq_srcu, idx); } -int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm, - int irq_source_id, int level, bool line_status) -{ - switch (irq->type) { - case KVM_IRQ_ROUTING_HV_SINT: - return kvm_hv_set_sint(irq, kvm, irq_source_id, level, - line_status); - default: - return -EWOULDBLOCK; - } -} - void kvm_arch_irq_routing_update(struct kvm *kvm) { kvm_hv_irq_routing_update(kvm); From ad092de60f865c1ad94221bd06d381ecea446cc8 Mon Sep 17 00:00:00 2001 From: Alex Hemme Date: Sat, 19 Nov 2016 10:48:38 +0100 Subject: [PATCH 403/503] i2c: i2c-mux-pca954x: fix deselect enabling for device-tree Deselect functionality can be ignored for device-trees with "i2c-mux-idle-disconnect" entries if no platform_data is available. By enabling the deselect functionality outside the platform_data block the logic works as it did in previous kernels. Fixes: 7fcac9807175 ("i2c: i2c-mux-pca954x: convert to use an explicit i2c mux core") Cc: # v4.7+ Signed-off-by: Alex Hemme Signed-off-by: Ziyang Wu [touched up a few minor issues /peda] Signed-off-by: Peter Rosin Signed-off-by: Wolfram Sang --- drivers/i2c/muxes/i2c-mux-pca954x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c index 1091346f2480..8bc3d36d2837 100644 --- a/drivers/i2c/muxes/i2c-mux-pca954x.c +++ b/drivers/i2c/muxes/i2c-mux-pca954x.c @@ -268,9 +268,9 @@ static int pca954x_probe(struct i2c_client *client, /* discard unconfigured channels */ break; idle_disconnect_pd = pdata->modes[num].deselect_on_exit; - data->deselect |= (idle_disconnect_pd - || idle_disconnect_dt) << num; } + data->deselect |= (idle_disconnect_pd || + idle_disconnect_dt) << num; ret = i2c_mux_add_adapter(muxc, force, num, class); From 3c7018ebf8dbf14e7cd4f5dc648c51fc979f45bb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 13 Nov 2016 20:35:52 -0500 Subject: [PATCH 404/503] fscrypto: don't use on-stack buffer for filename encryption With the new (in 4.9) option to use a virtually-mapped stack (CONFIG_VMAP_STACK), stack buffers cannot be used as input/output for the scatterlist crypto API because they may not be directly mappable to struct page. For short filenames, fname_encrypt() was encrypting a stack buffer holding the padded filename. Fix it by encrypting the filename in-place in the output buffer, thereby making the temporary buffer unnecessary. This bug could most easily be observed in a CONFIG_DEBUG_SG kernel because this allowed the BUG in sg_set_buf() to be triggered. Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- fs/crypto/fname.c | 53 +++++++++++++++++++---------------------------- 1 file changed, 21 insertions(+), 32 deletions(-) diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 9a28133ac3b8..9b774f4b50c8 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -39,65 +39,54 @@ static void fname_crypt_complete(struct crypto_async_request *req, int res) static int fname_encrypt(struct inode *inode, const struct qstr *iname, struct fscrypt_str *oname) { - u32 ciphertext_len; struct skcipher_request *req = NULL; DECLARE_FS_COMPLETION_RESULT(ecr); struct fscrypt_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_ctfm; int res = 0; char iv[FS_CRYPTO_BLOCK_SIZE]; - struct scatterlist src_sg, dst_sg; + struct scatterlist sg; int padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK); - char *workbuf, buf[32], *alloc_buf = NULL; - unsigned lim; + unsigned int lim; + unsigned int cryptlen; lim = inode->i_sb->s_cop->max_namelen(inode); if (iname->len <= 0 || iname->len > lim) return -EIO; - ciphertext_len = max(iname->len, (u32)FS_CRYPTO_BLOCK_SIZE); - ciphertext_len = round_up(ciphertext_len, padding); - ciphertext_len = min(ciphertext_len, lim); + /* + * Copy the filename to the output buffer for encrypting in-place and + * pad it with the needed number of NUL bytes. + */ + cryptlen = max_t(unsigned int, iname->len, FS_CRYPTO_BLOCK_SIZE); + cryptlen = round_up(cryptlen, padding); + cryptlen = min(cryptlen, lim); + memcpy(oname->name, iname->name, iname->len); + memset(oname->name + iname->len, 0, cryptlen - iname->len); - if (ciphertext_len <= sizeof(buf)) { - workbuf = buf; - } else { - alloc_buf = kmalloc(ciphertext_len, GFP_NOFS); - if (!alloc_buf) - return -ENOMEM; - workbuf = alloc_buf; - } + /* Initialize the IV */ + memset(iv, 0, FS_CRYPTO_BLOCK_SIZE); - /* Allocate request */ + /* Set up the encryption request */ req = skcipher_request_alloc(tfm, GFP_NOFS); if (!req) { printk_ratelimited(KERN_ERR - "%s: crypto_request_alloc() failed\n", __func__); - kfree(alloc_buf); + "%s: skcipher_request_alloc() failed\n", __func__); return -ENOMEM; } skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, fname_crypt_complete, &ecr); + sg_init_one(&sg, oname->name, cryptlen); + skcipher_request_set_crypt(req, &sg, &sg, cryptlen, iv); - /* Copy the input */ - memcpy(workbuf, iname->name, iname->len); - if (iname->len < ciphertext_len) - memset(workbuf + iname->len, 0, ciphertext_len - iname->len); - - /* Initialize IV */ - memset(iv, 0, FS_CRYPTO_BLOCK_SIZE); - - /* Create encryption request */ - sg_init_one(&src_sg, workbuf, ciphertext_len); - sg_init_one(&dst_sg, oname->name, ciphertext_len); - skcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv); + /* Do the encryption */ res = crypto_skcipher_encrypt(req); if (res == -EINPROGRESS || res == -EBUSY) { + /* Request is being completed asynchronously; wait for it */ wait_for_completion(&ecr.completion); res = ecr.res; } - kfree(alloc_buf); skcipher_request_free(req); if (res < 0) { printk_ratelimited(KERN_ERR @@ -105,7 +94,7 @@ static int fname_encrypt(struct inode *inode, return res; } - oname->len = ciphertext_len; + oname->len = cryptlen; return 0; } From 0f0909e242f73c1154272cf04f07fc9afe13e5b8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 13 Nov 2016 20:41:09 -0500 Subject: [PATCH 405/503] fscrypto: don't use on-stack buffer for key derivation With the new (in 4.9) option to use a virtually-mapped stack (CONFIG_VMAP_STACK), stack buffers cannot be used as input/output for the scatterlist crypto API because they may not be directly mappable to struct page. get_crypt_info() was using a stack buffer to hold the output from the encryption operation used to derive the per-file key. Fix it by using a heap buffer. This bug could most easily be observed in a CONFIG_DEBUG_SG kernel because this allowed the BUG in sg_set_buf() to be triggered. Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- fs/crypto/keyinfo.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 82f0285f5d08..67fb6d8876d0 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -185,7 +185,7 @@ int get_crypt_info(struct inode *inode) struct crypto_skcipher *ctfm; const char *cipher_str; int keysize; - u8 raw_key[FS_MAX_KEY_SIZE]; + u8 *raw_key = NULL; int res; res = fscrypt_initialize(); @@ -238,6 +238,15 @@ retry: if (res) goto out; + /* + * This cannot be a stack buffer because it is passed to the scatterlist + * crypto API as part of key derivation. + */ + res = -ENOMEM; + raw_key = kmalloc(FS_MAX_KEY_SIZE, GFP_NOFS); + if (!raw_key) + goto out; + if (fscrypt_dummy_context_enabled(inode)) { memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE); goto got_key; @@ -276,7 +285,8 @@ got_key: if (res) goto out; - memzero_explicit(raw_key, sizeof(raw_key)); + kzfree(raw_key); + raw_key = NULL; if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) != NULL) { put_crypt_info(crypt_info); goto retry; @@ -287,7 +297,7 @@ out: if (res == -ENOKEY) res = 0; put_crypt_info(crypt_info); - memzero_explicit(raw_key, sizeof(raw_key)); + kzfree(raw_key); return res; } From 8cdf3372fe8368f56315e66bea9f35053c418093 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 18 Nov 2016 13:00:24 -0500 Subject: [PATCH 406/503] ext4: sanity check the block and cluster size at mount time If the block size or cluster size is insane, reject the mount. This is important for security reasons (although we shouldn't be just depending on this check). Ref: http://www.securityfocus.com/archive/1/539661 Ref: https://bugzilla.redhat.com/show_bug.cgi?id=1332506 Reported-by: Borislav Petkov Reported-by: Nikolay Borisov Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/ext4.h | 1 + fs/ext4/super.c | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 282a51b07c57..a8a750f59621 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -235,6 +235,7 @@ struct ext4_io_submit { #define EXT4_MAX_BLOCK_SIZE 65536 #define EXT4_MIN_BLOCK_LOG_SIZE 10 #define EXT4_MAX_BLOCK_LOG_SIZE 16 +#define EXT4_MAX_CLUSTER_LOG_SIZE 30 #ifdef __KERNEL__ # define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) #else diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 20da99da0a34..52b0530c5d65 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3565,7 +3565,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (blocksize < EXT4_MIN_BLOCK_SIZE || blocksize > EXT4_MAX_BLOCK_SIZE) { ext4_msg(sb, KERN_ERR, - "Unsupported filesystem blocksize %d", blocksize); + "Unsupported filesystem blocksize %d (%d log_block_size)", + blocksize, le32_to_cpu(es->s_log_block_size)); + goto failed_mount; + } + if (le32_to_cpu(es->s_log_block_size) > + (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { + ext4_msg(sb, KERN_ERR, + "Invalid log block size: %u", + le32_to_cpu(es->s_log_block_size)); goto failed_mount; } @@ -3697,6 +3705,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "block size (%d)", clustersize, blocksize); goto failed_mount; } + if (le32_to_cpu(es->s_log_cluster_size) > + (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { + ext4_msg(sb, KERN_ERR, + "Invalid log cluster size: %u", + le32_to_cpu(es->s_log_cluster_size)); + goto failed_mount; + } sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) - le32_to_cpu(es->s_log_block_size); sbi->s_clusters_per_group = From 32c231164b762dddefa13af5a0101032c70b50ef Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 18 Nov 2016 22:13:00 +0100 Subject: [PATCH 407/503] l2tp: fix racy SOCK_ZAPPED flag check in l2tp_ip{,6}_bind() Lock socket before checking the SOCK_ZAPPED flag in l2tp_ip6_bind(). Without lock, a concurrent call could modify the socket flags between the sock_flag(sk, SOCK_ZAPPED) test and the lock_sock() call. This way, a socket could be inserted twice in l2tp_ip6_bind_table. Releasing it would then leave a stale pointer there, generating use-after-free errors when walking through the list or modifying adjacent entries. BUG: KASAN: use-after-free in l2tp_ip6_close+0x22e/0x290 at addr ffff8800081b0ed8 Write of size 8 by task syz-executor/10987 CPU: 0 PID: 10987 Comm: syz-executor Not tainted 4.8.0+ #39 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.2-0-g33fbe13 by qemu-project.org 04/01/2014 ffff880031d97838 ffffffff829f835b ffff88001b5a1640 ffff8800081b0ec0 ffff8800081b15a0 ffff8800081b6d20 ffff880031d97860 ffffffff8174d3cc ffff880031d978f0 ffff8800081b0e80 ffff88001b5a1640 ffff880031d978e0 Call Trace: [] dump_stack+0xb3/0x118 lib/dump_stack.c:15 [] kasan_object_err+0x1c/0x70 mm/kasan/report.c:156 [< inline >] print_address_description mm/kasan/report.c:194 [] kasan_report_error+0x1f6/0x4d0 mm/kasan/report.c:283 [< inline >] kasan_report mm/kasan/report.c:303 [] __asan_report_store8_noabort+0x3e/0x40 mm/kasan/report.c:329 [< inline >] __write_once_size ./include/linux/compiler.h:249 [< inline >] __hlist_del ./include/linux/list.h:622 [< inline >] hlist_del_init ./include/linux/list.h:637 [] l2tp_ip6_close+0x22e/0x290 net/l2tp/l2tp_ip6.c:239 [] inet_release+0xed/0x1c0 net/ipv4/af_inet.c:415 [] inet6_release+0x50/0x70 net/ipv6/af_inet6.c:422 [] sock_release+0x8d/0x1d0 net/socket.c:570 [] sock_close+0x16/0x20 net/socket.c:1017 [] __fput+0x28c/0x780 fs/file_table.c:208 [] ____fput+0x15/0x20 fs/file_table.c:244 [] task_work_run+0xf9/0x170 [] do_exit+0x85e/0x2a00 [] do_group_exit+0x108/0x330 [] get_signal+0x617/0x17a0 kernel/signal.c:2307 [] do_signal+0x7f/0x18f0 [] exit_to_usermode_loop+0xbf/0x150 arch/x86/entry/common.c:156 [< inline >] prepare_exit_to_usermode arch/x86/entry/common.c:190 [] syscall_return_slowpath+0x1a0/0x1e0 arch/x86/entry/common.c:259 [] entry_SYSCALL_64_fastpath+0xc4/0xc6 Object at ffff8800081b0ec0, in cache L2TP/IPv6 size: 1448 Allocated: PID = 10987 [ 1116.897025] [] save_stack_trace+0x16/0x20 [ 1116.897025] [] save_stack+0x46/0xd0 [ 1116.897025] [] kasan_kmalloc+0xad/0xe0 [ 1116.897025] [] kasan_slab_alloc+0x12/0x20 [ 1116.897025] [< inline >] slab_post_alloc_hook mm/slab.h:417 [ 1116.897025] [< inline >] slab_alloc_node mm/slub.c:2708 [ 1116.897025] [< inline >] slab_alloc mm/slub.c:2716 [ 1116.897025] [] kmem_cache_alloc+0xc8/0x2b0 mm/slub.c:2721 [ 1116.897025] [] sk_prot_alloc+0x69/0x2b0 net/core/sock.c:1326 [ 1116.897025] [] sk_alloc+0x38/0xae0 net/core/sock.c:1388 [ 1116.897025] [] inet6_create+0x2d7/0x1000 net/ipv6/af_inet6.c:182 [ 1116.897025] [] __sock_create+0x37b/0x640 net/socket.c:1153 [ 1116.897025] [< inline >] sock_create net/socket.c:1193 [ 1116.897025] [< inline >] SYSC_socket net/socket.c:1223 [ 1116.897025] [] SyS_socket+0xef/0x1b0 net/socket.c:1203 [ 1116.897025] [] entry_SYSCALL_64_fastpath+0x23/0xc6 Freed: PID = 10987 [ 1116.897025] [] save_stack_trace+0x16/0x20 [ 1116.897025] [] save_stack+0x46/0xd0 [ 1116.897025] [] kasan_slab_free+0x71/0xb0 [ 1116.897025] [< inline >] slab_free_hook mm/slub.c:1352 [ 1116.897025] [< inline >] slab_free_freelist_hook mm/slub.c:1374 [ 1116.897025] [< inline >] slab_free mm/slub.c:2951 [ 1116.897025] [] kmem_cache_free+0xc8/0x330 mm/slub.c:2973 [ 1116.897025] [< inline >] sk_prot_free net/core/sock.c:1369 [ 1116.897025] [] __sk_destruct+0x32b/0x4f0 net/core/sock.c:1444 [ 1116.897025] [] sk_destruct+0x44/0x80 net/core/sock.c:1452 [ 1116.897025] [] __sk_free+0x53/0x220 net/core/sock.c:1460 [ 1116.897025] [] sk_free+0x23/0x30 net/core/sock.c:1471 [ 1116.897025] [] sk_common_release+0x28c/0x3e0 ./include/net/sock.h:1589 [ 1116.897025] [] l2tp_ip6_close+0x1fe/0x290 net/l2tp/l2tp_ip6.c:243 [ 1116.897025] [] inet_release+0xed/0x1c0 net/ipv4/af_inet.c:415 [ 1116.897025] [] inet6_release+0x50/0x70 net/ipv6/af_inet6.c:422 [ 1116.897025] [] sock_release+0x8d/0x1d0 net/socket.c:570 [ 1116.897025] [] sock_close+0x16/0x20 net/socket.c:1017 [ 1116.897025] [] __fput+0x28c/0x780 fs/file_table.c:208 [ 1116.897025] [] ____fput+0x15/0x20 fs/file_table.c:244 [ 1116.897025] [] task_work_run+0xf9/0x170 [ 1116.897025] [] do_exit+0x85e/0x2a00 [ 1116.897025] [] do_group_exit+0x108/0x330 [ 1116.897025] [] get_signal+0x617/0x17a0 kernel/signal.c:2307 [ 1116.897025] [] do_signal+0x7f/0x18f0 [ 1116.897025] [] exit_to_usermode_loop+0xbf/0x150 arch/x86/entry/common.c:156 [ 1116.897025] [< inline >] prepare_exit_to_usermode arch/x86/entry/common.c:190 [ 1116.897025] [] syscall_return_slowpath+0x1a0/0x1e0 arch/x86/entry/common.c:259 [ 1116.897025] [] entry_SYSCALL_64_fastpath+0xc4/0xc6 Memory state around the buggy address: ffff8800081b0d80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8800081b0e00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff8800081b0e80: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb ^ ffff8800081b0f00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8800081b0f80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== The same issue exists with l2tp_ip_bind() and l2tp_ip_bind_table. Fixes: c51ce49735c1 ("l2tp: fix oops in L2TP IP sockets for connect() AF_UNSPEC case") Reported-by: Baozeng Ding Reported-by: Andrey Konovalov Tested-by: Baozeng Ding Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 5 +++-- net/l2tp/l2tp_ip6.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index fce25afb652a..982f6c44ea01 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -251,8 +251,6 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) int ret; int chk_addr_ret; - if (!sock_flag(sk, SOCK_ZAPPED)) - return -EINVAL; if (addr_len < sizeof(struct sockaddr_l2tpip)) return -EINVAL; if (addr->l2tp_family != AF_INET) @@ -267,6 +265,9 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) read_unlock_bh(&l2tp_ip_lock); lock_sock(sk); + if (!sock_flag(sk, SOCK_ZAPPED)) + goto out; + if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_l2tpip)) goto out; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index ad3468c32b53..9978d01ba0ba 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -269,8 +269,6 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) int addr_type; int err; - if (!sock_flag(sk, SOCK_ZAPPED)) - return -EINVAL; if (addr->l2tp_family != AF_INET6) return -EINVAL; if (addr_len < sizeof(*addr)) @@ -296,6 +294,9 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) lock_sock(sk); err = -EINVAL; + if (!sock_flag(sk, SOCK_ZAPPED)) + goto out_unlock; + if (sk->sk_state != TCP_CLOSE) goto out_unlock; From 3f0ae05d6fea0ed5b19efdbc9c9f8e02685a3af3 Mon Sep 17 00:00:00 2001 From: Zhang Shengju Date: Sat, 19 Nov 2016 23:28:32 +0800 Subject: [PATCH 408/503] rtnl: fix the loop index update error in rtnl_dump_ifinfo() If the link is filtered out, loop index should also be updated. If not, loop index will not be correct. Fixes: dc599f76c22b0 ("net: Add support for filtering link dump by master device and kind") Signed-off-by: Zhang Shengju Acked-by: David Ahern Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2b9d7d08ed4d..a99917b5de33 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1609,7 +1609,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) head = &net->dev_index_head[h]; hlist_for_each_entry(dev, head, index_hlist) { if (link_dump_filtered(dev, master_idx, kind_ops)) - continue; + goto cont; if (idx < s_idx) goto cont; err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, From 51b9a31c42edcd089f5b229633477ab5128faf03 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Sat, 19 Nov 2016 14:47:07 -0500 Subject: [PATCH 409/503] tipc: eliminate obsolete socket locking policy description The comment block in socket.c describing the locking policy is obsolete, and does not reflect current reality. We remove it in this commit. Since the current locking policy is much simpler and follows a mainstream approach, we see no need to add a new description. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 48 +---------------------------------------------- 1 file changed, 1 insertion(+), 47 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f9f5f3c3dab5..db32777ab591 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1,7 +1,7 @@ /* * net/tipc/socket.c: TIPC socket API * - * Copyright (c) 2001-2007, 2012-2015, Ericsson AB + * Copyright (c) 2001-2007, 2012-2016, Ericsson AB * Copyright (c) 2004-2008, 2010-2013, Wind River Systems * All rights reserved. * @@ -129,54 +129,8 @@ static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; static const struct proto_ops msg_ops; static struct proto tipc_proto; - static const struct rhashtable_params tsk_rht_params; -/* - * Revised TIPC socket locking policy: - * - * Most socket operations take the standard socket lock when they start - * and hold it until they finish (or until they need to sleep). Acquiring - * this lock grants the owner exclusive access to the fields of the socket - * data structures, with the exception of the backlog queue. A few socket - * operations can be done without taking the socket lock because they only - * read socket information that never changes during the life of the socket. - * - * Socket operations may acquire the lock for the associated TIPC port if they - * need to perform an operation on the port. If any routine needs to acquire - * both the socket lock and the port lock it must take the socket lock first - * to avoid the risk of deadlock. - * - * The dispatcher handling incoming messages cannot grab the socket lock in - * the standard fashion, since invoked it runs at the BH level and cannot block. - * Instead, it checks to see if the socket lock is currently owned by someone, - * and either handles the message itself or adds it to the socket's backlog - * queue; in the latter case the queued message is processed once the process - * owning the socket lock releases it. - * - * NOTE: Releasing the socket lock while an operation is sleeping overcomes - * the problem of a blocked socket operation preventing any other operations - * from occurring. However, applications must be careful if they have - * multiple threads trying to send (or receive) on the same socket, as these - * operations might interfere with each other. For example, doing a connect - * and a receive at the same time might allow the receive to consume the - * ACK message meant for the connect. While additional work could be done - * to try and overcome this, it doesn't seem to be worthwhile at the present. - * - * NOTE: Releasing the socket lock while an operation is sleeping also ensures - * that another operation that must be performed in a non-blocking manner is - * not delayed for very long because the lock has already been taken. - * - * NOTE: This code assumes that certain fields of a port/socket pair are - * constant over its lifetime; such fields can be examined without taking - * the socket lock and/or port lock, and do not need to be re-read even - * after resuming processing after waiting. These fields include: - * - socket type - * - pointer to socket sk structure (aka tipc_sock structure) - * - pointer to port structure - * - port reference - */ - static u32 tsk_own_node(struct tipc_sock *tsk) { return msg_prevnode(&tsk->phdr); From 9c763584b7c8911106bb77af7e648bef09af9d80 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Nov 2016 13:52:19 -0800 Subject: [PATCH 410/503] Linux 4.9-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9bc877d073d7..0ede48ba5aaf 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Psychotic Stoned Sheep # *DOCUMENTATION* From 3d40658c977769ce2138f286cf131537bf68bdfe Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 31 Aug 2016 21:10:06 -0700 Subject: [PATCH 411/503] apparmor: fix change_hat not finding hat after policy replacement After a policy replacement, the task cred may be out of date and need to be updated. However change_hat is using the stale profiles from the out of date cred resulting in either: a stale profile being applied or, incorrect failure when searching for a hat profile as it has been migrated to the new parent profile. Fixes: 01e2b670aa898a39259bc85c78e3d74820f4d3b6 (failure to find hat) Fixes: 898127c34ec03291c86f4ff3856d79e9e18952bc (stale policy being applied) Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1000287 Cc: stable@vger.kernel.org Signed-off-by: John Johansen Signed-off-by: James Morris --- security/apparmor/domain.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index fc3036b34e51..a4d90aa1045a 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -621,8 +621,8 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest) /* released below */ cred = get_current_cred(); cxt = cred_cxt(cred); - profile = aa_cred_profile(cred); - previous_profile = cxt->previous; + profile = aa_get_newest_profile(aa_cred_profile(cred)); + previous_profile = aa_get_newest_profile(cxt->previous); if (unconfined(profile)) { info = "unconfined"; @@ -718,6 +718,8 @@ audit: out: aa_put_profile(hat); kfree(name); + aa_put_profile(profile); + aa_put_profile(previous_profile); put_cred(cred); return error; From fc0e81b2bea0ebceb71889b61d2240856141c9ee Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 19 Nov 2016 18:42:40 -0800 Subject: [PATCH 412/503] x86/traps: Ignore high word of regs->cs in early_fixup_exception() On the 80486 DX, it seems that some exceptions may leave garbage in the high bits of CS. This causes sporadic failures in which early_fixup_exception() refuses to fix up an exception. As far as I can tell, this has been buggy for a long time, but the problem seems to have been exacerbated by commits: 1e02ce4cccdc ("x86: Store a per-cpu shadow copy of CR4") e1bfc11c5a6f ("x86/init: Fix cr4_init_shadow() on CR4-less machines") This appears to have broken for as long as we've had early exception handling. [ Note to stable maintainers: This patch is needed all the way back to 3.4, but it will only apply to 4.6 and up, as it depends on commit: 0e861fbb5bda ("x86/head: Move early exception panic code into early_fixup_exception()") If you want to backport to kernels before 4.6, please don't backport the prerequisites (there was a big chain of them that rewrote a lot of the early exception machinery); instead, ask me and I can send you a one-liner that will apply. ] Reported-by: Matthew Whitehead Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: 4c5023a3fa2e ("x86-32: Handle exception table entries during early boot") Link: http://lkml.kernel.org/r/cb32c69920e58a1a58e7b5cad975038a69c0ce7d.1479609510.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/extable.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 79ae939970d3..fcd06f7526de 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -135,7 +135,12 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr) if (early_recursion_flag > 2) goto halt_loop; - if (regs->cs != __KERNEL_CS) + /* + * Old CPUs leave the high bits of CS on the stack + * undefined. I'm not sure which CPUs do this, but at least + * the 486 DX works this way. + */ + if ((regs->cs & 0xFFFF) != __KERNEL_CS) goto fail; /* From ed68d7e9b9cfb64f3045ffbcb108df03c09a0f98 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 19 Nov 2016 15:37:30 -0800 Subject: [PATCH 413/503] x86/boot: Fail the boot if !M486 and CPUID is missing Linux will have all kinds of sporadic problems on systems that don't have the CPUID instruction unless CONFIG_M486=y. In particular, sync_core() will explode. I believe that these kernels had a better chance of working before commit 05fb3c199bb0 ("x86/boot: Initialize FPU and X86_FEATURE_ALWAYS even if we don't have CPUID"). That commit inadvertently fixed a serious bug: we used to fail to detect the FPU if CPUID wasn't present. Because we also used to forget to set X86_FEATURE_ALWAYS, we end up with no cpu feature bits set at all. This meant that alternative patching didn't do anything and, if paravirt was disabled, we could plausibly finish the entire boot process without calling sync_core(). Rather than trying to work around these issues, just have the kernel fail loudly if it's running on a CPUID-less 486, doesn't have CPUID, and doesn't have CONFIG_M486 set. Reported-by: Matthew Whitehead Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/70eac6639f23df8be5fe03fa1984aedd5d40077a.1479598603.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/boot/cpu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c index 26240dde081e..4224ede43b4e 100644 --- a/arch/x86/boot/cpu.c +++ b/arch/x86/boot/cpu.c @@ -87,6 +87,12 @@ int validate_cpu(void) return -1; } + if (CONFIG_X86_MINIMUM_CPU_FAMILY <= 4 && !IS_ENABLED(CONFIG_M486) && + !has_eflag(X86_EFLAGS_ID)) { + printf("This kernel requires a CPU with the CPUID instruction. Build with CONFIG_M486=y to run on this CPU.\n"); + return -1; + } + if (err_flags) { puts("This kernel requires the following features " "not present on the CPU:\n"); From b22cbe404a9cc3c7949e380fa1861e31934c8978 Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: Thu, 17 Nov 2016 09:11:35 -0800 Subject: [PATCH 414/503] x86/fpu: Fix invalid FPU ptrace state after execve() Robert O'Callahan reported that after an execve PTRACE_GETREGSET NT_X86_XSTATE continues to return the pre-exec register values until the exec'ed task modifies FPU state. The test code is at: https://bugzilla.redhat.com/attachment.cgi?id=1164286. What is happening is fpu__clear() does not properly clear fpstate. Fix it by doing just that. Reported-by: Robert O'Callahan Signed-off-by: Yu-cheng Yu Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: David Hansen Cc: Fenghua Yu Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi V. Shankar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1479402695-6553-1-git-send-email-yu-cheng.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/core.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 47004010ad5d..ebb4e95fbd74 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -521,14 +521,14 @@ void fpu__clear(struct fpu *fpu) { WARN_ON_FPU(fpu != ¤t->thread.fpu); /* Almost certainly an anomaly */ - if (!use_eager_fpu() || !static_cpu_has(X86_FEATURE_FPU)) { - /* FPU state will be reallocated lazily at the first use. */ - fpu__drop(fpu); - } else { - if (!fpu->fpstate_active) { - fpu__activate_curr(fpu); - user_fpu_begin(); - } + fpu__drop(fpu); + + /* + * Make sure fpstate is cleared and initialized. + */ + if (static_cpu_has(X86_FEATURE_FPU)) { + fpu__activate_curr(fpu); + user_fpu_begin(); copy_init_fpstate_to_fpregs(); } } From 8c5c86fb6abec7d76ec4d51a46714161bceab315 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 18 Nov 2016 18:52:24 +0200 Subject: [PATCH 415/503] x86/platform/intel-mid: Register watchdog device after SCU Watchdog device in Intel Tangier relies on SCU to be present. It uses the SCU IPC channel to send commands and receive responses. If watchdog driver is initialized quite before SCU and a command has been sent the result is always an error like the following: intel_mid_wdt: Error stopping watchdog: 0xffffffed Register watchdog device whne SCU is ready to avoid described issue. Signed-off-by: Andy Shevchenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20161118165224.175514-1-andriy.shevchenko@linux.intel.com [ Small cleanups. ] Signed-off-by: Ingo Molnar --- .../intel-mid/device_libs/platform_wdt.c | 32 ++++++++++++++++--- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_wdt.c index de734134bc8d..4f96cd009962 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_wdt.c @@ -14,7 +14,9 @@ #include #include #include + #include +#include #include #define TANGIER_EXT_TIMER0_MSI 15 @@ -50,14 +52,34 @@ static struct intel_mid_wdt_pdata tangier_pdata = { .probe = tangier_probe, }; -static int __init register_mid_wdt(void) +static int wdt_scu_status_change(struct notifier_block *nb, + unsigned long code, void *data) { - if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER) { - wdt_dev.dev.platform_data = &tangier_pdata; - return platform_device_register(&wdt_dev); + if (code == SCU_DOWN) { + platform_device_unregister(&wdt_dev); + return 0; } - return -ENODEV; + return platform_device_register(&wdt_dev); } +static struct notifier_block wdt_scu_notifier = { + .notifier_call = wdt_scu_status_change, +}; + +static int __init register_mid_wdt(void) +{ + if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER) + return -ENODEV; + + wdt_dev.dev.platform_data = &tangier_pdata; + + /* + * We need to be sure that the SCU IPC is ready before watchdog device + * can be registered: + */ + intel_scu_notifier_add(&wdt_scu_notifier); + + return 0; +} rootfs_initcall(register_mid_wdt); From a980ce352fcd408d30b044455e5f6e959d6258b6 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Fri, 18 Nov 2016 13:07:19 -0800 Subject: [PATCH 416/503] x86/build: Build compressed x86 kernels as PIE when !CONFIG_RELOCATABLE as well Since the bootloader may load the compressed x86 kernel at any address, it should always be built as PIE, not just when CONFIG_RELOCATABLE=y. Otherwise, linker in binutils 2.27 will optimize GOT load into the absolute address when building the compressed x86 kernel as a non-PIE executable. Signed-off-by: H.J. Lu Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org [ Small wording changes. ] Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 536ccfcc01c6..34d9e15857c3 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -40,8 +40,8 @@ GCOV_PROFILE := n UBSAN_SANITIZE :=n LDFLAGS := -m elf_$(UTS_MACHINE) -ifeq ($(CONFIG_RELOCATABLE),y) -# If kernel is relocatable, build compressed kernel as PIE. +# Compressed kernel should be built as PIE since it may be loaded at any +# address by the bootloader. ifeq ($(CONFIG_X86_32),y) LDFLAGS += $(call ld-option, -pie) $(call ld-option, --no-dynamic-linker) else @@ -51,7 +51,6 @@ else LDFLAGS += $(shell $(LD) --help 2>&1 | grep -q "\-z noreloc-overflow" \ && echo "-z noreloc-overflow -pie --no-dynamic-linker") endif -endif LDFLAGS_vmlinux := -T hostprogs-y := mkpiggy From e5dce2868818ca8706924f7bdc7939d481eefab0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 18 Nov 2016 19:27:23 +0200 Subject: [PATCH 417/503] x86/platform/intel-mid: Rename platform_wdt to platform_mrfld_wdt Rename the watchdog platform library file to explicitly show that is used only on Intel Merrifield platforms. Signed-off-by: Andy Shevchenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20161118172723.179761-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/device_libs/Makefile | 2 +- .../device_libs/{platform_wdt.c => platform_mrfld_wdt.c} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename arch/x86/platform/intel-mid/device_libs/{platform_wdt.c => platform_mrfld_wdt.c} (97%) diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index 429d08be7848..dd6cfa4ad3ac 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -28,4 +28,4 @@ obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_pcal9555a.o obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o # MISC Devices obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o -obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_wdt.o +obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_mrfld_wdt.o diff --git a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c similarity index 97% rename from arch/x86/platform/intel-mid/device_libs/platform_wdt.c rename to arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c index 4f96cd009962..3f1f1c77d090 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c @@ -1,5 +1,5 @@ /* - * platform_wdt.c: Watchdog platform library file + * Intel Merrifield watchdog platform device library file * * (C) Copyright 2014 Intel Corporation * Author: David Cohen From 647f80a1f233bb66fc58fb25664d029e0f12f3ae Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Mon, 21 Nov 2016 10:51:48 +0900 Subject: [PATCH 418/503] mmc: dw_mmc: fix the error handling for dma operation When dma->start is failed,then it has to fall back to PIO mode for current transfer. But Host controller was already set to bits relevant to DMA operation. If needs to use the PIO mode, Host controller has to stop the DMA operation. (It's more stable than now.) When it occurred error, it's not running any request. Fixes: 3fc7eaef44db ("mmc: dw_mmc: Add external dma interface support") Reported-by: Marek Szyprowski Signed-off-by: Jaehoon Chung Reviewed-by: Shawn Lin Cc: # v4.3+ Signed-off-by: Jaehoon Chung Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 50a674be6655..df478ae72e23 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -1058,6 +1058,7 @@ static int dw_mci_submit_data_dma(struct dw_mci *host, struct mmc_data *data) spin_unlock_irqrestore(&host->irq_lock, irqflags); if (host->dma_ops->start(host, sg_len)) { + host->dma_ops->stop(host); /* We can't do DMA, try PIO for this one */ dev_dbg(host->dev, "%s: fall back to PIO mode for current transfer\n", From e96271f3ed7e702fa36dd0605c0c5b5f065af816 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Fri, 18 Nov 2016 13:38:43 +0200 Subject: [PATCH 419/503] perf/core: Fix address filter parser The token table passed into match_token() must be null-terminated, which it currently is not in the perf's address filter string parser, as caught by Vince's perf_fuzzer and KASAN. It doesn't blow up otherwise because of the alignment padding of the table to the next element in the .rodata, which is luck. Fixing by adding a null-terminator to the token table. Reported-by: Vince Weaver Tested-by: Vince Weaver Signed-off-by: Alexander Shishkin Acked-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Thomas Gleixner Cc: dvyukov@google.com Cc: stable@vger.kernel.org # v4.7+ Fixes: 375637bc524 ("perf/core: Introduce address range filtering") Link: http://lkml.kernel.org/r/877f81f264.fsf@ashishki-desk.ger.corp.intel.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index ff230bb4a02e..6ee1febdf6ff 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8029,6 +8029,7 @@ restart: * if is not specified, the range is treated as a single address. */ enum { + IF_ACT_NONE = -1, IF_ACT_FILTER, IF_ACT_START, IF_ACT_STOP, @@ -8052,6 +8053,7 @@ static const match_table_t if_tokens = { { IF_SRC_KERNEL, "%u/%u" }, { IF_SRC_FILEADDR, "%u@%s" }, { IF_SRC_KERNELADDR, "%u" }, + { IF_ACT_NONE, NULL }, }; /* From ec638db8cb9ddd5ca08b23f2835b6c9c15eb616d Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Mon, 14 Nov 2016 11:08:45 -0800 Subject: [PATCH 420/503] thermal/powerclamp: add back module device table Commit 3105f234e0aba43e44e277c20f9b32ee8add43d4 replaced module cpu id table with a cpu feature check, which is logically correct. But we need the module device table to allow module auto loading. Cc: stable@vger.kernel.org # 4.8 Fixes:3105f234 thermal/powerclamp: correct cpu support check Signed-off-by: Jacob Pan Signed-off-by: Zhang Rui --- drivers/thermal/intel_powerclamp.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c index 7a223074df3d..afada655f861 100644 --- a/drivers/thermal/intel_powerclamp.c +++ b/drivers/thermal/intel_powerclamp.c @@ -669,9 +669,16 @@ static struct thermal_cooling_device_ops powerclamp_cooling_ops = { .set_cur_state = powerclamp_set_cur_state, }; +static const struct x86_cpu_id __initconst intel_powerclamp_ids[] = { + { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_MWAIT }, + {} +}; +MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids); + static int __init powerclamp_probe(void) { - if (!boot_cpu_has(X86_FEATURE_MWAIT)) { + + if (!x86_match_cpu(intel_powerclamp_ids)) { pr_err("CPU does not support MWAIT"); return -ENODEV; } From 9713adc2a1a5488f4889c657a0c0ce0c16056d3c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 21 Nov 2016 14:25:49 +0100 Subject: [PATCH 421/503] Revert "ACPI: Execute _PTS before system reboot" Revert commit 2c85025c75df (ACPI: Execute _PTS before system reboot) as it is reported to cause poweroff and reboot to hang on Dell Latitude E7250. Link: https://bugzilla.kernel.org/show_bug.cgi?id=187061 Reported-by: Gianpaolo Signed-off-by: Rafael J. Wysocki --- drivers/acpi/sleep.c | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 2b38c1bb0446..7a2e4d45b266 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -47,32 +47,15 @@ static void acpi_sleep_tts_switch(u32 acpi_state) } } -static void acpi_sleep_pts_switch(u32 acpi_state) -{ - acpi_status status; - - status = acpi_execute_simple_method(NULL, "\\_PTS", acpi_state); - if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) { - /* - * OS can't evaluate the _PTS object correctly. Some warning - * message will be printed. But it won't break anything. - */ - printk(KERN_NOTICE "Failure in evaluating _PTS object\n"); - } -} - -static int sleep_notify_reboot(struct notifier_block *this, +static int tts_notify_reboot(struct notifier_block *this, unsigned long code, void *x) { acpi_sleep_tts_switch(ACPI_STATE_S5); - - acpi_sleep_pts_switch(ACPI_STATE_S5); - return NOTIFY_DONE; } -static struct notifier_block sleep_notifier = { - .notifier_call = sleep_notify_reboot, +static struct notifier_block tts_notifier = { + .notifier_call = tts_notify_reboot, .next = NULL, .priority = 0, }; @@ -916,9 +899,9 @@ int __init acpi_sleep_init(void) pr_info(PREFIX "(supports%s)\n", supported); /* - * Register the sleep_notifier to reboot notifier list so that the _TTS - * and _PTS object can also be evaluated when the system enters S5. + * Register the tts_notifier to reboot notifier list so that the _TTS + * object can also be evaluated when the system enters S5. */ - register_reboot_notifier(&sleep_notifier); + register_reboot_notifier(&tts_notifier); return 0; } From 6bc5445c0180a0c7cc61a95d131c7eac66459692 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Sun, 20 Nov 2016 17:22:38 +0000 Subject: [PATCH 422/503] ethernet: stmmac: make DWMAC_STM32 depend on it's associated SoC There's not much point, except compile test, enabling the stmmac platform drivers unless the STM32 SoC is enabled. It's not useful without it. Signed-off-by: Peter Robinson Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 3818c5e06eba..4b78168a5f3c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -107,7 +107,7 @@ config DWMAC_STI config DWMAC_STM32 tristate "STM32 DWMAC support" default ARCH_STM32 - depends on OF && HAS_IOMEM + depends on OF && HAS_IOMEM && (ARCH_STM32 || COMPILE_TEST) select MFD_SYSCON ---help--- Support for ethernet controller on STM32 SOCs. From d75a6a0e3933acbba44e4ad8d8f3c4d4f76b6e03 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Fri, 18 Nov 2016 21:11:39 -0500 Subject: [PATCH 423/503] NFSv4.1: Keep a reference on lock states while checking While walking the list of lock_states, keep a reference on each nfs4_lock_state to be checked, otherwise the lock state could be removed while the check performs TEST_STATEID and possible FREE_STATEID. Signed-off-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2d1481eb1929..e6dc95e0f97e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2564,15 +2564,23 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state) static int nfs41_check_expired_locks(struct nfs4_state *state) { int status, ret = NFS_OK; - struct nfs4_lock_state *lsp; + struct nfs4_lock_state *lsp, *prev = NULL; struct nfs_server *server = NFS_SERVER(state->inode); if (!test_bit(LK_STATE_IN_USE, &state->flags)) goto out; + + spin_lock(&state->state_lock); list_for_each_entry(lsp, &state->lock_states, ls_locks) { if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { struct rpc_cred *cred = lsp->ls_state->owner->so_cred; + atomic_inc(&lsp->ls_count); + spin_unlock(&state->state_lock); + + nfs4_put_lock_state(prev); + prev = lsp; + status = nfs41_test_and_free_expired_stateid(server, &lsp->ls_stateid, cred); @@ -2585,10 +2593,14 @@ static int nfs41_check_expired_locks(struct nfs4_state *state) set_bit(NFS_LOCK_LOST, &lsp->ls_flags); } else if (status != NFS_OK) { ret = status; - break; + nfs4_put_lock_state(prev); + goto out; } + spin_lock(&state->state_lock); } - }; + } + spin_unlock(&state->state_lock); + nfs4_put_lock_state(prev); out: return ret; } From 7c6ae610a1f0a9d3cebf790e0245b4e0f76aa86e Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Mon, 21 Nov 2016 08:56:21 +0800 Subject: [PATCH 424/503] net: l2tp: Treat NET_XMIT_CN as success in l2tp_eth_dev_xmit The tc could return NET_XMIT_CN as one congestion notification, but it does not mean the packe is lost. Other modules like ipvlan, macvlan, and others treat NET_XMIT_CN as success too. So l2tp_eth_dev_xmit should add the NET_XMIT_CN check. Signed-off-by: Gao Feng Signed-off-by: David S. Miller --- net/l2tp/l2tp_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 965f7e344cef..3dc97b4f982b 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -97,7 +97,7 @@ static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev) unsigned int len = skb->len; int ret = l2tp_xmit_skb(session, skb, session->hdr_len); - if (likely(ret == NET_XMIT_SUCCESS)) { + if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { atomic_long_add(len, &priv->tx_bytes); atomic_long_inc(&priv->tx_packets); } else { From 7082c5c3f2407c52022507ffaf644dbbab97a883 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 21 Nov 2016 10:08:37 +0100 Subject: [PATCH 425/503] tcp: zero ca_priv area when switching cc algorithms We need to zero out the private data area when application switches connection to different algorithm (TCP_CONGESTION setsockopt). When congestion ops get assigned at connect time everything is already zeroed because sk_alloc uses GFP_ZERO flag. But in the setsockopt case this contains whatever previous cc placed there. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/tcp_cong.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 1294af4e0127..f9038d6b109e 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -200,8 +200,10 @@ static void tcp_reinit_congestion_control(struct sock *sk, icsk->icsk_ca_ops = ca; icsk->icsk_ca_setsockopt = 1; - if (sk->sk_state != TCP_CLOSE) + if (sk->sk_state != TCP_CLOSE) { + memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); tcp_init_congestion_control(sk); + } } /* Manage refcounts on socket close. */ From effb46b40f8053fd19698daf9e6b5833cabeba29 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 21 Nov 2016 15:33:07 +0200 Subject: [PATCH 426/503] watchdog: wdat_wdt: Select WATCHDOG_CORE The WDAT watchdog driver uses functionality provided by the watchdog timer core but it did not select it explicitly. This results following linker error when only WDAT_WDT is enabled in Kconfig: drivers/built-in.o: In function `wdat_wdt_probe': drivers/watchdog/wdat_wdt.c:444: undefined reference to `devm_watchdog_register_device' Fix this by explicitly selecting WATCHDOG_CORE when WDAT watchdog driver is enabled. Fixes: 058dfc767008 (ACPI / watchdog: Add support for WDAT hardware watchdog) Reported-by: Vegard Nossum Signed-off-by: Mika Westerberg Reviewed-by: Guenter Roeck Signed-off-by: Rafael J. Wysocki --- drivers/watchdog/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 50dbaa805658..616a0b2d7768 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -155,6 +155,7 @@ config TANGOX_WATCHDOG config WDAT_WDT tristate "ACPI Watchdog Action Table (WDAT)" depends on ACPI + select WATCHDOG_CORE select ACPI_WATCHDOG help This driver adds support for systems with ACPI Watchdog Action From 7a43906f5cbfb74712af168988455e350707e310 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 21 Nov 2016 18:08:05 +1100 Subject: [PATCH 427/503] powerpc: Set missing wakeup bit in LPCR on POWER9 There is a new bit, LPCR_PECE_HVEE (Hypervisor Virtualization Exit Enable), which controls wakeup from STOP states on Hypervisor Virtualization Interrupts (which happen to also be all external interrupts in host or bare metal mode). It needs to be set or we will miss wakeups. Fixes: 9baaef0a22c8 ("powerpc/irq: Add support for HV virtualization interrupts") Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Benjamin Herrenschmidt [mpe: Rename it to HVEE to match the name in the ISA] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/kernel/cpu_setup_power.S | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 9cd4e8cbc78c..9e1499f98def 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -355,6 +355,7 @@ #define LPCR_PECE0 ASM_CONST(0x0000000000004000) /* ext. exceptions can cause exit */ #define LPCR_PECE1 ASM_CONST(0x0000000000002000) /* decrementer can cause exit */ #define LPCR_PECE2 ASM_CONST(0x0000000000001000) /* machine check etc can cause exit */ +#define LPCR_PECE_HVEE ASM_CONST(0x0000400000000000) /* P9 Wakeup on HV interrupts */ #define LPCR_MER ASM_CONST(0x0000000000000800) /* Mediated External Exception */ #define LPCR_MER_SH 11 #define LPCR_TC ASM_CONST(0x0000000000000200) /* Translation control */ diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 52ff3f025437..37c027ca83b2 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -98,8 +98,8 @@ _GLOBAL(__setup_cpu_power9) li r0,0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR - ori r3, r3, LPCR_PECEDH - ori r3, r3, LPCR_HVICE + LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE) + or r3, r3, r4 bl __init_LPCR bl __init_HFSCR bl __init_tlb_power9 @@ -118,8 +118,8 @@ _GLOBAL(__restore_cpu_power9) li r0,0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR - ori r3, r3, LPCR_PECEDH - ori r3, r3, LPCR_HVICE + LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE) + or r3, r3, r4 bl __init_LPCR bl __init_HFSCR bl __init_tlb_power9 From 8acf7a106326eb94e143552de81f34308149121c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 21 Nov 2016 15:34:00 +0800 Subject: [PATCH 428/503] crypto: algif_hash - Fix result clobbering in recvmsg Recently an init call was added to hash_recvmsg so as to reset the hash state in case a sendmsg call was never made. Unfortunately this ended up clobbering the result if the previous sendmsg was done with a MSG_MORE flag. This patch fixes it by excluding that case when we make the init call. Fixes: a8348bca2944 ("algif_hash - Fix NULL hash crash with shash") Reported-by: Patrick Steinhardt Signed-off-by: Herbert Xu --- crypto/algif_hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 05e21b464433..d19b09cdf284 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -214,7 +214,7 @@ static int hash_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0); - if (!result) { + if (!result && !ctx->more) { err = af_alg_wait_for_completion( crypto_ahash_init(&ctx->req), &ctx->completion); From c8467f7a3620698bf3c22f0e199b550fb611a8ae Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 21 Nov 2016 16:26:19 +0800 Subject: [PATCH 429/503] crypto: scatterwalk - Remove unnecessary aliasing check in map_and_copy The aliasing check in map_and_copy is no longer necessary because the IPsec ESP code no longer provides an IV that points into the actual request data. As this check is now triggering BUG checks due to the vmalloced stack code, I'm removing it. Reported-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/scatterwalk.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c index 52ce17a3dd63..c16c94f88733 100644 --- a/crypto/scatterwalk.c +++ b/crypto/scatterwalk.c @@ -68,10 +68,6 @@ void scatterwalk_map_and_copy(void *buf, struct scatterlist *sg, sg = scatterwalk_ffwd(tmp, sg, start); - if (sg_page(sg) == virt_to_page(buf) && - sg->offset == offset_in_page(buf)) - return; - scatterwalk_start(&walk, sg); scatterwalk_copychunks(buf, &walk, nbytes, out); scatterwalk_done(&walk, out, 0); From 9e5f68842276672a05737c23e407250f776cbf35 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 22 Nov 2016 14:52:22 +1100 Subject: [PATCH 430/503] powerpc: Fix missing CRCs, add more asm-prototypes.h declarations After patch 4efca4ed0 ("kbuild: modversions for EXPORT_SYMBOL() for asm"), asm exports can get modversions CRCs generated if they have C definitions in asm-prototypes.h. This patch adds missing definitions for 32 and 64 bit allmodconfig builds. Fixes: 9445aa1a3062 ("ppc: move exports to definitions") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/asm-prototypes.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index d1492736d852..e0baba1535e6 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -14,6 +14,10 @@ #include #include +#include +#include +#include +#include #include @@ -109,4 +113,12 @@ void early_setup_secondary(void); /* time */ void accumulate_stolen_time(void); +/* misc runtime */ +extern u64 __bswapdi2(u64); +extern s64 __lshrdi3(s64, int); +extern s64 __ashldi3(s64, int); +extern s64 __ashrdi3(s64, int); +extern int __cmpdi2(s64, s64); +extern int __ucmpdi2(u64, u64); + #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */ From 18f649ef344127ef6de23a5a4272dbe2fdb73dde Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 14 Nov 2016 19:46:09 +0100 Subject: [PATCH 431/503] sched/autogroup: Fix autogroup_move_group() to never skip sched_move_task() The PF_EXITING check in task_wants_autogroup() is no longer needed. Remove it, but see the next patch. However the comment is correct in that autogroup_move_group() must always change task_group() for every thread so the sysctl_ check is very wrong; we can race with cgroups and even sys_setsid() is not safe because a task running with task_group() == ag->tg must participate in refcounting: int main(void) { int sctl = open("/proc/sys/kernel/sched_autogroup_enabled", O_WRONLY); assert(sctl > 0); if (fork()) { wait(NULL); // destroy the child's ag/tg pause(); } assert(pwrite(sctl, "1\n", 2, 0) == 2); assert(setsid() > 0); if (fork()) pause(); kill(getppid(), SIGKILL); sleep(1); // The child has gone, the grandchild runs with kref == 1 assert(pwrite(sctl, "0\n", 2, 0) == 2); assert(setsid() > 0); // runs with the freed ag/tg for (;;) sleep(1); return 0; } crashes the kernel. It doesn't really need sleep(1), it doesn't matter if autogroup_move_group() actually frees the task_group or this happens later. Reported-by: Vern Lovejoy Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: hartsjc@redhat.com Cc: vbendel@redhat.com Link: http://lkml.kernel.org/r/20161114184609.GA15965@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/auto_group.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c index a5d966cb8891..ad2b19ad6ca0 100644 --- a/kernel/sched/auto_group.c +++ b/kernel/sched/auto_group.c @@ -111,14 +111,11 @@ bool task_wants_autogroup(struct task_struct *p, struct task_group *tg) { if (tg != &root_task_group) return false; - /* - * We can only assume the task group can't go away on us if - * autogroup_move_group() can see us on ->thread_group list. + * If we race with autogroup_move_group() the caller can use the old + * value of signal->autogroup but in this case sched_move_task() will + * be called again before autogroup_kref_put(). */ - if (p->flags & PF_EXITING) - return false; - return true; } @@ -138,13 +135,17 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag) } p->signal->autogroup = autogroup_kref_get(ag); - - if (!READ_ONCE(sysctl_sched_autogroup_enabled)) - goto out; - + /* + * We can't avoid sched_move_task() after we changed signal->autogroup, + * this process can already run with task_group() == prev->tg or we can + * race with cgroup code which can read autogroup = prev under rq->lock. + * In the latter case for_each_thread() can not miss a migrating thread, + * cpu_cgroup_attach() must not be possible after cgroup_exit() and it + * can't be removed from thread list, we hold ->siglock. + */ for_each_thread(p, t) sched_move_task(t); -out: + unlock_task_sighand(p, &flags); autogroup_kref_put(prev); } From 8e5bfa8c1f8471aa4a2d30be631ef2b50e10abaf Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 14 Nov 2016 19:46:12 +0100 Subject: [PATCH 432/503] sched/autogroup: Do not use autogroup->tg in zombie threads Exactly because for_each_thread() in autogroup_move_group() can't see it and update its ->sched_task_group before _put() and possibly free(). So the exiting task needs another sched_move_task() before exit_notify() and we need to re-introduce the PF_EXITING (or similar) check removed by the previous change for another reason. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: hartsjc@redhat.com Cc: vbendel@redhat.com Cc: vlovejoy@redhat.com Link: http://lkml.kernel.org/r/20161114184612.GA15968@redhat.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 ++ kernel/exit.c | 1 + kernel/sched/auto_group.c | 19 +++++++++++++++++++ 3 files changed, 22 insertions(+) diff --git a/include/linux/sched.h b/include/linux/sched.h index 348f51b0ec92..e9c009dc3a4a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2567,6 +2567,7 @@ extern void sched_autogroup_create_attach(struct task_struct *p); extern void sched_autogroup_detach(struct task_struct *p); extern void sched_autogroup_fork(struct signal_struct *sig); extern void sched_autogroup_exit(struct signal_struct *sig); +extern void sched_autogroup_exit_task(struct task_struct *p); #ifdef CONFIG_PROC_FS extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m); extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice); @@ -2576,6 +2577,7 @@ static inline void sched_autogroup_create_attach(struct task_struct *p) { } static inline void sched_autogroup_detach(struct task_struct *p) { } static inline void sched_autogroup_fork(struct signal_struct *sig) { } static inline void sched_autogroup_exit(struct signal_struct *sig) { } +static inline void sched_autogroup_exit_task(struct task_struct *p) { } #endif extern int yield_to(struct task_struct *p, bool preempt); diff --git a/kernel/exit.c b/kernel/exit.c index 9d68c45ebbe3..3076f3089919 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -836,6 +836,7 @@ void __noreturn do_exit(long code) */ perf_event_exit_task(tsk); + sched_autogroup_exit_task(tsk); cgroup_exit(tsk); /* diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c index ad2b19ad6ca0..f1c8fd566246 100644 --- a/kernel/sched/auto_group.c +++ b/kernel/sched/auto_group.c @@ -115,10 +115,26 @@ bool task_wants_autogroup(struct task_struct *p, struct task_group *tg) * If we race with autogroup_move_group() the caller can use the old * value of signal->autogroup but in this case sched_move_task() will * be called again before autogroup_kref_put(). + * + * However, there is no way sched_autogroup_exit_task() could tell us + * to avoid autogroup->tg, so we abuse PF_EXITING flag for this case. */ + if (p->flags & PF_EXITING) + return false; + return true; } +void sched_autogroup_exit_task(struct task_struct *p) +{ + /* + * We are going to call exit_notify() and autogroup_move_group() can't + * see this thread after that: we can no longer use signal->autogroup. + * See the PF_EXITING check in task_wants_autogroup(). + */ + sched_move_task(p); +} + static void autogroup_move_group(struct task_struct *p, struct autogroup *ag) { @@ -142,6 +158,9 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag) * In the latter case for_each_thread() can not miss a migrating thread, * cpu_cgroup_attach() must not be possible after cgroup_exit() and it * can't be removed from thread list, we hold ->siglock. + * + * If an exiting thread was already removed from thread list we rely on + * sched_autogroup_exit_task(). */ for_each_thread(p, t) sched_move_task(t); From ae31fe51a3cceaa0cabdb3058f69669ecb47f12e Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 22 Nov 2016 10:57:42 +0100 Subject: [PATCH 433/503] perf/x86: Restore TASK_SIZE check on frame pointer The following commit: 75925e1ad7f5 ("perf/x86: Optimize stack walk user accesses") ... switched from copy_from_user_nmi() to __copy_from_user_nmi() with a manual access_ok() check. Unfortunately, copy_from_user_nmi() does an explicit check against TASK_SIZE, whereas the access_ok() uses whatever the current address limit of the task is. We are getting NMIs when __probe_kernel_read() has switched to KERNEL_DS, and then see vmalloc faults when we access what looks like pointers into vmalloc space: [] WARNING: CPU: 3 PID: 3685731 at arch/x86/mm/fault.c:435 vmalloc_fault+0x289/0x290 [] CPU: 3 PID: 3685731 Comm: sh Tainted: G W 4.6.0-5_fbk1_223_gdbf0f40 #1 [] Call Trace: [] [] dump_stack+0x4d/0x6c [] [] __warn+0xd3/0xf0 [] [] warn_slowpath_null+0x1d/0x20 [] [] vmalloc_fault+0x289/0x290 [] [] __do_page_fault+0x330/0x490 [] [] do_page_fault+0xc/0x10 [] [] page_fault+0x22/0x30 [] [] ? perf_callchain_user+0x100/0x2a0 [] [] get_perf_callchain+0x17f/0x190 [] [] perf_callchain+0x67/0x80 [] [] perf_prepare_sample+0x2a0/0x370 [] [] perf_event_output+0x20/0x60 [] [] ? perf_event_update_userpage+0xc7/0x130 [] [] __perf_event_overflow+0x181/0x1d0 [] [] perf_event_overflow+0x14/0x20 [] [] intel_pmu_handle_irq+0x1d3/0x490 [] [] ? copy_user_enhanced_fast_string+0x7/0x10 [] [] ? vunmap_page_range+0x1a1/0x2f0 [] [] ? unmap_kernel_range_noflush+0x11/0x20 [] [] ? ghes_copy_tofrom_phys+0x116/0x1f0 [] [] ? x2apic_send_IPI_self+0x1d/0x20 [] [] perf_event_nmi_handler+0x2d/0x50 [] [] nmi_handle+0x61/0x110 [] [] default_do_nmi+0x44/0x110 [] [] do_nmi+0xdb/0x150 [] [] end_repeat_nmi+0x1a/0x1e [] [] ? copy_user_enhanced_fast_string+0x7/0x10 [] [] ? copy_user_enhanced_fast_string+0x7/0x10 [] [] ? copy_user_enhanced_fast_string+0x7/0x10 [] <> [] ? __probe_kernel_read+0x3e/0xa0 Fix this by moving the valid_user_frame() check to before the uaccess that loads the return address and the pointer to the next frame. Signed-off-by: Johannes Weiner Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Fixes: 75925e1ad7f5 ("perf/x86: Optimize stack walk user accesses") Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index d31735f37ed7..9d4bf3ab049e 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2352,7 +2352,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent frame.next_frame = 0; frame.return_address = 0; - if (!access_ok(VERIFY_READ, fp, 8)) + if (!valid_user_frame(fp, sizeof(frame))) break; bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4); @@ -2362,9 +2362,6 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent if (bytes != 0) break; - if (!valid_user_frame(fp, sizeof(frame))) - break; - perf_callchain_store(entry, cs_base + frame.return_address); fp = compat_ptr(ss_base + frame.next_frame); } @@ -2413,7 +2410,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs frame.next_frame = NULL; frame.return_address = 0; - if (!access_ok(VERIFY_READ, fp, sizeof(*fp) * 2)) + if (!valid_user_frame(fp, sizeof(frame))) break; bytes = __copy_from_user_nmi(&frame.next_frame, fp, sizeof(*fp)); @@ -2423,9 +2420,6 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs if (bytes != 0) break; - if (!valid_user_frame(fp, sizeof(frame))) - break; - perf_callchain_store(entry, frame.return_address); fp = (void __user *)frame.next_frame; } From b8000586c90b4804902058a38d3a59ce5708e695 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 17 Nov 2016 18:17:31 +0100 Subject: [PATCH 434/503] perf/x86/intel: Cure bogus unwind from PEBS entries Vince Weaver reported that perf_fuzzer + KASAN detects that PEBS event unwinds sometimes do 'weird' things. In particular, we seemed to be ending up unwinding from random places on the NMI stack. While it was somewhat expected that the event record BP,SP would not match the interrupt BP,SP in that the interrupt is strictly later than the record event, it was overlooked that it could be on an already overwritten stack. Therefore, don't copy the recorded BP,SP over the interrupted BP,SP when we need stack unwinds. Note that its still possible the unwind doesn't full match the actual event, as its entirely possible to have done an (I)RET between record and interrupt, but on average it should still point in the general direction of where the event came from. Also, it's the best we can do, considering. The particular scenario that triggered the bogus NMI stack unwind was a PEBS event with very short period, upon enabling the event at the tail of the PMI handler (FREEZE_ON_PMI is not used), it instantly triggers a record (while still on the NMI stack) which in turn triggers the next PMI. This then causes back-to-back NMIs and we'll try and unwind the stack-frame from the last NMI, which obviously is now overwritten by our own. Analyzed-by: Josh Poimboeuf Reported-by: Vince Weaver Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Stephane Eranian Cc: Thomas Gleixner Cc: davej@codemonkey.org.uk Cc: dvyukov@google.com Cc: stable@vger.kernel.org Fixes: ca037701a025 ("perf, x86: Add PEBS infrastructure") Link: http://lkml.kernel.org/r/20161117171731.GV3157@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/events/intel/ds.c | 35 +++++++++++++++++++++++------------ arch/x86/events/perf_event.h | 2 +- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 0319311dbdbb..be202390bbd3 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1108,20 +1108,20 @@ static void setup_pebs_sample_data(struct perf_event *event, } /* - * We use the interrupt regs as a base because the PEBS record - * does not contain a full regs set, specifically it seems to - * lack segment descriptors, which get used by things like - * user_mode(). + * We use the interrupt regs as a base because the PEBS record does not + * contain a full regs set, specifically it seems to lack segment + * descriptors, which get used by things like user_mode(). * - * In the simple case fix up only the IP and BP,SP regs, for - * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly. - * A possible PERF_SAMPLE_REGS will have to transfer all regs. + * In the simple case fix up only the IP for PERF_SAMPLE_IP. + * + * We must however always use BP,SP from iregs for the unwinder to stay + * sane; the record BP,SP can point into thin air when the record is + * from a previous PMI context or an (I)RET happend between the record + * and PMI. */ *regs = *iregs; regs->flags = pebs->flags; set_linear_ip(regs, pebs->ip); - regs->bp = pebs->bp; - regs->sp = pebs->sp; if (sample_type & PERF_SAMPLE_REGS_INTR) { regs->ax = pebs->ax; @@ -1130,10 +1130,21 @@ static void setup_pebs_sample_data(struct perf_event *event, regs->dx = pebs->dx; regs->si = pebs->si; regs->di = pebs->di; - regs->bp = pebs->bp; - regs->sp = pebs->sp; - regs->flags = pebs->flags; + /* + * Per the above; only set BP,SP if we don't need callchains. + * + * XXX: does this make sense? + */ + if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) { + regs->bp = pebs->bp; + regs->sp = pebs->sp; + } + + /* + * Preserve PERF_EFLAGS_VM from set_linear_ip(). + */ + regs->flags = pebs->flags | (regs->flags & PERF_EFLAGS_VM); #ifndef CONFIG_X86_32 regs->r8 = pebs->r8; regs->r9 = pebs->r9; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 5874d8de1f8d..a77ee026643d 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -113,7 +113,7 @@ struct debug_store { * Per register state. */ struct er_account { - raw_spinlock_t lock; /* per-core: protect structure */ + raw_spinlock_t lock; /* per-core: protect structure */ u64 config; /* extra MSR config */ u64 reg; /* extra MSR number */ atomic_t ref; /* reference count */ From 033ac60c7f21f9996a0fab2fd04f334afbf77b33 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 18 Nov 2016 13:53:54 +0100 Subject: [PATCH 435/503] perf/x86/intel/uncore: Allow only a single PMU/box within an events group Group validation expects all events to be of the same PMU; however is_uncore_pmu() is too wide, it matches _all_ uncore events, even across PMUs. This triggers failure when we group different events from different uncore PMUs, like: perf stat -vv -e '{uncore_cbox_0/config=0x0334/,uncore_qpi_0/event=1/}' -a sleep 1 Fix is_uncore_pmu() by only matching events to the box at hand. Note that generic code; ran after this step; will disallow this mixture of PMU events. Reported-by: Jiri Olsa Tested-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Vince Weaver Link: http://lkml.kernel.org/r/20161118125354.GQ3117@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index efca2685d876..dbaaf7dc8373 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -319,9 +319,9 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, */ static int uncore_pmu_event_init(struct perf_event *event); -static bool is_uncore_event(struct perf_event *event) +static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event) { - return event->pmu->event_init == uncore_pmu_event_init; + return &box->pmu->pmu == event->pmu; } static int @@ -340,7 +340,7 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, n = box->n_events; - if (is_uncore_event(leader)) { + if (is_box_event(box, leader)) { box->event_list[n] = leader; n++; } @@ -349,7 +349,7 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, return n; list_for_each_entry(event, &leader->sibling_list, group_entry) { - if (!is_uncore_event(event) || + if (!is_box_event(box, event) || event->state <= PERF_EVENT_STATE_OFF) continue; From 7a79279e7186c4ac8b753cbd335ecc4ba81b5970 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 22 Nov 2016 13:56:54 +0000 Subject: [PATCH 436/503] drm/arm: hdlcd: fix plane base address update While testing HDMI with Xorg on the Juno board, I find that when Xorg starts up or shuts down, the display is shifted significantly to the right and wrapped in the active region. (No sync bars are visible.) The timings are correct, it behaves as if the start address has been shifted many pixels _into_ the framebuffer. This occurs whenever the display mode size is changed - using xrandr in Xorg shows that changing the resolution triggers the problem almost every time, but changing the refresh rate does not. Using devmem2 to disable and re-enable the HDLCD resolves the issue, and repeated disable/enable cycles do not make the issue re-appear. Further debugging shows that we try to update the controller configuration while enabled. Alwys ensure that the HDLCD is disabled prior to updating the controller timings, and use drm_crtc_vblank_off()/drm_crtc_vblank_on() so that DRM knows whether it can expect vblank interrupts. Signed-off-by: Russell King Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/hdlcd_crtc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/arm/hdlcd_crtc.c b/drivers/gpu/drm/arm/hdlcd_crtc.c index 48019ae22ddb..28341b32067f 100644 --- a/drivers/gpu/drm/arm/hdlcd_crtc.c +++ b/drivers/gpu/drm/arm/hdlcd_crtc.c @@ -150,15 +150,14 @@ static void hdlcd_crtc_enable(struct drm_crtc *crtc) clk_prepare_enable(hdlcd->clk); hdlcd_crtc_mode_set_nofb(crtc); hdlcd_write(hdlcd, HDLCD_REG_COMMAND, 1); + drm_crtc_vblank_on(crtc); } static void hdlcd_crtc_disable(struct drm_crtc *crtc) { struct hdlcd_drm_private *hdlcd = crtc_to_hdlcd_priv(crtc); - if (!crtc->state->active) - return; - + drm_crtc_vblank_off(crtc); hdlcd_write(hdlcd, HDLCD_REG_COMMAND, 0); clk_disable_unprepare(hdlcd->clk); } From 4345a64ac931a8dc499f1fc69880952412f36c3e Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 17 Nov 2016 21:13:56 +0100 Subject: [PATCH 437/503] parisc: Fix printk continuations in system detection Signed-off-by: Helge Deller --- arch/parisc/kernel/inventory.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/parisc/kernel/inventory.c b/arch/parisc/kernel/inventory.c index 545f9d2fe711..c05d1876d27c 100644 --- a/arch/parisc/kernel/inventory.c +++ b/arch/parisc/kernel/inventory.c @@ -58,7 +58,7 @@ void __init setup_pdc(void) status = pdc_system_map_find_mods(&module_result, &module_path, 0); if (status == PDC_OK) { pdc_type = PDC_TYPE_SYSTEM_MAP; - printk("System Map.\n"); + pr_cont("System Map.\n"); return; } @@ -77,7 +77,7 @@ void __init setup_pdc(void) status = pdc_pat_cell_get_number(&cell_info); if (status == PDC_OK) { pdc_type = PDC_TYPE_PAT; - printk("64 bit PAT.\n"); + pr_cont("64 bit PAT.\n"); return; } #endif @@ -97,12 +97,12 @@ void __init setup_pdc(void) case 0xC: /* 715/64, at least */ pdc_type = PDC_TYPE_SNAKE; - printk("Snake.\n"); + pr_cont("Snake.\n"); return; default: /* Everything else */ - printk("Unsupported.\n"); + pr_cont("Unsupported.\n"); panic("If this is a 64-bit machine, please try a 64-bit kernel.\n"); } } From c9b8af1330198ae241cd545e1f040019010d44d9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 22 Nov 2016 11:17:30 -0800 Subject: [PATCH 438/503] flow_dissect: call init_default_flow_dissectors() earlier Andre Noll reported panics after my recent fix (commit 34fad54c2537 "net: __skb_flow_dissect() must cap its return value") After some more headaches, Alexander root caused the problem to init_default_flow_dissectors() being called too late, in case a network driver like IGB is not a module and receives DHCP message very early. Fix is to call init_default_flow_dissectors() much earlier, as it is a core infrastructure and does not depend on another kernel service. Fixes: 06635a35d13d4 ("flow_dissect: use programable dissector in skb_flow_dissect and friends") Signed-off-by: Eric Dumazet Reported-by: Andre Noll Diagnosed-by: Alexander Duyck Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 69e4463a4b1b..c6d8207ffa7e 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1013,4 +1013,4 @@ static int __init init_default_flow_dissectors(void) return 0; } -late_initcall_sync(init_default_flow_dissectors); +core_initcall(init_default_flow_dissectors); From d55b352b01bc78fbc3d1bb650140668b87e58bf9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 22 Nov 2016 21:50:52 +0100 Subject: [PATCH 439/503] NFSv4.x: hide array-bounds warning A correct bugfix introduced a harmless warning that shows up with gcc-7: fs/nfs/callback.c: In function 'nfs_callback_up': fs/nfs/callback.c:214:14: error: array subscript is outside array bounds [-Werror=array-bounds] What happens here is that the 'minorversion == 0' check tells the compiler that we assume minorversion can be something other than 0, but when CONFIG_NFS_V4_1 is disabled that would be invalid and result in an out-of-bounds access. The added check for IS_ENABLED(CONFIG_NFS_V4_1) tells gcc that this really can't happen, which makes the code slightly smaller and also avoids the warning. The bugfix that introduced the warning is marked for stable backports, we want this one backported to the same releases. Fixes: 98b0f80c2396 ("NFSv4.x: Fix a refcount leak in nfs_callback_up_net") Cc: stable@vger.kernel.org # v3.7+ Signed-off-by: Arnd Bergmann Signed-off-by: Anna Schumaker --- fs/nfs/callback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 532d8e242d4d..484bebc20bca 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -197,7 +197,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, } ret = -EPROTONOSUPPORT; - if (minorversion == 0) + if (!IS_ENABLED(CONFIG_NFS_V4_1) || minorversion == 0) ret = nfs4_callback_up_net(serv, net); else if (xprt->ops->bc_up) ret = xprt->ops->bc_up(serv, net); From 39385cb5f3274735b03ed1f8e7ff517b02a0beed Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sat, 12 Nov 2016 17:03:07 +0200 Subject: [PATCH 440/503] Bluetooth: Fix using the correct source address type The hci_get_route() API is used to look up local HCI devices, however so far it has been incapable of dealing with anything else than the public address of HCI devices. This completely breaks with LE-only HCI devices that do not come with a public address, but use a static random address instead. This patch exteds the hci_get_route() API with a src_type parameter that's used for comparing with the right address of each HCI device. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/6lowpan.c | 4 ++-- net/bluetooth/hci_conn.c | 26 ++++++++++++++++++++++++-- net/bluetooth/l2cap_core.c | 2 +- net/bluetooth/rfcomm/tty.c | 2 +- net/bluetooth/sco.c | 2 +- 6 files changed, 30 insertions(+), 8 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f00bf667ec33..554671c81f4a 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1018,7 +1018,7 @@ static inline void hci_set_drvdata(struct hci_dev *hdev, void *data) } struct hci_dev *hci_dev_get(int index); -struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src); +struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, u8 src_type); struct hci_dev *hci_alloc_dev(void); void hci_free_dev(struct hci_dev *hdev); diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index d020299baba4..1904a93f47d5 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -1090,7 +1090,6 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, { struct hci_conn *hcon; struct hci_dev *hdev; - bdaddr_t *src = BDADDR_ANY; int n; n = sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx %hhu", @@ -1101,7 +1100,8 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, if (n < 7) return -EINVAL; - hdev = hci_get_route(addr, src); + /* The LE_PUBLIC address type is ignored because of BDADDR_ANY */ + hdev = hci_get_route(addr, BDADDR_ANY, BDADDR_LE_PUBLIC); if (!hdev) return -ENOENT; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 3809617aa98d..dc59eae54717 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -613,7 +613,7 @@ int hci_conn_del(struct hci_conn *conn) return 0; } -struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src) +struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type) { int use_src = bacmp(src, BDADDR_ANY); struct hci_dev *hdev = NULL, *d; @@ -634,7 +634,29 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src) */ if (use_src) { - if (!bacmp(&d->bdaddr, src)) { + bdaddr_t id_addr; + u8 id_addr_type; + + if (src_type == BDADDR_BREDR) { + if (!lmp_bredr_capable(d)) + continue; + bacpy(&id_addr, &d->bdaddr); + id_addr_type = BDADDR_BREDR; + } else { + if (!lmp_le_capable(d)) + continue; + + hci_copy_identity_address(d, &id_addr, + &id_addr_type); + + /* Convert from HCI to three-value type */ + if (id_addr_type == ADDR_LE_DEV_PUBLIC) + id_addr_type = BDADDR_LE_PUBLIC; + else + id_addr_type = BDADDR_LE_RANDOM; + } + + if (!bacmp(&id_addr, src) && id_addr_type == src_type) { hdev = d; break; } } else { diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index d4cad29b033f..577f1c01454a 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7060,7 +7060,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, BT_DBG("%pMR -> %pMR (type %u) psm 0x%2.2x", &chan->src, dst, dst_type, __le16_to_cpu(psm)); - hdev = hci_get_route(dst, &chan->src); + hdev = hci_get_route(dst, &chan->src, chan->src_type); if (!hdev) return -EHOSTUNREACH; diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 8e385a0ae60e..2f2cb5e27cdd 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -178,7 +178,7 @@ static void rfcomm_reparent_device(struct rfcomm_dev *dev) struct hci_dev *hdev; struct hci_conn *conn; - hdev = hci_get_route(&dev->dst, &dev->src); + hdev = hci_get_route(&dev->dst, &dev->src, BDADDR_BREDR); if (!hdev) return; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index f52bcbf2e58c..3125ce670c2f 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -219,7 +219,7 @@ static int sco_connect(struct sock *sk) BT_DBG("%pMR -> %pMR", &sco_pi(sk)->src, &sco_pi(sk)->dst); - hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src); + hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src, BDADDR_BREDR); if (!hdev) return -EHOSTUNREACH; From 8478132a8784605fe07ede555f7277d989368d73 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 23 Nov 2016 10:00:03 +0000 Subject: [PATCH 441/503] Revert "arm: move exports to definitions" This reverts commit 4dd1837d7589f468ed109556513f476e7a7f9121. Moving the exports for assembly code into the assembly files breaks KSYM trimming, but also breaks modversions. While fixing the KSYM trimming is trivial, fixing modversions brings us to a technically worse position that we had prior to the above change: - We end up with the prototype definitions divorsed from everything else, which means that adding or removing assembly level ksyms become more fragile: * if adding a new assembly ksyms export, a missed prototype in asm-prototypes.h results in a successful build if no module in the selected configuration makes use of the symbol. * when removing a ksyms export, asm-prototypes.h will get forgotten, with armksyms.c, you'll get a build error if you forget to touch the file. - We end up with the same amount of include files and prototypes, they're just in a header file instead of a .c file with their exports. As for lines of code, we don't get much of a size reduction: (original commit) 47 files changed, 131 insertions(+), 208 deletions(-) (fix for ksyms trimming) 7 files changed, 18 insertions(+), 5 deletions(-) (two fixes for modversions) 1 file changed, 34 insertions(+) 3 files changed, 7 insertions(+), 2 deletions(-) which results in a net total of only 25 lines deleted. As there does not seem to be much benefit from this change of approach, revert the change. Signed-off-by: Russell King --- arch/arm/include/asm/Kbuild | 1 - arch/arm/kernel/Makefile | 2 +- arch/arm/kernel/armksyms.c | 183 ++++++++++++++++++++++++++ arch/arm/kernel/entry-ftrace.S | 3 - arch/arm/kernel/head.S | 3 - arch/arm/kernel/smccc-call.S | 3 - arch/arm/lib/ashldi3.S | 3 - arch/arm/lib/ashrdi3.S | 3 - arch/arm/lib/bitops.h | 5 - arch/arm/lib/bswapsdi2.S | 3 - arch/arm/lib/clear_user.S | 4 - arch/arm/lib/copy_from_user.S | 2 - arch/arm/lib/copy_page.S | 2 - arch/arm/lib/copy_to_user.S | 4 - arch/arm/lib/csumipv6.S | 3 +- arch/arm/lib/csumpartial.S | 2 - arch/arm/lib/csumpartialcopy.S | 1 - arch/arm/lib/csumpartialcopygeneric.S | 2 - arch/arm/lib/csumpartialcopyuser.S | 1 - arch/arm/lib/delay.c | 2 - arch/arm/lib/div64.S | 2 - arch/arm/lib/findbit.S | 9 -- arch/arm/lib/getuser.S | 9 -- arch/arm/lib/io-readsb.S | 2 - arch/arm/lib/io-readsl.S | 2 - arch/arm/lib/io-readsw-armv3.S | 3 +- arch/arm/lib/io-readsw-armv4.S | 2 - arch/arm/lib/io-writesb.S | 2 - arch/arm/lib/io-writesl.S | 2 - arch/arm/lib/io-writesw-armv3.S | 2 - arch/arm/lib/io-writesw-armv4.S | 2 - arch/arm/lib/lib1funcs.S | 9 -- arch/arm/lib/lshrdi3.S | 3 - arch/arm/lib/memchr.S | 2 - arch/arm/lib/memcpy.S | 3 - arch/arm/lib/memmove.S | 2 - arch/arm/lib/memset.S | 3 - arch/arm/lib/memzero.S | 2 - arch/arm/lib/muldi3.S | 3 - arch/arm/lib/putuser.S | 5 - arch/arm/lib/strchr.S | 2 - arch/arm/lib/strrchr.S | 2 - arch/arm/lib/uaccess_with_memcpy.c | 3 - arch/arm/lib/ucmpdi2.S | 3 - arch/arm/mach-imx/Makefile | 1 + arch/arm/mach-imx/ssi-fiq-ksym.c | 20 +++ arch/arm/mach-imx/ssi-fiq.S | 7 +- 47 files changed, 208 insertions(+), 131 deletions(-) create mode 100644 arch/arm/kernel/armksyms.c create mode 100644 arch/arm/mach-imx/ssi-fiq-ksym.c diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 0745538b26d3..55e0e3ea9cb6 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -8,7 +8,6 @@ generic-y += early_ioremap.h generic-y += emergency-restart.h generic-y += errno.h generic-y += exec.h -generic-y += export.h generic-y += ioctl.h generic-y += ipcbuf.h generic-y += irq_regs.h diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 68c2c097cffe..ad325a8c7e1e 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -33,7 +33,7 @@ endif obj-$(CONFIG_CPU_IDLE) += cpuidle.o obj-$(CONFIG_ISA_DMA_API) += dma.o obj-$(CONFIG_FIQ) += fiq.o fiqasm.o -obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_MODULES) += armksyms.o module.o obj-$(CONFIG_ARM_MODULE_PLTS) += module-plts.o obj-$(CONFIG_ISA_DMA) += dma-isa.o obj-$(CONFIG_PCI) += bios32.o isa.o diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c new file mode 100644 index 000000000000..7e45f69a0ddc --- /dev/null +++ b/arch/arm/kernel/armksyms.c @@ -0,0 +1,183 @@ +/* + * linux/arch/arm/kernel/armksyms.c + * + * Copyright (C) 2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* + * libgcc functions - functions that are used internally by the + * compiler... (prototypes are not correct though, but that + * doesn't really matter since they're not versioned). + */ +extern void __ashldi3(void); +extern void __ashrdi3(void); +extern void __divsi3(void); +extern void __lshrdi3(void); +extern void __modsi3(void); +extern void __muldi3(void); +extern void __ucmpdi2(void); +extern void __udivsi3(void); +extern void __umodsi3(void); +extern void __do_div64(void); +extern void __bswapsi2(void); +extern void __bswapdi2(void); + +extern void __aeabi_idiv(void); +extern void __aeabi_idivmod(void); +extern void __aeabi_lasr(void); +extern void __aeabi_llsl(void); +extern void __aeabi_llsr(void); +extern void __aeabi_lmul(void); +extern void __aeabi_uidiv(void); +extern void __aeabi_uidivmod(void); +extern void __aeabi_ulcmp(void); + +extern void fpundefinstr(void); + +void mmioset(void *, unsigned int, size_t); +void mmiocpy(void *, const void *, size_t); + + /* platform dependent support */ +EXPORT_SYMBOL(arm_delay_ops); + + /* networking */ +EXPORT_SYMBOL(csum_partial); +EXPORT_SYMBOL(csum_partial_copy_from_user); +EXPORT_SYMBOL(csum_partial_copy_nocheck); +EXPORT_SYMBOL(__csum_ipv6_magic); + + /* io */ +#ifndef __raw_readsb +EXPORT_SYMBOL(__raw_readsb); +#endif +#ifndef __raw_readsw +EXPORT_SYMBOL(__raw_readsw); +#endif +#ifndef __raw_readsl +EXPORT_SYMBOL(__raw_readsl); +#endif +#ifndef __raw_writesb +EXPORT_SYMBOL(__raw_writesb); +#endif +#ifndef __raw_writesw +EXPORT_SYMBOL(__raw_writesw); +#endif +#ifndef __raw_writesl +EXPORT_SYMBOL(__raw_writesl); +#endif + + /* string / mem functions */ +EXPORT_SYMBOL(strchr); +EXPORT_SYMBOL(strrchr); +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(memchr); +EXPORT_SYMBOL(__memzero); + +EXPORT_SYMBOL(mmioset); +EXPORT_SYMBOL(mmiocpy); + +#ifdef CONFIG_MMU +EXPORT_SYMBOL(copy_page); + +EXPORT_SYMBOL(arm_copy_from_user); +EXPORT_SYMBOL(arm_copy_to_user); +EXPORT_SYMBOL(arm_clear_user); + +EXPORT_SYMBOL(__get_user_1); +EXPORT_SYMBOL(__get_user_2); +EXPORT_SYMBOL(__get_user_4); +EXPORT_SYMBOL(__get_user_8); + +#ifdef __ARMEB__ +EXPORT_SYMBOL(__get_user_64t_1); +EXPORT_SYMBOL(__get_user_64t_2); +EXPORT_SYMBOL(__get_user_64t_4); +EXPORT_SYMBOL(__get_user_32t_8); +#endif + +EXPORT_SYMBOL(__put_user_1); +EXPORT_SYMBOL(__put_user_2); +EXPORT_SYMBOL(__put_user_4); +EXPORT_SYMBOL(__put_user_8); +#endif + + /* gcc lib functions */ +EXPORT_SYMBOL(__ashldi3); +EXPORT_SYMBOL(__ashrdi3); +EXPORT_SYMBOL(__divsi3); +EXPORT_SYMBOL(__lshrdi3); +EXPORT_SYMBOL(__modsi3); +EXPORT_SYMBOL(__muldi3); +EXPORT_SYMBOL(__ucmpdi2); +EXPORT_SYMBOL(__udivsi3); +EXPORT_SYMBOL(__umodsi3); +EXPORT_SYMBOL(__do_div64); +EXPORT_SYMBOL(__bswapsi2); +EXPORT_SYMBOL(__bswapdi2); + +#ifdef CONFIG_AEABI +EXPORT_SYMBOL(__aeabi_idiv); +EXPORT_SYMBOL(__aeabi_idivmod); +EXPORT_SYMBOL(__aeabi_lasr); +EXPORT_SYMBOL(__aeabi_llsl); +EXPORT_SYMBOL(__aeabi_llsr); +EXPORT_SYMBOL(__aeabi_lmul); +EXPORT_SYMBOL(__aeabi_uidiv); +EXPORT_SYMBOL(__aeabi_uidivmod); +EXPORT_SYMBOL(__aeabi_ulcmp); +#endif + + /* bitops */ +EXPORT_SYMBOL(_set_bit); +EXPORT_SYMBOL(_test_and_set_bit); +EXPORT_SYMBOL(_clear_bit); +EXPORT_SYMBOL(_test_and_clear_bit); +EXPORT_SYMBOL(_change_bit); +EXPORT_SYMBOL(_test_and_change_bit); +EXPORT_SYMBOL(_find_first_zero_bit_le); +EXPORT_SYMBOL(_find_next_zero_bit_le); +EXPORT_SYMBOL(_find_first_bit_le); +EXPORT_SYMBOL(_find_next_bit_le); + +#ifdef __ARMEB__ +EXPORT_SYMBOL(_find_first_zero_bit_be); +EXPORT_SYMBOL(_find_next_zero_bit_be); +EXPORT_SYMBOL(_find_first_bit_be); +EXPORT_SYMBOL(_find_next_bit_be); +#endif + +#ifdef CONFIG_FUNCTION_TRACER +#ifdef CONFIG_OLD_MCOUNT +EXPORT_SYMBOL(mcount); +#endif +EXPORT_SYMBOL(__gnu_mcount_nc); +#endif + +#ifdef CONFIG_ARM_PATCH_PHYS_VIRT +EXPORT_SYMBOL(__pv_phys_pfn_offset); +EXPORT_SYMBOL(__pv_offset); +#endif + +#ifdef CONFIG_HAVE_ARM_SMCCC +EXPORT_SYMBOL(arm_smccc_smc); +EXPORT_SYMBOL(arm_smccc_hvc); +#endif diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S index b629d3f11c3d..c73c4030ca5d 100644 --- a/arch/arm/kernel/entry-ftrace.S +++ b/arch/arm/kernel/entry-ftrace.S @@ -7,7 +7,6 @@ #include #include #include -#include #include "entry-header.S" @@ -154,7 +153,6 @@ ENTRY(mcount) __mcount _old #endif ENDPROC(mcount) -EXPORT_SYMBOL(mcount) #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(ftrace_caller_old) @@ -207,7 +205,6 @@ UNWIND(.fnstart) #endif UNWIND(.fnend) ENDPROC(__gnu_mcount_nc) -EXPORT_SYMBOL(__gnu_mcount_nc) #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(ftrace_caller) diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index f41cee4c5746..04286fd9e09c 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -22,7 +22,6 @@ #include #include #include -#include #if defined(CONFIG_DEBUG_LL) && !defined(CONFIG_DEBUG_SEMIHOSTING) #include CONFIG_DEBUG_LL_INCLUDE @@ -728,8 +727,6 @@ __pv_phys_pfn_offset: __pv_offset: .quad 0 .size __pv_offset, . -__pv_offset -EXPORT_SYMBOL(__pv_phys_pfn_offset) -EXPORT_SYMBOL(__pv_offset) #endif #include "head-common.S" diff --git a/arch/arm/kernel/smccc-call.S b/arch/arm/kernel/smccc-call.S index 37669e7e13af..2e48b674aab1 100644 --- a/arch/arm/kernel/smccc-call.S +++ b/arch/arm/kernel/smccc-call.S @@ -16,7 +16,6 @@ #include #include #include -#include /* * Wrap c macros in asm macros to delay expansion until after the @@ -52,7 +51,6 @@ UNWIND( .fnend) ENTRY(arm_smccc_smc) SMCCC SMCCC_SMC ENDPROC(arm_smccc_smc) -EXPORT_SYMBOL(arm_smccc_smc) /* * void smccc_hvc(unsigned long a0, unsigned long a1, unsigned long a2, @@ -62,4 +60,3 @@ EXPORT_SYMBOL(arm_smccc_smc) ENTRY(arm_smccc_hvc) SMCCC SMCCC_HVC ENDPROC(arm_smccc_hvc) -EXPORT_SYMBOL(arm_smccc_hvc) diff --git a/arch/arm/lib/ashldi3.S b/arch/arm/lib/ashldi3.S index a7e7de89bd75..b05e95840651 100644 --- a/arch/arm/lib/ashldi3.S +++ b/arch/arm/lib/ashldi3.S @@ -28,7 +28,6 @@ Boston, MA 02110-1301, USA. */ #include #include -#include #ifdef __ARMEB__ #define al r1 @@ -53,5 +52,3 @@ ENTRY(__aeabi_llsl) ENDPROC(__ashldi3) ENDPROC(__aeabi_llsl) -EXPORT_SYMBOL(__ashldi3) -EXPORT_SYMBOL(__aeabi_llsl) diff --git a/arch/arm/lib/ashrdi3.S b/arch/arm/lib/ashrdi3.S index 490336e42518..275d7d2341a4 100644 --- a/arch/arm/lib/ashrdi3.S +++ b/arch/arm/lib/ashrdi3.S @@ -28,7 +28,6 @@ Boston, MA 02110-1301, USA. */ #include #include -#include #ifdef __ARMEB__ #define al r1 @@ -53,5 +52,3 @@ ENTRY(__aeabi_lasr) ENDPROC(__ashrdi3) ENDPROC(__aeabi_lasr) -EXPORT_SYMBOL(__ashrdi3) -EXPORT_SYMBOL(__aeabi_lasr) diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h index df06638b327c..7d807cfd8ef5 100644 --- a/arch/arm/lib/bitops.h +++ b/arch/arm/lib/bitops.h @@ -1,6 +1,5 @@ #include #include -#include #if __LINUX_ARM_ARCH__ >= 6 .macro bitop, name, instr @@ -26,7 +25,6 @@ UNWIND( .fnstart ) bx lr UNWIND( .fnend ) ENDPROC(\name ) -EXPORT_SYMBOL(\name ) .endm .macro testop, name, instr, store @@ -57,7 +55,6 @@ UNWIND( .fnstart ) 2: bx lr UNWIND( .fnend ) ENDPROC(\name ) -EXPORT_SYMBOL(\name ) .endm #else .macro bitop, name, instr @@ -77,7 +74,6 @@ UNWIND( .fnstart ) ret lr UNWIND( .fnend ) ENDPROC(\name ) -EXPORT_SYMBOL(\name ) .endm /** @@ -106,6 +102,5 @@ UNWIND( .fnstart ) ret lr UNWIND( .fnend ) ENDPROC(\name ) -EXPORT_SYMBOL(\name ) .endm #endif diff --git a/arch/arm/lib/bswapsdi2.S b/arch/arm/lib/bswapsdi2.S index f05f78247304..07cda737bb11 100644 --- a/arch/arm/lib/bswapsdi2.S +++ b/arch/arm/lib/bswapsdi2.S @@ -1,6 +1,5 @@ #include #include -#include #if __LINUX_ARM_ARCH__ >= 6 ENTRY(__bswapsi2) @@ -36,5 +35,3 @@ ENTRY(__bswapdi2) ret lr ENDPROC(__bswapdi2) #endif -EXPORT_SYMBOL(__bswapsi2) -EXPORT_SYMBOL(__bswapdi2) diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S index b566154f5cf4..e936352ccb00 100644 --- a/arch/arm/lib/clear_user.S +++ b/arch/arm/lib/clear_user.S @@ -10,7 +10,6 @@ #include #include #include -#include .text @@ -51,9 +50,6 @@ USER( strnebt r2, [r0]) UNWIND(.fnend) ENDPROC(arm_clear_user) ENDPROC(__clear_user_std) -#ifndef CONFIG_UACCESS_WITH_MEMCPY -EXPORT_SYMBOL(arm_clear_user) -#endif .pushsection .text.fixup,"ax" .align 0 diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S index 63e4c1ed0225..7a4b06049001 100644 --- a/arch/arm/lib/copy_from_user.S +++ b/arch/arm/lib/copy_from_user.S @@ -13,7 +13,6 @@ #include #include #include -#include /* * Prototype: @@ -95,7 +94,6 @@ ENTRY(arm_copy_from_user) #include "copy_template.S" ENDPROC(arm_copy_from_user) -EXPORT_SYMBOL(arm_copy_from_user) .pushsection .fixup,"ax" .align 0 diff --git a/arch/arm/lib/copy_page.S b/arch/arm/lib/copy_page.S index d97851d4af7a..6ee2f6706f86 100644 --- a/arch/arm/lib/copy_page.S +++ b/arch/arm/lib/copy_page.S @@ -13,7 +13,6 @@ #include #include #include -#include #define COPY_COUNT (PAGE_SZ / (2 * L1_CACHE_BYTES) PLD( -1 )) @@ -46,4 +45,3 @@ ENTRY(copy_page) PLD( beq 2b ) ldmfd sp!, {r4, pc} @ 3 ENDPROC(copy_page) -EXPORT_SYMBOL(copy_page) diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S index 592c179112d1..caf5019d8161 100644 --- a/arch/arm/lib/copy_to_user.S +++ b/arch/arm/lib/copy_to_user.S @@ -13,7 +13,6 @@ #include #include #include -#include /* * Prototype: @@ -100,9 +99,6 @@ WEAK(arm_copy_to_user) ENDPROC(arm_copy_to_user) ENDPROC(__copy_to_user_std) -#ifndef CONFIG_UACCESS_WITH_MEMCPY -EXPORT_SYMBOL(arm_copy_to_user) -#endif .pushsection .text.fixup,"ax" .align 0 diff --git a/arch/arm/lib/csumipv6.S b/arch/arm/lib/csumipv6.S index 68603b5ee537..3ac6ef01bc43 100644 --- a/arch/arm/lib/csumipv6.S +++ b/arch/arm/lib/csumipv6.S @@ -9,7 +9,6 @@ */ #include #include -#include .text @@ -31,4 +30,4 @@ ENTRY(__csum_ipv6_magic) adcs r0, r0, #0 ldmfd sp!, {pc} ENDPROC(__csum_ipv6_magic) -EXPORT_SYMBOL(__csum_ipv6_magic) + diff --git a/arch/arm/lib/csumpartial.S b/arch/arm/lib/csumpartial.S index 830b20e81c37..984e0f29d548 100644 --- a/arch/arm/lib/csumpartial.S +++ b/arch/arm/lib/csumpartial.S @@ -9,7 +9,6 @@ */ #include #include -#include .text @@ -141,4 +140,3 @@ ENTRY(csum_partial) bne 4b b .Lless4 ENDPROC(csum_partial) -EXPORT_SYMBOL(csum_partial) diff --git a/arch/arm/lib/csumpartialcopy.S b/arch/arm/lib/csumpartialcopy.S index 9c3383fed129..d03fc71fc88c 100644 --- a/arch/arm/lib/csumpartialcopy.S +++ b/arch/arm/lib/csumpartialcopy.S @@ -49,6 +49,5 @@ #define FN_ENTRY ENTRY(csum_partial_copy_nocheck) #define FN_EXIT ENDPROC(csum_partial_copy_nocheck) -#define FN_EXPORT EXPORT_SYMBOL(csum_partial_copy_nocheck) #include "csumpartialcopygeneric.S" diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S index 8b94d20e51d1..10b45909610c 100644 --- a/arch/arm/lib/csumpartialcopygeneric.S +++ b/arch/arm/lib/csumpartialcopygeneric.S @@ -8,7 +8,6 @@ * published by the Free Software Foundation. */ #include -#include /* * unsigned int @@ -332,4 +331,3 @@ FN_ENTRY mov r5, r4, get_byte_1 b .Lexit FN_EXIT -FN_EXPORT diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S index 5d495edf3d83..1712f132b80d 100644 --- a/arch/arm/lib/csumpartialcopyuser.S +++ b/arch/arm/lib/csumpartialcopyuser.S @@ -73,7 +73,6 @@ #define FN_ENTRY ENTRY(csum_partial_copy_from_user) #define FN_EXIT ENDPROC(csum_partial_copy_from_user) -#define FN_EXPORT EXPORT_SYMBOL(csum_partial_copy_from_user) #include "csumpartialcopygeneric.S" diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c index 69aad80a3af4..2cef11884857 100644 --- a/arch/arm/lib/delay.c +++ b/arch/arm/lib/delay.c @@ -24,7 +24,6 @@ #include #include #include -#include #include /* @@ -35,7 +34,6 @@ struct arm_delay_ops arm_delay_ops __ro_after_init = { .const_udelay = __loop_const_udelay, .udelay = __loop_udelay, }; -EXPORT_SYMBOL(arm_delay_ops); static const struct delay_timer *delay_timer; static bool delay_calibrated; diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S index 0c9e1c18fc9e..a9eafe4981eb 100644 --- a/arch/arm/lib/div64.S +++ b/arch/arm/lib/div64.S @@ -15,7 +15,6 @@ #include #include #include -#include #ifdef __ARMEB__ #define xh r0 @@ -211,4 +210,3 @@ Ldiv0_64: UNWIND(.fnend) ENDPROC(__do_div64) -EXPORT_SYMBOL(__do_div64) diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S index 26302b8cd38f..7848780e8834 100644 --- a/arch/arm/lib/findbit.S +++ b/arch/arm/lib/findbit.S @@ -15,7 +15,6 @@ */ #include #include -#include .text /* @@ -38,7 +37,6 @@ ENTRY(_find_first_zero_bit_le) 3: mov r0, r1 @ no free bits ret lr ENDPROC(_find_first_zero_bit_le) -EXPORT_SYMBOL(_find_first_zero_bit_le) /* * Purpose : Find next 'zero' bit @@ -59,7 +57,6 @@ ENTRY(_find_next_zero_bit_le) add r2, r2, #1 @ align bit pointer b 2b @ loop for next bit ENDPROC(_find_next_zero_bit_le) -EXPORT_SYMBOL(_find_next_zero_bit_le) /* * Purpose : Find a 'one' bit @@ -81,7 +78,6 @@ ENTRY(_find_first_bit_le) 3: mov r0, r1 @ no free bits ret lr ENDPROC(_find_first_bit_le) -EXPORT_SYMBOL(_find_first_bit_le) /* * Purpose : Find next 'one' bit @@ -101,7 +97,6 @@ ENTRY(_find_next_bit_le) add r2, r2, #1 @ align bit pointer b 2b @ loop for next bit ENDPROC(_find_next_bit_le) -EXPORT_SYMBOL(_find_next_bit_le) #ifdef __ARMEB__ @@ -121,7 +116,6 @@ ENTRY(_find_first_zero_bit_be) 3: mov r0, r1 @ no free bits ret lr ENDPROC(_find_first_zero_bit_be) -EXPORT_SYMBOL(_find_first_zero_bit_be) ENTRY(_find_next_zero_bit_be) teq r1, #0 @@ -139,7 +133,6 @@ ENTRY(_find_next_zero_bit_be) add r2, r2, #1 @ align bit pointer b 2b @ loop for next bit ENDPROC(_find_next_zero_bit_be) -EXPORT_SYMBOL(_find_next_zero_bit_be) ENTRY(_find_first_bit_be) teq r1, #0 @@ -157,7 +150,6 @@ ENTRY(_find_first_bit_be) 3: mov r0, r1 @ no free bits ret lr ENDPROC(_find_first_bit_be) -EXPORT_SYMBOL(_find_first_bit_be) ENTRY(_find_next_bit_be) teq r1, #0 @@ -174,7 +166,6 @@ ENTRY(_find_next_bit_be) add r2, r2, #1 @ align bit pointer b 2b @ loop for next bit ENDPROC(_find_next_bit_be) -EXPORT_SYMBOL(_find_next_bit_be) #endif diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S index 9d09a38e73af..8ecfd15c3a02 100644 --- a/arch/arm/lib/getuser.S +++ b/arch/arm/lib/getuser.S @@ -31,7 +31,6 @@ #include #include #include -#include ENTRY(__get_user_1) check_uaccess r0, 1, r1, r2, __get_user_bad @@ -39,7 +38,6 @@ ENTRY(__get_user_1) mov r0, #0 ret lr ENDPROC(__get_user_1) -EXPORT_SYMBOL(__get_user_1) ENTRY(__get_user_2) check_uaccess r0, 2, r1, r2, __get_user_bad @@ -60,7 +58,6 @@ rb .req r0 mov r0, #0 ret lr ENDPROC(__get_user_2) -EXPORT_SYMBOL(__get_user_2) ENTRY(__get_user_4) check_uaccess r0, 4, r1, r2, __get_user_bad @@ -68,7 +65,6 @@ ENTRY(__get_user_4) mov r0, #0 ret lr ENDPROC(__get_user_4) -EXPORT_SYMBOL(__get_user_4) ENTRY(__get_user_8) check_uaccess r0, 8, r1, r2, __get_user_bad @@ -82,7 +78,6 @@ ENTRY(__get_user_8) mov r0, #0 ret lr ENDPROC(__get_user_8) -EXPORT_SYMBOL(__get_user_8) #ifdef __ARMEB__ ENTRY(__get_user_32t_8) @@ -96,7 +91,6 @@ ENTRY(__get_user_32t_8) mov r0, #0 ret lr ENDPROC(__get_user_32t_8) -EXPORT_SYMBOL(__get_user_32t_8) ENTRY(__get_user_64t_1) check_uaccess r0, 1, r1, r2, __get_user_bad8 @@ -104,7 +98,6 @@ ENTRY(__get_user_64t_1) mov r0, #0 ret lr ENDPROC(__get_user_64t_1) -EXPORT_SYMBOL(__get_user_64t_1) ENTRY(__get_user_64t_2) check_uaccess r0, 2, r1, r2, __get_user_bad8 @@ -121,7 +114,6 @@ rb .req r0 mov r0, #0 ret lr ENDPROC(__get_user_64t_2) -EXPORT_SYMBOL(__get_user_64t_2) ENTRY(__get_user_64t_4) check_uaccess r0, 4, r1, r2, __get_user_bad8 @@ -129,7 +121,6 @@ ENTRY(__get_user_64t_4) mov r0, #0 ret lr ENDPROC(__get_user_64t_4) -EXPORT_SYMBOL(__get_user_64t_4) #endif __get_user_bad8: diff --git a/arch/arm/lib/io-readsb.S b/arch/arm/lib/io-readsb.S index 3dff7a3a2aef..c31b2f3153f1 100644 --- a/arch/arm/lib/io-readsb.S +++ b/arch/arm/lib/io-readsb.S @@ -9,7 +9,6 @@ */ #include #include -#include .Linsb_align: rsb ip, ip, #4 cmp ip, r2 @@ -122,4 +121,3 @@ ENTRY(__raw_readsb) ldmfd sp!, {r4 - r6, pc} ENDPROC(__raw_readsb) -EXPORT_SYMBOL(__raw_readsb) diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S index bfd39682325b..2ed86fa5465f 100644 --- a/arch/arm/lib/io-readsl.S +++ b/arch/arm/lib/io-readsl.S @@ -9,7 +9,6 @@ */ #include #include -#include ENTRY(__raw_readsl) teq r2, #0 @ do we have to check for the zero len? @@ -78,4 +77,3 @@ ENTRY(__raw_readsl) strb r3, [r1, #0] ret lr ENDPROC(__raw_readsl) -EXPORT_SYMBOL(__raw_readsl) diff --git a/arch/arm/lib/io-readsw-armv3.S b/arch/arm/lib/io-readsw-armv3.S index b3af3db6caac..413da9914529 100644 --- a/arch/arm/lib/io-readsw-armv3.S +++ b/arch/arm/lib/io-readsw-armv3.S @@ -9,7 +9,6 @@ */ #include #include -#include .Linsw_bad_alignment: adr r0, .Linsw_bad_align_msg @@ -104,4 +103,4 @@ ENTRY(__raw_readsw) ldmfd sp!, {r4, r5, r6, pc} -EXPORT_SYMBOL(__raw_readsw) + diff --git a/arch/arm/lib/io-readsw-armv4.S b/arch/arm/lib/io-readsw-armv4.S index 3c7a7a40b33e..d9a45e9692ae 100644 --- a/arch/arm/lib/io-readsw-armv4.S +++ b/arch/arm/lib/io-readsw-armv4.S @@ -9,7 +9,6 @@ */ #include #include -#include .macro pack, rd, hw1, hw2 #ifndef __ARMEB__ @@ -130,4 +129,3 @@ ENTRY(__raw_readsw) strneb ip, [r1] ldmfd sp!, {r4, pc} ENDPROC(__raw_readsw) -EXPORT_SYMBOL(__raw_readsw) diff --git a/arch/arm/lib/io-writesb.S b/arch/arm/lib/io-writesb.S index fa3633594415..a46bbc9b168b 100644 --- a/arch/arm/lib/io-writesb.S +++ b/arch/arm/lib/io-writesb.S @@ -9,7 +9,6 @@ */ #include #include -#include .macro outword, rd #ifndef __ARMEB__ @@ -93,4 +92,3 @@ ENTRY(__raw_writesb) ldmfd sp!, {r4, r5, pc} ENDPROC(__raw_writesb) -EXPORT_SYMBOL(__raw_writesb) diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S index 98ed6aec0b47..4ea2435988c1 100644 --- a/arch/arm/lib/io-writesl.S +++ b/arch/arm/lib/io-writesl.S @@ -9,7 +9,6 @@ */ #include #include -#include ENTRY(__raw_writesl) teq r2, #0 @ do we have to check for the zero len? @@ -66,4 +65,3 @@ ENTRY(__raw_writesl) bne 6b ret lr ENDPROC(__raw_writesl) -EXPORT_SYMBOL(__raw_writesl) diff --git a/arch/arm/lib/io-writesw-armv3.S b/arch/arm/lib/io-writesw-armv3.S index 577184c082bb..121789eb6802 100644 --- a/arch/arm/lib/io-writesw-armv3.S +++ b/arch/arm/lib/io-writesw-armv3.S @@ -9,7 +9,6 @@ */ #include #include -#include .Loutsw_bad_alignment: adr r0, .Loutsw_bad_align_msg @@ -125,4 +124,3 @@ ENTRY(__raw_writesw) strne ip, [r0] ldmfd sp!, {r4, r5, r6, pc} -EXPORT_SYMBOL(__raw_writesw) diff --git a/arch/arm/lib/io-writesw-armv4.S b/arch/arm/lib/io-writesw-armv4.S index e335f489d1fc..269f90c51ad2 100644 --- a/arch/arm/lib/io-writesw-armv4.S +++ b/arch/arm/lib/io-writesw-armv4.S @@ -9,7 +9,6 @@ */ #include #include -#include .macro outword, rd #ifndef __ARMEB__ @@ -99,4 +98,3 @@ ENTRY(__raw_writesw) strneh ip, [r0] ret lr ENDPROC(__raw_writesw) -EXPORT_SYMBOL(__raw_writesw) diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S index f541bc013bff..9397b2e532af 100644 --- a/arch/arm/lib/lib1funcs.S +++ b/arch/arm/lib/lib1funcs.S @@ -36,7 +36,6 @@ Boston, MA 02111-1307, USA. */ #include #include #include -#include .macro ARM_DIV_BODY dividend, divisor, result, curbit @@ -239,8 +238,6 @@ UNWIND(.fnstart) UNWIND(.fnend) ENDPROC(__udivsi3) ENDPROC(__aeabi_uidiv) -EXPORT_SYMBOL(__udivsi3) -EXPORT_SYMBOL(__aeabi_uidiv) ENTRY(__umodsi3) UNWIND(.fnstart) @@ -259,7 +256,6 @@ UNWIND(.fnstart) UNWIND(.fnend) ENDPROC(__umodsi3) -EXPORT_SYMBOL(__umodsi3) #ifdef CONFIG_ARM_PATCH_IDIV .align 3 @@ -307,8 +303,6 @@ UNWIND(.fnstart) UNWIND(.fnend) ENDPROC(__divsi3) ENDPROC(__aeabi_idiv) -EXPORT_SYMBOL(__divsi3) -EXPORT_SYMBOL(__aeabi_idiv) ENTRY(__modsi3) UNWIND(.fnstart) @@ -333,7 +327,6 @@ UNWIND(.fnstart) UNWIND(.fnend) ENDPROC(__modsi3) -EXPORT_SYMBOL(__modsi3) #ifdef CONFIG_AEABI @@ -350,7 +343,6 @@ UNWIND(.save {r0, r1, ip, lr} ) UNWIND(.fnend) ENDPROC(__aeabi_uidivmod) -EXPORT_SYMBOL(__aeabi_uidivmod) ENTRY(__aeabi_idivmod) UNWIND(.fnstart) @@ -364,7 +356,6 @@ UNWIND(.save {r0, r1, ip, lr} ) UNWIND(.fnend) ENDPROC(__aeabi_idivmod) -EXPORT_SYMBOL(__aeabi_idivmod) #endif diff --git a/arch/arm/lib/lshrdi3.S b/arch/arm/lib/lshrdi3.S index e40833981417..922dcd88b02b 100644 --- a/arch/arm/lib/lshrdi3.S +++ b/arch/arm/lib/lshrdi3.S @@ -28,7 +28,6 @@ Boston, MA 02110-1301, USA. */ #include #include -#include #ifdef __ARMEB__ #define al r1 @@ -53,5 +52,3 @@ ENTRY(__aeabi_llsr) ENDPROC(__lshrdi3) ENDPROC(__aeabi_llsr) -EXPORT_SYMBOL(__lshrdi3) -EXPORT_SYMBOL(__aeabi_llsr) diff --git a/arch/arm/lib/memchr.S b/arch/arm/lib/memchr.S index 44182bf686a5..74a5bed6d999 100644 --- a/arch/arm/lib/memchr.S +++ b/arch/arm/lib/memchr.S @@ -11,7 +11,6 @@ */ #include #include -#include .text .align 5 @@ -25,4 +24,3 @@ ENTRY(memchr) 2: movne r0, #0 ret lr ENDPROC(memchr) -EXPORT_SYMBOL(memchr) diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S index 1be5b6ddf37c..64111bd4440b 100644 --- a/arch/arm/lib/memcpy.S +++ b/arch/arm/lib/memcpy.S @@ -13,7 +13,6 @@ #include #include #include -#include #define LDR1W_SHIFT 0 #define STR1W_SHIFT 0 @@ -69,5 +68,3 @@ ENTRY(memcpy) ENDPROC(memcpy) ENDPROC(mmiocpy) -EXPORT_SYMBOL(memcpy) -EXPORT_SYMBOL(mmiocpy) diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S index 71dcc5400d02..69a9d47fc5ab 100644 --- a/arch/arm/lib/memmove.S +++ b/arch/arm/lib/memmove.S @@ -13,7 +13,6 @@ #include #include #include -#include .text @@ -226,4 +225,3 @@ ENTRY(memmove) 18: backward_copy_shift push=24 pull=8 ENDPROC(memmove) -EXPORT_SYMBOL(memmove) diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S index 7b72044cba62..3c65e3bd790f 100644 --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib/memset.S @@ -12,7 +12,6 @@ #include #include #include -#include .text .align 5 @@ -136,5 +135,3 @@ UNWIND( .fnstart ) UNWIND( .fnend ) ENDPROC(memset) ENDPROC(mmioset) -EXPORT_SYMBOL(memset) -EXPORT_SYMBOL(mmioset) diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S index 6dec26ed5bcc..0eded952e089 100644 --- a/arch/arm/lib/memzero.S +++ b/arch/arm/lib/memzero.S @@ -10,7 +10,6 @@ #include #include #include -#include .text .align 5 @@ -136,4 +135,3 @@ UNWIND( .fnstart ) ret lr @ 1 UNWIND( .fnend ) ENDPROC(__memzero) -EXPORT_SYMBOL(__memzero) diff --git a/arch/arm/lib/muldi3.S b/arch/arm/lib/muldi3.S index b8f12388ccac..204305956925 100644 --- a/arch/arm/lib/muldi3.S +++ b/arch/arm/lib/muldi3.S @@ -12,7 +12,6 @@ #include #include -#include #ifdef __ARMEB__ #define xh r0 @@ -47,5 +46,3 @@ ENTRY(__aeabi_lmul) ENDPROC(__muldi3) ENDPROC(__aeabi_lmul) -EXPORT_SYMBOL(__muldi3) -EXPORT_SYMBOL(__aeabi_lmul) diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S index 11de126e2ed6..38d660d3705f 100644 --- a/arch/arm/lib/putuser.S +++ b/arch/arm/lib/putuser.S @@ -31,7 +31,6 @@ #include #include #include -#include ENTRY(__put_user_1) check_uaccess r0, 1, r1, ip, __put_user_bad @@ -39,7 +38,6 @@ ENTRY(__put_user_1) mov r0, #0 ret lr ENDPROC(__put_user_1) -EXPORT_SYMBOL(__put_user_1) ENTRY(__put_user_2) check_uaccess r0, 2, r1, ip, __put_user_bad @@ -64,7 +62,6 @@ ENTRY(__put_user_2) mov r0, #0 ret lr ENDPROC(__put_user_2) -EXPORT_SYMBOL(__put_user_2) ENTRY(__put_user_4) check_uaccess r0, 4, r1, ip, __put_user_bad @@ -72,7 +69,6 @@ ENTRY(__put_user_4) mov r0, #0 ret lr ENDPROC(__put_user_4) -EXPORT_SYMBOL(__put_user_4) ENTRY(__put_user_8) check_uaccess r0, 8, r1, ip, __put_user_bad @@ -86,7 +82,6 @@ ENTRY(__put_user_8) mov r0, #0 ret lr ENDPROC(__put_user_8) -EXPORT_SYMBOL(__put_user_8) __put_user_bad: mov r0, #-EFAULT diff --git a/arch/arm/lib/strchr.S b/arch/arm/lib/strchr.S index 7301f6e6046c..013d64c71e8d 100644 --- a/arch/arm/lib/strchr.S +++ b/arch/arm/lib/strchr.S @@ -11,7 +11,6 @@ */ #include #include -#include .text .align 5 @@ -26,4 +25,3 @@ ENTRY(strchr) subeq r0, r0, #1 ret lr ENDPROC(strchr) -EXPORT_SYMBOL(strchr) diff --git a/arch/arm/lib/strrchr.S b/arch/arm/lib/strrchr.S index aaf9fd98b754..3cec1c7482c4 100644 --- a/arch/arm/lib/strrchr.S +++ b/arch/arm/lib/strrchr.S @@ -11,7 +11,6 @@ */ #include #include -#include .text .align 5 @@ -25,4 +24,3 @@ ENTRY(strrchr) mov r0, r3 ret lr ENDPROC(strrchr) -EXPORT_SYMBOL(strrchr) diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c index 1626e3a551a1..6bd1089b07e0 100644 --- a/arch/arm/lib/uaccess_with_memcpy.c +++ b/arch/arm/lib/uaccess_with_memcpy.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include @@ -157,7 +156,6 @@ arm_copy_to_user(void __user *to, const void *from, unsigned long n) } return n; } -EXPORT_SYMBOL(arm_copy_to_user); static unsigned long noinline __clear_user_memset(void __user *addr, unsigned long n) @@ -215,7 +213,6 @@ unsigned long arm_clear_user(void __user *addr, unsigned long n) } return n; } -EXPORT_SYMBOL(arm_clear_user); #if 0 diff --git a/arch/arm/lib/ucmpdi2.S b/arch/arm/lib/ucmpdi2.S index 127a91af46f3..ad4a6309141a 100644 --- a/arch/arm/lib/ucmpdi2.S +++ b/arch/arm/lib/ucmpdi2.S @@ -12,7 +12,6 @@ #include #include -#include #ifdef __ARMEB__ #define xh r0 @@ -36,7 +35,6 @@ ENTRY(__ucmpdi2) ret lr ENDPROC(__ucmpdi2) -EXPORT_SYMBOL(__ucmpdi2) #ifdef CONFIG_AEABI @@ -50,7 +48,6 @@ ENTRY(__aeabi_ulcmp) ret lr ENDPROC(__aeabi_ulcmp) -EXPORT_SYMBOL(__aeabi_ulcmp) #endif diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index 737450fe790c..cab128913e72 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -32,6 +32,7 @@ endif ifdef CONFIG_SND_IMX_SOC obj-y += ssi-fiq.o +obj-y += ssi-fiq-ksym.o endif # i.MX21 based machines diff --git a/arch/arm/mach-imx/ssi-fiq-ksym.c b/arch/arm/mach-imx/ssi-fiq-ksym.c new file mode 100644 index 000000000000..792090f9a032 --- /dev/null +++ b/arch/arm/mach-imx/ssi-fiq-ksym.c @@ -0,0 +1,20 @@ +/* + * Exported ksyms for the SSI FIQ handler + * + * Copyright (C) 2009, Sascha Hauer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + +#include + +EXPORT_SYMBOL(imx_ssi_fiq_tx_buffer); +EXPORT_SYMBOL(imx_ssi_fiq_rx_buffer); +EXPORT_SYMBOL(imx_ssi_fiq_start); +EXPORT_SYMBOL(imx_ssi_fiq_end); +EXPORT_SYMBOL(imx_ssi_fiq_base); + diff --git a/arch/arm/mach-imx/ssi-fiq.S b/arch/arm/mach-imx/ssi-fiq.S index fd7917f1c204..a8b93c5f29b5 100644 --- a/arch/arm/mach-imx/ssi-fiq.S +++ b/arch/arm/mach-imx/ssi-fiq.S @@ -8,7 +8,6 @@ #include #include -#include /* * r8 = bit 0-15: tx offset, bit 16-31: tx buffer size @@ -145,8 +144,4 @@ imx_ssi_fiq_tx_buffer: .word 0x0 .L_imx_ssi_fiq_end: imx_ssi_fiq_end: -EXPORT_SYMBOL(imx_ssi_fiq_tx_buffer) -EXPORT_SYMBOL(imx_ssi_fiq_rx_buffer) -EXPORT_SYMBOL(imx_ssi_fiq_start) -EXPORT_SYMBOL(imx_ssi_fiq_end) -EXPORT_SYMBOL(imx_ssi_fiq_base) + From 5499a6b22e5508b921c447757685b0a5e40a07ed Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 23 Nov 2016 14:33:25 +0100 Subject: [PATCH 442/503] can: bcm: fix support for CAN FD frames Since commit 6f3b911d5f29b98 ("can: bcm: add support for CAN FD frames") the CAN broadcast manager supports CAN and CAN FD data frames. As these data frames are embedded in struct can[fd]_frames which have a different length the access to the provided array of CAN frames became dependend of op->cfsiz. By using a struct canfd_frame pointer for the array of CAN frames the new offset calculation based on op->cfsiz was accidently applied to CAN FD frame element lengths. This fix makes the pointer to the arrays of the different CAN frame types a void pointer so that the offset calculation in bytes accesses the correct CAN frame elements. Reference: http://marc.info/?l=linux-netdev&m=147980658909653 Reported-by: Andrey Konovalov Signed-off-by: Oliver Hartkopp Tested-by: Andrey Konovalov Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- net/can/bcm.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/net/can/bcm.c b/net/can/bcm.c index 8af9d25ff988..436a7537e6a9 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -77,7 +77,7 @@ (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG) : \ (CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG)) -#define CAN_BCM_VERSION "20160617" +#define CAN_BCM_VERSION "20161123" MODULE_DESCRIPTION("PF_CAN broadcast manager protocol"); MODULE_LICENSE("Dual BSD/GPL"); @@ -109,8 +109,9 @@ struct bcm_op { u32 count; u32 nframes; u32 currframe; - struct canfd_frame *frames; - struct canfd_frame *last_frames; + /* void pointers to arrays of struct can[fd]_frame */ + void *frames; + void *last_frames; struct canfd_frame sframe; struct canfd_frame last_sframe; struct sock *sk; @@ -681,7 +682,7 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data) if (op->flags & RX_FILTER_ID) { /* the easiest case */ - bcm_rx_update_and_send(op, &op->last_frames[0], rxframe); + bcm_rx_update_and_send(op, op->last_frames, rxframe); goto rx_starttimer; } @@ -1068,7 +1069,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, if (msg_head->nframes) { /* update CAN frames content */ - err = memcpy_from_msg((u8 *)op->frames, msg, + err = memcpy_from_msg(op->frames, msg, msg_head->nframes * op->cfsiz); if (err < 0) return err; @@ -1118,7 +1119,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, } if (msg_head->nframes) { - err = memcpy_from_msg((u8 *)op->frames, msg, + err = memcpy_from_msg(op->frames, msg, msg_head->nframes * op->cfsiz); if (err < 0) { if (op->frames != &op->sframe) @@ -1163,6 +1164,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, /* check flags */ if (op->flags & RX_RTR_FRAME) { + struct canfd_frame *frame0 = op->frames; /* no timers in RTR-mode */ hrtimer_cancel(&op->thrtimer); @@ -1174,8 +1176,8 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, * prevent a full-load-loopback-test ... ;-] */ if ((op->flags & TX_CP_CAN_ID) || - (op->frames[0].can_id == op->can_id)) - op->frames[0].can_id = op->can_id & ~CAN_RTR_FLAG; + (frame0->can_id == op->can_id)) + frame0->can_id = op->can_id & ~CAN_RTR_FLAG; } else { if (op->flags & SETTIMER) { From 1ffb3c40ffb5c51bc39736409b11816c4260218e Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Mon, 21 Nov 2016 11:48:39 +0100 Subject: [PATCH 443/503] HID: cp2112: make transfer buffers DMA capable Kernel v4.9 strictly enforces DMA capable buffers, so we need to remove buffers allocated on the stack. Use a spinlock to prevent concurrent accesses to the buffer. Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-cp2112.c | 115 +++++++++++++++++++++++++++------------ 1 file changed, 79 insertions(+), 36 deletions(-) diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c index 086d8a507157..60d30203a5fa 100644 --- a/drivers/hid/hid-cp2112.c +++ b/drivers/hid/hid-cp2112.c @@ -32,6 +32,11 @@ #include #include "hid-ids.h" +#define CP2112_REPORT_MAX_LENGTH 64 +#define CP2112_GPIO_CONFIG_LENGTH 5 +#define CP2112_GPIO_GET_LENGTH 2 +#define CP2112_GPIO_SET_LENGTH 3 + enum { CP2112_GPIO_CONFIG = 0x02, CP2112_GPIO_GET = 0x03, @@ -161,6 +166,8 @@ struct cp2112_device { atomic_t read_avail; atomic_t xfer_avail; struct gpio_chip gc; + u8 *in_out_buffer; + spinlock_t lock; }; static int gpio_push_pull = 0xFF; @@ -171,62 +178,86 @@ static int cp2112_gpio_direction_input(struct gpio_chip *chip, unsigned offset) { struct cp2112_device *dev = gpiochip_get_data(chip); struct hid_device *hdev = dev->hdev; - u8 buf[5]; + u8 *buf = dev->in_out_buffer; + unsigned long flags; int ret; + spin_lock_irqsave(&dev->lock, flags); + ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf, - sizeof(buf), HID_FEATURE_REPORT, - HID_REQ_GET_REPORT); - if (ret != sizeof(buf)) { + CP2112_GPIO_CONFIG_LENGTH, HID_FEATURE_REPORT, + HID_REQ_GET_REPORT); + if (ret != CP2112_GPIO_CONFIG_LENGTH) { hid_err(hdev, "error requesting GPIO config: %d\n", ret); - return ret; + goto exit; } buf[1] &= ~(1 << offset); buf[2] = gpio_push_pull; - ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf, sizeof(buf), - HID_FEATURE_REPORT, HID_REQ_SET_REPORT); + ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf, + CP2112_GPIO_CONFIG_LENGTH, HID_FEATURE_REPORT, + HID_REQ_SET_REPORT); if (ret < 0) { hid_err(hdev, "error setting GPIO config: %d\n", ret); - return ret; + goto exit; } - return 0; + ret = 0; + +exit: + spin_unlock_irqrestore(&dev->lock, flags); + return ret <= 0 ? ret : -EIO; } static void cp2112_gpio_set(struct gpio_chip *chip, unsigned offset, int value) { struct cp2112_device *dev = gpiochip_get_data(chip); struct hid_device *hdev = dev->hdev; - u8 buf[3]; + u8 *buf = dev->in_out_buffer; + unsigned long flags; int ret; + spin_lock_irqsave(&dev->lock, flags); + buf[0] = CP2112_GPIO_SET; buf[1] = value ? 0xff : 0; buf[2] = 1 << offset; - ret = hid_hw_raw_request(hdev, CP2112_GPIO_SET, buf, sizeof(buf), - HID_FEATURE_REPORT, HID_REQ_SET_REPORT); + ret = hid_hw_raw_request(hdev, CP2112_GPIO_SET, buf, + CP2112_GPIO_SET_LENGTH, HID_FEATURE_REPORT, + HID_REQ_SET_REPORT); if (ret < 0) hid_err(hdev, "error setting GPIO values: %d\n", ret); + + spin_unlock_irqrestore(&dev->lock, flags); } static int cp2112_gpio_get(struct gpio_chip *chip, unsigned offset) { struct cp2112_device *dev = gpiochip_get_data(chip); struct hid_device *hdev = dev->hdev; - u8 buf[2]; + u8 *buf = dev->in_out_buffer; + unsigned long flags; int ret; - ret = hid_hw_raw_request(hdev, CP2112_GPIO_GET, buf, sizeof(buf), - HID_FEATURE_REPORT, HID_REQ_GET_REPORT); - if (ret != sizeof(buf)) { + spin_lock_irqsave(&dev->lock, flags); + + ret = hid_hw_raw_request(hdev, CP2112_GPIO_GET, buf, + CP2112_GPIO_GET_LENGTH, HID_FEATURE_REPORT, + HID_REQ_GET_REPORT); + if (ret != CP2112_GPIO_GET_LENGTH) { hid_err(hdev, "error requesting GPIO values: %d\n", ret); - return ret; + ret = ret < 0 ? ret : -EIO; + goto exit; } - return (buf[1] >> offset) & 1; + ret = (buf[1] >> offset) & 1; + +exit: + spin_unlock_irqrestore(&dev->lock, flags); + + return ret; } static int cp2112_gpio_direction_output(struct gpio_chip *chip, @@ -234,27 +265,33 @@ static int cp2112_gpio_direction_output(struct gpio_chip *chip, { struct cp2112_device *dev = gpiochip_get_data(chip); struct hid_device *hdev = dev->hdev; - u8 buf[5]; + u8 *buf = dev->in_out_buffer; + unsigned long flags; int ret; + spin_lock_irqsave(&dev->lock, flags); + ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf, - sizeof(buf), HID_FEATURE_REPORT, - HID_REQ_GET_REPORT); - if (ret != sizeof(buf)) { + CP2112_GPIO_CONFIG_LENGTH, HID_FEATURE_REPORT, + HID_REQ_GET_REPORT); + if (ret != CP2112_GPIO_CONFIG_LENGTH) { hid_err(hdev, "error requesting GPIO config: %d\n", ret); - return ret; + goto fail; } buf[1] |= 1 << offset; buf[2] = gpio_push_pull; - ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf, sizeof(buf), - HID_FEATURE_REPORT, HID_REQ_SET_REPORT); + ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf, + CP2112_GPIO_CONFIG_LENGTH, HID_FEATURE_REPORT, + HID_REQ_SET_REPORT); if (ret < 0) { hid_err(hdev, "error setting GPIO config: %d\n", ret); - return ret; + goto fail; } + spin_unlock_irqrestore(&dev->lock, flags); + /* * Set gpio value when output direction is already set, * as specified in AN495, Rev. 0.2, cpt. 4.4 @@ -262,6 +299,10 @@ static int cp2112_gpio_direction_output(struct gpio_chip *chip, cp2112_gpio_set(chip, offset, value); return 0; + +fail: + spin_unlock_irqrestore(&dev->lock, flags); + return ret < 0 ? ret : -EIO; } static int cp2112_hid_get(struct hid_device *hdev, unsigned char report_number, @@ -1007,6 +1048,17 @@ static int cp2112_probe(struct hid_device *hdev, const struct hid_device_id *id) struct cp2112_smbus_config_report config; int ret; + dev = devm_kzalloc(&hdev->dev, sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + dev->in_out_buffer = devm_kzalloc(&hdev->dev, CP2112_REPORT_MAX_LENGTH, + GFP_KERNEL); + if (!dev->in_out_buffer) + return -ENOMEM; + + spin_lock_init(&dev->lock); + ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); @@ -1063,12 +1115,6 @@ static int cp2112_probe(struct hid_device *hdev, const struct hid_device_id *id) goto err_power_normal; } - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) { - ret = -ENOMEM; - goto err_power_normal; - } - hid_set_drvdata(hdev, (void *)dev); dev->hdev = hdev; dev->adap.owner = THIS_MODULE; @@ -1087,7 +1133,7 @@ static int cp2112_probe(struct hid_device *hdev, const struct hid_device_id *id) if (ret) { hid_err(hdev, "error registering i2c adapter\n"); - goto err_free_dev; + goto err_power_normal; } hid_dbg(hdev, "adapter registered\n"); @@ -1123,8 +1169,6 @@ err_gpiochip_remove: gpiochip_remove(&dev->gc); err_free_i2c: i2c_del_adapter(&dev->adap); -err_free_dev: - kfree(dev); err_power_normal: hid_hw_power(hdev, PM_HINT_NORMAL); err_hid_close: @@ -1149,7 +1193,6 @@ static void cp2112_remove(struct hid_device *hdev) */ hid_hw_close(hdev); hid_hw_stop(hdev); - kfree(dev); } static int cp2112_raw_event(struct hid_device *hdev, struct hid_report *report, From 061232f0d47fa10103f3efa3e890f002a930d902 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Mon, 21 Nov 2016 11:48:40 +0100 Subject: [PATCH 444/503] HID: lg: make transfer buffers DMA capable Kernel v4.9 strictly enforces DMA capable buffers, so we need to remove buffers allocated on the stack. [jkosina@suse.cz: fix up second usage of hid_hw_raw_request(), spotted by 0day build bot] Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-lg.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/hid/hid-lg.c b/drivers/hid/hid-lg.c index 76f644deb0a7..c5c5fbe9d605 100644 --- a/drivers/hid/hid-lg.c +++ b/drivers/hid/hid-lg.c @@ -756,11 +756,16 @@ static int lg_probe(struct hid_device *hdev, const struct hid_device_id *id) /* Setup wireless link with Logitech Wii wheel */ if (hdev->product == USB_DEVICE_ID_LOGITECH_WII_WHEEL) { - unsigned char buf[] = { 0x00, 0xAF, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + const unsigned char cbuf[] = { 0x00, 0xAF, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + u8 *buf = kmemdup(cbuf, sizeof(cbuf), GFP_KERNEL); - ret = hid_hw_raw_request(hdev, buf[0], buf, sizeof(buf), + if (!buf) { + ret = -ENOMEM; + goto err_free; + } + + ret = hid_hw_raw_request(hdev, buf[0], buf, sizeof(cbuf), HID_FEATURE_REPORT, HID_REQ_SET_REPORT); - if (ret >= 0) { /* insert a little delay of 10 jiffies ~ 40ms */ wait_queue_head_t wait; @@ -772,9 +777,10 @@ static int lg_probe(struct hid_device *hdev, const struct hid_device_id *id) buf[1] = 0xB2; get_random_bytes(&buf[2], 2); - ret = hid_hw_raw_request(hdev, buf[0], buf, sizeof(buf), + ret = hid_hw_raw_request(hdev, buf[0], buf, sizeof(cbuf), HID_FEATURE_REPORT, HID_REQ_SET_REPORT); } + kfree(buf); } if (drv_data->quirks & LG_FF) From b7a87ad6775f3ed69e6573b91ed3c2f1338884ad Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Mon, 21 Nov 2016 11:48:41 +0100 Subject: [PATCH 445/503] HID: magicmouse: make transfer buffers DMA capable Kernel v4.9 strictly enforces DMA capable buffers, so we need to remove buffers allocated on the stack. Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-magicmouse.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index d6fa496d0ca2..20b40ad26325 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c @@ -493,7 +493,8 @@ static int magicmouse_input_configured(struct hid_device *hdev, static int magicmouse_probe(struct hid_device *hdev, const struct hid_device_id *id) { - __u8 feature[] = { 0xd7, 0x01 }; + const u8 feature[] = { 0xd7, 0x01 }; + u8 *buf; struct magicmouse_sc *msc; struct hid_report *report; int ret; @@ -544,6 +545,12 @@ static int magicmouse_probe(struct hid_device *hdev, } report->size = 6; + buf = kmemdup(feature, sizeof(feature), GFP_KERNEL); + if (!buf) { + ret = -ENOMEM; + goto err_stop_hw; + } + /* * Some devices repond with 'invalid report id' when feature * report switching it into multitouch mode is sent to it. @@ -552,8 +559,9 @@ static int magicmouse_probe(struct hid_device *hdev, * but there seems to be no other way of switching the mode. * Thus the super-ugly hacky success check below. */ - ret = hid_hw_raw_request(hdev, feature[0], feature, sizeof(feature), + ret = hid_hw_raw_request(hdev, buf[0], buf, sizeof(feature), HID_FEATURE_REPORT, HID_REQ_SET_REPORT); + kfree(buf); if (ret != -EIO && ret != sizeof(feature)) { hid_err(hdev, "unable to request touch data (%d)\n", ret); goto err_stop_hw; From 6dab07df555b652d8d989348b2ce04498d7f9a70 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Mon, 21 Nov 2016 11:48:42 +0100 Subject: [PATCH 446/503] HID: rmi: make transfer buffers DMA capable Kernel v4.9 strictly enforces DMA capable buffers, so we need to remove buffers allocated on the stack. Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-rmi.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-rmi.c b/drivers/hid/hid-rmi.c index 9cd2ca34a6be..be89bcbf6a71 100644 --- a/drivers/hid/hid-rmi.c +++ b/drivers/hid/hid-rmi.c @@ -188,10 +188,16 @@ static int rmi_set_page(struct hid_device *hdev, u8 page) static int rmi_set_mode(struct hid_device *hdev, u8 mode) { int ret; - u8 txbuf[2] = {RMI_SET_RMI_MODE_REPORT_ID, mode}; + const u8 txbuf[2] = {RMI_SET_RMI_MODE_REPORT_ID, mode}; + u8 *buf; - ret = hid_hw_raw_request(hdev, RMI_SET_RMI_MODE_REPORT_ID, txbuf, + buf = kmemdup(txbuf, sizeof(txbuf), GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ret = hid_hw_raw_request(hdev, RMI_SET_RMI_MODE_REPORT_ID, buf, sizeof(txbuf), HID_FEATURE_REPORT, HID_REQ_SET_REPORT); + kfree(buf); if (ret < 0) { dev_err(&hdev->dev, "unable to set rmi mode to %d (%d)\n", mode, ret); From d443a0aa3a291e5f78072f2fa464e03bc83fafad Mon Sep 17 00:00:00 2001 From: Song Hongyan Date: Tue, 15 Nov 2016 01:11:10 +0000 Subject: [PATCH 447/503] HID: hid-sensor-hub: clear memory to avoid random data When user tried to read some fields like hysteresis from IIO sysfs on some systems, it fails. The reason is that this field is a byte field and caller of sensor_hub_get_feature() passes a buffer of 4 bytes. Here the function sensor_hub_get_feature() copies the single byte from the report to the caller buffer and returns "1" as the number of bytes copied. So caller can use the return value. But this is done by multiple callers, so if we just change the sensor_hub_get_feature so that caller buffer is initialized with 0s then we don't to change all functions. Signed-off-by: Song Hongyan Acked-by: Jonathan Cameron Signed-off-by: Jiri Kosina --- drivers/hid/hid-sensor-hub.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c index c5c3d6111729..60875625cbdf 100644 --- a/drivers/hid/hid-sensor-hub.c +++ b/drivers/hid/hid-sensor-hub.c @@ -212,6 +212,7 @@ int sensor_hub_set_feature(struct hid_sensor_hub_device *hsdev, u32 report_id, __s32 value; int ret = 0; + memset(buffer, 0, buffer_size); mutex_lock(&data->mutex); report = sensor_hub_report(report_id, hsdev->hdev, HID_FEATURE_REPORT); if (!report || (field_index >= report->maxfield)) { From 1db4496f167bcc7c6541d449355ade2e7d339d52 Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Wed, 23 Nov 2016 02:22:24 +0100 Subject: [PATCH 448/503] drm/amdgpu: fix power state when port pm is unavailable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When PCIe port PM is not enabled (system BIOS is pre-2015 or the pcie_port_pm=off parameter is set), legacy ATPX PM should still be marked as supported. Otherwise the GPU can fail to power on after runtime suspend. This affected a Dell Inspiron 5548. Ideally the BIOS date in the PCI core is lowered to 2013 (the first year where hybrid graphics platforms using power resources was introduced), but that seems more risky at this point and would not solve the pcie_port_pm=off issue. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98505 Reported-and-tested-by: Nayan Deshmukh Signed-off-by: Peter Wu Signed-off-by: Alex Deucher Cc: # 4.8+ Acked-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index dae35a96a694..02ca5dd978f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -34,6 +34,7 @@ struct amdgpu_atpx { static struct amdgpu_atpx_priv { bool atpx_detected; + bool bridge_pm_usable; /* handle for device - and atpx */ acpi_handle dhandle; acpi_handle other_handle; @@ -205,7 +206,11 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx) atpx->is_hybrid = false; if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) { printk("ATPX Hybrid Graphics\n"); - atpx->functions.power_cntl = false; + /* + * Disable legacy PM methods only when pcie port PM is usable, + * otherwise the device might fail to power off or power on. + */ + atpx->functions.power_cntl = !amdgpu_atpx_priv.bridge_pm_usable; atpx->is_hybrid = true; } @@ -480,6 +485,7 @@ static int amdgpu_atpx_power_state(enum vga_switcheroo_client_id id, */ static bool amdgpu_atpx_pci_probe_handle(struct pci_dev *pdev) { + struct pci_dev *parent_pdev = pci_upstream_bridge(pdev); acpi_handle dhandle, atpx_handle; acpi_status status; @@ -494,6 +500,7 @@ static bool amdgpu_atpx_pci_probe_handle(struct pci_dev *pdev) } amdgpu_atpx_priv.dhandle = dhandle; amdgpu_atpx_priv.atpx.handle = atpx_handle; + amdgpu_atpx_priv.bridge_pm_usable = parent_pdev && parent_pdev->bridge_d3; return true; } From d3ac31f3b4bf9fade93d69770cb9c34912e017be Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Wed, 23 Nov 2016 02:22:25 +0100 Subject: [PATCH 449/503] drm/radeon: fix power state when port pm is unavailable (v2) When PCIe port PM is not enabled (system BIOS is pre-2015 or the pcie_port_pm=off parameter is set), legacy ATPX PM should still be marked as supported. Otherwise the GPU can fail to power on after runtime suspend. This affected a Dell Inspiron 5548. Ideally the BIOS date in the PCI core is lowered to 2013 (the first year where hybrid graphics platforms using power resources was introduced), but that seems more risky at this point and would not solve the pcie_port_pm=off issue. v2: agd: fix typo Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98505 Signed-off-by: Peter Wu Signed-off-by: Alex Deucher Cc: # 4.8+ Reviewed-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_atpx_handler.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c index 2fdcd04bc93f..4129b12521a6 100644 --- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c +++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c @@ -34,6 +34,7 @@ struct radeon_atpx { static struct radeon_atpx_priv { bool atpx_detected; + bool bridge_pm_usable; /* handle for device - and atpx */ acpi_handle dhandle; struct radeon_atpx atpx; @@ -203,7 +204,11 @@ static int radeon_atpx_validate(struct radeon_atpx *atpx) atpx->is_hybrid = false; if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) { printk("ATPX Hybrid Graphics\n"); - atpx->functions.power_cntl = false; + /* + * Disable legacy PM methods only when pcie port PM is usable, + * otherwise the device might fail to power off or power on. + */ + atpx->functions.power_cntl = !radeon_atpx_priv.bridge_pm_usable; atpx->is_hybrid = true; } @@ -474,6 +479,7 @@ static int radeon_atpx_power_state(enum vga_switcheroo_client_id id, */ static bool radeon_atpx_pci_probe_handle(struct pci_dev *pdev) { + struct pci_dev *parent_pdev = pci_upstream_bridge(pdev); acpi_handle dhandle, atpx_handle; acpi_status status; @@ -487,6 +493,7 @@ static bool radeon_atpx_pci_probe_handle(struct pci_dev *pdev) radeon_atpx_priv.dhandle = dhandle; radeon_atpx_priv.atpx.handle = atpx_handle; + radeon_atpx_priv.bridge_pm_usable = parent_pdev && parent_pdev->bridge_d3; return true; } From e658a6f14d7c0243205f035979d0ecf6c12a036f Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 16 Nov 2016 11:18:05 -0500 Subject: [PATCH 450/503] tile: avoid using clocksource_cyc2ns with absolute cycle count For large values of "mult" and long uptimes, the intermediate result of "cycles * mult" can overflow 64 bits. For example, the tile platform calls clocksource_cyc2ns with a 1.2 GHz clock; we have mult = 853, and after 208.5 days, we overflow 64 bits. Since clocksource_cyc2ns() is intended to be used for relative cycle counts, not absolute cycle counts, performance is more importance than accepting a wider range of cycle values. So, just use mult_frac() directly in tile's sched_clock(). Commit 4cecf6d401a0 ("sched, x86: Avoid unnecessary overflow in sched_clock") by Salman Qazi results in essentially the same generated code for x86 as this change does for tile. In fact, a follow-on change by Salman introduced mult_frac() and switched to using it, so the C code was largely identical at that point too. Peter Zijlstra then added mul_u64_u32_shr() and switched x86 to use it. This is, in principle, better; by optimizing the 64x64->64 multiplies to be 32x32->64 multiplies we can potentially save some time. However, the compiler piplines the 64x64->64 multiplies pretty well, and the conditional branch in the generic mul_u64_u32_shr() causes some bubbles in execution, with the result that it's pretty much a wash. If tilegx provided its own implementation of mul_u64_u32_shr() without the conditional branch, we could potentially save 3 cycles, but that seems like small gain for a fair amount of additional build scaffolding; no other platform currently provides a mul_u64_u32_shr() override, and tile doesn't currently have an header to put the override in. Additionally, gcc currently has an optimization bug that prevents it from recognizing the opportunity to use a 32x32->64 multiply, and so the result would be no better than the existing mult_frac() until such time as the compiler is fixed. For now, just using mult_frac() seems like the right answer. Cc: stable@kernel.org [v3.4+] Signed-off-by: Chris Metcalf --- arch/tile/kernel/time.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index 178989e6d3e3..ea960d660917 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -218,8 +218,8 @@ void do_timer_interrupt(struct pt_regs *regs, int fault_num) */ unsigned long long sched_clock(void) { - return clocksource_cyc2ns(get_cycles(), - sched_clock_mult, SCHED_CLOCK_SHIFT); + return mult_frac(get_cycles(), + sched_clock_mult, 1ULL << SCHED_CLOCK_SHIFT); } int setup_profiling_timer(unsigned int multiplier) From 22a1e7783e173ab3d86018eb590107d68df46c11 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 17 Nov 2016 10:49:31 +0100 Subject: [PATCH 451/503] xc2028: Fix use-after-free bug properly The commit 8dfbcc4351a0 ("[media] xc2028: avoid use after free") tried to address the reported use-after-free by clearing the reference. However, it's clearing the wrong pointer; it sets NULL to priv->ctrl.fname, but it's anyway overwritten by the next line memcpy(&priv->ctrl, p, sizeof(priv->ctrl)). OTOH, the actual code accessing the freed string is the strcmp() call with priv->fname: if (!firmware_name[0] && p->fname && priv->fname && strcmp(p->fname, priv->fname)) free_firmware(priv); where priv->fname points to the previous file name, and this was already freed by kfree(). For fixing the bug properly, this patch does the following: - Keep the copy of firmware file name in only priv->fname, priv->ctrl.fname isn't changed; - The allocation is done only when the firmware gets loaded; - The kfree() is called in free_firmware() commonly Fixes: commit 8dfbcc4351a0 ('[media] xc2028: avoid use after free') Cc: Signed-off-by: Takashi Iwai Signed-off-by: Mauro Carvalho Chehab --- drivers/media/tuners/tuner-xc2028.c | 37 +++++++++++++---------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/drivers/media/tuners/tuner-xc2028.c b/drivers/media/tuners/tuner-xc2028.c index 317ef63ee789..8d96a22647b3 100644 --- a/drivers/media/tuners/tuner-xc2028.c +++ b/drivers/media/tuners/tuner-xc2028.c @@ -281,6 +281,14 @@ static void free_firmware(struct xc2028_data *priv) int i; tuner_dbg("%s called\n", __func__); + /* free allocated f/w string */ + if (priv->fname != firmware_name) + kfree(priv->fname); + priv->fname = NULL; + + priv->state = XC2028_NO_FIRMWARE; + memset(&priv->cur_fw, 0, sizeof(priv->cur_fw)); + if (!priv->firm) return; @@ -291,9 +299,6 @@ static void free_firmware(struct xc2028_data *priv) priv->firm = NULL; priv->firm_size = 0; - priv->state = XC2028_NO_FIRMWARE; - - memset(&priv->cur_fw, 0, sizeof(priv->cur_fw)); } static int load_all_firmwares(struct dvb_frontend *fe, @@ -884,9 +889,8 @@ read_not_reliable: return 0; fail: - priv->state = XC2028_NO_FIRMWARE; + free_firmware(priv); - memset(&priv->cur_fw, 0, sizeof(priv->cur_fw)); if (retry_count < 8) { msleep(50); retry_count++; @@ -1332,11 +1336,8 @@ static int xc2028_dvb_release(struct dvb_frontend *fe) mutex_lock(&xc2028_list_mutex); /* only perform final cleanup if this is the last instance */ - if (hybrid_tuner_report_instance_count(priv) == 1) { + if (hybrid_tuner_report_instance_count(priv) == 1) free_firmware(priv); - kfree(priv->ctrl.fname); - priv->ctrl.fname = NULL; - } if (priv) hybrid_tuner_release_state(priv); @@ -1399,19 +1400,8 @@ static int xc2028_set_config(struct dvb_frontend *fe, void *priv_cfg) /* * Copy the config data. - * For the firmware name, keep a local copy of the string, - * in order to avoid troubles during device release. */ - kfree(priv->ctrl.fname); - priv->ctrl.fname = NULL; memcpy(&priv->ctrl, p, sizeof(priv->ctrl)); - if (p->fname) { - priv->ctrl.fname = kstrdup(p->fname, GFP_KERNEL); - if (priv->ctrl.fname == NULL) { - rc = -ENOMEM; - goto unlock; - } - } /* * If firmware name changed, frees firmware. As free_firmware will @@ -1426,10 +1416,15 @@ static int xc2028_set_config(struct dvb_frontend *fe, void *priv_cfg) if (priv->state == XC2028_NO_FIRMWARE) { if (!firmware_name[0]) - priv->fname = priv->ctrl.fname; + priv->fname = kstrdup(p->fname, GFP_KERNEL); else priv->fname = firmware_name; + if (!priv->fname) { + rc = -ENOMEM; + goto unlock; + } + rc = request_firmware_nowait(THIS_MODULE, 1, priv->fname, priv->i2c_props.adap->dev.parent, From ffa54a238c69184414a8f3dc35a18aed875290e7 Mon Sep 17 00:00:00 2001 From: Kirill Esipov Date: Mon, 21 Nov 2016 19:53:31 +0300 Subject: [PATCH 452/503] net: phy: micrel: fix KSZ8041FTL supported value Fix setting of SUPPORTED_FIBRE bit as it was not present in features of KSZ8041. Signed-off-by: Kirill Esipov Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 081df68d2ce1..ea92d524d5a8 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -318,12 +318,12 @@ static int ksz8041_config_init(struct phy_device *phydev) /* Limit supported and advertised modes in fiber mode */ if (of_property_read_bool(of_node, "micrel,fiber-mode")) { phydev->dev_flags |= MICREL_PHY_FXEN; - phydev->supported &= SUPPORTED_FIBRE | - SUPPORTED_100baseT_Full | + phydev->supported &= SUPPORTED_100baseT_Full | SUPPORTED_100baseT_Half; - phydev->advertising &= ADVERTISED_FIBRE | - ADVERTISED_100baseT_Full | + phydev->supported |= SUPPORTED_FIBRE; + phydev->advertising &= ADVERTISED_100baseT_Full | ADVERTISED_100baseT_Half; + phydev->advertising |= ADVERTISED_FIBRE; phydev->autoneg = AUTONEG_DISABLE; } From c3891fa2543cbab26093f5e425b8a50cd6837f16 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Tue, 22 Nov 2016 09:54:36 +0800 Subject: [PATCH 453/503] driver: macvlan: Check if need rollback multicast setting in macvlan_open When dev_set_promiscuity failed in macvlan_open, it always invokes dev_set_allmulti without checking if necessary. Now check the IFF_ALLMULTI flag firstly before rollback the multicast setting in the error handler. Signed-off-by: Gao Feng Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index d2d6f12a112f..26d6f0bbe14b 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -623,7 +623,8 @@ hash_add: return 0; clear_multi: - dev_set_allmulti(lowerdev, -1); + if (dev->flags & IFF_ALLMULTI) + dev_set_allmulti(lowerdev, -1); del_unicast: dev_uc_del(lowerdev, dev->dev_addr); out: From 920c1cd36642ac21a7b2fdc47ab44b9634d570f9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 21 Nov 2016 18:28:36 -0800 Subject: [PATCH 454/503] netdevice.h: fix kernel-doc warning Fix kernel-doc warning in (missing ':'): ..//include/linux/netdevice.h:1904: warning: No description found for parameter 'prio_tc_map[TC_BITMASK + 1]' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bf04a46f6d5b..e16a2a980ea8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1619,7 +1619,7 @@ enum netdev_priv_flags { * @dcbnl_ops: Data Center Bridging netlink ops * @num_tc: Number of traffic classes in the net device * @tc_to_txq: XXX: need comments on this one - * @prio_tc_map XXX: need comments on this one + * @prio_tc_map: XXX: need comments on this one * * @fcoe_ddp_xid: Max exchange id for FCoE LRO by ddp * From 57aac71b3e9ed890cf2219dd980c36f859b43d6a Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Tue, 22 Nov 2016 06:14:40 +0100 Subject: [PATCH 455/503] bnxt_en: Fix a VXLAN vs GENEVE issue Knowing that: #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN (0x1UL << 0) #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE (0x5UL << 0) and that 'bnxt_hwrm_tunnel_dst_port_alloc()' is only called with one of these 2 constants, the TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE can not trigger. Replace the bit test that overlap by an equality test, just as in 'bnxt_hwrm_tunnel_dst_port_free()' above. Signed-off-by: Christophe JAILLET Acked-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index e18635b2a002..e41d8bd094ae 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3210,11 +3210,17 @@ static int bnxt_hwrm_tunnel_dst_port_alloc(struct bnxt *bp, __be16 port, goto err_out; } - if (tunnel_type & TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN) + switch (tunnel_type) { + case TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN: bp->vxlan_fw_dst_port_id = resp->tunnel_dst_port_id; - - else if (tunnel_type & TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE) + break; + case TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE: bp->nge_fw_dst_port_id = resp->tunnel_dst_port_id; + break; + default: + break; + } + err_out: mutex_unlock(&bp->hwrm_cmd_lock); return rc; From 93af205656bed3d8d3f4b85b2a3749c7ed7d996a Mon Sep 17 00:00:00 2001 From: Zhang Shengju Date: Tue, 22 Nov 2016 14:14:28 +0800 Subject: [PATCH 456/503] rtnetlink: fix the wrong minimal dump size getting from rtnl_calcit() For RT netlink, calcit() function should return the minimal size for netlink dump message. This will make sure that dump message for every network device can be stored. Currently, rtnl_calcit() function doesn't account the size of header of netlink message, this patch will fix it. Signed-off-by: Zhang Shengju Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a99917b5de33..deb35acbefd0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2737,7 +2737,7 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) ext_filter_mask)); } - return min_ifinfo_dump_size; + return nlmsg_total_size(min_ifinfo_dump_size); } static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) From a4cd0271ead09439fa03ce38fa79654dd1e5484b Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 21 Nov 2016 23:24:43 -0800 Subject: [PATCH 457/503] net: revert "net: l2tp: Treat NET_XMIT_CN as success in l2tp_eth_dev_xmit" This reverts commit 7c6ae610a1f0, because l2tp_xmit_skb() never returns NET_XMIT_CN, it ignores the return value of l2tp_xmit_core(). Cc: Gao Feng Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/l2tp/l2tp_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 3dc97b4f982b..965f7e344cef 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -97,7 +97,7 @@ static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev) unsigned int len = skb->len; int ret = l2tp_xmit_skb(session, skb, session->hdr_len); - if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { + if (likely(ret == NET_XMIT_SUCCESS)) { atomic_long_add(len, &priv->tx_bytes); atomic_long_inc(&priv->tx_packets); } else { From b6e01232e25629907df9db19f25da7d4e8f5b589 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 22 Nov 2016 16:20:39 +0200 Subject: [PATCH 458/503] net/mlx4_en: Free netdev resources under state lock Make sure mlx4_en_free_resources is called under the netdev state lock. This is needed since RCU dereference of XDP prog should be protected. Fixes: 326fe02d1ed6 ("net/mlx4_en: protect ring->xdp_prog with rcu_read_lock") Signed-off-by: Tariq Toukan Reported-by: Sagi Grimberg CC: Brenden Blanco Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 3a47e83d3e07..a60f635da78b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -129,6 +129,9 @@ static enum mlx4_net_trans_rule_id mlx4_ip_proto_to_trans_rule_id(u8 ip_proto) } }; +/* Must not acquire state_lock, as its corresponding work_sync + * is done under it. + */ static void mlx4_en_filter_work(struct work_struct *work) { struct mlx4_en_filter *filter = container_of(work, @@ -2189,13 +2192,13 @@ void mlx4_en_destroy_netdev(struct net_device *dev) mutex_lock(&mdev->state_lock); mdev->pndev[priv->port] = NULL; mdev->upper[priv->port] = NULL; - mutex_unlock(&mdev->state_lock); #ifdef CONFIG_RFS_ACCEL mlx4_en_cleanup_filters(priv); #endif mlx4_en_free_resources(priv); + mutex_unlock(&mdev->state_lock); kfree(priv->tx_ring); kfree(priv->tx_cq); From a1ff57416af9a7971a801d553cd53edd8afb28d6 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Wed, 23 Nov 2016 13:55:13 +1100 Subject: [PATCH 459/503] powerpc/boot: Fix the early OPAL console wrappers When configured with CONFIG_PPC_EARLY_DEBUG_OPAL=y the kernel expects the OPAL entry and base addresses to be passed in r8 and r9 respectively. Currently the wrapper does not attempt to restore these values before entering the decompressed kernel which causes the kernel to branch into whatever happens to be in r9 when doing a write to the OPAL console in early boot. This patch adds a platform_ops hook that can be used to branch into the new kernel. The OPAL console driver patches this at runtime so that if the console is used it will be restored just prior to entering the kernel. Fixes: 656ad58ef19e ("powerpc/boot: Add OPAL console to epapr wrappers") Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman --- arch/powerpc/boot/main.c | 8 ++++++-- arch/powerpc/boot/opal-calls.S | 13 +++++++++++++ arch/powerpc/boot/opal.c | 11 +++++++++++ arch/powerpc/boot/ops.h | 1 + 4 files changed, 31 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c index 57d42d129033..78aaf4ffd7ab 100644 --- a/arch/powerpc/boot/main.c +++ b/arch/powerpc/boot/main.c @@ -232,8 +232,12 @@ void start(void) console_ops.close(); kentry = (kernel_entry_t) vmlinux.addr; - if (ft_addr) - kentry(ft_addr, 0, NULL); + if (ft_addr) { + if(platform_ops.kentry) + platform_ops.kentry(ft_addr, vmlinux.addr); + else + kentry(ft_addr, 0, NULL); + } else kentry((unsigned long)initrd.addr, initrd.size, loader_info.promptr); diff --git a/arch/powerpc/boot/opal-calls.S b/arch/powerpc/boot/opal-calls.S index ff2f1b97bc53..2a99fc9a3ccf 100644 --- a/arch/powerpc/boot/opal-calls.S +++ b/arch/powerpc/boot/opal-calls.S @@ -12,6 +12,19 @@ .text + .globl opal_kentry +opal_kentry: + /* r3 is the fdt ptr */ + mtctr r4 + li r4, 0 + li r5, 0 + li r6, 0 + li r7, 0 + ld r11,opal@got(r2) + ld r8,0(r11) + ld r9,8(r11) + bctr + #define OPAL_CALL(name, token) \ .globl name; \ name: \ diff --git a/arch/powerpc/boot/opal.c b/arch/powerpc/boot/opal.c index 1f37e1c1d6d8..d7b4fd47eb44 100644 --- a/arch/powerpc/boot/opal.c +++ b/arch/powerpc/boot/opal.c @@ -23,14 +23,25 @@ struct opal { static u32 opal_con_id; +/* see opal-wrappers.S */ int64_t opal_console_write(int64_t term_number, u64 *length, const u8 *buffer); int64_t opal_console_read(int64_t term_number, uint64_t *length, u8 *buffer); int64_t opal_console_write_buffer_space(uint64_t term_number, uint64_t *length); int64_t opal_console_flush(uint64_t term_number); int64_t opal_poll_events(uint64_t *outstanding_event_mask); +void opal_kentry(unsigned long fdt_addr, void *vmlinux_addr); + static int opal_con_open(void) { + /* + * When OPAL loads the boot kernel it stashes the OPAL base and entry + * address in r8 and r9 so the kernel can use the OPAL console + * before unflattening the devicetree. While executing the wrapper will + * probably trash r8 and r9 so this kentry hook restores them before + * entering the decompressed kernel. + */ + platform_ops.kentry = opal_kentry; return 0; } diff --git a/arch/powerpc/boot/ops.h b/arch/powerpc/boot/ops.h index 309d1b127e96..fad1862f4b2d 100644 --- a/arch/powerpc/boot/ops.h +++ b/arch/powerpc/boot/ops.h @@ -30,6 +30,7 @@ struct platform_ops { void * (*realloc)(void *ptr, unsigned long size); void (*exit)(void); void * (*vmlinux_alloc)(unsigned long size); + void (*kentry)(unsigned long fdt_addr, void *vmlinux_addr); }; extern struct platform_ops platform_ops; From 1ee6f347f81925fa8f3816e69ca1b49021f37850 Mon Sep 17 00:00:00 2001 From: Bibby Hsieh Date: Tue, 18 Oct 2016 16:23:59 +0800 Subject: [PATCH 460/503] drm/mediatek: fix a typo of DISP_OD_CFG to OD_RELAYMODE If we want to set the hardware OD to relay mode, we have to set DISP_OD_CFG register rather than OD_RELAYMODE; otherwise, the system will access the wrong address. Change-Id: Ifb9bb4caa63df906437d48b5d5326b6d04ea332a Fixes: 7216436420414144646f5d8343d061355fd23483 ("drm/mediatek: set mt8173 dithering function") Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Bibby Hsieh Acked-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c index df33b3ca6ffd..48cc01fd20c7 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c @@ -123,7 +123,7 @@ static void mtk_od_config(struct mtk_ddp_comp *comp, unsigned int w, unsigned int bpc) { writel(w << 16 | h, comp->regs + DISP_OD_SIZE); - writel(OD_RELAYMODE, comp->regs + OD_RELAYMODE); + writel(OD_RELAYMODE, comp->regs + DISP_OD_CFG); mtk_dither_set(comp, bpc, DISP_OD_CFG); } From f6c872397028837c80685ee96c4011c62abe9a73 Mon Sep 17 00:00:00 2001 From: Jitao Shi Date: Wed, 16 Nov 2016 11:20:54 +0800 Subject: [PATCH 461/503] drm/mediatek: fixed the calc method of data rate per lane Tune dsi frame rate by pixel clock, dsi add some extra signal (i.e. Tlpx, Ths-prepare, Ths-zero, Ths-trail,Ths-exit) when enter and exit LP mode, those signals will cause h-time larger than normal and reduce FPS. So need to multiply a coefficient to offset the extra signal's effect. coefficient = ((htotal*bpp/lane_number)+Tlpx+Ths_prep+Ths_zero+ Ths_trail+Ths_exit)/(htotal*bpp/lane_number) Signed-off-by: Jitao Shi Reviewed-by: Daniel Kurtz --- drivers/gpu/drm/mediatek/mtk_dsi.c | 68 ++++++++++++++++++++++-------- 1 file changed, 50 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index 28b2044ed9f2..eaa5a2240c0c 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -86,7 +86,7 @@ #define DSI_PHY_TIMECON0 0x110 #define LPX (0xff << 0) -#define HS_PRPR (0xff << 8) +#define HS_PREP (0xff << 8) #define HS_ZERO (0xff << 16) #define HS_TRAIL (0xff << 24) @@ -102,10 +102,16 @@ #define CLK_TRAIL (0xff << 24) #define DSI_PHY_TIMECON3 0x11c -#define CLK_HS_PRPR (0xff << 0) +#define CLK_HS_PREP (0xff << 0) #define CLK_HS_POST (0xff << 8) #define CLK_HS_EXIT (0xff << 16) +#define T_LPX 5 +#define T_HS_PREP 6 +#define T_HS_TRAIL 8 +#define T_HS_EXIT 7 +#define T_HS_ZERO 10 + #define NS_TO_CYCLE(n, c) ((n) / (c) + (((n) % (c)) ? 1 : 0)) struct phy; @@ -161,20 +167,18 @@ static void mtk_dsi_mask(struct mtk_dsi *dsi, u32 offset, u32 mask, u32 data) static void dsi_phy_timconfig(struct mtk_dsi *dsi) { u32 timcon0, timcon1, timcon2, timcon3; - unsigned int ui, cycle_time; - unsigned int lpx; + u32 ui, cycle_time; ui = 1000 / dsi->data_rate + 0x01; cycle_time = 8000 / dsi->data_rate + 0x01; - lpx = 5; - timcon0 = (8 << 24) | (0xa << 16) | (0x6 << 8) | lpx; - timcon1 = (7 << 24) | (5 * lpx << 16) | ((3 * lpx) / 2) << 8 | - (4 * lpx); + timcon0 = T_LPX | T_HS_PREP << 8 | T_HS_ZERO << 16 | T_HS_TRAIL << 24; + timcon1 = 4 * T_LPX | (3 * T_LPX / 2) << 8 | 5 * T_LPX << 16 | + T_HS_EXIT << 24; timcon2 = ((NS_TO_CYCLE(0x64, cycle_time) + 0xa) << 24) | (NS_TO_CYCLE(0x150, cycle_time) << 16); - timcon3 = (2 * lpx) << 16 | NS_TO_CYCLE(80 + 52 * ui, cycle_time) << 8 | - NS_TO_CYCLE(0x40, cycle_time); + timcon3 = NS_TO_CYCLE(0x40, cycle_time) | (2 * T_LPX) << 16 | + NS_TO_CYCLE(80 + 52 * ui, cycle_time) << 8; writel(timcon0, dsi->regs + DSI_PHY_TIMECON0); writel(timcon1, dsi->regs + DSI_PHY_TIMECON1); @@ -202,19 +206,47 @@ static int mtk_dsi_poweron(struct mtk_dsi *dsi) { struct device *dev = dsi->dev; int ret; + u64 pixel_clock, total_bits; + u32 htotal, htotal_bits, bit_per_pixel, overhead_cycles, overhead_bits; if (++dsi->refcount != 1) return 0; - /** - * data_rate = (pixel_clock / 1000) * pixel_dipth * mipi_ratio; - * pixel_clock unit is Khz, data_rata unit is MHz, so need divide 1000. - * mipi_ratio is mipi clk coefficient for balance the pixel clk in mipi. - * we set mipi_ratio is 1.05. - */ - dsi->data_rate = dsi->vm.pixelclock * 3 * 21 / (1 * 1000 * 10); + switch (dsi->format) { + case MIPI_DSI_FMT_RGB565: + bit_per_pixel = 16; + break; + case MIPI_DSI_FMT_RGB666_PACKED: + bit_per_pixel = 18; + break; + case MIPI_DSI_FMT_RGB666: + case MIPI_DSI_FMT_RGB888: + default: + bit_per_pixel = 24; + break; + } - ret = clk_set_rate(dsi->hs_clk, dsi->data_rate * 1000000); + /** + * vm.pixelclock is in kHz, pixel_clock unit is Hz, so multiply by 1000 + * htotal_time = htotal * byte_per_pixel / num_lanes + * overhead_time = lpx + hs_prepare + hs_zero + hs_trail + hs_exit + * mipi_ratio = (htotal_time + overhead_time) / htotal_time + * data_rate = pixel_clock * bit_per_pixel * mipi_ratio / num_lanes; + */ + pixel_clock = dsi->vm.pixelclock * 1000; + htotal = dsi->vm.hactive + dsi->vm.hback_porch + dsi->vm.hfront_porch + + dsi->vm.hsync_len; + htotal_bits = htotal * bit_per_pixel; + + overhead_cycles = T_LPX + T_HS_PREP + T_HS_ZERO + T_HS_TRAIL + + T_HS_EXIT; + overhead_bits = overhead_cycles * dsi->lanes * 8; + total_bits = htotal_bits + overhead_bits; + + dsi->data_rate = DIV_ROUND_UP_ULL(pixel_clock * total_bits, + htotal * dsi->lanes); + + ret = clk_set_rate(dsi->hs_clk, dsi->data_rate); if (ret < 0) { dev_err(dev, "Failed to set data rate: %d\n", ret); goto err_refcount; From 5ad45307d990020b25a8f7486178b6e033790f70 Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Fri, 18 Nov 2016 11:06:10 +0100 Subject: [PATCH 462/503] drm/mediatek: fix null pointer dereference The probe function requests the interrupt before initializing the ddp component. Which leads to a null pointer dereference at boot. Fix this by requesting the interrput after all components got initialized properly. Fixes: 119f5173628a ("drm/mediatek: Add DRM Driver for Mediatek SoC MT8173.") Signed-off-by: Matthias Brugger Change-Id: I57193a7ab554dfb37c35a455900689333adf511c --- drivers/gpu/drm/mediatek/mtk_disp_ovl.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c index f75c5b5a536c..c70310206ac5 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c @@ -251,13 +251,6 @@ static int mtk_disp_ovl_probe(struct platform_device *pdev) if (irq < 0) return irq; - ret = devm_request_irq(dev, irq, mtk_disp_ovl_irq_handler, - IRQF_TRIGGER_NONE, dev_name(dev), priv); - if (ret < 0) { - dev_err(dev, "Failed to request irq %d: %d\n", irq, ret); - return ret; - } - comp_id = mtk_ddp_comp_get_id(dev->of_node, MTK_DISP_OVL); if (comp_id < 0) { dev_err(dev, "Failed to identify by alias: %d\n", comp_id); @@ -273,6 +266,13 @@ static int mtk_disp_ovl_probe(struct platform_device *pdev) platform_set_drvdata(pdev, priv); + ret = devm_request_irq(dev, irq, mtk_disp_ovl_irq_handler, + IRQF_TRIGGER_NONE, dev_name(dev), priv); + if (ret < 0) { + dev_err(dev, "Failed to request irq %d: %d\n", irq, ret); + return ret; + } + ret = component_add(dev, &mtk_disp_ovl_component_ops); if (ret) dev_err(dev, "Failed to add component: %d\n", ret); From 2bf413d56b7de72ab800a6edb009177e5669b929 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 18 Nov 2016 19:40:04 +0000 Subject: [PATCH 463/503] i2c: designware: report short transfers Rather than reporting success for a short transfer due to interrupt latency, report an error both to the caller, as well as to the kernel log. Signed-off-by: Russell King Reviewed-by: Mika Westerberg Acked-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c index 11e866d05368..066a2ba6aeda 100644 --- a/drivers/i2c/busses/i2c-designware-core.c +++ b/drivers/i2c/busses/i2c-designware-core.c @@ -758,7 +758,7 @@ i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) } /* no error */ - if (likely(!dev->cmd_err)) { + if (likely(!dev->cmd_err && !dev->status)) { ret = num; goto done; } @@ -768,6 +768,11 @@ i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) ret = i2c_dw_handle_tx_abort(dev); goto done; } + + if (dev->status) + dev_err(dev->dev, + "transfer terminated early - interrupt latency too high?\n"); + ret = -EIO; done: From 4d6d5f1d08d2138dc43b28966eb6200e3db2e623 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 18 Nov 2016 19:40:10 +0000 Subject: [PATCH 464/503] i2c: designware: fix rx fifo depth tracking When loading the TX fifo to receive bytes on the I2C bus, we incorrectly count the number of bytes: rx_limit = dev->rx_fifo_depth - dw_readl(dev, DW_IC_RXFLR); while (buf_len > 0 && tx_limit > 0 && rx_limit > 0) { if (rx_limit - dev->rx_outstanding <= 0) break; rx_limit--; dev->rx_outstanding++; } DW_IC_RXFLR indicates how many bytes are available to be read in the FIFO, dev->rx_fifo_depth is the FIFO size, and dev->rx_outstanding is the number of bytes that we've requested to be read so far, but which have not been read. Firstly, increasing dev->rx_outstanding and decreasing rx_limit and then comparing them results in each byte consuming "two" bytes in this tracking, so this is obviously wrong. Secondly, the number of bytes that _could_ be received into the FIFO at any time is the number of bytes we have so far requested but not yet read from the FIFO - in other words dev->rx_outstanding. So, in order to request enough bytes to fill the RX FIFO, we need to request dev->rx_fifo_depth - dev->rx_outstanding bytes. Modifying the code thusly results in us reaching the maximum number of bytes outstanding each time we queue more "receive" operations, provided the transfer allows that to happen. Signed-off-by: Russell King Reviewed-by: Mika Westerberg Acked-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c index 066a2ba6aeda..c53058d6139c 100644 --- a/drivers/i2c/busses/i2c-designware-core.c +++ b/drivers/i2c/busses/i2c-designware-core.c @@ -611,7 +611,7 @@ i2c_dw_xfer_msg(struct dw_i2c_dev *dev) if (msgs[dev->msg_write_idx].flags & I2C_M_RD) { /* avoid rx buffer overrun */ - if (rx_limit - dev->rx_outstanding <= 0) + if (dev->rx_outstanding >= dev->rx_fifo_depth) break; dw_writel(dev, cmd | 0x100, DW_IC_DATA_CMD); From 1031398035a25e5c90c66befb6ff41fa4746df98 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Nov 2016 01:26:07 +0000 Subject: [PATCH 465/503] MIPS: Mask out limit field when calculating wired entry count Since MIPSr6 the Wired register is split into 2 fields, with the upper 16 bits of the register indicating a limit on the value that the wired entry count in the bottom 16 bits of the register can take. This means that simply reading the wired register doesn't get us a valid TLB entry index any longer, and we instead need to retrieve only the lower 16 bits of the register. Introduce a new num_wired_entries() function which does this on MIPSr6 or higher and simply returns the value of the wired register on older architecture revisions, and make use of it when reading the number of wired entries. Since commit e710d6668309 ("MIPS: tlb-r4k: If there are wired entries, don't use TLBINVF") we have been using a non-zero number of wired entries to determine whether we should avoid use of the tlbinvf instruction (which would invalidate wired entries) and instead loop over TLB entries in local_flush_tlb_all(). This loop begins with the number of wired entries, or before this patch some large bogus TLB index on MIPSr6 systems. Thus since the aforementioned commit some MIPSr6 systems with FTLBs have been prone to leaving stale address translations in the FTLB & crashing in various weird & wonderful ways when we later observe the wrong memory. Signed-off-by: Paul Burton Cc: Matt Redfearn Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14557/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mipsregs.h | 6 ++++++ arch/mips/include/asm/tlb.h | 13 +++++++++++++ arch/mips/mm/init.c | 4 ++-- arch/mips/mm/tlb-r4k.c | 6 +++--- 4 files changed, 24 insertions(+), 5 deletions(-) diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index 7dd2dd47909a..df78b2ca70eb 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -214,6 +214,12 @@ #error Bad page size configuration for hugetlbfs! #endif +/* + * Wired register bits + */ +#define MIPSR6_WIRED_LIMIT (_ULCAST_(0xffff) << 16) +#define MIPSR6_WIRED_WIRED (_ULCAST_(0xffff) << 0) + /* * Values used for computation of new tlb entries */ diff --git a/arch/mips/include/asm/tlb.h b/arch/mips/include/asm/tlb.h index 4a2349302b55..dd179fd8acda 100644 --- a/arch/mips/include/asm/tlb.h +++ b/arch/mips/include/asm/tlb.h @@ -1,6 +1,9 @@ #ifndef __ASM_TLB_H #define __ASM_TLB_H +#include +#include + /* * MIPS doesn't need any special per-pte or per-vma handling, except * we need to flush cache for area to be unmapped. @@ -22,6 +25,16 @@ ((CKSEG0 + ((idx) << (PAGE_SHIFT + 1))) | \ (cpu_has_tlbinv ? MIPS_ENTRYHI_EHINV : 0)) +static inline unsigned int num_wired_entries(void) +{ + unsigned int wired = read_c0_wired(); + + if (cpu_has_mips_r6) + wired &= MIPSR6_WIRED_WIRED; + + return wired; +} + #include #endif /* __ASM_TLB_H */ diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 3a6edecc3f38..e86ebcf5c071 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -118,7 +118,7 @@ static void *__kmap_pgprot(struct page *page, unsigned long addr, pgprot_t prot) writex_c0_entrylo1(entrylo); } #endif - tlbidx = read_c0_wired(); + tlbidx = num_wired_entries(); write_c0_wired(tlbidx + 1); write_c0_index(tlbidx); mtc0_tlbw_hazard(); @@ -147,7 +147,7 @@ void kunmap_coherent(void) local_irq_save(flags); old_ctx = read_c0_entryhi(); - wired = read_c0_wired() - 1; + wired = num_wired_entries() - 1; write_c0_wired(wired); write_c0_index(wired); write_c0_entryhi(UNIQUE_ENTRYHI(wired)); diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index bba9c1484b41..0596505770db 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -65,7 +65,7 @@ void local_flush_tlb_all(void) write_c0_entrylo0(0); write_c0_entrylo1(0); - entry = read_c0_wired(); + entry = num_wired_entries(); /* * Blast 'em all away. @@ -385,7 +385,7 @@ void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, old_ctx = read_c0_entryhi(); htw_stop(); old_pagemask = read_c0_pagemask(); - wired = read_c0_wired(); + wired = num_wired_entries(); write_c0_wired(wired + 1); write_c0_index(wired); tlbw_use_hazard(); /* What is the hazard here? */ @@ -449,7 +449,7 @@ __init int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1, htw_stop(); old_ctx = read_c0_entryhi(); old_pagemask = read_c0_pagemask(); - wired = read_c0_wired(); + wired = num_wired_entries(); if (--temp_tlb_entry < wired) { printk(KERN_WARNING "No TLB space left for add_temporary_entry\n"); From 764d3be6e415b40056834bfd29b994dc3f837606 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 22 Nov 2016 16:57:40 +0100 Subject: [PATCH 466/503] ipv6: bump genid when the IFA_F_TENTATIVE flag is clear When an ipv6 address has the tentative flag set, it can't be used as source for egress traffic, while the associated route, if any, can be looked up and even stored into some dst_cache. In the latter scenario, the source ipv6 address selected and stored in the cache is most probably wrong (e.g. with link-local scope) and the entity using the dst_cache will experience lack of ipv6 connectivity until said cache is cleared or invalidated. Overall this may cause lack of connectivity over most IPv6 tunnels (comprising geneve and vxlan), if the first egress packet reaches the tunnel before the DaD is completed for the used ipv6 address. This patch bumps a new genid after that the IFA_F_TENTATIVE flag is cleared, so that dst_cache will be invalidated on next lookup and ipv6 connectivity restored. Fixes: 0c1d70af924b ("net: use dst_cache for vxlan device") Fixes: 468dfffcd762 ("geneve: add dst caching support") Acked-by: Hannes Frederic Sowa Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 060dd9922018..4bc5ba3ae452 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -183,7 +183,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, static void addrconf_dad_start(struct inet6_ifaddr *ifp); static void addrconf_dad_work(struct work_struct *w); -static void addrconf_dad_completed(struct inet6_ifaddr *ifp); +static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id); static void addrconf_dad_run(struct inet6_dev *idev); static void addrconf_rs_timer(unsigned long data); static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); @@ -2898,6 +2898,7 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, spin_lock_bh(&ifp->lock); ifp->flags &= ~IFA_F_TENTATIVE; spin_unlock_bh(&ifp->lock); + rt_genid_bump_ipv6(dev_net(idev->dev)); ipv6_ifa_notify(RTM_NEWADDR, ifp); in6_ifa_put(ifp); } @@ -3740,7 +3741,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; struct net_device *dev = idev->dev; - bool notify = false; + bool bump_id, notify = false; addrconf_join_solict(dev, &ifp->addr); @@ -3755,11 +3756,12 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) idev->cnf.accept_dad < 1 || !(ifp->flags&IFA_F_TENTATIVE) || ifp->flags & IFA_F_NODAD) { + bump_id = ifp->flags & IFA_F_TENTATIVE; ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); spin_unlock(&ifp->lock); read_unlock_bh(&idev->lock); - addrconf_dad_completed(ifp); + addrconf_dad_completed(ifp, bump_id); return; } @@ -3819,8 +3821,8 @@ static void addrconf_dad_work(struct work_struct *w) struct inet6_ifaddr, dad_work); struct inet6_dev *idev = ifp->idev; + bool bump_id, disable_ipv6 = false; struct in6_addr mcaddr; - bool disable_ipv6 = false; enum { DAD_PROCESS, @@ -3890,11 +3892,12 @@ static void addrconf_dad_work(struct work_struct *w) * DAD was successful */ + bump_id = ifp->flags & IFA_F_TENTATIVE; ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); spin_unlock(&ifp->lock); write_unlock_bh(&idev->lock); - addrconf_dad_completed(ifp); + addrconf_dad_completed(ifp, bump_id); goto out; } @@ -3931,7 +3934,7 @@ static bool ipv6_lonely_lladdr(struct inet6_ifaddr *ifp) return true; } -static void addrconf_dad_completed(struct inet6_ifaddr *ifp) +static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id) { struct net_device *dev = ifp->idev->dev; struct in6_addr lladdr; @@ -3983,6 +3986,9 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) spin_unlock(&ifp->lock); write_unlock_bh(&ifp->idev->lock); } + + if (bump_id) + rt_genid_bump_ipv6(dev_net(dev)); } static void addrconf_dad_run(struct inet6_dev *idev) From 18594e9bc4a27e72d7961a7afe4250a502d1538d Mon Sep 17 00:00:00 2001 From: Nicolas Schichan Date: Thu, 24 Nov 2016 13:38:04 +0100 Subject: [PATCH 467/503] init: use pr_cont() when displaying rotator during ramdisk loading. Otherwise each individual rotator char would be printed in a new line: (...) [ 0.642350] - [ 0.644374] | [ 0.646367] - (...) Signed-off-by: Nicolas Schichan Signed-off-by: Linus Torvalds --- init/do_mounts_rd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index 8a09b32e07d6..dd4104c9aa12 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -272,7 +272,7 @@ int __init rd_load_image(char *from) sys_write(out_fd, buf, BLOCK_SIZE); #if !defined(CONFIG_S390) if (!(i % 16)) { - printk("%c\b", rotator[rotate & 0x3]); + pr_cont("%c\b", rotator[rotate & 0x3]); rotate++; } #endif From 444fdad88f35de9fd1c130b2c4e4550671758fd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Tue, 22 Nov 2016 20:20:14 +0100 Subject: [PATCH 468/503] KVM: x86: fix out-of-bounds access in lapic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cluster xAPIC delivery incorrectly assumed that dest_id <= 0xff. With enabled KVM_X2APIC_API_USE_32BIT_IDS in KVM_CAP_X2APIC_API, a userspace can send an interrupt with dest_id that results in out-of-bounds access. Found by syzkaller: BUG: KASAN: slab-out-of-bounds in kvm_irq_delivery_to_apic_fast+0x11fa/0x1210 at addr ffff88003d9ca750 Read of size 8 by task syz-executor/22923 CPU: 0 PID: 22923 Comm: syz-executor Not tainted 4.9.0-rc4+ #49 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 [...] Call Trace: [...] __dump_stack lib/dump_stack.c:15 [...] dump_stack+0xb3/0x118 lib/dump_stack.c:51 [...] kasan_object_err+0x1c/0x70 mm/kasan/report.c:156 [...] print_address_description mm/kasan/report.c:194 [...] kasan_report_error mm/kasan/report.c:283 [...] kasan_report+0x231/0x500 mm/kasan/report.c:303 [...] __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:329 [...] kvm_irq_delivery_to_apic_fast+0x11fa/0x1210 arch/x86/kvm/lapic.c:824 [...] kvm_irq_delivery_to_apic+0x132/0x9a0 arch/x86/kvm/irq_comm.c:72 [...] kvm_set_msi+0x111/0x160 arch/x86/kvm/irq_comm.c:157 [...] kvm_send_userspace_msi+0x201/0x280 arch/x86/kvm/../../../virt/kvm/irqchip.c:74 [...] kvm_vm_ioctl+0xba5/0x1670 arch/x86/kvm/../../../virt/kvm/kvm_main.c:3015 [...] vfs_ioctl fs/ioctl.c:43 [...] do_vfs_ioctl+0x18c/0x1040 fs/ioctl.c:679 [...] SYSC_ioctl fs/ioctl.c:694 [...] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:685 [...] entry_SYSCALL_64_fastpath+0x1f/0xc2 Reported-by: Dmitry Vyukov Cc: stable@vger.kernel.org Fixes: e45115b62f9a ("KVM: x86: use physical LAPIC array for logical x2APIC") Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/lapic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 23b99f305382..6f69340f9fa3 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -138,7 +138,7 @@ static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map, *mask = dest_id & 0xff; return true; case KVM_APIC_MODE_XAPIC_CLUSTER: - *cluster = map->xapic_cluster_map[dest_id >> 4]; + *cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf]; *mask = dest_id & 0xf; return true; default: From 2117d5398c81554fbf803f5fd1dc55eb78216c0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 23 Nov 2016 21:15:00 +0100 Subject: [PATCH 469/503] KVM: x86: drop error recovery in em_jmp_far and em_ret_far MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit em_jmp_far and em_ret_far assumed that setting IP can only fail in 64 bit mode, but syzkaller proved otherwise (and SDM agrees). Code segment was restored upon failure, but it was left uninitialized outside of long mode, which could lead to a leak of host kernel stack. We could have fixed that by always saving and restoring the CS, but we take a simpler approach and just break any guest that manages to fail as the error recovery is error-prone and modern CPUs don't need emulator for this. Found by syzkaller: WARNING: CPU: 2 PID: 3668 at arch/x86/kvm/emulate.c:2217 em_ret_far+0x428/0x480 Kernel panic - not syncing: panic_on_warn set ... CPU: 2 PID: 3668 Comm: syz-executor Not tainted 4.9.0-rc4+ #49 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 [...] Call Trace: [...] __dump_stack lib/dump_stack.c:15 [...] dump_stack+0xb3/0x118 lib/dump_stack.c:51 [...] panic+0x1b7/0x3a3 kernel/panic.c:179 [...] __warn+0x1c4/0x1e0 kernel/panic.c:542 [...] warn_slowpath_null+0x2c/0x40 kernel/panic.c:585 [...] em_ret_far+0x428/0x480 arch/x86/kvm/emulate.c:2217 [...] em_ret_far_imm+0x17/0x70 arch/x86/kvm/emulate.c:2227 [...] x86_emulate_insn+0x87a/0x3730 arch/x86/kvm/emulate.c:5294 [...] x86_emulate_instruction+0x520/0x1ba0 arch/x86/kvm/x86.c:5545 [...] emulate_instruction arch/x86/include/asm/kvm_host.h:1116 [...] complete_emulated_io arch/x86/kvm/x86.c:6870 [...] complete_emulated_mmio+0x4e9/0x710 arch/x86/kvm/x86.c:6934 [...] kvm_arch_vcpu_ioctl_run+0x3b7a/0x5a90 arch/x86/kvm/x86.c:6978 [...] kvm_vcpu_ioctl+0x61e/0xdd0 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2557 [...] vfs_ioctl fs/ioctl.c:43 [...] do_vfs_ioctl+0x18c/0x1040 fs/ioctl.c:679 [...] SYSC_ioctl fs/ioctl.c:694 [...] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:685 [...] entry_SYSCALL_64_fastpath+0x1f/0xc2 Reported-by: Dmitry Vyukov Cc: stable@vger.kernel.org Fixes: d1442d85cc30 ("KVM: x86: Handle errors when RIP is set during far jumps") Signed-off-by: Radim Krčmář --- arch/x86/kvm/emulate.c | 36 +++++++++++------------------------- 1 file changed, 11 insertions(+), 25 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index cbd7b92585bb..a3ce9d260d68 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2105,16 +2105,10 @@ static int em_iret(struct x86_emulate_ctxt *ctxt) static int em_jmp_far(struct x86_emulate_ctxt *ctxt) { int rc; - unsigned short sel, old_sel; - struct desc_struct old_desc, new_desc; - const struct x86_emulate_ops *ops = ctxt->ops; + unsigned short sel; + struct desc_struct new_desc; u8 cpl = ctxt->ops->cpl(ctxt); - /* Assignment of RIP may only fail in 64-bit mode */ - if (ctxt->mode == X86EMUL_MODE_PROT64) - ops->get_segment(ctxt, &old_sel, &old_desc, NULL, - VCPU_SREG_CS); - memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2); rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, @@ -2124,12 +2118,10 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) return rc; rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc); - if (rc != X86EMUL_CONTINUE) { - WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64); - /* assigning eip failed; restore the old cs */ - ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS); - return rc; - } + /* Error handling is not implemented. */ + if (rc != X86EMUL_CONTINUE) + return X86EMUL_UNHANDLEABLE; + return rc; } @@ -2189,14 +2181,8 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) { int rc; unsigned long eip, cs; - u16 old_cs; int cpl = ctxt->ops->cpl(ctxt); - struct desc_struct old_desc, new_desc; - const struct x86_emulate_ops *ops = ctxt->ops; - - if (ctxt->mode == X86EMUL_MODE_PROT64) - ops->get_segment(ctxt, &old_cs, &old_desc, NULL, - VCPU_SREG_CS); + struct desc_struct new_desc; rc = emulate_pop(ctxt, &eip, ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) @@ -2213,10 +2199,10 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; rc = assign_eip_far(ctxt, eip, &new_desc); - if (rc != X86EMUL_CONTINUE) { - WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64); - ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS); - } + /* Error handling is not implemented. */ + if (rc != X86EMUL_CONTINUE) + return X86EMUL_UNHANDLEABLE; + return rc; } From 81cdb259fb6d8c1c4ecfeea389ff5a73c07f5755 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 23 Nov 2016 21:15:27 +0100 Subject: [PATCH 470/503] KVM: x86: fix out-of-bounds accesses of rtc_eoi map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KVM was using arrays of size KVM_MAX_VCPUS with vcpu_id, but ID can be bigger that the maximal number of VCPUs, resulting in out-of-bounds access. Found by syzkaller: BUG: KASAN: slab-out-of-bounds in __apic_accept_irq+0xb33/0xb50 at addr [...] Write of size 1 by task a.out/27101 CPU: 1 PID: 27101 Comm: a.out Not tainted 4.9.0-rc5+ #49 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 [...] Call Trace: [...] __apic_accept_irq+0xb33/0xb50 arch/x86/kvm/lapic.c:905 [...] kvm_apic_set_irq+0x10e/0x180 arch/x86/kvm/lapic.c:495 [...] kvm_irq_delivery_to_apic+0x732/0xc10 arch/x86/kvm/irq_comm.c:86 [...] ioapic_service+0x41d/0x760 arch/x86/kvm/ioapic.c:360 [...] ioapic_set_irq+0x275/0x6c0 arch/x86/kvm/ioapic.c:222 [...] kvm_ioapic_inject_all arch/x86/kvm/ioapic.c:235 [...] kvm_set_ioapic+0x223/0x310 arch/x86/kvm/ioapic.c:670 [...] kvm_vm_ioctl_set_irqchip arch/x86/kvm/x86.c:3668 [...] kvm_arch_vm_ioctl+0x1a08/0x23c0 arch/x86/kvm/x86.c:3999 [...] kvm_vm_ioctl+0x1fa/0x1a70 arch/x86/kvm/../../../virt/kvm/kvm_main.c:3099 Reported-by: Dmitry Vyukov Cc: stable@vger.kernel.org Fixes: af1bae5497b9 ("KVM: x86: bump KVM_MAX_VCPU_ID to 1023") Reviewed-by: Paolo Bonzini Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář --- arch/x86/kvm/ioapic.c | 2 +- arch/x86/kvm/ioapic.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 1a22de70f7f7..6e219e5c07d2 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -94,7 +94,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic) { ioapic->rtc_status.pending_eoi = 0; - bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPUS); + bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID); } static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic); diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index 7d2692a49657..1cc6e54436db 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -42,13 +42,13 @@ struct kvm_vcpu; struct dest_map { /* vcpu bitmap where IRQ has been sent */ - DECLARE_BITMAP(map, KVM_MAX_VCPUS); + DECLARE_BITMAP(map, KVM_MAX_VCPU_ID); /* * Vector sent to a given vcpu, only valid when * the vcpu's bit in map is set */ - u8 vectors[KVM_MAX_VCPUS]; + u8 vectors[KVM_MAX_VCPU_ID]; }; From df492896e6dfb44fd1154f5402428d8e52705081 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 23 Nov 2016 21:25:48 +0100 Subject: [PATCH 471/503] KVM: x86: check for pic and ioapic presence before use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split irqchip allows pic and ioapic routes to be used without them being created, which results in NULL access. Check for NULL and avoid it. (The setup is too racy for a nicer solutions.) Found by syzkaller: general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 3 PID: 11923 Comm: kworker/3:2 Not tainted 4.9.0-rc5+ #27 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: events irqfd_inject task: ffff88006a06c7c0 task.stack: ffff880068638000 RIP: 0010:[...] [...] __lock_acquire+0xb35/0x3380 kernel/locking/lockdep.c:3221 RSP: 0000:ffff88006863ea20 EFLAGS: 00010006 RAX: dffffc0000000000 RBX: dffffc0000000000 RCX: 0000000000000000 RDX: 0000000000000039 RSI: 0000000000000000 RDI: 1ffff1000d0c7d9e RBP: ffff88006863ef58 R08: 0000000000000001 R09: 0000000000000000 R10: 00000000000001c8 R11: 0000000000000000 R12: ffff88006a06c7c0 R13: 0000000000000001 R14: ffffffff8baab1a0 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff88006d100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000004abdd0 CR3: 000000003e2f2000 CR4: 00000000000026e0 Stack: ffffffff894d0098 1ffff1000d0c7d56 ffff88006863ecd0 dffffc0000000000 ffff88006a06c7c0 0000000000000000 ffff88006863ecf8 0000000000000082 0000000000000000 ffffffff815dd7c1 ffffffff00000000 ffffffff00000000 Call Trace: [...] lock_acquire+0x2a2/0x790 kernel/locking/lockdep.c:3746 [...] __raw_spin_lock include/linux/spinlock_api_smp.h:144 [...] _raw_spin_lock+0x38/0x50 kernel/locking/spinlock.c:151 [...] spin_lock include/linux/spinlock.h:302 [...] kvm_ioapic_set_irq+0x4c/0x100 arch/x86/kvm/ioapic.c:379 [...] kvm_set_ioapic_irq+0x8f/0xc0 arch/x86/kvm/irq_comm.c:52 [...] kvm_set_irq+0x239/0x640 arch/x86/kvm/../../../virt/kvm/irqchip.c:101 [...] irqfd_inject+0xb4/0x150 arch/x86/kvm/../../../virt/kvm/eventfd.c:60 [...] process_one_work+0xb40/0x1ba0 kernel/workqueue.c:2096 [...] worker_thread+0x214/0x18a0 kernel/workqueue.c:2230 [...] kthread+0x328/0x3e0 kernel/kthread.c:209 [...] ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:433 Reported-by: Dmitry Vyukov Cc: stable@vger.kernel.org Fixes: 49df6397edfc ("KVM: x86: Split the APIC from the rest of IRQCHIP.") Signed-off-by: Radim Krčmář --- arch/x86/kvm/irq_comm.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 4da03030d5a7..6c0191615f23 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -41,6 +41,15 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, bool line_status) { struct kvm_pic *pic = pic_irqchip(kvm); + + /* + * XXX: rejecting pic routes when pic isn't in use would be better, + * but the default routing table is installed while kvm->arch.vpic is + * NULL and KVM_CREATE_IRQCHIP can race with KVM_IRQ_LINE. + */ + if (!pic) + return -1; + return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level); } @@ -49,6 +58,10 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, bool line_status) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; + + if (!ioapic) + return -1; + return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level, line_status); } From 30c7be26fd3587abcb69587f781098e3ca2d565b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 22 Nov 2016 09:06:45 -0800 Subject: [PATCH 472/503] udplite: call proper backlog handlers In commits 93821778def10 ("udp: Fix rcv socket locking") and f7ad74fef3af ("net/ipv6/udp: UDP encapsulation: break backlog_rcv into __udpv6_queue_rcv_skb") UDP backlog handlers were renamed, but UDPlite was forgotten. This leads to crashes if UDPlite header is pulled twice, which happens starting from commit e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") Bug found by syzkaller team, thanks a lot guys ! Note that backlog use in UDP/UDPlite is scheduled to be removed starting from linux-4.10, so this patch is only needed up to linux-4.9 Fixes: 93821778def1 ("udp: Fix rcv socket locking") Fixes: f7ad74fef3af ("net/ipv6/udp: UDP encapsulation: break backlog_rcv into __udpv6_queue_rcv_skb") Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Cc: Benjamin LaHaise Cc: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/udp.c | 2 +- net/ipv4/udp_impl.h | 2 +- net/ipv4/udplite.c | 2 +- net/ipv6/udp.c | 2 +- net/ipv6/udp_impl.h | 2 +- net/ipv6/udplite.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0de9d5d2b9ae..5bab6c3f7a2f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1455,7 +1455,7 @@ static void udp_v4_rehash(struct sock *sk) udp_lib_rehash(sk, new_hash); } -static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index 7e0fe4bdd967..feb50a16398d 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -25,7 +25,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); -int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); +int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); void udp_destroy_sock(struct sock *sk); #ifdef CONFIG_PROC_FS diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index af817158d830..ff450c2aad9b 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -50,7 +50,7 @@ struct proto udplite_prot = { .sendmsg = udp_sendmsg, .recvmsg = udp_recvmsg, .sendpage = udp_sendpage, - .backlog_rcv = udp_queue_rcv_skb, + .backlog_rcv = __udp_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e5056d4873d1..e4a8000d59ad 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -514,7 +514,7 @@ out: return; } -static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index f6eb1ab34f4b..e78bdc76dcc3 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -26,7 +26,7 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); -int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); +int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); void udpv6_destroy_sock(struct sock *sk); #ifdef CONFIG_PROC_FS diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 47d0d2b87106..2f5101a12283 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -45,7 +45,7 @@ struct proto udplitev6_prot = { .getsockopt = udpv6_getsockopt, .sendmsg = udpv6_sendmsg, .recvmsg = udpv6_recvmsg, - .backlog_rcv = udpv6_queue_rcv_skb, + .backlog_rcv = __udpv6_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .get_port = udp_v6_get_port, From 867d1212bf3c53dc057f7bca72155048cc51d18c Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Tue, 22 Nov 2016 13:14:08 -0500 Subject: [PATCH 473/503] bnxt: do not busy-poll when link is down When busy polling while a link is down (during a link-flap test), TX timeouts were observed as well as the following messages in the ring buffer: bnxt_en 0008:01:00.2 enP8p1s0f2d2: Resp cmpl intr err msg: 0x51 bnxt_en 0008:01:00.2 enP8p1s0f2d2: hwrm_ring_free tx failed. rc:-1 bnxt_en 0008:01:00.2 enP8p1s0f2d2: Resp cmpl intr err msg: 0x51 bnxt_en 0008:01:00.2 enP8p1s0f2d2: hwrm_ring_free rx failed. rc:-1 These were resolved by checking for link status and returning if link was not up. Signed-off-by: Andy Gospodarek Signed-off-by: Michael Chan Tested-by: Rob Miller Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index e41d8bd094ae..ee1a803aa11a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1811,6 +1811,9 @@ static int bnxt_busy_poll(struct napi_struct *napi) if (atomic_read(&bp->intr_sem) != 0) return LL_FLUSH_FAILED; + if (!bp->link_info.link_up) + return LL_FLUSH_FAILED; + if (!bnxt_lock_poll(bnapi)) return LL_FLUSH_BUSY; From 76da8706d90d8641eeb9b8e579942ed80b6c0880 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 22 Nov 2016 11:40:58 -0800 Subject: [PATCH 474/503] net: dsa: bcm_sf2: Ensure we re-negotiate EEE during after link change In case the link change and EEE is enabled or disabled, always try to re-negotiate this with the link partner. Fixes: 450b05c15f9c ("net: dsa: bcm_sf2: add support for controlling EEE") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index e3ee27ce13dd..9ec33b51a0ed 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -588,6 +588,7 @@ static void bcm_sf2_sw_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phydev) { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); + struct ethtool_eee *p = &priv->port_sts[port].eee; u32 id_mode_dis = 0, port_mode; const char *str = NULL; u32 reg; @@ -662,6 +663,9 @@ force_link: reg |= DUPLX_MODE; core_writel(priv, reg, CORE_STS_OVERRIDE_GMIIP_PORT(port)); + + if (!phydev->is_pseudo_fixed_link) + p->eee_enabled = bcm_sf2_eee_init(ds, port, phydev); } static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port, From 19a8bb28d1c66670a2aebf9c78ec21c0b942f4b8 Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Tue, 22 Nov 2016 20:57:04 -0500 Subject: [PATCH 475/503] net sched filters: fix filter handle ID in tfilter_notify_chain() Should pass valid filter handle, not the netlink flags. Fixes: 30a391a13ab92 ("net sched filters: pass netlink message flags in event notification") Signed-off-by: Roman Mashak Signed-off-by: Jamal Hadi Salim Reported-by: Cong Wang Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/sched/cls_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 8e93d4afe5ea..b05d4a2155b0 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -112,7 +112,7 @@ static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, for (it_chain = chain; (tp = rtnl_dereference(*it_chain)) != NULL; it_chain = &tp->next) - tfilter_notify(net, oskb, n, tp, n->nlmsg_flags, event, false); + tfilter_notify(net, oskb, n, tp, 0, event, false); } /* Select new prio value from the range, managed by kernel. */ From d74200024009c8d974c7484446c9eb1622408a17 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 19 Oct 2016 15:34:16 +0530 Subject: [PATCH 476/503] gpu/drm/exynos/exynos_hdmi - Unmap region obtained by of_iomap Free memory mapping, if hdmi_probe is not successful. Signed-off-by: Arvind Yadav Signed-off-by: Inki Dae Signed-off-by: Dave Airlie --- drivers/gpu/drm/exynos/exynos_hdmi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index e8fb6ef947ee..38eaa63afb31 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -1907,6 +1907,8 @@ err_disable_pm_runtime: err_hdmiphy: if (hdata->hdmiphy_port) put_device(&hdata->hdmiphy_port->dev); + if (hdata->regs_hdmiphy) + iounmap(hdata->regs_hdmiphy); err_ddc: put_device(&hdata->ddc_adpt->dev); @@ -1929,6 +1931,9 @@ static int hdmi_remove(struct platform_device *pdev) if (hdata->hdmiphy_port) put_device(&hdata->hdmiphy_port->dev); + if (hdata->regs_hdmiphy) + iounmap(hdata->regs_hdmiphy); + put_device(&hdata->ddc_adpt->dev); return 0; From 2b95fda2c4fcb6d6625963f889247538f247fce0 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 24 Nov 2016 13:23:03 +0000 Subject: [PATCH 477/503] X.509: Fix double free in x509_cert_parse() [ver #3] We shouldn't free cert->pub->key in x509_cert_parse() because x509_free_certificate() also does this: BUG: Double free or freeing an invalid pointer ... Call Trace: [] dump_stack+0x63/0x83 [] kasan_object_err+0x21/0x70 [] kasan_report_double_free+0x49/0x60 [] kasan_slab_free+0x9d/0xc0 [] kfree+0x8a/0x1a0 [] public_key_free+0x1f/0x30 [] x509_free_certificate+0x24/0x90 [] x509_cert_parse+0x2bc/0x300 [] x509_key_preparse+0x3e/0x330 [] asymmetric_key_preparse+0x6f/0x100 [] key_create_or_update+0x260/0x5f0 [] SyS_add_key+0x199/0x2a0 [] entry_SYSCALL_64_fastpath+0x1e/0xad Object at ffff880110bd1900, in cache kmalloc-512 size: 512 .... Freed: PID = 2579 [] save_stack_trace+0x1b/0x20 [] save_stack+0x46/0xd0 [] kasan_slab_free+0x73/0xc0 [] kfree+0x8a/0x1a0 [] x509_cert_parse+0x2a3/0x300 [] x509_key_preparse+0x3e/0x330 [] asymmetric_key_preparse+0x6f/0x100 [] key_create_or_update+0x260/0x5f0 [] SyS_add_key+0x199/0x2a0 [] entry_SYSCALL_64_fastpath+0x1e/0xad Fixes: db6c43bd2132 ("crypto: KEYS: convert public key and digsig asym to the akcipher api") Signed-off-by: Andrey Ryabinin Cc: Signed-off-by: David Howells Signed-off-by: James Morris --- crypto/asymmetric_keys/x509_cert_parser.c | 1 - 1 file changed, 1 deletion(-) diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c index 865f46ea724f..c80765b211cf 100644 --- a/crypto/asymmetric_keys/x509_cert_parser.c +++ b/crypto/asymmetric_keys/x509_cert_parser.c @@ -133,7 +133,6 @@ struct x509_certificate *x509_cert_parse(const void *data, size_t datalen) return cert; error_decode: - kfree(cert->pub->key); kfree(ctx); error_no_ctx: x509_free_certificate(cert); From f5527fffff3f002b0a6b376163613b82f69de073 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 24 Nov 2016 13:23:10 +0000 Subject: [PATCH 478/503] mpi: Fix NULL ptr dereference in mpi_powm() [ver #3] This fixes CVE-2016-8650. If mpi_powm() is given a zero exponent, it wants to immediately return either 1 or 0, depending on the modulus. However, if the result was initalised with zero limb space, no limbs space is allocated and a NULL-pointer exception ensues. Fix this by allocating a minimal amount of limb space for the result when the 0-exponent case when the result is 1 and not touching the limb space when the result is 0. This affects the use of RSA keys and X.509 certificates that carry them. BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] mpi_powm+0x32/0x7e6 PGD 0 Oops: 0002 [#1] SMP Modules linked in: CPU: 3 PID: 3014 Comm: keyctl Not tainted 4.9.0-rc6-fscache+ #278 Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 task: ffff8804011944c0 task.stack: ffff880401294000 RIP: 0010:[] [] mpi_powm+0x32/0x7e6 RSP: 0018:ffff880401297ad8 EFLAGS: 00010212 RAX: 0000000000000000 RBX: ffff88040868bec0 RCX: ffff88040868bba0 RDX: ffff88040868b260 RSI: ffff88040868bec0 RDI: ffff88040868bee0 RBP: ffff880401297ba8 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000047 R11: ffffffff8183b210 R12: 0000000000000000 R13: ffff8804087c7600 R14: 000000000000001f R15: ffff880401297c50 FS: 00007f7a7918c700(0000) GS:ffff88041fb80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000401250000 CR4: 00000000001406e0 Stack: ffff88040868bec0 0000000000000020 ffff880401297b00 ffffffff81376cd4 0000000000000100 ffff880401297b10 ffffffff81376d12 ffff880401297b30 ffffffff81376f37 0000000000000100 0000000000000000 ffff880401297ba8 Call Trace: [] ? __sg_page_iter_next+0x43/0x66 [] ? sg_miter_get_next_page+0x1b/0x5d [] ? sg_miter_next+0x17/0xbd [] ? mpi_read_raw_from_sgl+0xf2/0x146 [] rsa_verify+0x9d/0xee [] ? pkcs1pad_sg_set_buf+0x2e/0xbb [] pkcs1pad_verify+0xc0/0xe1 [] public_key_verify_signature+0x1b0/0x228 [] x509_check_for_self_signed+0xa1/0xc4 [] x509_cert_parse+0x167/0x1a1 [] x509_key_preparse+0x21/0x1a1 [] asymmetric_key_preparse+0x34/0x61 [] key_create_or_update+0x145/0x399 [] SyS_add_key+0x154/0x19e [] do_syscall_64+0x80/0x191 [] entry_SYSCALL64_slow_path+0x25/0x25 Code: 56 41 55 41 54 53 48 81 ec a8 00 00 00 44 8b 71 04 8b 42 04 4c 8b 67 18 45 85 f6 89 45 80 0f 84 b4 06 00 00 85 c0 75 2f 41 ff ce <49> c7 04 24 01 00 00 00 b0 01 75 0b 48 8b 41 18 48 83 38 01 0f RIP [] mpi_powm+0x32/0x7e6 RSP CR2: 0000000000000000 ---[ end trace d82015255d4a5d8d ]--- Basically, this is a backport of a libgcrypt patch: http://git.gnupg.org/cgi-bin/gitweb.cgi?p=libgcrypt.git;a=patch;h=6e1adb05d290aeeb1c230c763970695f4a538526 Fixes: cdec9cb5167a ("crypto: GnuPG based MPI lib - source files (part 1)") Signed-off-by: Andrey Ryabinin Signed-off-by: David Howells cc: Dmitry Kasatkin cc: linux-ima-devel@lists.sourceforge.net cc: stable@vger.kernel.org Signed-off-by: James Morris --- lib/mpi/mpi-pow.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/mpi/mpi-pow.c b/lib/mpi/mpi-pow.c index 5464c8744ea9..e24388a863a7 100644 --- a/lib/mpi/mpi-pow.c +++ b/lib/mpi/mpi-pow.c @@ -64,8 +64,13 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod) if (!esize) { /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 * depending on if MOD equals 1. */ - rp[0] = 1; res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1; + if (res->nlimbs) { + if (mpi_resize(res, 1) < 0) + goto enomem; + rp = res->d; + rp[0] = 1; + } res->sign = 0; goto leave; } From 984d7a1ec67ce3a46324fa4bcb4c745bbc266cf2 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 24 Nov 2016 15:09:54 +0530 Subject: [PATCH 479/503] powerpc/mm: Fixup kernel read only mapping With commit e58e87adc8bf9 ("powerpc/mm: Update _PAGE_KERNEL_RO") we started using the ppp value 0b110 to map kernel readonly. But that facility was only added as part of ISA 2.04. For earlier ISA version only supported ppp bit value for readonly mapping is 0b011. (This implies both user and kernel get mapped using the same ppp bit value for readonly mapping.). Update the code such that for earlier architecture version we use ppp value 0b011 for readonly mapping. We don't differentiate between power5+ and power5 here and apply the new ppp bits only from power6 (ISA 2.05). This keep the changes minimal. This fixes issue with PS3 spu usage reported at https://lkml.kernel.org/r/rep.1421449714.geoff@infradead.org Fixes: e58e87adc8bf9 ("powerpc/mm: Update _PAGE_KERNEL_RO") Cc: stable@vger.kernel.org # v4.7+ Tested-by: Geoff Levand Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu.h | 14 ++++++++++---- arch/powerpc/mm/hash_utils_64.c | 8 ++++++-- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index e88368354e49..e311c25751a4 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -28,6 +28,12 @@ * Individual features below. */ +/* + * Kernel read only support. + * We added the ppp value 0b110 in ISA 2.04. + */ +#define MMU_FTR_KERNEL_RO ASM_CONST(0x00004000) + /* * We need to clear top 16bits of va (from the remaining 64 bits )in * tlbie* instructions @@ -103,10 +109,10 @@ #define MMU_FTRS_POWER4 MMU_FTRS_DEFAULT_HPTE_ARCH_V2 #define MMU_FTRS_PPC970 MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA #define MMU_FTRS_POWER5 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE -#define MMU_FTRS_POWER6 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE -#define MMU_FTRS_POWER7 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE -#define MMU_FTRS_POWER8 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE -#define MMU_FTRS_POWER9 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE +#define MMU_FTRS_POWER6 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO +#define MMU_FTRS_POWER7 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO +#define MMU_FTRS_POWER8 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO +#define MMU_FTRS_POWER9 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO #define MMU_FTRS_CELL MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \ MMU_FTR_CI_LARGE_PAGE #define MMU_FTRS_PA6T MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \ diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 5503078090cd..78dabf065ba9 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -193,8 +193,12 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) /* * Kernel read only mapped with ppp bits 0b110 */ - if (!(pteflags & _PAGE_WRITE)) - rflags |= (HPTE_R_PP0 | 0x2); + if (!(pteflags & _PAGE_WRITE)) { + if (mmu_has_feature(MMU_FTR_KERNEL_RO)) + rflags |= (HPTE_R_PP0 | 0x2); + else + rflags |= 0x3; + } } else { if (pteflags & _PAGE_RWX) rflags |= 0x2; From d29ccdb3f0e5dccb170200c9f3d573eaa5af261b Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 14 Oct 2016 10:17:31 +0100 Subject: [PATCH 480/503] mfd: syscon: Support native-endian regmaps The regmap devicetree binding documentation states that a native-endian property should be supported as well as big-endian & little-endian, however syscon in its duplication of the parsing of these properties omits support for native-endian. Fix this by setting REGMAP_ENDIAN_NATIVE when a native-endian property is found. Signed-off-by: Paul Burton Cc: Lee Jones Cc: Arnd Bergmann Cc: Guenter Roeck Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Signed-off-by: Lee Jones --- drivers/mfd/syscon.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mfd/syscon.c b/drivers/mfd/syscon.c index 2f2225e845ef..b93fe4c4957a 100644 --- a/drivers/mfd/syscon.c +++ b/drivers/mfd/syscon.c @@ -73,8 +73,10 @@ static struct syscon *of_syscon_register(struct device_node *np) /* Parse the device's DT node for an endianness specification */ if (of_property_read_bool(np, "big-endian")) syscon_config.val_format_endian = REGMAP_ENDIAN_BIG; - else if (of_property_read_bool(np, "little-endian")) + else if (of_property_read_bool(np, "little-endian")) syscon_config.val_format_endian = REGMAP_ENDIAN_LITTLE; + else if (of_property_read_bool(np, "native-endian")) + syscon_config.val_format_endian = REGMAP_ENDIAN_NATIVE; /* * search for reg-io-width property in DT. If it is not provided, From 2a872a5dcec7052e9fd948ee77a62187791735ff Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Wed, 9 Nov 2016 13:26:25 +0000 Subject: [PATCH 481/503] MIPS: mm: Fix output of __do_page_fault Since commit 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines") the output from __do_page_fault on MIPS has been pretty unreadable due to the lack of KERN_CONT markers. Use pr_cont to provide the appropriate markers & restore the expected output. Signed-off-by: Matt Redfearn Cc: Paul Gortmaker Cc: Kirill A. Shutemov Cc: Andrew Morton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14544/ Signed-off-by: Ralf Baechle --- arch/mips/mm/fault.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index d56a855828c2..3bef306cdfdb 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -209,17 +209,18 @@ bad_area_nosemaphore: if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && __ratelimit(&ratelimit_state)) { - pr_info("\ndo_page_fault(): sending SIGSEGV to %s for invalid %s %0*lx", + pr_info("do_page_fault(): sending SIGSEGV to %s for invalid %s %0*lx\n", tsk->comm, write ? "write access to" : "read access from", field, address); pr_info("epc = %0*lx in", field, (unsigned long) regs->cp0_epc); - print_vma_addr(" ", regs->cp0_epc); + print_vma_addr(KERN_CONT " ", regs->cp0_epc); + pr_cont("\n"); pr_info("ra = %0*lx in", field, (unsigned long) regs->regs[31]); - print_vma_addr(" ", regs->regs[31]); - pr_info("\n"); + print_vma_addr(KERN_CONT " ", regs->regs[31]); + pr_cont("\n"); } current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f; info.si_signo = SIGSEGV; From 3cfc43df7af0533b39b97bb03980e02e9716fc52 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 16 Sep 2016 08:56:59 +0530 Subject: [PATCH 482/503] mfd: wm8994-core: Disable regulators before removing them The order in which resources were freed in wm8994_device_exit() isn't correct. The regulators are removed before they are disabled. Fix it by reordering code a bit, which makes it exact opposite of wm8994_device_init() as well. Signed-off-by: Viresh Kumar Acked-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/wm8994-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c index 7eec619a6023..1e644aa53a2d 100644 --- a/drivers/mfd/wm8994-core.c +++ b/drivers/mfd/wm8994-core.c @@ -604,10 +604,10 @@ err: static void wm8994_device_exit(struct wm8994 *wm8994) { pm_runtime_disable(wm8994->dev); - mfd_remove_devices(wm8994->dev); wm8994_irq_exit(wm8994); regulator_bulk_disable(wm8994->num_supplies, wm8994->supplies); + mfd_remove_devices(wm8994->dev); } static const struct of_device_id wm8994_of_match[] = { From 1a41741fd60b0a2d1102c3d1ff9d58cb324a8d29 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 27 Oct 2016 15:50:18 +0530 Subject: [PATCH 483/503] mfd: wm8994-core: Don't use managed regulator bulk get API The kernel WARNs and then crashes today if wm8994_device_init() fails after calling devm_regulator_bulk_get(). That happens because there are multiple devices involved here and the order in which managed resources are freed isn't correct. The regulators are added as children of wm8994->dev. Whereas, devm_regulator_bulk_get() receives wm8994->dev as the device, though it gets the same regulators which were added as children of wm8994->dev earlier. During failures, the children are removed first and the core eventually calls regulator_unregister() for them. As regulator_put() was never done for them (opposite of devm_regulator_bulk_get()), the kernel WARNs at WARN_ON(rdev->open_count); And eventually it crashes from debugfs_remove_recursive(). --------x------------------x---------------- wm8994 3-001a: Device is not a WM8994, ID is 0 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1 at /mnt/ssd/all/work/repos/devel/linux/drivers/regulator/core.c:4072 regulator_unregister+0xc8/0xd0 Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.8.0-rc6-00154-g54fe84cbd50b #41 Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x88/0x9c) [] (dump_stack) from [] (__warn+0xe8/0x100) [] (__warn) from [] (warn_slowpath_null+0x20/0x28) [] (warn_slowpath_null) from [] (regulator_unregister+0xc8/0xd0) [] (regulator_unregister) from [] (release_nodes+0x16c/0x1dc) [] (release_nodes) from [] (__device_release_driver+0x8c/0x110) [] (__device_release_driver) from [] (device_release_driver+0x1c/0x28) [] (device_release_driver) from [] (bus_remove_device+0xd8/0x104) [] (bus_remove_device) from [] (device_del+0x10c/0x218) [] (device_del) from [] (platform_device_del+0x1c/0x88) [] (platform_device_del) from [] (platform_device_unregister+0xc/0x20) [] (platform_device_unregister) from [] (mfd_remove_devices_fn+0x5c/0x64) [] (mfd_remove_devices_fn) from [] (device_for_each_child_reverse+0x4c/0x78) [] (device_for_each_child_reverse) from [] (mfd_remove_devices+0x20/0x30) [] (mfd_remove_devices) from [] (wm8994_device_init+0x2ac/0x7f0) [] (wm8994_device_init) from [] (i2c_device_probe+0x178/0x1fc) [] (i2c_device_probe) from [] (driver_probe_device+0x214/0x2c0) [] (driver_probe_device) from [] (__driver_attach+0xac/0xb0) [] (__driver_attach) from [] (bus_for_each_dev+0x68/0x9c) [] (bus_for_each_dev) from [] (bus_add_driver+0x1a0/0x218) [] (bus_add_driver) from [] (driver_register+0x78/0xf8) [] (driver_register) from [] (i2c_register_driver+0x34/0x84) [] (i2c_register_driver) from [] (do_one_initcall+0x40/0x170) [] (do_one_initcall) from [] (kernel_init_freeable+0x15c/0x1fc) [] (kernel_init_freeable) from [] (kernel_init+0x8/0x114) [] (kernel_init) from [] (ret_from_fork+0x14/0x3c) ---[ end trace 0919d3d0bc998260 ]--- [snip..] Unable to handle kernel NULL pointer dereference at virtual address 00000078 pgd = c0004000 [00000078] *pgd=00000000 Internal error: Oops: 5 [#1] PREEMPT SMP ARM Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Tainted: G W 4.8.0-rc6-00154-g54fe84cbd50b #41 Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) task: ee874000 task.stack: ee878000 PC is at down_write+0x14/0x54 LR is at debugfs_remove_recursive+0x30/0x150 [snip..] [] (down_write) from [] (debugfs_remove_recursive+0x30/0x150) [] (debugfs_remove_recursive) from [] (_regulator_put+0x24/0xac) [] (_regulator_put) from [] (regulator_put+0x1c/0x2c) [] (regulator_put) from [] (release_nodes+0x16c/0x1dc) [] (release_nodes) from [] (driver_probe_device+0xec/0x2c0) [] (driver_probe_device) from [] (__driver_attach+0xac/0xb0) [] (__driver_attach) from [] (bus_for_each_dev+0x68/0x9c) [] (bus_for_each_dev) from [] (bus_add_driver+0x1a0/0x218) [] (bus_add_driver) from [] (driver_register+0x78/0xf8) [] (driver_register) from [] (i2c_register_driver+0x34/0x84) [] (i2c_register_driver) from [] (do_one_initcall+0x40/0x170) [] (do_one_initcall) from [] (kernel_init_freeable+0x15c/0x1fc) [] (kernel_init_freeable) from [] (kernel_init+0x8/0x114) [] (kernel_init) from [] (ret_from_fork+0x14/0x3c) Code: e1a04000 f590f000 e3a03001 e34f3fff (e1902f9f) ---[ end trace 0919d3d0bc998262 ]--- --------x------------------x---------------- Fix the kernel warnings and crashes by using regulator_bulk_get() instead of devm_regulator_bulk_get() and explicitly freeing the supplies in exit paths. Tested on Exynos 5250, dual core ARM A15 machine. Signed-off-by: Viresh Kumar Acked-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/wm8994-core.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c index 1e644aa53a2d..8588dbad3301 100644 --- a/drivers/mfd/wm8994-core.c +++ b/drivers/mfd/wm8994-core.c @@ -393,8 +393,13 @@ static int wm8994_device_init(struct wm8994 *wm8994, int irq) BUG(); goto err; } - - ret = devm_regulator_bulk_get(wm8994->dev, wm8994->num_supplies, + + /* + * Can't use devres helper here as some of the supplies are provided by + * wm8994->dev's children (regulators) and those regulators are + * unregistered by the devres core before the supplies are freed. + */ + ret = regulator_bulk_get(wm8994->dev, wm8994->num_supplies, wm8994->supplies); if (ret != 0) { dev_err(wm8994->dev, "Failed to get supplies: %d\n", ret); @@ -405,7 +410,7 @@ static int wm8994_device_init(struct wm8994 *wm8994, int irq) wm8994->supplies); if (ret != 0) { dev_err(wm8994->dev, "Failed to enable supplies: %d\n", ret); - goto err; + goto err_regulator_free; } ret = wm8994_reg_read(wm8994, WM8994_SOFTWARE_RESET); @@ -596,6 +601,8 @@ err_irq: err_enable: regulator_bulk_disable(wm8994->num_supplies, wm8994->supplies); +err_regulator_free: + regulator_bulk_free(wm8994->num_supplies, wm8994->supplies); err: mfd_remove_devices(wm8994->dev); return ret; @@ -607,6 +614,7 @@ static void wm8994_device_exit(struct wm8994 *wm8994) wm8994_irq_exit(wm8994); regulator_bulk_disable(wm8994->num_supplies, wm8994->supplies); + regulator_bulk_free(wm8994->num_supplies, wm8994->supplies); mfd_remove_devices(wm8994->dev); } From 741dc7bf1c7c7d93b853bb55efe77baa27e1b0a9 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sun, 20 Nov 2016 21:12:36 -0500 Subject: [PATCH 484/503] parisc: Fix races in parisc_setup_cache_timing() Helge reported to me the following startup crash: [ 0.000000] Linux version 4.8.0-1-parisc64-smp (debian-kernel@lists.debian.org) (gcc version 5.4.1 20161019 (GCC) ) #1 SMP Debian 4.8.7-1 (2016-11-13) [ 0.000000] The 64-bit Kernel has started... [ 0.000000] Kernel default page size is 4 KB. Huge pages enabled with 1 MB physical and 2 MB virtual size. [ 0.000000] Determining PDC firmware type: System Map. [ 0.000000] model 9000/785/J5000 [ 0.000000] Total Memory: 2048 MB [ 0.000000] Memory: 2018528K/2097152K available (9272K kernel code, 3053K rwdata, 1319K rodata, 1024K init, 840K bss, 78624K reserved, 0K cma-reserved) [ 0.000000] virtual kernel memory layout: [ 0.000000] vmalloc : 0x0000000000008000 - 0x000000003f000000 (1007 MB) [ 0.000000] memory : 0x0000000040000000 - 0x00000000c0000000 (2048 MB) [ 0.000000] .init : 0x0000000040100000 - 0x0000000040200000 (1024 kB) [ 0.000000] .data : 0x0000000040b0e000 - 0x0000000040f533e0 (4372 kB) [ 0.000000] .text : 0x0000000040200000 - 0x0000000040b0e000 (9272 kB) [ 0.768910] Brought up 1 CPUs [ 0.992465] NET: Registered protocol family 16 [ 2.429981] Releasing cpu 1 now, hpa=fffffffffffa2000 [ 2.635751] CPU(s): 2 out of 2 PA8500 (PCX-W) at 440.000000 MHz online [ 2.726692] Setting cache flush threshold to 1024 kB [ 2.729932] Not-handled unaligned insn 0x43ffff80 [ 2.798114] Setting TLB flush threshold to 140 kB [ 2.928039] Unaligned handler failed, ret = -1 [ 3.000419] _______________________________ [ 3.000419] < Your System ate a SPARC! Gah! > [ 3.000419] ------------------------------- [ 3.000419] \ ^__^ [ 3.000419] (__)\ )\/\ [ 3.000419] U ||----w | [ 3.000419] || || [ 9.340055] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.8.0-1-parisc64-smp #1 Debian 4.8.7-1 [ 9.448082] task: 00000000bfd48060 task.stack: 00000000bfd50000 [ 9.528040] [ 10.760029] IASQ: 0000000000000000 0000000000000000 IAOQ: 000000004025d154 000000004025d158 [ 10.868052] IIR: 43ffff80 ISR: 0000000000340000 IOR: 000001ff54150960 [ 10.960029] CPU: 1 CR30: 00000000bfd50000 CR31: 0000000011111111 [ 11.052057] ORIG_R28: 000000004021e3b4 [ 11.100045] IAOQ[0]: irq_exit+0x94/0x120 [ 11.152062] IAOQ[1]: irq_exit+0x98/0x120 [ 11.208031] RP(r2): irq_exit+0xb8/0x120 [ 11.256074] Backtrace: [ 11.288067] [<00000000402cd944>] cpu_startup_entry+0x1e4/0x598 [ 11.368058] [<0000000040109528>] smp_callin+0x2c0/0x2f0 [ 11.436308] [<00000000402b53fc>] update_curr+0x18c/0x2d0 [ 11.508055] [<00000000402b73b8>] dequeue_entity+0x2c0/0x1030 [ 11.584040] [<00000000402b3cc0>] set_next_entity+0x80/0xd30 [ 11.660069] [<00000000402c1594>] pick_next_task_fair+0x614/0x720 [ 11.740085] [<000000004020dd34>] __schedule+0x394/0xa60 [ 11.808054] [<000000004020e488>] schedule+0x88/0x118 [ 11.876039] [<0000000040283d3c>] rescuer_thread+0x4d4/0x5b0 [ 11.948090] [<000000004028fc4c>] kthread+0x1ec/0x248 [ 12.016053] [<0000000040205020>] end_fault_vector+0x20/0xc0 [ 12.092239] [<00000000402050c0>] _switch_to_ret+0x0/0xf40 [ 12.164044] [ 12.184036] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.8.0-1-parisc64-smp #1 Debian 4.8.7-1 [ 12.244040] Backtrace: [ 12.244040] [<000000004021c480>] show_stack+0x68/0x80 [ 12.244040] [<00000000406f332c>] dump_stack+0xec/0x168 [ 12.244040] [<000000004021c74c>] die_if_kernel+0x25c/0x430 [ 12.244040] [<000000004022d320>] handle_unaligned+0xb48/0xb50 [ 12.244040] [ 12.632066] ---[ end trace 9ca05a7215c7bbb2 ]--- [ 12.692036] Kernel panic - not syncing: Attempted to kill the idle task! We have the insn 0x43ffff80 in IIR but from IAOQ we should have: 4025d150: 0f f3 20 df ldd,s r19(r31),r31 4025d154: 0f 9f 00 9c ldw r31(ret0),ret0 4025d158: bf 80 20 58 cmpb,*<> r0,ret0,4025d18c Cpu0 has just completed running parisc_setup_cache_timing: [ 2.429981] Releasing cpu 1 now, hpa=fffffffffffa2000 [ 2.635751] CPU(s): 2 out of 2 PA8500 (PCX-W) at 440.000000 MHz online [ 2.726692] Setting cache flush threshold to 1024 kB [ 2.729932] Not-handled unaligned insn 0x43ffff80 [ 2.798114] Setting TLB flush threshold to 140 kB [ 2.928039] Unaligned handler failed, ret = -1 From the backtrace, cpu1 is in smp_callin: void __init smp_callin(void) { int slave_id = cpu_now_booting; smp_cpu_init(slave_id); preempt_disable(); flush_cache_all_local(); /* start with known state */ flush_tlb_all_local(NULL); local_irq_enable(); /* Interrupts have been off until now */ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); So, it has just flushed its caches and the TLB. It would seem either the flushes in parisc_setup_cache_timing or smp_callin have corrupted kernel memory. The attached patch reworks parisc_setup_cache_timing to remove the races in setting the cache and TLB flush thresholds. It also corrects the number of bytes flushed in the TLB calculation. The patch flushes the cache and TLB on cpu0 before starting the secondary processors so that they are started from a known state. Tested with a few reboots on c8000. Signed-off-by: John David Anglin Cc: # v3.18+ Signed-off-by: Helge Deller --- arch/parisc/kernel/cache.c | 31 ++++++++++++------------------- arch/parisc/kernel/setup.c | 4 ++++ 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 629eb464d5ba..c263301648f3 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -369,6 +369,7 @@ void __init parisc_setup_cache_timing(void) { unsigned long rangetime, alltime; unsigned long size, start; + unsigned long threshold; alltime = mfctl(16); flush_data_cache(); @@ -382,17 +383,12 @@ void __init parisc_setup_cache_timing(void) printk(KERN_DEBUG "Whole cache flush %lu cycles, flushing %lu bytes %lu cycles\n", alltime, size, rangetime); - /* Racy, but if we see an intermediate value, it's ok too... */ - parisc_cache_flush_threshold = size * alltime / rangetime; - - parisc_cache_flush_threshold = L1_CACHE_ALIGN(parisc_cache_flush_threshold); - if (!parisc_cache_flush_threshold) - parisc_cache_flush_threshold = FLUSH_THRESHOLD; - - if (parisc_cache_flush_threshold > cache_info.dc_size) - parisc_cache_flush_threshold = cache_info.dc_size; - - printk(KERN_INFO "Setting cache flush threshold to %lu kB\n", + threshold = L1_CACHE_ALIGN(size * alltime / rangetime); + if (threshold > cache_info.dc_size) + threshold = cache_info.dc_size; + if (threshold) + parisc_cache_flush_threshold = threshold; + printk(KERN_INFO "Cache flush threshold set to %lu KiB\n", parisc_cache_flush_threshold/1024); /* calculate TLB flush threshold */ @@ -401,7 +397,7 @@ void __init parisc_setup_cache_timing(void) flush_tlb_all(); alltime = mfctl(16) - alltime; - size = PAGE_SIZE; + size = 0; start = (unsigned long) _text; rangetime = mfctl(16); while (start < (unsigned long) _end) { @@ -414,13 +410,10 @@ void __init parisc_setup_cache_timing(void) printk(KERN_DEBUG "Whole TLB flush %lu cycles, flushing %lu bytes %lu cycles\n", alltime, size, rangetime); - parisc_tlb_flush_threshold = size * alltime / rangetime; - parisc_tlb_flush_threshold *= num_online_cpus(); - parisc_tlb_flush_threshold = PAGE_ALIGN(parisc_tlb_flush_threshold); - if (!parisc_tlb_flush_threshold) - parisc_tlb_flush_threshold = FLUSH_TLB_THRESHOLD; - - printk(KERN_INFO "Setting TLB flush threshold to %lu kB\n", + threshold = PAGE_ALIGN(num_online_cpus() * size * alltime / rangetime); + if (threshold) + parisc_tlb_flush_threshold = threshold; + printk(KERN_INFO "TLB flush threshold set to %lu KiB\n", parisc_tlb_flush_threshold/1024); } diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index 81d6f6391944..2e66a887788e 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -334,6 +334,10 @@ static int __init parisc_init(void) /* tell PDC we're Linux. Nevermind failure. */ pdc_stable_write(0x40, &osid, sizeof(osid)); + /* start with known state */ + flush_cache_all_local(); + flush_tlb_all_local(NULL); + processor_init(); #ifdef CONFIG_SMP pr_info("CPU(s): %d out of %d %s at %d.%06d MHz online\n", From 43b1f6abd59063a088416a0df042b36450f91f75 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 22 Nov 2016 18:08:30 +0100 Subject: [PATCH 485/503] parisc: Switch to generic sched_clock implementation Drop the open-coded sched_clock() function and replace it by the provided GENERIC_SCHED_CLOCK implementation. We have seen quite some hung tasks in the past, which seem to be fixed by this patch. Signed-off-by: Helge Deller Cc: # v4.7+ Signed-off-by: Helge Deller --- arch/parisc/Kconfig | 4 ++- arch/parisc/kernel/time.c | 57 ++++++++------------------------------- 2 files changed, 14 insertions(+), 47 deletions(-) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 71c4a3aa3752..a14b86587013 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -34,7 +34,9 @@ config PARISC select HAVE_ARCH_HASH select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK - select HAVE_UNSTABLE_SCHED_CLOCK if (SMP || !64BIT) + select GENERIC_SCHED_CLOCK + select HAVE_UNSTABLE_SCHED_CLOCK if SMP + select GENERIC_CLOCKEVENTS select ARCH_NO_COHERENT_DMA_MMAP select CPU_NO_EFFICIENT_FFS diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 9b63b876a13a..325f30d82b64 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -39,18 +40,6 @@ static unsigned long clocktick __read_mostly; /* timer cycles per tick */ -#ifndef CONFIG_64BIT -/* - * The processor-internal cycle counter (Control Register 16) is used as time - * source for the sched_clock() function. This register is 64bit wide on a - * 64-bit kernel and 32bit on a 32-bit kernel. Since sched_clock() always - * requires a 64bit counter we emulate on the 32-bit kernel the higher 32bits - * with a per-cpu variable which we increase every time the counter - * wraps-around (which happens every ~4 secounds). - */ -static DEFINE_PER_CPU(unsigned long, cr16_high_32_bits); -#endif - /* * We keep time on PA-RISC Linux by using the Interval Timer which is * a pair of registers; one is read-only and one is write-only; both @@ -121,12 +110,6 @@ irqreturn_t __irq_entry timer_interrupt(int irq, void *dev_id) */ mtctl(next_tick, 16); -#if !defined(CONFIG_64BIT) - /* check for overflow on a 32bit kernel (every ~4 seconds). */ - if (unlikely(next_tick < now)) - this_cpu_inc(cr16_high_32_bits); -#endif - /* Skip one clocktick on purpose if we missed next_tick. * The new CR16 must be "later" than current CR16 otherwise * itimer would not fire until CR16 wrapped - e.g 4 seconds @@ -208,7 +191,7 @@ EXPORT_SYMBOL(profile_pc); /* clock source code */ -static cycle_t read_cr16(struct clocksource *cs) +static cycle_t notrace read_cr16(struct clocksource *cs) { return get_cycles(); } @@ -287,26 +270,9 @@ void read_persistent_clock(struct timespec *ts) } -/* - * sched_clock() framework - */ - -static u32 cyc2ns_mul __read_mostly; -static u32 cyc2ns_shift __read_mostly; - -u64 sched_clock(void) +static u64 notrace read_cr16_sched_clock(void) { - u64 now; - - /* Get current cycle counter (Control Register 16). */ -#ifdef CONFIG_64BIT - now = mfctl(16); -#else - now = mfctl(16) + (((u64) this_cpu_read(cr16_high_32_bits)) << 32); -#endif - - /* return the value in ns (cycles_2_ns) */ - return mul_u64_u32_shr(now, cyc2ns_mul, cyc2ns_shift); + return get_cycles(); } @@ -316,17 +282,16 @@ u64 sched_clock(void) void __init time_init(void) { - unsigned long current_cr16_khz; + unsigned long cr16_hz; - current_cr16_khz = PAGE0->mem_10msec/10; /* kHz */ clocktick = (100 * PAGE0->mem_10msec) / HZ; - - /* calculate mult/shift values for cr16 */ - clocks_calc_mult_shift(&cyc2ns_mul, &cyc2ns_shift, current_cr16_khz, - NSEC_PER_MSEC, 0); - start_cpu_itimer(); /* get CPU 0 started */ + cr16_hz = 100 * PAGE0->mem_10msec; /* Hz */ + /* register at clocksource framework */ - clocksource_register_khz(&clocksource_cr16, current_cr16_khz); + clocksource_register_hz(&clocksource_cr16, cr16_hz); + + /* register as sched_clock source */ + sched_clock_register(read_cr16_sched_clock, BITS_PER_LONG, cr16_hz); } From c0452fb9fb8f49c7d68ab9fa0ad092016be7b45f Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Thu, 24 Nov 2016 20:06:32 -0500 Subject: [PATCH 486/503] parisc: Fix race in pci-dma.c We are still troubled by occasional random segmentation faults and memory memory corruption on SMP machines. The causes quite a few package builds to fail on the Debian buildd machines for parisc. When gcc-6 failed to build three times in a row, I looked again at the TLB related code. I found a couple of issues. This is the first. In general, we need to ensure page table updates and corresponding TLB purges are atomic. The attached patch fixes an instance in pci-dma.c where the page table update was not guarded by the TLB lock. Tested on rp3440 and c8000. So far, no further random segmentation faults have been observed. Signed-off-by: John David Anglin Cc: # v3.16+ Signed-off-by: Helge Deller --- arch/parisc/kernel/pci-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index 02d9ed0f3949..494ff6e8c88a 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -95,8 +95,8 @@ static inline int map_pte_uncached(pte_t * pte, if (!pte_none(*pte)) printk(KERN_ERR "map_pte_uncached: page already exists\n"); - set_pte(pte, __mk_pte(*paddr_ptr, PAGE_KERNEL_UNC)); purge_tlb_start(flags); + set_pte(pte, __mk_pte(*paddr_ptr, PAGE_KERNEL_UNC)); pdtlb_kernel(orig_vaddr); purge_tlb_end(flags); vaddr += PAGE_SIZE; From 5035b230e7b67ac12691ed3b5495bbb617027b68 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Thu, 24 Nov 2016 20:18:14 -0500 Subject: [PATCH 487/503] parisc: Also flush data TLB in flush_icache_page_asm This is the second issue I noticed in reviewing the parisc TLB code. The fic instruction may use either the instruction or data TLB in flushing the instruction cache. Thus, on machines with a split TLB, we should also flush the data TLB after setting up the temporary alias registers. Although this has no functional impact, I changed the pdtlb and pitlb instructions to consistently use the index register %r0. These instructions do not support integer displacements. Tested on rp3440 and c8000. Signed-off-by: John David Anglin Cc: # v3.16+ Signed-off-by: Helge Deller --- arch/parisc/kernel/pacache.S | 37 +++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index 985e06da37f5..1b39a2acaadf 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -96,7 +96,7 @@ fitmanyloop: /* Loop if LOOP >= 2 */ fitmanymiddle: /* Loop if LOOP >= 2 */ addib,COND(>) -1, %r31, fitmanymiddle /* Adjusted inner loop decr */ - pitlbe 0(%sr1, %r28) + pitlbe %r0(%sr1, %r28) pitlbe,m %arg1(%sr1, %r28) /* Last pitlbe and addr adjust */ addib,COND(>) -1, %r29, fitmanymiddle /* Middle loop decr */ copy %arg3, %r31 /* Re-init inner loop count */ @@ -139,7 +139,7 @@ fdtmanyloop: /* Loop if LOOP >= 2 */ fdtmanymiddle: /* Loop if LOOP >= 2 */ addib,COND(>) -1, %r31, fdtmanymiddle /* Adjusted inner loop decr */ - pdtlbe 0(%sr1, %r28) + pdtlbe %r0(%sr1, %r28) pdtlbe,m %arg1(%sr1, %r28) /* Last pdtlbe and addr adjust */ addib,COND(>) -1, %r29, fdtmanymiddle /* Middle loop decr */ copy %arg3, %r31 /* Re-init inner loop count */ @@ -626,12 +626,12 @@ ENTRY_CFI(copy_user_page_asm) /* Purge any old translations */ #ifdef CONFIG_PA20 - pdtlb,l 0(%r28) - pdtlb,l 0(%r29) + pdtlb,l %r0(%r28) + pdtlb,l %r0(%r29) #else tlb_lock %r20,%r21,%r22 - pdtlb 0(%r28) - pdtlb 0(%r29) + pdtlb %r0(%r28) + pdtlb %r0(%r29) tlb_unlock %r20,%r21,%r22 #endif @@ -774,10 +774,10 @@ ENTRY_CFI(clear_user_page_asm) /* Purge any old translation */ #ifdef CONFIG_PA20 - pdtlb,l 0(%r28) + pdtlb,l %r0(%r28) #else tlb_lock %r20,%r21,%r22 - pdtlb 0(%r28) + pdtlb %r0(%r28) tlb_unlock %r20,%r21,%r22 #endif @@ -858,10 +858,10 @@ ENTRY_CFI(flush_dcache_page_asm) /* Purge any old translation */ #ifdef CONFIG_PA20 - pdtlb,l 0(%r28) + pdtlb,l %r0(%r28) #else tlb_lock %r20,%r21,%r22 - pdtlb 0(%r28) + pdtlb %r0(%r28) tlb_unlock %r20,%r21,%r22 #endif @@ -898,10 +898,10 @@ ENTRY_CFI(flush_dcache_page_asm) sync #ifdef CONFIG_PA20 - pdtlb,l 0(%r25) + pdtlb,l %r0(%r25) #else tlb_lock %r20,%r21,%r22 - pdtlb 0(%r25) + pdtlb %r0(%r25) tlb_unlock %r20,%r21,%r22 #endif @@ -931,13 +931,18 @@ ENTRY_CFI(flush_icache_page_asm) depwi 0, 31,PAGE_SHIFT, %r28 /* Clear any offset bits */ #endif - /* Purge any old translation */ + /* Purge any old translation. Note that the FIC instruction + * may use either the instruction or data TLB. Given that we + * have a flat address space, it's not clear which TLB will be + * used. So, we purge both entries. */ #ifdef CONFIG_PA20 + pdtlb,l %r0(%r28) pitlb,l %r0(%sr4,%r28) #else tlb_lock %r20,%r21,%r22 - pitlb (%sr4,%r28) + pdtlb %r0(%r28) + pitlb %r0(%sr4,%r28) tlb_unlock %r20,%r21,%r22 #endif @@ -976,10 +981,12 @@ ENTRY_CFI(flush_icache_page_asm) sync #ifdef CONFIG_PA20 + pdtlb,l %r0(%r28) pitlb,l %r0(%sr4,%r25) #else tlb_lock %r20,%r21,%r22 - pitlb (%sr4,%r25) + pdtlb %r0(%r28) + pitlb %r0(%sr4,%r25) tlb_unlock %r20,%r21,%r22 #endif From f7db0ec9572f66b36c0d4d6bc4b564da53c8b35d Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 23 Nov 2016 22:24:35 +0800 Subject: [PATCH 488/503] dwc_eth_qos: drop duplicate headers Drop duplicate headers types.h and delay.h from dwc_eth_qos.c. Signed-off-by: Geliang Tang Signed-off-by: David S. Miller --- drivers/net/ethernet/synopsys/dwc_eth_qos.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c index 5eedac495077..4ba2421e625d 100644 --- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c +++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c @@ -33,7 +33,6 @@ #include #include -#include #include #include #include @@ -43,7 +42,6 @@ #include #include -#include #include #include From 89119f08354b628548118cacd686a7700372ad19 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Fri, 25 Nov 2016 17:22:27 +0200 Subject: [PATCH 489/503] Revert "i2c: designware: do not disable adapter after transfer" This reverts commit 0317e6c0f1dc1ba86b8d9dccc010c5e77b8355fa. Srinivas reported recently touchscreen and touchpad stopped working in Haswell based machine in Linux 4.9-rc series with timeout errors from i2c_designware: [ 16.508013] i2c_designware INT33C3:00: controller timed out [ 16.508302] i2c_hid i2c-MSFT0001:02: failed to change power setting. [ 17.532016] i2c_designware INT33C3:00: controller timed out [ 18.556022] i2c_designware INT33C3:00: controller timed out [ 18.556315] i2c_hid i2c-ATML1000:00: failed to retrieve report from device. I managed to reproduce similar errors on another Haswell based machine where touchscreen initialization fails maybe in every 1/5 - 1/2 boots. Since root cause for these errors is not clear yet and debugging is ongoing it's better to revert this commit as we are near to release. Reported-by: Srinivas Pandruvada Signed-off-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-core.c | 55 ++++++++---------------- 1 file changed, 18 insertions(+), 37 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c index c53058d6139c..b403fa5ecf49 100644 --- a/drivers/i2c/busses/i2c-designware-core.c +++ b/drivers/i2c/busses/i2c-designware-core.c @@ -91,9 +91,7 @@ DW_IC_INTR_TX_ABRT | \ DW_IC_INTR_STOP_DET) -#define DW_IC_STATUS_ACTIVITY 0x1 -#define DW_IC_STATUS_TFE BIT(2) -#define DW_IC_STATUS_MST_ACTIVITY BIT(5) +#define DW_IC_STATUS_ACTIVITY 0x1 #define DW_IC_SDA_HOLD_RX_SHIFT 16 #define DW_IC_SDA_HOLD_RX_MASK GENMASK(23, DW_IC_SDA_HOLD_RX_SHIFT) @@ -478,25 +476,9 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev) { struct i2c_msg *msgs = dev->msgs; u32 ic_tar = 0; - bool enabled; - enabled = dw_readl(dev, DW_IC_ENABLE_STATUS) & 1; - - if (enabled) { - u32 ic_status; - - /* - * Only disable adapter if ic_tar and ic_con can't be - * dynamically updated - */ - ic_status = dw_readl(dev, DW_IC_STATUS); - if (!dev->dynamic_tar_update_enabled || - (ic_status & DW_IC_STATUS_MST_ACTIVITY) || - !(ic_status & DW_IC_STATUS_TFE)) { - __i2c_dw_enable_and_wait(dev, false); - enabled = false; - } - } + /* Disable the adapter */ + __i2c_dw_enable_and_wait(dev, false); /* if the slave address is ten bit address, enable 10BITADDR */ if (dev->dynamic_tar_update_enabled) { @@ -526,8 +508,8 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev) /* enforce disabled interrupts (due to HW issues) */ i2c_dw_disable_int(dev); - if (!enabled) - __i2c_dw_enable(dev, true); + /* Enable the adapter */ + __i2c_dw_enable(dev, true); /* Clear and enable interrupts */ dw_readl(dev, DW_IC_CLR_INTR); @@ -708,8 +690,7 @@ static int i2c_dw_handle_tx_abort(struct dw_i2c_dev *dev) } /* - * Prepare controller for a transaction and start transfer by calling - * i2c_dw_xfer_init() + * Prepare controller for a transaction and call i2c_dw_xfer_msg */ static int i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) @@ -752,6 +733,16 @@ i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) goto done; } + /* + * We must disable the adapter before returning and signaling the end + * of the current transfer. Otherwise the hardware might continue + * generating interrupts which in turn causes a race condition with + * the following transfer. Needs some more investigation if the + * additional interrupts are a hardware bug or this driver doesn't + * handle them correctly yet. + */ + __i2c_dw_enable(dev, false); + if (dev->msg_err) { ret = dev->msg_err; goto done; @@ -893,19 +884,9 @@ static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id) */ tx_aborted: - if ((stat & (DW_IC_INTR_TX_ABRT | DW_IC_INTR_STOP_DET)) - || dev->msg_err) { - /* - * We must disable interruts before returning and signaling - * the end of the current transfer. Otherwise the hardware - * might continue generating interrupts for non-existent - * transfers. - */ - i2c_dw_disable_int(dev); - dw_readl(dev, DW_IC_CLR_INTR); - + if ((stat & (DW_IC_INTR_TX_ABRT | DW_IC_INTR_STOP_DET)) || dev->msg_err) complete(&dev->cmd_complete); - } else if (unlikely(dev->accessor_flags & ACCESS_INTR_MASK)) { + else if (unlikely(dev->accessor_flags & ACCESS_INTR_MASK)) { /* workaround to trigger pending interrupt */ stat = dw_readl(dev, DW_IC_INTR_MASK); i2c_dw_disable_int(dev); From 686564434e88b67ea8dbbf9150286d04c83bd193 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 25 Nov 2016 00:13:56 +0100 Subject: [PATCH 490/503] MAINTAINERS: Add bug tracking system location entry type Following the kernel Bugzilla discussion during the Kernel Summit (https://lwn.net/Articles/705245/), add bug tracking system location entry type (B) to MAINTAINERS and populate it for several subsystems known to be using the kernel BZ actively (and add the upstream BZ for ACPICA too). Signed-off-by: Rafael J. Wysocki Signed-off-by: Linus Torvalds --- MAINTAINERS | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index ad9b965e5e44..8d4148406923 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -77,6 +77,7 @@ Descriptions of section entries: Q: Patchwork web based patch tracking system site T: SCM tree type and location. Type is one of: git, hg, quilt, stgit, topgit + B: Bug tracking system location. S: Status, one of the following: Supported: Someone is actually paid to look after this. Maintained: Someone actually looks after it. @@ -281,6 +282,7 @@ L: linux-acpi@vger.kernel.org W: https://01.org/linux-acpi Q: https://patchwork.kernel.org/project/linux-acpi/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm +B: https://bugzilla.kernel.org S: Supported F: drivers/acpi/ F: drivers/pnp/pnpacpi/ @@ -304,6 +306,8 @@ W: https://acpica.org/ W: https://github.com/acpica/acpica/ Q: https://patchwork.kernel.org/project/linux-acpi/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm +B: https://bugzilla.kernel.org +B: https://bugs.acpica.org S: Supported F: drivers/acpi/acpica/ F: include/acpi/ @@ -313,6 +317,7 @@ ACPI FAN DRIVER M: Zhang Rui L: linux-acpi@vger.kernel.org W: https://01.org/linux-acpi +B: https://bugzilla.kernel.org S: Supported F: drivers/acpi/fan.c @@ -328,6 +333,7 @@ ACPI THERMAL DRIVER M: Zhang Rui L: linux-acpi@vger.kernel.org W: https://01.org/linux-acpi +B: https://bugzilla.kernel.org S: Supported F: drivers/acpi/*thermal* @@ -335,6 +341,7 @@ ACPI VIDEO DRIVER M: Zhang Rui L: linux-acpi@vger.kernel.org W: https://01.org/linux-acpi +B: https://bugzilla.kernel.org S: Supported F: drivers/acpi/acpi_video.c @@ -5663,6 +5670,7 @@ HIBERNATION (aka Software Suspend, aka swsusp) M: "Rafael J. Wysocki" M: Pavel Machek L: linux-pm@vger.kernel.org +B: https://bugzilla.kernel.org S: Supported F: arch/x86/power/ F: drivers/base/power/ @@ -9624,6 +9632,7 @@ POWER MANAGEMENT CORE M: "Rafael J. Wysocki" L: linux-pm@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm +B: https://bugzilla.kernel.org S: Supported F: drivers/base/power/ F: include/linux/pm.h @@ -11613,6 +11622,7 @@ M: "Rafael J. Wysocki" M: Len Brown M: Pavel Machek L: linux-pm@vger.kernel.org +B: https://bugzilla.kernel.org S: Supported F: Documentation/power/ F: arch/x86/kernel/acpi/ From cd3caefb4663e3811d37cc2afad3cce642d60061 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 25 Nov 2016 15:44:47 -0800 Subject: [PATCH 491/503] Fix subtle CONFIG_MODVERSIONS problems CONFIG_MODVERSIONS has been broken for pretty much the whole 4.9 series, and quite frankly, nobody has cared very deeply. We absolutely know how to fix it, and it's not _complicated_, but it's not exactly pretty either. This oneliner fixes it without the ugliness, and allows for further future cleanups. "We've secretly replaced their regular MODVERSIONS with nothing at all, let's see if they notice" Signed-off-by: Linus Torvalds --- init/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/init/Kconfig b/init/Kconfig index 34407f15e6d3..c4fbc1e55c25 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1945,6 +1945,7 @@ config MODULE_FORCE_UNLOAD config MODVERSIONS bool "Module versioning support" + depends on BROKEN help Usually, you have to use modules compiled with your kernel. Saying Y here makes it sometimes possible to use modules From 97db8afa2ab919fc400fe982f5054060868bdf07 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 24 Nov 2016 00:08:13 +0100 Subject: [PATCH 492/503] net: ethernet: mvneta: Remove IFF_UNICAST_FLT which is not implemented The mvneta driver advertises it supports IFF_UNICAST_FLT. However, it actually does not. The hardware probably does support it, but there is no code to configure the filter. As a quick and simple fix, remove the flag. This will cause the core to fall back to promiscuous mode. Signed-off-by: Andrew Lunn Fixes: b50b72de2f2f ("net: mvneta: enable features before registering the driver") Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 5cb07c2017bf..0c0a45af950f 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4151,7 +4151,7 @@ static int mvneta_probe(struct platform_device *pdev) dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO; dev->hw_features |= dev->features; dev->vlan_features |= dev->features; - dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; dev->gso_max_segs = MVNETA_MAX_TSO_SEGS; err = register_netdev(dev); From f79675563a6bbfc2ff85684bbbaea9ef092664d2 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 23 Nov 2016 21:05:26 -0500 Subject: [PATCH 493/503] tipc: fix compatibility bug in link monitoring commit 817298102b0b ("tipc: fix link priority propagation") introduced a compatibility problem between TIPC versions newer than Linux 4.6 and those older than Linux 4.4. In versions later than 4.4, link STATE messages only contain a non-zero link priority value when the sender wants the receiver to change its priority. This has the effect that the receiver resets itself in order to apply the new priority. This works well, and is consistent with the said commit. However, in versions older than 4.4 a valid link priority is present in all sent link STATE messages, leading to cyclic link establishment and reset on the 4.6+ node. We fix this by adding a test that the received value should not only be valid, but also differ from the current value in order to cause the receiving link endpoint to reset. Reported-by: Amar Nv Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 1055164c6232..ecc12411155e 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1492,8 +1492,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) l->tolerance = peers_tol; - if (peers_prio && in_range(peers_prio, TIPC_MIN_LINK_PRI, - TIPC_MAX_LINK_PRI)) { + /* Update own prio if peer indicates a different value */ + if ((peers_prio != l->priority) && + in_range(peers_prio, 1, TIPC_MAX_LINK_PRI)) { l->priority = peers_prio; rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } From d876a4d2afecacf4b4d8b11479e9f1ed0080bb2e Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 23 Nov 2016 23:46:09 -0500 Subject: [PATCH 494/503] tipc: improve sanity check for received domain records In commit 35c55c9877f8 ("tipc: add neighbor monitoring framework") we added a data area to the link monitor STATE messages under the assumption that previous versions did not use any such data area. For versions older than Linux 4.3 this assumption is not correct. In those version, all STATE messages sent out from a node inadvertently contain a 16 byte data area containing a string; -a leftover from previous RESET messages which were using this during the setup phase. This string serves no purpose in STATE messages, and should no be there. Unfortunately, this data area is delivered to the link monitor framework, where a sanity check catches that it is not a correct domain record, and drops it. It also issues a rate limited warning about the event. Since such events occur much more frequently than anticipated, we now choose to remove the warning in order to not fill the kernel log with useless contents. We also make the sanity check stricter, to further reduce the risk that such data is inavertently admitted. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/monitor.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index ed97a5876ebe..9e109bb1a207 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -455,14 +455,14 @@ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr, int i, applied_bef; state->probing = false; - if (!dlen) - return; /* Sanity check received domain record */ - if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen) { - pr_warn_ratelimited("Received illegal domain record\n"); + if (dlen < dom_rec_len(arrv_dom, 0)) + return; + if (dlen != dom_rec_len(arrv_dom, new_member_cnt)) + return; + if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen) return; - } /* Synch generation numbers with peer if link just came up */ if (!state->synched) { From 8006f6bf5e39f11c697f48df20382b81d2f2f8b8 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Thu, 24 Nov 2016 10:55:06 +0100 Subject: [PATCH 495/503] net: ethtool: don't require CAP_NET_ADMIN for ETHTOOL_GLINKSETTINGS The ETHTOOL_GLINKSETTINGS command is deprecating the ETHTOOL_GSET command and likewise it shouldn't require the CAP_NET_ADMIN capability. Signed-off-by: Miroslav Lichvar Signed-off-by: David S. Miller --- net/core/ethtool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 977489820eb9..047a1752ece1 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -2479,6 +2479,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GET_TS_INFO: case ETHTOOL_GEEE: case ETHTOOL_GTUNABLE: + case ETHTOOL_GLINKSETTINGS: break; default: if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) From 1f1e70efe53c01844ce76d77c3383c2bcb6beb49 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 24 Nov 2016 14:20:43 +0300 Subject: [PATCH 496/503] fsl/fman: fix a leak in tgec_free() We set "tgec->cfg" to NULL before passing it to kfree(). There is no need to set it to NULL at all. Let's just delete it. Fixes: 57ba4c9b56d8 ("fsl/fman: Add FMan MAC support") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fman/fman_tgec.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/fman/fman_tgec.c b/drivers/net/ethernet/freescale/fman/fman_tgec.c index efabb04a1ae8..4b0f3a50b293 100644 --- a/drivers/net/ethernet/freescale/fman/fman_tgec.c +++ b/drivers/net/ethernet/freescale/fman/fman_tgec.c @@ -722,9 +722,6 @@ int tgec_free(struct fman_mac *tgec) { free_init_resources(tgec); - if (tgec->cfg) - tgec->cfg = NULL; - kfree(tgec->cfg); kfree(tgec); From 4ee12efa2dbf949d72ef2f7ef2e044af5a67b515 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 24 Nov 2016 21:58:29 +0800 Subject: [PATCH 497/503] ibmvnic: drop duplicate header seq_file.h Drop duplicate header seq_file.h from ibmvnic.c. Signed-off-by: Geliang Tang Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 4f3281a03e7e..0fbf686f5e7c 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -74,7 +74,6 @@ #include #include #include -#include #include #include "ibmvnic.h" From 8f8a8b13b447842b147539ae2cab6699897539b9 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 24 Nov 2016 21:58:32 +0800 Subject: [PATCH 498/503] net: ieee802154: drop duplicate header delay.h Drop duplicate header delay.h from adf7242.c. Signed-off-by: Geliang Tang Acked-by: Stefan Schmidt Signed-off-by: David S. Miller --- drivers/net/ieee802154/adf7242.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c index 9fa7ac9f8e68..f355df7cf84a 100644 --- a/drivers/net/ieee802154/adf7242.c +++ b/drivers/net/ieee802154/adf7242.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include From 5e7dfeb758663391ec721e6a4519d3df874f9b1f Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 24 Nov 2016 21:58:33 +0800 Subject: [PATCH 499/503] net/mlx5: drop duplicate header delay.h Drop duplicate header delay.h from mlx5/core/main.c. Signed-off-by: Geliang Tang Acked-by: Matan Barak Acked-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 3eb931585b3e..3b7c6a9f2b5f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -46,7 +46,6 @@ #include #include #include -#include #include #ifdef CONFIG_RFS_ACCEL #include From e8f967c3d88489fc1562a31d4e44d905ac1d3aff Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 24 Nov 2016 17:28:12 +0100 Subject: [PATCH 500/503] mvpp2: use correct size for memset gcc-7 detects a short memset in mvpp2, introduced in the original merge of the driver: drivers/net/ethernet/marvell/mvpp2.c: In function 'mvpp2_cls_init': drivers/net/ethernet/marvell/mvpp2.c:3296:2: error: 'memset' used with length equal to number of elements without multiplication by element size [-Werror=memset-elt-size] The result seems to be that we write uninitialized data into the flow table registers, although we did not get any warning about that uninitialized data usage. Using sizeof() lets us initialize then entire array instead. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 60227a3452a4..1026c452e39d 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -3293,7 +3293,7 @@ static void mvpp2_cls_init(struct mvpp2 *priv) mvpp2_write(priv, MVPP2_CLS_MODE_REG, MVPP2_CLS_MODE_ACTIVE_MASK); /* Clear classifier flow table */ - memset(&fe.data, 0, MVPP2_CLS_FLOWS_TBL_DATA_WORDS); + memset(&fe.data, 0, sizeof(fe.data)); for (index = 0; index < MVPP2_CLS_FLOWS_TBL_SIZE; index++) { fe.index = index; mvpp2_cls_flow_write(priv, &fe); From 6998cc6ec23740347670da13186d2979c5401903 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 24 Nov 2016 18:47:07 -0500 Subject: [PATCH 501/503] tipc: resolve connection flow control compatibility problem In commit 10724cc7bb78 ("tipc: redesign connection-level flow control") we replaced the previous message based flow control with one based on 1k blocks. In order to ensure backwards compatibility the mechanism falls back to using message as base unit when it senses that the peer doesn't support the new algorithm. The default flow control window, i.e., how many units can be sent before the sender blocks and waits for an acknowledge (aka advertisement) is 512. This was tested against the previous version, which uses an acknowledge frequency of on ack per 256 received message, and found to work fine. However, we missed the fact that versions older than Linux 3.15 use an acknowledge frequency of 512, which is exactly the limit where a 4.6+ sender will stop and wait for acknowledge. This would also work fine if it weren't for the fact that if the first sent message on a 4.6+ server side is an empty SYNACK, this one is also is counted as a sent message, while it is not counted as a received message on a legacy 3.15-receiver. This leads to the sender always being one step ahead of the receiver, a scenario causing the sender to block after 512 sent messages, while the receiver only has registered 511 read messages. Hence, the legacy receiver is not trigged to send an acknowledge, with a permanently blocked sender as result. We solve this deadlock by simply allowing the sender to send one more message before it blocks, i.e., by a making minimal change to the condition used for determining connection congestion. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index db32777ab591..41f013888f07 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -186,7 +186,7 @@ static struct tipc_sock *tipc_sk(const struct sock *sk) static bool tsk_conn_cong(struct tipc_sock *tsk) { - return tsk->snt_unacked >= tsk->snd_win; + return tsk->snt_unacked > tsk->snd_win; } /* tsk_blocks(): translate a buffer size in bytes to number of From 8e54cadab447dae779f80f79c87cbeaea9594f60 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 26 Nov 2016 20:05:42 -0500 Subject: [PATCH 502/503] fix default_file_splice_read() Botched calculation of number of pages. As the result, we were dropping pieces when doing splice to pipe from e.g. 9p. Reported-by: Alexei Starovoitov Tested-by: Alexei Starovoitov Signed-off-by: Al Viro --- fs/splice.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/splice.c b/fs/splice.c index dcaf185a5731..5a7750bd2eea 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -408,7 +408,8 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos, if (res <= 0) return -ENOMEM; - nr_pages = res / PAGE_SIZE; + BUG_ON(dummy); + nr_pages = DIV_ROUND_UP(res, PAGE_SIZE); vec = __vec; if (nr_pages > PIPE_DEF_BUFFERS) { From e5517c2a5a49ed5e99047008629f1cd60246ea0e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 27 Nov 2016 13:08:04 -0800 Subject: [PATCH 503/503] Linux 4.9-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0ede48ba5aaf..694111b43cf8 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Psychotic Stoned Sheep # *DOCUMENTATION*