diff --git a/.mailmap b/.mailmap index 16b704e1d5d3..9f41b3906b66 100644 --- a/.mailmap +++ b/.mailmap @@ -512,6 +512,7 @@ Praveen BP Pradeep Kumar Chitrapu Prasad Sodagudi Punit Agrawal +Puranjay Mohan Qais Yousef Qais Yousef Quentin Monnet diff --git a/MAINTAINERS b/MAINTAINERS index f6dc90559341..ec0284125e8f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -553,7 +553,7 @@ F: Documentation/devicetree/bindings/iio/accel/adi,adxl345.yaml F: drivers/input/misc/adxl34x.c ADXL355 THREE-AXIS DIGITAL ACCELEROMETER DRIVER -M: Puranjay Mohan +M: Puranjay Mohan L: linux-iio@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/iio/accel/adi,adxl355.yaml @@ -3714,7 +3714,7 @@ F: drivers/iio/imu/bmi323/ BPF JIT for ARM M: Russell King -M: Puranjay Mohan +M: Puranjay Mohan L: bpf@vger.kernel.org S: Maintained F: arch/arm/net/ @@ -3764,6 +3764,8 @@ X: arch/riscv/net/bpf_jit_comp64.c BPF JIT for RISC-V (64-bit) M: Björn Töpel +R: Pu Lehui +R: Puranjay Mohan L: bpf@vger.kernel.org S: Maintained F: arch/riscv/net/ @@ -4191,7 +4193,6 @@ S: Supported F: drivers/scsi/bnx2i/ BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER -M: Ariel Elior M: Sudarsana Kalluru M: Manish Chopra L: netdev@vger.kernel.org @@ -15160,9 +15161,8 @@ F: drivers/scsi/myrb.* F: drivers/scsi/myrs.* MYRICOM MYRI-10G 10GbE DRIVER (MYRI10GE) -M: Chris Lee L: netdev@vger.kernel.org -S: Supported +S: Orphan W: https://www.cspi.com/ethernet-products/support/downloads/ F: drivers/net/ethernet/myricom/myri10ge/ @@ -17990,7 +17990,6 @@ S: Supported F: drivers/scsi/qedi/ QLOGIC QL4xxx ETHERNET DRIVER -M: Ariel Elior M: Manish Chopra L: netdev@vger.kernel.org S: Supported @@ -18000,7 +17999,6 @@ F: include/linux/qed/ QLOGIC QL4xxx RDMA DRIVER M: Michal Kalderon -M: Ariel Elior L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/hw/qedr/ @@ -21918,7 +21916,7 @@ F: include/linux/soc/ti/ti_sci_inta_msi.h F: include/linux/soc/ti/ti_sci_protocol.h TEXAS INSTRUMENTS' TMP117 TEMPERATURE SENSOR DRIVER -M: Puranjay Mohan +M: Puranjay Mohan L: linux-iio@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/iio/temperature/ti,tmp117.yaml @@ -24459,6 +24457,14 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/har F: Documentation/admin-guide/LSM/Yama.rst F: security/yama/ +YAML NETLINK (YNL) +M: Donald Hunter +M: Jakub Kicinski +F: Documentation/netlink/ +F: Documentation/userspace-api/netlink/intro-specs.rst +F: Documentation/userspace-api/netlink/specs.rst +F: tools/net/ynl/ + YEALINK PHONE DRIVER M: Henk Vergonet L: usbb2k-api-dev@nongnu.org diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 1d672457d02f..72b5cd697f5d 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -871,16 +871,11 @@ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[], } /* dst = src (4 bytes)*/ -static inline void emit_a32_mov_r(const s8 dst, const s8 src, const u8 off, - struct jit_ctx *ctx) { +static inline void emit_a32_mov_r(const s8 dst, const s8 src, struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; s8 rt; rt = arm_bpf_get_reg32(src, tmp[0], ctx); - if (off && off != 32) { - emit(ARM_LSL_I(rt, rt, 32 - off), ctx); - emit(ARM_ASR_I(rt, rt, 32 - off), ctx); - } arm_bpf_put_reg32(dst, rt, ctx); } @@ -889,15 +884,15 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], const s8 src[], struct jit_ctx *ctx) { if (!is64) { - emit_a32_mov_r(dst_lo, src_lo, 0, ctx); + emit_a32_mov_r(dst_lo, src_lo, ctx); if (!ctx->prog->aux->verifier_zext) /* Zero out high 4 bytes */ emit_a32_mov_i(dst_hi, 0, ctx); } else if (__LINUX_ARM_ARCH__ < 6 && ctx->cpu_architecture < CPU_ARCH_ARMv5TE) { /* complete 8 byte move */ - emit_a32_mov_r(dst_lo, src_lo, 0, ctx); - emit_a32_mov_r(dst_hi, src_hi, 0, ctx); + emit_a32_mov_r(dst_lo, src_lo, ctx); + emit_a32_mov_r(dst_hi, src_hi, ctx); } else if (is_stacked(src_lo) && is_stacked(dst_lo)) { const u8 *tmp = bpf2a32[TMP_REG_1]; @@ -917,17 +912,52 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], static inline void emit_a32_movsx_r64(const bool is64, const u8 off, const s8 dst[], const s8 src[], struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; - const s8 *rt; + s8 rs; + s8 rd; - rt = arm_bpf_get_reg64(dst, tmp, ctx); + if (is_stacked(dst_lo)) + rd = tmp[1]; + else + rd = dst_lo; + rs = arm_bpf_get_reg32(src_lo, rd, ctx); + /* rs may be one of src[1], dst[1], or tmp[1] */ + + /* Sign extend rs if needed. If off == 32, lower 32-bits of src are moved to dst and sign + * extension only happens in the upper 64 bits. + */ + if (off != 32) { + /* Sign extend rs into rd */ + emit(ARM_LSL_I(rd, rs, 32 - off), ctx); + emit(ARM_ASR_I(rd, rd, 32 - off), ctx); + } else { + rd = rs; + } + + /* Write rd to dst_lo + * + * Optimization: + * Assume: + * 1. dst == src and stacked. + * 2. off == 32 + * + * In this case src_lo was loaded into rd(tmp[1]) but rd was not sign extended as off==32. + * So, we don't need to write rd back to dst_lo as they have the same value. + * This saves us one str instruction. + */ + if (dst_lo != src_lo || off != 32) + arm_bpf_put_reg32(dst_lo, rd, ctx); - emit_a32_mov_r(dst_lo, src_lo, off, ctx); if (!is64) { if (!ctx->prog->aux->verifier_zext) /* Zero out high 4 bytes */ emit_a32_mov_i(dst_hi, 0, ctx); } else { - emit(ARM_ASR_I(rt[0], rt[1], 31), ctx); + if (is_stacked(dst_hi)) { + emit(ARM_ASR_I(tmp[0], rd, 31), ctx); + arm_bpf_put_reg32(dst_hi, tmp[0], ctx); + } else { + emit(ARM_ASR_I(dst_hi, rd, 31), ctx); + } } } diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 122021f9bdfc..13eac43c632d 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1844,15 +1844,15 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, emit_call(enter_prog, ctx); + /* save return value to callee saved register x20 */ + emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx); + /* if (__bpf_prog_enter(prog) == 0) * goto skip_exec_of_prog; */ branch = ctx->image + ctx->idx; emit(A64_NOP, ctx); - /* save return value to callee saved register x20 */ - emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx); - emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx); if (!p->jited) emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx); diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 1adf2f39ce59..ec9d692838fc 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -722,6 +722,9 @@ static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_of if (ret) return ret; + /* store prog start time */ + emit_mv(RV_REG_S1, RV_REG_A0, ctx); + /* if (__bpf_prog_enter(prog) == 0) * goto skip_exec_of_prog; */ @@ -729,9 +732,6 @@ static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_of /* nop reserved for conditional jump */ emit(rv_nop(), ctx); - /* store prog start time */ - emit_mv(RV_REG_S1, RV_REG_A0, ctx); - /* arg1: &args_off */ emit_addi(RV_REG_A0, RV_REG_FP, -args_off, ctx); if (!p->jited) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index df5fac428408..59cbc94b6e69 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1807,36 +1807,41 @@ populate_extable: if (BPF_MODE(insn->code) == BPF_PROBE_MEM || BPF_MODE(insn->code) == BPF_PROBE_MEMSX) { /* Conservatively check that src_reg + insn->off is a kernel address: - * src_reg + insn->off >= TASK_SIZE_MAX + PAGE_SIZE - * src_reg is used as scratch for src_reg += insn->off and restored - * after emit_ldx if necessary + * src_reg + insn->off > TASK_SIZE_MAX + PAGE_SIZE + * and + * src_reg + insn->off < VSYSCALL_ADDR */ - u64 limit = TASK_SIZE_MAX + PAGE_SIZE; + u64 limit = TASK_SIZE_MAX + PAGE_SIZE - VSYSCALL_ADDR; u8 *end_of_jmp; - /* At end of these emitted checks, insn->off will have been added - * to src_reg, so no need to do relative load with insn->off offset - */ - insn_off = 0; + /* movabsq r10, VSYSCALL_ADDR */ + emit_mov_imm64(&prog, BPF_REG_AX, (long)VSYSCALL_ADDR >> 32, + (u32)(long)VSYSCALL_ADDR); - /* movabsq r11, limit */ - EMIT2(add_1mod(0x48, AUX_REG), add_1reg(0xB8, AUX_REG)); - EMIT((u32)limit, 4); - EMIT(limit >> 32, 4); + /* mov src_reg, r11 */ + EMIT_mov(AUX_REG, src_reg); if (insn->off) { - /* add src_reg, insn->off */ - maybe_emit_1mod(&prog, src_reg, true); - EMIT2_off32(0x81, add_1reg(0xC0, src_reg), insn->off); + /* add r11, insn->off */ + maybe_emit_1mod(&prog, AUX_REG, true); + EMIT2_off32(0x81, add_1reg(0xC0, AUX_REG), insn->off); } - /* cmp src_reg, r11 */ - maybe_emit_mod(&prog, src_reg, AUX_REG, true); - EMIT2(0x39, add_2reg(0xC0, src_reg, AUX_REG)); + /* sub r11, r10 */ + maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true); + EMIT2(0x29, add_2reg(0xC0, AUX_REG, BPF_REG_AX)); - /* if unsigned '>=', goto load */ - EMIT2(X86_JAE, 0); + /* movabsq r10, limit */ + emit_mov_imm64(&prog, BPF_REG_AX, (long)limit >> 32, + (u32)(long)limit); + + /* cmp r10, r11 */ + maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true); + EMIT2(0x39, add_2reg(0xC0, AUX_REG, BPF_REG_AX)); + + /* if unsigned '>', goto load */ + EMIT2(X86_JA, 0); end_of_jmp = prog; /* xor dst_reg, dst_reg */ @@ -1862,18 +1867,6 @@ populate_extable: /* populate jmp_offset for JMP above */ start_of_ldx[-1] = prog - start_of_ldx; - if (insn->off && src_reg != dst_reg) { - /* sub src_reg, insn->off - * Restore src_reg after "add src_reg, insn->off" in prev - * if statement. But if src_reg == dst_reg, emit_ldx - * above already clobbered src_reg, so no need to restore. - * If add src_reg, insn->off was unnecessary, no need to - * restore either. - */ - maybe_emit_1mod(&prog, src_reg, true); - EMIT2_off32(0x81, add_1reg(0xE8, src_reg), insn->off); - } - if (!bpf_prog->aux->extable) break; @@ -3473,3 +3466,9 @@ bool bpf_jit_supports_ptr_xchg(void) { return true; } + +/* x86-64 JIT emits its own code to filter user addresses so return 0 here */ +u64 bpf_arch_uaddress_limit(void) +{ + return 0; +} diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 59b5dd0e2f41..14daf432f30b 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -5705,7 +5705,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6141, .family = MV88E6XXX_FAMILY_6341, .name = "Marvell 88E6141", - .num_databases = 4096, + .num_databases = 256, .num_macs = 2048, .num_ports = 6, .num_internal_phys = 5, @@ -6164,7 +6164,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6341, .family = MV88E6XXX_FAMILY_6341, .name = "Marvell 88E6341", - .num_databases = 4096, + .num_databases = 256, .num_macs = 2048, .num_internal_phys = 5, .num_ports = 6, diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index b1f84b37032a..c7e7dac057a3 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2,7 +2,7 @@ /* * Broadcom GENET (Gigabit Ethernet) controller driver * - * Copyright (c) 2014-2020 Broadcom + * Copyright (c) 2014-2024 Broadcom */ #define pr_fmt(fmt) "bcmgenet: " fmt @@ -2467,14 +2467,18 @@ static void umac_enable_set(struct bcmgenet_priv *priv, u32 mask, bool enable) { u32 reg; + spin_lock_bh(&priv->reg_lock); reg = bcmgenet_umac_readl(priv, UMAC_CMD); - if (reg & CMD_SW_RESET) + if (reg & CMD_SW_RESET) { + spin_unlock_bh(&priv->reg_lock); return; + } if (enable) reg |= mask; else reg &= ~mask; bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock_bh(&priv->reg_lock); /* UniMAC stops on a packet boundary, wait for a full-size packet * to be processed @@ -2490,8 +2494,10 @@ static void reset_umac(struct bcmgenet_priv *priv) udelay(10); /* issue soft reset and disable MAC while updating its registers */ + spin_lock_bh(&priv->reg_lock); bcmgenet_umac_writel(priv, CMD_SW_RESET, UMAC_CMD); udelay(2); + spin_unlock_bh(&priv->reg_lock); } static void bcmgenet_intr_disable(struct bcmgenet_priv *priv) @@ -3334,7 +3340,9 @@ static void bcmgenet_netif_start(struct net_device *dev) struct bcmgenet_priv *priv = netdev_priv(dev); /* Start the network engine */ + netif_addr_lock_bh(dev); bcmgenet_set_rx_mode(dev); + netif_addr_unlock_bh(dev); bcmgenet_enable_rx_napi(priv); umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, true); @@ -3595,16 +3603,19 @@ static void bcmgenet_set_rx_mode(struct net_device *dev) * 3. The number of filters needed exceeds the number filters * supported by the hardware. */ + spin_lock(&priv->reg_lock); reg = bcmgenet_umac_readl(priv, UMAC_CMD); if ((dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) || (nfilter > MAX_MDF_FILTER)) { reg |= CMD_PROMISC; bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock(&priv->reg_lock); bcmgenet_umac_writel(priv, 0, UMAC_MDF_CTRL); return; } else { reg &= ~CMD_PROMISC; bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock(&priv->reg_lock); } /* update MDF filter */ @@ -4003,6 +4014,7 @@ static int bcmgenet_probe(struct platform_device *pdev) goto err; } + spin_lock_init(&priv->reg_lock); spin_lock_init(&priv->lock); /* Set default pause parameters */ diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 7523b60b3c1c..43b923c48b14 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2014-2020 Broadcom + * Copyright (c) 2014-2024 Broadcom */ #ifndef __BCMGENET_H__ @@ -573,6 +573,8 @@ struct bcmgenet_rxnfc_rule { /* device context */ struct bcmgenet_priv { void __iomem *base; + /* reg_lock: lock to serialize access to shared registers */ + spinlock_t reg_lock; enum bcmgenet_version version; struct net_device *dev; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index 7a41cad5788f..1248792d7fd4 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -2,7 +2,7 @@ /* * Broadcom GENET (Gigabit Ethernet) Wake-on-LAN support * - * Copyright (c) 2014-2020 Broadcom + * Copyright (c) 2014-2024 Broadcom */ #define pr_fmt(fmt) "bcmgenet_wol: " fmt @@ -151,6 +151,7 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, } /* Can't suspend with WoL if MAC is still in reset */ + spin_lock_bh(&priv->reg_lock); reg = bcmgenet_umac_readl(priv, UMAC_CMD); if (reg & CMD_SW_RESET) reg &= ~CMD_SW_RESET; @@ -158,6 +159,7 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, /* disable RX */ reg &= ~CMD_RX_EN; bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock_bh(&priv->reg_lock); mdelay(10); if (priv->wolopts & (WAKE_MAGIC | WAKE_MAGICSECURE)) { @@ -203,6 +205,7 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, } /* Enable CRC forward */ + spin_lock_bh(&priv->reg_lock); reg = bcmgenet_umac_readl(priv, UMAC_CMD); priv->crc_fwd_en = 1; reg |= CMD_CRC_FWD; @@ -210,6 +213,7 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, /* Receiver must be enabled for WOL MP detection */ reg |= CMD_RX_EN; bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock_bh(&priv->reg_lock); reg = UMAC_IRQ_MPD_R; if (hfb_enable) @@ -256,7 +260,9 @@ void bcmgenet_wol_power_up_cfg(struct bcmgenet_priv *priv, } /* Disable CRC Forward */ + spin_lock_bh(&priv->reg_lock); reg = bcmgenet_umac_readl(priv, UMAC_CMD); reg &= ~CMD_CRC_FWD; bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock_bh(&priv->reg_lock); } diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 9ada89355747..c4a3698cef66 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -2,7 +2,7 @@ /* * Broadcom GENET MDIO routines * - * Copyright (c) 2014-2017 Broadcom + * Copyright (c) 2014-2024 Broadcom */ #include @@ -76,6 +76,7 @@ static void bcmgenet_mac_config(struct net_device *dev) reg |= RGMII_LINK; bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); + spin_lock_bh(&priv->reg_lock); reg = bcmgenet_umac_readl(priv, UMAC_CMD); reg &= ~((CMD_SPEED_MASK << CMD_SPEED_SHIFT) | CMD_HD_EN | @@ -88,6 +89,7 @@ static void bcmgenet_mac_config(struct net_device *dev) reg |= CMD_TX_EN | CMD_RX_EN; } bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock_bh(&priv->reg_lock); active = phy_init_eee(phydev, 0) >= 0; bcmgenet_eee_enable_set(dev, @@ -275,6 +277,7 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) * block for the interface to work, unconditionally clear the * Out-of-band disable since we do not need it. */ + mutex_lock(&phydev->lock); reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL); reg &= ~OOB_DISABLE; if (priv->ext_phy) { @@ -286,6 +289,7 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) reg |= RGMII_MODE_EN; } bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); + mutex_unlock(&phydev->lock); if (init) dev_info(kdev, "configuring instance for %s\n", phy_name); diff --git a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c index 7246e13dd559..97291bfbeea5 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c +++ b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c @@ -312,7 +312,7 @@ bnad_debugfs_write_regrd(struct file *file, const char __user *buf, void *kern_buf; /* Copy the user space buf */ - kern_buf = memdup_user(buf, nbytes); + kern_buf = memdup_user_nul(buf, nbytes); if (IS_ERR(kern_buf)) return PTR_ERR(kern_buf); @@ -372,7 +372,7 @@ bnad_debugfs_write_regwr(struct file *file, const char __user *buf, void *kern_buf; /* Copy the user space buf */ - kern_buf = memdup_user(buf, nbytes); + kern_buf = memdup_user_nul(buf, nbytes); if (IS_ERR(kern_buf)) return PTR_ERR(kern_buf); diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 49d5808b7d11..de52bcb884c4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2670,12 +2670,12 @@ int cxgb4_selftest_lb_pkt(struct net_device *netdev) lb->loopback = 1; q = &adap->sge.ethtxq[pi->first_qset]; - __netif_tx_lock(q->txq, smp_processor_id()); + __netif_tx_lock_bh(q->txq); reclaim_completed_tx(adap, &q->q, -1, true); credits = txq_avail(&q->q) - ndesc; if (unlikely(credits < 0)) { - __netif_tx_unlock(q->txq); + __netif_tx_unlock_bh(q->txq); return -ENOMEM; } @@ -2710,7 +2710,7 @@ int cxgb4_selftest_lb_pkt(struct net_device *netdev) init_completion(&lb->completion); txq_advance(&q->q, ndesc); cxgb4_ring_tx_db(adap, &q->q, ndesc); - __netif_tx_unlock(q->txq); + __netif_tx_unlock_bh(q->txq); /* wait for the pkt to return */ ret = wait_for_completion_timeout(&lb->completion, 10 * HZ); diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c index 93544f1cc2a5..f7ae0e0aa4a4 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.c +++ b/drivers/net/ethernet/intel/e1000e/phy.c @@ -157,7 +157,7 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) * the lower time out */ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { - usleep_range(50, 60); + udelay(50); mdic = er32(MDIC); if (mdic & E1000_MDIC_READY) break; @@ -181,7 +181,7 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) * reading duplicate data in the next MDIC transaction. */ if (hw->mac.type == e1000_pch2lan) - usleep_range(100, 150); + udelay(100); if (success) { *data = (u16)mdic; @@ -237,7 +237,7 @@ s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data) * the lower time out */ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { - usleep_range(50, 60); + udelay(50); mdic = er32(MDIC); if (mdic & E1000_MDIC_READY) break; @@ -261,7 +261,7 @@ s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data) * reading duplicate data in the next MDIC transaction. */ if (hw->mac.type == e1000_pch2lan) - usleep_range(100, 150); + udelay(100); if (success) return 0; diff --git a/drivers/net/ethernet/intel/ice/ice_debugfs.c b/drivers/net/ethernet/intel/ice/ice_debugfs.c index d252d98218d0..9fc0fd95a13d 100644 --- a/drivers/net/ethernet/intel/ice/ice_debugfs.c +++ b/drivers/net/ethernet/intel/ice/ice_debugfs.c @@ -171,7 +171,7 @@ ice_debugfs_module_write(struct file *filp, const char __user *buf, if (*ppos != 0 || count > 8) return -EINVAL; - cmd_buf = memdup_user(buf, count); + cmd_buf = memdup_user_nul(buf, count); if (IS_ERR(cmd_buf)) return PTR_ERR(cmd_buf); @@ -257,7 +257,7 @@ ice_debugfs_nr_messages_write(struct file *filp, const char __user *buf, if (*ppos != 0 || count > 4) return -EINVAL; - cmd_buf = memdup_user(buf, count); + cmd_buf = memdup_user_nul(buf, count); if (IS_ERR(cmd_buf)) return PTR_ERR(cmd_buf); @@ -332,7 +332,7 @@ ice_debugfs_enable_write(struct file *filp, const char __user *buf, if (*ppos != 0 || count > 2) return -EINVAL; - cmd_buf = memdup_user(buf, count); + cmd_buf = memdup_user_nul(buf, count); if (IS_ERR(cmd_buf)) return PTR_ERR(cmd_buf); @@ -428,7 +428,7 @@ ice_debugfs_log_size_write(struct file *filp, const char __user *buf, if (*ppos != 0 || count > 5) return -EINVAL; - cmd_buf = memdup_user(buf, count); + cmd_buf = memdup_user_nul(buf, count); if (IS_ERR(cmd_buf)) return PTR_ERR(cmd_buf); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 2500f5ba4f5a..881d704644fb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -999,12 +999,10 @@ static ssize_t rvu_dbg_qsize_write(struct file *filp, u16 pcifunc; int ret, lf; - cmd_buf = memdup_user(buffer, count + 1); + cmd_buf = memdup_user_nul(buffer, count); if (IS_ERR(cmd_buf)) return -ENOMEM; - cmd_buf[count] = '\0'; - cmd_buf_tmp = strchr(cmd_buf, '\n'); if (cmd_buf_tmp) { *cmd_buf_tmp = '\0'; diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index a5ac21a0ee33..cb6b33a228ea 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1868,8 +1868,8 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, struct flow_cls_offload *f) { struct qede_arfs_fltr_node *n; - int min_hlen, rc = -EINVAL; struct qede_arfs_tuple t; + int min_hlen, rc; __qede_lock(edev); @@ -1879,7 +1879,8 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, } /* parse flower attribute and prepare filter */ - if (qede_parse_flow_attr(edev, proto, f->rule, &t)) + rc = qede_parse_flow_attr(edev, proto, f->rule, &t); + if (rc) goto unlock; /* Validate profile mode and number of filters */ @@ -1888,11 +1889,13 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, DP_NOTICE(edev, "Filter configuration invalidated, filter mode=0x%x, configured mode=0x%x, filter count=0x%x\n", t.mode, edev->arfs->mode, edev->arfs->filter_count); + rc = -EINVAL; goto unlock; } /* parse tc actions and get the vf_id */ - if (qede_parse_actions(edev, &f->rule->action, f->common.extack)) + rc = qede_parse_actions(edev, &f->rule->action, f->common.extack); + if (rc) goto unlock; if (qede_flow_find_fltr(edev, &t)) { @@ -1998,10 +2001,9 @@ static int qede_flow_spec_to_rule(struct qede_dev *edev, if (IS_ERR(flow)) return PTR_ERR(flow); - if (qede_parse_flow_attr(edev, proto, flow->rule, t)) { - err = -EINVAL; + err = qede_parse_flow_attr(edev, proto, flow->rule, t); + if (err) goto err_out; - } /* Make sure location is valid and filter isn't already set */ err = qede_flow_spec_validate(edev, &flow->rule->action, t, diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index ba319fc21957..3a9148fb1422 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1674,6 +1674,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) bool raw_proto = false; void *oiph; __be32 vni = 0; + int nh; /* Need UDP and VXLAN header to be present */ if (!pskb_may_pull(skb, VXLAN_HLEN)) @@ -1762,12 +1763,28 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) skb->pkt_type = PACKET_HOST; } - oiph = skb_network_header(skb); + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_reset_network_header(skb); + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(vxlan->dev, rx_length_errors); + DEV_STATS_INC(vxlan->dev, rx_errors); + vxlan_vnifilter_count(vxlan, vni, vninode, + VXLAN_VNI_STATS_RX_ERRORS, 0); + goto drop; + } + + /* Get the outer header. */ + oiph = skb->head + nh; + if (!vxlan_ecn_decapsulate(vs, oiph, skb)) { - ++vxlan->dev->stats.rx_frame_errors; - ++vxlan->dev->stats.rx_errors; + DEV_STATS_INC(vxlan->dev, rx_frame_errors); + DEV_STATS_INC(vxlan->dev, rx_errors); vxlan_vnifilter_count(vxlan, vni, vninode, VXLAN_VNI_STATS_RX_ERRORS, 0); goto drop; @@ -1837,7 +1854,9 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) goto out; if (!pskb_may_pull(skb, arp_hdr_len(dev))) { - dev->stats.tx_dropped++; + dev_core_stats_tx_dropped_inc(dev); + vxlan_vnifilter_count(vxlan, vni, NULL, + VXLAN_VNI_STATS_TX_DROPS, 0); goto out; } parp = arp_hdr(skb); @@ -1893,7 +1912,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) reply->pkt_type = PACKET_HOST; if (netif_rx(reply) == NET_RX_DROP) { - dev->stats.rx_dropped++; + dev_core_stats_rx_dropped_inc(dev); vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_RX_DROPS, 0); } @@ -2052,7 +2071,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) goto out; if (netif_rx(reply) == NET_RX_DROP) { - dev->stats.rx_dropped++; + dev_core_stats_rx_dropped_inc(dev); vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_RX_DROPS, 0); } @@ -2263,7 +2282,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, len); } else { drop: - dev->stats.rx_dropped++; + dev_core_stats_rx_dropped_inc(dev); vxlan_vnifilter_count(dst_vxlan, vni, NULL, VXLAN_VNI_STATS_RX_DROPS, 0); } @@ -2295,7 +2314,7 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev, addr_family, dst_port, vxlan->cfg.flags); if (!dst_vxlan) { - dev->stats.tx_errors++; + DEV_STATS_INC(dev, tx_errors); vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_ERRORS, 0); kfree_skb(skb); @@ -2559,7 +2578,7 @@ out_unlock: return; drop: - dev->stats.tx_dropped++; + dev_core_stats_tx_dropped_inc(dev); vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_DROPS, 0); dev_kfree_skb(skb); return; @@ -2567,11 +2586,11 @@ drop: tx_error: rcu_read_unlock(); if (err == -ELOOP) - dev->stats.collisions++; + DEV_STATS_INC(dev, collisions); else if (err == -ENETUNREACH) - dev->stats.tx_carrier_errors++; + DEV_STATS_INC(dev, tx_carrier_errors); dst_release(ndst); - dev->stats.tx_errors++; + DEV_STATS_INC(dev, tx_errors); vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_ERRORS, 0); kfree_skb(skb); } @@ -2604,7 +2623,7 @@ static void vxlan_xmit_nh(struct sk_buff *skb, struct net_device *dev, return; drop: - dev->stats.tx_dropped++; + dev_core_stats_tx_dropped_inc(dev); vxlan_vnifilter_count(netdev_priv(dev), vni, NULL, VXLAN_VNI_STATS_TX_DROPS, 0); dev_kfree_skb(skb); @@ -2642,7 +2661,7 @@ static netdev_tx_t vxlan_xmit_nhid(struct sk_buff *skb, struct net_device *dev, return NETDEV_TX_OK; drop: - dev->stats.tx_dropped++; + dev_core_stats_tx_dropped_inc(dev); vxlan_vnifilter_count(netdev_priv(dev), vni, NULL, VXLAN_VNI_STATS_TX_DROPS, 0); dev_kfree_skb(skb); @@ -2739,7 +2758,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) !is_multicast_ether_addr(eth->h_dest)) vxlan_fdb_miss(vxlan, eth->h_dest); - dev->stats.tx_dropped++; + dev_core_stats_tx_dropped_inc(dev); vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_DROPS, 0); kfree_skb(skb); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index f0b8b709649f..a3adaec5504e 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -364,22 +364,6 @@ out: return rc; } -static int qeth_alloc_cq(struct qeth_card *card) -{ - if (card->options.cq == QETH_CQ_ENABLED) { - QETH_CARD_TEXT(card, 2, "cqon"); - card->qdio.c_q = qeth_alloc_qdio_queue(); - if (!card->qdio.c_q) { - dev_err(&card->gdev->dev, "Failed to create completion queue\n"); - return -ENOMEM; - } - } else { - QETH_CARD_TEXT(card, 2, "nocq"); - card->qdio.c_q = NULL; - } - return 0; -} - static void qeth_free_cq(struct qeth_card *card) { if (card->qdio.c_q) { @@ -388,6 +372,25 @@ static void qeth_free_cq(struct qeth_card *card) } } +static int qeth_alloc_cq(struct qeth_card *card) +{ + if (card->options.cq == QETH_CQ_ENABLED) { + QETH_CARD_TEXT(card, 2, "cqon"); + if (!card->qdio.c_q) { + card->qdio.c_q = qeth_alloc_qdio_queue(); + if (!card->qdio.c_q) { + dev_err(&card->gdev->dev, + "Failed to create completion queue\n"); + return -ENOMEM; + } + } + } else { + QETH_CARD_TEXT(card, 2, "nocq"); + qeth_free_cq(card); + } + return 0; +} + static enum iucv_tx_notify qeth_compute_cq_notification(int sbalf15, int delayed) { @@ -2628,6 +2631,10 @@ static int qeth_alloc_qdio_queues(struct qeth_card *card) QETH_CARD_TEXT(card, 2, "allcqdbf"); + /* completion */ + if (qeth_alloc_cq(card)) + goto out_err; + if (atomic_cmpxchg(&card->qdio.state, QETH_QDIO_UNINITIALIZED, QETH_QDIO_ALLOCATED) != QETH_QDIO_UNINITIALIZED) return 0; @@ -2663,10 +2670,6 @@ static int qeth_alloc_qdio_queues(struct qeth_card *card) queue->priority = QETH_QIB_PQUE_PRIO_DEFAULT; } - /* completion */ - if (qeth_alloc_cq(card)) - goto out_freeoutq; - return 0; out_freeoutq: @@ -2677,6 +2680,8 @@ out_freeoutq: qeth_free_buffer_pool(card); out_buffer_pool: atomic_set(&card->qdio.state, QETH_QDIO_UNINITIALIZED); + qeth_free_cq(card); +out_err: return -ENOMEM; } @@ -2684,11 +2689,12 @@ static void qeth_free_qdio_queues(struct qeth_card *card) { int i, j; + qeth_free_cq(card); + if (atomic_xchg(&card->qdio.state, QETH_QDIO_UNINITIALIZED) == QETH_QDIO_UNINITIALIZED) return; - qeth_free_cq(card); for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) { if (card->qdio.in_q->bufs[j].rx_skb) { consume_skb(card->qdio.in_q->bufs[j].rx_skb); @@ -3742,24 +3748,11 @@ static void qeth_qdio_poll(struct ccw_device *cdev, unsigned long card_ptr) int qeth_configure_cq(struct qeth_card *card, enum qeth_cq cq) { - int rc; - - if (card->options.cq == QETH_CQ_NOTAVAILABLE) { - rc = -1; - goto out; - } else { - if (card->options.cq == cq) { - rc = 0; - goto out; - } - - qeth_free_qdio_queues(card); - card->options.cq = cq; - rc = 0; - } -out: - return rc; + if (card->options.cq == QETH_CQ_NOTAVAILABLE) + return -1; + card->options.cq = cq; + return 0; } EXPORT_SYMBOL_GPL(qeth_configure_cq); diff --git a/include/linux/filter.h b/include/linux/filter.h index c99bc3df2d28..219ee7a76874 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -963,6 +963,7 @@ bool bpf_jit_supports_far_kfunc_call(void); bool bpf_jit_supports_exceptions(void); bool bpf_jit_supports_ptr_xchg(void); bool bpf_jit_supports_arena(void); +u64 bpf_arch_uaddress_limit(void); void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie); bool bpf_helper_changes_pkt_data(void *func); diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index e65ec3fd2799..a509caf823d6 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -461,10 +461,12 @@ static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock) static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock) { + read_lock_bh(&sk->sk_callback_lock); if (psock->saved_data_ready) psock->saved_data_ready(sk); else sk->sk_data_ready(sk); + read_unlock_bh(&sk->sk_callback_lock); } static inline void psock_set_prog(struct bpf_prog **pprog, diff --git a/include/net/gro.h b/include/net/gro.h index 50f1e403dbbb..c1d4ca0463a1 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -87,6 +87,15 @@ struct napi_gro_cb { /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; + + /* L3 offsets */ + union { + struct { + u16 network_offset; + u16 inner_network_offset; + }; + u16 network_offsets[2]; + }; }; #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 696bc55de8e8..1ea5ce5bb599 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2942,6 +2942,15 @@ bool __weak bpf_jit_supports_arena(void) return false; } +u64 __weak bpf_arch_uaddress_limit(void) +{ +#if defined(CONFIG_64BIT) && defined(CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE) + return TASK_SIZE; +#else + return 0; +#endif +} + /* Return TRUE if the JIT backend satisfies the following two conditions: * 1) JIT backend supports atomic_xchg() on pointer-sized words. * 2) Under the specific arch, the implementation of xchg() is the same diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 98188379d5c7..cb7ad1f795e1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -18289,8 +18289,7 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) f = fdget(fd); map = __bpf_map_get(f); if (IS_ERR(map)) { - verbose(env, "fd %d is not pointing to valid bpf_map\n", - insn[0].imm); + verbose(env, "fd %d is not pointing to valid bpf_map\n", fd); return PTR_ERR(map); } @@ -19676,6 +19675,36 @@ static int do_misc_fixups(struct bpf_verifier_env *env) goto next_insn; } + /* Make it impossible to de-reference a userspace address */ + if (BPF_CLASS(insn->code) == BPF_LDX && + (BPF_MODE(insn->code) == BPF_PROBE_MEM || + BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) { + struct bpf_insn *patch = &insn_buf[0]; + u64 uaddress_limit = bpf_arch_uaddress_limit(); + + if (!uaddress_limit) + goto next_insn; + + *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg); + if (insn->off) + *patch++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, insn->off); + *patch++ = BPF_ALU64_IMM(BPF_RSH, BPF_REG_AX, 32); + *patch++ = BPF_JMP_IMM(BPF_JLE, BPF_REG_AX, uaddress_limit >> 32, 2); + *patch++ = *insn; + *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); + *patch++ = BPF_MOV64_IMM(insn->dst_reg, 0); + + cnt = patch - insn_buf; + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + goto next_insn; + } + /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */ if (BPF_CLASS(insn->code) == BPF_LD && (BPF_MODE(insn->code) == BPF_ABS || diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c63a5fbf1f1c..291185f54ee4 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -375,7 +375,7 @@ config DEBUG_INFO_SPLIT Incompatible with older versions of ccache. config DEBUG_INFO_BTF - bool "Generate BTF typeinfo" + bool "Generate BTF type information" depends on !DEBUG_INFO_SPLIT && !DEBUG_INFO_REDUCED depends on !GCC_PLUGIN_RANDSTRUCT || COMPILE_TEST depends on BPF_SYSCALL @@ -408,7 +408,8 @@ config PAHOLE_HAS_LANG_EXCLUDE using DEBUG_INFO_BTF_MODULES. config DEBUG_INFO_BTF_MODULES - def_bool y + bool "Generate BTF type information for kernel modules" + default y depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF help Generate compact split BTF type information for kernel modules. diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 68b45c82c37a..7bc2220fea80 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -1124,7 +1124,7 @@ static ssize_t extract_user_to_sg(struct iov_iter *iter, do { res = iov_iter_extract_pages(iter, &pages, maxsize, sg_max, extraction_flags, &off); - if (res < 0) + if (res <= 0) goto failed; len = res; diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index f00158234505..9404dd551dfd 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -478,6 +478,8 @@ static struct sk_buff *vlan_gro_receive(struct list_head *head, if (unlikely(!vhdr)) goto out; + NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark] = hlen; + type = vhdr->h_vlan_encapsulated_proto; ptype = gro_find_receive_by_type(type); diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 7431f89e897b..d7c35f55bd69 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -266,7 +266,7 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb, if (skb->dev == p->dev && ether_addr_equal(src, addr)) return; - skb = skb_copy(skb, GFP_ATOMIC); + skb = pskb_copy(skb, GFP_ATOMIC); if (!skb) { DEV_STATS_INC(dev, tx_dropped); return; diff --git a/net/core/filter.c b/net/core/filter.c index 8adf95765cdd..ae5254f712c9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4360,10 +4360,12 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri, enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; u32 map_id = ri->map_id; + u32 flags = ri->flags; struct bpf_map *map; int err; ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ + ri->flags = 0; ri->map_type = BPF_MAP_TYPE_UNSPEC; if (unlikely(!xdpf)) { @@ -4375,11 +4377,20 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri, case BPF_MAP_TYPE_DEVMAP: fallthrough; case BPF_MAP_TYPE_DEVMAP_HASH: - map = READ_ONCE(ri->map); - if (unlikely(map)) { + if (unlikely(flags & BPF_F_BROADCAST)) { + map = READ_ONCE(ri->map); + + /* The map pointer is cleared when the map is being torn + * down by bpf_clear_redirect_map() + */ + if (unlikely(!map)) { + err = -ENOENT; + break; + } + WRITE_ONCE(ri->map, NULL); err = dev_map_enqueue_multi(xdpf, dev, map, - ri->flags & BPF_F_EXCLUDE_INGRESS); + flags & BPF_F_EXCLUDE_INGRESS); } else { err = dev_map_enqueue(fwd, xdpf, dev); } @@ -4442,9 +4453,9 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect_frame); static int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog, - void *fwd, - enum bpf_map_type map_type, u32 map_id) + struct bpf_prog *xdp_prog, void *fwd, + enum bpf_map_type map_type, u32 map_id, + u32 flags) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); struct bpf_map *map; @@ -4454,11 +4465,20 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, case BPF_MAP_TYPE_DEVMAP: fallthrough; case BPF_MAP_TYPE_DEVMAP_HASH: - map = READ_ONCE(ri->map); - if (unlikely(map)) { + if (unlikely(flags & BPF_F_BROADCAST)) { + map = READ_ONCE(ri->map); + + /* The map pointer is cleared when the map is being torn + * down by bpf_clear_redirect_map() + */ + if (unlikely(!map)) { + err = -ENOENT; + break; + } + WRITE_ONCE(ri->map, NULL); err = dev_map_redirect_multi(dev, skb, xdp_prog, map, - ri->flags & BPF_F_EXCLUDE_INGRESS); + flags & BPF_F_EXCLUDE_INGRESS); } else { err = dev_map_generic_redirect(fwd, skb, xdp_prog); } @@ -4495,9 +4515,11 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; u32 map_id = ri->map_id; + u32 flags = ri->flags; int err; ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ + ri->flags = 0; ri->map_type = BPF_MAP_TYPE_UNSPEC; if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) { @@ -4517,7 +4539,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, return 0; } - return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id); + return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id, flags); err: _trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err); return err; diff --git a/net/core/gro.c b/net/core/gro.c index 83f35d99a682..c7901253a1a8 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -371,6 +371,7 @@ static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff) const skb_frag_t *frag0; unsigned int headlen; + NAPI_GRO_CB(skb)->network_offset = 0; NAPI_GRO_CB(skb)->data_offset = 0; headlen = skb_headlen(skb); NAPI_GRO_CB(skb)->frag0 = skb->data; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b99127712e67..4096e679f61c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2123,11 +2123,17 @@ static inline int skb_alloc_rx_flag(const struct sk_buff *skb) struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) { - int headerlen = skb_headroom(skb); - unsigned int size = skb_end_offset(skb) + skb->data_len; - struct sk_buff *n = __alloc_skb(size, gfp_mask, - skb_alloc_rx_flag(skb), NUMA_NO_NODE); + struct sk_buff *n; + unsigned int size; + int headerlen; + if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)) + return NULL; + + headerlen = skb_headroom(skb); + size = skb_end_offset(skb) + skb->data_len; + n = __alloc_skb(size, gfp_mask, + skb_alloc_rx_flag(skb), NUMA_NO_NODE); if (!n) return NULL; @@ -2455,12 +2461,17 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, /* * Allocate the copy buffer */ - struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom, - gfp_mask, skb_alloc_rx_flag(skb), - NUMA_NO_NODE); - int oldheadroom = skb_headroom(skb); int head_copy_len, head_copy_off; + struct sk_buff *n; + int oldheadroom; + if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)) + return NULL; + + oldheadroom = skb_headroom(skb); + n = __alloc_skb(newheadroom + skb->len + newtailroom, + gfp_mask, skb_alloc_rx_flag(skb), + NUMA_NO_NODE); if (!n) return NULL; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 4d75ef9d24bf..fd20aae30be2 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1226,11 +1226,8 @@ static void sk_psock_verdict_data_ready(struct sock *sk) rcu_read_lock(); psock = sk_psock(sk); - if (psock) { - read_lock_bh(&sk->sk_callback_lock); + if (psock) sk_psock_data_ready(sk, psock); - read_unlock_bh(&sk->sk_callback_lock); - } rcu_read_unlock(); } } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 55bd72997b31..fafb123f798b 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1572,6 +1572,7 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb) /* The above will be needed by the transport layer if there is one * immediately following this IP hdr. */ + NAPI_GRO_CB(skb)->inner_network_offset = off; /* Note : No need to call skb_gro_postpull_rcsum() here, * as we already checked checksum over ipv4 header was 0 diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 1fe794967211..39229fd0601a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1473,7 +1473,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, * by icmp_hdr(skb)->type. */ if (sk->sk_type == SOCK_RAW && - !inet_test_bit(HDRINCL, sk)) + !(fl4->flowi4_flags & FLOWI_FLAG_KNOWN_NH)) icmp_type = fl4->fl4_icmp_type; else icmp_type = icmp_hdr(skb)->type; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index dcb11f22cbf2..4cb43401e0e0 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -612,6 +612,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0, sk->sk_uid); + fl4.fl4_icmp_type = 0; + fl4.fl4_icmp_code = 0; + if (!hdrincl) { rfv.msg = msg; rfv.hlen = 0; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 420905be5f30..b32cf2eeeb41 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -532,7 +532,8 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport) { - const struct iphdr *iph = ip_hdr(skb); + const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; + const struct iphdr *iph = (struct iphdr *)(skb->data + offset); struct net *net = dev_net(skb->dev); int iif, sdif; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 3498dd1d0694..8721fe5beca2 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -471,6 +471,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, struct sk_buff *p; unsigned int ulen; int ret = 0; + int flush; /* requires non zero csum, for symmetry with GSO */ if (!uh->check) { @@ -504,13 +505,22 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, return p; } + flush = NAPI_GRO_CB(p)->flush; + + if (NAPI_GRO_CB(p)->flush_id != 1 || + NAPI_GRO_CB(p)->count != 1 || + !NAPI_GRO_CB(p)->is_atomic) + flush |= NAPI_GRO_CB(p)->flush_id; + else + NAPI_GRO_CB(p)->is_atomic = false; + /* Terminate the flow on len mismatch or if it grow "too much". * Under small packet flood GRO count could elsewhere grow a lot * leading to excessive truesize values. * On len mismatch merge the first packet shorter than gso_size, * otherwise complete the GRO packet. */ - if (ulen > ntohs(uh2->len)) { + if (ulen > ntohs(uh2->len) || flush) { pp = p; } else { if (NAPI_GRO_CB(skb)->is_flist) { @@ -718,7 +728,8 @@ EXPORT_SYMBOL(udp_gro_complete); INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff) { - const struct iphdr *iph = ip_hdr(skb); + const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; + const struct iphdr *iph = (struct iphdr *)(skb->data + offset); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); /* do fraglist only if there is no outer UDP encap (or we already processed it) */ diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index b41e35af69ea..c8b909a9904f 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -237,6 +237,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head, goto out; skb_set_network_header(skb, off); + NAPI_GRO_CB(skb)->inner_network_offset = off; flush += ntohs(iph->payload_len) != skb->len - hlen; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1a4cccdd40c9..8f7aa8bac1e7 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -272,7 +272,8 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport) { - const struct ipv6hdr *iph = ipv6_hdr(skb); + const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; + const struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + offset); struct net *net = dev_net(skb->dev); int iif, sdif; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index bbd347de00b4..b41152dd4246 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -164,7 +164,8 @@ flush: INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff) { - const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; + const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + offset); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); /* do fraglist only if there is no outer UDP encap (or we already processed it) */ diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 39e487ccc468..8ba00ad433c2 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -127,6 +127,9 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, /* checksums verified by L2TP */ skb->ip_summed = CHECKSUM_NONE; + /* drop outer flow-hash */ + skb_clear_hash(skb); + skb_dst_drop(skb); nf_reset_ct(skb); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 7e74b812e366..965eb69dc5de 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3723,6 +3723,9 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT); mptcp_subflow_early_fallback(msk, subflow); } + + WRITE_ONCE(msk->write_seq, subflow->idsn); + WRITE_ONCE(msk->snd_nxt, subflow->idsn); if (likely(!__mptcp_check_fallback(msk))) MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVE); diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c index f4a38bd6a7e0..bfb7758063f3 100644 --- a/net/nsh/nsh.c +++ b/net/nsh/nsh.c @@ -77,13 +77,15 @@ EXPORT_SYMBOL_GPL(nsh_pop); static struct sk_buff *nsh_gso_segment(struct sk_buff *skb, netdev_features_t features) { + unsigned int outer_hlen, mac_len, nsh_len; struct sk_buff *segs = ERR_PTR(-EINVAL); u16 mac_offset = skb->mac_header; - unsigned int nsh_len, mac_len; - __be16 proto; + __be16 outer_proto, proto; skb_reset_network_header(skb); + outer_proto = skb->protocol; + outer_hlen = skb_mac_header_len(skb); mac_len = skb->mac_len; if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN))) @@ -113,10 +115,10 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb, } for (skb = segs; skb; skb = skb->next) { - skb->protocol = htons(ETH_P_NSH); - __skb_push(skb, nsh_len); - skb->mac_header = mac_offset; - skb->network_header = skb->mac_header + mac_len; + skb->protocol = outer_proto; + __skb_push(skb, nsh_len + outer_hlen); + skb_reset_mac_header(skb); + skb_set_network_header(skb, outer_hlen); skb->mac_len = mac_len; } diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 0af4642aeec4..1539d315afe7 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -119,18 +119,13 @@ struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *lo switch (srx->transport.family) { case AF_INET: if (peer->srx.transport.sin.sin_port != - srx->transport.sin.sin_port || - peer->srx.transport.sin.sin_addr.s_addr != - srx->transport.sin.sin_addr.s_addr) + srx->transport.sin.sin_port) goto not_found; break; #ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: if (peer->srx.transport.sin6.sin6_port != - srx->transport.sin6.sin6_port || - memcmp(&peer->srx.transport.sin6.sin6_addr, - &srx->transport.sin6.sin6_addr, - sizeof(struct in6_addr)) != 0) + srx->transport.sin6.sin6_port) goto not_found; break; #endif diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index f2701068ed9e..6716c021a532 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -19,7 +19,7 @@ static int none_init_connection_security(struct rxrpc_connection *conn, */ static struct rxrpc_txbuf *none_alloc_txbuf(struct rxrpc_call *call, size_t remain, gfp_t gfp) { - return rxrpc_alloc_data_txbuf(call, min_t(size_t, remain, RXRPC_JUMBO_DATALEN), 0, gfp); + return rxrpc_alloc_data_txbuf(call, min_t(size_t, remain, RXRPC_JUMBO_DATALEN), 1, gfp); } static int none_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index f1a68270862d..48a1475e6b06 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -155,7 +155,7 @@ static struct rxrpc_txbuf *rxkad_alloc_txbuf(struct rxrpc_call *call, size_t rem switch (call->conn->security_level) { default: space = min_t(size_t, remain, RXRPC_JUMBO_DATALEN); - return rxrpc_alloc_data_txbuf(call, space, 0, gfp); + return rxrpc_alloc_data_txbuf(call, space, 1, gfp); case RXRPC_SECURITY_AUTH: shdr = sizeof(struct rxkad_level1_hdr); break; diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c index e0679658d9de..c3913d8a50d3 100644 --- a/net/rxrpc/txbuf.c +++ b/net/rxrpc/txbuf.c @@ -21,20 +21,20 @@ struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_ { struct rxrpc_wire_header *whdr; struct rxrpc_txbuf *txb; - size_t total, hoff = 0; + size_t total, hoff; void *buf; txb = kmalloc(sizeof(*txb), gfp); if (!txb) return NULL; - if (data_align) - hoff = round_up(sizeof(*whdr), data_align) - sizeof(*whdr); + hoff = round_up(sizeof(*whdr), data_align) - sizeof(*whdr); total = hoff + sizeof(*whdr) + data_size; + data_align = umax(data_align, L1_CACHE_BYTES); mutex_lock(&call->conn->tx_data_alloc_lock); - buf = __page_frag_alloc_align(&call->conn->tx_data_alloc, total, gfp, - ~(data_align - 1) & ~(L1_CACHE_BYTES - 1)); + buf = page_frag_alloc_align(&call->conn->tx_data_alloc, total, gfp, + data_align); mutex_unlock(&call->conn->tx_data_alloc_lock); if (!buf) { kfree(txb); diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 5c9fd4791c4b..76284fc538eb 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -142,9 +142,9 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (fragid == FIRST_FRAGMENT) { if (unlikely(head)) goto err; - *buf = NULL; if (skb_has_frag_list(frag) && __skb_linearize(frag)) goto err; + *buf = NULL; frag = skb_unshare(frag, GFP_ATOMIC); if (unlikely(!frag)) goto err; @@ -156,6 +156,11 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (!head) goto err; + /* Either the input skb ownership is transferred to headskb + * or the input skb is freed, clear the reference to avoid + * bad access on error path. + */ + *buf = NULL; if (skb_try_coalesce(head, frag, &headstolen, &delta)) { kfree_skb_partial(frag, headstolen); } else { @@ -179,7 +184,6 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) *headbuf = NULL; return 1; } - *buf = NULL; return 0; err: kfree_skb(*buf); diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 39ad96a18123..edcd26106557 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -205,6 +205,9 @@ __weak noinline struct file *bpf_testmod_return_ptr(int arg) case 5: return (void *)~(1ull << 30); /* trigger extable */ case 6: return &f; /* valid addr */ case 7: return (void *)((long)&f | 1); /* kernel tricks */ +#ifdef CONFIG_X86_64 + case 8: return (void *)VSYSCALL_ADDR; /* vsyscall page address */ +#endif default: return NULL; } }