mirror of
https://github.com/torvalds/linux.git
synced 2024-11-21 19:41:42 +00:00
Including fixes from netfilter.
Previous releases - regressions: - netfilter: - nf_reject_ipv6: fix nf_reject_ip6_tcphdr_put() - nf_tables: keep deleted flowtable hooks until after RCU - tcp: check skb is non-NULL in tcp_rto_delta_us() - phy: aquantia: fix -ETIMEDOUT PHY probe failure when firmware not present - eth: virtio_net: fix mismatched buf address when unmapping for small packets - eth: stmmac: fix zero-division error when disabling tc cbs - eth: bonding: fix unnecessary warnings and logs from bond_xdp_get_xmit_slave() Previous releases - always broken: - netfilter: - fix clash resolution for bidirectional flows - fix allocation with no memcg accounting - eth: r8169: add tally counter fields added with RTL8125 - eth: ravb: fix rx and tx frame size limit Signed-off-by: Paolo Abeni <pabeni@redhat.com> -----BEGIN PGP SIGNATURE----- iQJGBAABCAAwFiEEg1AjqC77wbdLX2LbKSR5jcyPE6QFAmb1bHASHHBhYmVuaUBy ZWRoYXQuY29tAAoJECkkeY3MjxOkxUAP/3cnsANzqmulU+zXLRCyYqQkMnLDrXuC yb1sy4gf/2vih+UPAK0Gw+NXMnL/Ftlv2EMV9RQKFjIWV4D0AYGEmKdnPhe2ycRN 0Gr7zSZdP2KlA7HgYSehxmWjrNFatAmyGvIEYs+9JBzLnoZCkRlsrYE8HO7fk8+a 4FDyh+FyiniDKR3+W/tgPoZy/U+FS9AUftOrAjCM/o6c0WPugwgHDxwlyrBg3lAp Mkx8Q3IPWESOfPcUmJ+AezljfL1W3xAG/4cxALpN9lboeJaZNjvMQgMyqC1uVyHS VJOkOuhQEVfXpc9139j5DxPHhacmLBQGfDw6ZXevwRC9NwgaLcRh9cf3rUafA7uC qT7P5dt5y3kGOqp7pltUsFT7C47VD7ZlFz4J6eqTVCVTopjpMipZajvWZEIDNqPa ftsMW0ZIbjpJVTJAvhlrKySxsRFte6b3aa9VdttkevgQPMneEXyePe8Me6Fbrv+t hF5R8we6842xclLfjBCJT1d4e7yW8B5o69eygQbyaqRK9EhbaF+4R0V+NK9eVnd9 qZudNZBznnfdVgjjgcu12qievHEazIAFkyjs+ZCt2xYNcRg8cLwr/TclOB8fEMBO VpjPci4j1Ln158EbGJf30VQpZJzXSrxZ4HFZU1Be+d3fW58o1H9zMfvweOcvxI/v AQWSy3aMoWHB =l8TJ -----END PGP SIGNATURE----- Merge tag 'net-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net Pull networking fixes from Paolo Abeni: "Including fixes from netfilter. It looks like that most people are still traveling: both the ML volume and the processing capacity are low. 
Previous releases - regressions: - netfilter: - nf_reject_ipv6: fix nf_reject_ip6_tcphdr_put() - nf_tables: keep deleted flowtable hooks until after RCU - tcp: check skb is non-NULL in tcp_rto_delta_us() - phy: aquantia: fix -ETIMEDOUT PHY probe failure when firmware not present - eth: virtio_net: fix mismatched buf address when unmapping for small packets - eth: stmmac: fix zero-division error when disabling tc cbs - eth: bonding: fix unnecessary warnings and logs from bond_xdp_get_xmit_slave() Previous releases - always broken: - netfilter: - fix clash resolution for bidirectional flows - fix allocation with no memcg accounting - eth: r8169: add tally counter fields added with RTL8125 - eth: ravb: fix rx and tx frame size limit" * tag 'net-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (35 commits) selftests: netfilter: Avoid hanging ipvs.sh kselftest: add test for nfqueue induced conntrack race netfilter: nfnetlink_queue: remove old clash resolution logic netfilter: nf_tables: missing objects with no memcg accounting netfilter: nf_tables: use rcu chain hook list iterator from netlink dump path netfilter: ctnetlink: compile ctnetlink_label_size with CONFIG_NF_CONNTRACK_EVENTS netfilter: nf_reject: Fix build warning when CONFIG_BRIDGE_NETFILTER=n netfilter: nf_tables: Keep deleted flowtable hooks until after RCU docs: tproxy: ignore non-transparent sockets in iptables netfilter: ctnetlink: Guard possible unused functions selftests: netfilter: nft_tproxy.sh: add tcp tests selftests: netfilter: add reverse-clash resolution test case netfilter: conntrack: add clash resolution for reverse collisions netfilter: nf_nat: don't try nat source port reallocation for reverse dir clash selftests/net: packetdrill: increase timing tolerance in debug mode usbnet: fix cyclical race on disconnect with work queue net: stmmac: set PP_FLAG_DMA_SYNC_DEV only if XDP is enabled virtio_net: Fix mismatched buf address when unmapping for small packets bonding: Fix 
unnecessary warnings and logs from bond_xdp_get_xmit_slave() r8169: add missing MODULE_FIRMWARE entry for RTL8126A rev.b ...
This commit is contained in:
commit
62a0e2fa40
@ -17,7 +17,7 @@ The idea is that you identify packets with destination address matching a local
|
||||
socket on your box, set the packet mark to a certain value::
|
||||
|
||||
# iptables -t mangle -N DIVERT
|
||||
# iptables -t mangle -A PREROUTING -p tcp -m socket -j DIVERT
|
||||
# iptables -t mangle -A PREROUTING -p tcp -m socket --transparent -j DIVERT
|
||||
# iptables -t mangle -A DIVERT -j MARK --set-mark 1
|
||||
# iptables -t mangle -A DIVERT -j ACCEPT
|
||||
|
||||
|
@ -17316,8 +17316,8 @@ M: Parthiban Veerasooran <parthiban.veerasooran@microchip.com>
|
||||
L: netdev@vger.kernel.org
|
||||
S: Maintained
|
||||
F: Documentation/networking/oa-tc6-framework.rst
|
||||
F: drivers/include/linux/oa_tc6.h
|
||||
F: drivers/net/ethernet/oa_tc6.c
|
||||
F: include/linux/oa_tc6.h
|
||||
|
||||
OPEN FIRMWARE AND FLATTENED DEVICE TREE
|
||||
M: Rob Herring <robh@kernel.org>
|
||||
|
@ -5610,9 +5610,9 @@ bond_xdp_get_xmit_slave(struct net_device *bond_dev, struct xdp_buff *xdp)
|
||||
break;
|
||||
|
||||
default:
|
||||
/* Should never happen. Mode guarded by bond_xdp_check() */
|
||||
netdev_err(bond_dev, "Unknown bonding mode %d for xdp xmit\n", BOND_MODE(bond));
|
||||
WARN_ON_ONCE(1);
|
||||
if (net_ratelimit())
|
||||
netdev_err(bond_dev, "Unknown bonding mode %d for xdp xmit\n",
|
||||
BOND_MODE(bond));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -579,6 +579,33 @@ struct rtl8169_counters {
|
||||
__le32 rx_multicast;
|
||||
__le16 tx_aborted;
|
||||
__le16 tx_underrun;
|
||||
/* new since RTL8125 */
|
||||
__le64 tx_octets;
|
||||
__le64 rx_octets;
|
||||
__le64 rx_multicast64;
|
||||
__le64 tx_unicast64;
|
||||
__le64 tx_broadcast64;
|
||||
__le64 tx_multicast64;
|
||||
__le32 tx_pause_on;
|
||||
__le32 tx_pause_off;
|
||||
__le32 tx_pause_all;
|
||||
__le32 tx_deferred;
|
||||
__le32 tx_late_collision;
|
||||
__le32 tx_all_collision;
|
||||
__le32 tx_aborted32;
|
||||
__le32 align_errors32;
|
||||
__le32 rx_frame_too_long;
|
||||
__le32 rx_runt;
|
||||
__le32 rx_pause_on;
|
||||
__le32 rx_pause_off;
|
||||
__le32 rx_pause_all;
|
||||
__le32 rx_unknown_opcode;
|
||||
__le32 rx_mac_error;
|
||||
__le32 tx_underrun32;
|
||||
__le32 rx_mac_missed;
|
||||
__le32 rx_tcam_dropped;
|
||||
__le32 tdu;
|
||||
__le32 rdu;
|
||||
};
|
||||
|
||||
struct rtl8169_tc_offsets {
|
||||
@ -681,6 +708,7 @@ MODULE_FIRMWARE(FIRMWARE_8107E_2);
|
||||
MODULE_FIRMWARE(FIRMWARE_8125A_3);
|
||||
MODULE_FIRMWARE(FIRMWARE_8125B_2);
|
||||
MODULE_FIRMWARE(FIRMWARE_8126A_2);
|
||||
MODULE_FIRMWARE(FIRMWARE_8126A_3);
|
||||
|
||||
static inline struct device *tp_to_dev(struct rtl8169_private *tp)
|
||||
{
|
||||
|
@ -1052,6 +1052,7 @@ struct ravb_hw_info {
|
||||
netdev_features_t net_features;
|
||||
int stats_len;
|
||||
u32 tccr_mask;
|
||||
u32 tx_max_frame_size;
|
||||
u32 rx_max_frame_size;
|
||||
u32 rx_buffer_size;
|
||||
u32 rx_desc_size;
|
||||
|
@ -555,8 +555,16 @@ static void ravb_emac_init_gbeth(struct net_device *ndev)
|
||||
|
||||
static void ravb_emac_init_rcar(struct net_device *ndev)
|
||||
{
|
||||
/* Receive frame limit set register */
|
||||
ravb_write(ndev, ndev->mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN, RFLR);
|
||||
struct ravb_private *priv = netdev_priv(ndev);
|
||||
|
||||
/* Set receive frame length
|
||||
*
|
||||
* The length set here describes the frame from the destination address
|
||||
* up to and including the CRC data. However only the frame data,
|
||||
* excluding the CRC, are transferred to memory. To allow for the
|
||||
* largest frames add the CRC length to the maximum Rx descriptor size.
|
||||
*/
|
||||
ravb_write(ndev, priv->info->rx_max_frame_size + ETH_FCS_LEN, RFLR);
|
||||
|
||||
/* EMAC Mode: PAUSE prohibition; Duplex; RX Checksum; TX; RX */
|
||||
ravb_write(ndev, ECMR_ZPF | ECMR_DM |
|
||||
@ -2674,6 +2682,7 @@ static const struct ravb_hw_info ravb_gen2_hw_info = {
|
||||
.net_features = NETIF_F_RXCSUM,
|
||||
.stats_len = ARRAY_SIZE(ravb_gstrings_stats),
|
||||
.tccr_mask = TCCR_TSRQ0 | TCCR_TSRQ1 | TCCR_TSRQ2 | TCCR_TSRQ3,
|
||||
.tx_max_frame_size = SZ_2K,
|
||||
.rx_max_frame_size = SZ_2K,
|
||||
.rx_buffer_size = SZ_2K +
|
||||
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
|
||||
@ -2696,6 +2705,7 @@ static const struct ravb_hw_info ravb_gen3_hw_info = {
|
||||
.net_features = NETIF_F_RXCSUM,
|
||||
.stats_len = ARRAY_SIZE(ravb_gstrings_stats),
|
||||
.tccr_mask = TCCR_TSRQ0 | TCCR_TSRQ1 | TCCR_TSRQ2 | TCCR_TSRQ3,
|
||||
.tx_max_frame_size = SZ_2K,
|
||||
.rx_max_frame_size = SZ_2K,
|
||||
.rx_buffer_size = SZ_2K +
|
||||
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
|
||||
@ -2721,6 +2731,7 @@ static const struct ravb_hw_info ravb_gen4_hw_info = {
|
||||
.net_features = NETIF_F_RXCSUM,
|
||||
.stats_len = ARRAY_SIZE(ravb_gstrings_stats),
|
||||
.tccr_mask = TCCR_TSRQ0 | TCCR_TSRQ1 | TCCR_TSRQ2 | TCCR_TSRQ3,
|
||||
.tx_max_frame_size = SZ_2K,
|
||||
.rx_max_frame_size = SZ_2K,
|
||||
.rx_buffer_size = SZ_2K +
|
||||
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
|
||||
@ -2770,6 +2781,7 @@ static const struct ravb_hw_info gbeth_hw_info = {
|
||||
.net_features = NETIF_F_RXCSUM | NETIF_F_HW_CSUM,
|
||||
.stats_len = ARRAY_SIZE(ravb_gstrings_stats_gbeth),
|
||||
.tccr_mask = TCCR_TSRQ0,
|
||||
.tx_max_frame_size = 1522,
|
||||
.rx_max_frame_size = SZ_8K,
|
||||
.rx_buffer_size = SZ_2K,
|
||||
.rx_desc_size = sizeof(struct ravb_rx_desc),
|
||||
@ -2981,7 +2993,7 @@ static int ravb_probe(struct platform_device *pdev)
|
||||
priv->avb_link_active_low =
|
||||
of_property_read_bool(np, "renesas,ether-link-active-low");
|
||||
|
||||
ndev->max_mtu = info->rx_max_frame_size -
|
||||
ndev->max_mtu = info->tx_max_frame_size -
|
||||
(ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN);
|
||||
ndev->min_mtu = ETH_MIN_MTU;
|
||||
|
||||
|
@ -847,9 +847,11 @@ static void ether3_remove(struct expansion_card *ec)
|
||||
{
|
||||
struct net_device *dev = ecard_get_drvdata(ec);
|
||||
|
||||
ether3_outw(priv(dev)->regs.config2 |= CFG2_CTRLO, REG_CONFIG2);
|
||||
ecard_set_drvdata(ec, NULL);
|
||||
|
||||
unregister_netdev(dev);
|
||||
del_timer_sync(&priv(dev)->timer);
|
||||
free_netdev(dev);
|
||||
ecard_release_resources(ec);
|
||||
}
|
||||
|
@ -2035,7 +2035,7 @@ static int __alloc_dma_rx_desc_resources(struct stmmac_priv *priv,
|
||||
rx_q->queue_index = queue;
|
||||
rx_q->priv_data = priv;
|
||||
|
||||
pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
|
||||
pp_params.flags = PP_FLAG_DMA_MAP | (xdp_prog ? PP_FLAG_DMA_SYNC_DEV : 0);
|
||||
pp_params.pool_size = dma_conf->dma_rx_size;
|
||||
num_pages = DIV_ROUND_UP(dma_conf->dma_buf_sz, PAGE_SIZE);
|
||||
pp_params.order = ilog2(num_pages);
|
||||
|
@ -386,6 +386,7 @@ static int tc_setup_cbs(struct stmmac_priv *priv,
|
||||
return ret;
|
||||
|
||||
priv->plat->tx_queues_cfg[queue].mode_to_use = MTL_QUEUE_DCB;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Final adjustments for HW */
|
||||
|
@ -736,15 +736,15 @@ static int axienet_device_reset(struct net_device *ndev)
|
||||
*
|
||||
* Would either be called after a successful transmit operation, or after
|
||||
* there was an error when setting up the chain.
|
||||
* Returns the number of descriptors handled.
|
||||
* Returns the number of packets handled.
|
||||
*/
|
||||
static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd,
|
||||
int nr_bds, bool force, u32 *sizep, int budget)
|
||||
{
|
||||
struct axidma_bd *cur_p;
|
||||
unsigned int status;
|
||||
int i, packets = 0;
|
||||
dma_addr_t phys;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nr_bds; i++) {
|
||||
cur_p = &lp->tx_bd_v[(first_bd + i) % lp->tx_bd_num];
|
||||
@ -763,8 +763,10 @@ static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd,
|
||||
(cur_p->cntrl & XAXIDMA_BD_CTRL_LENGTH_MASK),
|
||||
DMA_TO_DEVICE);
|
||||
|
||||
if (cur_p->skb && (status & XAXIDMA_BD_STS_COMPLETE_MASK))
|
||||
if (cur_p->skb && (status & XAXIDMA_BD_STS_COMPLETE_MASK)) {
|
||||
napi_consume_skb(cur_p->skb, budget);
|
||||
packets++;
|
||||
}
|
||||
|
||||
cur_p->app0 = 0;
|
||||
cur_p->app1 = 0;
|
||||
@ -780,7 +782,13 @@ static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd,
|
||||
*sizep += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK;
|
||||
}
|
||||
|
||||
return i;
|
||||
if (!force) {
|
||||
lp->tx_bd_ci += i;
|
||||
if (lp->tx_bd_ci >= lp->tx_bd_num)
|
||||
lp->tx_bd_ci %= lp->tx_bd_num;
|
||||
}
|
||||
|
||||
return packets;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -953,13 +961,10 @@ static int axienet_tx_poll(struct napi_struct *napi, int budget)
|
||||
u32 size = 0;
|
||||
int packets;
|
||||
|
||||
packets = axienet_free_tx_chain(lp, lp->tx_bd_ci, budget, false, &size, budget);
|
||||
packets = axienet_free_tx_chain(lp, lp->tx_bd_ci, lp->tx_bd_num, false,
|
||||
&size, budget);
|
||||
|
||||
if (packets) {
|
||||
lp->tx_bd_ci += packets;
|
||||
if (lp->tx_bd_ci >= lp->tx_bd_num)
|
||||
lp->tx_bd_ci %= lp->tx_bd_num;
|
||||
|
||||
u64_stats_update_begin(&lp->tx_stat_sync);
|
||||
u64_stats_add(&lp->tx_packets, packets);
|
||||
u64_stats_add(&lp->tx_bytes, size);
|
||||
@ -1282,9 +1287,10 @@ static irqreturn_t axienet_tx_irq(int irq, void *_ndev)
|
||||
u32 cr = lp->tx_dma_cr;
|
||||
|
||||
cr &= ~(XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK);
|
||||
axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr);
|
||||
|
||||
napi_schedule(&lp->napi_tx);
|
||||
if (napi_schedule_prep(&lp->napi_tx)) {
|
||||
axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr);
|
||||
__napi_schedule(&lp->napi_tx);
|
||||
}
|
||||
}
|
||||
|
||||
return IRQ_HANDLED;
|
||||
@ -1326,9 +1332,10 @@ static irqreturn_t axienet_rx_irq(int irq, void *_ndev)
|
||||
u32 cr = lp->rx_dma_cr;
|
||||
|
||||
cr &= ~(XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK);
|
||||
axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr);
|
||||
|
||||
napi_schedule(&lp->napi_rx);
|
||||
if (napi_schedule_prep(&lp->napi_rx)) {
|
||||
axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr);
|
||||
__napi_schedule(&lp->napi_rx);
|
||||
}
|
||||
}
|
||||
|
||||
return IRQ_HANDLED;
|
||||
|
@ -353,26 +353,32 @@ int aqr_firmware_load(struct phy_device *phydev)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = aqr_wait_reset_complete(phydev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Check if the firmware is not already loaded by pooling
|
||||
* the current version returned by the PHY. If 0 is returned,
|
||||
* no firmware is loaded.
|
||||
/* Check if the firmware is not already loaded by polling
|
||||
* the current version returned by the PHY.
|
||||
*/
|
||||
ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_FW_ID);
|
||||
if (ret > 0)
|
||||
goto exit;
|
||||
ret = aqr_wait_reset_complete(phydev);
|
||||
switch (ret) {
|
||||
case 0:
|
||||
/* Some firmware is loaded => do nothing */
|
||||
return 0;
|
||||
case -ETIMEDOUT:
|
||||
/* VEND1_GLOBAL_FW_ID still reads 0 after 2 seconds of polling.
|
||||
* We don't have full confidence that no firmware is loaded (in
|
||||
* theory it might just not have loaded yet), but we will
|
||||
* assume that, and load a new image.
|
||||
*/
|
||||
ret = aqr_firmware_load_nvmem(phydev);
|
||||
if (!ret)
|
||||
return ret;
|
||||
|
||||
ret = aqr_firmware_load_nvmem(phydev);
|
||||
if (!ret)
|
||||
goto exit;
|
||||
|
||||
ret = aqr_firmware_load_fs(phydev);
|
||||
if (ret)
|
||||
ret = aqr_firmware_load_fs(phydev);
|
||||
if (ret)
|
||||
return ret;
|
||||
break;
|
||||
default:
|
||||
/* PHY read error, propagate it to the caller */
|
||||
return ret;
|
||||
}
|
||||
|
||||
exit:
|
||||
return 0;
|
||||
}
|
||||
|
@ -120,7 +120,8 @@ int aqr_phy_led_hw_control_set(struct phy_device *phydev, u8 index,
|
||||
int aqr_phy_led_active_low_set(struct phy_device *phydev, int index, bool enable)
|
||||
{
|
||||
return phy_modify_mmd(phydev, MDIO_MMD_VEND1, AQR_LED_DRIVE(index),
|
||||
VEND1_GLOBAL_LED_DRIVE_VDD, enable);
|
||||
VEND1_GLOBAL_LED_DRIVE_VDD,
|
||||
enable ? VEND1_GLOBAL_LED_DRIVE_VDD : 0);
|
||||
}
|
||||
|
||||
int aqr_phy_led_polarity_set(struct phy_device *phydev, int index, unsigned long modes)
|
||||
|
@ -435,6 +435,9 @@ static int aqr107_set_tunable(struct phy_device *phydev,
|
||||
}
|
||||
}
|
||||
|
||||
#define AQR_FW_WAIT_SLEEP_US 20000
|
||||
#define AQR_FW_WAIT_TIMEOUT_US 2000000
|
||||
|
||||
/* If we configure settings whilst firmware is still initializing the chip,
|
||||
* then these settings may be overwritten. Therefore make sure chip
|
||||
* initialization has completed. Use presence of the firmware ID as
|
||||
@ -444,11 +447,19 @@ static int aqr107_set_tunable(struct phy_device *phydev,
|
||||
*/
|
||||
int aqr_wait_reset_complete(struct phy_device *phydev)
|
||||
{
|
||||
int val;
|
||||
int ret, val;
|
||||
|
||||
return phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1,
|
||||
VEND1_GLOBAL_FW_ID, val, val != 0,
|
||||
20000, 2000000, false);
|
||||
ret = read_poll_timeout(phy_read_mmd, val, val != 0,
|
||||
AQR_FW_WAIT_SLEEP_US, AQR_FW_WAIT_TIMEOUT_US,
|
||||
false, phydev, MDIO_MMD_VEND1,
|
||||
VEND1_GLOBAL_FW_ID);
|
||||
if (val < 0) {
|
||||
phydev_err(phydev, "Failed to read VEND1_GLOBAL_FW_ID: %pe\n",
|
||||
ERR_PTR(val));
|
||||
return val;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void aqr107_chip_info(struct phy_device *phydev)
|
||||
@ -478,7 +489,7 @@ static int aqr107_config_init(struct phy_device *phydev)
|
||||
{
|
||||
struct aqr107_priv *priv = phydev->priv;
|
||||
u32 led_active_low;
|
||||
int ret, index = 0;
|
||||
int ret;
|
||||
|
||||
/* Check that the PHY interface type is compatible */
|
||||
if (phydev->interface != PHY_INTERFACE_MODE_SGMII &&
|
||||
@ -505,10 +516,9 @@ static int aqr107_config_init(struct phy_device *phydev)
|
||||
|
||||
/* Restore LED polarity state after reset */
|
||||
for_each_set_bit(led_active_low, &priv->leds_active_low, AQR_MAX_LEDS) {
|
||||
ret = aqr_phy_led_active_low_set(phydev, index, led_active_low);
|
||||
ret = aqr_phy_led_active_low_set(phydev, led_active_low, true);
|
||||
if (ret)
|
||||
return ret;
|
||||
index++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -464,10 +464,15 @@ static enum skb_state defer_bh(struct usbnet *dev, struct sk_buff *skb,
|
||||
void usbnet_defer_kevent (struct usbnet *dev, int work)
|
||||
{
|
||||
set_bit (work, &dev->flags);
|
||||
if (!schedule_work (&dev->kevent))
|
||||
netdev_dbg(dev->net, "kevent %s may have been dropped\n", usbnet_event_names[work]);
|
||||
else
|
||||
netdev_dbg(dev->net, "kevent %s scheduled\n", usbnet_event_names[work]);
|
||||
if (!usbnet_going_away(dev)) {
|
||||
if (!schedule_work(&dev->kevent))
|
||||
netdev_dbg(dev->net,
|
||||
"kevent %s may have been dropped\n",
|
||||
usbnet_event_names[work]);
|
||||
else
|
||||
netdev_dbg(dev->net,
|
||||
"kevent %s scheduled\n", usbnet_event_names[work]);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(usbnet_defer_kevent);
|
||||
|
||||
@ -535,7 +540,8 @@ static int rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags)
|
||||
tasklet_schedule (&dev->bh);
|
||||
break;
|
||||
case 0:
|
||||
__usbnet_queue_skb(&dev->rxq, skb, rx_start);
|
||||
if (!usbnet_going_away(dev))
|
||||
__usbnet_queue_skb(&dev->rxq, skb, rx_start);
|
||||
}
|
||||
} else {
|
||||
netif_dbg(dev, ifdown, dev->net, "rx: stopped\n");
|
||||
@ -843,9 +849,18 @@ int usbnet_stop (struct net_device *net)
|
||||
|
||||
/* deferred work (timer, softirq, task) must also stop */
|
||||
dev->flags = 0;
|
||||
del_timer_sync (&dev->delay);
|
||||
tasklet_kill (&dev->bh);
|
||||
del_timer_sync(&dev->delay);
|
||||
tasklet_kill(&dev->bh);
|
||||
cancel_work_sync(&dev->kevent);
|
||||
|
||||
/* We have cyclic dependencies. Those calls are needed
|
||||
* to break a cycle. We cannot fall into the gaps because
|
||||
* we have a flag
|
||||
*/
|
||||
tasklet_kill(&dev->bh);
|
||||
del_timer_sync(&dev->delay);
|
||||
cancel_work_sync(&dev->kevent);
|
||||
|
||||
if (!pm)
|
||||
usb_autopm_put_interface(dev->intf);
|
||||
|
||||
@ -1171,7 +1186,8 @@ fail_halt:
|
||||
status);
|
||||
} else {
|
||||
clear_bit (EVENT_RX_HALT, &dev->flags);
|
||||
tasklet_schedule (&dev->bh);
|
||||
if (!usbnet_going_away(dev))
|
||||
tasklet_schedule(&dev->bh);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1196,7 +1212,8 @@ fail_halt:
|
||||
usb_autopm_put_interface(dev->intf);
|
||||
fail_lowmem:
|
||||
if (resched)
|
||||
tasklet_schedule (&dev->bh);
|
||||
if (!usbnet_going_away(dev))
|
||||
tasklet_schedule(&dev->bh);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1559,6 +1576,7 @@ static void usbnet_bh (struct timer_list *t)
|
||||
} else if (netif_running (dev->net) &&
|
||||
netif_device_present (dev->net) &&
|
||||
netif_carrier_ok(dev->net) &&
|
||||
!usbnet_going_away(dev) &&
|
||||
!timer_pending(&dev->delay) &&
|
||||
!test_bit(EVENT_RX_PAUSED, &dev->flags) &&
|
||||
!test_bit(EVENT_RX_HALT, &dev->flags)) {
|
||||
@ -1606,6 +1624,7 @@ void usbnet_disconnect (struct usb_interface *intf)
|
||||
usb_set_intfdata(intf, NULL);
|
||||
if (!dev)
|
||||
return;
|
||||
usbnet_mark_going_away(dev);
|
||||
|
||||
xdev = interface_to_usbdev (intf);
|
||||
|
||||
|
@ -1807,6 +1807,11 @@ static struct sk_buff *receive_small(struct net_device *dev,
|
||||
struct page *page = virt_to_head_page(buf);
|
||||
struct sk_buff *skb;
|
||||
|
||||
/* We passed the address of virtnet header to virtio-core,
|
||||
* so truncate the padding.
|
||||
*/
|
||||
buf -= VIRTNET_RX_PAD + xdp_headroom;
|
||||
|
||||
len -= vi->hdr_len;
|
||||
u64_stats_add(&stats->bytes, len);
|
||||
|
||||
@ -2422,8 +2427,9 @@ static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
|
||||
if (unlikely(!buf))
|
||||
return -ENOMEM;
|
||||
|
||||
virtnet_rq_init_one_sg(rq, buf + VIRTNET_RX_PAD + xdp_headroom,
|
||||
vi->hdr_len + GOOD_PACKET_LEN);
|
||||
buf += VIRTNET_RX_PAD + xdp_headroom;
|
||||
|
||||
virtnet_rq_init_one_sg(rq, buf, vi->hdr_len + GOOD_PACKET_LEN);
|
||||
|
||||
err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
|
||||
if (err < 0) {
|
||||
|
@ -376,15 +376,11 @@ int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
|
||||
struct nf_conn;
|
||||
enum nf_nat_manip_type;
|
||||
struct nlattr;
|
||||
enum ip_conntrack_dir;
|
||||
|
||||
struct nf_nat_hook {
|
||||
int (*parse_nat_setup)(struct nf_conn *ct, enum nf_nat_manip_type manip,
|
||||
const struct nlattr *attr);
|
||||
void (*decode_session)(struct sk_buff *skb, struct flowi *fl);
|
||||
unsigned int (*manip_pkt)(struct sk_buff *skb, struct nf_conn *ct,
|
||||
enum nf_nat_manip_type mtype,
|
||||
enum ip_conntrack_dir dir);
|
||||
void (*remove_nat_bysrc)(struct nf_conn *ct);
|
||||
};
|
||||
|
||||
|
@ -76,8 +76,23 @@ struct usbnet {
|
||||
# define EVENT_LINK_CHANGE 11
|
||||
# define EVENT_SET_RX_MODE 12
|
||||
# define EVENT_NO_IP_ALIGN 13
|
||||
/* This one is special, as it indicates that the device is going away.
|
||||
* There are cyclic dependencies between tasklet, timer and bh
|
||||
* that must be broken.
|
||||
*/
|
||||
# define EVENT_UNPLUG 31
|
||||
};
|
||||
|
||||
/* Return true if EVENT_UNPLUG is set in @ubn->flags, i.e. the device has
 * been marked as going away by usbnet_mark_going_away().
 */
static inline bool usbnet_going_away(struct usbnet *ubn)
|
||||
{
|
||||
return test_bit(EVENT_UNPLUG, &ubn->flags);
|
||||
}
|
||||
|
||||
/* Set EVENT_UNPLUG in @ubn->flags so that usbnet_going_away() reports true
 * for this device from now on.
 */
static inline void usbnet_mark_going_away(struct usbnet *ubn)
|
||||
{
|
||||
set_bit(EVENT_UNPLUG, &ubn->flags);
|
||||
}
|
||||
|
||||
static inline struct usb_driver *driver_of(struct usb_interface *intf)
|
||||
{
|
||||
return to_usb_driver(intf->dev.driver);
|
||||
|
@ -2435,9 +2435,26 @@ static inline s64 tcp_rto_delta_us(const struct sock *sk)
|
||||
{
|
||||
const struct sk_buff *skb = tcp_rtx_queue_head(sk);
|
||||
u32 rto = inet_csk(sk)->icsk_rto;
|
||||
u64 rto_time_stamp_us = tcp_skb_timestamp_us(skb) + jiffies_to_usecs(rto);
|
||||
|
||||
return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp;
|
||||
if (likely(skb)) {
|
||||
u64 rto_time_stamp_us = tcp_skb_timestamp_us(skb) + jiffies_to_usecs(rto);
|
||||
|
||||
return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp;
|
||||
} else {
|
||||
WARN_ONCE(1,
|
||||
"rtx queue emtpy: "
|
||||
"out:%u sacked:%u lost:%u retrans:%u "
|
||||
"tlp_high_seq:%u sk_state:%u ca_state:%u "
|
||||
"advmss:%u mss_cache:%u pmtu:%u\n",
|
||||
tcp_sk(sk)->packets_out, tcp_sk(sk)->sacked_out,
|
||||
tcp_sk(sk)->lost_out, tcp_sk(sk)->retrans_out,
|
||||
tcp_sk(sk)->tlp_high_seq, sk->sk_state,
|
||||
inet_csk(sk)->icsk_ca_state,
|
||||
tcp_sk(sk)->advmss, tcp_sk(sk)->mss_cache,
|
||||
inet_csk(sk)->icsk_pmtu_cookie);
|
||||
return jiffies_to_usecs(rto);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -239,9 +239,8 @@ static int nf_reject_fill_skb_dst(struct sk_buff *skb_in)
|
||||
void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb,
|
||||
int hook)
|
||||
{
|
||||
struct sk_buff *nskb;
|
||||
struct iphdr *niph;
|
||||
const struct tcphdr *oth;
|
||||
struct sk_buff *nskb;
|
||||
struct tcphdr _oth;
|
||||
|
||||
oth = nf_reject_ip_tcphdr_get(oldskb, &_oth, hook);
|
||||
@ -266,14 +265,12 @@ void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb,
|
||||
nskb->mark = IP4_REPLY_MARK(net, oldskb->mark);
|
||||
|
||||
skb_reserve(nskb, LL_MAX_HEADER);
|
||||
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
|
||||
ip4_dst_hoplimit(skb_dst(nskb)));
|
||||
nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
|
||||
ip4_dst_hoplimit(skb_dst(nskb)));
|
||||
nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
|
||||
if (ip_route_me_harder(net, sk, nskb, RTN_UNSPEC))
|
||||
goto free_nskb;
|
||||
|
||||
niph = ip_hdr(nskb);
|
||||
|
||||
/* "Never happens" */
|
||||
if (nskb->len > dst_mtu(skb_dst(nskb)))
|
||||
goto free_nskb;
|
||||
@ -290,6 +287,7 @@ void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb,
|
||||
*/
|
||||
if (nf_bridge_info_exists(oldskb)) {
|
||||
struct ethhdr *oeth = eth_hdr(oldskb);
|
||||
struct iphdr *niph = ip_hdr(nskb);
|
||||
struct net_device *br_indev;
|
||||
|
||||
br_indev = nf_bridge_get_physindev(oldskb, net);
|
||||
|
@ -323,6 +323,7 @@ config IPV6_RPL_LWTUNNEL
|
||||
bool "IPv6: RPL Source Routing Header support"
|
||||
depends on IPV6
|
||||
select LWTUNNEL
|
||||
select DST_CACHE
|
||||
help
|
||||
Support for RFC6554 RPL Source Routing Header using the lightweight
|
||||
tunnels mechanism.
|
||||
|
@ -223,33 +223,23 @@ void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb,
|
||||
const struct tcphdr *oth, unsigned int otcplen)
|
||||
{
|
||||
struct tcphdr *tcph;
|
||||
int needs_ack;
|
||||
|
||||
skb_reset_transport_header(nskb);
|
||||
tcph = skb_put(nskb, sizeof(struct tcphdr));
|
||||
tcph = skb_put_zero(nskb, sizeof(struct tcphdr));
|
||||
/* Truncate to length (no data) */
|
||||
tcph->doff = sizeof(struct tcphdr)/4;
|
||||
tcph->source = oth->dest;
|
||||
tcph->dest = oth->source;
|
||||
|
||||
if (oth->ack) {
|
||||
needs_ack = 0;
|
||||
tcph->seq = oth->ack_seq;
|
||||
tcph->ack_seq = 0;
|
||||
} else {
|
||||
needs_ack = 1;
|
||||
tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
|
||||
otcplen - (oth->doff<<2));
|
||||
tcph->seq = 0;
|
||||
tcph->ack = 1;
|
||||
}
|
||||
|
||||
/* Reset flags */
|
||||
((u_int8_t *)tcph)[13] = 0;
|
||||
tcph->rst = 1;
|
||||
tcph->ack = needs_ack;
|
||||
tcph->window = 0;
|
||||
tcph->urg_ptr = 0;
|
||||
tcph->check = 0;
|
||||
|
||||
/* Adjust TCP checksum */
|
||||
tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr,
|
||||
@ -283,7 +273,6 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
|
||||
const struct tcphdr *otcph;
|
||||
unsigned int otcplen, hh_len;
|
||||
const struct ipv6hdr *oip6h = ipv6_hdr(oldskb);
|
||||
struct ipv6hdr *ip6h;
|
||||
struct dst_entry *dst = NULL;
|
||||
struct flowi6 fl6;
|
||||
|
||||
@ -339,8 +328,7 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
|
||||
nskb->mark = fl6.flowi6_mark;
|
||||
|
||||
skb_reserve(nskb, hh_len + dst->header_len);
|
||||
ip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP,
|
||||
ip6_dst_hoplimit(dst));
|
||||
nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, ip6_dst_hoplimit(dst));
|
||||
nf_reject_ip6_tcphdr_put(nskb, oldskb, otcph, otcplen);
|
||||
|
||||
nf_ct_attach(nskb, oldskb);
|
||||
@ -355,6 +343,7 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
|
||||
*/
|
||||
if (nf_bridge_info_exists(oldskb)) {
|
||||
struct ethhdr *oeth = eth_hdr(oldskb);
|
||||
struct ipv6hdr *ip6h = ipv6_hdr(nskb);
|
||||
struct net_device *br_indev;
|
||||
|
||||
br_indev = nf_bridge_get_physindev(oldskb, net);
|
||||
|
@ -988,6 +988,56 @@ static void __nf_conntrack_insert_prepare(struct nf_conn *ct)
|
||||
tstamp->start = ktime_get_real_ns();
|
||||
}
|
||||
|
||||
/**
|
||||
* nf_ct_match_reverse - check if ct1 and ct2 refer to identical flow
|
||||
* @ct1: conntrack in hash table to check against
|
||||
* @ct2: merge candidate
|
||||
*
|
||||
* returns true if ct1 and ct2 happen to refer to the same flow, but
|
||||
* in opposing directions, i.e.
|
||||
* ct1: a:b -> c:d
|
||||
* ct2: c:d -> a:b
|
||||
* for both directions. If so, @ct2 should not have been created
|
||||
* as the skb should have been picked up as ESTABLISHED flow.
|
||||
* But ct1 was not yet committed to hash table before skb that created
|
||||
* ct2 had arrived.
|
||||
*
|
||||
* Note we don't compare netns because ct entries in different net
|
||||
* namespace cannot clash to begin with.
|
||||
*
|
||||
* @return: true if ct1 and ct2 are identical when swapping origin/reply.
|
||||
*/
|
||||
static bool
|
||||
nf_ct_match_reverse(const struct nf_conn *ct1, const struct nf_conn *ct2)
|
||||
{
|
||||
u16 id1, id2;
|
||||
|
||||
/* ct1's original-direction tuple must equal ct2's reply-direction tuple ... */
if (!nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
|
||||
&ct2->tuplehash[IP_CT_DIR_REPLY].tuple))
|
||||
return false;
|
||||
|
||||
/* ... and ct1's reply-direction tuple must equal ct2's original one. */
if (!nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_REPLY].tuple,
|
||||
&ct2->tuplehash[IP_CT_DIR_ORIGINAL].tuple))
|
||||
return false;
|
||||
|
||||
/* The per-direction zone ids are compared crosswise as well:
 * ct1 original against ct2 reply first, then ct1 reply against
 * ct2 original.
 */
id1 = nf_ct_zone_id(nf_ct_zone(ct1), IP_CT_DIR_ORIGINAL);
|
||||
id2 = nf_ct_zone_id(nf_ct_zone(ct2), IP_CT_DIR_REPLY);
|
||||
if (id1 != id2)
|
||||
return false;
|
||||
|
||||
id1 = nf_ct_zone_id(nf_ct_zone(ct1), IP_CT_DIR_REPLY);
|
||||
id2 = nf_ct_zone_id(nf_ct_zone(ct2), IP_CT_DIR_ORIGINAL);
|
||||
|
||||
return id1 == id2;
|
||||
}
|
||||
|
||||
/* Return nonzero if @ct and @loser_ct pass either nf_ct_match() or
 * nf_ct_match_reverse(), the latter meaning both entries describe the same
 * flow with original and reply directions swapped.
 *
 * NOTE(review): nf_ct_match() is defined outside this view; presumably it
 * tests for the same flow in the same direction - confirm.
 */
static int nf_ct_can_merge(const struct nf_conn *ct,
|
||||
const struct nf_conn *loser_ct)
|
||||
{
|
||||
return nf_ct_match(ct, loser_ct) ||
|
||||
nf_ct_match_reverse(ct, loser_ct);
|
||||
}
|
||||
|
||||
/* caller must hold locks to prevent concurrent changes */
|
||||
static int __nf_ct_resolve_clash(struct sk_buff *skb,
|
||||
struct nf_conntrack_tuple_hash *h)
|
||||
@ -999,11 +1049,7 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb,
|
||||
|
||||
loser_ct = nf_ct_get(skb, &ctinfo);
|
||||
|
||||
if (nf_ct_is_dying(ct))
|
||||
return NF_DROP;
|
||||
|
||||
if (((ct->status & IPS_NAT_DONE_MASK) == 0) ||
|
||||
nf_ct_match(ct, loser_ct)) {
|
||||
if (nf_ct_can_merge(ct, loser_ct)) {
|
||||
struct net *net = nf_ct_net(ct);
|
||||
|
||||
nf_conntrack_get(&ct->ct_general);
|
||||
@ -2151,80 +2197,6 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
|
||||
nf_conntrack_get(skb_nfct(nskb));
|
||||
}
|
||||
|
||||
/* Rebuild the tuple of @skb, look it up with nf_conntrack_find_get() and, if
 * an entry is found, attach that entry to @skb in place of @ct (whose
 * reference is dropped). The source/destination NAT manipulations recorded in
 * the status bits of the old @ct are then re-applied through the NAT hook.
 *
 * Returns NF_DROP when the l4 protocol offset or the tuple cannot be
 * extracted from @skb, the verdict of manip_pkt() when it is not NF_ACCEPT,
 * and NF_ACCEPT otherwise (including when the lookup finds nothing or
 * nf_nat_hook is NULL).
 */
static int __nf_conntrack_update(struct net *net, struct sk_buff *skb,
|
||||
struct nf_conn *ct,
|
||||
enum ip_conntrack_info ctinfo)
|
||||
{
|
||||
const struct nf_nat_hook *nat_hook;
|
||||
struct nf_conntrack_tuple_hash *h;
|
||||
struct nf_conntrack_tuple tuple;
|
||||
unsigned int status;
|
||||
int dataoff;
|
||||
u16 l3num;
|
||||
u8 l4num;
|
||||
|
||||
l3num = nf_ct_l3num(ct);
|
||||
|
||||
dataoff = get_l4proto(skb, skb_network_offset(skb), l3num, &l4num);
|
||||
if (dataoff <= 0)
|
||||
return NF_DROP;
|
||||
|
||||
if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
|
||||
l4num, net, &tuple))
|
||||
return NF_DROP;
|
||||
|
||||
/* With source NAT flagged, overwrite the tuple's source address and
 * protocol part with those of ct's original-direction tuple.
 */
if (ct->status & IPS_SRC_NAT) {
|
||||
memcpy(tuple.src.u3.all,
|
||||
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.all,
|
||||
sizeof(tuple.src.u3.all));
|
||||
tuple.src.u.all =
|
||||
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all;
|
||||
}
|
||||
|
||||
/* Same for the destination side when destination NAT is flagged. */
if (ct->status & IPS_DST_NAT) {
|
||||
memcpy(tuple.dst.u3.all,
|
||||
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.all,
|
||||
sizeof(tuple.dst.u3.all));
|
||||
tuple.dst.u.all =
|
||||
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.all;
|
||||
}
|
||||
|
||||
/* Nothing found for this tuple: keep the skb attached to ct as is. */
h = nf_conntrack_find_get(net, nf_ct_zone(ct), &tuple);
|
||||
if (!h)
|
||||
return NF_ACCEPT;
|
||||
|
||||
/* Store status bits of the conntrack that is clashing to re-do NAT
|
||||
* mangling according to what it has been done already to this packet.
|
||||
*/
|
||||
status = ct->status;
|
||||
|
||||
/* Drop the old entry and attach the one found by the lookup to the skb. */
nf_ct_put(ct);
|
||||
ct = nf_ct_tuplehash_to_ctrack(h);
|
||||
nf_ct_set(skb, ct, ctinfo);
|
||||
|
||||
nat_hook = rcu_dereference(nf_nat_hook);
|
||||
if (!nat_hook)
|
||||
return NF_ACCEPT;
|
||||
|
||||
if (status & IPS_SRC_NAT) {
|
||||
unsigned int verdict = nat_hook->manip_pkt(skb, ct,
|
||||
NF_NAT_MANIP_SRC,
|
||||
IP_CT_DIR_ORIGINAL);
|
||||
if (verdict != NF_ACCEPT)
|
||||
return verdict;
|
||||
}
|
||||
|
||||
if (status & IPS_DST_NAT) {
|
||||
unsigned int verdict = nat_hook->manip_pkt(skb, ct,
|
||||
NF_NAT_MANIP_DST,
|
||||
IP_CT_DIR_ORIGINAL);
|
||||
if (verdict != NF_ACCEPT)
|
||||
return verdict;
|
||||
}
|
||||
|
||||
return NF_ACCEPT;
|
||||
}
|
||||
|
||||
/* This packet is coming from userspace via nf_queue, complete the packet
|
||||
* processing after the helper invocation in nf_confirm().
|
||||
*/
|
||||
@ -2288,17 +2260,6 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
|
||||
if (!ct)
|
||||
return NF_ACCEPT;
|
||||
|
||||
if (!nf_ct_is_confirmed(ct)) {
|
||||
int ret = __nf_conntrack_update(net, skb, ct, ctinfo);
|
||||
|
||||
if (ret != NF_ACCEPT)
|
||||
return ret;
|
||||
|
||||
ct = nf_ct_get(skb, &ctinfo);
|
||||
if (!ct)
|
||||
return NF_ACCEPT;
|
||||
}
|
||||
|
||||
return nf_confirm_cthelper(skb, ct, ctinfo);
|
||||
}
|
||||
|
||||
|
@ -382,7 +382,7 @@ nla_put_failure:
|
||||
#define ctnetlink_dump_secctx(a, b) (0)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NF_CONNTRACK_LABELS
|
||||
#ifdef CONFIG_NF_CONNTRACK_EVENTS
|
||||
static inline int ctnetlink_label_size(const struct nf_conn *ct)
|
||||
{
|
||||
struct nf_conn_labels *labels = nf_ct_labels_find(ct);
|
||||
@ -391,6 +391,7 @@ static inline int ctnetlink_label_size(const struct nf_conn *ct)
|
||||
return 0;
|
||||
return nla_total_size(sizeof(labels->bits));
|
||||
}
|
||||
#endif
|
||||
|
||||
static int
|
||||
ctnetlink_dump_labels(struct sk_buff *skb, const struct nf_conn *ct)
|
||||
@ -411,10 +412,6 @@ ctnetlink_dump_labels(struct sk_buff *skb, const struct nf_conn *ct)
|
||||
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
#define ctnetlink_dump_labels(a, b) (0)
|
||||
#define ctnetlink_label_size(a) (0)
|
||||
#endif
|
||||
|
||||
#define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
|
||||
|
||||
@ -652,7 +649,6 @@ static size_t ctnetlink_proto_size(const struct nf_conn *ct)
|
||||
|
||||
return len + len4;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline size_t ctnetlink_acct_size(const struct nf_conn *ct)
|
||||
{
|
||||
@ -690,6 +686,7 @@ static inline size_t ctnetlink_timestamp_size(const struct nf_conn *ct)
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NF_CONNTRACK_EVENTS
|
||||
static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct)
|
||||
|
@ -183,7 +183,35 @@ hash_by_src(const struct net *net,
|
||||
return reciprocal_scale(hash, nf_nat_htable_size);
|
||||
}
|
||||
|
||||
/* Is this tuple already taken? (not by us) */
|
||||
/**
|
||||
* nf_nat_used_tuple - check if proposed nat tuple clashes with existing entry
|
||||
* @tuple: proposed NAT binding
|
||||
* @ignored_conntrack: our (unconfirmed) conntrack entry
|
||||
*
|
||||
* A conntrack entry can be inserted to the connection tracking table
|
||||
* if there is no existing entry with an identical tuple in either direction.
|
||||
*
|
||||
* Example:
|
||||
* INITIATOR -> NAT/PAT -> RESPONDER
|
||||
*
|
||||
* INITIATOR passes through NAT/PAT ("us") and SNAT is done (saddr rewrite).
|
||||
* Then, later, NAT/PAT itself also connects to RESPONDER.
|
||||
*
|
||||
* This will not work if the SNAT done earlier has same IP:PORT source pair.
|
||||
*
|
||||
* Conntrack table has:
|
||||
* ORIGINAL: $IP_INITIATOR:$SPORT -> $IP_RESPONDER:$DPORT
|
||||
* REPLY: $IP_RESPONDER:$DPORT -> $IP_NAT:$SPORT
|
||||
*
|
||||
* and new locally originating connection wants:
|
||||
* ORIGINAL: $IP_NAT:$SPORT -> $IP_RESPONDER:$DPORT
|
||||
* REPLY: $IP_RESPONDER:$DPORT -> $IP_NAT:$SPORT
|
||||
*
|
||||
* ... which would mean incoming packets cannot be distinguished between
|
||||
* the existing and the newly added entry (identical IP_CT_DIR_REPLY tuple).
|
||||
*
|
||||
* @return: true if the proposed NAT mapping collides with an existing entry.
|
||||
*/
|
||||
static int
|
||||
nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
|
||||
const struct nf_conn *ignored_conntrack)
|
||||
@ -200,6 +228,94 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
|
||||
return nf_conntrack_tuple_taken(&reply, ignored_conntrack);
|
||||
}
|
||||
|
||||
static bool nf_nat_allow_clash(const struct nf_conn *ct)
|
||||
{
|
||||
return nf_ct_l4proto_find(nf_ct_protonum(ct))->allow_clash;
|
||||
}
|
||||
|
||||
/**
|
||||
* nf_nat_used_tuple_new - check if to-be-inserted conntrack collides with existing entry
|
||||
* @tuple: proposed NAT binding
|
||||
* @ignored_ct: our (unconfirmed) conntrack entry
|
||||
*
|
||||
* Same as nf_nat_used_tuple, but also check for rare clash in reverse
|
||||
* direction. Should be called only when @tuple has not been altered, i.e.
|
||||
 * @ignored_ct will not be subject to NAT.
|
||||
*
|
||||
* @return: true if the proposed NAT mapping collides with existing entry.
|
||||
*/
|
||||
static noinline bool
|
||||
nf_nat_used_tuple_new(const struct nf_conntrack_tuple *tuple,
|
||||
const struct nf_conn *ignored_ct)
|
||||
{
|
||||
static const unsigned long uses_nat = IPS_NAT_MASK | IPS_SEQ_ADJUST_BIT;
|
||||
const struct nf_conntrack_tuple_hash *thash;
|
||||
const struct nf_conntrack_zone *zone;
|
||||
struct nf_conn *ct;
|
||||
bool taken = true;
|
||||
struct net *net;
|
||||
|
||||
if (!nf_nat_used_tuple(tuple, ignored_ct))
|
||||
return false;
|
||||
|
||||
if (!nf_nat_allow_clash(ignored_ct))
|
||||
return true;
|
||||
|
||||
/* Initial choice clashes with existing conntrack.
|
||||
* Check for (rare) reverse collision.
|
||||
*
|
||||
* This can happen when new packets are received in both directions
|
||||
* at the exact same time on different CPUs.
|
||||
*
|
||||
* Without SMP, first packet creates new conntrack entry and second
|
||||
* packet is resolved as established reply packet.
|
||||
*
|
||||
* With parallel processing, both packets could be picked up as
|
||||
* new and both get their own ct entry allocated.
|
||||
*
|
||||
* If ignored_conntrack and colliding ct are not subject to NAT then
|
||||
* pretend the tuple is available and let later clash resolution
|
||||
* handle this at insertion time.
|
||||
*
|
||||
* Without it, the 'reply' packet has its source port rewritten
|
||||
* by nat engine.
|
||||
*/
|
||||
if (READ_ONCE(ignored_ct->status) & uses_nat)
|
||||
return true;
|
||||
|
||||
net = nf_ct_net(ignored_ct);
|
||||
zone = nf_ct_zone(ignored_ct);
|
||||
|
||||
thash = nf_conntrack_find_get(net, zone, tuple);
|
||||
if (unlikely(!thash)) /* clashing entry went away */
|
||||
return false;
|
||||
|
||||
ct = nf_ct_tuplehash_to_ctrack(thash);
|
||||
|
||||
/* NB: IP_CT_DIR_ORIGINAL should be impossible because
|
||||
* nf_nat_used_tuple() handles origin collisions.
|
||||
*
|
||||
* Handle remote chance other CPU confirmed its ct right after.
|
||||
*/
|
||||
if (thash->tuple.dst.dir != IP_CT_DIR_REPLY)
|
||||
goto out;
|
||||
|
||||
/* clashing connection subject to NAT? Retry with new tuple. */
|
||||
if (READ_ONCE(ct->status) & uses_nat)
|
||||
goto out;
|
||||
|
||||
if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
|
||||
&ignored_ct->tuplehash[IP_CT_DIR_REPLY].tuple) &&
|
||||
nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
|
||||
&ignored_ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) {
|
||||
taken = false;
|
||||
goto out;
|
||||
}
|
||||
out:
|
||||
nf_ct_put(ct);
|
||||
return taken;
|
||||
}
|
||||
|
||||
static bool nf_nat_may_kill(struct nf_conn *ct, unsigned long flags)
|
||||
{
|
||||
static const unsigned long flags_refuse = IPS_FIXED_TIMEOUT |
|
||||
@ -611,7 +727,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
|
||||
!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
|
||||
/* try the original tuple first */
|
||||
if (nf_in_range(orig_tuple, range)) {
|
||||
if (!nf_nat_used_tuple(orig_tuple, ct)) {
|
||||
if (!nf_nat_used_tuple_new(orig_tuple, ct)) {
|
||||
*tuple = *orig_tuple;
|
||||
return;
|
||||
}
|
||||
@ -1208,7 +1324,6 @@ static const struct nf_nat_hook nat_hook = {
|
||||
#ifdef CONFIG_XFRM
|
||||
.decode_session = __nf_nat_decode_session,
|
||||
#endif
|
||||
.manip_pkt = nf_nat_manip_pkt,
|
||||
.remove_nat_bysrc = nf_nat_cleanup_conntrack,
|
||||
};
|
||||
|
||||
|
@ -1849,7 +1849,7 @@ static int nft_dump_basechain_hook(struct sk_buff *skb, int family,
|
||||
if (!hook_list)
|
||||
hook_list = &basechain->hook_list;
|
||||
|
||||
list_for_each_entry(hook, hook_list, list) {
|
||||
list_for_each_entry_rcu(hook, hook_list, list) {
|
||||
if (!first)
|
||||
first = hook;
|
||||
|
||||
@ -6684,7 +6684,7 @@ static int nft_setelem_catchall_insert(const struct net *net,
|
||||
}
|
||||
}
|
||||
|
||||
catchall = kmalloc(sizeof(*catchall), GFP_KERNEL);
|
||||
catchall = kmalloc(sizeof(*catchall), GFP_KERNEL_ACCOUNT);
|
||||
if (!catchall)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -9207,7 +9207,7 @@ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
|
||||
flowtable->data.type->setup(&flowtable->data, hook->ops.dev,
|
||||
FLOW_BLOCK_UNBIND);
|
||||
list_del_rcu(&hook->list);
|
||||
kfree(hook);
|
||||
kfree_rcu(hook, rcu);
|
||||
}
|
||||
kfree(flowtable->name);
|
||||
module_put(flowtable->data.type->owner);
|
||||
|
@ -535,7 +535,7 @@ nft_match_large_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
|
||||
struct xt_match *m = expr->ops->data;
|
||||
int ret;
|
||||
|
||||
priv->info = kmalloc(XT_ALIGN(m->matchsize), GFP_KERNEL);
|
||||
priv->info = kmalloc(XT_ALIGN(m->matchsize), GFP_KERNEL_ACCOUNT);
|
||||
if (!priv->info)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -808,7 +808,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
|
||||
goto err;
|
||||
}
|
||||
|
||||
ops = kzalloc(sizeof(struct nft_expr_ops), GFP_KERNEL);
|
||||
ops = kzalloc(sizeof(struct nft_expr_ops), GFP_KERNEL_ACCOUNT);
|
||||
if (!ops) {
|
||||
err = -ENOMEM;
|
||||
goto err;
|
||||
@ -898,7 +898,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
|
||||
goto err;
|
||||
}
|
||||
|
||||
ops = kzalloc(sizeof(struct nft_expr_ops), GFP_KERNEL);
|
||||
ops = kzalloc(sizeof(struct nft_expr_ops), GFP_KERNEL_ACCOUNT);
|
||||
if (!ops) {
|
||||
err = -ENOMEM;
|
||||
goto err;
|
||||
|
@ -163,7 +163,7 @@ static int nft_log_init(const struct nft_ctx *ctx,
|
||||
|
||||
nla = tb[NFTA_LOG_PREFIX];
|
||||
if (nla != NULL) {
|
||||
priv->prefix = kmalloc(nla_len(nla) + 1, GFP_KERNEL);
|
||||
priv->prefix = kmalloc(nla_len(nla) + 1, GFP_KERNEL_ACCOUNT);
|
||||
if (priv->prefix == NULL)
|
||||
return -ENOMEM;
|
||||
nla_strscpy(priv->prefix, nla, nla_len(nla) + 1);
|
||||
|
@ -952,7 +952,7 @@ static int nft_secmark_obj_init(const struct nft_ctx *ctx,
|
||||
if (tb[NFTA_SECMARK_CTX] == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
priv->ctx = nla_strdup(tb[NFTA_SECMARK_CTX], GFP_KERNEL);
|
||||
priv->ctx = nla_strdup(tb[NFTA_SECMARK_CTX], GFP_KERNEL_ACCOUNT);
|
||||
if (!priv->ctx)
|
||||
return -ENOMEM;
|
||||
|
||||
|
@ -66,7 +66,7 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
|
||||
if (priv->offset + priv->modulus - 1 < priv->offset)
|
||||
return -EOVERFLOW;
|
||||
|
||||
priv->counter = kmalloc(sizeof(*priv->counter), GFP_KERNEL);
|
||||
priv->counter = kmalloc(sizeof(*priv->counter), GFP_KERNEL_ACCOUNT);
|
||||
if (!priv->counter)
|
||||
return -ENOMEM;
|
||||
|
||||
|
@ -663,7 +663,7 @@ static int pipapo_realloc_mt(struct nft_pipapo_field *f,
|
||||
check_add_overflow(rules, extra, &rules_alloc))
|
||||
return -EOVERFLOW;
|
||||
|
||||
new_mt = kvmalloc_array(rules_alloc, sizeof(*new_mt), GFP_KERNEL);
|
||||
new_mt = kvmalloc_array(rules_alloc, sizeof(*new_mt), GFP_KERNEL_ACCOUNT);
|
||||
if (!new_mt)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -936,7 +936,7 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
|
||||
return;
|
||||
}
|
||||
|
||||
new_lt = kvzalloc(lt_size + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL);
|
||||
new_lt = kvzalloc(lt_size + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL_ACCOUNT);
|
||||
if (!new_lt)
|
||||
return;
|
||||
|
||||
@ -1212,7 +1212,7 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
|
||||
scratch = kzalloc_node(struct_size(scratch, map,
|
||||
bsize_max * 2) +
|
||||
NFT_PIPAPO_ALIGN_HEADROOM,
|
||||
GFP_KERNEL, cpu_to_node(i));
|
||||
GFP_KERNEL_ACCOUNT, cpu_to_node(i));
|
||||
if (!scratch) {
|
||||
/* On failure, there's no need to undo previous
|
||||
* allocations: this means that some scratch maps have
|
||||
@ -1427,7 +1427,7 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
|
||||
struct nft_pipapo_match *new;
|
||||
int i;
|
||||
|
||||
new = kmalloc(struct_size(new, f, old->field_count), GFP_KERNEL);
|
||||
new = kmalloc(struct_size(new, f, old->field_count), GFP_KERNEL_ACCOUNT);
|
||||
if (!new)
|
||||
return NULL;
|
||||
|
||||
@ -1457,7 +1457,7 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
|
||||
new_lt = kvzalloc(src->groups * NFT_PIPAPO_BUCKETS(src->bb) *
|
||||
src->bsize * sizeof(*dst->lt) +
|
||||
NFT_PIPAPO_ALIGN_HEADROOM,
|
||||
GFP_KERNEL);
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
if (!new_lt)
|
||||
goto out_lt;
|
||||
|
||||
@ -1470,7 +1470,8 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
|
||||
|
||||
if (src->rules > 0) {
|
||||
dst->mt = kvmalloc_array(src->rules_alloc,
|
||||
sizeof(*src->mt), GFP_KERNEL);
|
||||
sizeof(*src->mt),
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
if (!dst->mt)
|
||||
goto out_mt;
|
||||
|
||||
|
@ -509,13 +509,14 @@ static int nft_tunnel_obj_init(const struct nft_ctx *ctx,
|
||||
return err;
|
||||
}
|
||||
|
||||
md = metadata_dst_alloc(priv->opts.len, METADATA_IP_TUNNEL, GFP_KERNEL);
|
||||
md = metadata_dst_alloc(priv->opts.len, METADATA_IP_TUNNEL,
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
if (!md)
|
||||
return -ENOMEM;
|
||||
|
||||
memcpy(&md->u.tun_info, &info, sizeof(info));
|
||||
#ifdef CONFIG_DST_CACHE
|
||||
err = dst_cache_init(&md->u.tun_info.dst_cache, GFP_KERNEL);
|
||||
err = dst_cache_init(&md->u.tun_info.dst_cache, GFP_KERNEL_ACCOUNT);
|
||||
if (err < 0) {
|
||||
metadata_dst_free(md);
|
||||
return err;
|
||||
|
@ -884,7 +884,7 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
|
||||
|
||||
mutex_lock(&qrtr_node_lock);
|
||||
list_for_each_entry(node, &qrtr_all_nodes, item) {
|
||||
skbn = skb_clone(skb, GFP_KERNEL);
|
||||
skbn = pskb_copy(skb, GFP_KERNEL);
|
||||
if (!skbn)
|
||||
break;
|
||||
skb_set_owner_w(skbn, skb->sk);
|
||||
|
@ -13,6 +13,7 @@ TEST_PROGS += conntrack_ipip_mtu.sh
|
||||
TEST_PROGS += conntrack_tcp_unreplied.sh
|
||||
TEST_PROGS += conntrack_sctp_collision.sh
|
||||
TEST_PROGS += conntrack_vrf.sh
|
||||
TEST_PROGS += conntrack_reverse_clash.sh
|
||||
TEST_PROGS += ipvs.sh
|
||||
TEST_PROGS += nf_conntrack_packetdrill.sh
|
||||
TEST_PROGS += nf_nat_edemux.sh
|
||||
@ -26,6 +27,8 @@ TEST_PROGS += nft_nat.sh
|
||||
TEST_PROGS += nft_nat_zones.sh
|
||||
TEST_PROGS += nft_queue.sh
|
||||
TEST_PROGS += nft_synproxy.sh
|
||||
TEST_PROGS += nft_tproxy_tcp.sh
|
||||
TEST_PROGS += nft_tproxy_udp.sh
|
||||
TEST_PROGS += nft_zones_many.sh
|
||||
TEST_PROGS += rpath.sh
|
||||
TEST_PROGS += xt_string.sh
|
||||
@ -36,6 +39,7 @@ TEST_GEN_PROGS = conntrack_dump_flush
|
||||
|
||||
TEST_GEN_FILES = audit_logread
|
||||
TEST_GEN_FILES += connect_close nf_queue
|
||||
TEST_GEN_FILES += conntrack_reverse_clash
|
||||
TEST_GEN_FILES += sctp_collision
|
||||
|
||||
include ../../lib.mk
|
||||
|
@ -81,6 +81,7 @@ CONFIG_NFT_QUEUE=m
|
||||
CONFIG_NFT_QUOTA=m
|
||||
CONFIG_NFT_REDIR=m
|
||||
CONFIG_NFT_SYNPROXY=m
|
||||
CONFIG_NFT_TPROXY=m
|
||||
CONFIG_VETH=m
|
||||
CONFIG_VLAN_8021Q=m
|
||||
CONFIG_XFRM_USER=m
|
||||
|
125
tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c
Normal file
125
tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c
Normal file
@ -0,0 +1,125 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Needs something like:
|
||||
*
|
||||
* iptables -t nat -A POSTROUTING -o nomatch -j MASQUERADE
|
||||
*
|
||||
* so NAT engine attaches a NAT null-binding to each connection.
|
||||
*
|
||||
* With unmodified kernels, child or parent will exit with
|
||||
* "Port number changed" error, even though no port translation
|
||||
* was requested.
|
||||
*/
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/wait.h>
|
||||
|
||||
#define LEN 512
|
||||
#define PORT 56789
|
||||
#define TEST_TIME 5
|
||||
|
||||
/* Print @e together with the current errno description via perror(),
 * then terminate the process with exit status 111.
 */
static void die(const char *e)
{
	perror(e);
	exit(111);
}
|
||||
|
||||
/* Report an unexpected source port on stderr and exit with status 1.
 * @got is converted with ntohs() before printing; @want is printed
 * as passed in.
 */
static void die_port(uint16_t got, uint16_t want)
{
	const uint16_t got_host = ntohs(got);

	fprintf(stderr, "Port number changed, wanted %d got %d\n", want, got_host);
	exit(1);
}
|
||||
|
||||
/* Create an IPv4 UDP socket whose receive calls time out after one
 * second (SO_RCVTIMEO).
 *
 * Exits through die() when the socket cannot be created or the
 * timeout cannot be installed: main() relies on recvfrom() returning
 * within bounded time, so silently running without the timeout could
 * hang the test.
 *
 * Returns the new socket descriptor.
 */
static int udp_socket(void)
{
	static const struct timeval tv = {
		.tv_sec = 1,
	};
	int fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);

	if (fd < 0)
		die("socket");

	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) < 0)
		die("setsockopt SO_RCVTIMEO");

	return fd;
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
struct sockaddr_in sa1 = {
|
||||
.sin_family = AF_INET,
|
||||
};
|
||||
struct sockaddr_in sa2 = {
|
||||
.sin_family = AF_INET,
|
||||
};
|
||||
int s1, s2, status;
|
||||
time_t end, now;
|
||||
socklen_t plen;
|
||||
char buf[LEN];
|
||||
bool child;
|
||||
|
||||
sa1.sin_port = htons(PORT);
|
||||
sa2.sin_port = htons(PORT + 1);
|
||||
|
||||
s1 = udp_socket();
|
||||
s2 = udp_socket();
|
||||
|
||||
inet_pton(AF_INET, "127.0.0.11", &sa1.sin_addr);
|
||||
inet_pton(AF_INET, "127.0.0.12", &sa2.sin_addr);
|
||||
|
||||
if (bind(s1, (struct sockaddr *)&sa1, sizeof(sa1)) < 0)
|
||||
die("bind 1");
|
||||
if (bind(s2, (struct sockaddr *)&sa2, sizeof(sa2)) < 0)
|
||||
die("bind 2");
|
||||
|
||||
child = fork() == 0;
|
||||
|
||||
now = time(NULL);
|
||||
end = now + TEST_TIME;
|
||||
|
||||
while (now < end) {
|
||||
struct sockaddr_in peer;
|
||||
socklen_t plen = sizeof(peer);
|
||||
|
||||
now = time(NULL);
|
||||
|
||||
if (child) {
|
||||
if (sendto(s1, buf, LEN, 0, (struct sockaddr *)&sa2, sizeof(sa2)) != LEN)
|
||||
continue;
|
||||
|
||||
if (recvfrom(s2, buf, LEN, 0, (struct sockaddr *)&peer, &plen) < 0)
|
||||
die("child recvfrom");
|
||||
|
||||
if (peer.sin_port != htons(PORT))
|
||||
die_port(peer.sin_port, PORT);
|
||||
} else {
|
||||
if (sendto(s2, buf, LEN, 0, (struct sockaddr *)&sa1, sizeof(sa1)) != LEN)
|
||||
continue;
|
||||
|
||||
if (recvfrom(s1, buf, LEN, 0, (struct sockaddr *)&peer, &plen) < 0)
|
||||
die("parent recvfrom");
|
||||
|
||||
if (peer.sin_port != htons((PORT + 1)))
|
||||
die_port(peer.sin_port, PORT + 1);
|
||||
}
|
||||
}
|
||||
|
||||
if (child)
|
||||
return 0;
|
||||
|
||||
wait(&status);
|
||||
|
||||
if (WIFEXITED(status))
|
||||
return WEXITSTATUS(status);
|
||||
|
||||
return 1;
|
||||
}
|
51
tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh
Executable file
51
tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh
Executable file
@ -0,0 +1,51 @@
|
||||
#!/bin/bash
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
source lib.sh
|
||||
|
||||
cleanup()
|
||||
{
|
||||
cleanup_all_ns
|
||||
}
|
||||
|
||||
checktool "nft --version" "run test without nft"
|
||||
checktool "conntrack --version" "run test without conntrack"
|
||||
|
||||
trap cleanup EXIT
|
||||
|
||||
setup_ns ns0
|
||||
|
||||
# make loopback connections get nat null bindings assigned
|
||||
ip netns exec "$ns0" nft -f - <<EOF
|
||||
table ip nat {
|
||||
chain POSTROUTING {
|
||||
type nat hook postrouting priority srcnat; policy accept;
|
||||
oifname "nomatch" counter packets 0 bytes 0 masquerade
|
||||
}
|
||||
}
|
||||
EOF
|
||||
|
||||
# Flush the conntrack table inside "$ns0" over and over until roughly
# five seconds have passed. Errors from conntrack are discarded.
do_flush()
{
	local deadline

	deadline=$(($(date +%s) + 5))

	while [ "$(date +%s)" -lt "$deadline" ]; do
		ip netns exec "$ns0" conntrack -F 2>/dev/null
	done
}
|
||||
|
||||
do_flush &
|
||||
|
||||
if ip netns exec "$ns0" ./conntrack_reverse_clash; then
|
||||
echo "PASS: No SNAT performed for null bindings"
|
||||
else
|
||||
echo "ERROR: SNAT performed without any matching snat rule"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
exit 0
|
@ -97,7 +97,7 @@ cleanup() {
|
||||
}
|
||||
|
||||
server_listen() {
|
||||
ip netns exec "$ns2" socat -u -4 TCP-LISTEN:8080,reuseaddr STDOUT > "${outfile}" &
|
||||
ip netns exec "$ns2" timeout 5 socat -u -4 TCP-LISTEN:8080,reuseaddr STDOUT > "${outfile}" &
|
||||
server_pid=$!
|
||||
sleep 0.2
|
||||
}
|
||||
|
@ -31,7 +31,7 @@ modprobe -q sctp
|
||||
|
||||
trap cleanup EXIT
|
||||
|
||||
setup_ns ns1 ns2 nsrouter
|
||||
setup_ns ns1 ns2 ns3 nsrouter
|
||||
|
||||
TMPFILE0=$(mktemp)
|
||||
TMPFILE1=$(mktemp)
|
||||
@ -48,6 +48,7 @@ if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" >
|
||||
exit $ksft_skip
|
||||
fi
|
||||
ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2"
|
||||
ip link add veth2 netns "$nsrouter" type veth peer name eth0 netns "$ns3"
|
||||
|
||||
ip -net "$nsrouter" link set veth0 up
|
||||
ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0
|
||||
@ -57,8 +58,13 @@ ip -net "$nsrouter" link set veth1 up
|
||||
ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1
|
||||
ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad
|
||||
|
||||
ip -net "$nsrouter" link set veth2 up
|
||||
ip -net "$nsrouter" addr add 10.0.3.1/24 dev veth2
|
||||
ip -net "$nsrouter" addr add dead:3::1/64 dev veth2 nodad
|
||||
|
||||
ip -net "$ns1" link set eth0 up
|
||||
ip -net "$ns2" link set eth0 up
|
||||
ip -net "$ns3" link set eth0 up
|
||||
|
||||
ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
|
||||
ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
|
||||
@ -70,6 +76,11 @@ ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
|
||||
ip -net "$ns2" route add default via 10.0.2.1
|
||||
ip -net "$ns2" route add default via dead:2::1
|
||||
|
||||
ip -net "$ns3" addr add 10.0.3.99/24 dev eth0
|
||||
ip -net "$ns3" addr add dead:3::99/64 dev eth0 nodad
|
||||
ip -net "$ns3" route add default via 10.0.3.1
|
||||
ip -net "$ns3" route add default via dead:3::1
|
||||
|
||||
load_ruleset() {
|
||||
local name=$1
|
||||
local prio=$2
|
||||
@ -473,6 +484,83 @@ EOF
|
||||
check_output_files "$TMPINPUT" "$TMPFILE1" "sctp output"
|
||||
}
|
||||
|
||||
udp_listener_ready()
|
||||
{
|
||||
ss -S -N "$1" -uln -o "sport = :12345" | grep -q 12345
|
||||
}
|
||||
|
||||
output_files_written()
|
||||
{
|
||||
test -s "$1" && test -s "$2"
|
||||
}
|
||||
|
||||
test_udp_ct_race()
|
||||
{
|
||||
ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
|
||||
flush ruleset
|
||||
table inet udpq {
|
||||
chain prerouting {
|
||||
type nat hook prerouting priority dstnat - 5; policy accept;
|
||||
ip daddr 10.6.6.6 udp dport 12345 counter dnat to numgen inc mod 2 map { 0 : 10.0.2.99, 1 : 10.0.3.99 }
|
||||
}
|
||||
chain postrouting {
|
||||
type filter hook postrouting priority srcnat - 5; policy accept;
|
||||
udp dport 12345 counter queue num 12
|
||||
}
|
||||
}
|
||||
EOF
|
||||
:> "$TMPFILE1"
|
||||
:> "$TMPFILE2"
|
||||
|
||||
timeout 10 ip netns exec "$ns2" socat UDP-LISTEN:12345,fork OPEN:"$TMPFILE1",trunc &
|
||||
local rpid1=$!
|
||||
|
||||
timeout 10 ip netns exec "$ns3" socat UDP-LISTEN:12345,fork OPEN:"$TMPFILE2",trunc &
|
||||
local rpid2=$!
|
||||
|
||||
ip netns exec "$nsrouter" ./nf_queue -q 12 -d 1000 &
|
||||
local nfqpid=$!
|
||||
|
||||
busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2"
|
||||
busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3"
|
||||
busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 12
|
||||
|
||||
# Send two packets, one should end up in ns2, other in ns3.
|
||||
# This is because nfqueue will delay packet for long enough so that
|
||||
# second packet will not find existing conntrack entry.
|
||||
echo "Packet 1" | ip netns exec "$ns1" socat STDIN UDP-DATAGRAM:10.6.6.6:12345,bind=0.0.0.0:55221
|
||||
echo "Packet 2" | ip netns exec "$ns1" socat STDIN UDP-DATAGRAM:10.6.6.6:12345,bind=0.0.0.0:55221
|
||||
|
||||
busywait 10000 output_files_written "$TMPFILE1" "$TMPFILE2"
|
||||
|
||||
kill "$nfqpid"
|
||||
|
||||
if ! ip netns exec "$nsrouter" bash -c 'conntrack -L -p udp --dport 12345 2>/dev/null | wc -l | grep -q "^1"'; then
|
||||
echo "FAIL: Expected One udp conntrack entry"
|
||||
ip netns exec "$nsrouter" conntrack -L -p udp --dport 12345
|
||||
ret=1
|
||||
fi
|
||||
|
||||
if ! ip netns exec "$nsrouter" nft delete table inet udpq; then
|
||||
echo "FAIL: Could not delete udpq table"
|
||||
ret=1
|
||||
return
|
||||
fi
|
||||
|
||||
NUMLINES1=$(wc -l < "$TMPFILE1")
|
||||
NUMLINES2=$(wc -l < "$TMPFILE2")
|
||||
|
||||
if [ "$NUMLINES1" -ne 1 ] || [ "$NUMLINES2" -ne 1 ]; then
|
||||
ret=1
|
||||
echo "FAIL: uneven udp packet distribution: $NUMLINES1 $NUMLINES2"
|
||||
echo -n "$TMPFILE1: ";cat "$TMPFILE1"
|
||||
echo -n "$TMPFILE2: ";cat "$TMPFILE2"
|
||||
return
|
||||
fi
|
||||
|
||||
echo "PASS: both udp receivers got one packet each"
|
||||
}
|
||||
|
||||
test_queue_removal()
|
||||
{
|
||||
read tainted_then < /proc/sys/kernel/tainted
|
||||
@ -512,6 +600,7 @@ EOF
|
||||
ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
|
||||
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
|
||||
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
|
||||
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth2.forwarding=1 > /dev/null
|
||||
|
||||
load_ruleset "filter" 0
|
||||
|
||||
@ -549,6 +638,7 @@ test_tcp_localhost_connectclose
|
||||
test_tcp_localhost_requeue
|
||||
test_sctp_forward
|
||||
test_sctp_output
|
||||
test_udp_ct_race
|
||||
|
||||
# should be last, adds vrf device in ns1 and changes routes
|
||||
test_icmp_vrf
|
||||
|
358
tools/testing/selftests/net/netfilter/nft_tproxy_tcp.sh
Executable file
358
tools/testing/selftests/net/netfilter/nft_tproxy_tcp.sh
Executable file
@ -0,0 +1,358 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# This tests tproxy on the following scenario:
|
||||
#
|
||||
# +------------+
|
||||
# +-------+ | nsrouter | +-------+
|
||||
# |ns1 |.99 .1| |.1 .99| ns2|
|
||||
# | eth0|---------------|veth0 veth1|------------------|eth0 |
|
||||
# | | 10.0.1.0/24 | | 10.0.2.0/24 | |
|
||||
# +-------+ dead:1::/64 | veth2 | dead:2::/64 +-------+
|
||||
# +------------+
|
||||
# |.1
|
||||
# |
|
||||
# |
|
||||
# | +-------+
|
||||
# | .99| ns3|
|
||||
# +------------------------|eth0 |
|
||||
# 10.0.3.0/24 | |
|
||||
# dead:3::/64 +-------+
|
||||
#
|
||||
# The tproxy implementation acts as an echo server so the client
|
||||
# must receive the same message it sent if it has been proxied.
|
||||
# If it is not proxied, the servers return PONG_NS# with the number
|
||||
# of the namespace the server is running.
|
||||
#
|
||||
# shellcheck disable=SC2162,SC2317
|
||||
|
||||
source lib.sh
|
||||
ret=0
|
||||
timeout=5
|
||||
|
||||
cleanup()
|
||||
{
|
||||
ip netns pids "$ns1" | xargs kill 2>/dev/null
|
||||
ip netns pids "$ns2" | xargs kill 2>/dev/null
|
||||
ip netns pids "$ns3" | xargs kill 2>/dev/null
|
||||
ip netns pids "$nsrouter" | xargs kill 2>/dev/null
|
||||
|
||||
cleanup_all_ns
|
||||
}
|
||||
|
||||
checktool "nft --version" "test without nft tool"
|
||||
checktool "socat -h" "run test without socat"
|
||||
|
||||
trap cleanup EXIT
|
||||
setup_ns ns1 ns2 ns3 nsrouter
|
||||
|
||||
if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
|
||||
echo "SKIP: No virtual ethernet pair device support in kernel"
|
||||
exit $ksft_skip
|
||||
fi
|
||||
ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2"
|
||||
ip link add veth2 netns "$nsrouter" type veth peer name eth0 netns "$ns3"
|
||||
|
||||
ip -net "$nsrouter" link set veth0 up
|
||||
ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0
|
||||
ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad
|
||||
|
||||
ip -net "$nsrouter" link set veth1 up
|
||||
ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1
|
||||
ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad
|
||||
|
||||
ip -net "$nsrouter" link set veth2 up
|
||||
ip -net "$nsrouter" addr add 10.0.3.1/24 dev veth2
|
||||
ip -net "$nsrouter" addr add dead:3::1/64 dev veth2 nodad
|
||||
|
||||
ip -net "$ns1" link set eth0 up
|
||||
ip -net "$ns2" link set eth0 up
|
||||
ip -net "$ns3" link set eth0 up
|
||||
|
||||
ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
|
||||
ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
|
||||
ip -net "$ns1" route add default via 10.0.1.1
|
||||
ip -net "$ns1" route add default via dead:1::1
|
||||
|
||||
ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
|
||||
ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
|
||||
ip -net "$ns2" route add default via 10.0.2.1
|
||||
ip -net "$ns2" route add default via dead:2::1
|
||||
|
||||
ip -net "$ns3" addr add 10.0.3.99/24 dev eth0
|
||||
ip -net "$ns3" addr add dead:3::99/64 dev eth0 nodad
|
||||
ip -net "$ns3" route add default via 10.0.3.1
|
||||
ip -net "$ns3" route add default via dead:3::1
|
||||
|
||||
ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
|
||||
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
|
||||
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
|
||||
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth2.forwarding=1 > /dev/null
|
||||
|
||||
# Ping ns2 and then ns3 from ns1, IPv4 first and IPv6 second for each.
# Returns 1 on the first failing IPv4 ping, 2 on the first failing
# IPv6 ping, 0 when all four succeed.
test_ping() {
	local n

	for n in 2 3; do
		ip netns exec "$ns1" ping -c 1 -q "10.0.$n.99" > /dev/null || return 1
		ip netns exec "$ns1" ping -c 1 -q "dead:$n::99" > /dev/null || return 2
	done

	return 0
}
|
||||
|
||||
# Ping the router's veth1 addresses from ns1.
# Returns 3 if the IPv4 ping fails, 4 if the IPv6 ping fails, else 0.
test_ping_router() {
	ip netns exec "$ns1" ping -c 1 -q 10.0.2.1 > /dev/null || return 3
	ip netns exec "$ns1" ping -c 1 -q dead:2::1 > /dev/null || return 4

	return 0
}
|
||||
|
||||
|
||||
# listener_ready NETNS PORT PROTO_FLAG
# Runs ss in NETNS with PROTO_FLAG, filtered on source port PORT, and
# succeeds when the output contains PORT (exit status of grep -q).
listener_ready()
{
	ss -N "$1" -ln "$3" -o "sport = :$2" | grep -q "$2"
}
|
||||
|
||||
test_tproxy()
|
||||
{
|
||||
local traffic_origin="$1"
|
||||
local ip_proto="$2"
|
||||
local expect_ns1_ns2="$3"
|
||||
local expect_ns1_ns3="$4"
|
||||
local expect_nsrouter_ns2="$5"
|
||||
local expect_nsrouter_ns3="$6"
|
||||
|
||||
# derived variables
|
||||
local testname="test_${ip_proto}_tcp_${traffic_origin}"
|
||||
local socat_ipproto
|
||||
local ns1_ip
|
||||
local ns2_ip
|
||||
local ns3_ip
|
||||
local ns2_target
|
||||
local ns3_target
|
||||
local nftables_subject
|
||||
local ip_command
|
||||
|
||||
# socat 1.8.0 has a bug that requires to specify the IP family to bind (fixed in 1.8.0.1)
|
||||
case $ip_proto in
|
||||
"ip")
|
||||
socat_ipproto="-4"
|
||||
ns1_ip=10.0.1.99
|
||||
ns2_ip=10.0.2.99
|
||||
ns3_ip=10.0.3.99
|
||||
ns2_target="tcp:$ns2_ip:8080"
|
||||
ns3_target="tcp:$ns3_ip:8080"
|
||||
nftables_subject="ip daddr $ns2_ip tcp dport 8080"
|
||||
ip_command="ip"
|
||||
;;
|
||||
"ip6")
|
||||
socat_ipproto="-6"
|
||||
ns1_ip=dead:1::99
|
||||
ns2_ip=dead:2::99
|
||||
ns3_ip=dead:3::99
|
||||
ns2_target="tcp:[$ns2_ip]:8080"
|
||||
ns3_target="tcp:[$ns3_ip]:8080"
|
||||
nftables_subject="ip6 daddr $ns2_ip tcp dport 8080"
|
||||
ip_command="ip -6"
|
||||
;;
|
||||
*)
|
||||
echo "FAIL: unsupported protocol"
|
||||
exit 255
|
||||
;;
|
||||
esac
|
||||
|
||||
case $traffic_origin in
|
||||
# to capture the local originated traffic we need to mark the outgoing
|
||||
# traffic so the policy based routing rule redirects it and can be processed
|
||||
# in the prerouting chain.
|
||||
"local")
|
||||
nftables_rules="
|
||||
flush ruleset
|
||||
table inet filter {
|
||||
chain divert {
|
||||
type filter hook prerouting priority 0; policy accept;
|
||||
$nftables_subject tproxy $ip_proto to :12345 meta mark set 1 accept
|
||||
}
|
||||
chain output {
|
||||
type route hook output priority 0; policy accept;
|
||||
$nftables_subject meta mark set 1 accept
|
||||
}
|
||||
}"
|
||||
;;
|
||||
"forward")
|
||||
nftables_rules="
|
||||
flush ruleset
|
||||
table inet filter {
|
||||
chain divert {
|
||||
type filter hook prerouting priority 0; policy accept;
|
||||
$nftables_subject tproxy $ip_proto to :12345 meta mark set 1 accept
|
||||
}
|
||||
}"
|
||||
;;
|
||||
*)
|
||||
echo "FAIL: unsupported parameter for traffic origin"
|
||||
exit 255
|
||||
;;
|
||||
esac
|
||||
|
||||
# shellcheck disable=SC2086 # Intended splitting of ip_command
|
||||
ip netns exec "$nsrouter" $ip_command rule add fwmark 1 table 100
|
||||
ip netns exec "$nsrouter" $ip_command route add local "${ns2_ip}" dev lo table 100
|
||||
echo "$nftables_rules" | ip netns exec "$nsrouter" nft -f /dev/stdin
|
||||
|
||||
timeout "$timeout" ip netns exec "$nsrouter" socat "$socat_ipproto" tcp-listen:12345,fork,ip-transparent SYSTEM:"cat" 2>/dev/null &
|
||||
local tproxy_pid=$!
|
||||
|
||||
timeout "$timeout" ip netns exec "$ns2" socat "$socat_ipproto" tcp-listen:8080,fork SYSTEM:"echo PONG_NS2" 2>/dev/null &
|
||||
local server2_pid=$!
|
||||
|
||||
timeout "$timeout" ip netns exec "$ns3" socat "$socat_ipproto" tcp-listen:8080,fork SYSTEM:"echo PONG_NS3" 2>/dev/null &
|
||||
local server3_pid=$!
|
||||
|
||||
busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter" 12345 "-t"
|
||||
busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2" 8080 "-t"
|
||||
busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns3" 8080 "-t"
|
||||
|
||||
local result
|
||||
# request from ns1 to ns2 (forwarded traffic)
|
||||
result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO "$ns2_target")
|
||||
if [ "$result" == "$expect_ns1_ns2" ] ;then
|
||||
echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns2"
|
||||
else
|
||||
echo "ERROR: tproxy test $testname: ns1 got reply \"$result\" connecting to ns2, not \"${expect_ns1_ns2}\" as intended"
|
||||
ret=1
|
||||
fi
|
||||
|
||||
# request from ns1 to ns3 (forwarded traffic)
|
||||
result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO "$ns3_target")
|
||||
if [ "$result" = "$expect_ns1_ns3" ] ;then
|
||||
echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns3"
|
||||
else
|
||||
echo "ERROR: tproxy test $testname: ns1 got reply \"$result\" connecting to ns3, not \"$expect_ns1_ns3\" as intended"
|
||||
ret=1
|
||||
fi
|
||||
|
||||
# request from nsrouter to ns2 (locally originated traffic)
|
||||
result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO "$ns2_target")
|
||||
if [ "$result" == "$expect_nsrouter_ns2" ] ;then
|
||||
echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns2"
|
||||
else
|
||||
echo "ERROR: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns2, not \"$expect_nsrouter_ns2\" as intended"
|
||||
ret=1
|
||||
fi
|
||||
|
||||
# request from nsrouter to ns3 (locally originated traffic)
|
||||
result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO "$ns3_target")
|
||||
if [ "$result" = "$expect_nsrouter_ns3" ] ;then
|
||||
echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns3"
|
||||
else
|
||||
echo "ERROR: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns3, not \"$expect_nsrouter_ns3\" as intended"
|
||||
ret=1
|
||||
fi
|
||||
|
||||
# cleanup
|
||||
kill "$tproxy_pid" "$server2_pid" "$server3_pid" 2>/dev/null
|
||||
# shellcheck disable=SC2086 # Intended splitting of ip_command
|
||||
ip netns exec "$nsrouter" $ip_command rule del fwmark 1 table 100
|
||||
ip netns exec "$nsrouter" $ip_command route flush table 100
|
||||
}
|
||||
|
||||
|
||||
test_ipv4_tcp_forward()
|
||||
{
|
||||
local traffic_origin="forward"
|
||||
local ip_proto="ip"
|
||||
local expect_ns1_ns2="I_M_PROXIED"
|
||||
local expect_ns1_ns3="PONG_NS3"
|
||||
local expect_nsrouter_ns2="PONG_NS2"
|
||||
local expect_nsrouter_ns3="PONG_NS3"
|
||||
|
||||
test_tproxy "$traffic_origin" \
|
||||
"$ip_proto" \
|
||||
"$expect_ns1_ns2" \
|
||||
"$expect_ns1_ns3" \
|
||||
"$expect_nsrouter_ns2" \
|
||||
"$expect_nsrouter_ns3"
|
||||
}
|
||||
|
||||
test_ipv4_tcp_local()
|
||||
{
|
||||
local traffic_origin="local"
|
||||
local ip_proto="ip"
|
||||
local expect_ns1_ns2="I_M_PROXIED"
|
||||
local expect_ns1_ns3="PONG_NS3"
|
||||
local expect_nsrouter_ns2="I_M_PROXIED"
|
||||
local expect_nsrouter_ns3="PONG_NS3"
|
||||
|
||||
test_tproxy "$traffic_origin" \
|
||||
"$ip_proto" \
|
||||
"$expect_ns1_ns2" \
|
||||
"$expect_ns1_ns3" \
|
||||
"$expect_nsrouter_ns2" \
|
||||
"$expect_nsrouter_ns3"
|
||||
}
|
||||
|
||||
test_ipv6_tcp_forward()
|
||||
{
|
||||
local traffic_origin="forward"
|
||||
local ip_proto="ip6"
|
||||
local expect_ns1_ns2="I_M_PROXIED"
|
||||
local expect_ns1_ns3="PONG_NS3"
|
||||
local expect_nsrouter_ns2="PONG_NS2"
|
||||
local expect_nsrouter_ns3="PONG_NS3"
|
||||
|
||||
test_tproxy "$traffic_origin" \
|
||||
"$ip_proto" \
|
||||
"$expect_ns1_ns2" \
|
||||
"$expect_ns1_ns3" \
|
||||
"$expect_nsrouter_ns2" \
|
||||
"$expect_nsrouter_ns3"
|
||||
}
|
||||
|
||||
test_ipv6_tcp_local()
|
||||
{
|
||||
local traffic_origin="local"
|
||||
local ip_proto="ip6"
|
||||
local expect_ns1_ns2="I_M_PROXIED"
|
||||
local expect_ns1_ns3="PONG_NS3"
|
||||
local expect_nsrouter_ns2="I_M_PROXIED"
|
||||
local expect_nsrouter_ns3="PONG_NS3"
|
||||
|
||||
test_tproxy "$traffic_origin" \
|
||||
"$ip_proto" \
|
||||
"$expect_ns1_ns2" \
|
||||
"$expect_ns1_ns3" \
|
||||
"$expect_nsrouter_ns2" \
|
||||
"$expect_nsrouter_ns3"
|
||||
}
|
||||
|
||||
# Baseline connectivity check from ns1, run before any tproxy test installs
# its nftables ruleset. Nothing else is meaningful if this fails.
if test_ping; then
	echo "PASS: ${ns1} can reach ${ns2}"
else
	# Record test_ping's own status first: $ret is still 0 at this point,
	# so exiting with it unchanged would report the failure as success.
	ret=$?
	echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2
	exit $ret
fi

# Each test sets ret=1 when a reply does not match the expectation; the
# script's exit status is the accumulated result.
test_ipv4_tcp_forward
test_ipv4_tcp_local
test_ipv6_tcp_forward
test_ipv6_tcp_local

exit $ret
|
262
tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh
Executable file
262
tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh
Executable file
@ -0,0 +1,262 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# This tests tproxy on the following scenario:
|
||||
#
|
||||
# +------------+
|
||||
# +-------+ | nsrouter | +-------+
|
||||
# |ns1 |.99 .1| |.1 .99| ns2|
|
||||
# | eth0|---------------|veth0 veth1|------------------|eth0 |
|
||||
# | | 10.0.1.0/24 | | 10.0.2.0/24 | |
|
||||
# +-------+ dead:1::/64 | veth2 | dead:2::/64 +-------+
|
||||
# +------------+
|
||||
# |.1
|
||||
# |
|
||||
# |
|
||||
# | +-------+
|
||||
# | .99| ns3|
|
||||
# +------------------------|eth0 |
|
||||
# 10.0.3.0/24 | |
|
||||
# dead:3::/64 +-------+
|
||||
#
|
||||
# The tproxy implementation acts as an echo server so the client
|
||||
# must receive the same message it sent if it has been proxied.
|
||||
# If it is not proxied, the servers return PONG_NS# with the number
|
||||
# of the namespace the server is running.
|
||||
# shellcheck disable=SC2162,SC2317
|
||||
|
||||
source lib.sh
|
||||
ret=0
|
||||
# UDP is slow
|
||||
timeout=15
|
||||
|
||||
cleanup()
|
||||
{
|
||||
ip netns pids "$ns1" | xargs kill 2>/dev/null
|
||||
ip netns pids "$ns2" | xargs kill 2>/dev/null
|
||||
ip netns pids "$ns3" | xargs kill 2>/dev/null
|
||||
ip netns pids "$nsrouter" | xargs kill 2>/dev/null
|
||||
|
||||
cleanup_all_ns
|
||||
}
|
||||
|
||||
checktool "nft --version" "test without nft tool"
|
||||
checktool "socat -h" "run test without socat"
|
||||
|
||||
trap cleanup EXIT
|
||||
setup_ns ns1 ns2 ns3 nsrouter
|
||||
|
||||
if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
|
||||
echo "SKIP: No virtual ethernet pair device support in kernel"
|
||||
exit $ksft_skip
|
||||
fi
|
||||
ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2"
|
||||
ip link add veth2 netns "$nsrouter" type veth peer name eth0 netns "$ns3"
|
||||
|
||||
ip -net "$nsrouter" link set veth0 up
|
||||
ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0
|
||||
ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad
|
||||
|
||||
ip -net "$nsrouter" link set veth1 up
|
||||
ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1
|
||||
ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad
|
||||
|
||||
ip -net "$nsrouter" link set veth2 up
|
||||
ip -net "$nsrouter" addr add 10.0.3.1/24 dev veth2
|
||||
ip -net "$nsrouter" addr add dead:3::1/64 dev veth2 nodad
|
||||
|
||||
ip -net "$ns1" link set eth0 up
|
||||
ip -net "$ns2" link set eth0 up
|
||||
ip -net "$ns3" link set eth0 up
|
||||
|
||||
ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
|
||||
ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
|
||||
ip -net "$ns1" route add default via 10.0.1.1
|
||||
ip -net "$ns1" route add default via dead:1::1
|
||||
|
||||
ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
|
||||
ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
|
||||
ip -net "$ns2" route add default via 10.0.2.1
|
||||
ip -net "$ns2" route add default via dead:2::1
|
||||
|
||||
ip -net "$ns3" addr add 10.0.3.99/24 dev eth0
|
||||
ip -net "$ns3" addr add dead:3::99/64 dev eth0 nodad
|
||||
ip -net "$ns3" route add default via 10.0.3.1
|
||||
ip -net "$ns3" route add default via dead:3::1
|
||||
|
||||
ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
|
||||
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
|
||||
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
|
||||
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth2.forwarding=1 > /dev/null
|
||||
|
||||
test_ping() {
|
||||
if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then
|
||||
return 1
|
||||
fi
|
||||
|
||||
if ! ip netns exec "$ns1" ping -c 1 -q dead:2::99 > /dev/null; then
|
||||
return 2
|
||||
fi
|
||||
|
||||
if ! ip netns exec "$ns1" ping -c 1 -q 10.0.3.99 > /dev/null; then
|
||||
return 1
|
||||
fi
|
||||
|
||||
if ! ip netns exec "$ns1" ping -c 1 -q dead:3::99 > /dev/null; then
|
||||
return 2
|
||||
fi
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
test_ping_router() {
|
||||
if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.1 > /dev/null; then
|
||||
return 3
|
||||
fi
|
||||
|
||||
if ! ip netns exec "$ns1" ping -c 1 -q dead:2::1 > /dev/null; then
|
||||
return 4
|
||||
fi
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
|
||||
listener_ready()
|
||||
{
|
||||
local ns="$1"
|
||||
local port="$2"
|
||||
local proto="$3"
|
||||
ss -N "$ns" -ln "$proto" -o "sport = :$port" | grep -q "$port"
|
||||
}
|
||||
|
||||
test_tproxy_udp_forward()
|
||||
{
|
||||
local ip_proto="$1"
|
||||
|
||||
local expect_ns1_ns2="I_M_PROXIED"
|
||||
local expect_ns1_ns3="PONG_NS3"
|
||||
local expect_nsrouter_ns2="PONG_NS2"
|
||||
local expect_nsrouter_ns3="PONG_NS3"
|
||||
|
||||
# derived variables
|
||||
local testname="test_${ip_proto}_udp_forward"
|
||||
local socat_ipproto
|
||||
local ns1_ip
|
||||
local ns2_ip
|
||||
local ns3_ip
|
||||
local ns1_ip_port
|
||||
local ns2_ip_port
|
||||
local ns3_ip_port
|
||||
local ip_command
|
||||
|
||||
# socat 1.8.0 has a bug that requires specifying the IP family to bind (fixed in 1.8.0.1)
|
||||
case $ip_proto in
|
||||
"ip")
|
||||
socat_ipproto="-4"
|
||||
ns1_ip=10.0.1.99
|
||||
ns2_ip=10.0.2.99
|
||||
ns3_ip=10.0.3.99
|
||||
ns1_ip_port="$ns1_ip:18888"
|
||||
ns2_ip_port="$ns2_ip:8080"
|
||||
ns3_ip_port="$ns3_ip:8080"
|
||||
ip_command="ip"
|
||||
;;
|
||||
"ip6")
|
||||
socat_ipproto="-6"
|
||||
ns1_ip=dead:1::99
|
||||
ns2_ip=dead:2::99
|
||||
ns3_ip=dead:3::99
|
||||
ns1_ip_port="[$ns1_ip]:18888"
|
||||
ns2_ip_port="[$ns2_ip]:8080"
|
||||
ns3_ip_port="[$ns3_ip]:8080"
|
||||
ip_command="ip -6"
|
||||
;;
|
||||
*)
|
||||
echo "FAIL: unsupported protocol"
|
||||
exit 255
|
||||
;;
|
||||
esac
|
||||
|
||||
# shellcheck disable=SC2086 # Intended splitting of ip_command
|
||||
ip netns exec "$nsrouter" $ip_command rule add fwmark 1 table 100
|
||||
ip netns exec "$nsrouter" $ip_command route add local "$ns2_ip" dev lo table 100
|
||||
ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
|
||||
flush ruleset
|
||||
table inet filter {
|
||||
chain divert {
|
||||
type filter hook prerouting priority 0; policy accept;
|
||||
$ip_proto daddr $ns2_ip udp dport 8080 tproxy $ip_proto to :12345 meta mark set 1 accept
|
||||
}
|
||||
}
|
||||
EOF
|
||||
|
||||
timeout "$timeout" ip netns exec "$nsrouter" socat -u "$socat_ipproto" udp-listen:12345,fork,ip-transparent,reuseport udp:"$ns1_ip_port",ip-transparent,reuseport,bind="$ns2_ip_port" 2>/dev/null &
|
||||
local tproxy_pid=$!
|
||||
|
||||
timeout "$timeout" ip netns exec "$ns2" socat "$socat_ipproto" udp-listen:8080,fork SYSTEM:"echo PONG_NS2" 2>/dev/null &
|
||||
local server2_pid=$!
|
||||
|
||||
timeout "$timeout" ip netns exec "$ns3" socat "$socat_ipproto" udp-listen:8080,fork SYSTEM:"echo PONG_NS3" 2>/dev/null &
|
||||
local server3_pid=$!
|
||||
|
||||
busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter" 12345 "-u"
|
||||
busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2" 8080 "-u"
|
||||
busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns3" 8080 "-u"
|
||||
|
||||
local result
|
||||
# request from ns1 to ns2 (forwarded traffic)
|
||||
result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port",sourceport=18888)
|
||||
if [ "$result" == "$expect_ns1_ns2" ] ;then
|
||||
echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns2"
|
||||
else
|
||||
echo "ERROR: tproxy test $testname: ns1 got reply \"$result\" connecting to ns2, not \"${expect_ns1_ns2}\" as intended"
|
||||
ret=1
|
||||
fi
|
||||
|
||||
# request from ns1 to ns3 (forwarded traffic)
|
||||
result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port")
|
||||
if [ "$result" = "$expect_ns1_ns3" ] ;then
|
||||
echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns3"
|
||||
else
|
||||
echo "ERROR: tproxy test $testname: ns1 got reply \"$result\" connecting to ns3, not \"$expect_ns1_ns3\" as intended"
|
||||
ret=1
|
||||
fi
|
||||
|
||||
# request from nsrouter to ns2 (locally originated traffic)
|
||||
result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port")
|
||||
if [ "$result" == "$expect_nsrouter_ns2" ] ;then
|
||||
echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns2"
|
||||
else
|
||||
echo "ERROR: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns2, not \"$expect_nsrouter_ns2\" as intended"
|
||||
ret=1
|
||||
fi
|
||||
|
||||
# request from nsrouter to ns3 (locally originated traffic)
|
||||
result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port")
|
||||
if [ "$result" = "$expect_nsrouter_ns3" ] ;then
|
||||
echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns3"
|
||||
else
|
||||
echo "ERROR: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns3, not \"$expect_nsrouter_ns3\" as intended"
|
||||
ret=1
|
||||
fi
|
||||
|
||||
# cleanup
|
||||
kill "$tproxy_pid" "$server2_pid" "$server3_pid" 2>/dev/null
|
||||
# shellcheck disable=SC2086 # Intended splitting of ip_command
|
||||
ip netns exec "$nsrouter" $ip_command rule del fwmark 1 table 100
|
||||
ip netns exec "$nsrouter" $ip_command route flush table 100
|
||||
}
|
||||
|
||||
|
||||
# Baseline connectivity check from ns1 (test_ping pings ns2 and ns3 over both
# IPv4 and IPv6), run before any tproxy ruleset is installed.
if test_ping; then
	echo "PASS: ${ns1} can reach ${ns2}"
else
	# Record test_ping's own status first: $ret is still 0 at this point,
	# so exiting with it unchanged would report the failure as success.
	ret=$?
	echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2
	exit $ret
fi

# Each run sets ret=1 when a reply does not match the expectation; the
# script's exit status is the accumulated result.
test_tproxy_udp_forward "ip"
test_tproxy_udp_forward "ip6"

exit $ret
|
@ -30,12 +30,17 @@ if [ -z "$(which packetdrill)" ]; then
|
||||
exit "$KSFT_SKIP"
|
||||
fi
|
||||
|
||||
declare -a optargs
|
||||
if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then
|
||||
optargs+=('--tolerance_usecs=14000')
|
||||
fi
|
||||
|
||||
ktap_print_header
|
||||
ktap_set_plan 2
|
||||
|
||||
unshare -n packetdrill ${ipv4_args[@]} $(basename $script) > /dev/null \
|
||||
unshare -n packetdrill ${ipv4_args[@]} ${optargs[@]} $(basename $script) > /dev/null \
|
||||
&& ktap_test_pass "ipv4" || ktap_test_fail "ipv4"
|
||||
unshare -n packetdrill ${ipv6_args[@]} $(basename $script) > /dev/null \
|
||||
unshare -n packetdrill ${ipv6_args[@]} ${optargs[@]} $(basename $script) > /dev/null \
|
||||
&& ktap_test_pass "ipv6" || ktap_test_fail "ipv6"
|
||||
|
||||
ktap_finished
|
||||
|
Loading…
Reference in New Issue
Block a user