From 79dce09ab02729a90cf6ce49c9aaaf17aa0d21db Mon Sep 17 00:00:00 2001 From: "longguang.yue" Date: Mon, 28 Sep 2020 10:49:38 +0800 Subject: [PATCH 1/7] ipvs: adjust the debug info in function set_tcp_state Outputting client,virtual,dst addresses info when tcp state changes, which makes the connection debug more clear Signed-off-by: longguang.yue Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_proto_tcp.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index dc2e7da2742a..7da51390cea6 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -539,8 +539,8 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, if (new_state != cp->state) { struct ip_vs_dest *dest = cp->dest; - IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->" - "%s:%d state: %s->%s conn->refcnt:%d\n", + IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] c:%s:%d v:%s:%d " + "d:%s:%d state: %s->%s conn->refcnt:%d\n", pd->pp->name, ((state_off == TCP_DIR_OUTPUT) ? "output " : "input "), @@ -548,10 +548,12 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, th->fin ? 'F' : '.', th->ack ? 'A' : '.', th->rst ? 'R' : '.', - IP_VS_DBG_ADDR(cp->daf, &cp->daddr), - ntohs(cp->dport), IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), + ntohs(cp->vport), + IP_VS_DBG_ADDR(cp->daf, &cp->daddr), + ntohs(cp->dport), tcp_state_name(cp->state), tcp_state_name(new_state), refcount_read(&cp->refcnt)); From 4f25434bccc28cf8a07876ef5142a2869a674353 Mon Sep 17 00:00:00 2001 From: Francesco Ruggeri Date: Wed, 7 Oct 2020 12:32:52 -0700 Subject: [PATCH 2/7] netfilter: conntrack: connection timeout after re-register If the first packet conntrack sees after a re-register is an outgoing keepalive packet with no data (SEG.SEQ = SND.NXT-1), td_end is set to SND.NXT-1. When the peer correctly acknowledges SND.NXT, tcp_in_window fails check III (Upper bound for valid (s)ack: sack <= receiver.td_end) and returns false, which cascades into nf_conntrack_in setting skb->_nfct = 0 and in later conntrack iptables rules not matching. In cases where iptables are dropping packets that do not match conntrack rules this can result in idle tcp connections to time out. v2: adjust td_end when getting the reply rather than when sending out the keepalive packet. Fixes: f94e63801ab2 ("netfilter: conntrack: reset tcp maxwin on re-register") Signed-off-by: Francesco Ruggeri Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index e8c86ee4c1c4..c8fb2187ad4b 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -541,13 +541,20 @@ static bool tcp_in_window(const struct nf_conn *ct, swin = win << sender->td_scale; sender->td_maxwin = (swin == 0 ? 1 : swin); sender->td_maxend = end + sender->td_maxwin; - /* - * We haven't seen traffic in the other direction yet - * but we have to tweak window tracking to pass III - * and IV until that happens. - */ - if (receiver->td_maxwin == 0) + if (receiver->td_maxwin == 0) { + /* We haven't seen traffic in the other + * direction yet but we have to tweak window + * tracking to pass III and IV until that + * happens. + */ receiver->td_end = receiver->td_maxend = sack; + } else if (sack == receiver->td_end + 1) { + /* Likely a reply to a keepalive. + * Needed for III. + */ + receiver->td_end++; + } + } } else if (((state->state == TCP_CONNTRACK_SYN_SENT && dir == IP_CT_DIR_ORIGINAL) From 68f9f9c2c3b6a7259f6a92bc26cdc7bd22e7a982 Mon Sep 17 00:00:00 2001 From: Georg Kohmann Date: Tue, 13 Oct 2020 14:23:12 +0200 Subject: [PATCH 3/7] netfilter: Drop fragmented ndisc packets assembled in netfilter Fragmented ndisc packets assembled in netfilter not dropped as specified in RFC 6980, section 5. This behaviour breaks TAHI IPv6 Core Conformance Tests v6LC.2.1.22/23, V6LC.2.2.26/27 and V6LC.2.3.18. Setting IP6SKB_FRAGMENTED flag during reassembly. References: commit b800c3b966bc ("ipv6: drop fragmented ndisc packets by default (RFC 6980)") Signed-off-by: Georg Kohmann Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/nf_conntrack_reasm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index fed9666a2f7d..054d287eb13d 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -355,6 +355,7 @@ static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, ipv6_hdr(skb)->payload_len = htons(payload_len); ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn); IP6CB(skb)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size; + IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; /* Yes, and fold redundant checksum back. 8) */ if (skb->ip_summed == CHECKSUM_COMPLETE) From 63137bc5882a1882c553d389fdeeeace86ee1741 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20COCAULT?= Date: Wed, 14 Oct 2020 12:36:15 +0000 Subject: [PATCH 4/7] netfilter: ebtables: Fixes dropping of small packets in bridge nat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes an error causing small packets to get dropped. skb_ensure_writable expects the second parameter to be a length in the ethernet payload.=20 If we want to write the ethernet header (src, dst), we should pass 0. Otherwise, packets with small payloads (< ETH_ALEN) will get dropped. Fixes: c1a831167901 ("netfilter: bridge: convert skb_make_writable to skb_ensure_writable") Signed-off-by: Timothée COCAULT Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_dnat.c | 2 +- net/bridge/netfilter/ebt_redirect.c | 2 +- net/bridge/netfilter/ebt_snat.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index 12a4f4d93681..3fda71a8579d 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -21,7 +21,7 @@ ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nat_info *info = par->targinfo; - if (skb_ensure_writable(skb, ETH_ALEN)) + if (skb_ensure_writable(skb, 0)) return EBT_DROP; ether_addr_copy(eth_hdr(skb)->h_dest, info->mac); diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index 0cad62a4052b..307790562b49 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -21,7 +21,7 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_redirect_info *info = par->targinfo; - if (skb_ensure_writable(skb, ETH_ALEN)) + if (skb_ensure_writable(skb, 0)) return EBT_DROP; if (xt_hooknum(par) != NF_BR_BROUTING) diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c index 27443bf229a3..7dfbcdfc30e5 100644 --- a/net/bridge/netfilter/ebt_snat.c +++ b/net/bridge/netfilter/ebt_snat.c @@ -22,7 +22,7 @@ ebt_snat_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nat_info *info = par->targinfo; - if (skb_ensure_writable(skb, ETH_ALEN * 2)) + if (skb_ensure_writable(skb, 0)) return EBT_DROP; ether_addr_copy(eth_hdr(skb)->h_source, info->mac); From 64747d5ed19911a867af733f6679d2a859fb18ae Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Sun, 18 Oct 2020 16:30:19 +0100 Subject: [PATCH 5/7] docs: nf_flowtable: fix typo. "mailined" should be "mainlined." Signed-off-by: Jeremy Sowden Signed-off-by: Pablo Neira Ayuso --- Documentation/networking/nf_flowtable.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/nf_flowtable.rst b/Documentation/networking/nf_flowtable.rst index b6e1fa141aae..6cdf9a1724b6 100644 --- a/Documentation/networking/nf_flowtable.rst +++ b/Documentation/networking/nf_flowtable.rst @@ -109,7 +109,7 @@ More reading This documentation is based on the LWN.net articles [1]_\ [2]_. Rafal Milecki also made a very complete and comprehensive summary called "A state of network acceleration" that describes how things were before this infrastructure was -mailined [3]_ and it also makes a rough summary of this work [4]_. +mainlined [3]_ and it also makes a rough summary of this work [4]_. .. [1] https://lwn.net/Articles/738214/ .. [2] https://lwn.net/Articles/742164/ From 31cc578ae2de19c748af06d859019dced68e325d Mon Sep 17 00:00:00 2001 From: Saeed Mirzamohammadi Date: Tue, 20 Oct 2020 13:41:36 +0200 Subject: [PATCH 6/7] netfilter: nftables_offload: KASAN slab-out-of-bounds Read in nft_flow_rule_create This patch fixes the issue due to: BUG: KASAN: slab-out-of-bounds in nft_flow_rule_create+0x622/0x6a2 net/netfilter/nf_tables_offload.c:40 Read of size 8 at addr ffff888103910b58 by task syz-executor227/16244 The error happens when expr->ops is accessed early on before performing the boundary check and after nft_expr_next() moves the expr to go out-of-bounds. This patch checks the boundary condition before expr->ops that fixes the slab-out-of-bounds Read issue. Add nft_expr_more() and use it to fix this problem. Signed-off-by: Saeed Mirzamohammadi Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 6 ++++++ net/netfilter/nf_tables_api.c | 6 +++--- net/netfilter/nf_tables_offload.c | 4 ++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 3f7e56b1171e..55b4cadf290a 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -891,6 +891,12 @@ static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule) return (struct nft_expr *)&rule->data[rule->dlen]; } +static inline bool nft_expr_more(const struct nft_rule *rule, + const struct nft_expr *expr) +{ + return expr != nft_expr_last(rule) && expr->ops; +} + static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule) { return (void *)&rule->data[rule->dlen]; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9957e0ed8658..65cb8e3c13d9 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -302,7 +302,7 @@ static void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_expr *expr; expr = nft_expr_first(rule); - while (expr != nft_expr_last(rule) && expr->ops) { + while (nft_expr_more(rule, expr)) { if (expr->ops->activate) expr->ops->activate(ctx, expr); @@ -317,7 +317,7 @@ static void nft_rule_expr_deactivate(const struct nft_ctx *ctx, struct nft_expr *expr; expr = nft_expr_first(rule); - while (expr != nft_expr_last(rule) && expr->ops) { + while (nft_expr_more(rule, expr)) { if (expr->ops->deactivate) expr->ops->deactivate(ctx, expr, phase); @@ -3080,7 +3080,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, * is called on error from nf_tables_newrule(). */ expr = nft_expr_first(rule); - while (expr != nft_expr_last(rule) && expr->ops) { + while (nft_expr_more(rule, expr)) { next = nft_expr_next(expr); nf_tables_expr_destroy(ctx, expr); expr = next; diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 7c7e06624dc3..9f625724a20f 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -37,7 +37,7 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net, struct nft_expr *expr; expr = nft_expr_first(rule); - while (expr->ops && expr != nft_expr_last(rule)) { + while (nft_expr_more(rule, expr)) { if (expr->ops->offload_flags & NFT_OFFLOAD_F_ACTION) num_actions++; @@ -61,7 +61,7 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net, ctx->net = net; ctx->dep.type = NFT_OFFLOAD_DEP_UNSPEC; - while (expr->ops && expr != nft_expr_last(rule)) { + while (nft_expr_more(rule, expr)) { if (!expr->ops->offload) { err = -EOPNOTSUPP; goto err_out; From c77761c8a59405cb7aa44188b30fffe13fbdd02d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 21 Oct 2020 12:55:52 +0200 Subject: [PATCH 7/7] netfilter: nf_fwd_netdev: clear timestamp in forwarding path Similar to 7980d2eabde8 ("ipvs: clear skb->tstamp in forwarding path"). fq qdisc requires tstamp to be cleared in forwarding path. Fixes: 8203e2d844d3 ("net: clear skb->tstamp in forwarding paths") Fixes: fb420d5d91c1 ("tcp/fq: move back to CLOCK_MONOTONIC") Fixes: 80b14dee2bea ("net: Add a new socket option for a future transmit time.") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_dup_netdev.c | 1 + net/netfilter/nft_fwd_netdev.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c index 2b01a151eaa8..a579e59ee5c5 100644 --- a/net/netfilter/nf_dup_netdev.c +++ b/net/netfilter/nf_dup_netdev.c @@ -19,6 +19,7 @@ static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev) skb_push(skb, skb->mac_len); skb->dev = dev; + skb->tstamp = 0; dev_queue_xmit(skb); } diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 3087e23297db..b77985986b24 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -138,6 +138,7 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, return; skb->dev = dev; + skb->tstamp = 0; neigh_xmit(neigh_table, dev, addr, skb); out: regs->verdict.code = verdict;