From ec624fe740b416fb68d536b37fb8eef46f90b5c2 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 14 Dec 2021 19:24:33 +0200 Subject: [PATCH 1/3] net/sched: Extend qdisc control block with tc control block BPF layer extends the qdisc control block via struct bpf_skb_data_end and because of that there is no more room to add variables to the qdisc layer control block without going over the skb->cb size. Extend the qdisc control block with a tc control block, and move all tc related variables to there as a pre-step for extending the tc control block with additional members. Signed-off-by: Paul Blakey Signed-off-by: Jakub Kicinski --- include/net/pkt_sched.h | 15 +++++++++++++++ include/net/sch_generic.h | 2 -- net/core/dev.c | 8 ++++---- net/sched/act_ct.c | 14 +++++++------- net/sched/cls_api.c | 6 ++++-- net/sched/cls_flower.c | 3 ++- net/sched/sch_frag.c | 3 ++- 7 files changed, 34 insertions(+), 17 deletions(-) diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index bf79f3a890af..05f18e81f3e8 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -193,4 +193,19 @@ static inline void skb_txtime_consumed(struct sk_buff *skb) skb->tstamp = ktime_set(0, 0); } +struct tc_skb_cb { + struct qdisc_skb_cb qdisc_cb; + + u16 mru; + bool post_ct; +}; + +static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb) +{ + struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb; + + BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb)); + return cb; +} + #endif diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 22179b2fda72..c70e6d2b2fdd 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -447,8 +447,6 @@ struct qdisc_skb_cb { }; #define QDISC_CB_PRIV_LEN 20 unsigned char data[QDISC_CB_PRIV_LEN]; - u16 mru; - bool post_ct; }; typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv); diff --git a/net/core/dev.c b/net/core/dev.c index 2a352e668d10..c4708e2487fb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3941,8 +3941,8 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) return skb; /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */ - qdisc_skb_cb(skb)->mru = 0; - qdisc_skb_cb(skb)->post_ct = false; + tc_skb_cb(skb)->mru = 0; + tc_skb_cb(skb)->post_ct = false; mini_qdisc_bstats_cpu_update(miniq, skb); switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) { @@ -5103,8 +5103,8 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, } qdisc_skb_cb(skb)->pkt_len = skb->len; - qdisc_skb_cb(skb)->mru = 0; - qdisc_skb_cb(skb)->post_ct = false; + tc_skb_cb(skb)->mru = 0; + tc_skb_cb(skb)->post_ct = false; skb->tc_at_ingress = 1; mini_qdisc_bstats_cpu_update(miniq, skb); diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 90866ae45573..98e248b9c0b1 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -690,10 +690,10 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, u8 family, u16 zone, bool *defrag) { enum ip_conntrack_info ctinfo; - struct qdisc_skb_cb cb; struct nf_conn *ct; int err = 0; bool frag; + u16 mru; /* Previously seen (loopback)? Ignore. */ ct = nf_ct_get(skb, &ctinfo); @@ -708,7 +708,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, return err; skb_get(skb); - cb = *qdisc_skb_cb(skb); + mru = tc_skb_cb(skb)->mru; if (family == NFPROTO_IPV4) { enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; @@ -722,7 +722,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, if (!err) { *defrag = true; - cb.mru = IPCB(skb)->frag_max_size; + mru = IPCB(skb)->frag_max_size; } } else { /* NFPROTO_IPV6 */ #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) @@ -735,7 +735,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, if (!err) { *defrag = true; - cb.mru = IP6CB(skb)->frag_max_size; + mru = IP6CB(skb)->frag_max_size; } #else err = -EOPNOTSUPP; @@ -744,7 +744,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, } if (err != -EINPROGRESS) - *qdisc_skb_cb(skb) = cb; + tc_skb_cb(skb)->mru = mru; skb_clear_hash(skb); skb->ignore_df = 1; return err; @@ -963,7 +963,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, tcf_action_update_bstats(&c->common, skb); if (clear) { - qdisc_skb_cb(skb)->post_ct = false; + tc_skb_cb(skb)->post_ct = false; ct = nf_ct_get(skb, &ctinfo); if (ct) { nf_conntrack_put(&ct->ct_general); @@ -1048,7 +1048,7 @@ do_nat: out_push: skb_push_rcsum(skb, nh_ofs); - qdisc_skb_cb(skb)->post_ct = true; + tc_skb_cb(skb)->post_ct = true; out_clear: if (defrag) qdisc_skb_cb(skb)->pkt_len = skb->len; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index e54f0a42270c..ff8a9383bf1c 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1617,12 +1617,14 @@ int tcf_classify(struct sk_buff *skb, /* If we missed on some chain */ if (ret == TC_ACT_UNSPEC && last_executed_chain) { + struct tc_skb_cb *cb = tc_skb_cb(skb); + ext = tc_skb_ext_alloc(skb); if (WARN_ON_ONCE(!ext)) return TC_ACT_SHOT; ext->chain = last_executed_chain; - ext->mru = qdisc_skb_cb(skb)->mru; - ext->post_ct = qdisc_skb_cb(skb)->post_ct; + ext->mru = cb->mru; + ext->post_ct = cb->post_ct; } return ret; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index aab13ba11767..9782b93db1b3 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -309,7 +310,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { struct cls_fl_head *head = rcu_dereference_bh(tp->root); - bool post_ct = qdisc_skb_cb(skb)->post_ct; + bool post_ct = tc_skb_cb(skb)->post_ct; struct fl_flow_key skb_key; struct fl_flow_mask *mask; struct cls_fl_filter *f; diff --git a/net/sched/sch_frag.c b/net/sched/sch_frag.c index 8c06381391d6..5ded4c8672a6 100644 --- a/net/sched/sch_frag.c +++ b/net/sched/sch_frag.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB #include #include +#include #include #include #include @@ -137,7 +138,7 @@ err: int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)) { - u16 mru = qdisc_skb_cb(skb)->mru; + u16 mru = tc_skb_cb(skb)->mru; int err; if (mru && skb->len > mru + skb->dev->hard_header_len) From 3849595866166b23bf6a0cb9ff87e06423167f67 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 14 Dec 2021 19:24:34 +0200 Subject: [PATCH 2/3] net/sched: flow_dissector: Fix matching on zone id for invalid conns If ct rejects a flow, it removes the conntrack info from the skb. act_ct sets the post_ct variable so the dissector will see this case as an +tracked +invalid state, but the zone id is lost with the conntrack info. To restore the zone id on such cases, set the last executed zone, via the tc control block, when passing ct, and read it back in the dissector if there is no ct info on the skb (invalid connection). Fixes: 7baf2429a1a9 ("net/sched: cls_flower add CT_FLAGS_INVALID flag support") Signed-off-by: Paul Blakey Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 2 +- include/net/pkt_sched.h | 1 + net/core/flow_dissector.c | 3 ++- net/sched/act_ct.c | 1 + net/sched/cls_flower.c | 3 ++- 5 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c8cb7e697d47..2ecf8cfd2223 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1380,7 +1380,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, u16 *ctinfo_map, size_t mapsize, - bool post_ct); + bool post_ct, u16 zone); void skb_flow_dissect_tunnel_info(const struct sk_buff *skb, struct flow_dissector *flow_dissector, diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 05f18e81f3e8..9e71691c491b 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -198,6 +198,7 @@ struct tc_skb_cb { u16 mru; bool post_ct; + u16 zone; /* Only valid if post_ct = true */ }; static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 3255f57f5131..1b094c481f1d 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -238,7 +238,7 @@ void skb_flow_dissect_ct(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, u16 *ctinfo_map, - size_t mapsize, bool post_ct) + size_t mapsize, bool post_ct, u16 zone) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) struct flow_dissector_key_ct *key; @@ -260,6 +260,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb, if (!ct) { key->ct_state = TCA_FLOWER_KEY_CT_FLAGS_TRACKED | TCA_FLOWER_KEY_CT_FLAGS_INVALID; + key->ct_zone = zone; return; } diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 98e248b9c0b1..ab3591408419 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1049,6 +1049,7 @@ out_push: skb_push_rcsum(skb, nh_ofs); tc_skb_cb(skb)->post_ct = true; + tc_skb_cb(skb)->zone = p->zone; out_clear: if (defrag) qdisc_skb_cb(skb)->pkt_len = skb->len; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 9782b93db1b3..ef54ed395874 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -311,6 +311,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, { struct cls_fl_head *head = rcu_dereference_bh(tp->root); bool post_ct = tc_skb_cb(skb)->post_ct; + u16 zone = tc_skb_cb(skb)->zone; struct fl_flow_key skb_key; struct fl_flow_mask *mask; struct cls_fl_filter *f; @@ -328,7 +329,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, skb_flow_dissect_ct(skb, &mask->dissector, &skb_key, fl_ct_info_to_flower_map, ARRAY_SIZE(fl_ct_info_to_flower_map), - post_ct); + post_ct, zone); skb_flow_dissect_hash(skb, &mask->dissector, &skb_key); skb_flow_dissect(skb, &mask->dissector, &skb_key, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP); From 635d448a1cce4b4ebee52b351052c70434fa90ea Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 14 Dec 2021 19:24:35 +0200 Subject: [PATCH 3/3] net: openvswitch: Fix matching zone id for invalid conns arriving from tc Zone id is not restored if we passed ct and ct rejected the connection, as there is no ct info on the skb. Save the zone from tc skb cb to tc skb extension and pass it on to ovs, use that info to restore the zone id for invalid connections. Fixes: d29334c15d33 ("net/sched: act_api: fix miss set post_ct for ovs after do conntrack in act_ct") Signed-off-by: Paul Blakey Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 1 + net/openvswitch/flow.c | 8 +++++++- net/sched/cls_api.c | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2ecf8cfd2223..4507d77d6941 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -286,6 +286,7 @@ struct nf_bridge_info { struct tc_skb_ext { __u32 chain; __u16 mru; + __u16 zone; bool post_ct; }; #endif diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 9713035b89e3..6d262d9aa10e 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "conntrack.h" #include "datapath.h" @@ -860,6 +861,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, #endif bool post_ct = false; int res, err; + u16 zone = 0; /* Extract metadata from packet. */ if (tun_info) { @@ -898,6 +900,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, key->recirc_id = tc_ext ? tc_ext->chain : 0; OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0; post_ct = tc_ext ? tc_ext->post_ct : false; + zone = post_ct ? tc_ext->zone : 0; } else { key->recirc_id = 0; } @@ -906,8 +909,11 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, #endif err = key_extract(skb, key); - if (!err) + if (!err) { ovs_ct_fill_key(skb, key, post_ct); /* Must be after key_extract(). */ + if (post_ct && !skb_get_nfct(skb)) + key->ct_zone = zone; + } return err; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index ff8a9383bf1c..35c74bdde848 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1625,6 +1625,7 @@ int tcf_classify(struct sk_buff *skb, ext->chain = last_executed_chain; ext->mru = cb->mru; ext->post_ct = cb->post_ct; + ext->zone = cb->zone; } return ret;