From aec745e2c520bf2d046684a284dac11c25d8e717 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 7 Jan 2017 17:06:33 -0500 Subject: [PATCH 1/6] net-tc: remove unused tc_verd fields Remove the last reference to tc_verd's munge and redirect ttl bits. These fields are no longer used. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index cb4bcdc58543..c769f71972f5 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -17,10 +17,6 @@ /* verdict bit breakdown * -bit 0: when set -> this packet has been munged already - -bit 1: when set -> It is ok to munge this packet - bit 2,3,4,5: Reclassify counter - sort of reverse TTL - if exceeded assume loop @@ -31,8 +27,6 @@ bit 6,7: Where this packet was last seen bit 8: when set --> Request not to classify on ingress. -bits 9,10,11: redirect counter - redirect TTL. Loop avoidance - * * */ @@ -56,7 +50,6 @@ bits 9,10,11: redirect counter - redirect TTL. Loop avoidance #define SET_TC_AT(v,n) ((V_TC_AT(n)) | (v & ~M_TC_AT)) #define MAX_REC_LOOP 4 -#define MAX_RED_LOOP 4 #endif /* Action attributes */ From d6264071ce7d100a2b7c1f295167796ab5178caf Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 7 Jan 2017 17:06:34 -0500 Subject: [PATCH 2/6] net-tc: make MAX_RECLASSIFY_LOOP local This field is no longer kept in tc_verd. Remove it from the global definition of that struct. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 5 ----- net/sched/sch_api.c | 3 ++- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index c769f71972f5..bba23dbb3ab6 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -17,9 +17,6 @@ /* verdict bit breakdown * -bit 2,3,4,5: Reclassify counter - sort of reverse TTL - if exceeded -assume loop - bit 6,7: Where this packet was last seen 0: Above the transmit example at the socket level 1: on the Ingress @@ -48,8 +45,6 @@ bit 8: when set --> Request not to classify on ingress. #define G_TC_AT(x) _TC_GETVALUE(x,S_TC_AT,M_TC_AT) #define V_TC_AT(x) _TC_MAKEVALUE(x,S_TC_AT) #define SET_TC_AT(v,n) ((V_TC_AT(n)) | (v & ~M_TC_AT)) - -#define MAX_REC_LOOP 4 #endif /* Action attributes */ diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index d7b93429f0cc..ef53ede11590 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1861,6 +1861,7 @@ int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, { __be16 protocol = tc_skb_protocol(skb); #ifdef CONFIG_NET_CLS_ACT + const int max_reclassify_loop = 4; const struct tcf_proto *old_tp = tp; int limit = 0; @@ -1885,7 +1886,7 @@ reclassify: return TC_ACT_UNSPEC; /* signal: continue lookup */ #ifdef CONFIG_NET_CLS_ACT reset: - if (unlikely(limit++ >= MAX_REC_LOOP)) { + if (unlikely(limit++ >= max_reclassify_loop)) { net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n", tp->q->ops->id, tp->prio & 0xffff, ntohs(tp->protocol)); From e7246e122aaa99ebbb8ad7da80f35a20577bd8af Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 7 Jan 2017 17:06:35 -0500 Subject: [PATCH 3/6] net-tc: extract skip classify bit from tc_verd Packets sent by the IFB device skip subsequent tc classification. A single bit governs this state. Move it out of tc_verd in anticipation of removing that __u16 completely. The new bitfield tc_skip_classify temporarily uses one bit of a hole, until tc_verd is removed completely in a follow-up patch. Remove the bit hole comment. It could be 2, 3, 4 or 5 bits long. With that many options, little value in documenting it. Introduce a helper function to deduplicate the logic in the two sites that check this bit. The field tc_skip_classify is set only in IFB on skbs cloned in act_mirred, so original packet sources do not have to clear the bit when reusing packets (notably, pktgen and octeon). Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- drivers/net/ifb.c | 2 +- include/linux/skbuff.h | 5 ++++- include/net/sch_generic.h | 11 +++++++++++ include/uapi/linux/pkt_cls.h | 6 ------ net/core/dev.c | 10 +++------- net/sched/act_api.c | 11 ++++------- 6 files changed, 23 insertions(+), 22 deletions(-) diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 082534e187fc..442c4c4a9606 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -81,7 +81,7 @@ static void ifb_ri_tasklet(unsigned long _txp) u32 from = G_TC_FROM(skb->tc_verd); skb->tc_verd = 0; - skb->tc_verd = SET_TC_NCLS(skb->tc_verd); + skb->tc_skip_classify = 1; u64_stats_update_begin(&txp->tsync); txp->tx_packets++; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b53c0cfd417e..570f60ec6cb4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -589,6 +589,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @pkt_type: Packet class * @fclone: skbuff clone status * @ipvs_property: skbuff is owned by ipvs + * @tc_skip_classify: do not classify packet. set by IFB device * @peeked: this packet has been seen already, so stats have been * done for it, don't do them again * @nf_trace: netfilter packet trace flag @@ -749,7 +750,9 @@ struct sk_buff { #ifdef CONFIG_NET_SWITCHDEV __u8 offload_fwd_mark:1; #endif - /* 2, 4 or 5 bit hole */ +#ifdef CONFIG_NET_CLS_ACT + __u8 tc_skip_classify:1; +#endif #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 498f81b229a4..857356f2d74b 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -418,6 +418,17 @@ static inline bool skb_at_tc_ingress(const struct sk_buff *skb) #endif } +static inline bool skb_skip_tc_classify(struct sk_buff *skb) +{ +#ifdef CONFIG_NET_CLS_ACT + if (skb->tc_skip_classify) { + skb->tc_skip_classify = 0; + return true; + } +#endif + return false; +} + /* Reset all TX qdiscs greater then index of a device. */ static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i) { diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index bba23dbb3ab6..1eed5d7509bc 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -22,8 +22,6 @@ bit 6,7: Where this packet was last seen 1: on the Ingress 2: on the Egress -bit 8: when set --> Request not to classify on ingress. - * * */ @@ -36,10 +34,6 @@ bit 8: when set --> Request not to classify on ingress. #define AT_INGRESS 0x1 #define AT_EGRESS 0x2 -#define TC_NCLS _TC_MAKEMASK1(8) -#define SET_TC_NCLS(v) ( TC_NCLS | (v & ~TC_NCLS)) -#define CLR_TC_NCLS(v) ( v & ~TC_NCLS) - #define S_TC_AT _TC_MAKE32(12) #define M_TC_AT _TC_MAKEMASK(2,S_TC_AT) #define G_TC_AT(x) _TC_GETVALUE(x,S_TC_AT,M_TC_AT) diff --git a/net/core/dev.c b/net/core/dev.c index 56818f7eab2b..e39e35d2e082 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4093,12 +4093,8 @@ another_round: goto out; } -#ifdef CONFIG_NET_CLS_ACT - if (skb->tc_verd & TC_NCLS) { - skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); - goto ncls; - } -#endif + if (skb_skip_tc_classify(skb)) + goto skip_classify; if (pfmemalloc) goto skip_taps; @@ -4128,8 +4124,8 @@ skip_taps: #endif #ifdef CONFIG_NET_CLS_ACT skb->tc_verd = 0; -ncls: #endif +skip_classify: if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) goto drop; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 2095c83ce773..f04715a57300 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -426,11 +426,9 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, { int ret = -1, i; - if (skb->tc_verd & TC_NCLS) { - skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); - ret = TC_ACT_OK; - goto exec_done; - } + if (skb_skip_tc_classify(skb)) + return TC_ACT_OK; + for (i = 0; i < nr_actions; i++) { const struct tc_action *a = actions[i]; @@ -439,9 +437,8 @@ repeat: if (ret == TC_ACT_REPEAT) goto repeat; /* we need a ttl - JHS */ if (ret != TC_ACT_PIPE) - goto exec_done; + break; } -exec_done: return ret; } EXPORT_SYMBOL(tcf_action_exec); From a5135bcfba7345031df45e02cd150a45add47cf8 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 7 Jan 2017 17:06:36 -0500 Subject: [PATCH 4/6] net-tc: convert tc_verd to integer bitfields Extract the remaining two fields from tc_verd and remove the __u16 completely. TC_AT and TC_FROM are converted to equivalent two-bit integer fields tc_at and tc_from. Where possible, use existing helper skb_at_tc_ingress when reading tc_at. Introduce helper skb_reset_tc to clear fields. Not documenting tc_from and tc_at, because they will be replaced with single bit fields in follow-on patches. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- drivers/net/ifb.c | 7 +++---- drivers/staging/octeon/ethernet-tx.c | 5 ++--- include/linux/skbuff.h | 6 ++---- include/net/sch_generic.h | 10 ++++++++- include/uapi/linux/pkt_cls.h | 31 ---------------------------- net/core/dev.c | 10 ++++----- net/core/pktgen.c | 4 +--- net/core/skbuff.c | 3 --- net/sched/act_ife.c | 7 +++---- net/sched/act_mirred.c | 9 ++++---- net/sched/sch_netem.c | 2 +- 11 files changed, 29 insertions(+), 65 deletions(-) diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 442c4c4a9606..b73b6b6c066b 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -78,9 +78,9 @@ static void ifb_ri_tasklet(unsigned long _txp) } while ((skb = __skb_dequeue(&txp->tq)) != NULL) { - u32 from = G_TC_FROM(skb->tc_verd); + u32 from = skb->tc_from; - skb->tc_verd = 0; + skb_reset_tc(skb); skb->tc_skip_classify = 1; u64_stats_update_begin(&txp->tsync); @@ -239,7 +239,6 @@ static void ifb_setup(struct net_device *dev) static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev) { struct ifb_dev_private *dp = netdev_priv(dev); - u32 from = G_TC_FROM(skb->tc_verd); struct ifb_q_private *txp = dp->tx_private + skb_get_queue_mapping(skb); u64_stats_update_begin(&txp->rsync); @@ -247,7 +246,7 @@ static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev) txp->rx_bytes += skb->len; u64_stats_update_end(&txp->rsync); - if (!(from & (AT_INGRESS|AT_EGRESS)) || !skb->skb_iif) { + if (skb->tc_from == AT_STACK || !skb->skb_iif) { dev_kfree_skb(skb); dev->stats.rx_dropped++; return NETDEV_TX_OK; diff --git a/drivers/staging/octeon/ethernet-tx.c b/drivers/staging/octeon/ethernet-tx.c index 6b4c20872323..0b8053205091 100644 --- a/drivers/staging/octeon/ethernet-tx.c +++ b/drivers/staging/octeon/ethernet-tx.c @@ -23,6 +23,7 @@ #endif /* CONFIG_XFRM */ #include +#include #include @@ -369,9 +370,7 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev) #ifdef CONFIG_NET_SCHED skb->tc_index = 0; -#ifdef CONFIG_NET_CLS_ACT - skb->tc_verd = 0; -#endif /* CONFIG_NET_CLS_ACT */ + skb_reset_tc(skb); #endif /* CONFIG_NET_SCHED */ #endif /* REUSE_SKBUFFS_WITHOUT_FREE */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 570f60ec6cb4..f738d09947b2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -599,7 +599,6 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @skb_iif: ifindex of device we arrived on * @tc_index: Traffic control index - * @tc_verd: traffic control verdict * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices * @xmit_more: More SKBs are pending for this queue @@ -752,13 +751,12 @@ struct sk_buff { #endif #ifdef CONFIG_NET_CLS_ACT __u8 tc_skip_classify:1; + __u8 tc_at:2; + __u8 tc_from:2; #endif #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ -#ifdef CONFIG_NET_CLS_ACT - __u16 tc_verd; /* traffic control verdict */ -#endif #endif union { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 857356f2d74b..f80dba516964 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -409,10 +409,18 @@ bool tcf_destroy(struct tcf_proto *tp, bool force); void tcf_destroy_chain(struct tcf_proto __rcu **fl); int skb_do_redirect(struct sk_buff *); +static inline void skb_reset_tc(struct sk_buff *skb) +{ +#ifdef CONFIG_NET_CLS_ACT + skb->tc_at = 0; + skb->tc_from = 0; +#endif +} + static inline bool skb_at_tc_ingress(const struct sk_buff *skb) { #ifdef CONFIG_NET_CLS_ACT - return G_TC_AT(skb->tc_verd) & AT_INGRESS; + return skb->tc_at & AT_INGRESS; #else return false; #endif diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 1eed5d7509bc..cee753a7a40c 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -5,40 +5,9 @@ #include #ifdef __KERNEL__ -/* I think i could have done better macros ; for now this is stolen from - * some arch/mips code - jhs -*/ -#define _TC_MAKE32(x) ((x)) - -#define _TC_MAKEMASK1(n) (_TC_MAKE32(1) << _TC_MAKE32(n)) -#define _TC_MAKEMASK(v,n) (_TC_MAKE32((_TC_MAKE32(1)<<(v))-1) << _TC_MAKE32(n)) -#define _TC_MAKEVALUE(v,n) (_TC_MAKE32(v) << _TC_MAKE32(n)) -#define _TC_GETVALUE(v,n,m) ((_TC_MAKE32(v) & _TC_MAKE32(m)) >> _TC_MAKE32(n)) - -/* verdict bit breakdown - * -bit 6,7: Where this packet was last seen -0: Above the transmit example at the socket level -1: on the Ingress -2: on the Egress - - * - * */ - -#define S_TC_FROM _TC_MAKE32(6) -#define M_TC_FROM _TC_MAKEMASK(2,S_TC_FROM) -#define G_TC_FROM(x) _TC_GETVALUE(x,S_TC_FROM,M_TC_FROM) -#define V_TC_FROM(x) _TC_MAKEVALUE(x,S_TC_FROM) -#define SET_TC_FROM(v,n) ((V_TC_FROM(n)) | (v & ~M_TC_FROM)) #define AT_STACK 0x0 #define AT_INGRESS 0x1 #define AT_EGRESS 0x2 - -#define S_TC_AT _TC_MAKE32(12) -#define M_TC_AT _TC_MAKEMASK(2,S_TC_AT) -#define G_TC_AT(x) _TC_GETVALUE(x,S_TC_AT,M_TC_AT) -#define V_TC_AT(x) _TC_MAKEVALUE(x,S_TC_AT) -#define SET_TC_AT(v,n) ((V_TC_AT(n)) | (v & ~M_TC_AT)) #endif /* Action attributes */ diff --git a/net/core/dev.c b/net/core/dev.c index e39e35d2e082..8b5d6d033473 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3153,7 +3153,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) if (!cl) return skb; - /* skb->tc_verd and qdisc_skb_cb(skb)->pkt_len were already set + /* skb->tc_at and qdisc_skb_cb(skb)->pkt_len were already set * earlier by the caller. */ qdisc_bstats_cpu_update(cl->q, skb); @@ -3320,7 +3320,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) qdisc_pkt_len_init(skb); #ifdef CONFIG_NET_CLS_ACT - skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); + skb->tc_at = AT_EGRESS; # ifdef CONFIG_NET_EGRESS if (static_key_false(&egress_needed)) { skb = sch_handle_egress(skb, &rc, dev); @@ -3920,7 +3920,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, } qdisc_skb_cb(skb)->pkt_len = skb->len; - skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); + skb->tc_at = AT_INGRESS; qdisc_bstats_cpu_update(cl->q, skb); switch (tc_classify(skb, cl, &cl_res, false)) { @@ -4122,9 +4122,7 @@ skip_taps: goto out; } #endif -#ifdef CONFIG_NET_CLS_ACT - skb->tc_verd = 0; -#endif + skb_reset_tc(skb); skip_classify: if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) goto drop; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 8e69ce472236..96947f5d41e4 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3439,9 +3439,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) /* skb was 'freed' by stack, so clean few * bits and reuse it */ -#ifdef CONFIG_NET_CLS_ACT - skb->tc_verd = 0; /* reset reclass/redir ttl */ -#endif + skb_reset_tc(skb); } while (--burst > 0); goto out; /* Skips xmit_mode M_START_XMIT */ } else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5a03730fbc1a..adec4bf807d8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -878,9 +878,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #endif #ifdef CONFIG_NET_SCHED CHECK_SKB_FIELD(tc_index); -#ifdef CONFIG_NET_CLS_ACT - CHECK_SKB_FIELD(tc_verd); -#endif #endif } diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 80b848d3f096..921fb20eaa7c 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -736,12 +736,11 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, u16 metalen = ife_get_sz(skb, ife); int hdrm = metalen + skb->dev->hard_header_len + IFE_METAHDRLEN; unsigned int skboff = skb->dev->hard_header_len; - u32 at = G_TC_AT(skb->tc_verd); int new_len = skb->len + hdrm; bool exceed_mtu = false; int err; - if (at & AT_EGRESS) { + if (!skb_at_tc_ingress(skb)) { if (new_len > skb->dev->mtu) exceed_mtu = true; } @@ -773,7 +772,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, return TC_ACT_SHOT; } - if (!(at & AT_EGRESS)) + if (skb_at_tc_ingress(skb)) skb_push(skb, skb->dev->hard_header_len); iethh = (struct ethhdr *)skb->data; @@ -816,7 +815,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, ether_addr_copy(oethh->h_dest, iethh->h_dest); oethh->h_proto = htons(ife->eth_type); - if (!(at & AT_EGRESS)) + if (skb_at_tc_ingress(skb)) skb_pull(skb, skb->dev->hard_header_len); spin_unlock(&ife->tcf_lock); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 2d9fa6e0a1b4..8543279bba49 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -170,7 +170,6 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, int retval, err = 0; int m_eaction; int mac_len; - u32 at; tcf_lastuse_update(&m->tcf_tm); bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb); @@ -191,7 +190,6 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, goto out; } - at = G_TC_AT(skb->tc_verd); skb2 = skb_clone(skb, GFP_ATOMIC); if (!skb2) goto out; @@ -200,8 +198,9 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, * and devices expect a mac header on xmit, then mac push/pull is * needed. */ - if (at != tcf_mirred_act_direction(m_eaction) && m_mac_header_xmit) { - if (at & AT_EGRESS) { + if (skb->tc_at != tcf_mirred_act_direction(m_eaction) && + m_mac_header_xmit) { + if (!skb_at_tc_ingress(skb)) { /* caught at egress, act ingress: pull mac */ mac_len = skb_network_header(skb) - skb_mac_header(skb); skb_pull_rcsum(skb2, mac_len); @@ -213,7 +212,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, /* mirror is always swallowed */ if (tcf_mirred_is_act_redirect(m_eaction)) - skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at); + skb2->tc_from = skb2->tc_at; skb2->skb_iif = skb->dev->ifindex; skb2->dev = dev; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index bcfadfdea8e0..bb5c638b6852 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -626,7 +626,7 @@ deliver: * If it's at ingress let's pretend the delay is * from the network (tstamp will be updated). */ - if (G_TC_FROM(skb->tc_verd) & AT_INGRESS) + if (skb->tc_from & AT_INGRESS) skb->tstamp = 0; #endif From 8dc07fdbf2054f157e8333f940a1ad728916c786 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 7 Jan 2017 17:06:37 -0500 Subject: [PATCH 5/6] net-tc: convert tc_at to tc_at_ingress Field tc_at is used only within tc actions to distinguish ingress from egress processing. A single bit is sufficient for this purpose. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 ++- include/net/sch_generic.h | 3 +-- net/core/dev.c | 8 +++----- net/sched/act_mirred.c | 12 ++++++------ 4 files changed, 12 insertions(+), 14 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f738d09947b2..fab3f87e9bd1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -590,6 +590,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @fclone: skbuff clone status * @ipvs_property: skbuff is owned by ipvs * @tc_skip_classify: do not classify packet. set by IFB device + * @tc_at_ingress: used within tc_classify to distinguish in/egress * @peeked: this packet has been seen already, so stats have been * done for it, don't do them again * @nf_trace: netfilter packet trace flag @@ -751,7 +752,7 @@ struct sk_buff { #endif #ifdef CONFIG_NET_CLS_ACT __u8 tc_skip_classify:1; - __u8 tc_at:2; + __u8 tc_at_ingress:1; __u8 tc_from:2; #endif diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index f80dba516964..4bd6d5387209 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -412,7 +412,6 @@ int skb_do_redirect(struct sk_buff *); static inline void skb_reset_tc(struct sk_buff *skb) { #ifdef CONFIG_NET_CLS_ACT - skb->tc_at = 0; skb->tc_from = 0; #endif } @@ -420,7 +419,7 @@ static inline void skb_reset_tc(struct sk_buff *skb) static inline bool skb_at_tc_ingress(const struct sk_buff *skb) { #ifdef CONFIG_NET_CLS_ACT - return skb->tc_at & AT_INGRESS; + return skb->tc_at_ingress; #else return false; #endif diff --git a/net/core/dev.c b/net/core/dev.c index 8b5d6d033473..c143f1391117 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3153,9 +3153,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) if (!cl) return skb; - /* skb->tc_at and qdisc_skb_cb(skb)->pkt_len were already set - * earlier by the caller. - */ + /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */ qdisc_bstats_cpu_update(cl->q, skb); switch (tc_classify(skb, cl, &cl_res, false)) { @@ -3320,7 +3318,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) qdisc_pkt_len_init(skb); #ifdef CONFIG_NET_CLS_ACT - skb->tc_at = AT_EGRESS; + skb->tc_at_ingress = 0; # ifdef CONFIG_NET_EGRESS if (static_key_false(&egress_needed)) { skb = sch_handle_egress(skb, &rc, dev); @@ -3920,7 +3918,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, } qdisc_skb_cb(skb)->pkt_len = skb->len; - skb->tc_at = AT_INGRESS; + skb->tc_at_ingress = 1; qdisc_bstats_cpu_update(cl->q, skb); switch (tc_classify(skb, cl, &cl_res, false)) { diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 8543279bba49..e832c62fd705 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -39,15 +39,15 @@ static bool tcf_mirred_is_act_redirect(int action) return action == TCA_EGRESS_REDIR || action == TCA_INGRESS_REDIR; } -static u32 tcf_mirred_act_direction(int action) +static bool tcf_mirred_act_wants_ingress(int action) { switch (action) { case TCA_EGRESS_REDIR: case TCA_EGRESS_MIRROR: - return AT_EGRESS; + return false; case TCA_INGRESS_REDIR: case TCA_INGRESS_MIRROR: - return AT_INGRESS; + return true; default: BUG(); } @@ -198,7 +198,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, * and devices expect a mac header on xmit, then mac push/pull is * needed. */ - if (skb->tc_at != tcf_mirred_act_direction(m_eaction) && + if (skb_at_tc_ingress(skb) != tcf_mirred_act_wants_ingress(m_eaction) && m_mac_header_xmit) { if (!skb_at_tc_ingress(skb)) { /* caught at egress, act ingress: pull mac */ @@ -212,11 +212,11 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, /* mirror is always swallowed */ if (tcf_mirred_is_act_redirect(m_eaction)) - skb2->tc_from = skb2->tc_at; + skb2->tc_from = skb_at_tc_ingress(skb) ? AT_INGRESS : AT_EGRESS; skb2->skb_iif = skb->dev->ifindex; skb2->dev = dev; - if (tcf_mirred_act_direction(m_eaction) & AT_EGRESS) + if (!tcf_mirred_act_wants_ingress(m_eaction)) err = dev_queue_xmit(skb2); else err = netif_receive_skb(skb2); From bc31c905e946b5c55df5d2938335e78ffb3157ca Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 7 Jan 2017 17:06:38 -0500 Subject: [PATCH 6/6] net-tc: convert tc_from to tc_from_ingress and tc_redirected The tc_from field fulfills two roles. It encodes whether a packet was redirected by an act_mirred device and, if so, whether act_mirred was called on ingress or egress. Split it into separate fields. The information is needed by the special IFB loop, where packets are taken out of the normal path by act_mirred, forwarded to IFB, then reinjected at their original location (ingress or egress) by IFB. The IFB device cannot use skb->tc_at_ingress, because that may have been overwritten as the packet travels from act_mirred to ifb_xmit, when it passes through tc_classify on the IFB egress path. Cache this value in skb->tc_from_ingress. That field is valid only if a packet arriving at ifb_xmit came from act_mirred. Other packets can be crafted to reach ifb_xmit. These must be dropped. Set tc_redirected on redirection and drop all packets that do not have this bit set. Both fields are set only on cloned skbs in tc actions, so original packet sources do not have to clear the bit when reusing packets (notably, pktgen and octeon). Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- drivers/net/ifb.c | 13 +++++-------- include/linux/skbuff.h | 5 ++++- include/net/sch_generic.h | 2 +- include/uapi/linux/pkt_cls.h | 6 ------ net/sched/act_mirred.c | 6 ++++-- net/sched/sch_netem.c | 2 +- 6 files changed, 15 insertions(+), 19 deletions(-) diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index b73b6b6c066b..312fce7302d3 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -78,9 +78,7 @@ static void ifb_ri_tasklet(unsigned long _txp) } while ((skb = __skb_dequeue(&txp->tq)) != NULL) { - u32 from = skb->tc_from; - - skb_reset_tc(skb); + skb->tc_redirected = 0; skb->tc_skip_classify = 1; u64_stats_update_begin(&txp->tsync); @@ -101,13 +99,12 @@ static void ifb_ri_tasklet(unsigned long _txp) rcu_read_unlock(); skb->skb_iif = txp->dev->ifindex; - if (from & AT_EGRESS) { + if (!skb->tc_from_ingress) { dev_queue_xmit(skb); - } else if (from & AT_INGRESS) { + } else { skb_pull(skb, skb->mac_len); netif_receive_skb(skb); - } else - BUG(); + } } if (__netif_tx_trylock(txq)) { @@ -246,7 +243,7 @@ static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev) txp->rx_bytes += skb->len; u64_stats_update_end(&txp->rsync); - if (skb->tc_from == AT_STACK || !skb->skb_iif) { + if (!skb->tc_redirected || !skb->skb_iif) { dev_kfree_skb(skb); dev->stats.rx_dropped++; return NETDEV_TX_OK; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fab3f87e9bd1..3149a88de548 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -591,6 +591,8 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @ipvs_property: skbuff is owned by ipvs * @tc_skip_classify: do not classify packet. set by IFB device * @tc_at_ingress: used within tc_classify to distinguish in/egress + * @tc_redirected: packet was redirected by a tc action + * @tc_from_ingress: if tc_redirected, tc_at_ingress at time of redirect * @peeked: this packet has been seen already, so stats have been * done for it, don't do them again * @nf_trace: netfilter packet trace flag @@ -753,7 +755,8 @@ struct sk_buff { #ifdef CONFIG_NET_CLS_ACT __u8 tc_skip_classify:1; __u8 tc_at_ingress:1; - __u8 tc_from:2; + __u8 tc_redirected:1; + __u8 tc_from_ingress:1; #endif #ifdef CONFIG_NET_SCHED diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 4bd6d5387209..e2f426f6d62f 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -412,7 +412,7 @@ int skb_do_redirect(struct sk_buff *); static inline void skb_reset_tc(struct sk_buff *skb) { #ifdef CONFIG_NET_CLS_ACT - skb->tc_from = 0; + skb->tc_redirected = 0; #endif } diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index cee753a7a40c..a081efbd61a2 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -4,12 +4,6 @@ #include #include -#ifdef __KERNEL__ -#define AT_STACK 0x0 -#define AT_INGRESS 0x1 -#define AT_EGRESS 0x2 -#endif - /* Action attributes */ enum { TCA_ACT_UNSPEC, diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index e832c62fd705..84682f02b611 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -211,8 +211,10 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, } /* mirror is always swallowed */ - if (tcf_mirred_is_act_redirect(m_eaction)) - skb2->tc_from = skb_at_tc_ingress(skb) ? AT_INGRESS : AT_EGRESS; + if (tcf_mirred_is_act_redirect(m_eaction)) { + skb2->tc_redirected = 1; + skb2->tc_from_ingress = skb2->tc_at_ingress; + } skb2->skb_iif = skb->dev->ifindex; skb2->dev = dev; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index bb5c638b6852..c8bb62a1e744 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -626,7 +626,7 @@ deliver: * If it's at ingress let's pretend the delay is * from the network (tstamp will be updated). */ - if (skb->tc_from & AT_INGRESS) + if (skb->tc_redirected && skb->tc_from_ingress) skb->tstamp = 0; #endif