From 573e8fca255a27e3573b51f9b183d62641c47a3d Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 22 Aug 2014 13:33:47 -0700 Subject: [PATCH 1/6] net: skb_gro_checksum_* functions Add skb_gro_checksum_validate, skb_gro_checksum_validate_zero_check, and skb_gro_checksum_simple_validate, and __skb_gro_checksum_complete. These are the cognates of the normal checksum functions but are used in the gro_receive path and operate on GRO related fields in sk_buffs. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 76 +++++++++++++++++++++++++++++++++++++-- net/core/dev.c | 34 +++++++++++++++++- 2 files changed, 107 insertions(+), 3 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7e2b0b8b5cd7..eb73444e1bd0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1883,7 +1883,13 @@ struct napi_gro_cb { u16 proto; /* Used in udp_gro_receive */ - u16 udp_mark; + u8 udp_mark:1; + + /* GRO checksum is valid */ + u8 csum_valid:1; + + /* Number encapsulation layers crossed */ + u8 encapsulation; /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; @@ -2154,11 +2160,77 @@ static inline void *skb_gro_network_header(struct sk_buff *skb) static inline void skb_gro_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { - if (skb->ip_summed == CHECKSUM_COMPLETE) + if (NAPI_GRO_CB(skb)->csum_valid) NAPI_GRO_CB(skb)->csum = csum_sub(NAPI_GRO_CB(skb)->csum, csum_partial(start, len, 0)); } +/* GRO checksum functions. These are logical equivalents of the normal + * checksum functions (in skbuff.h) except that they operate on the GRO + * offsets and fields in sk_buff. 
+ */ + +__sum16 __skb_gro_checksum_complete(struct sk_buff *skb); + +static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb, + bool zero_okay, + __sum16 check) +{ + return (skb->ip_summed != CHECKSUM_PARTIAL && + (skb->ip_summed != CHECKSUM_UNNECESSARY || + (NAPI_GRO_CB(skb)->encapsulation > skb->encapsulation)) && + (!zero_okay || check)); +} + +static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb, + __wsum psum) +{ + if (NAPI_GRO_CB(skb)->csum_valid && + !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum))) + return 0; + + NAPI_GRO_CB(skb)->csum = psum; + + return __skb_gro_checksum_complete(skb); +} + +/* Update skb for CHECKSUM_UNNECESSARY when we verified a top level + * checksum or an encapsulated one during GRO. This saves work + * if we fallback to normal path with the packet. + */ +static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) +{ + if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + if (NAPI_GRO_CB(skb)->encapsulation) + skb->encapsulation = 1; + } else if (skb->ip_summed != CHECKSUM_PARTIAL) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->encapsulation = 0; + } +} + +#define __skb_gro_checksum_validate(skb, proto, zero_okay, check, \ + compute_pseudo) \ +({ \ + __sum16 __ret = 0; \ + if (__skb_gro_checksum_validate_needed(skb, zero_okay, check)) \ + __ret = __skb_gro_checksum_validate_complete(skb, \ + compute_pseudo(skb, proto)); \ + if (!__ret) \ + skb_gro_incr_csum_unnecessary(skb); \ + __ret; \ +}) + +#define skb_gro_checksum_validate(skb, proto, compute_pseudo) \ + __skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo) + +#define skb_gro_checksum_validate_zero_check(skb, proto, check, \ + compute_pseudo) \ + __skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo) + +#define skb_gro_checksum_simple_validate(skb) \ + __skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo) + static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, 
unsigned short type, const void *daddr, const void *saddr, diff --git a/net/core/dev.c b/net/core/dev.c index 1421dad4cb29..b6a718ec11c1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3962,7 +3962,13 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff goto normal; gro_list_prepare(napi, skb); - NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */ + + if (skb->ip_summed == CHECKSUM_COMPLETE) { + NAPI_GRO_CB(skb)->csum = skb->csum; + NAPI_GRO_CB(skb)->csum_valid = 1; + } else { + NAPI_GRO_CB(skb)->csum_valid = 0; + } rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { @@ -3975,6 +3981,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->udp_mark = 0; + NAPI_GRO_CB(skb)->encapsulation = 0; pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); break; @@ -4205,6 +4212,31 @@ gro_result_t napi_gro_frags(struct napi_struct *napi) } EXPORT_SYMBOL(napi_gro_frags); +/* Compute the checksum from gro_offset and return the folded value + * after adding in any pseudo checksum. + */ +__sum16 __skb_gro_checksum_complete(struct sk_buff *skb) +{ + __wsum wsum; + __sum16 sum; + + wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), 0); + + /* NAPI_GRO_CB(skb)->csum holds pseudo checksum */ + sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum)); + if (likely(!sum)) { + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && + !skb->csum_complete_sw) + netdev_rx_csum_fault(skb->dev); + } + + NAPI_GRO_CB(skb)->csum = wsum; + NAPI_GRO_CB(skb)->csum_valid = 1; + + return sum; +} +EXPORT_SYMBOL(__skb_gro_checksum_complete); + /* * net_rps_action_and_irq_enable sends any pending IPI's for rps. * Note: called with local irq disabled, but exits with local irq enabled. 
From 1933a7852ce6a81349855431b25122d7666bbfca Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 22 Aug 2014 13:34:04 -0700 Subject: [PATCH 2/6] net: add gro_compute_pseudo functions Add inet_gro_compute_pseudo and ip6_gro_compute_pseudo. These are the logical equivalents of inet_compute_pseudo and ip6_compute_pseudo for GRO path. The IP header is taken from skb_gro_network_header. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/ip.h | 8 ++++++++ include/net/ip6_checksum.h | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/include/net/ip.h b/include/net/ip.h index db4a771b9ef3..c8fd6112bd0b 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -364,6 +364,14 @@ static inline void inet_set_txhash(struct sock *sk) sk->sk_txhash = flow_hash_from_keys(&keys); } +static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto) +{ + const struct iphdr *iph = skb_gro_network_header(skb); + + return csum_tcpudp_nofold(iph->saddr, iph->daddr, + skb_gro_len(skb), proto, 0); +} + /* * Map a multicast IP onto multicast MAC for type ethernet. */ diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h index 55236cb71174..1a49b73f7f6e 100644 --- a/include/net/ip6_checksum.h +++ b/include/net/ip6_checksum.h @@ -48,6 +48,14 @@ static inline __wsum ip6_compute_pseudo(struct sk_buff *skb, int proto) skb->len, proto, 0)); } +static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto) +{ + const struct ipv6hdr *iph = skb_gro_network_header(skb); + + return ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, + skb_gro_len(skb), proto, 0)); +} + static __inline__ __sum16 tcp_v6_check(int len, const struct in6_addr *saddr, const struct in6_addr *daddr, From 758f75d1ffa9ef482ae095f40087cf217e1f41b0 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 22 Aug 2014 13:34:22 -0700 Subject: [PATCH 3/6] gre: call skb_gro_checksum_simple_validate Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- net/ipv4/gre_offload.c | 41 ++++++----------------------------------- 1 file changed, 6 insertions(+), 35 deletions(-) diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 6556263c8fa5..d1bd16937d93 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -119,28 +119,6 @@ out: return segs; } -/* Compute the whole skb csum in s/w and store it, then verify GRO csum - * starting from gro_offset. - */ -static __sum16 gro_skb_checksum(struct sk_buff *skb) -{ - __sum16 sum; - - skb->csum = skb_checksum(skb, 0, skb->len, 0); - NAPI_GRO_CB(skb)->csum = csum_sub(skb->csum, - csum_partial(skb->data, skb_gro_offset(skb), 0)); - sum = csum_fold(NAPI_GRO_CB(skb)->csum); - if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) { - if (unlikely(!sum) && !skb->csum_complete_sw) - netdev_rx_csum_fault(skb->dev); - } else { - skb->ip_summed = CHECKSUM_COMPLETE; - skb->csum_complete_sw = 1; - } - - return sum; -} - static struct sk_buff **gre_gro_receive(struct sk_buff **head, struct sk_buff *skb) { @@ -192,22 +170,15 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, if (unlikely(!greh)) goto out_unlock; } - if (greh->flags & GRE_CSUM) { /* Need to verify GRE csum first */ - __sum16 csum = 0; - if (skb->ip_summed == CHECKSUM_COMPLETE) - csum = csum_fold(NAPI_GRO_CB(skb)->csum); - /* Don't trust csum error calculated/reported by h/w */ - if (skb->ip_summed == CHECKSUM_NONE || csum != 0) - csum = gro_skb_checksum(skb); - - /* GRE CSUM is the 1's complement of the 1's complement sum - * of the GRE hdr plus payload so it should add up to 0xffff - * (and 0 after csum_fold()) just like the IPv4 hdr csum. - */ - if (csum) + /* Don't bother verifying checksum if we're going to flush anyway. 
*/ + if (greh->flags & GRE_CSUM) { + if (!NAPI_GRO_CB(skb)->flush && + skb_gro_checksum_simple_validate(skb)) goto out_unlock; + NAPI_GRO_CB(skb)->encapsulation++; } + flush = 0; for (p = *head; p; p = p->next) { From 149d0774a729497c6a876260d3884826088724b6 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 22 Aug 2014 13:34:30 -0700 Subject: [PATCH 4/6] tcp: Call skb_gro_checksum_validate In tcp[64]_gro_receive call skb_gro_checksum_validate to validate TCP checksum in the gro context. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv4/tcp_offload.c | 27 +++------------------------ net/ipv6/tcpv6_offload.c | 26 +++----------------------- 2 files changed, 6 insertions(+), 47 deletions(-) diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index bc1b83cb8309..72912533a191 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -288,35 +288,14 @@ static int tcp_v4_gso_send_check(struct sk_buff *skb) static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) { - /* Use the IP hdr immediately proceeding for this transport */ - const struct iphdr *iph = skb_gro_network_header(skb); - __wsum wsum; - /* Don't bother verifying checksum if we're going to flush anyway. 
*/ - if (NAPI_GRO_CB(skb)->flush) - goto skip_csum; - - wsum = NAPI_GRO_CB(skb)->csum; - - switch (skb->ip_summed) { - case CHECKSUM_NONE: - wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), - 0); - - /* fall through */ - - case CHECKSUM_COMPLETE: - if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, - wsum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - } - + if (!NAPI_GRO_CB(skb)->flush && + skb_gro_checksum_validate(skb, IPPROTO_TCP, + inet_gro_compute_pseudo)) { NAPI_GRO_CB(skb)->flush = 1; return NULL; } -skip_csum: return tcp_gro_receive(head, skb); } diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 01b0ff9a0c2c..dbb3d9262bf6 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -35,34 +35,14 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb) static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, struct sk_buff *skb) { - const struct ipv6hdr *iph = skb_gro_network_header(skb); - __wsum wsum; - /* Don't bother verifying checksum if we're going to flush anyway. */ - if (NAPI_GRO_CB(skb)->flush) - goto skip_csum; - - wsum = NAPI_GRO_CB(skb)->csum; - - switch (skb->ip_summed) { - case CHECKSUM_NONE: - wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), - wsum); - - /* fall through */ - - case CHECKSUM_COMPLETE: - if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr, - wsum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - } - + if (!NAPI_GRO_CB(skb)->flush && + skb_gro_checksum_validate(skb, IPPROTO_TCP, + ip6_gro_compute_pseudo)) { NAPI_GRO_CB(skb)->flush = 1; return NULL; } -skip_csum: return tcp_gro_receive(head, skb); } From 57c67ff4bd92af634f7c91c40eb02a96dd785dda Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 22 Aug 2014 13:34:44 -0700 Subject: [PATCH 5/6] udp: additional GRO support Implement GRO for UDPv6. Add UDP checksum verification in gro_receive for both UDP4 and UDP6 calling skb_gro_checksum_validate_zero_check. 
Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/udp.h | 18 +++++++++++++ net/ipv4/udp.c | 1 + net/ipv4/udp_offload.c | 61 ++++++++++++++++++++++++++++++------------ net/ipv6/udp_offload.c | 33 +++++++++++++++++++++++ 4 files changed, 96 insertions(+), 17 deletions(-) diff --git a/include/net/udp.h b/include/net/udp.h index 70f941368ace..16f4e80f0519 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -158,6 +158,24 @@ static inline __sum16 udp_v4_check(int len, __be32 saddr, void udp_set_csum(bool nocheck, struct sk_buff *skb, __be32 saddr, __be32 daddr, int len); +struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, + struct udphdr *uh); +int udp_gro_complete(struct sk_buff *skb, int nhoff); + +static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) +{ + struct udphdr *uh; + unsigned int hlen, off; + + off = skb_gro_offset(skb); + hlen = off + sizeof(*uh); + uh = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) + uh = skb_gro_header_slow(skb, hlen, off); + + return uh; +} + /* hash routines shared between UDPv4/6 and UDP-Litev4/6 */ static inline void udp_lib_hash(struct sock *sk) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 32f9571e776b..3549c21fe5f7 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -99,6 +99,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 59035bc3008d..8ed460e3753c 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -228,29 +228,22 @@ unlock: } EXPORT_SYMBOL(udp_del_offload); -static struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb) +struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, + struct udphdr *uh) { struct udp_offload_priv *uo_priv; struct sk_buff *p, **pp = NULL; - struct udphdr *uh, *uh2; - unsigned int hlen, off; + struct udphdr *uh2; + unsigned int off = skb_gro_offset(skb); int flush = 
1; if (NAPI_GRO_CB(skb)->udp_mark || - (!skb->encapsulation && skb->ip_summed != CHECKSUM_COMPLETE)) + (!skb->encapsulation && !NAPI_GRO_CB(skb)->csum_valid)) goto out; /* mark that this skb passed once through the udp gro layer */ NAPI_GRO_CB(skb)->udp_mark = 1; - - off = skb_gro_offset(skb); - hlen = off + sizeof(*uh); - uh = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, hlen)) { - uh = skb_gro_header_slow(skb, hlen, off); - if (unlikely(!uh)) - goto out; - } + NAPI_GRO_CB(skb)->encapsulation++; rcu_read_lock(); uo_priv = rcu_dereference(udp_offload_base); @@ -269,7 +262,12 @@ unflush: continue; uh2 = (struct udphdr *)(p->data + off); - if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) { + + /* Match ports, and the checksums must be either both zero + * or both nonzero. + */ + if ((*(u32 *)&uh->source != *(u32 *)&uh2->source) || + (!uh->check ^ !uh2->check)) { NAPI_GRO_CB(p)->same_flow = 0; continue; } @@ -286,7 +284,24 @@ out: return pp; } -static int udp_gro_complete(struct sk_buff *skb, int nhoff) +static struct sk_buff **udp4_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + struct udphdr *uh = udp_gro_udphdr(skb); + + /* Don't bother verifying checksum if we're going to flush anyway.
*/ + if (unlikely(!uh) || + (!NAPI_GRO_CB(skb)->flush && + skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check, + inet_gro_compute_pseudo))) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } + + return udp_gro_receive(head, skb, uh); +} + +int udp_gro_complete(struct sk_buff *skb, int nhoff) { struct udp_offload_priv *uo_priv; __be16 newlen = htons(skb->len - nhoff); @@ -311,12 +326,24 @@ static int udp_gro_complete(struct sk_buff *skb, int nhoff) return err; } +int udp4_gro_complete(struct sk_buff *skb, int nhoff) +{ + const struct iphdr *iph = ip_hdr(skb); + struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); + + if (uh->check) + uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr, + iph->daddr, 0); + + return udp_gro_complete(skb, nhoff); +} + static const struct net_offload udpv4_offload = { .callbacks = { .gso_send_check = udp4_ufo_send_check, .gso_segment = udp4_ufo_fragment, - .gro_receive = udp_gro_receive, - .gro_complete = udp_gro_complete, + .gro_receive = udp4_gro_receive, + .gro_complete = udp4_gro_complete, }, }; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 0ae3d98f83e0..b13e377e9c53 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -10,6 +10,7 @@ * UDPv6 GSO support */ #include +#include #include #include #include @@ -127,10 +128,42 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, out: return segs; } + +static struct sk_buff **udp6_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + struct udphdr *uh = udp_gro_udphdr(skb); + + /* Don't bother verifying checksum if we're going to flush anyway. 
*/ + if (unlikely(!uh) || + (!NAPI_GRO_CB(skb)->flush && + skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check, + ip6_gro_compute_pseudo))) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } + + return udp_gro_receive(head, skb, uh); +} + +int udp6_gro_complete(struct sk_buff *skb, int nhoff) +{ + const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); + + if (uh->check) + uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr, + &ipv6h->daddr, 0); + + return udp_gro_complete(skb, nhoff); +} + static const struct net_offload udpv6_offload = { .callbacks = { .gso_send_check = udp6_ufo_send_check, .gso_segment = udp6_ufo_fragment, + .gro_receive = udp6_gro_receive, + .gro_complete = udp6_gro_complete, }, }; From 48a5fc773190bd5339869003fa65d38559bb8890 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 22 Aug 2014 13:34:52 -0700 Subject: [PATCH 6/6] gre: When GRE csum is present count as encap layer wrt csum In GRE demux, if the GRE checksum is present, pop rcv encapsulation so that any encapsulated checksums are treated as tunnel checksums. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv4/gre_demux.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 0485bf7f8f03..7c1a8ff974dd 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -125,6 +125,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, *csum_err = true; return -EINVAL; } + skb_pop_rcv_encapsulation(skb); options++; }