Merge branch 'inet_csums'

Tom Herbert says:

====================
net: Checksum offload changes

I am working on overhauling RX checksum offload. Goals of this effort
are:

- Specify what exactly it means when driver returns CHECKSUM_UNNECESSARY
- Preserve CHECKSUM_COMPLETE through encapsulation layers
- Don't do skb_checksum more than once per packet
- Unify GRO and non-GRO csum verification as much as possible
- Unify the checksum functions (checksum_init)
- Simplify code

What is in this first patch set:

- Create a common "checksum_init" function which is called from
  TCPv{4,6} and UDPv{4,6}
- Add some support for RFC6936, UDP/IPv6 zero checksums
- Add architecture support for csum_add and provide implementations
  for x86_64 and Sparc 32 and 64 bit (please test the latter)

Please review carefully and test if possible, mucking with basic
checksum functions is always a little precarious :-)
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2014-05-05 15:28:52 -04:00
commit c020b9d420
12 changed files with 183 additions and 83 deletions

View File

@ -238,4 +238,16 @@ static inline __sum16 ip_compute_csum(const void *buff, int len)
return csum_fold(csum_partial(buff, len, 0)); return csum_fold(csum_partial(buff, len, 0));
} }
#define HAVE_ARCH_CSUM_ADD
/* Add two 32-bit partial checksums with end-around carry.
 *
 * addcc adds addend into csum and records the carry in the condition
 * codes; addx then adds that carry (together with %g0) back into the
 * result. Returns the updated csum.
 */
static inline __wsum csum_add(__wsum csum, __wsum addend)
{
	__asm__ __volatile__(
		"addcc %0, %1, %0\n"
		"addx %0, %%g0, %0"
		: "=r" (csum)
		: "r" (addend), "0" (csum)
		: "cc");	/* addcc modifies the condition codes */

	return csum;
}
#endif /* !(__SPARC_CHECKSUM_H) */ #endif /* !(__SPARC_CHECKSUM_H) */

View File

@ -164,4 +164,16 @@ static inline __sum16 ip_compute_csum(const void *buff, int len)
return csum_fold(csum_partial(buff, len, 0)); return csum_fold(csum_partial(buff, len, 0));
} }
#define HAVE_ARCH_CSUM_ADD
/* Add two 32-bit partial checksums with end-around carry.
 *
 * addcc adds addend into csum and records the carry in the condition
 * codes; addx then adds that carry (together with %g0) back into the
 * result. Returns the updated csum.
 */
static inline __wsum csum_add(__wsum csum, __wsum addend)
{
	__asm__ __volatile__(
		"addcc %0, %1, %0\n"
		"addx %0, %%g0, %0"
		: "=r" (csum)
		: "r" (addend), "0" (csum)
		: "cc");	/* addcc modifies the condition codes */

	return csum;
}
#endif /* !(__SPARC64_CHECKSUM_H) */ #endif /* !(__SPARC64_CHECKSUM_H) */

View File

@ -184,8 +184,15 @@ static inline unsigned add32_with_carry(unsigned a, unsigned b)
asm("addl %2,%0\n\t" asm("addl %2,%0\n\t"
"adcl $0,%0" "adcl $0,%0"
: "=r" (a) : "=r" (a)
: "0" (a), "r" (b)); : "0" (a), "rm" (b));
return a; return a;
} }
#define HAVE_ARCH_CSUM_ADD
/* Fold addend into csum by handing both values, as plain unsigned
 * integers, to add32_with_carry() and converting the result back to a
 * __wsum.
 */
static inline __wsum csum_add(__wsum csum, __wsum addend)
{
	unsigned sum = (__force unsigned)csum;
	unsigned extra = (__force unsigned)addend;

	return (__force __wsum)add32_with_carry(sum, extra);
}
#endif /* _ASM_X86_CHECKSUM_64_H */ #endif /* _ASM_X86_CHECKSUM_64_H */

View File

@ -2741,6 +2741,99 @@ static inline __sum16 skb_checksum_complete(struct sk_buff *skb)
0 : __skb_checksum_complete(skb); 0 : __skb_checksum_complete(skb);
} }
/* Decide whether checksum-complete validation still has to be performed.
 *
 * Returns false when skb_csum_unnecessary() already holds for the skb, or
 * when a zero checksum is permitted (zero_okay) and check is zero; in the
 * latter case the skb is also marked CHECKSUM_UNNECESSARY. Returns true in
 * every other case.
 */
static inline bool __skb_checksum_validate_needed(struct sk_buff *skb,
						  bool zero_okay,
						  __sum16 check)
{
	if (skb_csum_unnecessary(skb))
		return false;

	/* A non-zero checksum, or a zero one that is not allowed, must be
	 * validated.
	 */
	if (!zero_okay || check)
		return true;

	skb->ip_summed = CHECKSUM_UNNECESSARY;
	return false;
}
/* For small packets <= CHECKSUM_BREAK perform checksum complete directly
 * in checksum_init.
 */
#define CHECKSUM_BREAK 76
/* Validate (init) a checksum using checksum-complete information.
 *
 * psum is the pseudo-header sum. If the skb is CHECKSUM_COMPLETE and
 * psum added to skb->csum folds to zero, the skb is marked
 * CHECKSUM_UNNECESSARY. Otherwise psum is stored in skb->csum and
 * __skb_checksum_complete() is called when complete is set or the packet
 * is no longer than CHECKSUM_BREAK.
 *
 * Return values:
 *	0: checksum is validated, or validation is left to
 *	   skb_checksum_complete. In the latter case ip_summed will not be
 *	   CHECKSUM_UNNECESSARY and the pseudo checksum is stored in
 *	   skb->csum for use in __skb_checksum_complete
 *	non-zero: value of invalid checksum
 */
static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb,
							bool complete,
							__wsum psum)
{
	bool verified = skb->ip_summed == CHECKSUM_COMPLETE &&
			!csum_fold(csum_add(psum, skb->csum));

	if (verified) {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		return 0;
	}

	skb->csum = psum;

	/* Long packets are not summed here unless the caller asked for it */
	if (!complete && skb->len > CHECKSUM_BREAK)
		return 0;

	return __skb_checksum_complete(skb);
}
/* Pseudo-header compute callback that ignores both arguments and always
 * returns a zero sum; passed as compute_pseudo by
 * skb_checksum_simple_validate().
 */
static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto)
{
return 0;
}
/* Perform checksum validate (init). This is a macro rather than a function
 * so that compute_pseudo(skb, proto) is only evaluated when validation is
 * actually needed. First __skb_checksum_validate_needed() is tried, which
 * involves no checksum computation (checksum unnecessary, or an allowed zero
 * checksum); only if it returns true is the pseudo header computed and
 * handed to __skb_checksum_validate_complete().
 *
 * Note that skb appears more than once in the expansion, so the argument
 * should be free of side effects.
 *
 * Return values:
 *	0: checksum is validated, or validation is left to
 *	   skb_checksum_complete
 *	non-zero: value of invalid checksum
 */
#define __skb_checksum_validate(skb, proto, complete, \
zero_okay, check, compute_pseudo) \
({ \
__sum16 __ret = 0; \
if (__skb_checksum_validate_needed(skb, zero_okay, check)) \
__ret = __skb_checksum_validate_complete(skb, \
complete, compute_pseudo(skb, proto)); \
__ret; \
})
/* Checksum init: complete=false, so __skb_checksum_complete() is only run
 * for packets no longer than CHECKSUM_BREAK; a zero checksum is not
 * accepted (zero_okay=false).
 */
#define skb_checksum_init(skb, proto, compute_pseudo) \
__skb_checksum_validate(skb, proto, false, false, 0, compute_pseudo)
/* Same as skb_checksum_init(), but zero_okay=true: a zero value of check
 * is accepted without validation.
 */
#define skb_checksum_init_zero_check(skb, proto, check, compute_pseudo) \
__skb_checksum_validate(skb, proto, false, true, check, compute_pseudo)
/* Checksum validate: complete=true, so the checksum is always completed
 * when it could not be validated otherwise; a zero checksum is not
 * accepted (zero_okay=false).
 */
#define skb_checksum_validate(skb, proto, compute_pseudo) \
__skb_checksum_validate(skb, proto, true, false, 0, compute_pseudo)
/* Checksum validate with complete=true and zero_okay=true: a zero value of
 * check is accepted without validation, any other checksum is always
 * completed when it could not be validated otherwise.
 *
 * Expands to __skb_checksum_validate(), like the other wrappers in this
 * family.
 */
#define skb_checksum_validate_zero_check(skb, proto, check,		\
					 compute_pseudo)		\
	__skb_checksum_validate(skb, proto, true, true, check, compute_pseudo)
/* Checksum validate with no pseudo header contribution: proto is 0 and
 * null_compute_pseudo supplies a zero pseudo-header sum. complete=true and
 * zero_okay=false, as in skb_checksum_validate().
 */
#define skb_checksum_simple_validate(skb) \
__skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo)
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
void nf_conntrack_destroy(struct nf_conntrack *nfct); void nf_conntrack_destroy(struct nf_conntrack *nfct);
static inline void nf_conntrack_put(struct nf_conntrack *nfct) static inline void nf_conntrack_put(struct nf_conntrack *nfct)

View File

@ -57,12 +57,14 @@ static __inline__ __wsum csum_and_copy_to_user
} }
#endif #endif
#ifndef HAVE_ARCH_CSUM_ADD
static inline __wsum csum_add(__wsum csum, __wsum addend) static inline __wsum csum_add(__wsum csum, __wsum addend)
{ {
u32 res = (__force u32)csum; u32 res = (__force u32)csum;
res += (__force u32)addend; res += (__force u32)addend;
return (__force __wsum)(res + (res < (__force u32)addend)); return (__force __wsum)(res + (res < (__force u32)addend));
} }
#endif
static inline __wsum csum_sub(__wsum csum, __wsum addend) static inline __wsum csum_sub(__wsum csum, __wsum addend)
{ {

View File

@ -342,6 +342,12 @@ static inline void ip_select_ident_more(struct sk_buff *skb, struct dst_entry *d
__ip_select_ident(iph, dst, more); __ip_select_ident(iph, dst, more);
} }
static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto)
{
return csum_tcpudp_nofold(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
skb->len, proto, 0);
}
/* /*
* Map a multicast IP onto multicast MAC for type ethernet. * Map a multicast IP onto multicast MAC for type ethernet.
*/ */

View File

@ -41,6 +41,13 @@ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
__wsum csum); __wsum csum);
#endif #endif
static inline __wsum ip6_compute_pseudo(struct sk_buff *skb, int proto)
{
return ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr,
skb->len, proto, 0));
}
static __inline__ __sum16 tcp_v6_check(int len, static __inline__ __sum16 tcp_v6_check(int len,
const struct in6_addr *saddr, const struct in6_addr *saddr,
const struct in6_addr *daddr, const struct in6_addr *daddr,

View File

@ -1744,28 +1744,6 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
return sk; return sk;
} }
/* Set up receive checksum state for a TCP/IPv4 segment.
 *
 * Returns 0 when the checksum was accepted via the CHECKSUM_COMPLETE path,
 * or when the packet is longer than 76 bytes and is not summed here (in
 * which case skb->csum holds the csum_tcpudp_nofold() pseudo-header sum).
 * For shorter packets the result of __skb_checksum_complete() is returned.
 */
static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
{
const struct iphdr *iph = ip_hdr(skb);
/* CHECKSUM_COMPLETE: a zero result from tcp_v4_check() over skb->csum is
 * treated as a verified checksum.
 */
if (skb->ip_summed == CHECKSUM_COMPLETE) {
if (!tcp_v4_check(skb->len, iph->saddr,
iph->daddr, skb->csum)) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
return 0;
}
}
/* Seed skb->csum with the pseudo-header sum (initial sum 0) */
skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
skb->len, IPPROTO_TCP, 0);
/* Packets of at most 76 bytes are checksummed right away */
if (skb->len <= 76) {
return __skb_checksum_complete(skb);
}
return 0;
}
/* The socket must have it's spinlock held when we get /* The socket must have it's spinlock held when we get
* here. * here.
* *
@ -1960,7 +1938,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
* Packet length and doff are validated by header prediction, * Packet length and doff are validated by header prediction,
* provided case of th->doff==0 is eliminated. * provided case of th->doff==0 is eliminated.
* So, we defer the checks. */ * So, we defer the checks. */
if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
goto csum_error; goto csum_error;
th = tcp_hdr(skb); th = tcp_hdr(skb);

View File

@ -1672,7 +1672,6 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
int proto) int proto)
{ {
const struct iphdr *iph;
int err; int err;
UDP_SKB_CB(skb)->partial_cov = 0; UDP_SKB_CB(skb)->partial_cov = 0;
@ -1684,22 +1683,8 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
return err; return err;
} }
iph = ip_hdr(skb); return skb_checksum_init_zero_check(skb, proto, uh->check,
if (uh->check == 0) { inet_compute_pseudo);
skb->ip_summed = CHECKSUM_UNNECESSARY;
} else if (skb->ip_summed == CHECKSUM_COMPLETE) {
if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
proto, skb->csum))
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
if (!skb_csum_unnecessary(skb))
skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
skb->len, proto, 0);
/* Probably, we should checksum udp header (it should be in cache
* in any case) and data in tiny packets (< rx copybreak).
*/
return 0;
} }
/* /*

View File

@ -75,25 +75,12 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
return err; return err;
} }
if (uh->check == 0) { /* To support RFC 6936 (allow zero checksum in UDP/IPV6 for tunnels)
/* RFC 2460 section 8.1 says that we SHOULD log * we accept a checksum of zero here. When we find the socket
this error. Well, it is reasonable. * for the UDP packet we'll check if that socket allows zero checksum
*/ * for IPv6 (set by socket option).
LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n", */
&ipv6_hdr(skb)->saddr, ntohs(uh->source), return skb_checksum_init_zero_check(skb, proto, uh->check,
&ipv6_hdr(skb)->daddr, ntohs(uh->dest)); ip6_compute_pseudo);
return 1;
}
if (skb->ip_summed == CHECKSUM_COMPLETE &&
!csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
skb->len, proto, skb->csum))
skb->ip_summed = CHECKSUM_UNNECESSARY;
if (!skb_csum_unnecessary(skb))
skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr,
skb->len, proto, 0));
return 0;
} }
EXPORT_SYMBOL(udp6_csum_init); EXPORT_SYMBOL(udp6_csum_init);

View File

@ -1294,25 +1294,6 @@ out:
return NULL; return NULL;
} }
/* Set up receive checksum state for a TCP/IPv6 segment.
 *
 * Returns 0 when the checksum was accepted via the CHECKSUM_COMPLETE path,
 * or when the packet is longer than 76 bytes and is not summed here (in
 * which case skb->csum holds the unfolded, complemented tcp_v6_check()
 * result computed with an initial sum of 0). For shorter packets the
 * result of __skb_checksum_complete() is returned.
 */
static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
{
/* CHECKSUM_COMPLETE: a zero result from tcp_v6_check() over skb->csum is
 * treated as a verified checksum.
 */
if (skb->ip_summed == CHECKSUM_COMPLETE) {
if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr, skb->csum)) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
return 0;
}
}
/* Seed skb->csum from the header addresses and length (initial sum 0) */
skb->csum = ~csum_unfold(tcp_v6_check(skb->len,
&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr, 0));
/* Packets of at most 76 bytes are checksummed right away */
if (skb->len <= 76)
return __skb_checksum_complete(skb);
return 0;
}
/* The socket must have it's spinlock held when we get /* The socket must have it's spinlock held when we get
* here. * here.
* *
@ -1486,7 +1467,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, th->doff*4)) if (!pskb_may_pull(skb, th->doff*4))
goto discard_it; goto discard_it;
if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb)) if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
goto csum_error; goto csum_error;
th = tcp_hdr(skb); th = tcp_hdr(skb);

View File

@ -760,6 +760,17 @@ static void flush_stack(struct sock **stack, unsigned int count,
if (unlikely(skb1)) if (unlikely(skb1))
kfree_skb(skb1); kfree_skb(skb1);
} }
/* Log (via LIMIT_NETDEBUG) that a UDP/IPv6 packet arrived with a zero
 * checksum, printing its source and destination address and port.
 */
static void udp6_csum_zero_error(struct sk_buff *skb)
{
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	const struct udphdr *uh = udp_hdr(skb);

	/* RFC 2460 section 8.1 says that we SHOULD log
	 * this error. Well, it is reasonable.
	 */
	LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n",
		       &ip6h->saddr, ntohs(uh->source),
		       &ip6h->daddr, ntohs(uh->dest));
}
/* /*
* Note: called only from the BH handler context, * Note: called only from the BH handler context,
* so we don't need to lock the hashes. * so we don't need to lock the hashes.
@ -779,7 +790,12 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
dif = inet6_iif(skb); dif = inet6_iif(skb);
sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
while (sk) { while (sk) {
stack[count++] = sk; /* If zero checksum and sk_no_check is not on for
* the socket then skip it.
*/
if (uh->check || sk->sk_no_check)
stack[count++] = sk;
sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr, sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
uh->source, saddr, dif); uh->source, saddr, dif);
if (unlikely(count == ARRAY_SIZE(stack))) { if (unlikely(count == ARRAY_SIZE(stack))) {
@ -867,6 +883,11 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (sk != NULL) { if (sk != NULL) {
int ret; int ret;
if (!uh->check && !sk->sk_no_check) {
udp6_csum_zero_error(skb);
goto csum_error;
}
ret = udpv6_queue_rcv_skb(sk, skb); ret = udpv6_queue_rcv_skb(sk, skb);
sock_put(sk); sock_put(sk);
@ -879,6 +900,11 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
return 0; return 0;
} }
if (!uh->check) {
udp6_csum_zero_error(skb);
goto csum_error;
}
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard; goto discard;
@ -1006,7 +1032,10 @@ static int udp_v6_push_pending_frames(struct sock *sk)
if (is_udplite) if (is_udplite)
csum = udplite_csum_outgoing(sk, skb); csum = udplite_csum_outgoing(sk, skb);
else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */
skb->ip_summed = CHECKSUM_NONE;
goto send;
} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr,
up->len); up->len);
goto send; goto send;