Merge branch 'tcp-receive-path-optimizations'
Eric Dumazet says:

====================
tcp: receive path optimizations

This series aims to reduce cache line misses in the RX path.

I am still working on better cache locality in tcp_sock, but
this will wait a few more weeks.
====================

Link: https://lore.kernel.org/r/20211025164825.259415-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in: commit e43b76abf7
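Editor's note: the recurring idiom in this series is to avoid redundant stores on the per-packet fast path — re-writing an unchanged value still dirties the cache line and forces other CPUs to re-fetch it. Below is a minimal userspace sketch of that idiom, not code from the patches; C11 relaxed atomics stand in for the kernel's READ_ONCE()/WRITE_ONCE(), and fake_sock/mark_napi_id are invented names.

#include <stdatomic.h>
#include <stdio.h>

struct fake_sock {
	_Atomic unsigned int napi_id;	/* stands in for sk->sk_napi_id */
};

static void mark_napi_id(struct fake_sock *sk, unsigned int napi_id)
{
	/* Read first; store only when the value actually changed. */
	if (atomic_load_explicit(&sk->napi_id, memory_order_relaxed) != napi_id)
		atomic_store_explicit(&sk->napi_id, napi_id, memory_order_relaxed);
}

int main(void)
{
	struct fake_sock sk = { 0 };

	mark_napi_id(&sk, 7);	/* first packet of the flow: one store */
	mark_napi_id(&sk, 7);	/* steady state: no store, the line stays clean */
	printf("napi_id=%u\n", atomic_load(&sk.napi_id));
	return 0;
}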
@@ -282,7 +282,6 @@ struct ipv6_pinfo {
 	__be32			rcv_flowinfo;
 
 	__u32			dst_cookie;
-	__u32			rx_dst_cookie;
 
 	struct ipv6_mc_socklist __rcu *ipv6_mc_list;
 	struct ipv6_ac_socklist	*ipv6_ac_list;
@@ -130,7 +130,8 @@ static inline void skb_mark_napi_id(struct sk_buff *skb,
 static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
 {
 #ifdef CONFIG_NET_RX_BUSY_POLL
-	WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
+	if (unlikely(READ_ONCE(sk->sk_napi_id) != skb->napi_id))
+		WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
 #endif
 	sk_rx_queue_set(sk, skb);
 }
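Editor's note: with busy polling enabled, sk_mark_napi_id() runs for every packet of an established flow, and skb->napi_id rarely changes. The unconditional WRITE_ONCE() kept the cache line holding sk_napi_id in Modified state on the writing CPU, invalidating the copies other CPUs read; checking first lets the line stay shared in the common case (see the sketch above).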
@@ -207,11 +207,10 @@ struct inet_sock {
 	__be32			inet_saddr;
 	__s16			uc_ttl;
 	__u16			cmsg_flags;
+	struct ip_options_rcu __rcu	*inet_opt;
 	__be16			inet_sport;
 	__u16			inet_id;
 
-	struct ip_options_rcu __rcu	*inet_opt;
-	int			rx_dst_ifindex;
 	__u8			tos;
 	__u8			min_ttl;
 	__u8			mc_ttl;
@@ -24,6 +24,7 @@
 #include <linux/skbuff.h>
 #include <linux/jhash.h>
 #include <linux/sockptr.h>
+#include <linux/static_key.h>
 
 #include <net/inet_sock.h>
 #include <net/route.h>
@@ -750,6 +751,7 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
 			 struct sk_buff *skb, int tlen, int offset);
 int ip_cmsg_send(struct sock *sk, struct msghdr *msg,
 		 struct ipcm_cookie *ipc, bool allow_ipv6);
+DECLARE_STATIC_KEY_FALSE(ip4_min_ttl);
 int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
 		  unsigned int optlen);
 int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
@@ -1092,6 +1092,7 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
 /*
  *	socket options (ipv6_sockglue.c)
  */
+DECLARE_STATIC_KEY_FALSE(ip6_min_hopcount);
 
 int ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
 		    unsigned int optlen);
@@ -259,6 +259,8 @@ struct bpf_local_storage;
 *	@sk_rcvbuf: size of receive buffer in bytes
 *	@sk_wq: sock wait queue and async head
 *	@sk_rx_dst: receive input route used by early demux
+*	@sk_rx_dst_ifindex: ifindex for @sk_rx_dst
+*	@sk_rx_dst_cookie: cookie for @sk_rx_dst
 *	@sk_dst_cache: destination cache
 *	@sk_dst_pending_confirm: need to confirm neighbour
 *	@sk_policy: flow policy
@@ -430,6 +432,9 @@ struct sock {
 	struct xfrm_policy __rcu *sk_policy[2];
 #endif
 	struct dst_entry	*sk_rx_dst;
+	int			sk_rx_dst_ifindex;
+	u32			sk_rx_dst_cookie;
+
 	struct dst_entry __rcu	*sk_dst_cache;
 	atomic_t		sk_omem_alloc;
 	int			sk_sndbuf;
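Editor's note: a hypothetical layout sketch, not the real struct sock, showing the point of moving the ifindex and cookie next to sk_rx_dst: early demux reads all three together, so placing them side by side means the validation usually hits the cache line already fetched for sk_rx_dst, instead of touching separate inet_sock/ipv6_pinfo lines as before.

struct dst_entry;			/* opaque here; the real type lives in net/dst.h */

/* Hypothetical grouping of the fields early demux reads together. */
struct rx_demux_hot {
	struct dst_entry *rx_dst;	/* cached input route */
	int		rx_dst_ifindex;	/* device the route was learned on */
	unsigned int	rx_dst_cookie;	/* validity cookie for rx_dst (IPv6) */
};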
@@ -1911,10 +1916,8 @@ static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb)
 	if (skb_rx_queue_recorded(skb)) {
 		u16 rx_queue = skb_get_rx_queue(skb);
 
-		if (WARN_ON_ONCE(rx_queue == NO_QUEUE_MAPPING))
-			return;
-
-		sk->sk_rx_queue_mapping = rx_queue;
+		if (unlikely(READ_ONCE(sk->sk_rx_queue_mapping) != rx_queue))
+			WRITE_ONCE(sk->sk_rx_queue_mapping, rx_queue);
 	}
 #endif
 }
@@ -1922,15 +1925,19 @@ static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb)
 static inline void sk_rx_queue_clear(struct sock *sk)
 {
 #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
-	sk->sk_rx_queue_mapping = NO_QUEUE_MAPPING;
+	WRITE_ONCE(sk->sk_rx_queue_mapping, NO_QUEUE_MAPPING);
 #endif
 }
 
 static inline int sk_rx_queue_get(const struct sock *sk)
 {
 #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
-	if (sk && sk->sk_rx_queue_mapping != NO_QUEUE_MAPPING)
-		return sk->sk_rx_queue_mapping;
+	if (sk) {
+		int res = READ_ONCE(sk->sk_rx_queue_mapping);
+
+		if (res != NO_QUEUE_MAPPING)
+			return res;
+	}
 #endif
 
 	return -1;
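Editor's note: a userspace sketch (not kernel code) of the annotated lockless pattern sk_rx_queue_get() now uses — the reader snapshots the field once and tests the local copy, so a concurrent writer cannot make the test and the returned value disagree. C11 relaxed atomics play the role of READ_ONCE()/WRITE_ONCE(); the names are invented.

#include <stdatomic.h>

#define NO_QUEUE_MAPPING 0xffff

static _Atomic unsigned short rx_queue_mapping = NO_QUEUE_MAPPING;

static void rx_queue_set(unsigned short q)
{
	/* WRITE_ONCE() analogue: a single, untorn store. */
	atomic_store_explicit(&rx_queue_mapping, q, memory_order_relaxed);
}

static int rx_queue_get(void)
{
	/* One snapshot; the test and the result use the same value. */
	unsigned short res = atomic_load_explicit(&rx_queue_mapping,
						  memory_order_relaxed);

	return res != NO_QUEUE_MAPPING ? res : -1;
}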
@@ -886,6 +886,8 @@ static int compat_ip_mcast_join_leave(struct sock *sk, int optname,
 	return ip_mc_leave_group(sk, &mreq);
 }
 
+DEFINE_STATIC_KEY_FALSE(ip4_min_ttl);
+
 static int do_ip_setsockopt(struct sock *sk, int level, int optname,
 		sockptr_t optval, unsigned int optlen)
 {
@@ -1352,7 +1354,14 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname,
 			goto e_inval;
 		if (val < 0 || val > 255)
 			goto e_inval;
-		inet->min_ttl = val;
+
+		if (val)
+			static_branch_enable(&ip4_min_ttl);
+
+		/* tcp_v4_err() and tcp_v4_rcv() might read min_ttl
+		 * while we are changing it.
+		 */
+		WRITE_ONCE(inet->min_ttl, val);
 		break;
 
 	default:
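Editor's note: ip4_min_ttl is a static key, so the per-packet TTL comparison in the receive path is patched out entirely until the first socket sets IP_MINTTL; static_branch_enable() is never reverted here, so the check stays armed afterwards. A hypothetical userspace analogue, with a plain flag standing in for the code-patching key:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool min_ttl_enabled;	/* stands in for the ip4_min_ttl static key */
static _Atomic unsigned char min_ttl;	/* per-socket inet->min_ttl in the kernel */

static void set_min_ttl(unsigned char val)
{
	if (val)
		atomic_store(&min_ttl_enabled, true);	/* one-way enable */
	atomic_store(&min_ttl, val);			/* WRITE_ONCE() analogue */
}

static bool ttl_below_minimum(unsigned char pkt_ttl)
{
	if (!atomic_load(&min_ttl_enabled))	/* fast path: option never used */
		return false;
	/* min_ttl may change concurrently; read it once. */
	return pkt_ttl < atomic_load(&min_ttl);
}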
@@ -508,9 +508,12 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
 	if (sk->sk_state == TCP_CLOSE)
 		goto out;
 
-	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
-		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
-		goto out;
+	if (static_branch_unlikely(&ip4_min_ttl)) {
+		/* min_ttl can be changed concurrently from do_ip_setsockopt() */
+		if (unlikely(iph->ttl < READ_ONCE(inet_sk(sk)->min_ttl))) {
+			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
+			goto out;
+		}
 	}
 
 	tp = tcp_sk(sk);
@@ -1703,7 +1706,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 		sock_rps_save_rxhash(sk, skb);
 		sk_mark_napi_id(sk, skb);
 		if (dst) {
-			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
+			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
 			    !INDIRECT_CALL_1(dst->ops->check, ipv4_dst_check,
 					     dst, 0)) {
 				dst_release(dst);
@@ -1788,7 +1791,7 @@ int tcp_v4_early_demux(struct sk_buff *skb)
 		if (dst)
 			dst = dst_check(dst, 0);
 		if (dst &&
-		    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
+		    sk->sk_rx_dst_ifindex == skb->skb_iif)
 			skb_dst_set_noref(skb, dst);
 	}
 }
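Editor's note: early demux reuses the route cached on the socket only when the packet arrived on the device the route was learned from, hence the skb_iif comparison. After this series that ifindex lives in struct sock next to sk_rx_dst, so the check no longer pulls in an inet_sock cache line.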
@@ -2068,9 +2071,13 @@ process:
 			return 0;
 		}
 	}
-	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
-		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
-		goto discard_and_relse;
+
+	if (static_branch_unlikely(&ip4_min_ttl)) {
+		/* min_ttl can be changed concurrently from do_ip_setsockopt() */
+		if (unlikely(iph->ttl < READ_ONCE(inet_sk(sk)->min_ttl))) {
+			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
+			goto discard_and_relse;
+		}
 	}
 
 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
@@ -2195,7 +2202,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
 
 	if (dst && dst_hold_safe(dst)) {
 		sk->sk_rx_dst = dst;
-		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+		sk->sk_rx_dst_ifindex = skb->skb_iif;
 	}
 }
 EXPORT_SYMBOL(inet_sk_rx_dst_set);
@@ -55,6 +55,8 @@
 struct ip6_ra_chain	*ip6_ra_chain;
 DEFINE_RWLOCK(ip6_ra_lock);
 
+DEFINE_STATIC_KEY_FALSE(ip6_min_hopcount);
+
 int ip6_ra_control(struct sock *sk, int sel)
 {
 	struct ip6_ra_chain *ra, *new_ra, **rap;
@@ -950,7 +952,14 @@ done:
 			goto e_inval;
 		if (val < 0 || val > 255)
 			goto e_inval;
-		np->min_hopcount = val;
+
+		if (val)
+			static_branch_enable(&ip6_min_hopcount);
+
+		/* tcp_v6_err() and tcp_v6_rcv() might read min_hopcount
+		 * while we are changing it.
+		 */
+		WRITE_ONCE(np->min_hopcount, val);
 		retv = 0;
 		break;
 	case IPV6_DONTFRAG:
@@ -108,8 +108,8 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
 		const struct rt6_info *rt = (const struct rt6_info *)dst;
 
 		sk->sk_rx_dst = dst;
-		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
-		tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
+		sk->sk_rx_dst_ifindex = skb->skb_iif;
+		sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
 	}
 }
 
@@ -414,9 +414,12 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (sk->sk_state == TCP_CLOSE)
 		goto out;
 
-	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
-		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
-		goto out;
+	if (static_branch_unlikely(&ip6_min_hopcount)) {
+		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
+		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
+			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
+			goto out;
+		}
 	}
 
 	tp = tcp_sk(sk);
@@ -569,7 +572,7 @@ done:
 static void tcp_v6_reqsk_destructor(struct request_sock *req)
 {
 	kfree(inet_rsk(req)->ipv6_opt);
-	kfree_skb(inet_rsk(req)->pktopts);
+	consume_skb(inet_rsk(req)->pktopts);
 }
 
 #ifdef CONFIG_TCP_MD5SIG
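Editor's note: kfree_skb() is reported as a packet drop by the kernel's drop monitor, while consume_skb() frees a buffer that was processed normally. This call site (and the ipv6_pktoptions one below) frees packets that were not actually dropped, so switching to consume_skb() stops drop-monitoring tools from flagging false positives.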
@@ -1509,9 +1512,9 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 		sock_rps_save_rxhash(sk, skb);
 		sk_mark_napi_id(sk, skb);
 		if (dst) {
-			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
+			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
 			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
-					    dst, np->rx_dst_cookie) == NULL) {
+					    dst, sk->sk_rx_dst_cookie) == NULL) {
 				dst_release(dst);
 				sk->sk_rx_dst = NULL;
 			}
@@ -1591,7 +1594,7 @@ ipv6_pktoptions:
 		}
 	}
 
-	kfree_skb(opt_skb);
+	consume_skb(opt_skb);
 	return 0;
 }
 
@@ -1726,9 +1729,13 @@ process:
 			return 0;
 		}
 	}
-	if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
-		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
-		goto discard_and_relse;
+
+	if (static_branch_unlikely(&ip6_min_hopcount)) {
+		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
+		if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
+			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
+			goto discard_and_relse;
+		}
 	}
 
 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
@@ -1872,9 +1879,9 @@ INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
 		struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
 
 		if (dst)
-			dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
+			dst = dst_check(dst, sk->sk_rx_dst_cookie);
 		if (dst &&
-		    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
+		    sk->sk_rx_dst_ifindex == skb->skb_iif)
 			skb_dst_set_noref(skb, dst);
 	}
 }
@@ -884,7 +884,7 @@ static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
 	if (udp_sk_rx_dst_set(sk, dst)) {
 		const struct rt6_info *rt = (const struct rt6_info *)dst;
 
-		inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
+		sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
 	}
 }
 
@@ -1073,7 +1073,7 @@ INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb)
 		dst = READ_ONCE(sk->sk_rx_dst);
 
 		if (dst)
-			dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
+			dst = dst_check(dst, sk->sk_rx_dst_cookie);
 		if (dst) {
 			/* set noref for now.
 			 * any place which wants to hold dst has to call