acdcecc612
UDP reuseport groups can hold a mix of unconnected and connected sockets.
Ensure that connections only receive all traffic to their 4-tuple.
Fast reuseport returns on the first reuseport match on the assumption
that all matches are equal. Only if connections are present, return to
the previous behavior of scoring all sockets.
Record if connections are present and if so (1) treat such connected
sockets as an independent match from the group, (2) only return
2-tuple matches from reuseport and (3) do not return on the first
2-tuple reuseport match to allow for a higher scoring match later.
New field has_conns is set without locks. No other fields in the
bitmap are modified at runtime and the field is only ever set
unconditionally, so an RMW cannot miss a change.
Fixes: e32ea7e747
("soreuseport: fast reuseport UDP socket selection")
Link: http://lkml.kernel.org/r/CA+FuTSfRP09aJNYRt04SS6qj22ViiOEWaWmLAwX0psk8-PGNxw@mail.gmail.com
Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Craig Gallek <kraig@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
130 lines
3.1 KiB
C
130 lines
3.1 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/*
|
|
* common UDP/RAW code
|
|
* Linux INET implementation
|
|
*
|
|
* Authors:
|
|
* Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
|
|
*/
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/module.h>
|
|
#include <linux/ip.h>
|
|
#include <linux/in.h>
|
|
#include <net/ip.h>
|
|
#include <net/sock.h>
|
|
#include <net/route.h>
|
|
#include <net/tcp_states.h>
|
|
#include <net/sock_reuseport.h>
|
|
|
|
int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
|
|
{
|
|
struct inet_sock *inet = inet_sk(sk);
|
|
struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
|
|
struct flowi4 *fl4;
|
|
struct rtable *rt;
|
|
__be32 saddr;
|
|
int oif;
|
|
int err;
|
|
|
|
|
|
if (addr_len < sizeof(*usin))
|
|
return -EINVAL;
|
|
|
|
if (usin->sin_family != AF_INET)
|
|
return -EAFNOSUPPORT;
|
|
|
|
sk_dst_reset(sk);
|
|
|
|
oif = sk->sk_bound_dev_if;
|
|
saddr = inet->inet_saddr;
|
|
if (ipv4_is_multicast(usin->sin_addr.s_addr)) {
|
|
if (!oif || netif_index_is_l3_master(sock_net(sk), oif))
|
|
oif = inet->mc_index;
|
|
if (!saddr)
|
|
saddr = inet->mc_addr;
|
|
}
|
|
fl4 = &inet->cork.fl.u.ip4;
|
|
rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr,
|
|
RT_CONN_FLAGS(sk), oif,
|
|
sk->sk_protocol,
|
|
inet->inet_sport, usin->sin_port, sk);
|
|
if (IS_ERR(rt)) {
|
|
err = PTR_ERR(rt);
|
|
if (err == -ENETUNREACH)
|
|
IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
|
|
goto out;
|
|
}
|
|
|
|
if ((rt->rt_flags & RTCF_BROADCAST) && !sock_flag(sk, SOCK_BROADCAST)) {
|
|
ip_rt_put(rt);
|
|
err = -EACCES;
|
|
goto out;
|
|
}
|
|
if (!inet->inet_saddr)
|
|
inet->inet_saddr = fl4->saddr; /* Update source address */
|
|
if (!inet->inet_rcv_saddr) {
|
|
inet->inet_rcv_saddr = fl4->saddr;
|
|
if (sk->sk_prot->rehash)
|
|
sk->sk_prot->rehash(sk);
|
|
}
|
|
inet->inet_daddr = fl4->daddr;
|
|
inet->inet_dport = usin->sin_port;
|
|
reuseport_has_conns(sk, true);
|
|
sk->sk_state = TCP_ESTABLISHED;
|
|
sk_set_txhash(sk);
|
|
inet->inet_id = jiffies;
|
|
|
|
sk_dst_set(sk, &rt->dst);
|
|
err = 0;
|
|
out:
|
|
return err;
|
|
}
|
|
EXPORT_SYMBOL(__ip4_datagram_connect);
|
|
|
|
/*
 * ip4_datagram_connect - locked wrapper around __ip4_datagram_connect()
 * @sk:       socket being connected
 * @uaddr:    destination address, passed through unchanged
 * @addr_len: length of @uaddr, passed through unchanged
 *
 * Takes the socket lock, performs the connect and releases the lock.
 * Returns whatever __ip4_datagram_connect() returned.
 */
int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	int rc;

	lock_sock(sk);
	rc = __ip4_datagram_connect(sk, uaddr, addr_len);
	release_sock(sk);

	return rc;
}
EXPORT_SYMBOL(ip4_datagram_connect);
|
|
|
|
/* Because UDP xmit path can manipulate sk_dst_cache without holding
|
|
* socket lock, we need to use sk_dst_set() here,
|
|
* even if we own the socket lock.
|
|
*/
|
|
void ip4_datagram_release_cb(struct sock *sk)
|
|
{
|
|
const struct inet_sock *inet = inet_sk(sk);
|
|
const struct ip_options_rcu *inet_opt;
|
|
__be32 daddr = inet->inet_daddr;
|
|
struct dst_entry *dst;
|
|
struct flowi4 fl4;
|
|
struct rtable *rt;
|
|
|
|
rcu_read_lock();
|
|
|
|
dst = __sk_dst_get(sk);
|
|
if (!dst || !dst->obsolete || dst->ops->check(dst, 0)) {
|
|
rcu_read_unlock();
|
|
return;
|
|
}
|
|
inet_opt = rcu_dereference(inet->inet_opt);
|
|
if (inet_opt && inet_opt->opt.srr)
|
|
daddr = inet_opt->opt.faddr;
|
|
rt = ip_route_output_ports(sock_net(sk), &fl4, sk, daddr,
|
|
inet->inet_saddr, inet->inet_dport,
|
|
inet->inet_sport, sk->sk_protocol,
|
|
RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
|
|
|
|
dst = !IS_ERR(rt) ? &rt->dst : NULL;
|
|
sk_dst_set(sk, dst);
|
|
|
|
rcu_read_unlock();
|
|
}
|
|
EXPORT_SYMBOL_GPL(ip4_datagram_release_cb);
|