Merge branch 'net-initialize-fastreuse-on-inet_inherit_port'
Tim Froidcoeur says: ==================== net: initialize fastreuse on inet_inherit_port In the case of TPROXY, bind_conflict optimizations for SO_REUSEADDR or SO_REUSEPORT are broken, possibly resulting in O(n) instead of O(1) bind behaviour or in the incorrect reuse of a bind. The kernel keeps track, for each bind_bucket, of whether all sockets in the bind_bucket support SO_REUSEADDR or SO_REUSEPORT, using two fastreuse flags. These flags allow skipping the costly bind_conflict check when possible (meaning when all sockets have the proper SO_REUSE option). For every socket added to a bind_bucket, these flags need to be updated. As soon as a socket that does not support reuse is added, the flag is set to false and will never go back to true, unless the bind_bucket is deleted. Note that there is no mechanism to re-evaluate these flags when a socket is removed (this might make sense when removing a socket that would not allow reuse; this leaves room for a future patch). For this optimization to work, it is mandatory that these flags are properly initialized and updated. When a child socket is created from a listen socket in __inet_inherit_port, the TPROXY case could create a new bind bucket without properly initializing these flags, thus preventing the optimization from working. Alternatively, a socket not allowing reuse could be added to an existing bind bucket without updating the flags, causing bind_conflict to never be called as it should. Patch 1/2 refactors the fastreuse update code in inet_csk_get_port into a small helper function, making the actual fix tiny and easier to understand. Patch 2/2 calls this new helper when __inet_inherit_port decides to create a new bind_bucket or use a different bind_bucket than that of the listen socket. v4: - rebase on latest linux/net master branch v3: - remove company disclaimer from automatic signature v2: - remove unnecessary cast ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
633f5b6bca
@ -304,6 +304,10 @@ void inet_csk_listen_stop(struct sock *sk);
|
||||
|
||||
void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr);
|
||||
|
||||
/* update the fast reuse flag when adding a socket */
|
||||
void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
|
||||
struct sock *sk);
|
||||
|
||||
struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
|
||||
|
||||
#define TCP_PINGPONG_THRESH 3
|
||||
|
@ -296,55 +296,12 @@ static inline int sk_reuseport_match(struct inet_bind_bucket *tb,
|
||||
ipv6_only_sock(sk), true, false);
|
||||
}
|
||||
|
||||
/* Obtain a reference to a local port for the given sock,
|
||||
* if snum is zero it means select any available local port.
|
||||
* We try to allocate an odd port (and leave even ports for connect())
|
||||
*/
|
||||
int inet_csk_get_port(struct sock *sk, unsigned short snum)
|
||||
void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
|
||||
struct sock *sk)
|
||||
{
|
||||
bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
|
||||
struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
|
||||
int ret = 1, port = snum;
|
||||
struct inet_bind_hashbucket *head;
|
||||
struct net *net = sock_net(sk);
|
||||
struct inet_bind_bucket *tb = NULL;
|
||||
kuid_t uid = sock_i_uid(sk);
|
||||
int l3mdev;
|
||||
bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
|
||||
|
||||
l3mdev = inet_sk_bound_l3mdev(sk);
|
||||
|
||||
if (!port) {
|
||||
head = inet_csk_find_open_port(sk, &tb, &port);
|
||||
if (!head)
|
||||
return ret;
|
||||
if (!tb)
|
||||
goto tb_not_found;
|
||||
goto success;
|
||||
}
|
||||
head = &hinfo->bhash[inet_bhashfn(net, port,
|
||||
hinfo->bhash_size)];
|
||||
spin_lock_bh(&head->lock);
|
||||
inet_bind_bucket_for_each(tb, &head->chain)
|
||||
if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
|
||||
tb->port == port)
|
||||
goto tb_found;
|
||||
tb_not_found:
|
||||
tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
|
||||
net, head, port, l3mdev);
|
||||
if (!tb)
|
||||
goto fail_unlock;
|
||||
tb_found:
|
||||
if (!hlist_empty(&tb->owners)) {
|
||||
if (sk->sk_reuse == SK_FORCE_REUSE)
|
||||
goto success;
|
||||
|
||||
if ((tb->fastreuse > 0 && reuse) ||
|
||||
sk_reuseport_match(tb, sk))
|
||||
goto success;
|
||||
if (inet_csk_bind_conflict(sk, tb, true, true))
|
||||
goto fail_unlock;
|
||||
}
|
||||
success:
|
||||
if (hlist_empty(&tb->owners)) {
|
||||
tb->fastreuse = reuse;
|
||||
if (sk->sk_reuseport) {
|
||||
@ -388,6 +345,58 @@ success:
|
||||
tb->fastreuseport = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Obtain a reference to a local port for the given sock,
|
||||
* if snum is zero it means select any available local port.
|
||||
* We try to allocate an odd port (and leave even ports for connect())
|
||||
*/
|
||||
int inet_csk_get_port(struct sock *sk, unsigned short snum)
|
||||
{
|
||||
bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
|
||||
struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
|
||||
int ret = 1, port = snum;
|
||||
struct inet_bind_hashbucket *head;
|
||||
struct net *net = sock_net(sk);
|
||||
struct inet_bind_bucket *tb = NULL;
|
||||
int l3mdev;
|
||||
|
||||
l3mdev = inet_sk_bound_l3mdev(sk);
|
||||
|
||||
if (!port) {
|
||||
head = inet_csk_find_open_port(sk, &tb, &port);
|
||||
if (!head)
|
||||
return ret;
|
||||
if (!tb)
|
||||
goto tb_not_found;
|
||||
goto success;
|
||||
}
|
||||
head = &hinfo->bhash[inet_bhashfn(net, port,
|
||||
hinfo->bhash_size)];
|
||||
spin_lock_bh(&head->lock);
|
||||
inet_bind_bucket_for_each(tb, &head->chain)
|
||||
if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
|
||||
tb->port == port)
|
||||
goto tb_found;
|
||||
tb_not_found:
|
||||
tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
|
||||
net, head, port, l3mdev);
|
||||
if (!tb)
|
||||
goto fail_unlock;
|
||||
tb_found:
|
||||
if (!hlist_empty(&tb->owners)) {
|
||||
if (sk->sk_reuse == SK_FORCE_REUSE)
|
||||
goto success;
|
||||
|
||||
if ((tb->fastreuse > 0 && reuse) ||
|
||||
sk_reuseport_match(tb, sk))
|
||||
goto success;
|
||||
if (inet_csk_bind_conflict(sk, tb, true, true))
|
||||
goto fail_unlock;
|
||||
}
|
||||
success:
|
||||
inet_csk_update_fastreuse(tb, sk);
|
||||
|
||||
if (!inet_csk(sk)->icsk_bind_hash)
|
||||
inet_bind_hash(sk, tb, port);
|
||||
WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
|
||||
|
@ -163,6 +163,7 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
|
||||
return -ENOMEM;
|
||||
}
|
||||
}
|
||||
inet_csk_update_fastreuse(tb, child);
|
||||
}
|
||||
inet_bind_hash(child, tb, port);
|
||||
spin_unlock(&head->lock);
|
||||
|
Loading…
Reference in New Issue
Block a user