mirror of
https://github.com/torvalds/linux.git
synced 2024-12-05 18:41:23 +00:00
Merge branch 'dccp-tcp-fix-bhash2-issues-related-to-warn_on-in-inet_csk_get_port'
Kuniyuki Iwashima says: ==================== dccp/tcp: Fix bhash2 issues related to WARN_ON() in inet_csk_get_port(). syzkaller was hitting a WARN_ON() in inet_csk_get_port() in the 4th patch, which was because we forgot to fix up bhash2 bucket when connect() for a socket bound to a wildcard address fails in __inet_stream_connect(). There was a similar report [0], but its repro does not fire the WARN_ON() due to inconsistent error handling. When connect() for a socket bound to a wildcard address fails, saddr may or may not be reset depending on where the failure happens. When we fail in __inet_stream_connect(), sk->sk_prot->disconnect() resets saddr. OTOH, in (dccp|tcp)_v[46]_connect(), if we fail after inet_hash6?_connect(), we forget to reset saddr. We fix this inconsistent error handling in the 1st patch, and then we'll fix the bhash2 WARN_ON() issue. Note that there is still an issue in that we reset saddr without checking if there are conflicting sockets in bhash and bhash2, but this should be another series. See [1][2] for the previous discussion. [0]: https://lore.kernel.org/netdev/0000000000003f33bc05dfaf44fe@google.com/ [1]: https://lore.kernel.org/netdev/20221029001249.86337-1-kuniyu@amazon.com/ [2]: https://lore.kernel.org/netdev/20221103172419.20977-1-kuniyu@amazon.com/ [3]: https://lore.kernel.org/netdev/20221118081906.053d5231@kernel.org/T/#m00aafedb29ff0b55d5e67aef0252ef1baaf4b6ee ==================== Link: https://lore.kernel.org/r/20221119014914.31792-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
0972457f58
@ -281,7 +281,8 @@ inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, in
|
||||
* sk_v6_rcv_saddr (ipv6) changes after it has been binded. The socket's
|
||||
* rcv_saddr field should already have been updated when this is called.
|
||||
*/
|
||||
int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct sock *sk);
|
||||
int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family);
|
||||
void inet_bhash2_reset_saddr(struct sock *sk);
|
||||
|
||||
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
|
||||
struct inet_bind2_bucket *tb2, unsigned short port);
|
||||
|
@ -45,11 +45,10 @@ static unsigned int dccp_v4_pernet_id __read_mostly;
|
||||
int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
|
||||
{
|
||||
const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
|
||||
struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
|
||||
__be32 daddr, nexthop, prev_sk_rcv_saddr;
|
||||
struct inet_sock *inet = inet_sk(sk);
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
__be16 orig_sport, orig_dport;
|
||||
__be32 daddr, nexthop;
|
||||
struct flowi4 *fl4;
|
||||
struct rtable *rt;
|
||||
int err;
|
||||
@ -91,26 +90,13 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
|
||||
daddr = fl4->daddr;
|
||||
|
||||
if (inet->inet_saddr == 0) {
|
||||
if (inet_csk(sk)->icsk_bind2_hash) {
|
||||
prev_addr_hashbucket =
|
||||
inet_bhashfn_portaddr(&dccp_hashinfo, sk,
|
||||
sock_net(sk),
|
||||
inet->inet_num);
|
||||
prev_sk_rcv_saddr = sk->sk_rcv_saddr;
|
||||
}
|
||||
inet->inet_saddr = fl4->saddr;
|
||||
}
|
||||
|
||||
sk_rcv_saddr_set(sk, inet->inet_saddr);
|
||||
|
||||
if (prev_addr_hashbucket) {
|
||||
err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
|
||||
err = inet_bhash2_update_saddr(sk, &fl4->saddr, AF_INET);
|
||||
if (err) {
|
||||
inet->inet_saddr = 0;
|
||||
sk_rcv_saddr_set(sk, prev_sk_rcv_saddr);
|
||||
ip_rt_put(rt);
|
||||
return err;
|
||||
}
|
||||
} else {
|
||||
sk_rcv_saddr_set(sk, inet->inet_saddr);
|
||||
}
|
||||
|
||||
inet->inet_dport = usin->sin_port;
|
||||
@ -157,6 +143,7 @@ failure:
|
||||
* This unhashes the socket and releases the local port, if necessary.
|
||||
*/
|
||||
dccp_set_state(sk, DCCP_CLOSED);
|
||||
inet_bhash2_reset_saddr(sk);
|
||||
ip_rt_put(rt);
|
||||
sk->sk_route_caps = 0;
|
||||
inet->inet_dport = 0;
|
||||
|
@ -934,26 +934,11 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
|
||||
}
|
||||
|
||||
if (saddr == NULL) {
|
||||
struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
|
||||
struct in6_addr prev_v6_rcv_saddr;
|
||||
|
||||
if (icsk->icsk_bind2_hash) {
|
||||
prev_addr_hashbucket = inet_bhashfn_portaddr(&dccp_hashinfo,
|
||||
sk, sock_net(sk),
|
||||
inet->inet_num);
|
||||
prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
|
||||
}
|
||||
|
||||
saddr = &fl6.saddr;
|
||||
sk->sk_v6_rcv_saddr = *saddr;
|
||||
|
||||
if (prev_addr_hashbucket) {
|
||||
err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
|
||||
if (err) {
|
||||
sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr;
|
||||
goto failure;
|
||||
}
|
||||
}
|
||||
err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
|
||||
if (err)
|
||||
goto failure;
|
||||
}
|
||||
|
||||
/* set the source address */
|
||||
@ -985,6 +970,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
|
||||
|
||||
late_failure:
|
||||
dccp_set_state(sk, DCCP_CLOSED);
|
||||
inet_bhash2_reset_saddr(sk);
|
||||
__sk_dst_reset(sk);
|
||||
failure:
|
||||
inet->inet_dport = 0;
|
||||
|
@ -279,8 +279,7 @@ int dccp_disconnect(struct sock *sk, int flags)
|
||||
|
||||
inet->inet_dport = 0;
|
||||
|
||||
if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
|
||||
inet_reset_saddr(sk);
|
||||
inet_bhash2_reset_saddr(sk);
|
||||
|
||||
sk->sk_shutdown = 0;
|
||||
sock_reset_flag(sk, SOCK_DONE);
|
||||
|
@ -1230,7 +1230,6 @@ EXPORT_SYMBOL(inet_unregister_protosw);
|
||||
|
||||
static int inet_sk_reselect_saddr(struct sock *sk)
|
||||
{
|
||||
struct inet_bind_hashbucket *prev_addr_hashbucket;
|
||||
struct inet_sock *inet = inet_sk(sk);
|
||||
__be32 old_saddr = inet->inet_saddr;
|
||||
__be32 daddr = inet->inet_daddr;
|
||||
@ -1260,16 +1259,8 @@ static int inet_sk_reselect_saddr(struct sock *sk)
|
||||
return 0;
|
||||
}
|
||||
|
||||
prev_addr_hashbucket =
|
||||
inet_bhashfn_portaddr(tcp_or_dccp_get_hashinfo(sk), sk,
|
||||
sock_net(sk), inet->inet_num);
|
||||
|
||||
inet->inet_saddr = inet->inet_rcv_saddr = new_saddr;
|
||||
|
||||
err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
|
||||
err = inet_bhash2_update_saddr(sk, &new_saddr, AF_INET);
|
||||
if (err) {
|
||||
inet->inet_saddr = old_saddr;
|
||||
inet->inet_rcv_saddr = old_saddr;
|
||||
ip_rt_put(rt);
|
||||
return err;
|
||||
}
|
||||
|
@ -858,34 +858,80 @@ inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, in
|
||||
return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
|
||||
}
|
||||
|
||||
int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct sock *sk)
|
||||
static void inet_update_saddr(struct sock *sk, void *saddr, int family)
|
||||
{
|
||||
if (family == AF_INET) {
|
||||
inet_sk(sk)->inet_saddr = *(__be32 *)saddr;
|
||||
sk_rcv_saddr_set(sk, inet_sk(sk)->inet_saddr);
|
||||
}
|
||||
#if IS_ENABLED(CONFIG_IPV6)
|
||||
else {
|
||||
sk->sk_v6_rcv_saddr = *(struct in6_addr *)saddr;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static int __inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family, bool reset)
|
||||
{
|
||||
struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk);
|
||||
struct inet_bind_hashbucket *head, *head2;
|
||||
struct inet_bind2_bucket *tb2, *new_tb2;
|
||||
int l3mdev = inet_sk_bound_l3mdev(sk);
|
||||
struct inet_bind_hashbucket *head2;
|
||||
int port = inet_sk(sk)->inet_num;
|
||||
struct net *net = sock_net(sk);
|
||||
int bhash;
|
||||
|
||||
if (!inet_csk(sk)->icsk_bind2_hash) {
|
||||
/* Not bind()ed before. */
|
||||
if (reset)
|
||||
inet_reset_saddr(sk);
|
||||
else
|
||||
inet_update_saddr(sk, saddr, family);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Allocate a bind2 bucket ahead of time to avoid permanently putting
|
||||
* the bhash2 table in an inconsistent state if a new tb2 bucket
|
||||
* allocation fails.
|
||||
*/
|
||||
new_tb2 = kmem_cache_alloc(hinfo->bind2_bucket_cachep, GFP_ATOMIC);
|
||||
if (!new_tb2)
|
||||
if (!new_tb2) {
|
||||
if (reset) {
|
||||
/* The (INADDR_ANY, port) bucket might have already
|
||||
* been freed, then we cannot fixup icsk_bind2_hash,
|
||||
* so we give up and unlink sk from bhash/bhash2 not
|
||||
* to leave inconsistency in bhash2.
|
||||
*/
|
||||
inet_put_port(sk);
|
||||
inet_reset_saddr(sk);
|
||||
}
|
||||
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
bhash = inet_bhashfn(net, port, hinfo->bhash_size);
|
||||
head = &hinfo->bhash[bhash];
|
||||
head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);
|
||||
|
||||
/* If we change saddr locklessly, another thread
|
||||
* iterating over bhash might see corrupted address.
|
||||
*/
|
||||
spin_lock_bh(&head->lock);
|
||||
|
||||
spin_lock(&head2->lock);
|
||||
__sk_del_bind2_node(sk);
|
||||
inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, inet_csk(sk)->icsk_bind2_hash);
|
||||
spin_unlock(&head2->lock);
|
||||
|
||||
if (reset)
|
||||
inet_reset_saddr(sk);
|
||||
else
|
||||
inet_update_saddr(sk, saddr, family);
|
||||
|
||||
head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);
|
||||
|
||||
if (prev_saddr) {
|
||||
spin_lock_bh(&prev_saddr->lock);
|
||||
__sk_del_bind2_node(sk);
|
||||
inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep,
|
||||
inet_csk(sk)->icsk_bind2_hash);
|
||||
spin_unlock_bh(&prev_saddr->lock);
|
||||
}
|
||||
|
||||
spin_lock_bh(&head2->lock);
|
||||
spin_lock(&head2->lock);
|
||||
tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk);
|
||||
if (!tb2) {
|
||||
tb2 = new_tb2;
|
||||
@ -893,15 +939,29 @@ int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct soc
|
||||
}
|
||||
sk_add_bind2_node(sk, &tb2->owners);
|
||||
inet_csk(sk)->icsk_bind2_hash = tb2;
|
||||
spin_unlock_bh(&head2->lock);
|
||||
spin_unlock(&head2->lock);
|
||||
|
||||
spin_unlock_bh(&head->lock);
|
||||
|
||||
if (tb2 != new_tb2)
|
||||
kmem_cache_free(hinfo->bind2_bucket_cachep, new_tb2);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family)
|
||||
{
|
||||
return __inet_bhash2_update_saddr(sk, saddr, family, false);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(inet_bhash2_update_saddr);
|
||||
|
||||
void inet_bhash2_reset_saddr(struct sock *sk)
|
||||
{
|
||||
if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
|
||||
__inet_bhash2_update_saddr(sk, NULL, 0, true);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(inet_bhash2_reset_saddr);
|
||||
|
||||
/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm
|
||||
* Note that we use 32bit integers (vs RFC 'short integers')
|
||||
* because 2^16 is not a multiple of num_ephemeral and this
|
||||
|
@ -3114,8 +3114,7 @@ int tcp_disconnect(struct sock *sk, int flags)
|
||||
|
||||
inet->inet_dport = 0;
|
||||
|
||||
if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
|
||||
inet_reset_saddr(sk);
|
||||
inet_bhash2_reset_saddr(sk);
|
||||
|
||||
sk->sk_shutdown = 0;
|
||||
sock_reset_flag(sk, SOCK_DONE);
|
||||
|
@ -199,15 +199,14 @@ static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
|
||||
/* This will initiate an outgoing connection. */
|
||||
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
|
||||
{
|
||||
struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
|
||||
struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
|
||||
struct inet_timewait_death_row *tcp_death_row;
|
||||
__be32 daddr, nexthop, prev_sk_rcv_saddr;
|
||||
struct inet_sock *inet = inet_sk(sk);
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
struct ip_options_rcu *inet_opt;
|
||||
struct net *net = sock_net(sk);
|
||||
__be16 orig_sport, orig_dport;
|
||||
__be32 daddr, nexthop;
|
||||
struct flowi4 *fl4;
|
||||
struct rtable *rt;
|
||||
int err;
|
||||
@ -251,24 +250,13 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
|
||||
tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
|
||||
|
||||
if (!inet->inet_saddr) {
|
||||
if (inet_csk(sk)->icsk_bind2_hash) {
|
||||
prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo,
|
||||
sk, net, inet->inet_num);
|
||||
prev_sk_rcv_saddr = sk->sk_rcv_saddr;
|
||||
}
|
||||
inet->inet_saddr = fl4->saddr;
|
||||
}
|
||||
|
||||
sk_rcv_saddr_set(sk, inet->inet_saddr);
|
||||
|
||||
if (prev_addr_hashbucket) {
|
||||
err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
|
||||
err = inet_bhash2_update_saddr(sk, &fl4->saddr, AF_INET);
|
||||
if (err) {
|
||||
inet->inet_saddr = 0;
|
||||
sk_rcv_saddr_set(sk, prev_sk_rcv_saddr);
|
||||
ip_rt_put(rt);
|
||||
return err;
|
||||
}
|
||||
} else {
|
||||
sk_rcv_saddr_set(sk, inet->inet_saddr);
|
||||
}
|
||||
|
||||
if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
|
||||
@ -343,6 +331,7 @@ failure:
|
||||
* if necessary.
|
||||
*/
|
||||
tcp_set_state(sk, TCP_CLOSE);
|
||||
inet_bhash2_reset_saddr(sk);
|
||||
ip_rt_put(rt);
|
||||
sk->sk_route_caps = 0;
|
||||
inet->inet_dport = 0;
|
||||
|
@ -292,24 +292,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
|
||||
tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
|
||||
|
||||
if (!saddr) {
|
||||
struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
|
||||
struct in6_addr prev_v6_rcv_saddr;
|
||||
|
||||
if (icsk->icsk_bind2_hash) {
|
||||
prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo,
|
||||
sk, net, inet->inet_num);
|
||||
prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
|
||||
}
|
||||
saddr = &fl6.saddr;
|
||||
sk->sk_v6_rcv_saddr = *saddr;
|
||||
|
||||
if (prev_addr_hashbucket) {
|
||||
err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
|
||||
if (err) {
|
||||
sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr;
|
||||
goto failure;
|
||||
}
|
||||
}
|
||||
err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
|
||||
if (err)
|
||||
goto failure;
|
||||
}
|
||||
|
||||
/* set the source address */
|
||||
@ -359,6 +346,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
|
||||
|
||||
late_failure:
|
||||
tcp_set_state(sk, TCP_CLOSE);
|
||||
inet_bhash2_reset_saddr(sk);
|
||||
failure:
|
||||
inet->inet_dport = 0;
|
||||
sk->sk_route_caps = 0;
|
||||
|
Loading…
Reference in New Issue
Block a user