forked from Minki/linux
69421bf984
When we call connect() for a UDP socket in a reuseport group, we have
to update sk->sk_reuseport_cb->has_conns to 1. Otherwise, the kernel
could select a unconnected socket wrongly for packets sent to the
connected socket.
However, the current way to set has_conns is illegal and possible to
trigger that problem. reuseport_has_conns() changes has_conns under
rcu_read_lock(), which upgrades the RCU reader to the updater. Then,
it must do the update under the updater's lock, reuseport_lock, but
it doesn't for now.
For this reason, there is a race below where we fail to set has_conns
resulting in the wrong socket selection. To avoid the race, let's split
the reader and updater with proper locking.
cpu1 cpu2
+----+ +----+
__ip[46]_datagram_connect() reuseport_grow()
. .
|- reuseport_has_conns(sk, true) |- more_reuse = __reuseport_alloc(more_socks_size)
| . |
| |- rcu_read_lock()
| |- reuse = rcu_dereference(sk->sk_reuseport_cb)
| |
| | | /* reuse->has_conns == 0 here */
| | |- more_reuse->has_conns = reuse->has_conns
| |- reuse->has_conns = 1 | /* more_reuse->has_conns SHOULD BE 1 HERE */
| | |
| | |- rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
| | | more_reuse)
| `- rcu_read_unlock() `- kfree_rcu(reuse, rcu)
|
|- sk->sk_state = TCP_ESTABLISHED
Note the likely(reuse) in reuseport_has_conns_set() is always true,
but we put the test there for ease of review. [0]
For the record, usually, sk_reuseport_cb is changed under lock_sock().
The only exception is reuseport_grow() & TCP reqsk migration case.
1) shutdown() TCP listener, which is moved into the latter part of
reuse->socks[] to migrate reqsk.
2) New listen() overflows reuse->socks[] and call reuseport_grow().
3) reuse->max_socks overflows u16 with the new listener.
4) reuseport_grow() pops the old shutdown()ed listener from the array
and update its sk->sk_reuseport_cb as NULL without lock_sock().
shutdown()ed TCP sk->sk_reuseport_cb can be changed without lock_sock(),
but, reuseport_has_conns_set() is called only for UDP under lock_sock(),
so likely(reuse) never be false in reuseport_has_conns_set().
[0]: https://lore.kernel.org/netdev/CANn89iLja=eQHbsM_Ta2sQF0tOGU8vAGrh_izRuuHjuO1ouUag@mail.gmail.com/
Fixes: acdcecc612
("udp: correct reuseport selection with connected sockets")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://lore.kernel.org/r/20221014182625.89913-1-kuniyu@amazon.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
63 lines
1.7 KiB
C
63 lines
1.7 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _SOCK_REUSEPORT_H
|
|
#define _SOCK_REUSEPORT_H
|
|
|
|
#include <linux/filter.h>
|
|
#include <linux/skbuff.h>
|
|
#include <linux/types.h>
|
|
#include <linux/spinlock.h>
|
|
#include <net/sock.h>
|
|
|
|
extern spinlock_t reuseport_lock;
|
|
|
|
struct sock_reuseport {
|
|
struct rcu_head rcu;
|
|
|
|
u16 max_socks; /* length of socks */
|
|
u16 num_socks; /* elements in socks */
|
|
u16 num_closed_socks; /* closed elements in socks */
|
|
/* The last synq overflow event timestamp of this
|
|
* reuse->socks[] group.
|
|
*/
|
|
unsigned int synq_overflow_ts;
|
|
/* ID stays the same even after the size of socks[] grows. */
|
|
unsigned int reuseport_id;
|
|
unsigned int bind_inany:1;
|
|
unsigned int has_conns:1;
|
|
struct bpf_prog __rcu *prog; /* optional BPF sock selector */
|
|
struct sock *socks[]; /* array of sock pointers */
|
|
};
|
|
|
|
extern int reuseport_alloc(struct sock *sk, bool bind_inany);
|
|
extern int reuseport_add_sock(struct sock *sk, struct sock *sk2,
|
|
bool bind_inany);
|
|
extern void reuseport_detach_sock(struct sock *sk);
|
|
void reuseport_stop_listen_sock(struct sock *sk);
|
|
extern struct sock *reuseport_select_sock(struct sock *sk,
|
|
u32 hash,
|
|
struct sk_buff *skb,
|
|
int hdr_len);
|
|
struct sock *reuseport_migrate_sock(struct sock *sk,
|
|
struct sock *migrating_sk,
|
|
struct sk_buff *skb);
|
|
extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
|
|
extern int reuseport_detach_prog(struct sock *sk);
|
|
|
|
static inline bool reuseport_has_conns(struct sock *sk)
|
|
{
|
|
struct sock_reuseport *reuse;
|
|
bool ret = false;
|
|
|
|
rcu_read_lock();
|
|
reuse = rcu_dereference(sk->sk_reuseport_cb);
|
|
if (reuse && reuse->has_conns)
|
|
ret = true;
|
|
rcu_read_unlock();
|
|
|
|
return ret;
|
|
}
|
|
|
|
void reuseport_has_conns_set(struct sock *sk);
|
|
|
|
#endif /* _SOCK_REUSEPORT_H */
|