mirror of https://github.com/torvalds/linux.git
Merge branch 'udp-pernetns-hash'
Kuniyuki Iwashima says:

====================
udp: Introduce optional per-netns hash table.

This series is the UDP version of the per-netns ehash series [0],
which was initially in the same patch set [1].

The notable difference from TCP is that the max table size is 64K and
the min size is 128.  This is because the possible hash range by
udp_hashfn() always fits in 64K within the same netns, and because we
want to keep the bitmap in udp_lib_get_port() on the stack.  Also, the
UDP per-netns table isolates both 1-tuple and 2-tuple tables.  For
details, please see the last patch.

  patch 1 - 4: prep for per-netns hash table
  patch     5: add per-netns hash table

[0]: https://lore.kernel.org/netdev/20220908011022.45342-1-kuniyu@amazon.com/
[1]: https://lore.kernel.org/netdev/20220826000445.46552-1-kuniyu@amazon.com/
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
commit fd258f2aba
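The "isolates both 1-tuple and 2-tuple tables" remark above refers to the two bucket arrays kept in struct udp_table: hash[] chains sockets by local port only, hash2[] by (local address, local port). A minimal userspace sketch of that layout, with hypothetical names (the kernel's real hash functions are more involved than the XOR used here):

/* Illustrative sketch only, not part of this diff. */
#include <stdint.h>
#include <stdio.h>

#define ENTRIES 128                             /* must be a power of two */

struct toy_hslot { int count; };

struct toy_udp_table {
        struct toy_hslot hash[ENTRIES];         /* 1-tuple view: port */
        struct toy_hslot hash2[ENTRIES];        /* 2-tuple view: addr + port */
        uint32_t mask;                          /* ENTRIES - 1 */
};

int main(void)
{
        struct toy_udp_table t = { .mask = ENTRIES - 1 };
        uint32_t port = 53, addr = 0x7f000001;

        /* Both lookups land in the same table, but in different arrays. */
        printf("hash slot:  %u\n", port & t.mask);
        printf("hash2 slot: %u\n", (addr ^ port) & t.mask);
        return 0;
}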
Documentation/networking/ip-sysctl.rst
@@ -1177,6 +1177,33 @@ udp_rmem_min - INTEGER
 udp_wmem_min - INTEGER
         UDP does not have tx memory accounting and this tunable has no effect.
 
+udp_hash_entries - INTEGER
+        Show the number of hash buckets for UDP sockets in the current
+        networking namespace.
+
+        A negative value means the networking namespace does not own its
+        hash buckets and shares the initial networking namespace's one.
+
+udp_child_hash_entries - INTEGER
+        Control the number of hash buckets for UDP sockets in the child
+        networking namespace, which must be set before clone() or unshare().
+
+        If the value is not 0, the kernel uses a value rounded up to 2^n
+        as the actual hash bucket size.  0 is a special value, meaning
+        the child networking namespace will share the initial networking
+        namespace's hash buckets.
+
+        Note that the child will use the global one in case the kernel
+        fails to allocate enough memory.  In addition, the global hash
+        buckets are spread over available NUMA nodes, but the allocation
+        of the child hash table depends on the current process's NUMA
+        policy, which could result in performance differences.
+
+        Possible values: 0, 2^n (n: 7 (128) - 16 (64K))
+
+        Default: 0
+
 RAW variables
 =============
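A hedged userspace sketch of the setup order the text above requires: the tunable is read from the parent namespace at namespace-creation time, so it must be written before unshare() or clone(). Assumes root privileges; error paths trimmed to the essentials.

#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/proc/sys/net/ipv4/udp_child_hash_entries", O_WRONLY);

        if (fd < 0 || write(fd, "1024", 4) != 4) {
                perror("udp_child_hash_entries");
                return 1;
        }
        close(fd);

        /* The value only applies to namespaces created after this point. */
        if (unshare(CLONE_NEWNET)) {
                perror("unshare");
                return 1;
        }

        /* Inside the new netns, udp_hash_entries should read back 1024. */
        return 0;
}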
include/linux/udp.h
@@ -23,7 +23,9 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
         return (struct udphdr *)skb_transport_header(skb);
 }
 
+#define UDP_HTABLE_SIZE_MIN_PERNET      128
 #define UDP_HTABLE_SIZE_MIN             (CONFIG_BASE_SMALL ? 128 : 256)
+#define UDP_HTABLE_SIZE_MAX             65536
 
 static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask)
 {
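Why a separate minimum of 128 for per-netns tables: udp_lib_get_port() keeps a DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN) on the stack, and PORTS_PER_CHAIN is derived from this minimum (see the net/ipv4/udp.c hunk below). A standalone illustration of the arithmetic, using the same macro values:

#include <stdio.h>

#define MAX_UDP_PORTS                   65536
#define UDP_HTABLE_SIZE_MIN_PERNET      128
#define PORTS_PER_CHAIN                 (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN_PERNET)

int main(void)
{
        /* 65536 / 128 = 512 bits, i.e. a 64-byte on-stack bitmap */
        printf("bitmap: %d bits = %d bytes\n",
               PORTS_PER_CHAIN, PORTS_PER_CHAIN / 8);
        return 0;
}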
include/net/netns/ipv4.h
@@ -43,6 +43,7 @@ struct tcp_fastopen_context;
 
 struct netns_ipv4 {
         struct inet_timewait_death_row tcp_death_row;
+        struct udp_table *udp_table;
 
 #ifdef CONFIG_SYSCTL
         struct ctl_table_header *forw_hdr;
@@ -207,6 +208,8 @@ struct netns_ipv4 {
 
         atomic_t dev_addr_genid;
 
+        unsigned int sysctl_udp_child_hash_entries;
+
 #ifdef CONFIG_SYSCTL
         unsigned long *sysctl_local_reserved_ports;
         int sysctl_ip_prot_sock;
net/core/filter.c
@@ -6432,7 +6432,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
         else
                 sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport,
                                        dst4, tuple->ipv4.dport,
-                                       dif, sdif, &udp_table, NULL);
+                                       dif, sdif, net->ipv4.udp_table, NULL);
 #if IS_ENABLED(CONFIG_IPV6)
         } else {
                 struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
@@ -6448,7 +6448,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
                                                src6, tuple->ipv6.sport,
                                                dst6, tuple->ipv6.dport,
                                                dif, sdif,
-                                               &udp_table, NULL);
+                                               net->ipv4.udp_table, NULL);
 #endif
         }
net/ipv4/sysctl_net_ipv4.c
@@ -40,6 +40,7 @@ static int one_day_secs = 24 * 3600;
 static u32 fib_multipath_hash_fields_all_mask __maybe_unused =
         FIB_MULTIPATH_HASH_FIELD_ALL_MASK;
 static unsigned int tcp_child_ehash_entries_max = 16 * 1024 * 1024;
+static unsigned int udp_child_hash_entries_max = UDP_HTABLE_SIZE_MAX;
 static int tcp_plb_max_rounds = 31;
 static int tcp_plb_max_cong_thresh = 256;
 
@@ -402,12 +403,36 @@ static int proc_tcp_ehash_entries(struct ctl_table *table, int write,
         if (!net_eq(net, &init_net) && !hinfo->pernet)
                 tcp_ehash_entries *= -1;
 
         memset(&tbl, 0, sizeof(tbl));
         tbl.data = &tcp_ehash_entries;
         tbl.maxlen = sizeof(int);
 
         return proc_dointvec(&tbl, write, buffer, lenp, ppos);
 }
 
+static int proc_udp_hash_entries(struct ctl_table *table, int write,
+                                 void *buffer, size_t *lenp, loff_t *ppos)
+{
+        struct net *net = container_of(table->data, struct net,
+                                       ipv4.sysctl_udp_child_hash_entries);
+        int udp_hash_entries;
+        struct ctl_table tbl;
+
+        udp_hash_entries = net->ipv4.udp_table->mask + 1;
+
+        /* A negative number indicates that the child netns
+         * shares the global udp_table.
+         */
+        if (!net_eq(net, &init_net) && net->ipv4.udp_table == &udp_table)
+                udp_hash_entries *= -1;
+
+        memset(&tbl, 0, sizeof(tbl));
+        tbl.data = &udp_hash_entries;
+        tbl.maxlen = sizeof(int);
+
+        return proc_dointvec(&tbl, write, buffer, lenp, ppos);
+}
+
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
                                           void *buffer, size_t *lenp,
@@ -1361,6 +1386,21 @@ static struct ctl_table ipv4_net_table[] = {
                 .extra1 = SYSCTL_ZERO,
                 .extra2 = &tcp_child_ehash_entries_max,
         },
+        {
+                .procname = "udp_hash_entries",
+                .data = &init_net.ipv4.sysctl_udp_child_hash_entries,
+                .mode = 0444,
+                .proc_handler = proc_udp_hash_entries,
+        },
+        {
+                .procname = "udp_child_hash_entries",
+                .data = &init_net.ipv4.sysctl_udp_child_hash_entries,
+                .maxlen = sizeof(unsigned int),
+                .mode = 0644,
+                .proc_handler = proc_douintvec_minmax,
+                .extra1 = SYSCTL_ZERO,
+                .extra2 = &udp_child_hash_entries_max,
+        },
         {
                 .procname = "udp_rmem_min",
                 .data = &init_net.ipv4.sysctl_udp_rmem_min,
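The sign convention implemented by proc_udp_hash_entries() above can be consumed from userspace like this (an illustrative sketch; the proc path follows from the ctl_table registration):

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/sys/net/ipv4/udp_hash_entries", "r");
        int entries;

        if (!f || fscanf(f, "%d", &entries) != 1)
                return 1;
        fclose(f);

        if (entries < 0)
                printf("shared global table, %d buckets\n", -entries);
        else
                printf("per-netns table, %d buckets\n", entries);
        return 0;
}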
net/ipv4/udp.c
@@ -129,7 +129,12 @@ DEFINE_PER_CPU(int, udp_memory_per_cpu_fw_alloc);
 EXPORT_PER_CPU_SYMBOL_GPL(udp_memory_per_cpu_fw_alloc);
 
 #define MAX_UDP_PORTS 65536
-#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)
+#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN_PERNET)
+
+static struct udp_table *udp_get_table_prot(struct sock *sk)
+{
+        return sk->sk_prot->h.udp_table ? : sock_net(sk)->ipv4.udp_table;
+}
 
 static int udp_lib_lport_inuse(struct net *net, __u16 num,
                                const struct udp_hslot *hslot,
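udp_get_table_prot() above relies on GCC's binary form of the conditional operator: if the proto pins a table (UDP-Lite still pins &udplite_table, which is never per-netns), that wins; plain UDP now pins nothing (h.udp_table = NULL, see the udp_prot hunk below), so it falls through to the per-netns table. A toy model of the same fallback (requires GCC or Clang for the `?:` extension; names are illustrative):

#include <stdio.h>

static const char *pick(const char *pinned, const char *pernet)
{
        /* a ?: b evaluates a once, returns it if non-NULL, else b */
        return pinned ?: pernet;
}

int main(void)
{
        printf("%s\n", pick(NULL, "net->ipv4.udp_table"));     /* per-netns */
        printf("%s\n", pick("&udplite_table", "unused"));      /* pinned */
        return 0;
}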
@@ -232,16 +237,16 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot)
 int udp_lib_get_port(struct sock *sk, unsigned short snum,
                      unsigned int hash2_nulladdr)
 {
+        struct udp_table *udptable = udp_get_table_prot(sk);
         struct udp_hslot *hslot, *hslot2;
-        struct udp_table *udptable = sk->sk_prot->h.udp_table;
-        int error = 1;
         struct net *net = sock_net(sk);
+        int error = 1;
 
         if (!snum) {
+                DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN);
+                unsigned short first, last;
                 int low, high, remaining;
                 unsigned int rand;
-                unsigned short first, last;
-                DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN);
 
                 inet_get_local_port_range(net, &low, &high);
                 remaining = (high - low) + 1;
@@ -467,7 +472,7 @@ static struct sock *udp4_lookup_run_bpf(struct net *net,
         struct sock *sk, *reuse_sk;
         bool no_reuseport;
 
-        if (udptable != &udp_table)
+        if (udptable != net->ipv4.udp_table)
                 return NULL; /* only UDP is supported */
 
         no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP, saddr, sport,
@@ -548,10 +553,11 @@ struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb,
                          __be16 sport, __be16 dport)
 {
         const struct iphdr *iph = ip_hdr(skb);
+        struct net *net = dev_net(skb->dev);
 
-        return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
+        return __udp4_lib_lookup(net, iph->saddr, sport,
                                  iph->daddr, dport, inet_iif(skb),
-                                 inet_sdif(skb), &udp_table, NULL);
+                                 inet_sdif(skb), net->ipv4.udp_table, NULL);
 }
 
 /* Must be called under rcu_read_lock().
@@ -564,7 +570,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
         struct sock *sk;
 
         sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport,
-                               dif, 0, &udp_table, NULL);
+                               dif, 0, net->ipv4.udp_table, NULL);
         if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
                 sk = NULL;
         return sk;
@@ -802,7 +808,7 @@ out:
 
 int udp_err(struct sk_buff *skb, u32 info)
 {
-        return __udp4_lib_err(skb, info, &udp_table);
+        return __udp4_lib_err(skb, info, dev_net(skb->dev)->ipv4.udp_table);
 }
 
 /*
@@ -1999,7 +2005,7 @@ EXPORT_SYMBOL(udp_disconnect);
 void udp_lib_unhash(struct sock *sk)
 {
         if (sk_hashed(sk)) {
-                struct udp_table *udptable = sk->sk_prot->h.udp_table;
+                struct udp_table *udptable = udp_get_table_prot(sk);
                 struct udp_hslot *hslot, *hslot2;
 
                 hslot = udp_hashslot(udptable, sock_net(sk),
@@ -2030,7 +2036,7 @@ EXPORT_SYMBOL(udp_lib_unhash);
 void udp_lib_rehash(struct sock *sk, u16 newhash)
 {
         if (sk_hashed(sk)) {
-                struct udp_table *udptable = sk->sk_prot->h.udp_table;
+                struct udp_table *udptable = udp_get_table_prot(sk);
                 struct udp_hslot *hslot, *hslot2, *nhslot2;
 
                 hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
@@ -2519,10 +2525,14 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
                                                   __be16 rmt_port, __be32 rmt_addr,
                                                   int dif, int sdif)
 {
-        struct sock *sk, *result;
+        struct udp_table *udptable = net->ipv4.udp_table;
         unsigned short hnum = ntohs(loc_port);
-        unsigned int slot = udp_hashfn(net, hnum, udp_table.mask);
-        struct udp_hslot *hslot = &udp_table.hash[slot];
+        struct sock *sk, *result;
+        struct udp_hslot *hslot;
+        unsigned int slot;
+
+        slot = udp_hashfn(net, hnum, udptable->mask);
+        hslot = &udptable->hash[slot];
 
         /* Do not bother scanning a too big list */
         if (hslot->count > 10)
@@ -2550,14 +2560,19 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
                                             __be16 rmt_port, __be32 rmt_addr,
                                             int dif, int sdif)
 {
-        unsigned short hnum = ntohs(loc_port);
-        unsigned int hash2 = ipv4_portaddr_hash(net, loc_addr, hnum);
-        unsigned int slot2 = hash2 & udp_table.mask;
-        struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
+        struct udp_table *udptable = net->ipv4.udp_table;
         INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr);
-        const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
+        unsigned short hnum = ntohs(loc_port);
+        unsigned int hash2, slot2;
+        struct udp_hslot *hslot2;
+        __portpair ports;
         struct sock *sk;
 
+        hash2 = ipv4_portaddr_hash(net, loc_addr, hnum);
+        slot2 = hash2 & udptable->mask;
+        hslot2 = &udptable->hash2[slot2];
+        ports = INET_COMBINED_PORTS(rmt_port, hnum);
+
         udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
                 if (inet_match(net, sk, acookie, ports, dif, sdif))
                         return sk;
@@ -2637,7 +2652,7 @@ int udp_v4_early_demux(struct sk_buff *skb)
 
 int udp_rcv(struct sk_buff *skb)
 {
-        return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP);
+        return __udp4_lib_rcv(skb, dev_net(skb->dev)->ipv4.udp_table, IPPROTO_UDP);
 }
 
 void udp_destroy_sock(struct sock *sk)
@@ -2960,7 +2975,7 @@ struct proto udp_prot = {
         .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
         .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
         .obj_size = sizeof(struct udp_sock),
-        .h.udp_table = &udp_table,
+        .h.udp_table = NULL,
         .diag_destroy = udp_abort,
 };
 EXPORT_SYMBOL(udp_prot);
@@ -2968,21 +2983,30 @@ EXPORT_SYMBOL(udp_prot);
 /* ------------------------------------------------------------------------ */
 #ifdef CONFIG_PROC_FS
 
+static struct udp_table *udp_get_table_afinfo(struct udp_seq_afinfo *afinfo,
+                                              struct net *net)
+{
+        return afinfo->udp_table ? : net->ipv4.udp_table;
+}
+
 static struct sock *udp_get_first(struct seq_file *seq, int start)
 {
-        struct sock *sk;
-        struct udp_seq_afinfo *afinfo;
         struct udp_iter_state *state = seq->private;
         struct net *net = seq_file_net(seq);
+        struct udp_seq_afinfo *afinfo;
+        struct udp_table *udptable;
+        struct sock *sk;
 
         if (state->bpf_seq_afinfo)
                 afinfo = state->bpf_seq_afinfo;
         else
                 afinfo = pde_data(file_inode(seq->file));
 
-        for (state->bucket = start; state->bucket <= afinfo->udp_table->mask;
+        udptable = udp_get_table_afinfo(afinfo, net);
+
+        for (state->bucket = start; state->bucket <= udptable->mask;
              ++state->bucket) {
-                struct udp_hslot *hslot = &afinfo->udp_table->hash[state->bucket];
+                struct udp_hslot *hslot = &udptable->hash[state->bucket];
 
                 if (hlist_empty(&hslot->head))
                         continue;
@@ -3004,9 +3028,10 @@ found:
 
 static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
 {
-        struct udp_seq_afinfo *afinfo;
         struct udp_iter_state *state = seq->private;
         struct net *net = seq_file_net(seq);
+        struct udp_seq_afinfo *afinfo;
+        struct udp_table *udptable;
 
         if (state->bpf_seq_afinfo)
                 afinfo = state->bpf_seq_afinfo;
@@ -3020,8 +3045,11 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
                  sk->sk_family != afinfo->family)));
 
         if (!sk) {
-                if (state->bucket <= afinfo->udp_table->mask)
-                        spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
+                udptable = udp_get_table_afinfo(afinfo, net);
+
+                if (state->bucket <= udptable->mask)
+                        spin_unlock_bh(&udptable->hash[state->bucket].lock);
+
                 return udp_get_first(seq, state->bucket + 1);
         }
         return sk;
@@ -3062,16 +3090,19 @@ EXPORT_SYMBOL(udp_seq_next);
 
 void udp_seq_stop(struct seq_file *seq, void *v)
 {
-        struct udp_seq_afinfo *afinfo;
         struct udp_iter_state *state = seq->private;
+        struct udp_seq_afinfo *afinfo;
+        struct udp_table *udptable;
 
         if (state->bpf_seq_afinfo)
                 afinfo = state->bpf_seq_afinfo;
         else
                 afinfo = pde_data(file_inode(seq->file));
 
-        if (state->bucket <= afinfo->udp_table->mask)
-                spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
+        udptable = udp_get_table_afinfo(afinfo, seq_file_net(seq));
+
+        if (state->bucket <= udptable->mask)
+                spin_unlock_bh(&udptable->hash[state->bucket].lock);
 }
 EXPORT_SYMBOL(udp_seq_stop);
@@ -3184,7 +3215,7 @@ EXPORT_SYMBOL(udp_seq_ops);
 
 static struct udp_seq_afinfo udp4_seq_afinfo = {
         .family = AF_INET,
-        .udp_table = &udp_table,
+        .udp_table = NULL,
 };
 
 static int __net_init udp4_proc_init_net(struct net *net)
@@ -3246,7 +3277,7 @@ void __init udp_table_init(struct udp_table *table, const char *name)
                                       &table->log,
                                       &table->mask,
                                       UDP_HTABLE_SIZE_MIN,
-                                      64 * 1024);
+                                      UDP_HTABLE_SIZE_MAX);
 
         table->hash2 = table->hash + (table->mask + 1);
         for (i = 0; i <= table->mask; i++) {
@@ -3271,7 +3302,7 @@ u32 udp_flow_hashrnd(void)
 }
 EXPORT_SYMBOL(udp_flow_hashrnd);
 
-static int __net_init udp_sysctl_init(struct net *net)
+static void __net_init udp_sysctl_init(struct net *net)
 {
         net->ipv4.sysctl_udp_rmem_min = PAGE_SIZE;
         net->ipv4.sysctl_udp_wmem_min = PAGE_SIZE;
@@ -3279,12 +3310,103 @@ static int __net_init udp_sysctl_init(struct net *net)
 #ifdef CONFIG_NET_L3_MASTER_DEV
         net->ipv4.sysctl_udp_l3mdev_accept = 0;
 #endif
-
-        return 0;
 }
 
+static struct udp_table __net_init *udp_pernet_table_alloc(unsigned int hash_entries)
+{
+        struct udp_table *udptable;
+        int i;
+
+        udptable = kmalloc(sizeof(*udptable), GFP_KERNEL);
+        if (!udptable)
+                goto out;
+
+        udptable->hash = vmalloc_huge(hash_entries * 2 * sizeof(struct udp_hslot),
+                                      GFP_KERNEL_ACCOUNT);
+        if (!udptable->hash)
+                goto free_table;
+
+        udptable->hash2 = udptable->hash + hash_entries;
+        udptable->mask = hash_entries - 1;
+        udptable->log = ilog2(hash_entries);
+
+        for (i = 0; i < hash_entries; i++) {
+                INIT_HLIST_HEAD(&udptable->hash[i].head);
+                udptable->hash[i].count = 0;
+                spin_lock_init(&udptable->hash[i].lock);
+
+                INIT_HLIST_HEAD(&udptable->hash2[i].head);
+                udptable->hash2[i].count = 0;
+                spin_lock_init(&udptable->hash2[i].lock);
+        }
+
+        return udptable;
+
+free_table:
+        kfree(udptable);
+out:
+        return NULL;
+}
+
+static void __net_exit udp_pernet_table_free(struct net *net)
+{
+        struct udp_table *udptable = net->ipv4.udp_table;
+
+        if (udptable == &udp_table)
+                return;
+
+        kvfree(udptable->hash);
+        kfree(udptable);
+}
+
+static void __net_init udp_set_table(struct net *net)
+{
+        struct udp_table *udptable;
+        unsigned int hash_entries;
+        struct net *old_net;
+
+        if (net_eq(net, &init_net))
+                goto fallback;
+
+        old_net = current->nsproxy->net_ns;
+        hash_entries = READ_ONCE(old_net->ipv4.sysctl_udp_child_hash_entries);
+        if (!hash_entries)
+                goto fallback;
+
+        /* Set min to keep the bitmap on stack in udp_lib_get_port() */
+        if (hash_entries < UDP_HTABLE_SIZE_MIN_PERNET)
+                hash_entries = UDP_HTABLE_SIZE_MIN_PERNET;
+        else
+                hash_entries = roundup_pow_of_two(hash_entries);
+
+        udptable = udp_pernet_table_alloc(hash_entries);
+        if (udptable) {
+                net->ipv4.udp_table = udptable;
+        } else {
+                pr_warn("Failed to allocate UDP hash table (entries: %u) "
+                        "for a netns, fallback to the global one\n",
+                        hash_entries);
+fallback:
+                net->ipv4.udp_table = &udp_table;
+        }
+}
+
+static int __net_init udp_pernet_init(struct net *net)
+{
+        udp_sysctl_init(net);
+        udp_set_table(net);
+
+        return 0;
+}
+
+static void __net_exit udp_pernet_exit(struct net *net)
+{
+        udp_pernet_table_free(net);
+}
 
 static struct pernet_operations __net_initdata udp_sysctl_ops = {
-        .init = udp_sysctl_init,
+        .init = udp_pernet_init,
+        .exit = udp_pernet_exit,
 };
 
 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
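The clamping and rounding in udp_set_table() above can be modeled in userspace as follows (a sketch; roundup_pow_of_two() is replaced here by explicit bit smearing, which matches it for the 32-bit values this sysctl allows):

#include <stdio.h>

#define UDP_HTABLE_SIZE_MIN_PERNET 128

static unsigned int normalize(unsigned int hash_entries)
{
        if (hash_entries < UDP_HTABLE_SIZE_MIN_PERNET)
                return UDP_HTABLE_SIZE_MIN_PERNET;

        /* round up to the next power of two */
        hash_entries--;
        hash_entries |= hash_entries >> 1;
        hash_entries |= hash_entries >> 2;
        hash_entries |= hash_entries >> 4;
        hash_entries |= hash_entries >> 8;
        hash_entries |= hash_entries >> 16;
        return hash_entries + 1;
}

int main(void)
{
        /* prints: 128 1024 65536 */
        printf("%u %u %u\n", normalize(100), normalize(1000), normalize(65536));
        return 0;
}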
@@ -3302,7 +3424,7 @@ static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux)
                 return -ENOMEM;
 
         afinfo->family = AF_UNSPEC;
-        afinfo->udp_table = &udp_table;
+        afinfo->udp_table = NULL;
         st->bpf_seq_afinfo = afinfo;
         ret = bpf_iter_init_seq_net(priv_data, aux);
         if (ret)
net/ipv4/udp_diag.c
@@ -147,13 +147,13 @@ done:
 static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
                           const struct inet_diag_req_v2 *r)
 {
-        udp_dump(&udp_table, skb, cb, r);
+        udp_dump(sock_net(cb->skb->sk)->ipv4.udp_table, skb, cb, r);
 }
 
 static int udp_diag_dump_one(struct netlink_callback *cb,
                              const struct inet_diag_req_v2 *req)
 {
-        return udp_dump_one(&udp_table, cb, req);
+        return udp_dump_one(sock_net(cb->skb->sk)->ipv4.udp_table, cb, req);
 }
 
 static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
@@ -225,7 +225,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb,
 static int udp_diag_destroy(struct sk_buff *in_skb,
                             const struct inet_diag_req_v2 *req)
 {
-        return __udp_diag_destroy(in_skb, req, &udp_table);
+        return __udp_diag_destroy(in_skb, req, sock_net(in_skb->sk)->ipv4.udp_table);
 }
 
 static int udplite_diag_destroy(struct sk_buff *in_skb,
net/ipv4/udp_offload.c
@@ -600,10 +600,11 @@ static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
                                         __be16 dport)
 {
         const struct iphdr *iph = skb_gro_network_header(skb);
+        struct net *net = dev_net(skb->dev);
 
-        return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
+        return __udp4_lib_lookup(net, iph->saddr, sport,
                                  iph->daddr, dport, inet_iif(skb),
-                                 inet_sdif(skb), &udp_table, NULL);
+                                 inet_sdif(skb), net->ipv4.udp_table, NULL);
 }
 
 INDIRECT_CALLABLE_SCOPE
net/ipv6/udp.c
@@ -217,7 +217,7 @@ static inline struct sock *udp6_lookup_run_bpf(struct net *net,
         struct sock *sk, *reuse_sk;
         bool no_reuseport;
 
-        if (udptable != &udp_table)
+        if (udptable != net->ipv4.udp_table)
                 return NULL; /* only UDP is supported */
 
         no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP, saddr, sport,
@@ -298,10 +298,11 @@ struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
                          __be16 sport, __be16 dport)
 {
         const struct ipv6hdr *iph = ipv6_hdr(skb);
+        struct net *net = dev_net(skb->dev);
 
-        return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
+        return __udp6_lib_lookup(net, &iph->saddr, sport,
                                  &iph->daddr, dport, inet6_iif(skb),
-                                 inet6_sdif(skb), &udp_table, NULL);
+                                 inet6_sdif(skb), net->ipv4.udp_table, NULL);
 }
 
 /* Must be called under rcu_read_lock().
@@ -314,7 +315,7 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be
         struct sock *sk;
 
         sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport,
-                               dif, 0, &udp_table, NULL);
+                               dif, 0, net->ipv4.udp_table, NULL);
         if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
                 sk = NULL;
         return sk;
@@ -689,7 +690,8 @@ static __inline__ int udpv6_err(struct sk_buff *skb,
                                 struct inet6_skb_parm *opt, u8 type,
                                 u8 code, int offset, __be32 info)
 {
-        return __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
+        return __udp6_lib_err(skb, opt, type, code, offset, info,
+                              dev_net(skb->dev)->ipv4.udp_table);
 }
 
 static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
@@ -1063,13 +1065,18 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
                         __be16 rmt_port, const struct in6_addr *rmt_addr,
                         int dif, int sdif)
 {
+        struct udp_table *udptable = net->ipv4.udp_table;
         unsigned short hnum = ntohs(loc_port);
-        unsigned int hash2 = ipv6_portaddr_hash(net, loc_addr, hnum);
-        unsigned int slot2 = hash2 & udp_table.mask;
-        struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
-        const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
+        unsigned int hash2, slot2;
+        struct udp_hslot *hslot2;
+        __portpair ports;
         struct sock *sk;
 
+        hash2 = ipv6_portaddr_hash(net, loc_addr, hnum);
+        slot2 = hash2 & udptable->mask;
+        hslot2 = &udptable->hash2[slot2];
+        ports = INET_COMBINED_PORTS(rmt_port, hnum);
+
         udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
                 if (sk->sk_state == TCP_ESTABLISHED &&
                     inet6_match(net, sk, rmt_addr, loc_addr, ports, dif, sdif))
@@ -1123,7 +1130,7 @@ void udp_v6_early_demux(struct sk_buff *skb)
 
 INDIRECT_CALLABLE_SCOPE int udpv6_rcv(struct sk_buff *skb)
 {
-        return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP);
+        return __udp6_lib_rcv(skb, dev_net(skb->dev)->ipv4.udp_table, IPPROTO_UDP);
 }
 
 /*
@@ -1720,7 +1727,7 @@ EXPORT_SYMBOL(udp6_seq_ops);
 
 static struct udp_seq_afinfo udp6_seq_afinfo = {
         .family = AF_INET6,
-        .udp_table = &udp_table,
+        .udp_table = NULL,
 };
 
 int __net_init udp6_proc_init(struct net *net)
@@ -1770,7 +1777,7 @@ struct proto udpv6_prot = {
         .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
         .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
         .obj_size = sizeof(struct udp6_sock),
-        .h.udp_table = &udp_table,
+        .h.udp_table = NULL,
         .diag_destroy = udp_abort,
 };
net/ipv6/udp_offload.c
@@ -116,10 +116,11 @@ static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
                                         __be16 dport)
 {
         const struct ipv6hdr *iph = skb_gro_network_header(skb);
+        struct net *net = dev_net(skb->dev);
 
-        return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
+        return __udp6_lib_lookup(net, &iph->saddr, sport,
                                  &iph->daddr, dport, inet6_iif(skb),
-                                 inet6_sdif(skb), &udp_table, NULL);
+                                 inet6_sdif(skb), net->ipv4.udp_table, NULL);
 }
 
 INDIRECT_CALLABLE_SCOPE