mirror of
https://github.com/torvalds/linux.git
synced 2024-12-26 04:42:12 +00:00
271b72c7fa
Goals are : 1) Optimizing handling of incoming Unicast UDP frames, so that no memory writes should happen in the fast path. Note: Multicasts and broadcasts will still need to take a lock, because doing a full lockless lookup in this case is difficult. 2) No expensive operations in the socket bind/unhash phases : - No expensive synchronize_rcu() calls. - No added rcu_head in socket structure, increasing memory needs, but more importantly, forcing us to use call_rcu() calls, that have the bad property of making socket structures cold. (The RCU grace period between a socket being freed and its potential reuse makes the socket cold in the CPU cache). David did a previous patch using call_rcu() and noticed a 20% impact on TCP connection rates. Quoting Christoph Lameter : "Right. That results in cacheline cooldown. You'd want to recycle the object as they are cache hot on a per cpu basis. That is screwed up by the delayed regular rcu processing. We have seen multiple regressions due to cacheline cooldown. The only choice in cacheline hot sensitive areas is to deal with the complexity that comes with SLAB_DESTROY_BY_RCU or give up on RCU." - Because udp sockets are allocated from a dedicated kmem_cache, use of SLAB_DESTROY_BY_RCU can help here. Theory of operation : --------------------- As the lookup is lockfree (using rcu_read_lock()/rcu_read_unlock()), special care must be taken by readers and writers. Use of SLAB_DESTROY_BY_RCU is tricky too, because a socket can be freed, reused, inserted in a different chain or, in the worst case, in the same chain while readers could do lookups at the same time. In order to avoid loops, a reader must check that each socket found in a chain really belongs to the chain the reader was traversing. If it finds a mismatch, the lookup must start again at the beginning. This *restart* loop is the reason we had to use rdlock for the multicast case, because we don't want to send the same message several times to the same socket. We use RCU only for the fast path. 
Thus, /proc/net/udp still takes spinlocks. Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Signed-off-by: David S. Miller <davem@davemloft.net>
133 lines
3.1 KiB
C
133 lines
3.1 KiB
C
/*
 *  UDPLITEv6   An implementation of the UDP-Lite protocol over IPv6.
 *              See also net/ipv4/udplite.c
 *
 *  Authors:    Gerrit Renker       <gerrit@erg.abdn.ac.uk>
 *
 *  Changes:
 *  Fixes:
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
|
|
#include "udp_impl.h"
|
|
|
|
static int udplitev6_rcv(struct sk_buff *skb)
|
|
{
|
|
return __udp6_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE);
|
|
}
|
|
|
|
/*
 * Error handler for UDP-Lite over IPv6.
 *
 * Forwards every argument untouched to the shared __udp6_lib_err()
 * routine, adding the UDP-Lite socket table as the table to operate on.
 */
static void udplitev6_err(struct sk_buff *skb,
			  struct inet6_skb_parm *opt,
			  int type, int code, int offset, __be32 info)
{
	__udp6_lib_err(skb, opt,
		       type, code, offset, info,
		       &udplite_table);
}
|
|
|
|
static struct inet6_protocol udplitev6_protocol = {
|
|
.handler = udplitev6_rcv,
|
|
.err_handler = udplitev6_err,
|
|
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
|
|
};
|
|
|
|
struct proto udplitev6_prot = {
|
|
.name = "UDPLITEv6",
|
|
.owner = THIS_MODULE,
|
|
.close = udp_lib_close,
|
|
.connect = ip6_datagram_connect,
|
|
.disconnect = udp_disconnect,
|
|
.ioctl = udp_ioctl,
|
|
.init = udplite_sk_init,
|
|
.destroy = udpv6_destroy_sock,
|
|
.setsockopt = udpv6_setsockopt,
|
|
.getsockopt = udpv6_getsockopt,
|
|
.sendmsg = udpv6_sendmsg,
|
|
.recvmsg = udpv6_recvmsg,
|
|
.backlog_rcv = udpv6_queue_rcv_skb,
|
|
.hash = udp_lib_hash,
|
|
.unhash = udp_lib_unhash,
|
|
.get_port = udp_v6_get_port,
|
|
.obj_size = sizeof(struct udp6_sock),
|
|
.slab_flags = SLAB_DESTROY_BY_RCU,
|
|
.h.udp_table = &udplite_table,
|
|
#ifdef CONFIG_COMPAT
|
|
.compat_setsockopt = compat_udpv6_setsockopt,
|
|
.compat_getsockopt = compat_udpv6_getsockopt,
|
|
#endif
|
|
};
|
|
|
|
static struct inet_protosw udplite6_protosw = {
|
|
.type = SOCK_DGRAM,
|
|
.protocol = IPPROTO_UDPLITE,
|
|
.prot = &udplitev6_prot,
|
|
.ops = &inet6_dgram_ops,
|
|
.capability = -1,
|
|
.no_check = 0,
|
|
.flags = INET_PROTOSW_PERMANENT,
|
|
};
|
|
|
|
/*
 * Register UDP-Lite with the IPv6 stack.
 *
 * First adds the protocol handler for IPPROTO_UDPLITE, then registers
 * the protocol-switch entry.  If the second step fails, the protocol
 * handler added by the first step is removed again before returning.
 *
 * Returns 0 on success, otherwise the non-zero value returned by the
 * registration call that failed.
 */
int __init udplitev6_init(void)
{
	int err;

	err = inet6_add_protocol(&udplitev6_protocol, IPPROTO_UDPLITE);
	if (err)
		return err;

	err = inet6_register_protosw(&udplite6_protosw);
	if (err)
		/* roll back the handler registration done above */
		inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE);

	return err;
}
|
|
|
|
void udplitev6_exit(void)
|
|
{
|
|
inet6_unregister_protosw(&udplite6_protosw);
|
|
inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE);
|
|
}
|
|
|
|
#ifdef CONFIG_PROC_FS
|
|
static struct udp_seq_afinfo udplite6_seq_afinfo = {
|
|
.name = "udplite6",
|
|
.family = AF_INET6,
|
|
.udp_table = &udplite_table,
|
|
.seq_fops = {
|
|
.owner = THIS_MODULE,
|
|
},
|
|
.seq_ops = {
|
|
.show = udp6_seq_show,
|
|
},
|
|
};
|
|
|
|
static int udplite6_proc_init_net(struct net *net)
|
|
{
|
|
return udp_proc_register(net, &udplite6_seq_afinfo);
|
|
}
|
|
|
|
static void udplite6_proc_exit_net(struct net *net)
|
|
{
|
|
udp_proc_unregister(net, &udplite6_seq_afinfo);
|
|
}
|
|
|
|
static struct pernet_operations udplite6_net_ops = {
|
|
.init = udplite6_proc_init_net,
|
|
.exit = udplite6_proc_exit_net,
|
|
};
|
|
|
|
/*
 * Register the per-namespace proc hooks for UDP-Lite over IPv6.
 * Returns the result of register_pernet_subsys() unchanged.
 */
int __init udplite6_proc_init(void)
{
	int rc;

	rc = register_pernet_subsys(&udplite6_net_ops);
	return rc;
}
|
|
|
|
void udplite6_proc_exit(void)
|
|
{
|
|
unregister_pernet_subsys(&udplite6_net_ops);
|
|
}
|
|
#endif
|