net: convert TCP/DCCP ehash rwlocks to spinlocks
Now that TCP & DCCP use RCU lookups, we can convert the ehash rwlocks to spinlocks.

/proc/net/tcp and other seq_file 'readers' can safely be converted to 'writers'.

This should speed up writers, since spin_lock()/spin_unlock() use only one atomic operation instead of the two needed by write_lock()/write_unlock().

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
commit 9db66bdcc8
parent b8c26a33c8
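The conversion keeps the existing scheme of one lock per ehash bucket, selected by masking the hash with ehash_locks_mask; only the lock type changes. Since lookups already go through RCU and never take the lock, the remaining users are writers (hash/unhash, the timewait handoff, the /proc dumpers), and for them an uncontended spin_lock()/spin_unlock() pair is cheaper than write_lock()/write_unlock(). Below is a minimal userspace sketch of the bucket-lock pattern; the names (ehash_table, ehash_lockp, EHASH_LOCKS) and the use of pthread spinlocks are illustrative stand-ins, not the kernel API.

/* Sketch only: pthread spinlocks standing in for the kernel's spinlock_t. */
#include <pthread.h>
#include <stdio.h>

#define EHASH_LOCKS 256                 /* must be a power of two */

struct ehash_table {
        pthread_spinlock_t locks[EHASH_LOCKS];
        unsigned int locks_mask;        /* EHASH_LOCKS - 1 */
};

/* Analogue of inet_ehash_lockp(): map a hash to its bucket lock. */
static pthread_spinlock_t *ehash_lockp(struct ehash_table *t, unsigned int hash)
{
        return &t->locks[hash & t->locks_mask];
}

static void ehash_init(struct ehash_table *t)
{
        t->locks_mask = EHASH_LOCKS - 1;
        for (unsigned int i = 0; i < EHASH_LOCKS; i++)
                pthread_spin_init(&t->locks[i], PTHREAD_PROCESS_PRIVATE);
}

int main(void)
{
        struct ehash_table t;
        unsigned int hash = 0xdeadbeef;

        ehash_init(&t);
        /* A writer (insert/remove) serializes on its bucket's spinlock;
         * lookups would go through RCU and never touch it. */
        pthread_spin_lock(ehash_lockp(&t, hash));
        printf("bucket %u locked for a write\n", hash & t.locks_mask);
        pthread_spin_unlock(ehash_lockp(&t, hash));
        return 0;
}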
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -116,7 +116,7 @@ struct inet_hashinfo {
         * TIME_WAIT sockets use a separate chain (twchain).
         */
        struct inet_ehash_bucket        *ehash;
-       rwlock_t                        *ehash_locks;
+       spinlock_t                      *ehash_locks;
        unsigned int                    ehash_size;
        unsigned int                    ehash_locks_mask;
 
@@ -152,7 +152,7 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket(
        return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)];
 }
 
-static inline rwlock_t *inet_ehash_lockp(struct inet_hashinfo *hashinfo,
+static inline spinlock_t *inet_ehash_lockp(struct inet_hashinfo *hashinfo,
                                          unsigned int hash)
 {
@@ -177,16 +177,16 @@ static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
                size = 4096;
        if (sizeof(rwlock_t) != 0) {
 #ifdef CONFIG_NUMA
-               if (size * sizeof(rwlock_t) > PAGE_SIZE)
-                       hashinfo->ehash_locks = vmalloc(size * sizeof(rwlock_t));
+               if (size * sizeof(spinlock_t) > PAGE_SIZE)
+                       hashinfo->ehash_locks = vmalloc(size * sizeof(spinlock_t));
                else
 #endif
-               hashinfo->ehash_locks = kmalloc(size * sizeof(rwlock_t),
+               hashinfo->ehash_locks = kmalloc(size * sizeof(spinlock_t),
                                                GFP_KERNEL);
                if (!hashinfo->ehash_locks)
                        return ENOMEM;
                for (i = 0; i < size; i++)
-                       rwlock_init(&hashinfo->ehash_locks[i]);
+                       spin_lock_init(&hashinfo->ehash_locks[i]);
        }
        hashinfo->ehash_locks_mask = size - 1;
        return 0;
@@ -197,7 +197,7 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
        if (hashinfo->ehash_locks) {
 #ifdef CONFIG_NUMA
                unsigned int size = (hashinfo->ehash_locks_mask + 1) *
-                                       sizeof(rwlock_t);
+                                       sizeof(spinlock_t);
                if (size > PAGE_SIZE)
                        vfree(hashinfo->ehash_locks);
                else
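For orientation, here is a rough userspace rendering of the allocation strategy visible in inet_ehash_locks_alloc() above. Two things are assumptions of the sketch rather than content of the hunk: the CPU-count scaling of the lock array (only its tail, "size = 4096;", appears in the diff), and plain malloc() standing in for the kernel's kmalloc()/vmalloc() split (on NUMA builds the kernel switches to vmalloc() once the array exceeds a page, as the hunk shows).

/* Sketch only: userspace stand-in for inet_ehash_locks_alloc()/_free(). */
#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>

struct ehash_locks {
        pthread_spinlock_t *locks;
        unsigned int mask;
};

static int ehash_locks_alloc(struct ehash_locks *e)
{
        long nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
        unsigned int size = 256;        /* scaled up with the CPU count */

        if (nr_cpus >= 4)  size = 512;
        if (nr_cpus >= 8)  size = 1024;
        if (nr_cpus >= 16) size = 2048;
        if (nr_cpus >= 32) size = 4096;

        e->locks = malloc(size * sizeof(*e->locks));    /* kmalloc()/vmalloc() in the kernel */
        if (!e->locks)
                return -1;
        for (unsigned int i = 0; i < size; i++)
                pthread_spin_init(&e->locks[i], PTHREAD_PROCESS_PRIVATE);
        e->mask = size - 1;             /* hashes are masked, so size must stay a power of two */
        return 0;
}

static void ehash_locks_free(struct ehash_locks *e)
{
        free(e->locks);
        e->locks = NULL;
}

int main(void)
{
        struct ehash_locks e;

        if (ehash_locks_alloc(&e))
                return 1;
        pthread_spin_lock(&e.locks[0x12345u & e.mask]);
        pthread_spin_unlock(&e.locks[0x12345u & e.mask]);
        ehash_locks_free(&e);
        return 0;
}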
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -271,13 +271,12 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
        struct net *net = sock_net(sk);
        unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport);
        struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
-       rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
+       spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
        struct sock *sk2;
        const struct hlist_nulls_node *node;
        struct inet_timewait_sock *tw;
 
        prefetch(head->chain.first);
-       write_lock(lock);
-
+       spin_lock(lock);
        /* Check TIME-WAIT sockets first. */
        sk_nulls_for_each(sk2, node, &head->twchain) {
@@ -308,8 +307,8 @@ unique:
        sk->sk_hash = hash;
        WARN_ON(!sk_unhashed(sk));
        __sk_nulls_add_node_rcu(sk, &head->chain);
+       spin_unlock(lock);
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-       write_unlock(lock);
 
        if (twp) {
                *twp = tw;
@@ -325,7 +324,7 @@ unique:
        return 0;
 
 not_unique:
-       write_unlock(lock);
+       spin_unlock(lock);
        return -EADDRNOTAVAIL;
 }
 
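The hunks above show the shape that matters for correctness in __inet_check_established(): the duplicate scan and the insertion both happen under the same bucket spinlock, so two sockets racing for the same 4-tuple cannot both be added, and the unlock is moved ahead of sock_prot_inuse_add(), which does not rely on the bucket lock. A userspace sketch of the check-then-insert step (types and names hypothetical, a plain singly linked chain standing in for the nulls list):

/* Sketch only: check for a duplicate and insert under one bucket lock. */
#include <errno.h>
#include <pthread.h>
#include <stddef.h>

struct entry {
        unsigned int key;               /* stands in for the connection 4-tuple */
        struct entry *next;
};

struct bucket {
        pthread_spinlock_t lock;
        struct entry *chain;
};

static int bucket_check_insert(struct bucket *b, struct entry *e)
{
        struct entry *cur;

        pthread_spin_lock(&b->lock);
        for (cur = b->chain; cur; cur = cur->next) {
                if (cur->key == e->key) {       /* not unique */
                        pthread_spin_unlock(&b->lock);
                        return -EADDRNOTAVAIL;
                }
        }
        e->next = b->chain;                     /* unique: link it in */
        b->chain = e;
        pthread_spin_unlock(&b->lock);
        return 0;
}

int main(void)
{
        struct bucket b = { .chain = NULL };
        struct entry a = { .key = 42 }, dup = { .key = 42 };

        pthread_spin_init(&b.lock, PTHREAD_PROCESS_PRIVATE);
        return (bucket_check_insert(&b, &a) == 0 &&
                bucket_check_insert(&b, &dup) == -EADDRNOTAVAIL) ? 0 : 1;
}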
@@ -340,7 +339,7 @@ void __inet_hash_nolisten(struct sock *sk)
 {
        struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
        struct hlist_nulls_head *list;
-       rwlock_t *lock;
+       spinlock_t *lock;
        struct inet_ehash_bucket *head;
 
        WARN_ON(!sk_unhashed(sk));
@@ -350,10 +349,10 @@ void __inet_hash_nolisten(struct sock *sk)
        list = &head->chain;
        lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
 
-       write_lock(lock);
+       spin_lock(lock);
        __sk_nulls_add_node_rcu(sk, list);
+       spin_unlock(lock);
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-       write_unlock(lock);
 }
 EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
 
@@ -402,12 +401,12 @@ void inet_unhash(struct sock *sk)
                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
                spin_unlock_bh(&ilb->lock);
        } else {
-               rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
+               spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
 
-               write_lock_bh(lock);
+               spin_lock_bh(lock);
                if (__sk_nulls_del_node_init_rcu(sk))
                        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
-               write_unlock_bh(lock);
+               spin_unlock_bh(lock);
        }
 }
 EXPORT_SYMBOL_GPL(inet_unhash);
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -20,16 +20,16 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
        struct inet_bind_hashbucket *bhead;
        struct inet_bind_bucket *tb;
        /* Unlink from established hashes. */
-       rwlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
+       spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
 
-       write_lock(lock);
+       spin_lock(lock);
        if (hlist_nulls_unhashed(&tw->tw_node)) {
-               write_unlock(lock);
+               spin_unlock(lock);
                return;
        }
        hlist_nulls_del_rcu(&tw->tw_node);
        sk_nulls_node_init(&tw->tw_node);
-       write_unlock(lock);
+       spin_unlock(lock);
 
        /* Disassociate with bind bucket. */
        bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
@@ -76,7 +76,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
        const struct inet_sock *inet = inet_sk(sk);
        const struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash);
-       rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
+       spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
        struct inet_bind_hashbucket *bhead;
        /* Step 1: Put TW into bind hash. Original socket stays there too.
           Note, that any socket with inet->num != 0 MUST be bound in
@@ -90,7 +90,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
        inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
        spin_unlock(&bhead->lock);
 
-       write_lock(lock);
+       spin_lock(lock);
 
        /*
         * Step 2: Hash TW into TIMEWAIT chain.
@@ -104,7 +104,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
        if (__sk_nulls_del_node_init_rcu(sk))
                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 
-       write_unlock(lock);
+       spin_unlock(lock);
 }
 
 EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
@@ -427,9 +427,9 @@ void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo,
        for (h = 0; h < (hashinfo->ehash_size); h++) {
                struct inet_ehash_bucket *head =
                        inet_ehash_bucket(hashinfo, h);
-               rwlock_t *lock = inet_ehash_lockp(hashinfo, h);
+               spinlock_t *lock = inet_ehash_lockp(hashinfo, h);
 restart:
-               write_lock(lock);
+               spin_lock(lock);
                sk_nulls_for_each(sk, node, &head->twchain) {
 
                        tw = inet_twsk(sk);
@@ -438,13 +438,13 @@ restart:
                                continue;
 
                        atomic_inc(&tw->tw_refcnt);
-                       write_unlock(lock);
+                       spin_unlock(lock);
                        inet_twsk_deschedule(tw, twdr);
                        inet_twsk_put(tw);
 
                        goto restart;
                }
-               write_unlock(lock);
+               spin_unlock(lock);
        }
        local_bh_enable();
 }
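inet_twsk_purge() above also shows the standard way to do slow work on a chain member without holding the bucket lock: pin the victim with a reference, drop the spinlock, run the teardown, then restart the scan because the chain may have changed meanwhile. A userspace sketch of that drop-and-restart loop follows; all names are hypothetical, and the teardown stub unlinks the entry itself, in the spirit of inet_twsk_deschedule() eventually re-taking the bucket lock in __inet_twsk_kill().

/* Sketch only: the drop-lock-and-restart purge pattern. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct tw_entry {
        atomic_int refcnt;
        bool doomed;
        struct tw_entry *next;
};

struct tw_bucket {
        pthread_spinlock_t lock;
        struct tw_entry *chain;
};

/* Stands in for inet_twsk_deschedule() + inet_twsk_put(): re-takes the
 * bucket lock, unlinks the entry and drops the pinning reference. */
static void tw_teardown(struct tw_bucket *b, struct tw_entry *tw)
{
        pthread_spin_lock(&b->lock);
        for (struct tw_entry **pp = &b->chain; *pp; pp = &(*pp)->next) {
                if (*pp == tw) {
                        *pp = tw->next;
                        break;
                }
        }
        pthread_spin_unlock(&b->lock);
        atomic_fetch_sub(&tw->refcnt, 1);
}

static void tw_purge_bucket(struct tw_bucket *b)
{
restart:
        pthread_spin_lock(&b->lock);
        for (struct tw_entry *tw = b->chain; tw; tw = tw->next) {
                if (!tw->doomed)
                        continue;
                atomic_fetch_add(&tw->refcnt, 1);       /* pin the victim */
                pthread_spin_unlock(&b->lock);          /* slow work runs unlocked */
                tw_teardown(b, tw);
                goto restart;                           /* chain may have changed */
        }
        pthread_spin_unlock(&b->lock);
}

int main(void)
{
        struct tw_bucket b = { .chain = NULL };
        struct tw_entry tw = { .doomed = true };

        pthread_spin_init(&b.lock, PTHREAD_PROCESS_PRIVATE);
        atomic_init(&tw.refcnt, 1);
        b.chain = &tw;
        tw_purge_bucket(&b);
        return b.chain != NULL;         /* 0 on success: entry purged */
}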
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1970,13 +1970,13 @@ static void *established_get_first(struct seq_file *seq)
                struct sock *sk;
                struct hlist_nulls_node *node;
                struct inet_timewait_sock *tw;
-               rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
+               spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
 
                /* Lockless fast path for the common case of empty buckets */
                if (empty_bucket(st))
                        continue;
 
-               read_lock_bh(lock);
+               spin_lock_bh(lock);
                sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
                        if (sk->sk_family != st->family ||
                            !net_eq(sock_net(sk), net)) {
@@ -1995,7 +1995,7 @@ static void *established_get_first(struct seq_file *seq)
                        rc = tw;
                        goto out;
                }
-               read_unlock_bh(lock);
+               spin_unlock_bh(lock);
                st->state = TCP_SEQ_STATE_ESTABLISHED;
        }
 out:
@@ -2023,7 +2023,7 @@ get_tw:
                        cur = tw;
                        goto out;
                }
-               read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+               spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
                st->state = TCP_SEQ_STATE_ESTABLISHED;
 
                /* Look for next non empty bucket */
@@ -2033,7 +2033,7 @@ get_tw:
                if (st->bucket >= tcp_hashinfo.ehash_size)
                        return NULL;
 
-               read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+               spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
                sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
        } else
                sk = sk_nulls_next(sk);
@@ -2134,7 +2134,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
        case TCP_SEQ_STATE_TIME_WAIT:
        case TCP_SEQ_STATE_ESTABLISHED:
                if (v)
-                       read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+                       spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
                break;
        }
 }
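This is where the commit message's "readers become writers" shows up: the /proc/net/tcp iterator used to take read_lock_bh() on the bucket and now takes the spinlock exclusively for the short chain walk (the _bh variants are needed because the same buckets are also modified from softirq context). A trimmed userspace sketch of the per-bucket dump step, with hypothetical names and no _bh equivalent:

/* Sketch only: dumping one hash bucket under its spinlock. */
#include <pthread.h>
#include <stdio.h>
#include <stddef.h>

struct dump_entry {
        unsigned int id;
        struct dump_entry *next;
};

struct dump_bucket {
        pthread_spinlock_t lock;
        struct dump_entry *chain;
};

static void dump_bucket(struct dump_bucket *b)
{
        /* The kernel first skips empty buckets without taking the lock
         * (the "lockless fast path" in established_get_first() above). */
        pthread_spin_lock(&b->lock);
        for (struct dump_entry *e = b->chain; e; e = e->next)
                printf("entry %u\n", e->id);
        pthread_spin_unlock(&b->lock);
}

int main(void)
{
        struct dump_entry e = { .id = 1, .next = NULL };
        struct dump_bucket b = { .chain = &e };

        pthread_spin_init(&b.lock, PTHREAD_PROCESS_PRIVATE);
        dump_bucket(&b);
        return 0;
}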
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -38,14 +38,14 @@ void __inet6_hash(struct sock *sk)
        } else {
                unsigned int hash;
                struct hlist_nulls_head *list;
-               rwlock_t *lock;
+               spinlock_t *lock;
 
                sk->sk_hash = hash = inet6_sk_ehashfn(sk);
                list = &inet_ehash_bucket(hashinfo, hash)->chain;
                lock = inet_ehash_lockp(hashinfo, hash);
-               write_lock(lock);
+               spin_lock(lock);
                __sk_nulls_add_node_rcu(sk, list);
-               write_unlock(lock);
+               spin_unlock(lock);
        }
 
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -195,13 +195,12 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
        const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
                                                inet->dport);
        struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
-       rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
+       spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
        struct sock *sk2;
        const struct hlist_nulls_node *node;
        struct inet_timewait_sock *tw;
 
        prefetch(head->chain.first);
-       write_lock(lock);
-
+       spin_lock(lock);
        /* Check TIME-WAIT sockets first. */
        sk_nulls_for_each(sk2, node, &head->twchain) {
@@ -230,8 +229,8 @@ unique:
        WARN_ON(!sk_unhashed(sk));
        __sk_nulls_add_node_rcu(sk, &head->chain);
        sk->sk_hash = hash;
+       spin_unlock(lock);
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-       write_unlock(lock);
 
        if (twp != NULL) {
                *twp = tw;
@@ -246,7 +245,7 @@ unique:
        return 0;
 
 not_unique:
-       write_unlock(lock);
+       spin_unlock(lock);
        return -EADDRNOTAVAIL;
 }
 