fs/epoll: use a per-cpu counter for user's watches count
This counter tracks the number of watches a user has, to compare against
the 'max_user_watches' limit.  This causes a scalability bottleneck on
SPECjbb2015 on large systems as there is only one user.  Changing to a
per-cpu counter increases throughput of the benchmark by about 30% on a
16-socket, > 1000 thread system.

[rdunlap@infradead.org: fix build errors in kernel/user.c when CONFIG_EPOLL=n]
[npiggin@gmail.com: move ifdefs into wrapper functions, slightly improve panic message]
  Link: https://lkml.kernel.org/r/1628051945.fens3r99ox.astroid@bobo.none
[akpm@linux-foundation.org: tweak user_epoll_alloc(), per Guenter]
  Link: https://lkml.kernel.org/r/20210804191421.GA1900577@roeck-us.net

Link: https://lkml.kernel.org/r/20210802032013.2751916-1-npiggin@gmail.com
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reported-by: Anton Blanchard <anton@ozlabs.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
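For orientation, the patch moves epoll_watches from the atomic_long_* API to the percpu_counter API. Below is a minimal sketch of that counter's lifecycle, using only the <linux/percpu_counter.h> calls that appear in the diff; the example_* names are hypothetical, not part of the patch:

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/percpu_counter.h>

static struct percpu_counter example_watches;
static long example_limit = 1000000;	/* stand-in for max_user_watches */

static int example_init(void)
{
	/* Allocates the per-cpu slots; can fail, unlike an atomic_long_t. */
	return percpu_counter_init(&example_watches, 0, GFP_KERNEL);
}

static int example_charge(void)
{
	/*
	 * percpu_counter_compare() only sums the per-cpu deltas when the
	 * approximate global count is close to the limit, so the common
	 * case stays cache-local instead of bouncing one cache line.
	 */
	if (percpu_counter_compare(&example_watches, example_limit) >= 0)
		return -ENOSPC;
	percpu_counter_inc(&example_watches);	/* per-cpu fast path */
	return 0;
}

static void example_uncharge(void)
{
	percpu_counter_dec(&example_watches);
}

static void example_exit(void)
{
	percpu_counter_destroy(&example_watches);	/* frees per-cpu slots */
}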
parent 4ce9f97045
commit 1e1c15839d
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -723,7 +723,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
 	 */
 	call_rcu(&epi->rcu, epi_rcu_free);
 
-	atomic_long_dec(&ep->user->epoll_watches);
+	percpu_counter_dec(&ep->user->epoll_watches);
 
 	return 0;
 }
@@ -1439,7 +1439,6 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
 {
 	int error, pwake = 0;
 	__poll_t revents;
-	long user_watches;
 	struct epitem *epi;
 	struct ep_pqueue epq;
 	struct eventpoll *tep = NULL;
@@ -1449,11 +1448,15 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
 
 	lockdep_assert_irqs_enabled();
 
-	user_watches = atomic_long_read(&ep->user->epoll_watches);
-	if (unlikely(user_watches >= max_user_watches))
+	if (unlikely(percpu_counter_compare(&ep->user->epoll_watches,
+					    max_user_watches) >= 0))
 		return -ENOSPC;
-	if (!(epi = kmem_cache_zalloc(epi_cache, GFP_KERNEL)))
+	percpu_counter_inc(&ep->user->epoll_watches);
+
+	if (!(epi = kmem_cache_zalloc(epi_cache, GFP_KERNEL))) {
+		percpu_counter_dec(&ep->user->epoll_watches);
 		return -ENOMEM;
+	}
 
 	/* Item initialization follow here ... */
 	INIT_LIST_HEAD(&epi->rdllink);
@@ -1466,17 +1469,16 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
 		mutex_lock_nested(&tep->mtx, 1);
 	/* Add the current item to the list of active epoll hook for this file */
 	if (unlikely(attach_epitem(tfile, epi) < 0)) {
-		kmem_cache_free(epi_cache, epi);
 		if (tep)
 			mutex_unlock(&tep->mtx);
+		kmem_cache_free(epi_cache, epi);
+		percpu_counter_dec(&ep->user->epoll_watches);
 		return -ENOMEM;
 	}
 
 	if (full_check && !tep)
 		list_file(tfile);
 
-	atomic_long_inc(&ep->user->epoll_watches);
-
 	/*
 	 * Add the current item to the RB tree. All RB tree operations are
 	 * protected by "mtx", and ep_insert() is called with "mtx" held.
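The ep_insert() hunks also change the ordering of the limit check: a percpu_counter has no cheap atomic check-and-increment, so the patch reserves the watch up front with percpu_counter_inc() and undoes the reservation on every failure path, instead of reading once and incrementing only on success. A minimal sketch of that reserve-then-undo shape, where do_allocations() is a hypothetical stand-in for kmem_cache_zalloc()/attach_epitem():

static int example_insert(struct user_struct *user)
{
	if (percpu_counter_compare(&user->epoll_watches,
				   max_user_watches) >= 0)
		return -ENOSPC;		/* at or over the limit */
	percpu_counter_inc(&user->epoll_watches);	/* reserve first */

	if (do_allocations() < 0) {	/* hypothetical failure point */
		/* undo the reservation on any error path */
		percpu_counter_dec(&user->epoll_watches);
		return -ENOMEM;
	}
	return 0;
}

A few in-flight inserts can transiently overshoot the limit this way, which is acceptable for an rlimit-style cap and avoids any locked read-modify-write on the hot path.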
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -4,6 +4,7 @@
 
 #include <linux/uidgid.h>
 #include <linux/atomic.h>
+#include <linux/percpu_counter.h>
 #include <linux/refcount.h>
 #include <linux/ratelimit.h>
 
@@ -13,7 +14,7 @@
 struct user_struct {
 	refcount_t __count;	/* reference count */
 #ifdef CONFIG_EPOLL
-	atomic_long_t epoll_watches; /* The number of file descriptors currently watched */
+	struct percpu_counter epoll_watches; /* The number of file descriptors currently watched */
 #endif
 	unsigned long unix_inflight;	/* How many files in flight in unix sockets */
 	atomic_long_t pipe_bufs;  /* how many pages are allocated in pipe buffers */
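The header change swaps an atomic_long_t for a struct percpu_counter, a larger object that, unlike an atomic_long_t, needs runtime allocation of its per-cpu slots and can therefore fail at init time; that is what forces the user_epoll_alloc()/user_epoll_free() lifecycle hooks in kernel/user.c below. Roughly, paraphrasing the SMP shape of <linux/percpu_counter.h> (not a verbatim copy; exact members vary by config):

/* Paraphrased shape of the swapped-in counter type: */
struct percpu_counter_shape {
	raw_spinlock_t lock;	/* serializes slow-path folding */
	s64 count;		/* approximate global value */
	s32 __percpu *counters;	/* per-cpu deltas, allocated at init */
};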
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -129,6 +129,22 @@ static struct user_struct *uid_hash_find(kuid_t uid, struct hlist_head *hashent)
 	return NULL;
 }
 
+static int user_epoll_alloc(struct user_struct *up)
+{
+#ifdef CONFIG_EPOLL
+	return percpu_counter_init(&up->epoll_watches, 0, GFP_KERNEL);
+#else
+	return 0;
+#endif
+}
+
+static void user_epoll_free(struct user_struct *up)
+{
+#ifdef CONFIG_EPOLL
+	percpu_counter_destroy(&up->epoll_watches);
+#endif
+}
+
 /* IRQs are disabled and uidhash_lock is held upon function entry.
  * IRQ state (as stored in flags) is restored and uidhash_lock released
  * upon function exit.
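As the commit message notes ("move ifdefs into wrapper functions"), the CONFIG_EPOLL guards live inside user_epoll_alloc()/user_epoll_free(), so the call sites in free_user(), alloc_uid() and uid_cache_init() stay unconditional and the CONFIG_EPOLL=n build keeps compiling. The alternative the wrappers avoid would scatter a guard at every call site, e.g. (illustrative only, not from the patch):

/* The per-call-site style the wrappers avoid: */
static void free_user_unwrapped(struct user_struct *up)
{
#ifdef CONFIG_EPOLL
	percpu_counter_destroy(&up->epoll_watches);
#endif
	kmem_cache_free(uid_cachep, up);
}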
@@ -138,6 +154,7 @@ static void free_user(struct user_struct *up, unsigned long flags)
 {
 	uid_hash_remove(up);
 	spin_unlock_irqrestore(&uidhash_lock, flags);
+	user_epoll_free(up);
 	kmem_cache_free(uid_cachep, up);
 }
 
@@ -185,6 +202,10 @@ struct user_struct *alloc_uid(kuid_t uid)
 
 		new->uid = uid;
 		refcount_set(&new->__count, 1);
+		if (user_epoll_alloc(new)) {
+			kmem_cache_free(uid_cachep, new);
+			return NULL;
+		}
 		ratelimit_state_init(&new->ratelimit, HZ, 100);
 		ratelimit_set_flags(&new->ratelimit, RATELIMIT_MSG_ON_RELEASE);
 
@@ -195,6 +216,7 @@ struct user_struct *alloc_uid(kuid_t uid)
 		spin_lock_irq(&uidhash_lock);
 		up = uid_hash_find(uid, hashent);
 		if (up) {
+			user_epoll_free(new);
 			kmem_cache_free(uid_cachep, new);
 		} else {
 			uid_hash_insert(new, hashent);
@@ -216,6 +238,9 @@ static int __init uid_cache_init(void)
 	for(n = 0; n < UIDHASH_SZ; ++n)
 		INIT_HLIST_HEAD(uidhash_table + n);
 
+	if (user_epoll_alloc(&root_user))
+		panic("root_user epoll percpu counter alloc failed");
+
 	/* Insert the root user immediately (init already runs as root) */
 	spin_lock_irq(&uidhash_lock);
 	uid_hash_insert(&root_user, uidhashentry(GLOBAL_ROOT_UID));