ucounts: Split rlimit and ucount values and max values

After the ucount rlimit code was merged a bunch of small but
 significant bugs were found and fixed.  At the time it was realized
 that part of the problem was that while the ucount rlimits were very
 similar to the ordinary ucounts (in being nested counts with limits)
 the semantics were slightly different and the code would be less error
 prone if there was less sharing.  This is the long awaited cleanup
 that should hopefully keep things more comprehensible and less error
 prone for whoever needs to touch that code next.
 
 Alexey Gladkov (1):
       ucounts: Split rlimit and ucount values and max values
 
  fs/exec.c                      |  2 +-
  fs/proc/array.c                |  2 +-
  include/linux/user_namespace.h | 35 ++++++++++++++++++++++-------------
  kernel/fork.c                  | 12 ++++++------
  kernel/sys.c                   |  2 +-
  kernel/ucount.c                | 34 +++++++++++++++-------------------
  kernel/user_namespace.c        | 10 +++++-----
  7 files changed, 51 insertions(+), 46 deletions(-)
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEgjlraLDcwBA2B+6cC/v6Eiajj0AFAmM7U4cACgkQC/v6Eiaj
 j0AbRA//RVrGJ9n5iYyHM7WgeoTlFbaupEyLTq5dEpkOMD9CEB4OpMymGA/VXbeX
 cjgF5dqykfrdpYBwJdosl1fgq15ZFe9ChKhPGQkI5CGlwyRYTl2kq+FrZLC790s8
 c4TN3fKO1DyQPn5+UNzlBgLP8ofiUqeScZJDGa+LeMlUIv1OFS3m05jHuG/uzl6b
 bbbdcn61tFKOFCapbE72hWusEQssPOAN+dSY1/lwKO05WOKR0N2CR0EHyZhW2Owd
 GIQ27Zh5ed/9xRNlxa8VIa+JDfuATbPeoWcvRmiWSEoAxKtPBUf8lwcltlHBUcKK
 72MH+KU9AaIZ1prq9ng4xEaM+vXiSSNspYB8siwph7au1gWx1Yu2yYVavEPeFB9o
 C0JaD7kTh6Mhk6xdPhnmFUHFOLLGC5LdnBcIwwoMb1jlwP4QJRVucbjpqaOptoiE
 SeWhRRKUBwpcQdztQZCR+X0h1paHRJJXplHFmeEGcMviGWntgKUaxXJQ4BJrnRTO
 pagn7h181KVF7u9Toh0IWzrd322mXNqmcgwhzE/S9pa5EJMQHt7qYkDzQCgEwoap
 JmIld9tKkv/0fYMOHordjMb1OY37feI7FyDAuZuLP1ZWYgKhOq0LrD5x8PzKmoyM
 6oKAOfXZUVT/Pnw21nEzAtHsazV3mLRpW+gLLiLSiWoSaYT4x14=
 =kjVh
 -----END PGP SIGNATURE-----

Merge tag 'ucount-rlimits-cleanups-for-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace

Pull ucounts update from Eric Biederman:
 "Split rlimit and ucount values and max values

  After the ucount rlimit code was merged a bunch of small but
  significant bugs were found and fixed. At the time it was realized
  that part of the problem was that while the ucount rlimits were very
  similar to the ordinary ucounts (in being nested counts with limits)
  the semantics were slightly different and the code would be less error
  prone if there was less sharing.

  This is the long awaited cleanup that should hopefully keep things
  more comprehensible and less error prone for whoever needs to touch
  that code next"

* tag 'ucount-rlimits-cleanups-for-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
  ucounts: Split rlimit and ucount values and max values
This commit is contained in:
Linus Torvalds 2022-10-09 16:24:05 -07:00
commit 493ffd6605
7 changed files with 51 additions and 46 deletions

View File

@ -1879,7 +1879,7 @@ static int do_execveat_common(int fd, struct filename *filename,
* whether NPROC limit is still exceeded. * whether NPROC limit is still exceeded.
*/ */
if ((current->flags & PF_NPROC_EXCEEDED) && if ((current->flags & PF_NPROC_EXCEEDED) &&
is_ucounts_overlimit(current_ucounts(), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) { is_rlimit_overlimit(current_ucounts(), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {
retval = -EAGAIN; retval = -EAGAIN;
goto out_ret; goto out_ret;
} }

View File

@ -279,7 +279,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
collect_sigign_sigcatch(p, &ignored, &caught); collect_sigign_sigcatch(p, &ignored, &caught);
num_threads = get_nr_threads(p); num_threads = get_nr_threads(p);
rcu_read_lock(); /* FIXME: is this correct? */ rcu_read_lock(); /* FIXME: is this correct? */
qsize = get_ucounts_value(task_ucounts(p), UCOUNT_RLIMIT_SIGPENDING); qsize = get_rlimit_value(task_ucounts(p), UCOUNT_RLIMIT_SIGPENDING);
rcu_read_unlock(); rcu_read_unlock();
qlim = task_rlimit(p, RLIMIT_SIGPENDING); qlim = task_rlimit(p, RLIMIT_SIGPENDING);
unlock_task_sighand(p, &flags); unlock_task_sighand(p, &flags);

View File

@ -54,15 +54,17 @@ enum ucount_type {
UCOUNT_FANOTIFY_GROUPS, UCOUNT_FANOTIFY_GROUPS,
UCOUNT_FANOTIFY_MARKS, UCOUNT_FANOTIFY_MARKS,
#endif #endif
UCOUNT_COUNTS,
};
enum rlimit_type {
UCOUNT_RLIMIT_NPROC, UCOUNT_RLIMIT_NPROC,
UCOUNT_RLIMIT_MSGQUEUE, UCOUNT_RLIMIT_MSGQUEUE,
UCOUNT_RLIMIT_SIGPENDING, UCOUNT_RLIMIT_SIGPENDING,
UCOUNT_RLIMIT_MEMLOCK, UCOUNT_RLIMIT_MEMLOCK,
UCOUNT_COUNTS, UCOUNT_RLIMIT_COUNTS,
}; };
#define MAX_PER_NAMESPACE_UCOUNTS UCOUNT_RLIMIT_NPROC
struct user_namespace { struct user_namespace {
struct uid_gid_map uid_map; struct uid_gid_map uid_map;
struct uid_gid_map gid_map; struct uid_gid_map gid_map;
@ -99,6 +101,7 @@ struct user_namespace {
#endif #endif
struct ucounts *ucounts; struct ucounts *ucounts;
long ucount_max[UCOUNT_COUNTS]; long ucount_max[UCOUNT_COUNTS];
long rlimit_max[UCOUNT_RLIMIT_COUNTS];
} __randomize_layout; } __randomize_layout;
struct ucounts { struct ucounts {
@ -107,6 +110,7 @@ struct ucounts {
kuid_t uid; kuid_t uid;
atomic_t count; atomic_t count;
atomic_long_t ucount[UCOUNT_COUNTS]; atomic_long_t ucount[UCOUNT_COUNTS];
atomic_long_t rlimit[UCOUNT_RLIMIT_COUNTS];
}; };
extern struct user_namespace init_user_ns; extern struct user_namespace init_user_ns;
@ -120,21 +124,26 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid);
struct ucounts * __must_check get_ucounts(struct ucounts *ucounts); struct ucounts * __must_check get_ucounts(struct ucounts *ucounts);
void put_ucounts(struct ucounts *ucounts); void put_ucounts(struct ucounts *ucounts);
static inline long get_ucounts_value(struct ucounts *ucounts, enum ucount_type type) static inline long get_rlimit_value(struct ucounts *ucounts, enum rlimit_type type)
{ {
return atomic_long_read(&ucounts->ucount[type]); return atomic_long_read(&ucounts->rlimit[type]);
} }
long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v); long inc_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v);
bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v); bool dec_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v);
long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type); long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type);
void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type); void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum rlimit_type type);
bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max); bool is_rlimit_overlimit(struct ucounts *ucounts, enum rlimit_type type, unsigned long max);
static inline void set_rlimit_ucount_max(struct user_namespace *ns, static inline long get_userns_rlimit_max(struct user_namespace *ns, enum rlimit_type type)
enum ucount_type type, unsigned long max)
{ {
ns->ucount_max[type] = max <= LONG_MAX ? max : LONG_MAX; return READ_ONCE(ns->rlimit_max[type]);
}
static inline void set_userns_rlimit_max(struct user_namespace *ns,
enum rlimit_type type, unsigned long max)
{
ns->rlimit_max[type] = max <= LONG_MAX ? max : LONG_MAX;
} }
#ifdef CONFIG_USER_NS #ifdef CONFIG_USER_NS

View File

@ -925,13 +925,13 @@ void __init fork_init(void)
init_task.signal->rlim[RLIMIT_SIGPENDING] = init_task.signal->rlim[RLIMIT_SIGPENDING] =
init_task.signal->rlim[RLIMIT_NPROC]; init_task.signal->rlim[RLIMIT_NPROC];
for (i = 0; i < MAX_PER_NAMESPACE_UCOUNTS; i++) for (i = 0; i < UCOUNT_COUNTS; i++)
init_user_ns.ucount_max[i] = max_threads/2; init_user_ns.ucount_max[i] = max_threads/2;
set_rlimit_ucount_max(&init_user_ns, UCOUNT_RLIMIT_NPROC, RLIM_INFINITY); set_userns_rlimit_max(&init_user_ns, UCOUNT_RLIMIT_NPROC, RLIM_INFINITY);
set_rlimit_ucount_max(&init_user_ns, UCOUNT_RLIMIT_MSGQUEUE, RLIM_INFINITY); set_userns_rlimit_max(&init_user_ns, UCOUNT_RLIMIT_MSGQUEUE, RLIM_INFINITY);
set_rlimit_ucount_max(&init_user_ns, UCOUNT_RLIMIT_SIGPENDING, RLIM_INFINITY); set_userns_rlimit_max(&init_user_ns, UCOUNT_RLIMIT_SIGPENDING, RLIM_INFINITY);
set_rlimit_ucount_max(&init_user_ns, UCOUNT_RLIMIT_MEMLOCK, RLIM_INFINITY); set_userns_rlimit_max(&init_user_ns, UCOUNT_RLIMIT_MEMLOCK, RLIM_INFINITY);
#ifdef CONFIG_VMAP_STACK #ifdef CONFIG_VMAP_STACK
cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache",
@ -2117,7 +2117,7 @@ static __latent_entropy struct task_struct *copy_process(
goto bad_fork_free; goto bad_fork_free;
retval = -EAGAIN; retval = -EAGAIN;
if (is_ucounts_overlimit(task_ucounts(p), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) { if (is_rlimit_overlimit(task_ucounts(p), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {
if (p->real_cred->user != INIT_USER && if (p->real_cred->user != INIT_USER &&
!capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
goto bad_fork_cleanup_count; goto bad_fork_cleanup_count;

View File

@ -496,7 +496,7 @@ static void flag_nproc_exceeded(struct cred *new)
* for programs doing set*uid()+execve() by harmlessly deferring the * for programs doing set*uid()+execve() by harmlessly deferring the
* failure to the execve() stage. * failure to the execve() stage.
*/ */
if (is_ucounts_overlimit(new->ucounts, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)) && if (is_rlimit_overlimit(new->ucounts, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)) &&
new->user != INIT_USER) new->user != INIT_USER)
current->flags |= PF_NPROC_EXCEEDED; current->flags |= PF_NPROC_EXCEEDED;
else else

View File

@ -87,10 +87,6 @@ static struct ctl_table user_table[] = {
UCOUNT_ENTRY("max_fanotify_groups"), UCOUNT_ENTRY("max_fanotify_groups"),
UCOUNT_ENTRY("max_fanotify_marks"), UCOUNT_ENTRY("max_fanotify_marks"),
#endif #endif
{ },
{ },
{ },
{ },
{ } { }
}; };
#endif /* CONFIG_SYSCTL */ #endif /* CONFIG_SYSCTL */
@ -263,29 +259,29 @@ void dec_ucount(struct ucounts *ucounts, enum ucount_type type)
put_ucounts(ucounts); put_ucounts(ucounts);
} }
long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v) long inc_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v)
{ {
struct ucounts *iter; struct ucounts *iter;
long max = LONG_MAX; long max = LONG_MAX;
long ret = 0; long ret = 0;
for (iter = ucounts; iter; iter = iter->ns->ucounts) { for (iter = ucounts; iter; iter = iter->ns->ucounts) {
long new = atomic_long_add_return(v, &iter->ucount[type]); long new = atomic_long_add_return(v, &iter->rlimit[type]);
if (new < 0 || new > max) if (new < 0 || new > max)
ret = LONG_MAX; ret = LONG_MAX;
else if (iter == ucounts) else if (iter == ucounts)
ret = new; ret = new;
max = READ_ONCE(iter->ns->ucount_max[type]); max = get_userns_rlimit_max(iter->ns, type);
} }
return ret; return ret;
} }
bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v) bool dec_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v)
{ {
struct ucounts *iter; struct ucounts *iter;
long new = -1; /* Silence compiler warning */ long new = -1; /* Silence compiler warning */
for (iter = ucounts; iter; iter = iter->ns->ucounts) { for (iter = ucounts; iter; iter = iter->ns->ucounts) {
long dec = atomic_long_sub_return(v, &iter->ucount[type]); long dec = atomic_long_sub_return(v, &iter->rlimit[type]);
WARN_ON_ONCE(dec < 0); WARN_ON_ONCE(dec < 0);
if (iter == ucounts) if (iter == ucounts)
new = dec; new = dec;
@ -294,11 +290,11 @@ bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
} }
static void do_dec_rlimit_put_ucounts(struct ucounts *ucounts, static void do_dec_rlimit_put_ucounts(struct ucounts *ucounts,
struct ucounts *last, enum ucount_type type) struct ucounts *last, enum rlimit_type type)
{ {
struct ucounts *iter, *next; struct ucounts *iter, *next;
for (iter = ucounts; iter != last; iter = next) { for (iter = ucounts; iter != last; iter = next) {
long dec = atomic_long_sub_return(1, &iter->ucount[type]); long dec = atomic_long_sub_return(1, &iter->rlimit[type]);
WARN_ON_ONCE(dec < 0); WARN_ON_ONCE(dec < 0);
next = iter->ns->ucounts; next = iter->ns->ucounts;
if (dec == 0) if (dec == 0)
@ -306,12 +302,12 @@ static void do_dec_rlimit_put_ucounts(struct ucounts *ucounts,
} }
} }
void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type) void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum rlimit_type type)
{ {
do_dec_rlimit_put_ucounts(ucounts, NULL, type); do_dec_rlimit_put_ucounts(ucounts, NULL, type);
} }
long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type) long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type)
{ {
/* Caller must hold a reference to ucounts */ /* Caller must hold a reference to ucounts */
struct ucounts *iter; struct ucounts *iter;
@ -319,12 +315,12 @@ long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type)
long dec, ret = 0; long dec, ret = 0;
for (iter = ucounts; iter; iter = iter->ns->ucounts) { for (iter = ucounts; iter; iter = iter->ns->ucounts) {
long new = atomic_long_add_return(1, &iter->ucount[type]); long new = atomic_long_add_return(1, &iter->rlimit[type]);
if (new < 0 || new > max) if (new < 0 || new > max)
goto unwind; goto unwind;
if (iter == ucounts) if (iter == ucounts)
ret = new; ret = new;
max = READ_ONCE(iter->ns->ucount_max[type]); max = get_userns_rlimit_max(iter->ns, type);
/* /*
* Grab an extra ucount reference for the caller when * Grab an extra ucount reference for the caller when
* the rlimit count was previously 0. * the rlimit count was previously 0.
@ -336,24 +332,24 @@ long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type)
} }
return ret; return ret;
dec_unwind: dec_unwind:
dec = atomic_long_sub_return(1, &iter->ucount[type]); dec = atomic_long_sub_return(1, &iter->rlimit[type]);
WARN_ON_ONCE(dec < 0); WARN_ON_ONCE(dec < 0);
unwind: unwind:
do_dec_rlimit_put_ucounts(ucounts, iter, type); do_dec_rlimit_put_ucounts(ucounts, iter, type);
return 0; return 0;
} }
bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long rlimit) bool is_rlimit_overlimit(struct ucounts *ucounts, enum rlimit_type type, unsigned long rlimit)
{ {
struct ucounts *iter; struct ucounts *iter;
long max = rlimit; long max = rlimit;
if (rlimit > LONG_MAX) if (rlimit > LONG_MAX)
max = LONG_MAX; max = LONG_MAX;
for (iter = ucounts; iter; iter = iter->ns->ucounts) { for (iter = ucounts; iter; iter = iter->ns->ucounts) {
long val = get_ucounts_value(iter, type); long val = get_rlimit_value(iter, type);
if (val < 0 || val > max) if (val < 0 || val > max)
return true; return true;
max = READ_ONCE(iter->ns->ucount_max[type]); max = get_userns_rlimit_max(iter->ns, type);
} }
return false; return false;
} }

View File

@ -136,13 +136,13 @@ int create_user_ns(struct cred *new)
ns->owner = owner; ns->owner = owner;
ns->group = group; ns->group = group;
INIT_WORK(&ns->work, free_user_ns); INIT_WORK(&ns->work, free_user_ns);
for (i = 0; i < MAX_PER_NAMESPACE_UCOUNTS; i++) { for (i = 0; i < UCOUNT_COUNTS; i++) {
ns->ucount_max[i] = INT_MAX; ns->ucount_max[i] = INT_MAX;
} }
set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_NPROC, enforced_nproc_rlimit()); set_userns_rlimit_max(ns, UCOUNT_RLIMIT_NPROC, enforced_nproc_rlimit());
set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_MSGQUEUE, rlimit(RLIMIT_MSGQUEUE)); set_userns_rlimit_max(ns, UCOUNT_RLIMIT_MSGQUEUE, rlimit(RLIMIT_MSGQUEUE));
set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_SIGPENDING, rlimit(RLIMIT_SIGPENDING)); set_userns_rlimit_max(ns, UCOUNT_RLIMIT_SIGPENDING, rlimit(RLIMIT_SIGPENDING));
set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_MEMLOCK, rlimit(RLIMIT_MEMLOCK)); set_userns_rlimit_max(ns, UCOUNT_RLIMIT_MEMLOCK, rlimit(RLIMIT_MEMLOCK));
ns->ucounts = ucounts; ns->ucounts = ucounts;
/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */