diff --git a/fs/exec.c b/fs/exec.c index db17be51b112..0e46ec57fe0a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1010,16 +1010,26 @@ ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len) } EXPORT_SYMBOL(read_code); +/* + * Maps the mm_struct mm into the current task struct. + * On success, this function returns with the mutex + * exec_update_mutex locked. + */ static int exec_mmap(struct mm_struct *mm) { struct task_struct *tsk; struct mm_struct *old_mm, *active_mm; + int ret; /* Notify parent that we're no longer interested in the old VM */ tsk = current; old_mm = current->mm; exec_mm_release(tsk, old_mm); + ret = mutex_lock_killable(&tsk->signal->exec_update_mutex); + if (ret) + return ret; + if (old_mm) { sync_mm_rss(old_mm); /* @@ -1031,9 +1041,11 @@ static int exec_mmap(struct mm_struct *mm) down_read(&old_mm->mmap_sem); if (unlikely(old_mm->core_state)) { up_read(&old_mm->mmap_sem); + mutex_unlock(&tsk->signal->exec_update_mutex); return -EINTR; } } + task_lock(tsk); active_mm = tsk->active_mm; membarrier_exec_mmap(mm); @@ -1189,10 +1201,22 @@ no_thread_group: /* we have changed execution domain */ tsk->exit_signal = SIGCHLD; -#ifdef CONFIG_POSIX_TIMERS - exit_itimers(sig); - flush_itimer_signals(); -#endif + BUG_ON(!thread_group_leader(tsk)); + return 0; + +killed: + /* protects against exit_notify() and __exit_signal() */ + read_lock(&tasklist_lock); + sig->group_exit_task = NULL; + sig->notify_count = 0; + read_unlock(&tasklist_lock); + return -EAGAIN; +} + + +static int unshare_sighand(struct task_struct *me) +{ + struct sighand_struct *oldsighand = me->sighand; if (refcount_read(&oldsighand->count) != 1) { struct sighand_struct *newsighand; @@ -1210,23 +1234,13 @@ no_thread_group: write_lock_irq(&tasklist_lock); spin_lock(&oldsighand->siglock); - rcu_assign_pointer(tsk->sighand, newsighand); + rcu_assign_pointer(me->sighand, newsighand); spin_unlock(&oldsighand->siglock); write_unlock_irq(&tasklist_lock); __cleanup_sighand(oldsighand); } - - BUG_ON(!thread_group_leader(tsk)); return 0; - -killed: - /* protects against exit_notify() and __exit_signal() */ - read_lock(&tasklist_lock); - sig->group_exit_task = NULL; - sig->notify_count = 0; - read_unlock(&tasklist_lock); - return -EAGAIN; } char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk) @@ -1260,13 +1274,13 @@ void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec) */ int flush_old_exec(struct linux_binprm * bprm) { + struct task_struct *me = current; int retval; /* - * Make sure we have a private signal table and that - * we are unassociated from the previous thread group. + * Make this the only thread in the thread group. */ - retval = de_thread(current); + retval = de_thread(me); if (retval) goto out; @@ -1286,18 +1300,31 @@ int flush_old_exec(struct linux_binprm * bprm) goto out; /* - * After clearing bprm->mm (to mark that current is using the - * prepared mm now), we have nothing left of the original + * After setting bprm->called_exec_mmap (to mark that current is + * using the prepared mm now), we have nothing left of the original * process. If anything from here on returns an error, the check * in search_binary_handler() will SEGV current. */ + bprm->called_exec_mmap = 1; bprm->mm = NULL; +#ifdef CONFIG_POSIX_TIMERS + exit_itimers(me->signal); + flush_itimer_signals(); +#endif + + /* + * Make the signal table private. + */ + retval = unshare_sighand(me); + if (retval) + goto out; + set_fs(USER_DS); - current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD | + me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD | PF_NOFREEZE | PF_NO_SETAFFINITY); flush_thread(); - current->personality &= ~bprm->per_clear; + me->personality &= ~bprm->per_clear; /* * We have to apply CLOEXEC before we change whether the process is @@ -1305,7 +1332,7 @@ int flush_old_exec(struct linux_binprm * bprm) * trying to access the should-be-closed file descriptors of a process * undergoing exec(2). */ - do_close_on_exec(current->files); + do_close_on_exec(me->files); return 0; out: @@ -1424,6 +1451,8 @@ static void free_bprm(struct linux_binprm *bprm) { free_arg_pages(bprm); if (bprm->cred) { + if (bprm->called_exec_mmap) + mutex_unlock(¤t->signal->exec_update_mutex); mutex_unlock(¤t->signal->cred_guard_mutex); abort_creds(bprm->cred); } @@ -1473,6 +1502,7 @@ void install_exec_creds(struct linux_binprm *bprm) * credentials; any time after this it may be unlocked. */ security_bprm_committed_creds(bprm); + mutex_unlock(¤t->signal->exec_update_mutex); mutex_unlock(¤t->signal->cred_guard_mutex); } EXPORT_SYMBOL(install_exec_creds); @@ -1664,7 +1694,7 @@ int search_binary_handler(struct linux_binprm *bprm) read_lock(&binfmt_lock); put_binfmt(fmt); - if (retval < 0 && !bprm->mm) { + if (retval < 0 && bprm->called_exec_mmap) { /* we got to flush_old_exec() and failed after it */ read_unlock(&binfmt_lock); force_sigsegv(SIGSEGV); diff --git a/fs/proc/base.c b/fs/proc/base.c index e7efe9d6f3d6..74f948a6b621 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -405,11 +405,11 @@ print0: static int lock_trace(struct task_struct *task) { - int err = mutex_lock_killable(&task->signal->cred_guard_mutex); + int err = mutex_lock_killable(&task->signal->exec_update_mutex); if (err) return err; if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) { - mutex_unlock(&task->signal->cred_guard_mutex); + mutex_unlock(&task->signal->exec_update_mutex); return -EPERM; } return 0; @@ -417,7 +417,7 @@ static int lock_trace(struct task_struct *task) static void unlock_trace(struct task_struct *task) { - mutex_unlock(&task->signal->cred_guard_mutex); + mutex_unlock(&task->signal->exec_update_mutex); } #ifdef CONFIG_STACKTRACE @@ -2883,7 +2883,7 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh unsigned long flags; int result; - result = mutex_lock_killable(&task->signal->cred_guard_mutex); + result = mutex_lock_killable(&task->signal->exec_update_mutex); if (result) return result; @@ -2919,7 +2919,7 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh result = 0; out_unlock: - mutex_unlock(&task->signal->cred_guard_mutex); + mutex_unlock(&task->signal->exec_update_mutex); return result; } diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index b40fc633f3be..a345d9fed3d8 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -44,7 +44,13 @@ struct linux_binprm { * exec has happened. Used to sanitize execution environment * and to set AT_SECURE auxv for glibc. */ - secureexec:1; + secureexec:1, + /* + * Set by flush_old_exec, when exec_mmap has been called. + * This is past the point of no return, when the + * exec_update_mutex has been taken. + */ + called_exec_mmap:1; #ifdef __alpha__ unsigned int taso:1; #endif diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 88050259c466..a29df79540ce 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -224,7 +224,14 @@ struct signal_struct { struct mutex cred_guard_mutex; /* guard against foreign influences on * credential calculations - * (notably. ptrace) */ + * (notably. ptrace) + * Deprecated do not use in new code. + * Use exec_update_mutex instead. + */ + struct mutex exec_update_mutex; /* Held while task_struct is being + * updated during exec, and may have + * inconsistent permissions. + */ } __randomize_layout; /* diff --git a/init/init_task.c b/init/init_task.c index 9e5cbe5eab7b..bd403ed3e418 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -26,6 +26,7 @@ static struct signal_struct init_signals = { .multiprocess = HLIST_HEAD_INIT, .rlim = INIT_RLIMITS, .cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex), + .exec_update_mutex = __MUTEX_INITIALIZER(init_signals.exec_update_mutex), #ifdef CONFIG_POSIX_TIMERS .posix_timers = LIST_HEAD_INIT(init_signals.posix_timers), .cputimer = { diff --git a/kernel/cred.c b/kernel/cred.c index 809a985b1793..71a792616917 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -675,8 +675,6 @@ void __init cred_init(void) * The caller may change these controls afterwards if desired. * * Returns the new credentials or NULL if out of memory. - * - * Does not take, and does not return holding current->cred_replace_mutex. */ struct cred *prepare_kernel_cred(struct task_struct *daemon) { diff --git a/kernel/events/core.c b/kernel/events/core.c index e453589da97c..71cba8cfccbc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1249,7 +1249,7 @@ static void put_ctx(struct perf_event_context *ctx) * function. * * Lock order: - * cred_guard_mutex + * exec_update_mutex * task_struct::perf_event_mutex * perf_event_context::mutex * perf_event::child_mutex; @@ -11263,14 +11263,14 @@ SYSCALL_DEFINE5(perf_event_open, } if (task) { - err = mutex_lock_interruptible(&task->signal->cred_guard_mutex); + err = mutex_lock_interruptible(&task->signal->exec_update_mutex); if (err) goto err_task; /* * Reuse ptrace permission checks for now. * - * We must hold cred_guard_mutex across this and any potential + * We must hold exec_update_mutex across this and any potential * perf_install_in_context() call for this new event to * serialize against exec() altering our credentials (and the * perf_event_exit_task() that could imply). @@ -11559,7 +11559,7 @@ SYSCALL_DEFINE5(perf_event_open, mutex_unlock(&ctx->mutex); if (task) { - mutex_unlock(&task->signal->cred_guard_mutex); + mutex_unlock(&task->signal->exec_update_mutex); put_task_struct(task); } @@ -11595,7 +11595,7 @@ err_alloc: free_event(event); err_cred: if (task) - mutex_unlock(&task->signal->cred_guard_mutex); + mutex_unlock(&task->signal->exec_update_mutex); err_task: if (task) put_task_struct(task); @@ -11900,7 +11900,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) /* * When a child task exits, feed back event values to parent events. * - * Can be called with cred_guard_mutex held when called from + * Can be called with exec_update_mutex held when called from * install_exec_creds(). */ void perf_event_exit_task(struct task_struct *child) diff --git a/kernel/fork.c b/kernel/fork.c index 60a1295f4384..e0668a79bca1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1224,7 +1224,7 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) struct mm_struct *mm; int err; - err = mutex_lock_killable(&task->signal->cred_guard_mutex); + err = mutex_lock_killable(&task->signal->exec_update_mutex); if (err) return ERR_PTR(err); @@ -1234,7 +1234,7 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) mmput(mm); mm = ERR_PTR(-EACCES); } - mutex_unlock(&task->signal->cred_guard_mutex); + mutex_unlock(&task->signal->exec_update_mutex); return mm; } @@ -1594,6 +1594,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->oom_score_adj_min = current->signal->oom_score_adj_min; mutex_init(&sig->cred_guard_mutex); + mutex_init(&sig->exec_update_mutex); return 0; } diff --git a/kernel/kcmp.c b/kernel/kcmp.c index a0e3d7a0e8b8..b3ff9288c6cc 100644 --- a/kernel/kcmp.c +++ b/kernel/kcmp.c @@ -173,8 +173,8 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type, /* * One should have enough rights to inspect task details. */ - ret = kcmp_lock(&task1->signal->cred_guard_mutex, - &task2->signal->cred_guard_mutex); + ret = kcmp_lock(&task1->signal->exec_update_mutex, + &task2->signal->exec_update_mutex); if (ret) goto err; if (!ptrace_may_access(task1, PTRACE_MODE_READ_REALCREDS) || @@ -229,8 +229,8 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type, } err_unlock: - kcmp_unlock(&task1->signal->cred_guard_mutex, - &task2->signal->cred_guard_mutex); + kcmp_unlock(&task1->signal->exec_update_mutex, + &task2->signal->exec_update_mutex); err: put_task_struct(task1); put_task_struct(task2); diff --git a/kernel/pid.c b/kernel/pid.c index 60820e72634c..efd34874b3d1 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -577,7 +577,7 @@ static struct file *__pidfd_fget(struct task_struct *task, int fd) struct file *file; int ret; - ret = mutex_lock_killable(&task->signal->cred_guard_mutex); + ret = mutex_lock_killable(&task->signal->exec_update_mutex); if (ret) return ERR_PTR(ret); @@ -586,7 +586,7 @@ static struct file *__pidfd_fget(struct task_struct *task, int fd) else file = ERR_PTR(-EPERM); - mutex_unlock(&task->signal->cred_guard_mutex); + mutex_unlock(&task->signal->exec_update_mutex); return file ?: ERR_PTR(-EBADF); } diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index de41e830cdac..74e957e302fe 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -206,7 +206,7 @@ static ssize_t process_vm_rw_core(pid_t pid, struct iov_iter *iter, if (!mm || IS_ERR(mm)) { rc = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH; /* - * Explicitly map EACCES to EPERM as EPERM is a more a + * Explicitly map EACCES to EPERM as EPERM is a more * appropriate error code for process_vw_readv/writev */ if (rc == -EACCES) diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile index c0b7f89f0930..2f1f532c39db 100644 --- a/tools/testing/selftests/ptrace/Makefile +++ b/tools/testing/selftests/ptrace/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -iquote../../../../include/uapi -Wall +CFLAGS += -std=c99 -pthread -iquote../../../../include/uapi -Wall -TEST_GEN_PROGS := get_syscall_info peeksiginfo +TEST_GEN_PROGS := get_syscall_info peeksiginfo vmaccess include ../lib.mk diff --git a/tools/testing/selftests/ptrace/vmaccess.c b/tools/testing/selftests/ptrace/vmaccess.c new file mode 100644 index 000000000000..4db327b44586 --- /dev/null +++ b/tools/testing/selftests/ptrace/vmaccess.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2020 Bernd Edlinger <bernd.edlinger@hotmail.de> + * All rights reserved. + * + * Check whether /proc/$pid/mem can be accessed without causing deadlocks + * when de_thread is blocked with ->cred_guard_mutex held. + */ + +#include "../kselftest_harness.h" +#include <stdio.h> +#include <fcntl.h> +#include <pthread.h> +#include <signal.h> +#include <unistd.h> +#include <sys/ptrace.h> + +static void *thread(void *arg) +{ + ptrace(PTRACE_TRACEME, 0, 0L, 0L); + return NULL; +} + +TEST(vmaccess) +{ + int f, pid = fork(); + char mm[64]; + + if (!pid) { + pthread_t pt; + + pthread_create(&pt, NULL, thread, NULL); + pthread_join(pt, NULL); + execlp("true", "true", NULL); + } + + sleep(1); + sprintf(mm, "/proc/%d/mem", pid); + f = open(mm, O_RDONLY); + ASSERT_GE(f, 0); + close(f); + f = kill(pid, SIGCONT); + ASSERT_EQ(f, 0); +} + +TEST(attach) +{ + int s, k, pid = fork(); + + if (!pid) { + pthread_t pt; + + pthread_create(&pt, NULL, thread, NULL); + pthread_join(pt, NULL); + execlp("sleep", "sleep", "2", NULL); + } + + sleep(1); + k = ptrace(PTRACE_ATTACH, pid, 0L, 0L); + ASSERT_EQ(errno, EAGAIN); + ASSERT_EQ(k, -1); + k = waitpid(-1, &s, WNOHANG); + ASSERT_NE(k, -1); + ASSERT_NE(k, 0); + ASSERT_NE(k, pid); + ASSERT_EQ(WIFEXITED(s), 1); + ASSERT_EQ(WEXITSTATUS(s), 0); + sleep(1); + k = ptrace(PTRACE_ATTACH, pid, 0L, 0L); + ASSERT_EQ(k, 0); + k = waitpid(-1, &s, 0); + ASSERT_EQ(k, pid); + ASSERT_EQ(WIFSTOPPED(s), 1); + ASSERT_EQ(WSTOPSIG(s), SIGSTOP); + k = ptrace(PTRACE_DETACH, pid, 0L, 0L); + ASSERT_EQ(k, 0); + k = waitpid(-1, &s, 0); + ASSERT_EQ(k, pid); + ASSERT_EQ(WIFEXITED(s), 1); + ASSERT_EQ(WEXITSTATUS(s), 0); + k = waitpid(-1, NULL, 0); + ASSERT_EQ(k, -1); + ASSERT_EQ(errno, ECHILD); +} + +TEST_HARNESS_MAIN