Merge branch 'exit-cleanups-for-v5.16' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace

Pull exit cleanups from Eric Biederman:
 "While looking at some issues related to the exit path in the kernel I
  found several instances where the code is not using the existing
  abstractions properly.

  This set of changes introduces force_fatal_sig, a way of sending a
  signal without allowing it to be caught, and corrects the misuse of
  the existing abstractions that I found.

  Much of the misuse of the existing abstractions consists of silly
  things such as doing something after calling a no-return function,
  rolling BUG by hand, doing more work than necessary to terminate a
  kernel thread, or calling do_exit(SIGKILL) instead of calling
  force_sig(SIGKILL).

  During review, a deficiency was found in force_fatal_sig and
  force_sig_seccomp where ptrace or sigaction could prevent the
  delivery of the signal. I have added a change that introduces
  SA_IMMUTABLE, which makes it impossible to interrupt the delivery of
  those signals and allows backporting to fix force_sig_seccomp.

  And Arnd found an issue where a function passed to kthread_run had the
  wrong prototype, and after my cleanup was failing to build."

* 'exit-cleanups-for-v5.16' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: (23 commits)
  soc: ti: fix wkup_m3_rproc_boot_thread return type
  signal: Add SA_IMMUTABLE to ensure forced siganls do not get changed
  signal: Replace force_sigsegv(SIGSEGV) with force_fatal_sig(SIGSEGV)
  exit/r8188eu: Replace the macro thread_exit with a simple return 0
  exit/rtl8712: Replace the macro thread_exit with a simple return 0
  exit/rtl8723bs: Replace the macro thread_exit with a simple return 0
  signal/x86: In emulate_vsyscall force a signal instead of calling do_exit
  signal/sparc32: In setup_rt_frame and setup_fram use force_fatal_sig
  signal/sparc32: Exit with a fatal signal when try_to_clear_window_buffer fails
  exit/syscall_user_dispatch: Send ordinary signals on failure
  signal: Implement force_fatal_sig
  exit/kthread: Have kernel threads return instead of calling do_exit
  signal/s390: Use force_sigsegv in default_trap_handler
  signal/vm86_32: Properly send SIGSEGV when the vm86 state cannot be saved.
  signal/vm86_32: Replace open coded BUG_ON with an actual BUG_ON
  signal/sparc: In setup_tsb_params convert open coded BUG into BUG
  signal/powerpc: On swapcontext failure force SIGSEGV
  signal/sh: Use force_sig(SIGKILL) instead of do_group_exit(SIGKILL)
  signal/mips: Update (_save|_restore)_fp_context to fail with -EFAULT
  signal/sparc32: Remove unreachable do_exit in do_sparc_fault
  ...
This commit is contained in:
Linus Torvalds 2021-11-10 16:15:54 -08:00
commit 5147da902e
47 changed files with 97 additions and 96 deletions

View File

@ -294,7 +294,7 @@ int elf_check_arch(const struct elf32_hdr *x)
eflags = x->e_flags;
if ((eflags & EF_ARC_OSABI_MSK) != EF_ARC_OSABI_CURRENT) {
pr_err("ABI mismatch - you need newer toolchain\n");
force_sigsegv(SIGSEGV);
force_fatal_sig(SIGSEGV);
return 0;
}

View File

@ -1145,7 +1145,7 @@ asmlinkage void set_esp0(unsigned long ssp)
*/
asmlinkage void fpsp040_die(void)
{
force_sigsegv(SIGSEGV);
force_fatal_sig(SIGSEGV);
}
#ifdef CONFIG_M68KFPU_EMU

View File

@ -29,8 +29,8 @@
#define EX2(a,b) \
9: a,##b; \
.section __ex_table,"a"; \
PTR 9b,bad_stack; \
PTR 9b+4,bad_stack; \
PTR 9b,fault; \
PTR 9b+4,fault; \
.previous
.set mips1

View File

@ -240,12 +240,3 @@ SYSCALL_DEFINE3(cachectl, char *, addr, int, nbytes, int, op)
{
return -ENOSYS;
}
/*
* If we ever come here the user sp is bad. Zap the process right away.
* Due to the bad stack signaling wouldn't work.
*/
asmlinkage void bad_stack(void)
{
do_exit(SIGSEGV);
}

View File

@ -118,7 +118,7 @@ DEFINE_SPINLOCK(die_lock);
/*
* This function is protected against re-entrancy.
*/
void die(const char *str, struct pt_regs *regs, int err)
void __noreturn die(const char *str, struct pt_regs *regs, int err)
{
struct task_struct *tsk = current;
static int die_counter;

View File

@ -13,7 +13,7 @@
#include <asm/tlbflush.h>
extern void die(const char *str, struct pt_regs *regs, long err);
extern void __noreturn die(const char *str, struct pt_regs *regs, long err);
/*
* This is useful to dump out the page tables associated with
@ -299,10 +299,6 @@ no_context:
show_pte(mm, addr);
die("Oops", regs, error_code);
bust_spinlocks(0);
do_exit(SIGKILL);
return;
/*
* We ran out of memory, or some other thing happened to us that made

View File

@ -197,7 +197,7 @@ void nommu_dump_state(struct pt_regs *regs,
}
/* This is normally the 'Oops' routine */
void die(const char *str, struct pt_regs *regs, long err)
void __noreturn die(const char *str, struct pt_regs *regs, long err)
{
console_verbose();

View File

@ -32,7 +32,7 @@ unsigned long pte_errors; /* updated by do_page_fault() */
*/
volatile pgd_t *current_pgd[NR_CPUS];
extern void die(char *, struct pt_regs *, long);
extern void __noreturn die(char *, struct pt_regs *, long);
/*
* This routine handles page faults. It determines the address,
@ -248,8 +248,6 @@ no_context:
die("Oops", regs, write_acc);
do_exit(SIGKILL);
/*
* We ran out of memory, or some other thing happened to us that made
* us unable to handle the page fault gracefully.

View File

@ -1062,8 +1062,10 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
* or if another thread unmaps the region containing the context.
* We kill the task with a SIGSEGV in this situation.
*/
if (do_setcontext(new_ctx, regs, 0))
do_exit(SIGSEGV);
if (do_setcontext(new_ctx, regs, 0)) {
force_fatal_sig(SIGSEGV);
return -EFAULT;
}
set_thread_flag(TIF_RESTOREALL);
return 0;

View File

@ -703,15 +703,18 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
* We kill the task with a SIGSEGV in this situation.
*/
if (__get_user_sigset(&set, &new_ctx->uc_sigmask))
do_exit(SIGSEGV);
if (__get_user_sigset(&set, &new_ctx->uc_sigmask)) {
force_fatal_sig(SIGSEGV);
return -EFAULT;
}
set_current_blocked(&set);
if (!user_read_access_begin(new_ctx, ctx_size))
return -EFAULT;
if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) {
user_read_access_end();
do_exit(SIGSEGV);
force_fatal_sig(SIGSEGV);
return -EFAULT;
}
user_read_access_end();

View File

@ -23,6 +23,6 @@ enum die_val {
DIE_NMI_IPI,
};
extern void die(struct pt_regs *, const char *);
extern void __noreturn die(struct pt_regs *, const char *);
#endif

View File

@ -192,7 +192,7 @@ void show_regs(struct pt_regs *regs)
static DEFINE_SPINLOCK(die_lock);
void die(struct pt_regs *regs, const char *str)
void __noreturn die(struct pt_regs *regs, const char *str)
{
static int die_counter;

View File

@ -84,7 +84,7 @@ static void default_trap_handler(struct pt_regs *regs)
{
if (user_mode(regs)) {
report_user_fault(regs, SIGSEGV, 0);
do_exit(SIGSEGV);
force_fatal_sig(SIGSEGV);
} else
die(regs, "Unknown program exception");
}

View File

@ -260,7 +260,6 @@ static noinline void do_no_context(struct pt_regs *regs)
" in virtual user address space\n");
dump_fault_info(regs);
die(regs, "Oops");
do_exit(SIGKILL);
}
static noinline void do_low_address(struct pt_regs *regs)
@ -270,7 +269,6 @@ static noinline void do_low_address(struct pt_regs *regs)
if (regs->psw.mask & PSW_MASK_PSTATE) {
/* Low-address protection hit in user mode 'cannot happen'. */
die (regs, "Low-address protection");
do_exit(SIGKILL);
}
do_no_context(regs);

View File

@ -62,18 +62,20 @@ void fpu_state_restore(struct pt_regs *regs)
}
if (!tsk_used_math(tsk)) {
local_irq_enable();
int ret;
/*
* does a slab alloc which can sleep
*/
if (init_fpu(tsk)) {
local_irq_enable();
ret = init_fpu(tsk);
local_irq_disable();
if (ret) {
/*
* ran out of memory!
*/
do_group_exit(SIGKILL);
force_sig(SIGKILL);
return;
}
local_irq_disable();
}
grab_fpu(regs);

View File

@ -20,7 +20,7 @@
static DEFINE_SPINLOCK(die_lock);
void die(const char *str, struct pt_regs *regs, long err)
void __noreturn die(const char *str, struct pt_regs *regs, long err)
{
static int die_counter;

View File

@ -238,8 +238,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
show_fault_oops(regs, address);
die("Oops", regs, error_code);
bust_spinlocks(0);
do_exit(SIGKILL);
}
static void

View File

@ -244,7 +244,7 @@ static int setup_frame(struct ksignal *ksig, struct pt_regs *regs,
get_sigframe(ksig, regs, sigframe_size);
if (invalid_frame_pointer(sf, sigframe_size)) {
do_exit(SIGILL);
force_fatal_sig(SIGILL);
return -EINVAL;
}
@ -336,7 +336,7 @@ static int setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs,
sf = (struct rt_signal_frame __user *)
get_sigframe(ksig, regs, sigframe_size);
if (invalid_frame_pointer(sf, sigframe_size)) {
do_exit(SIGILL);
force_fatal_sig(SIGILL);
return -EINVAL;
}

View File

@ -121,8 +121,10 @@ void try_to_clear_window_buffer(struct pt_regs *regs, int who)
if ((sp & 7) ||
copy_to_user((char __user *) sp, &tp->reg_window[window],
sizeof(struct reg_window32)))
do_exit(SIGILL);
sizeof(struct reg_window32))) {
force_fatal_sig(SIGILL);
return;
}
}
tp->w_saved = 0;
}

View File

@ -248,7 +248,6 @@ no_context:
}
unhandled_fault(address, tsk, regs);
do_exit(SIGKILL);
/*
* We ran out of memory, or some other thing happened to us that made

View File

@ -266,7 +266,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign
default:
printk(KERN_ERR "TSB[%s:%d]: Impossible TSB size %lu, killing process.\n",
current->comm, current->pid, tsb_bytes);
do_exit(SIGSEGV);
BUG();
}
tte |= pte_sz_bits(page_sz);

View File

@ -158,7 +158,7 @@ static void bad_segv(struct faultinfo fi, unsigned long ip)
void fatal_sigsegv(void)
{
force_sigsegv(SIGSEGV);
force_fatal_sig(SIGSEGV);
do_signal(&current->thread.regs);
/*
* This is to tell gcc that we're not returning - do_signal

View File

@ -226,7 +226,8 @@ bool emulate_vsyscall(unsigned long error_code,
if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
warn_bad_vsyscall(KERN_DEBUG, regs,
"seccomp tried to change syscall nr or ip");
do_exit(SIGSYS);
force_fatal_sig(SIGSYS);
return true;
}
regs->orig_ax = -1;
if (tmp)

View File

@ -77,9 +77,6 @@ asmlinkage noinstr void __noreturn doublefault_shim(void)
* some way to reconstruct CR3. We could make a credible guess based
* on cpu_tlbstate, but that would be racy and would not account for
* PTI.
*
* Instead, don't bother. We can return through
* rewind_stack_do_exit() instead.
*/
panic("cannot return from double fault\n");
}

View File

@ -106,10 +106,8 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
*/
local_irq_enable();
if (!vm86 || !vm86->user_vm86) {
pr_alert("no user_vm86: BAD\n");
do_exit(SIGSEGV);
}
BUG_ON(!vm86 || !vm86->user_vm86);
set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | vm86->veflags_mask);
user = vm86->user_vm86;
@ -142,6 +140,7 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
user_access_end();
exit_vm86:
preempt_disable();
tsk->thread.sp0 = vm86->saved_sp0;
tsk->thread.sysenter_cs = __KERNEL_CS;
@ -161,7 +160,8 @@ Efault_end:
user_access_end();
Efault:
pr_alert("could not access userspace vm86 info\n");
do_exit(SIGSEGV);
force_fatal_sig(SIGSEGV);
goto exit_vm86;
}
static int do_vm86_irq_handling(int subfunction, int irqnumber);

View File

@ -527,7 +527,7 @@ void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl)
DEFINE_SPINLOCK(die_lock);
void die(const char * str, struct pt_regs * regs, long err)
void __noreturn die(const char * str, struct pt_regs * regs, long err)
{
static int die_counter;
const char *pr = "";

View File

@ -238,7 +238,7 @@ bad_page_fault:
void
bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
{
extern void die(const char*, struct pt_regs*, long);
extern void __noreturn die(const char*, struct pt_regs*, long);
const struct exception_table_entry *entry;
/* Are we prepared to handle this kernel fault? */
@ -257,5 +257,4 @@ bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
"address %08lx\n pc = %08lx, ra = %08lx\n",
address, regs->pc, regs->areg[0]);
die("Oops", regs, sig);
do_exit(sig);
}

View File

@ -520,7 +520,7 @@ static int svc_normal_to_secure_thread(void *data)
* physical address of memory block reserved by secure monitor software at
* secure world.
*
* svc_normal_to_secure_shm_thread() calls do_exit() directly since it is a
* svc_normal_to_secure_shm_thread() terminates directly since it is a
* standalone thread for which no one will call kthread_stop() or return when
* 'kthread_should_stop()' is true.
*/
@ -544,7 +544,7 @@ static int svc_normal_to_secure_shm_thread(void *data)
}
complete(&sh_mem->sync_complete);
do_exit(0);
return 0;
}
/**

View File

@ -413,8 +413,9 @@ void wkup_m3_ipc_put(struct wkup_m3_ipc *m3_ipc)
}
EXPORT_SYMBOL_GPL(wkup_m3_ipc_put);
static void wkup_m3_rproc_boot_thread(struct wkup_m3_ipc *m3_ipc)
static int wkup_m3_rproc_boot_thread(void *arg)
{
struct wkup_m3_ipc *m3_ipc = arg;
struct device *dev = m3_ipc->dev;
int ret;
@ -426,7 +427,7 @@ static void wkup_m3_rproc_boot_thread(struct wkup_m3_ipc *m3_ipc)
else
m3_ipc_state = m3_ipc;
do_exit(0);
return 0;
}
static int wkup_m3_ipc_probe(struct platform_device *pdev)
@ -500,7 +501,7 @@ static int wkup_m3_ipc_probe(struct platform_device *pdev)
* can boot the wkup_m3 as soon as it's ready without holding
* up kernel boot
*/
task = kthread_run((void *)wkup_m3_rproc_boot_thread, m3_ipc,
task = kthread_run(wkup_m3_rproc_boot_thread, m3_ipc,
"wkup_m3_rproc_loader");
if (IS_ERR(task)) {

View File

@ -323,7 +323,7 @@ post_process:
complete(&pcmdpriv->stop_cmd_thread);
thread_exit();
return 0;
}
/*

View File

@ -49,8 +49,6 @@ struct __queue {
spinlock_t lock;
};
#define thread_exit() complete_and_exit(NULL, 0)
static inline struct list_head *get_list_head(struct __queue *queue)
{
return (&(queue->queue));

View File

@ -37,7 +37,6 @@ struct __queue {
#define _pkt struct sk_buff
#define _buffer unsigned char
#define thread_exit() complete_and_exit(NULL, 0)
#define _init_queue(pqueue) \
do { \

View File

@ -393,7 +393,7 @@ _next:
r8712_free_cmd_obj(pcmd);
} while (1);
complete(&pcmdpriv->terminate_cmdthread_comp);
thread_exit();
return 0;
}
void r8712_event_handle(struct _adapter *padapter, __le32 *peventbuf)

View File

@ -518,7 +518,7 @@ post_process:
complete(&pcmdpriv->terminate_cmdthread_comp);
atomic_set(&pcmdpriv->cmdthd_running, false);
thread_exit();
return 0;
}
/*

View File

@ -2500,7 +2500,7 @@ int rtw_xmit_thread(void *context)
complete(&padapter->xmitpriv.terminate_xmitthread_comp);
thread_exit();
return 0;
}
void rtw_sctx_init(struct submit_ctx *sctx, int timeout_ms)

View File

@ -435,7 +435,7 @@ int rtl8723bs_xmit_thread(void *context)
complete(&pxmitpriv->SdioXmitTerminate);
thread_exit();
return 0;
}
s32 rtl8723bs_mgnt_xmit(

View File

@ -45,8 +45,6 @@
spinlock_t lock;
};
#define thread_exit() complete_and_exit(NULL, 0)
static inline struct list_head *get_next(struct list_head *list)
{
return list->next;

View File

@ -1850,7 +1850,7 @@ out:
* SIGSEGV.
*/
if (bprm->point_of_no_return && !fatal_signal_pending(current))
force_sigsegv(SIGSEGV);
force_fatal_sig(SIGSEGV);
out_unmark:
current->fs->in_exec = 0;

View File

@ -1513,10 +1513,7 @@ bail:
if (quota_enabled)
kfree(rm_quota);
/* no one is callint kthread_stop() for us so the kthread() api
* requires that we call do_exit(). And it isn't exported, but
* complete_and_exit() seems to be a minimal wrapper around it. */
complete_and_exit(NULL, status);
return status;
}
void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)

View File

@ -351,6 +351,7 @@ extern int kill_pid(struct pid *pid, int sig, int priv);
extern __must_check bool do_notify_parent(struct task_struct *, int);
extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent);
extern void force_sig(int);
extern void force_fatal_sig(int);
extern int send_sig(int, struct task_struct *, int);
extern int zap_other_threads(struct task_struct *p);
extern struct sigqueue *sigqueue_alloc(void);

View File

@ -70,6 +70,9 @@ struct ksignal {
int sig;
};
/* Used to kill the race between sigaction and forced signals */
#define SA_IMMUTABLE 0x00800000
#ifndef __ARCH_UAPI_SA_FLAGS
#ifdef SA_RESTORER
#define __ARCH_UAPI_SA_FLAGS SA_RESTORER

View File

@ -45,6 +45,7 @@
#define SA_UNSUPPORTED 0x00000400
#define SA_EXPOSE_TAGBITS 0x00000800
/* 0x00010000 used on mips */
/* 0x00800000 used for internal SA_IMMUTABLE */
/* 0x01000000 used on x86 */
/* 0x02000000 used on x86 */
/*

View File

@ -47,14 +47,18 @@ bool syscall_user_dispatch(struct pt_regs *regs)
* access_ok() is performed once, at prctl time, when
* the selector is loaded by userspace.
*/
if (unlikely(__get_user(state, sd->selector)))
do_exit(SIGSEGV);
if (unlikely(__get_user(state, sd->selector))) {
force_fatal_sig(SIGSEGV);
return true;
}
if (likely(state == SYSCALL_DISPATCH_FILTER_ALLOW))
return false;
if (state != SYSCALL_DISPATCH_FILTER_BLOCK)
do_exit(SIGSYS);
if (state != SYSCALL_DISPATCH_FILTER_BLOCK) {
force_fatal_sig(SIGSYS);
return true;
}
}
sd->on_dispatch = true;

View File

@ -433,7 +433,7 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
* If thread is going to be bound on a particular cpu, give its node
* in @node, to get NUMA affinity for kthread stack, or else give NUMA_NO_NODE.
* When woken, the thread will run @threadfn() with @data as its
* argument. @threadfn() can either call do_exit() directly if it is a
* argument. @threadfn() can either return directly if it is a
* standalone thread for which no one will call kthread_stop(), or
* return when 'kthread_should_stop()' is true (which means
* kthread_stop() has been called). The return value should be zero

View File

@ -359,7 +359,6 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
case LINUX_REBOOT_CMD_HALT:
kernel_halt();
do_exit(0);
panic("cannot halt");
case LINUX_REBOOT_CMD_POWER_OFF:
kernel_power_off();

View File

@ -1323,6 +1323,7 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t, bool
blocked = sigismember(&t->blocked, sig);
if (blocked || ignored || sigdfl) {
action->sa.sa_handler = SIG_DFL;
action->sa.sa_flags |= SA_IMMUTABLE;
if (blocked) {
sigdelset(&t->blocked, sig);
recalc_sigpending_and_wake(t);
@ -1649,6 +1650,19 @@ void force_sig(int sig)
}
EXPORT_SYMBOL(force_sig);
/*
 * force_fatal_sig - force signal @sig on the current task as a
 * kernel-originated signal.
 * @sig: number of the signal to deliver.
 *
 * Builds a zeroed kernel_siginfo describing @sig with si_code SI_KERNEL
 * and no errno, pid or uid, then hands it to force_sig_info_to_task()
 * for "current".
 *
 * NOTE(review): the final "true" argument presumably selects the
 * "sigdfl" behaviour of force_sig_info_to_task() (handler reset to
 * SIG_DFL so the signal cannot be caught) -- confirm against that
 * function's full definition.
 */
void force_fatal_sig(int sig)
{
struct kernel_siginfo info;
/* Start from a fully cleared siginfo before filling in the fields. */
clear_siginfo(&info);
info.si_signo = sig;
info.si_errno = 0;
/* Mark the signal as generated by the kernel, not by a user process. */
info.si_code = SI_KERNEL;
info.si_pid = 0;
info.si_uid = 0;
force_sig_info_to_task(&info, current, true);
}
/*
* When things go south during signal handling, we
* will force a SIGSEGV. And if the signal that caused
@ -1657,15 +1671,10 @@ EXPORT_SYMBOL(force_sig);
*/
void force_sigsegv(int sig)
{
struct task_struct *p = current;
if (sig == SIGSEGV) {
unsigned long flags;
spin_lock_irqsave(&p->sighand->siglock, flags);
p->sighand->action[sig - 1].sa.sa_handler = SIG_DFL;
spin_unlock_irqrestore(&p->sighand->siglock, flags);
}
force_sig(SIGSEGV);
if (sig == SIGSEGV)
force_fatal_sig(SIGSEGV);
else
force_sig(SIGSEGV);
}
int force_sig_fault_to_task(int sig, int code, void __user *addr
@ -2704,7 +2713,8 @@ relock:
if (!signr)
break; /* will return 0 */
if (unlikely(current->ptrace) && signr != SIGKILL) {
if (unlikely(current->ptrace) && (signr != SIGKILL) &&
!(sighand->action[signr -1].sa.sa_flags & SA_IMMUTABLE)) {
signr = ptrace_signal(signr, &ksig->info);
if (!signr)
continue;
@ -4054,6 +4064,10 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
k = &p->sighand->action[sig-1];
spin_lock_irq(&p->sighand->siglock);
if (k->sa.sa_flags & SA_IMMUTABLE) {
spin_unlock_irq(&p->sighand->siglock);
return -EINVAL;
}
if (oact)
*oact = *k;

View File

@ -890,7 +890,7 @@ out:
batadv_tp_vars_put(tp_vars);
do_exit(0);
return 0;
}
/**