5ea0727b16
Ensure the address limit is a user-mode segment before returning to user-mode. Otherwise a process can corrupt kernel-mode memory and elevate privileges [1]. The set_fs function sets the TIF_SETFS flag to force a slow path on return. In the slow path, the address limit is checked to be USER_DS if needed. The addr_limit_user_check function is added as a cross-architecture function to check the address limit. [1] https://bugs.chromium.org/p/project-zero/issues/detail?id=990 Signed-off-by: Thomas Garnier <thgarnie@google.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Mark Rutland <mark.rutland@arm.com> Cc: kernel-hardening@lists.openwall.com Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will.deacon@arm.com> Cc: David Howells <dhowells@redhat.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Miroslav Benes <mbenes@suse.cz> Cc: Chris Metcalf <cmetcalf@mellanox.com> Cc: Pratyush Anand <panand@redhat.com> Cc: Russell King <linux@armlinux.org.uk> Cc: Petr Mladek <pmladek@suse.com> Cc: Rik van Riel <riel@redhat.com> Cc: Kees Cook <keescook@chromium.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Andy Lutomirski <luto@kernel.org> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: linux-arm-kernel@lists.infradead.org Cc: Will Drewry <wad@chromium.org> Cc: linux-api@vger.kernel.org Cc: Oleg Nesterov <oleg@redhat.com> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Paolo Bonzini <pbonzini@redhat.com> Link: http://lkml.kernel.org/r/20170615011203.144108-1-thgarnie@google.com
254 lines
8.3 KiB
C
254 lines
8.3 KiB
C
/* thread_info.h: low-level thread information
 *
 * Copyright (C) 2002 David Howells (dhowells@redhat.com)
 * - Incorporating suggestions made by Linus Torvalds and Dave Miller
 */
|
|
|
|
#ifndef _ASM_X86_THREAD_INFO_H
|
|
#define _ASM_X86_THREAD_INFO_H
|
|
|
|
#include <linux/compiler.h>
|
|
#include <asm/page.h>
|
|
#include <asm/percpu.h>
|
|
#include <asm/types.h>
|
|
|
|
/*
 * TOP_OF_KERNEL_STACK_PADDING is a number of unused bytes that we
 * reserve at the top of the kernel stack.  We do it because of a nasty
 * 32-bit corner case.  On x86_32, the hardware stack frame is
 * variable-length.  Except for vm86 mode, struct pt_regs assumes a
 * maximum-length frame.  If we enter from CPL 0, the top 8 bytes of
 * pt_regs don't actually exist.  Ordinarily this doesn't matter, but it
 * does in at least one case:
 *
 * If we take an NMI early enough in SYSENTER, then we can end up with
 * pt_regs that extends above sp0.  On the way out, in the espfix code,
 * we can read the saved SS value, but that value will be above sp0.
 * Without this offset, that can result in a page fault.  (We are
 * careful that, in this case, the value we read doesn't matter.)
 *
 * In vm86 mode, the hardware frame is much longer still, so add 16
 * bytes to make room for the real-mode segments.
 *
 * x86_64 has a fixed-length stack frame.
 *
 * Resulting values:
 *   CONFIG_X86_32 with CONFIG_VM86     -> 16
 *   CONFIG_X86_32 without CONFIG_VM86  ->  8
 *   otherwise                          ->  0
 */
#ifdef CONFIG_X86_32
# ifdef CONFIG_VM86
#  define TOP_OF_KERNEL_STACK_PADDING 16
# else
#  define TOP_OF_KERNEL_STACK_PADDING 8
# endif
#else
# define TOP_OF_KERNEL_STACK_PADDING 0
#endif
|
|
|
|
/*
 * low level task data that entry.S needs immediate access to
 * - this struct should fit entirely inside of one cache line
 * - this struct shares the supervisor stack pages
 */
|
|
#ifndef __ASSEMBLY__
|
|
struct task_struct;
|
|
#include <asm/cpufeature.h>
|
|
#include <linux/atomic.h>
|
|
|
|
/*
 * Low-level per-thread state.  As declared here the structure holds a
 * single word of flags; the TIF_* constants in this header are bit
 * numbers within that word.
 */
struct thread_info {
	unsigned long		flags;		/* low level flags */
};
|
|
|
|
/*
 * Static initializer for a struct thread_info: all flags start cleared.
 * The @tsk argument is accepted but does not appear in the expansion.
 */
#define INIT_THREAD_INFO(tsk)			\
{						\
	.flags		= 0,			\
}
|
|
|
|
/* Shorthand for the stack member of init_thread_union (declared elsewhere). */
#define init_stack (init_thread_union.stack)
|
|
|
|
#else /* !__ASSEMBLY__ */
|
|
|
|
#include <asm/asm-offsets.h>
|
|
|
|
#endif
|
|
|
|
/*
 * thread information flags
 * - these are process state flags that various assembly files
 *   may need to access
 *
 * Each TIF_* value is a bit number; the corresponding _TIF_* constant
 * is the single-bit mask built from it.
 */
#define TIF_SYSCALL_TRACE	0	/* syscall trace active */
#define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
#define TIF_SIGPENDING		2	/* signal pending */
#define TIF_NEED_RESCHED	3	/* rescheduling necessary */
#define TIF_SINGLESTEP		4	/* reenable singlestep on user return */
#define TIF_SYSCALL_EMU		6	/* syscall emulation active */
#define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
#define TIF_SECCOMP		8	/* secure computing */
#define TIF_USER_RETURN_NOTIFY	11	/* notify kernel of userspace return */
#define TIF_UPROBE		12	/* breakpointed or singlestepping */
#define TIF_PATCH_PENDING	13	/* pending live patching update */
#define TIF_NOCPUID		15	/* CPUID is not accessible in userland */
#define TIF_NOTSC		16	/* TSC is not accessible in userland */
#define TIF_IA32		17	/* IA32 compatibility process */
#define TIF_NOHZ		19	/* in adaptive nohz mode */
#define TIF_MEMDIE		20	/* is terminating due to OOM killer */
#define TIF_POLLING_NRFLAG	21	/* idle is polling for TIF_NEED_RESCHED */
#define TIF_IO_BITMAP		22	/* uses I/O bitmap */
#define TIF_FORCED_TF		24	/* true if TF in eflags artificially */
#define TIF_BLOCKSTEP		25	/* set when we want DEBUGCTLMSR_BTF */
#define TIF_LAZY_MMU_UPDATES	27	/* task is updating the mmu lazily */
#define TIF_SYSCALL_TRACEPOINT	28	/* syscall tracepoint instrumentation */
#define TIF_ADDR32		29	/* 32-bit address space on 64 bits */
#define TIF_X32			30	/* 32-bit native x86-64 binary */
#define TIF_FSCHECK		31	/* Check FS is USER_DS on return */
|
|
|
|
/*
 * Single-bit masks, one per TIF_* bit number.
 *
 * NOTE(review): TIF_FSCHECK is bit 31, so _TIF_FSCHECK is (1 << 31),
 * which shifts into the sign bit of a plain int.  In C the resulting
 * mask is a negative int and sign-extends when combined with a wider
 * unsigned flags word.  The expressions are left unchanged here because
 * this header is also included from assembly (see the __ASSEMBLY__
 * guards), so any change must remain valid for the assembler -- confirm
 * before altering.
 */
#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
#define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP		(1 << TIF_SECCOMP)
#define _TIF_USER_RETURN_NOTIFY	(1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_UPROBE		(1 << TIF_UPROBE)
#define _TIF_PATCH_PENDING	(1 << TIF_PATCH_PENDING)
#define _TIF_NOCPUID		(1 << TIF_NOCPUID)
#define _TIF_NOTSC		(1 << TIF_NOTSC)
#define _TIF_IA32		(1 << TIF_IA32)
#define _TIF_NOHZ		(1 << TIF_NOHZ)
#define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
#define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
#define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
#define _TIF_BLOCKSTEP		(1 << TIF_BLOCKSTEP)
#define _TIF_LAZY_MMU_UPDATES	(1 << TIF_LAZY_MMU_UPDATES)
#define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_ADDR32		(1 << TIF_ADDR32)
#define _TIF_X32		(1 << TIF_X32)
#define _TIF_FSCHECK		(1 << TIF_FSCHECK)
|
|
|
|
/*
 * work to do in syscall_trace_enter().  Also includes TIF_NOHZ for
 * enter_from_user_mode()
 */
#define _TIF_WORK_SYSCALL_ENTRY	\
	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT |	\
	 _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT |	\
	 _TIF_NOHZ)
|
|
|
|
/*
 * work to do on any return to user space
 *
 * _TIF_FSCHECK is part of this mask; its flag is described above as
 * "Check FS is USER_DS on return".
 */
#define _TIF_ALLWORK_MASK						\
	(_TIF_SYSCALL_TRACE | _TIF_NOTIFY_RESUME | _TIF_SIGPENDING |	\
	 _TIF_NEED_RESCHED | _TIF_SINGLESTEP | _TIF_SYSCALL_EMU |	\
	 _TIF_SYSCALL_AUDIT | _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE |	\
	 _TIF_PATCH_PENDING | _TIF_NOHZ | _TIF_SYSCALL_TRACEPOINT |	\
	 _TIF_FSCHECK)
|
|
|
|
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW							\
	(_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)

/*
 * Variants of the mask above: the PREV form additionally carries
 * _TIF_USER_RETURN_NOTIFY, the NEXT form is the base mask unchanged.
 */
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
|
|
|
|
/* One eighth of THREAD_SIZE (THREAD_SIZE is defined elsewhere). */
#define STACK_WARN (THREAD_SIZE/8)
|
|
|
|
/*
 * macros/functions for gaining access to the thread information structure
 *
 * preempt_count needs to be 1 initially, until the scheduler is functional.
 */
|
|
#ifndef __ASSEMBLY__
|
|
|
|
static inline unsigned long current_stack_pointer(void)
|
|
{
|
|
unsigned long sp;
|
|
#ifdef CONFIG_X86_64
|
|
asm("mov %%rsp,%0" : "=g" (sp));
|
|
#else
|
|
asm("mov %%esp,%0" : "=g" (sp));
|
|
#endif
|
|
return sp;
|
|
}
|
|
|
|
/*
|
|
* Walks up the stack frames to make sure that the specified object is
|
|
* entirely contained by a single stack frame.
|
|
*
|
|
* Returns:
|
|
* GOOD_FRAME if within a frame
|
|
* BAD_STACK if placed across a frame boundary (or outside stack)
|
|
* NOT_STACK unable to determine (no frame pointers, etc)
|
|
*/
|
|
static inline int arch_within_stack_frames(const void * const stack,
|
|
const void * const stackend,
|
|
const void *obj, unsigned long len)
|
|
{
|
|
#if defined(CONFIG_FRAME_POINTER)
|
|
const void *frame = NULL;
|
|
const void *oldframe;
|
|
|
|
oldframe = __builtin_frame_address(1);
|
|
if (oldframe)
|
|
frame = __builtin_frame_address(2);
|
|
/*
|
|
* low ----------------------------------------------> high
|
|
* [saved bp][saved ip][args][local vars][saved bp][saved ip]
|
|
* ^----------------^
|
|
* allow copies only within here
|
|
*/
|
|
while (stack <= frame && frame < stackend) {
|
|
/*
|
|
* If obj + len extends past the last frame, this
|
|
* check won't pass and the next frame will be 0,
|
|
* causing us to bail out and correctly report
|
|
* the copy as invalid.
|
|
*/
|
|
if (obj + len <= frame)
|
|
return obj >= oldframe + 2 * sizeof(void *) ?
|
|
GOOD_FRAME : BAD_STACK;
|
|
oldframe = frame;
|
|
frame = *(const void * const *)frame;
|
|
}
|
|
return BAD_STACK;
|
|
#else
|
|
return NOT_STACK;
|
|
#endif
|
|
}
|
|
|
|
#else /* !__ASSEMBLY__ */
|
|
|
|
/*
 * Assembly-side definition (this is the branch taken when __ASSEMBLY__
 * is defined): on 64-bit, cpu_current_top_of_stack is expressed as
 * cpu_tss plus the TSS_sp0 offset.
 */
#ifdef CONFIG_X86_64
# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
#endif
|
|
|
|
#endif
|
|
|
|
/* Status bit, only defined when CONFIG_COMPAT is set. */
#ifdef CONFIG_COMPAT
#define TS_I386_REGS_POKED	0x0004	/* regs poked by 32-bit ptracer */
#endif
|
|
#ifndef __ASSEMBLY__
|
|
|
|
/*
 * in_ia32_syscall(): always true when CONFIG_X86_32 is defined.
 * Otherwise it is true only if CONFIG_IA32_EMULATION is enabled and
 * TS_COMPAT is set in current->thread.status.
 */
#ifdef CONFIG_X86_32
#define in_ia32_syscall() true
#else
#define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \
			   (current->thread.status & TS_COMPAT))
#endif
|
|
|
|
/*
 * Force syscall return via IRET by making it look as if there was
 * some work pending. IRET is our most capable (but slowest) syscall
 * return path, which is able to restore modified SS, CS and certain
 * EFLAGS values that other (fast) syscall return instructions
 * are not able to restore properly.
 *
 * Implemented by setting TIF_NOTIFY_RESUME on the current thread.
 */
#define force_iret() set_thread_flag(TIF_NOTIFY_RESUME)
|
|
|
|
/*
 * Architecture hooks; only the declarations live here, the definitions
 * are elsewhere.
 */
extern void arch_task_cache_init(void);
extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
extern void arch_release_task_struct(struct task_struct *tsk);
extern void arch_setup_new_exec(void);
/*
 * Defining the name to itself makes it visible to the preprocessor --
 * presumably so other code can test for this hook with #ifdef; verify
 * against the users of arch_setup_new_exec.
 */
#define arch_setup_new_exec arch_setup_new_exec
|
|
#endif /* !__ASSEMBLY__ */
|
|
|
|
#endif /* _ASM_X86_THREAD_INFO_H */
|