0100301bfd
Move the low-level context switch code to an out-of-line asm stub instead of using complex inline asm. This allows constructing a new stack frame for the child process to make it seamlessly flow to ret_from_fork without an extra test and branch in __switch_to(). It also improves code generation for __schedule() by using the C calling convention instead of clobbering all registers. Signed-off-by: Brian Gerst <brgerst@gmail.com> Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/1471106302-10159-5-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
297 lines
9.4 KiB
C
297 lines
9.4 KiB
C
/* thread_info.h: low-level thread information
|
|
*
|
|
* Copyright (C) 2002 David Howells (dhowells@redhat.com)
|
|
* - Incorporating suggestions made by Linus Torvalds and Dave Miller
|
|
*/
|
|
|
|
#ifndef _ASM_X86_THREAD_INFO_H
|
|
#define _ASM_X86_THREAD_INFO_H
|
|
|
|
#include <linux/compiler.h>
|
|
#include <asm/page.h>
|
|
#include <asm/percpu.h>
|
|
#include <asm/types.h>
|
|
|
|
/*
 * TOP_OF_KERNEL_STACK_PADDING is a number of unused bytes that we
 * reserve at the top of the kernel stack.  We do it because of a nasty
 * 32-bit corner case.  On x86_32, the hardware stack frame is
 * variable-length.  Except for vm86 mode, struct pt_regs assumes a
 * maximum-length frame.  If we enter from CPL 0, the top 8 bytes of
 * pt_regs don't actually exist.  Ordinarily this doesn't matter, but it
 * does in at least one case:
 *
 * If we take an NMI early enough in SYSENTER, then we can end up with
 * pt_regs that extends above sp0.  On the way out, in the espfix code,
 * we can read the saved SS value, but that value will be above sp0.
 * Without this offset, that can result in a page fault.  (We are
 * careful that, in this case, the value we read doesn't matter.)
 *
 * In vm86 mode, the hardware frame is much longer still, so reserve
 * 16 bytes (instead of 8) to make room for the real-mode segments.
 *
 * x86_64 has a fixed-length stack frame, so no padding is needed there.
 */
#ifdef CONFIG_X86_32
# ifdef CONFIG_VM86
#  define TOP_OF_KERNEL_STACK_PADDING 16
# else
#  define TOP_OF_KERNEL_STACK_PADDING 8
# endif
#else
# define TOP_OF_KERNEL_STACK_PADDING 0
#endif
|
|
|
|
/*
|
|
* low level task data that entry.S needs immediate access to
|
|
* - this struct should fit entirely inside of one cache line
|
|
* - this struct shares the supervisor stack pages
|
|
*/
|
|
#ifndef __ASSEMBLY__
|
|
struct task_struct;
|
|
#include <asm/cpufeature.h>
|
|
#include <linux/atomic.h>
|
|
|
|
struct thread_info {
|
|
struct task_struct *task; /* main task structure */
|
|
__u32 flags; /* low level flags */
|
|
__u32 status; /* thread synchronous flags */
|
|
__u32 cpu; /* current CPU */
|
|
};
|
|
|
|
/*
 * Brace-enclosed initializer for a struct thread_info owned by @tsk.
 *
 * @tsk is the task object itself (an lvalue), not a pointer to it: the
 * macro takes its address.  Members that are not named here (->status)
 * are zero-initialized by the designated-initializer rules.
 */
#define INIT_THREAD_INFO(tsk)			\
{						\
	.task		= &(tsk),		\
	.flags		= 0,			\
	.cpu		= 0,			\
}
|
|
|
|
/* Shorthands for the two members of init_thread_union (declared elsewhere). */
#define init_thread_info	(init_thread_union.thread_info)
#define init_stack		(init_thread_union.stack)
|
|
|
|
#else /* !__ASSEMBLY__ */
|
|
|
|
#include <asm/asm-offsets.h>
|
|
|
|
#endif
|
|
|
|
/*
 * thread information flags
 * - these are process state flags that various assembly files
 *   may need to access
 * - pending work-to-be-done flags are in LSW
 * - other flags in MSW
 * Warning: layout of LSW is hardcoded in entry.S
 *
 * TIF_xxx is a bit number, _TIF_xxx is the corresponding single-bit mask.
 * Both are kept as plain integer expressions because this part of the
 * header is also seen by assembly files.
 */
#define TIF_SYSCALL_TRACE	0	/* syscall trace active */
#define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
#define TIF_SIGPENDING		2	/* signal pending */
#define TIF_NEED_RESCHED	3	/* rescheduling necessary */
#define TIF_SINGLESTEP		4	/* reenable singlestep on user return */
#define TIF_SYSCALL_EMU		6	/* syscall emulation active */
#define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
#define TIF_SECCOMP		8	/* secure computing */
#define TIF_USER_RETURN_NOTIFY	11	/* notify kernel of userspace return */
#define TIF_UPROBE		12	/* breakpointed or singlestepping */
#define TIF_NOTSC		16	/* TSC is not accessible in userland */
#define TIF_IA32		17	/* IA32 compatibility process */
#define TIF_NOHZ		19	/* in adaptive nohz mode */
#define TIF_MEMDIE		20	/* is terminating due to OOM killer */
#define TIF_POLLING_NRFLAG	21	/* idle is polling for TIF_NEED_RESCHED */
#define TIF_IO_BITMAP		22	/* uses I/O bitmap */
#define TIF_FORCED_TF		24	/* true if TF in eflags artificially */
#define TIF_BLOCKSTEP		25	/* set when we want DEBUGCTLMSR_BTF */
#define TIF_LAZY_MMU_UPDATES	27	/* task is updating the mmu lazily */
#define TIF_SYSCALL_TRACEPOINT	28	/* syscall tracepoint instrumentation */
#define TIF_ADDR32		29	/* 32-bit address space on 64 bits */
#define TIF_X32			30	/* 32-bit native x86-64 binary */

/* Single-bit masks; note that TIF_MEMDIE has no mask counterpart here. */
#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
#define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP		(1 << TIF_SECCOMP)
#define _TIF_USER_RETURN_NOTIFY	(1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_UPROBE		(1 << TIF_UPROBE)
#define _TIF_NOTSC		(1 << TIF_NOTSC)
#define _TIF_IA32		(1 << TIF_IA32)
#define _TIF_NOHZ		(1 << TIF_NOHZ)
#define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
#define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
#define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
#define _TIF_BLOCKSTEP		(1 << TIF_BLOCKSTEP)
#define _TIF_LAZY_MMU_UPDATES	(1 << TIF_LAZY_MMU_UPDATES)
#define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_ADDR32		(1 << TIF_ADDR32)
#define _TIF_X32		(1 << TIF_X32)
|
|
|
|
/*
 * work to do in syscall_trace_enter().  Also includes TIF_NOHZ for
 * enter_from_user_mode()
 */
#define _TIF_WORK_SYSCALL_ENTRY	\
	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT |	\
	 _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT |	\
	 _TIF_NOHZ)

/*
 * work to do on any return to user space: every bit of the low 16 bits
 * except _TIF_SECCOMP, plus the two named high bits
 */
#define _TIF_ALLWORK_MASK						\
	((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT |	\
	_TIF_NOHZ)

/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW							\
	(_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)

#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)

/* One eighth of the kernel stack size (THREAD_SIZE is defined elsewhere). */
#define STACK_WARN		(THREAD_SIZE/8)
|
|
|
|
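/*
 * macros/functions for gaining access to the thread information structure
 *
 * preempt_count needs to be 1 initially, until the scheduler is functional.
 * (NOTE(review): struct thread_info as declared in this header has no
 * preempt_count member, so this remark looks historical - confirm.)
 */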
|
|
#ifndef __ASSEMBLY__
|
|
|
|
static inline struct thread_info *current_thread_info(void)
|
|
{
|
|
return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
|
|
}
|
|
|
|
static inline unsigned long current_stack_pointer(void)
|
|
{
|
|
unsigned long sp;
|
|
#ifdef CONFIG_X86_64
|
|
asm("mov %%rsp,%0" : "=g" (sp));
|
|
#else
|
|
asm("mov %%esp,%0" : "=g" (sp));
|
|
#endif
|
|
return sp;
|
|
}
|
|
|
|
/*
 * Walks up the stack frames to make sure that the specified object is
 * entirely contained by a single stack frame.
 *
 * @stack:    lowest address a frame pointer may have and still be followed
 * @stackend: upper bound (exclusive) for frame pointers that are followed
 * @obj:      start address of the object being checked
 * @len:      size of the object in bytes
 *
 * Returns:
 *	 1 if within a frame
 *	-1 if placed across a frame boundary (or outside stack)
 *	 0 unable to determine (no frame pointers, etc)
 */
static inline int arch_within_stack_frames(const void * const stack,
					   const void * const stackend,
					   const void *obj, unsigned long len)
{
#if defined(CONFIG_FRAME_POINTER)
	const void *frame = NULL;
	const void *oldframe;

	/* Only ask for the level-2 frame when the level-1 frame exists. */
	oldframe = __builtin_frame_address(1);
	if (oldframe)
		frame = __builtin_frame_address(2);
	/*
	 * low ----------------------------------------------> high
	 * [saved bp][saved ip][args][local vars][saved bp][saved ip]
	 *                     ^----------------^
	 *               allow copies only within here
	 */
	while (stack <= frame && frame < stackend) {
		/*
		 * If obj + len extends past the last frame, this
		 * check won't pass and the next frame will be 0,
		 * causing us to bail out and correctly report
		 * the copy as invalid.
		 */
		if (obj + len <= frame)
			/*
			 * The object ends at or below this frame record;
			 * it must also start above the previous record
			 * (two pointer-sized slots: saved bp, saved ip).
			 */
			return obj >= oldframe + 2 * sizeof(void *) ? 1 : -1;
		/* Step to the next frame: *frame is treated as the next frame pointer. */
		oldframe = frame;
		frame = *(const void * const *)frame;
	}
	return -1;
#else
	/* Without frame pointers there is nothing to walk. */
	return 0;
#endif
}
|
|
|
|
#else /* !__ASSEMBLY__ */
|
|
|
|
/*
 * Assembly-only definition (this is the __ASSEMBLY__ branch): on
 * CONFIG_X86_64 the symbol is an alias for cpu_tss offset by TSS_sp0.
 */
#ifdef CONFIG_X86_64
# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
#endif
|
|
|
|
/*
 * ASM operand which evaluates to a 'thread_info' address of
 * the current task, if it is known that "reg" is exactly "off"
 * bytes below the top of the stack currently.
 *
 * ( The kernel stack's size is known at build time, it is usually
 *   2 or 4 pages, and the bottom of the kernel stack contains
 *   the thread_info structure. So to access the thread_info very
 *   quickly from assembly code we can calculate down from the
 *   top of the kernel stack to the bottom, using constant,
 *   build-time calculations only. )
 *
 * For example, to fetch the current thread_info->flags value into %eax
 * on x86-64 defconfig kernels, in syscall entry code where RSP is
 * currently at exactly SIZEOF_PTREGS bytes away from the top of the
 * stack:
 *
 *      mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax
 *
 * will translate to:
 *
 *      8b 84 24 b8 c0 ff ff      mov    -0x3f48(%rsp), %eax
 *
 * which is below the current RSP by almost 16K.
 *
 * The expansion is a displacement(base) memory operand: the displacement
 * is field + off - THREAD_SIZE and the base register is reg.
 */
#define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg)
|
|
|
|
#endif
|
|
|
|
/*
 * Thread-synchronous status.
 *
 * This is different from the flags in that nobody else
 * ever touches our thread-synchronous status, so we don't
 * have to worry about atomic accesses.
 *
 * These are bit masks (not bit numbers); in_ia32_syscall() tests
 * thread_info->status against TS_COMPAT.
 */
#define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
#ifdef CONFIG_COMPAT
#define TS_I386_REGS_POKED	0x0004	/* regs poked by 32-bit ptracer */
#endif
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
/*
 * Tell whether the current task is executing a 32-bit (ia32) syscall.
 *
 * - CONFIG_X86_32:         always true.
 * - CONFIG_IA32_EMULATION: true iff TS_COMPAT is set in the current
 *                          thread_info->status.
 * - otherwise:             always false.
 *
 * The three cases are mutually exclusive preprocessor branches, so no
 * configuration leaves unreachable statements after a return.
 */
static inline bool in_ia32_syscall(void)
{
#if defined(CONFIG_X86_32)
	return true;
#elif defined(CONFIG_IA32_EMULATION)
	return !!(current_thread_info()->status & TS_COMPAT);
#else
	return false;
#endif
}
|
|
|
|
/*
 * Force syscall return via IRET by making it look as if there was
 * some work pending. IRET is our most capable (but slowest) syscall
 * return path, which is able to restore modified SS, CS and certain
 * EFLAGS values that other (fast) syscall return instructions
 * are not able to restore properly.
 *
 * Implemented by setting TIF_NOTIFY_RESUME through set_thread_flag().
 */
#define force_iret() set_thread_flag(TIF_NOTIFY_RESUME)
|
|
|
|
/* Architecture hooks for task_struct handling; implemented outside this header. */
extern void arch_task_cache_init(void);
extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
extern void arch_release_task_struct(struct task_struct *tsk);
|
|
#endif /* !__ASSEMBLY__ */
|
|
|
|
#endif /* _ASM_X86_THREAD_INFO_H */
|