linux/arch/x86/include/asm/irqflags.h
Denys Vlasenko 76f5df43ca x86/asm/entry/64: Always allocate a complete "struct pt_regs" on the kernel stack
The 64-bit entry code was using six stack slots less by not
saving/restoring registers which are callee-preserved according
to the C ABI, and was not allocating space for them.

Only when syscalls needed a complete "struct pt_regs" was
the complete area allocated and filled in.

As an additional twist, on interrupt entry a "slightly less
truncated pt_regs" trick is used, to make nested interrupt
stacks easier to unwind.

This proved to be a source of significant obfuscation and subtle
bugs. For example, 'stub_fork' had to pop the return address,
extend the struct, save registers, and push return address back.
Ugly. 'ia32_ptregs_common' pops return address and "returns" via
jmp insn, throwing a wrench into CPU return stack cache.

This patch changes the code to always allocate a complete
"struct pt_regs" on the kernel stack. The saving of registers
is still done lazily.

"Partial pt_regs" trick on interrupt stack is retained.

Macros which manipulate "struct pt_regs" on stack are reworked:

 - ALLOC_PT_GPREGS_ON_STACK allocates the structure.

 - SAVE_C_REGS saves to it those registers which are clobbered
   by C code.

 - SAVE_EXTRA_REGS saves to it all other registers.

 - Corresponding RESTORE_* and REMOVE_PT_GPREGS_FROM_STACK macros
   reverse it.

'ia32_ptregs_common', 'stub_fork' and friends lost their ugly dance
with the return pointer.

LOAD_ARGS32 in ia32entry.S now uses symbolic stack offsets
instead of magic numbers.

'error_entry' and 'save_paranoid' now use SAVE_C_REGS +
SAVE_EXTRA_REGS instead of having it open-coded yet again.

Patch was run-tested: 64-bit executables, 32-bit executables,
strace works.

Timing tests did not show measurable difference in 32-bit
and 64-bit syscalls.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Will Drewry <wad@chromium.org>
Link: http://lkml.kernel.org/r/1423778052-21038-2-git-send-email-dvlasenk@redhat.com
Link: http://lkml.kernel.org/r/b89763d354aa23e670b9bdf3a40ae320320a7c2e.1424989793.git.luto@amacapital.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-03-04 22:50:49 +01:00

210 lines
4.0 KiB
C

#ifndef _X86_IRQFLAGS_H_
#define _X86_IRQFLAGS_H_
#include <asm/processor-flags.h>
#ifndef __ASSEMBLY__
/*
* Interrupt control:
*/
static inline unsigned long native_save_fl(void)
{
unsigned long flags;
/*
* "=rm" is safe here, because "pop" adjusts the stack before
* it evaluates its effective address -- this is part of the
* documented behavior of the "pop" instruction.
*/
asm volatile("# __raw_save_flags\n\t"
"pushf ; pop %0"
: "=rm" (flags)
: /* no input */
: "memory");
return flags;
}
static inline void native_restore_fl(unsigned long flags)
{
asm volatile("push %0 ; popf"
: /* no output */
:"g" (flags)
:"memory", "cc");
}
static inline void native_irq_disable(void)
{
asm volatile("cli": : :"memory");
}
static inline void native_irq_enable(void)
{
asm volatile("sti": : :"memory");
}
static inline void native_safe_halt(void)
{
asm volatile("sti; hlt": : :"memory");
}
static inline void native_halt(void)
{
asm volatile("hlt": : :"memory");
}
#endif
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
#ifndef __ASSEMBLY__
#include <linux/types.h>
static inline notrace unsigned long arch_local_save_flags(void)
{
return native_save_fl();
}
static inline notrace void arch_local_irq_restore(unsigned long flags)
{
native_restore_fl(flags);
}
static inline notrace void arch_local_irq_disable(void)
{
native_irq_disable();
}
static inline notrace void arch_local_irq_enable(void)
{
native_irq_enable();
}
/*
* Used in the idle loop; sti takes one instruction cycle
* to complete:
*/
static inline void arch_safe_halt(void)
{
native_safe_halt();
}
/*
* Used when interrupts are already enabled or to
* shutdown the processor:
*/
static inline void halt(void)
{
native_halt();
}
/*
* For spinlocks, etc:
*/
static inline notrace unsigned long arch_local_irq_save(void)
{
unsigned long flags = arch_local_save_flags();
arch_local_irq_disable();
return flags;
}
#else
#define ENABLE_INTERRUPTS(x) sti
#define DISABLE_INTERRUPTS(x) cli
#ifdef CONFIG_X86_64
#define SWAPGS swapgs
/*
* Currently paravirt can't handle swapgs nicely when we
* don't have a stack we can rely on (such as a user space
* stack). So we either find a way around these or just fault
* and emulate if a guest tries to call swapgs directly.
*
* Either way, this is a good way to document that we don't
* have a reliable stack. x86_64 only.
*/
#define SWAPGS_UNSAFE_STACK swapgs
#define PARAVIRT_ADJUST_EXCEPTION_FRAME /* */
#define INTERRUPT_RETURN jmp native_iret
#define USERGS_SYSRET64 \
swapgs; \
sysretq;
#define USERGS_SYSRET32 \
swapgs; \
sysretl
#define ENABLE_INTERRUPTS_SYSEXIT32 \
swapgs; \
sti; \
sysexit
#else
#define INTERRUPT_RETURN iret
#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
#define GET_CR0_INTO_EAX movl %cr0, %eax
#endif
#endif /* __ASSEMBLY__ */
#endif /* CONFIG_PARAVIRT */
#ifndef __ASSEMBLY__
static inline int arch_irqs_disabled_flags(unsigned long flags)
{
return !(flags & X86_EFLAGS_IF);
}
static inline int arch_irqs_disabled(void)
{
unsigned long flags = arch_local_save_flags();
return arch_irqs_disabled_flags(flags);
}
#else
#ifdef CONFIG_X86_64
#define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk
#define ARCH_LOCKDEP_SYS_EXIT_IRQ \
TRACE_IRQS_ON; \
sti; \
SAVE_EXTRA_REGS; \
LOCKDEP_SYS_EXIT; \
RESTORE_EXTRA_REGS; \
cli; \
TRACE_IRQS_OFF;
#else
#define ARCH_LOCKDEP_SYS_EXIT \
pushl %eax; \
pushl %ecx; \
pushl %edx; \
call lockdep_sys_exit; \
popl %edx; \
popl %ecx; \
popl %eax;
#define ARCH_LOCKDEP_SYS_EXIT_IRQ
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
# define TRACE_IRQS_ON call trace_hardirqs_on_thunk;
# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk;
#else
# define TRACE_IRQS_ON
# define TRACE_IRQS_OFF
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define LOCKDEP_SYS_EXIT ARCH_LOCKDEP_SYS_EXIT
# define LOCKDEP_SYS_EXIT_IRQ ARCH_LOCKDEP_SYS_EXIT_IRQ
# else
# define LOCKDEP_SYS_EXIT
# define LOCKDEP_SYS_EXIT_IRQ
# endif
#endif /* __ASSEMBLY__ */
#endif