mirror of
https://github.com/torvalds/linux.git
synced 2024-12-15 23:51:46 +00:00
x86/asm/entry: Create and use a 'TOP_OF_KERNEL_STACK_PADDING' macro
x86_32, unlike x86_64, pads the top of the kernel stack, because the hardware stack frame formats are variable in size. Document this padding and give it a name. This should make no change whatsoever to the compiled kernel image. It also doesn't fix any of the current bugs in this area. Signed-off-by: Andy Lutomirski <luto@amacapital.net> Acked-by: Denys Vlasenko <dvlasenk@redhat.com> Cc: Borislav Petkov <bp@alien8.de> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/02bf2f54b8dcb76a62a142b6dfe07d4ef7fc582e.1426009661.git.luto@amacapital.net [ Fixed small details, such as a missed magic constant in entry_32.S pointed out by Denys Vlasenko. ] Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
9a036b93a3
commit
3ee4298f44
@ -849,7 +849,8 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
|
||||
#define task_pt_regs(task) \
|
||||
({ \
|
||||
struct pt_regs *__regs__; \
|
||||
__regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \
|
||||
__regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task)) - \
|
||||
TOP_OF_KERNEL_STACK_PADDING); \
|
||||
__regs__ - 1; \
|
||||
})
|
||||
|
||||
|
@ -12,6 +12,33 @@
|
||||
#include <asm/percpu.h>
|
||||
#include <asm/types.h>
|
||||
|
||||
/*
|
||||
* TOP_OF_KERNEL_STACK_PADDING is a number of unused bytes that we
|
||||
* reserve at the top of the kernel stack. We do it because of a nasty
|
||||
* 32-bit corner case. On x86_32, the hardware stack frame is
|
||||
* variable-length. Except for vm86 mode, struct pt_regs assumes a
|
||||
* maximum-length frame. If we enter from CPL 0, the top 8 bytes of
|
||||
* pt_regs don't actually exist. Ordinarily this doesn't matter, but it
|
||||
* does in at least one case:
|
||||
*
|
||||
* If we take an NMI early enough in SYSENTER, then we can end up with
|
||||
* a pt_regs that extends above sp0. On the way out, in the espfix code,
|
||||
* we can read the saved SS value, but the slot holding it lies above sp0.
|
||||
* Without this offset, that can result in a page fault. (We are
|
||||
* careful that, in this case, the value we read doesn't matter.)
|
||||
*
|
||||
* In vm86 mode, the hardware frame is much longer still, but we neither
|
||||
* access the extra members from NMI context, nor do we write such a
|
||||
* frame at sp0 at all.
|
||||
*
|
||||
* x86_64 has a fixed-length stack frame.
|
||||
*/
|
||||
#ifdef CONFIG_X86_32
|
||||
# define TOP_OF_KERNEL_STACK_PADDING 8
|
||||
#else
|
||||
# define TOP_OF_KERNEL_STACK_PADDING 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
* low level task data that entry.S needs immediate access to
|
||||
* - this struct should fit entirely inside of one cache line
|
||||
|
@ -398,7 +398,7 @@ sysenter_past_esp:
|
||||
* A tiny bit of offset fixup is necessary - 4*4 means the 4 words
|
||||
* pushed above; TOP_OF_KERNEL_STACK_PADDING (8 on x86_32) corresponds to copy_thread's esp0 setting.
|
||||
*/
|
||||
pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp)
|
||||
pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+TOP_OF_KERNEL_STACK_PADDING+4*4)(%esp)
|
||||
CFI_REL_OFFSET eip, 0
|
||||
|
||||
pushl_cfi %eax
|
||||
|
Loading…
Reference in New Issue
Block a user