linux/arch/x86
Alexander van Heukelum 2e04bc7656 i386: fix return to 16-bit stack from NMI handler
Returning to a task with a 16-bit stack requires special care: the iret
instruction does not restore the high word of esp in that case. The
espfix code fixes this, but currently is not invoked on NMIs. This means
that a running task gets the upper word of esp clobbered due intervening
NMIs. To reproduce, compile and run the following program with the nmi
watchdog enabled (nmi_watchdog=2 on the command line). Using gdb you can
see that the high bits of esp contain garbage, while the low bits are
still correct.

This patch puts the espfix code back into the NMI code path.

The patch is slightly complicated due to the irqtrace infrastructure not
being NMI-safe. The NMI return path cannot call TRACE_IRQS_IRET.
Otherwise, the tail of the normal iret-code is correct for the nmi code
path too. To be able to share this code-path, the TRACE_IRQS_IRET was
move up a bit. The espfix code exists after the TRACE_IRQS_IRET, but
this code explicitly disables interrupts. This short interrupts-off
section is now not traced anymore. The return-to-kernel path now always
includes the preliminary test to decide if the espfix code should be
called. This is never the case, but doing it this way keeps the patch as
simple as possible and the few extra instructions should not affect
timing in any significant way.

 #define _GNU_SOURCE
 #include <stdio.h>
 #include <sys/types.h>
 #include <sys/mman.h>
 #include <unistd.h>
 #include <sys/syscall.h>
 #include <asm/ldt.h>

int modify_ldt(int func, void *ptr, unsigned long bytecount)
{
        return syscall(SYS_modify_ldt, func, ptr, bytecount);
}

/* this is assumed to be usable */
 #define SEGBASEADDR 0x10000
 #define SEGLIMIT 0x20000

/* 16-bit segment */
struct user_desc desc = {
        .entry_number = 0,
        .base_addr = SEGBASEADDR,
        .limit = SEGLIMIT,
        .seg_32bit = 0,
        .contents = 0, /* ??? */
        .read_exec_only = 0,
        .limit_in_pages = 0,
        .seg_not_present = 0,
        .useable = 1
};

int main(void)
{
        setvbuf(stdout, NULL, _IONBF, 0);

        /* map a 64 kb segment */
        char *pointer = mmap((void *)SEGBASEADDR, SEGLIMIT+1,
                        PROT_EXEC|PROT_READ|PROT_WRITE,
                        MAP_SHARED|MAP_ANONYMOUS, -1, 0);
        if (pointer == NULL) {
                printf("could not map space\n");
                return 0;
        }

        /* write ldt, new mode */
        int err = modify_ldt(0x11, &desc, sizeof(desc));
        if (err) {
                printf("error modifying ldt: %i\n", err);
                return 0;
        }

        for (int i=0; i<1000; i++) {
        asm volatile (
                "pusha\n\t"
                "mov %ss, %eax\n\t" /* preserve ss:esp */
                "mov %esp, %ebp\n\t"
                "push $7\n\t" /* index 0, ldt, user mode */
                "push $65536-4096\n\t" /* esp */
                "lss (%esp), %esp\n\t" /* switch to new stack */
                "push %eax\n\t" /* save old ss:esp on new stack */
                "push %ebp\n\t"
                "add $17*65536, %esp\n\t" /* set high bits */
                "mov %esp, %edx\n\t"

                "mov $10000000, %ecx\n\t" /* wait... */
                "1: loop 1b\n\t" /* ... a bit */

                "cmp %esp, %edx\n\t"
                "je 1f\n\t"
                "ud2\n\t" /* esp changed inexplicably! */
                "1:\n\t"
                "sub $17*65536, %esp\n\t" /* restore high bits */
                "lss (%esp), %esp\n\t" /* restore old ss:esp */
                "popa\n\t");

                printf("\rx%ix", i);
        }

        return 0;
}

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Acked-by: Stas Sergeev <stsp@aknet.ru>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2009-06-17 21:35:09 -07:00
..
boot x86, boot: use .code16gcc instead of .code16 2009-06-16 17:47:32 -07:00
configs Merge branch 'x86-kbuild-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip 2009-06-10 15:30:41 -07:00
crypto crypto: aes-ni - Add support for more modes 2009-06-02 14:04:16 +10:00
ia32 Merge branch 'core/signal' into perfcounters/core 2009-04-30 21:16:49 +02:00
include/asm x86: msr.h linux/types.h is only required for __KERNEL__ 2009-06-17 18:56:01 +02:00
kernel i386: fix return to 16-bit stack from NMI handler 2009-06-17 21:35:09 -07:00
kvm KVM: Add VT-x machine check support 2009-06-10 12:27:08 +03:00
lguest lguest: PAE support 2009-06-12 22:27:08 +09:30
lib x86: MSR: add methods for writing of an MSR on several CPUs 2009-06-10 12:18:43 +02:00
math-emu Merge branch 'core/percpu' into percpu-cpumask-x86-for-linus-2 2009-03-27 17:28:43 +01:00
mm Merge branch 'linus' into x86/urgent 2009-06-17 08:59:10 +02:00
oprofile Merge branch 'linus' into perfcounters/core 2009-06-11 17:55:42 +02:00
pci Merge branch 'irq-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip 2009-06-10 15:25:41 -07:00
power x86: unify power/cpu_(32|64).c 2009-06-12 21:32:31 +02:00
vdso Merge branch 'linus' into perfcounters/core 2009-06-11 17:55:42 +02:00
video
xen Merge branch 'x86-xen-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip 2009-06-10 16:16:27 -07:00
Kbuild x86: standardize Kbuild rules 2009-04-16 18:09:02 +02:00
Kconfig Merge branch 'linus' into x86/mce3 2009-06-11 23:31:52 +02:00
Kconfig.cpu Revert "x86, bts: reenable ptrace branch trace support" 2009-06-11 00:32:00 +02:00
Kconfig.debug Merge branch 'tracing-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip 2009-06-10 19:53:40 -07:00
Makefile Merge branch 'x86-kbuild-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip 2009-06-10 15:30:41 -07:00
Makefile_32.cpu