linux/arch/x86
Andy Lutomirski f647d7c155 x86_64, switch_to(): Load TLS descriptors before switching DS and ES
Otherwise, if buggy user code points DS or ES into the TLS
array, they would be corrupted after a context switch.

This also significantly improves the comments and documents some
gotchas in the code.

Before this patch, the both tests below failed.  With this
patch, the es test passes, although the gsbase test still fails.

 ----- begin es test -----

/*
 * Copyright (c) 2014 Andy Lutomirski
 * GPL v2
 */

static unsigned short GDT3(int idx)
{
	return (idx << 3) | 3;
}

static int create_tls(int idx, unsigned int base)
{
	struct user_desc desc = {
		.entry_number    = idx,
		.base_addr       = base,
		.limit           = 0xfffff,
		.seg_32bit       = 1,
		.contents        = 0, /* Data, grow-up */
		.read_exec_only  = 0,
		.limit_in_pages  = 1,
		.seg_not_present = 0,
		.useable         = 0,
	};

	if (syscall(SYS_set_thread_area, &desc) != 0)
		err(1, "set_thread_area");

	return desc.entry_number;
}

int main()
{
	int idx = create_tls(-1, 0);
	printf("Allocated GDT index %d\n", idx);

	unsigned short orig_es;
	asm volatile ("mov %%es,%0" : "=rm" (orig_es));

	int errors = 0;
	int total = 1000;
	for (int i = 0; i < total; i++) {
		asm volatile ("mov %0,%%es" : : "rm" (GDT3(idx)));
		usleep(100);

		unsigned short es;
		asm volatile ("mov %%es,%0" : "=rm" (es));
		asm volatile ("mov %0,%%es" : : "rm" (orig_es));
		if (es != GDT3(idx)) {
			if (errors == 0)
				printf("[FAIL]\tES changed from 0x%hx to 0x%hx\n",
				       GDT3(idx), es);
			errors++;
		}
	}

	if (errors) {
		printf("[FAIL]\tES was corrupted %d/%d times\n", errors, total);
		return 1;
	} else {
		printf("[OK]\tES was preserved\n");
		return 0;
	}
}

 ----- end es test -----

 ----- begin gsbase test -----

/*
 * gsbase.c, a gsbase test
 * Copyright (c) 2014 Andy Lutomirski
 * GPL v2
 */

static unsigned char *testptr, *testptr2;

static unsigned char read_gs_testvals(void)
{
	unsigned char ret;
	asm volatile ("movb %%gs:%1, %0" : "=r" (ret) : "m" (*testptr));
	return ret;
}

int main()
{
	int errors = 0;

	testptr = mmap((void *)0x200000000UL, 1, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
	if (testptr == MAP_FAILED)
		err(1, "mmap");

	testptr2 = mmap((void *)0x300000000UL, 1, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
	if (testptr2 == MAP_FAILED)
		err(1, "mmap");

	*testptr = 0;
	*testptr2 = 1;

	if (syscall(SYS_arch_prctl, ARCH_SET_GS,
		    (unsigned long)testptr2 - (unsigned long)testptr) != 0)
		err(1, "ARCH_SET_GS");

	usleep(100);

	if (read_gs_testvals() == 1) {
		printf("[OK]\tARCH_SET_GS worked\n");
	} else {
		printf("[FAIL]\tARCH_SET_GS failed\n");
		errors++;
	}

	asm volatile ("mov %0,%%gs" : : "r" (0));

	if (read_gs_testvals() == 0) {
		printf("[OK]\tWriting 0 to gs worked\n");
	} else {
		printf("[FAIL]\tWriting 0 to gs failed\n");
		errors++;
	}

	usleep(100);

	if (read_gs_testvals() == 0) {
		printf("[OK]\tgsbase is still zero\n");
	} else {
		printf("[FAIL]\tgsbase was corrupted\n");
		errors++;
	}

	return errors == 0 ? 0 : 1;
}

 ----- end gsbase test -----

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Cc: <stable@vger.kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/509d27c9fec78217691c3dad91cec87e1006b34a.1418075657.git.luto@amacapital.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2014-12-11 11:40:08 +01:00
..
boot x86: Use $(OBJDUMP) instead of plain objdump 2014-11-23 21:21:53 +01:00
configs x86: Add "make tinyconfig" to configure the tiniest possible kernel 2014-08-08 16:30:24 -07:00
crypto Revert "crypto: aesni - disable "by8" AVX CTR optimization" 2014-10-02 14:40:28 +08:00
ia32 x86_64, entry: Fix out of bounds read on sysenter 2014-10-31 18:47:09 -07:00
include x86/mm: Fix zone ranges boot printout 2014-12-11 11:35:02 +01:00
kernel x86_64, switch_to(): Load TLS descriptors before switching DS and ES 2014-12-11 11:40:08 +01:00
kvm KVM: x86: Fix uninitialized op->type for some immediate values 2014-11-05 12:36:58 +01:00
lguest
lib x86-64: make csum_partial_copy_from_user() error handling consistent 2014-11-16 11:00:42 -08:00
math-emu
mm x86/mm: Fix zone ranges boot printout 2014-12-11 11:35:02 +01:00
net x86: bpf_jit: fix two bugs in eBPF JIT compiler 2014-10-14 13:13:14 -04:00
oprofile percpu: Resolve ambiguities in __get_cpu_var/cpumask_var_t 2014-08-28 08:58:57 -04:00
pci xen: features and fixes for 3.18-rc0 2014-10-11 20:29:01 -04:00
platform Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip 2014-10-31 14:30:16 -07:00
power nosave: consolidate __nosave_{begin,end} in <asm/sections.h> 2014-10-09 22:26:04 -04:00
purgatory arch/x86/purgatory/Makefile: try to use automatic variable in kexec purgatory makefile 2014-10-14 02:18:21 +02:00
realmode
syscalls bpf: enable bpf syscall on x64 and i386 2014-09-26 15:05:14 -04:00
tools x86, kaslr: Handle Gold linker for finding bss/brk 2014-11-18 18:32:24 +01:00
um Merge git://git.infradead.org/users/eparis/audit 2014-10-19 16:25:56 -07:00
vdso x86/vdso: Fix vdso2c's special_pages[] error checking 2014-09-24 09:55:38 +02:00
video
xen x86/core, x86/xen/smp: Use 'die_complete' completion when taking CPU down 2014-11-10 11:16:40 +01:00
.gitignore x86/build: Add arch/x86/purgatory/ make generated files to gitignore 2014-10-09 09:29:46 +02:00
Kbuild kexec: create a new config option CONFIG_KEXEC_FILE for new syscall 2014-08-29 16:28:16 -07:00
Kconfig perf/x86: Fix embarrasing typo 2014-11-04 07:06:58 +01:00
Kconfig.cpu
Kconfig.debug
Makefile Merge branch 'x86-build-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip 2014-10-13 18:17:33 +02:00
Makefile_32.cpu
Makefile.um