304bceda6a
The fundamental model of the current Linux kernel is to lazily init and restore the FPU instead of restoring the task state during context switch. This changes that fundamental lazy model to the non-lazy model for the processors supporting the xsave feature. Reasons driving this model change are: i. Newer processors support optimized state save/restore using xsaveopt and xrstor by tracking the INIT state and MODIFIED state during context-switch. This is faster than modifying the cr0.TS bit which has serializing semantics. ii. Newer glibc versions use SSE for some of the optimized copy/clear routines. With certain workloads (like boot, kernel-compilation etc.), the application completes its work within the first 5 task switches, thus taking up to 5 #DNA traps with the kernel not getting a chance to apply the above-mentioned pre-load heuristic. iii. Some xstate features (like AMD's LWP feature) don't honor the cr0.TS bit and thus will not work correctly in the presence of lazy restore. Non-lazy state restore is needed for enabling such features. Some data on a two socket SNB system: * Saved 20K DNA exceptions during boot on a two socket SNB system. * Saved 50K DNA exceptions during kernel-compilation workload. * Improved throughput of the AVX based checksumming function inside the kernel by ~15% as xsave/xrstor is faster than the serializing clts/stts pair. Also now kernel_fpu_begin/end() relies on the patched alternative instructions. So move check_fpu() which uses the kernel_fpu_begin/end() after alternative_instructions(). Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com> Link: http://lkml.kernel.org/r/1345842782-24175-7-git-send-email-suresh.b.siddha@intel.com Merge 32-bit boot fix from, Link: http://lkml.kernel.org/r/1347300665-6209-4-git-send-email-suresh.b.siddha@intel.com Cc: Jim Kukunas <james.t.kukunas@linux.intel.com> Cc: NeilBrown <neilb@suse.de> Cc: Avi Kivity <avi@redhat.com> Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
180 lines
3.8 KiB
C
180 lines
3.8 KiB
C
/*
|
|
* Copyright (C) 1994 Linus Torvalds
|
|
*
|
|
* Cyrix stuff, June 1998 by:
|
|
* - Rafael R. Reilova (moved everything from head.S),
|
|
* <rreilova@ececs.uc.edu>
|
|
* - Channing Corn (tests & fixes),
|
|
* - Andrew D. Balsa (code cleanup).
|
|
*/
|
|
#include <linux/init.h>
|
|
#include <linux/utsname.h>
|
|
#include <asm/bugs.h>
|
|
#include <asm/processor.h>
|
|
#include <asm/processor-flags.h>
|
|
#include <asm/i387.h>
|
|
#include <asm/msr.h>
|
|
#include <asm/paravirt.h>
|
|
#include <asm/alternative.h>
|
|
|
|
static int __init no_halt(char *s)
|
|
{
|
|
WARN_ONCE(1, "\"no-hlt\" is deprecated, please use \"idle=poll\"\n");
|
|
boot_cpu_data.hlt_works_ok = 0;
|
|
return 1;
|
|
}
|
|
|
|
__setup("no-hlt", no_halt);
|
|
|
|
static int __init no_387(char *s)
|
|
{
|
|
boot_cpu_data.hard_math = 0;
|
|
write_cr0(X86_CR0_TS | X86_CR0_EM | X86_CR0_MP | read_cr0());
|
|
return 1;
|
|
}
|
|
|
|
__setup("no387", no_387);
|
|
|
|
static double __initdata x = 4195835.0;
|
|
static double __initdata y = 3145727.0;
|
|
|
|
/*
|
|
* This used to check for exceptions..
|
|
* However, it turns out that to support that,
|
|
* the XMM trap handlers basically had to
|
|
* be buggy. So let's have a correct XMM trap
|
|
* handler, and forget about printing out
|
|
* some status at boot.
|
|
*
|
|
* We should really only care about bugs here
|
|
* anyway. Not features.
|
|
*/
|
|
static void __init check_fpu(void)
|
|
{
|
|
s32 fdiv_bug;
|
|
|
|
if (!boot_cpu_data.hard_math) {
|
|
#ifndef CONFIG_MATH_EMULATION
|
|
pr_emerg("No coprocessor found and no math emulation present\n");
|
|
pr_emerg("Giving up\n");
|
|
for (;;) ;
|
|
#endif
|
|
return;
|
|
}
|
|
|
|
kernel_fpu_begin();
|
|
|
|
/*
|
|
* trap_init() enabled FXSR and company _before_ testing for FP
|
|
* problems here.
|
|
*
|
|
* Test for the divl bug..
|
|
*/
|
|
__asm__("fninit\n\t"
|
|
"fldl %1\n\t"
|
|
"fdivl %2\n\t"
|
|
"fmull %2\n\t"
|
|
"fldl %1\n\t"
|
|
"fsubp %%st,%%st(1)\n\t"
|
|
"fistpl %0\n\t"
|
|
"fwait\n\t"
|
|
"fninit"
|
|
: "=m" (*&fdiv_bug)
|
|
: "m" (*&x), "m" (*&y));
|
|
|
|
kernel_fpu_end();
|
|
|
|
boot_cpu_data.fdiv_bug = fdiv_bug;
|
|
if (boot_cpu_data.fdiv_bug)
|
|
pr_warn("Hmm, FPU with FDIV bug\n");
|
|
}
|
|
|
|
/*
 * Exercise the 'hlt' instruction at boot.
 *
 * Nothing is done for CPU family 5 and above or when paravirt_enabled()
 * is true.  Otherwise a progress line is printed; if hlt_works_ok has
 * been cleared (see the "no-hlt" handler) the check reports "disabled",
 * else halt() is executed four times and "OK" is reported.
 */
static void __init check_hlt(void)
{
	int i;

	if (boot_cpu_data.x86 >= 5)
		return;
	if (paravirt_enabled())
		return;

	pr_info("Checking 'hlt' instruction... ");

	if (boot_cpu_data.hlt_works_ok) {
		for (i = 0; i < 4; i++)
			halt();
		pr_cont("OK\n");
	} else {
		pr_cont("disabled\n");
	}
}
|
|
|
|
/*
|
|
* Most 386 processors have a bug where a POPAD can lock the
|
|
* machine even from user space.
|
|
*/
|
|
|
|
static void __init check_popad(void)
|
|
{
|
|
#ifndef CONFIG_X86_POPAD_OK
|
|
int res, inp = (int) &res;
|
|
|
|
pr_info("Checking for popad bug... ");
|
|
__asm__ __volatile__(
|
|
"movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
|
|
: "=&a" (res)
|
|
: "d" (inp)
|
|
: "ecx", "edi");
|
|
/*
|
|
* If this fails, it means that any user program may lock the
|
|
* CPU hard. Too bad.
|
|
*/
|
|
if (res != 12345678)
|
|
pr_cont("Buggy\n");
|
|
else
|
|
pr_cont("OK\n");
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* Check whether we are able to run this kernel safely on SMP.
|
|
*
|
|
* - In order to run on a i386, we need to be compiled for i386
|
|
* (for due to lack of "invlpg" and working WP on a i386)
|
|
* - In order to run on anything without a TSC, we need to be
|
|
* compiled for a i486.
|
|
*/
|
|
|
|
static void __init check_config(void)
|
|
{
|
|
/*
|
|
* We'd better not be a i386 if we're configured to use some
|
|
* i486+ only features! (WP works in supervisor mode and the
|
|
* new "invlpg" and "bswap" instructions)
|
|
*/
|
|
#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || \
|
|
defined(CONFIG_X86_BSWAP)
|
|
if (boot_cpu_data.x86 == 3)
|
|
panic("Kernel requires i486+ for 'invlpg' and other features");
|
|
#endif
|
|
}
|
|
|
|
|
|
/*
 * Boot-time entry point for the CPU bug checks in this file.
 *
 * Order of operations:
 *   1. identify the boot CPU (and, on !CONFIG_SMP builds, print its info);
 *   2. run the configuration, hlt and popad checks;
 *   3. patch the second character of the utsname machine string with the
 *      CPU family digit, capped at '6';
 *   4. apply the alternative instructions;
 *   5. run the FPU check last.
 */
void __init check_bugs(void)
{
	identify_boot_cpu();

#ifndef CONFIG_SMP
	pr_info("CPU: ");
	print_cpu_info(&boot_cpu_data);
#endif

	check_config();
	check_hlt();
	check_popad();

	/* Families above 6 are reported as 6. */
	init_utsname()->machine[1] =
		'0' + (boot_cpu_data.x86 <= 6 ? boot_cpu_data.x86 : 6);

	alternative_instructions();

	/*
	 * kernel_fpu_begin/end() in check_fpu() relies on the patched
	 * alternative instructions.
	 */
	check_fpu();
}
|