linux/arch/x86/kernel/cpu/bugs.c
Suresh Siddha 304bceda6a x86, fpu: use non-lazy fpu restore for processors supporting xsave
Fundamental model of the current Linux kernel is to lazily init and
restore FPU instead of restoring the task state during context switch.
This changes that fundamental lazy model to the non-lazy model for
the processors supporting xsave feature.

Reasons driving this model change are:

i. Newer processors support optimized state save/restore using xsaveopt and
xrstor by tracking the INIT state and MODIFIED state during context-switch.
This is faster than modifying the cr0.TS bit which has serializing semantics.

ii. Newer glibc versions use SSE for some of the optimized copy/clear routines.
With certain workloads (like boot, kernel-compilation etc), application
completes its work with in the first 5 task switches, thus taking upto 5 #DNA
traps with the kernel not getting a chance to apply the above mentioned
pre-load heuristic.

iii. Some xstate features (like AMD's LWP feature) don't honor the cr0.TS bit
and thus will not work correctly in the presence of lazy restore. Non-lazy
state restore is needed for enabling such features.

Some data on a two socket SNB system:
 * Saved 20K DNA exceptions during boot on a two socket SNB system.
 * Saved 50K DNA exceptions during kernel-compilation workload.
 * Improved throughput of the AVX based checksumming function inside the
   kernel by ~15% as xsave/xrstor is faster than the serializing clts/stts
   pair.

Also now kernel_fpu_begin/end() relies on the patched
alternative instructions. So move check_fpu() which uses the
kernel_fpu_begin/end() after alternative_instructions().

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/1345842782-24175-7-git-send-email-suresh.b.siddha@intel.com
Merge 32-bit boot fix from,
Link: http://lkml.kernel.org/r/1347300665-6209-4-git-send-email-suresh.b.siddha@intel.com
Cc: Jim Kukunas <james.t.kukunas@linux.intel.com>
Cc: NeilBrown <neilb@suse.de>
Cc: Avi Kivity <avi@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
2012-09-18 15:52:11 -07:00

180 lines
3.8 KiB
C

/*
* Copyright (C) 1994 Linus Torvalds
*
* Cyrix stuff, June 1998 by:
* - Rafael R. Reilova (moved everything from head.S),
* <rreilova@ececs.uc.edu>
* - Channing Corn (tests & fixes),
* - Andrew D. Balsa (code cleanup).
*/
#include <linux/init.h>
#include <linux/utsname.h>
#include <asm/bugs.h>
#include <asm/processor.h>
#include <asm/processor-flags.h>
#include <asm/i387.h>
#include <asm/msr.h>
#include <asm/paravirt.h>
#include <asm/alternative.h>
static int __init no_halt(char *s)
{
WARN_ONCE(1, "\"no-hlt\" is deprecated, please use \"idle=poll\"\n");
boot_cpu_data.hlt_works_ok = 0;
return 1;
}
__setup("no-hlt", no_halt);
static int __init no_387(char *s)
{
boot_cpu_data.hard_math = 0;
write_cr0(X86_CR0_TS | X86_CR0_EM | X86_CR0_MP | read_cr0());
return 1;
}
__setup("no387", no_387);
static double __initdata x = 4195835.0;
static double __initdata y = 3145727.0;
/*
* This used to check for exceptions..
* However, it turns out that to support that,
* the XMM trap handlers basically had to
* be buggy. So let's have a correct XMM trap
* handler, and forget about printing out
* some status at boot.
*
* We should really only care about bugs here
* anyway. Not features.
*/
static void __init check_fpu(void)
{
s32 fdiv_bug;
if (!boot_cpu_data.hard_math) {
#ifndef CONFIG_MATH_EMULATION
pr_emerg("No coprocessor found and no math emulation present\n");
pr_emerg("Giving up\n");
for (;;) ;
#endif
return;
}
kernel_fpu_begin();
/*
* trap_init() enabled FXSR and company _before_ testing for FP
* problems here.
*
* Test for the divl bug..
*/
__asm__("fninit\n\t"
"fldl %1\n\t"
"fdivl %2\n\t"
"fmull %2\n\t"
"fldl %1\n\t"
"fsubp %%st,%%st(1)\n\t"
"fistpl %0\n\t"
"fwait\n\t"
"fninit"
: "=m" (*&fdiv_bug)
: "m" (*&x), "m" (*&y));
kernel_fpu_end();
boot_cpu_data.fdiv_bug = fdiv_bug;
if (boot_cpu_data.fdiv_bug)
pr_warn("Hmm, FPU with FDIV bug\n");
}
static void __init check_hlt(void)
{
if (boot_cpu_data.x86 >= 5 || paravirt_enabled())
return;
pr_info("Checking 'hlt' instruction... ");
if (!boot_cpu_data.hlt_works_ok) {
pr_cont("disabled\n");
return;
}
halt();
halt();
halt();
halt();
pr_cont("OK\n");
}
/*
* Most 386 processors have a bug where a POPAD can lock the
* machine even from user space.
*/
static void __init check_popad(void)
{
#ifndef CONFIG_X86_POPAD_OK
int res, inp = (int) &res;
pr_info("Checking for popad bug... ");
__asm__ __volatile__(
"movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
: "=&a" (res)
: "d" (inp)
: "ecx", "edi");
/*
* If this fails, it means that any user program may lock the
* CPU hard. Too bad.
*/
if (res != 12345678)
pr_cont("Buggy\n");
else
pr_cont("OK\n");
#endif
}
/*
* Check whether we are able to run this kernel safely on SMP.
*
* - In order to run on a i386, we need to be compiled for i386
* (for due to lack of "invlpg" and working WP on a i386)
* - In order to run on anything without a TSC, we need to be
* compiled for a i486.
*/
static void __init check_config(void)
{
/*
* We'd better not be a i386 if we're configured to use some
* i486+ only features! (WP works in supervisor mode and the
* new "invlpg" and "bswap" instructions)
*/
#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || \
defined(CONFIG_X86_BSWAP)
if (boot_cpu_data.x86 == 3)
panic("Kernel requires i486+ for 'invlpg' and other features");
#endif
}
void __init check_bugs(void)
{
identify_boot_cpu();
#ifndef CONFIG_SMP
pr_info("CPU: ");
print_cpu_info(&boot_cpu_data);
#endif
check_config();
check_hlt();
check_popad();
init_utsname()->machine[1] =
'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
alternative_instructions();
/*
* kernel_fpu_begin/end() in check_fpu() relies on the patched
* alternative instructions.
*/
check_fpu();
}