linux/arch/x86/include/asm/fpu-internal.h
Suresh Siddha 0ca5bd0d88 x86, fpu: Consolidate inline asm routines for saving/restoring fpu state
Consolidate x86, x86_64 inline asm routines saving/restoring fpu state
using config_enabled().

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/1343171129-2747-3-git-send-email-suresh.b.siddha@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
2012-09-18 15:51:26 -07:00

515 lines
13 KiB
C

/*
* Copyright (C) 1994 Linus Torvalds
*
* Pentium III FXSR, SSE support
* General FPU state handling cleanups
* Gareth Hughes <gareth@valinux.com>, May 2000
* x86-64 work by Andi Kleen 2002
*/
#ifndef _FPU_INTERNAL_H
#define _FPU_INTERNAL_H
#include <linux/kernel_stat.h>
#include <linux/regset.h>
#include <linux/compat.h>
#include <linux/slab.h>
#include <asm/asm.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/sigcontext.h>
#include <asm/user.h>
#include <asm/uaccess.h>
#include <asm/xsave.h>
extern unsigned int sig_xstate_size;
extern void fpu_init(void);
DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);
extern user_regset_active_fn fpregs_active, xfpregs_active;
extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
xstateregs_get;
extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
xstateregs_set;
/*
* xstateregs_active == fpregs_active. Please refer to the comment
* at the definition of fpregs_active.
*/
#define xstateregs_active fpregs_active
extern struct _fpx_sw_bytes fx_sw_reserved;
#ifdef CONFIG_IA32_EMULATION
extern unsigned int sig_xstate_ia32_size;
extern struct _fpx_sw_bytes fx_sw_reserved_ia32;
struct _fpstate_ia32;
struct _xstate_ia32;
extern int save_i387_xstate_ia32(void __user *buf);
extern int restore_i387_xstate_ia32(void __user *buf);
#endif
#ifdef CONFIG_MATH_EMULATION
extern void finit_soft_fpu(struct i387_soft_struct *soft);
#else
static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
#endif
static inline int is_ia32_compat_frame(void)
{
return config_enabled(CONFIG_IA32_EMULATION) &&
test_thread_flag(TIF_IA32);
}
static inline int is_ia32_frame(void)
{
return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame();
}
static inline int is_x32_frame(void)
{
return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32);
}
#define X87_FSW_ES (1 << 7) /* Exception Summary */
static __always_inline __pure bool use_xsaveopt(void)
{
return static_cpu_has(X86_FEATURE_XSAVEOPT);
}
static __always_inline __pure bool use_xsave(void)
{
return static_cpu_has(X86_FEATURE_XSAVE);
}
static __always_inline __pure bool use_fxsr(void)
{
return static_cpu_has(X86_FEATURE_FXSR);
}
extern void __sanitize_i387_state(struct task_struct *);
static inline void sanitize_i387_state(struct task_struct *tsk)
{
if (!use_xsaveopt())
return;
__sanitize_i387_state(tsk);
}
#define check_insn(insn, output, input...) \
({ \
int err; \
asm volatile("1:" #insn "\n\t" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: movl $-1,%[err]\n" \
" jmp 2b\n" \
".previous\n" \
_ASM_EXTABLE(1b, 3b) \
: [err] "=r" (err), output \
: "0"(0), input); \
err; \
})
static inline int fsave_user(struct i387_fsave_struct __user *fx)
{
return check_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx));
}
static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
{
int err;
/*
* Clear the bytes not touched by the fxsave and reserved
* for the SW usage.
*/
err = __clear_user(&fx->sw_reserved,
sizeof(struct _fpx_sw_bytes));
if (unlikely(err))
return -EFAULT;
if (config_enabled(CONFIG_X86_32))
return check_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
else if (config_enabled(CONFIG_AS_FXSAVEQ))
return check_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
/* See comment in fpu_fxsave() below. */
return check_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
}
static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
{
if (config_enabled(CONFIG_X86_32))
return check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
else if (config_enabled(CONFIG_AS_FXSAVEQ))
return check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
/* See comment in fpu_fxsave() below. */
return check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
"m" (*fx));
}
static inline int frstor_checking(struct i387_fsave_struct *fx)
{
return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline void fpu_fxsave(struct fpu *fpu)
{
if (config_enabled(CONFIG_X86_32))
asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave));
else if (config_enabled(CONFIG_AS_FXSAVEQ))
asm volatile("fxsaveq %0" : "=m" (fpu->state->fxsave));
else {
/* Using "rex64; fxsave %0" is broken because, if the memory
* operand uses any extended registers for addressing, a second
* REX prefix will be generated (to the assembler, rex64
* followed by semicolon is a separate instruction), and hence
* the 64-bitness is lost.
*
* Using "fxsaveq %0" would be the ideal choice, but is only
* supported starting with gas 2.16.
*
* Using, as a workaround, the properly prefixed form below
* isn't accepted by any binutils version so far released,
* complaining that the same type of prefix is used twice if
* an extended register is needed for addressing (fix submitted
* to mainline 2005-11-21).
*
* asm volatile("rex64/fxsave %0" : "=m" (fpu->state->fxsave));
*
* This, however, we can work around by forcing the compiler to
* select an addressing mode that doesn't require extended
* registers.
*/
asm volatile( "rex64/fxsave (%[fx])"
: "=m" (fpu->state->fxsave)
: [fx] "R" (&fpu->state->fxsave));
}
}
#ifdef CONFIG_X86_64
int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
compat_sigset_t *set, struct pt_regs *regs);
int ia32_setup_frame(int sig, struct k_sigaction *ka,
compat_sigset_t *set, struct pt_regs *regs);
#else /* CONFIG_X86_32 */
#define ia32_setup_frame __setup_frame
#define ia32_setup_rt_frame __setup_rt_frame
#endif /* CONFIG_X86_64 */
/*
* These must be called with preempt disabled. Returns
* 'true' if the FPU state is still intact.
*/
static inline int fpu_save_init(struct fpu *fpu)
{
if (use_xsave()) {
fpu_xsave(fpu);
/*
* xsave header may indicate the init state of the FP.
*/
if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
return 1;
} else if (use_fxsr()) {
fpu_fxsave(fpu);
} else {
asm volatile("fnsave %[fx]; fwait"
: [fx] "=m" (fpu->state->fsave));
return 0;
}
/*
* If exceptions are pending, we need to clear them so
* that we don't randomly get exceptions later.
*
* FIXME! Is this perhaps only true for the old-style
* irq13 case? Maybe we could leave the x87 state
* intact otherwise?
*/
if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
asm volatile("fnclex");
return 0;
}
return 1;
}
static inline int __save_init_fpu(struct task_struct *tsk)
{
return fpu_save_init(&tsk->thread.fpu);
}
static inline int fpu_restore_checking(struct fpu *fpu)
{
if (use_xsave())
return fpu_xrstor_checking(&fpu->state->xsave);
else if (use_fxsr())
return fxrstor_checking(&fpu->state->fxsave);
else
return frstor_checking(&fpu->state->fsave);
}
static inline int restore_fpu_checking(struct task_struct *tsk)
{
/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
is pending. Clear the x87 state here by setting it to fixed
values. "m" is a random variable that should be in L1 */
alternative_input(
ASM_NOP8 ASM_NOP2,
"emms\n\t" /* clear stack tags */
"fildl %P[addr]", /* set F?P to defined value */
X86_FEATURE_FXSAVE_LEAK,
[addr] "m" (tsk->thread.fpu.has_fpu));
return fpu_restore_checking(&tsk->thread.fpu);
}
/*
* Software FPU state helpers. Careful: these need to
* be preemption protection *and* they need to be
* properly paired with the CR0.TS changes!
*/
static inline int __thread_has_fpu(struct task_struct *tsk)
{
return tsk->thread.fpu.has_fpu;
}
/* Must be paired with an 'stts' after! */
static inline void __thread_clear_has_fpu(struct task_struct *tsk)
{
tsk->thread.fpu.has_fpu = 0;
this_cpu_write(fpu_owner_task, NULL);
}
/* Must be paired with a 'clts' before! */
static inline void __thread_set_has_fpu(struct task_struct *tsk)
{
tsk->thread.fpu.has_fpu = 1;
this_cpu_write(fpu_owner_task, tsk);
}
/*
* Encapsulate the CR0.TS handling together with the
* software flag.
*
* These generally need preemption protection to work,
* do try to avoid using these on their own.
*/
static inline void __thread_fpu_end(struct task_struct *tsk)
{
__thread_clear_has_fpu(tsk);
stts();
}
static inline void __thread_fpu_begin(struct task_struct *tsk)
{
clts();
__thread_set_has_fpu(tsk);
}
/*
* FPU state switching for scheduling.
*
* This is a two-stage process:
*
* - switch_fpu_prepare() saves the old state and
* sets the new state of the CR0.TS bit. This is
* done within the context of the old process.
*
* - switch_fpu_finish() restores the new state as
* necessary.
*/
typedef struct { int preload; } fpu_switch_t;
/*
* FIXME! We could do a totally lazy restore, but we need to
* add a per-cpu "this was the task that last touched the FPU
* on this CPU" variable, and the task needs to have a "I last
* touched the FPU on this CPU" and check them.
*
* We don't do that yet, so "fpu_lazy_restore()" always returns
* false, but some day..
*/
static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
{
return new == this_cpu_read_stable(fpu_owner_task) &&
cpu == new->thread.fpu.last_cpu;
}
static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
{
fpu_switch_t fpu;
fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
if (__thread_has_fpu(old)) {
if (!__save_init_fpu(old))
cpu = ~0;
old->thread.fpu.last_cpu = cpu;
old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */
/* Don't change CR0.TS if we just switch! */
if (fpu.preload) {
new->fpu_counter++;
__thread_set_has_fpu(new);
prefetch(new->thread.fpu.state);
} else
stts();
} else {
old->fpu_counter = 0;
old->thread.fpu.last_cpu = ~0;
if (fpu.preload) {
new->fpu_counter++;
if (fpu_lazy_restore(new, cpu))
fpu.preload = 0;
else
prefetch(new->thread.fpu.state);
__thread_fpu_begin(new);
}
}
return fpu;
}
/*
* By the time this gets called, we've already cleared CR0.TS and
* given the process the FPU if we are going to preload the FPU
* state - all we need to do is to conditionally restore the register
* state itself.
*/
static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
{
if (fpu.preload) {
if (unlikely(restore_fpu_checking(new)))
__thread_fpu_end(new);
}
}
/*
* Signal frame handlers...
*/
extern int save_i387_xstate(void __user *buf);
extern int restore_i387_xstate(void __user *buf);
static inline void __clear_fpu(struct task_struct *tsk)
{
if (__thread_has_fpu(tsk)) {
/* Ignore delayed exceptions from user space */
asm volatile("1: fwait\n"
"2:\n"
_ASM_EXTABLE(1b, 2b));
__thread_fpu_end(tsk);
}
}
/*
* The actual user_fpu_begin/end() functions
* need to be preemption-safe.
*
* NOTE! user_fpu_end() must be used only after you
* have saved the FP state, and user_fpu_begin() must
* be used only immediately before restoring it.
* These functions do not do any save/restore on
* their own.
*/
static inline void user_fpu_end(void)
{
preempt_disable();
__thread_fpu_end(current);
preempt_enable();
}
static inline void user_fpu_begin(void)
{
preempt_disable();
if (!user_has_fpu())
__thread_fpu_begin(current);
preempt_enable();
}
/*
* These disable preemption on their own and are safe
*/
static inline void save_init_fpu(struct task_struct *tsk)
{
WARN_ON_ONCE(!__thread_has_fpu(tsk));
preempt_disable();
__save_init_fpu(tsk);
__thread_fpu_end(tsk);
preempt_enable();
}
static inline void clear_fpu(struct task_struct *tsk)
{
preempt_disable();
__clear_fpu(tsk);
preempt_enable();
}
/*
* i387 state interaction
*/
static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
{
if (cpu_has_fxsr) {
return tsk->thread.fpu.state->fxsave.cwd;
} else {
return (unsigned short)tsk->thread.fpu.state->fsave.cwd;
}
}
static inline unsigned short get_fpu_swd(struct task_struct *tsk)
{
if (cpu_has_fxsr) {
return tsk->thread.fpu.state->fxsave.swd;
} else {
return (unsigned short)tsk->thread.fpu.state->fsave.swd;
}
}
static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
{
if (cpu_has_xmm) {
return tsk->thread.fpu.state->fxsave.mxcsr;
} else {
return MXCSR_DEFAULT;
}
}
static bool fpu_allocated(struct fpu *fpu)
{
return fpu->state != NULL;
}
static inline int fpu_alloc(struct fpu *fpu)
{
if (fpu_allocated(fpu))
return 0;
fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
if (!fpu->state)
return -ENOMEM;
WARN_ON((unsigned long)fpu->state & 15);
return 0;
}
static inline void fpu_free(struct fpu *fpu)
{
if (fpu->state) {
kmem_cache_free(task_xstate_cachep, fpu->state);
fpu->state = NULL;
}
}
static inline void fpu_copy(struct fpu *dst, struct fpu *src)
{
memcpy(dst->state, src->state, xstate_size);
}
extern void fpu_finit(struct fpu *fpu);
#endif