The PAUSE instruction is currently used in the retpoline and RSB filling macros as a speculation trap. The use of PAUSE was originally suggested because it showed a very, very small difference in the number of cycles/time used to execute the retpoline as compared to LFENCE. On AMD, the PAUSE instruction is not a serializing instruction, so the pause/jmp loop will use excess power as it is speculated over while waiting for the return to mispredict to the correct target. The RSB filling macro is applicable to AMD, and, if software is unable to verify that LFENCE is serializing on AMD (possible when running under a hypervisor), the generic retpoline support will be used, so it is also applicable to AMD. Keep the current usage of PAUSE for Intel, but add an LFENCE instruction to the speculation trap for AMD. The same sequence has been adopted by GCC for the GCC-generated retpolines. Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Borislav Petkov <bp@alien8.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Acked-by: Arjan van de Ven <arjan@linux.intel.com> Cc: Rik van Riel <riel@redhat.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Paul Turner <pjt@google.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Tim Chen <tim.c.chen@linux.intel.com> Cc: Jiri Kosina <jikos@kernel.org> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org> Cc: Kees Cook <keescook@google.com> Link: https://lkml.kernel.org/r/20180113232730.31060.36287.stgit@tlendack-t1.amdoffice.net
219 lines
6.0 KiB
C
219 lines
6.0 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
|
|
#ifndef __NOSPEC_BRANCH_H__
|
|
#define __NOSPEC_BRANCH_H__
|
|
|
|
#include <asm/alternative.h>
|
|
#include <asm/alternative-asm.h>
|
|
#include <asm/cpufeatures.h>
|
|
|
|
/*
|
|
* Fill the CPU return stack buffer.
|
|
*
|
|
* Each entry in the RSB, if used for a speculative 'ret', contains an
|
|
* infinite 'pause; lfence; jmp' loop to capture speculative execution.
|
|
*
|
|
* This is required in various cases for retpoline and IBRS-based
|
|
* mitigations for the Spectre variant 2 vulnerability. Sometimes to
|
|
* eliminate potentially bogus entries from the RSB, and sometimes
|
|
* purely to ensure that it doesn't get empty, which on some CPUs would
|
|
* allow predictions from other (unwanted!) sources to be used.
|
|
*
|
|
* We define a CPP macro such that it can be used from both .S files and
|
|
* inline assembly. It's possible to do a .macro and then include that
|
|
* from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
|
|
*/
|
|
|
|
/*
 * Call counts intended for the 'nr' argument of __FILL_RETURN_BUFFER.
 * That macro issues two calls per loop pass, so 'nr' is the total number
 * of calls made, not the number of loop passes.
 *
 * These have to be preprocessor defines: vmexit_fill_RSB() pastes
 * RSB_CLEAR_LOOPS into assembler text through __stringify(), which only
 * works for names the preprocessor can expand.
 *
 * NOTE(review): RSB_FILL_LOOPS has no user inside this header; presumably
 * it is passed to FILL_RETURN_BUFFER by callers elsewhere - confirm.
 */
#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
|
|
#define RSB_FILL_LOOPS 16 /* To avoid underflow */
|
|
|
|
/*
 * __FILL_RETURN_BUFFER(reg, nr, sp) - emit the return-stack stuffing loop.
 *
 * reg: scratch register used as the loop counter. It is loaded with nr/2
 *      and decremented to zero, so whatever it held before is lost.
 * nr:  total number of calls to execute. Every loop pass performs two
 *      calls, while the final 'add' always releases nr stack slots, so nr
 *      should be a non-zero even constant to leave the stack balanced.
 * sp:  the stack pointer operand; it is adjusted once, after the loop, to
 *      drop the return addresses pushed by the calls.
 *
 * The return address pushed by each call points at a 'pause; lfence; jmp'
 * loop (labels 773 and 775) which acts as the speculation trap. Besides
 * reg and sp, the sequence changes the arithmetic flags (dec/add).
 *
 * Google experimented with loop-unrolling and this turned out to be
 * the optimal version - two calls, each with their own speculation
 * trap should their return address end up getting used, in a loop.
 */
|
|
#define __FILL_RETURN_BUFFER(reg, nr, sp) \
|
|
mov $(nr/2), reg; \
|
|
771: \
|
|
call 772f; \
|
|
773: /* speculation trap */ \
|
|
pause; \
|
|
lfence; \
|
|
jmp 773b; \
|
|
772: \
|
|
call 774f; \
|
|
775: /* speculation trap */ \
|
|
pause; \
|
|
lfence; \
|
|
jmp 775b; \
|
|
774: \
|
|
dec reg; \
|
|
jnz 771b; \
|
|
add $(BITS_PER_LONG/8) * nr, sp;
|
|
|
|
#ifdef __ASSEMBLY__
|
|
|
|
/*
 * ANNOTATE_NOSPEC_ALTERNATIVE - mark the position of a retpoline alternative.
 *
 * This should be used immediately before a retpoline alternative. It tells
 * objtool where the retpolines are so that it can make sense of the control
 * flow by just reading the original instruction(s) and ignoring the
 * alternatives.
 *
 * The macro places a local label at the current position and then emits,
 * into the .discard.nospec section, a .long holding the distance from that
 * entry to the label. The '\@' suffix is the assembler's per-expansion
 * counter, which keeps the label name unique for every use of the macro.
 */
|
|
.macro ANNOTATE_NOSPEC_ALTERNATIVE
|
|
.Lannotate_\@:
|
|
.pushsection .discard.nospec
|
|
.long .Lannotate_\@ - .
|
|
.popsection
|
|
.endm
|
|
|
|
/*
 * RETPOLINE_JMP reg - jump to the address held in \reg by way of a 'ret'.
 *
 * These are the bare retpoline primitives for indirect jmp and call.
 * Do not use these directly; they only exist to make the ALTERNATIVE
 * invocation below less ugly.
 *
 * The leading call pushes the address of .Lspec_trap as its return
 * address. At .Ldo_rop that slot (addressed through %_ASM_SP) is
 * overwritten with the real target from \reg, and 'ret' then transfers
 * control there. A return that is instead predicted from the call ends up
 * in the 'pause; lfence; jmp' loop at .Lspec_trap, which never exits.
 */
|
|
.macro RETPOLINE_JMP reg:req
|
|
call .Ldo_rop_\@
|
|
.Lspec_trap_\@:
|
|
pause
|
|
lfence
|
|
jmp .Lspec_trap_\@
|
|
.Ldo_rop_\@:
|
|
mov \reg, (%_ASM_SP)
|
|
ret
|
|
.endm
|
|
|
|
/*
 * RETPOLINE_CALL reg - call the address held in \reg through a retpoline.
 *
 * This is a wrapper around RETPOLINE_JMP so the called function in reg
 * returns to the instruction after the macro.
 *
 * Execution first jumps over the embedded RETPOLINE_JMP to .Ldo_call. The
 * call placed there is the last instruction of the macro, so the return
 * address it pushes is the instruction following the macro; it then enters
 * RETPOLINE_JMP, which performs the actual transfer to \reg.
 */
|
|
.macro RETPOLINE_CALL reg:req
|
|
jmp .Ldo_call_\@
|
|
.Ldo_retpoline_jmp_\@:
|
|
RETPOLINE_JMP \reg
|
|
.Ldo_call_\@:
|
|
call .Ldo_retpoline_jmp_\@
|
|
.endm
|
|
|
|
/*
 * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
 * indirect jmp/call which may be susceptible to the Spectre variant 2
 * attack.
 *
 * JMP_NOSPEC reg - indirect jump to the address in \reg.
 *
 * With CONFIG_RETPOLINE the macro annotates the site for objtool and hands
 * ALTERNATIVE_2 three candidate sequences:
 *   - the plain 'jmp *\reg' as the original instruction,
 *   - RETPOLINE_JMP, tied to X86_FEATURE_RETPOLINE,
 *   - 'lfence; jmp *\reg', tied to X86_FEATURE_RETPOLINE_AMD.
 * Without CONFIG_RETPOLINE it is just the plain indirect jump.
 *
 * NOTE(review): how ALTERNATIVE_2 picks between the replacements is defined
 * outside this header (presumably <asm/alternative-asm.h>) - confirm there.
 */
|
|
.macro JMP_NOSPEC reg:req
|
|
#ifdef CONFIG_RETPOLINE
|
|
ANNOTATE_NOSPEC_ALTERNATIVE
|
|
ALTERNATIVE_2 __stringify(jmp *\reg), \
|
|
__stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
|
|
__stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
|
|
#else
|
|
jmp *\reg
|
|
#endif
|
|
.endm
|
|
|
|
/*
 * CALL_NOSPEC reg - indirect call to the address in \reg.
 *
 * With CONFIG_RETPOLINE the macro annotates the site for objtool and hands
 * ALTERNATIVE_2 three candidate sequences:
 *   - the plain 'call *\reg' as the original instruction,
 *   - RETPOLINE_CALL, tied to X86_FEATURE_RETPOLINE,
 *   - 'lfence; call *\reg', tied to X86_FEATURE_RETPOLINE_AMD.
 * Without CONFIG_RETPOLINE it is just the plain indirect call.
 */
.macro CALL_NOSPEC reg:req
|
|
#ifdef CONFIG_RETPOLINE
|
|
ANNOTATE_NOSPEC_ALTERNATIVE
|
|
ALTERNATIVE_2 __stringify(call *\reg), \
|
|
__stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
|
|
__stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
|
|
#else
|
|
call *\reg
|
|
#endif
|
|
.endm
|
|
|
|
/*
 * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
 * monstrosity above, manually.
 *
 * FILL_RETURN_BUFFER reg, nr, ftr
 *   reg: scratch register forwarded to __FILL_RETURN_BUFFER as its counter.
 *   nr:  number of calls forwarded to __FILL_RETURN_BUFFER.
 *   ftr: feature flag handed to ALTERNATIVE for the fill sequence.
 *
 * The original instruction is a jump to .Lskip_rsb, the label placed right
 * after the alternative; the replacement tied to \ftr is the fill sequence
 * operating on %_ASM_SP. Without CONFIG_RETPOLINE the macro expands to
 * nothing.
 */
|
|
.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
|
|
#ifdef CONFIG_RETPOLINE
|
|
ANNOTATE_NOSPEC_ALTERNATIVE
|
|
ALTERNATIVE "jmp .Lskip_rsb_\@", \
|
|
__stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
|
|
\ftr
|
|
.Lskip_rsb_\@:
|
|
#endif
|
|
.endm
|
|
|
|
#else /* __ASSEMBLY__ */
|
|
|
|
/*
 * String-literal flavour of ANNOTATE_NOSPEC_ALTERNATIVE for use at the
 * start of a C inline asm template (see CALL_NOSPEC and vmexit_fill_RSB()
 * in this header). It emits the same directives as the assembler macro of
 * the same name: a local label (999) at the current position, followed by
 * a .long in the .discard.nospec section holding the distance from that
 * entry to the label.
 */
#define ANNOTATE_NOSPEC_ALTERNATIVE \
|
|
"999:\n\t" \
|
|
".pushsection .discard.nospec\n\t" \
|
|
".long 999b - .\n\t" \
|
|
".popsection\n\t"
|
|
|
|
/*
 * CALL_NOSPEC / THUNK_TARGET for C inline asm.
 *
 * CALL_NOSPEC is an asm template string performing an indirect call through
 * the operand named [thunk_target]; THUNK_TARGET(addr) produces that named
 * operand with the constraint matching the selected variant.
 * NOTE(review): THUNK_TARGET() looks intended for the input operand list of
 * the same asm statement - confirm against the callers.
 *
 * Three variants are selected at preprocessing time: 64-bit with a
 * retpoline-capable compiler, 32-bit with CONFIG_RETPOLINE, and a plain
 * indirect call otherwise.
 */
#if defined(CONFIG_X86_64) && defined(RETPOLINE)
|
|
|
|
/*
 * Since the inline asm uses the %V modifier which is only in newer GCC,
 * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
 *
 * The replacement calls the __x86_indirect_thunk_<reg> symbol whose name is
 * completed by %V[thunk_target], which is why THUNK_TARGET uses the
 * register-only "r" constraint in this variant.
 */
|
|
# define CALL_NOSPEC \
|
|
ANNOTATE_NOSPEC_ALTERNATIVE \
|
|
ALTERNATIVE( \
|
|
"call *%[thunk_target]\n", \
|
|
"call __x86_indirect_thunk_%V[thunk_target]\n", \
|
|
X86_FEATURE_RETPOLINE)
|
|
# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
|
|
|
|
|
|
#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
|
|
/*
 * For i386 we use the original ret-equivalent retpoline, because
 * otherwise we'll run out of registers. We don't care about CET
 * here, anyway.
 *
 * Flow of the replacement: jump to 904, whose call pushes the address
 * following the sequence and enters 901. The call at 901 pushes the
 * address of the 902 trap loop and continues at 903, where that slot is
 * popped again (addl $4), the real target is pushed in its place and
 * 'ret' jumps to it. The target may live in memory here, hence "rm".
 */
|
|
# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \
|
|
" jmp 904f;\n" \
|
|
" .align 16\n" \
|
|
"901: call 903f;\n" \
|
|
"902: pause;\n" \
|
|
" lfence;\n" \
|
|
" jmp 902b;\n" \
|
|
" .align 16\n" \
|
|
"903: addl $4, %%esp;\n" \
|
|
" pushl %[thunk_target];\n" \
|
|
" ret;\n" \
|
|
" .align 16\n" \
|
|
"904: call 901b;\n", \
|
|
X86_FEATURE_RETPOLINE)
|
|
|
|
|
|
# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
|
|
#else /* No retpoline for C / inline asm */
|
|
# define CALL_NOSPEC "call *%[thunk_target]\n"
|
|
# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
|
|
#endif
|
|
|
|
/*
 * The Spectre V2 mitigation variants.
 *
 * No initializers are given, so the values count up from
 * SPECTRE_V2_NONE == 0 in declaration order. The enum is only declared
 * here; nothing else in this header refers to it.
 *
 * NOTE(review): the *_AMD versus generic split presumably corresponds to
 * the X86_FEATURE_RETPOLINE_AMD / X86_FEATURE_RETPOLINE alternatives used
 * by JMP_NOSPEC and CALL_NOSPEC - confirm against the code that selects
 * the mitigation.
 */
|
|
enum spectre_v2_mitigation {
|
|
SPECTRE_V2_NONE,
|
|
SPECTRE_V2_RETPOLINE_MINIMAL,
|
|
SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
|
|
SPECTRE_V2_RETPOLINE_GENERIC,
|
|
SPECTRE_V2_RETPOLINE_AMD,
|
|
SPECTRE_V2_IBRS,
|
|
};
|
|
|
|
/*
 * On VMEXIT we must ensure that no RSB predictions learned in the guest
 * can be followed in the host, by overwriting the RSB completely. Both
 * retpoline and IBRS mitigations for Spectre v2 need this; only on future
 * CPUs with IBRS_ATT *might* it be avoided.
 *
 * Takes no arguments and returns nothing. Without CONFIG_RETPOLINE the
 * body is empty. Otherwise the original instruction is a jump over the
 * fill sequence to label 910, and the RSB_CLEAR_LOOPS-call fill sequence
 * is the ALTERNATIVE() replacement tied to X86_FEATURE_RETPOLINE.
 */
static inline void vmexit_fill_RSB(void)
{
#ifdef CONFIG_RETPOLINE
	/*
	 * Pure scratch counter: __FILL_RETURN_BUFFER loads it with its own
	 * 'mov' before ever reading it, so it needs neither an initial value
	 * nor an input operand. Passing it in as well would only make the
	 * compiler reserve a second register for a value nobody reads.
	 */
	unsigned long loops;

	asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
		      ALTERNATIVE("jmp 910f",
				  __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
				  X86_FEATURE_RETPOLINE)
		      "910:"
		      /* %0: loop counter, %1: operand from ASM_CALL_CONSTRAINT, used as 'sp' */
		      : "=r" (loops), ASM_CALL_CONSTRAINT
		      : : "memory" );
#endif
}
|
|
#endif /* __ASSEMBLY__ */
|
|
#endif /* __NOSPEC_BRANCH_H__ */
|