forked from Minki/linux
bpf,x86: Respect X86_FEATURE_RETPOLINE*
Current BPF codegen doesn't respect the X86_FEATURE_RETPOLINE* flags and unconditionally emits a thunk call. This is sub-optimal and doesn't match the regular, compiler-generated code.

Update the i386 JIT to emit code equal to what the compiler emits for the regular kernel text (in other words, a plain THUNK call).

Update the x86_64 JIT to emit code similar to the result of the compiler and kernel rewrites, according to the X86_FEATURE_RETPOLINE* flags: inline RETPOLINE_AMD (lfence; jmp *%reg) and !RETPOLINE (jmp *%reg), while doing a THUNK call for RETPOLINE.

This removes the hard-coded retpoline thunks and shrinks the generated code, leaving a single retpoline thunk definition in the kernel.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Tested-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/r/20211026120310.614772675@infradead.org
This commit is contained in:
parent
dceba0817c
commit
87c87ecd00
@ -316,63 +316,4 @@ static inline void mds_idle_clear_cpu_buffers(void)
|
|||||||
|
|
||||||
#endif /* __ASSEMBLY__ */
|
#endif /* __ASSEMBLY__ */
|
||||||
|
|
||||||
/*
|
|
||||||
* Below is used in the eBPF JIT compiler and emits the byte sequence
|
|
||||||
* for the following assembly:
|
|
||||||
*
|
|
||||||
* With retpolines configured:
|
|
||||||
*
|
|
||||||
* callq do_rop
|
|
||||||
* spec_trap:
|
|
||||||
* pause
|
|
||||||
* lfence
|
|
||||||
* jmp spec_trap
|
|
||||||
* do_rop:
|
|
||||||
* mov %rcx,(%rsp) for x86_64
|
|
||||||
* mov %edx,(%esp) for x86_32
|
|
||||||
* retq
|
|
||||||
*
|
|
||||||
* Without retpolines configured:
|
|
||||||
*
|
|
||||||
* jmp *%rcx for x86_64
|
|
||||||
* jmp *%edx for x86_32
|
|
||||||
*/
|
|
||||||
#ifdef CONFIG_RETPOLINE
|
|
||||||
# ifdef CONFIG_X86_64
|
|
||||||
# define RETPOLINE_RCX_BPF_JIT_SIZE 17
|
|
||||||
# define RETPOLINE_RCX_BPF_JIT() \
|
|
||||||
do { \
|
|
||||||
EMIT1_off32(0xE8, 7); /* callq do_rop */ \
|
|
||||||
/* spec_trap: */ \
|
|
||||||
EMIT2(0xF3, 0x90); /* pause */ \
|
|
||||||
EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
|
|
||||||
EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
|
|
||||||
/* do_rop: */ \
|
|
||||||
EMIT4(0x48, 0x89, 0x0C, 0x24); /* mov %rcx,(%rsp) */ \
|
|
||||||
EMIT1(0xC3); /* retq */ \
|
|
||||||
} while (0)
|
|
||||||
# else /* !CONFIG_X86_64 */
|
|
||||||
# define RETPOLINE_EDX_BPF_JIT() \
|
|
||||||
do { \
|
|
||||||
EMIT1_off32(0xE8, 7); /* call do_rop */ \
|
|
||||||
/* spec_trap: */ \
|
|
||||||
EMIT2(0xF3, 0x90); /* pause */ \
|
|
||||||
EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
|
|
||||||
EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
|
|
||||||
/* do_rop: */ \
|
|
||||||
EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \
|
|
||||||
EMIT1(0xC3); /* ret */ \
|
|
||||||
} while (0)
|
|
||||||
# endif
|
|
||||||
#else /* !CONFIG_RETPOLINE */
|
|
||||||
# ifdef CONFIG_X86_64
|
|
||||||
# define RETPOLINE_RCX_BPF_JIT_SIZE 2
|
|
||||||
# define RETPOLINE_RCX_BPF_JIT() \
|
|
||||||
EMIT2(0xFF, 0xE1); /* jmp *%rcx */
|
|
||||||
# else /* !CONFIG_X86_64 */
|
|
||||||
# define RETPOLINE_EDX_BPF_JIT() \
|
|
||||||
EMIT2(0xFF, 0xE2) /* jmp *%edx */
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
|
#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
|
||||||
|
@ -387,6 +387,25 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
|
|||||||
return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
|
return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8)
|
||||||
|
|
||||||
|
static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
|
||||||
|
{
|
||||||
|
u8 *prog = *pprog;
|
||||||
|
|
||||||
|
#ifdef CONFIG_RETPOLINE
|
||||||
|
if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) {
|
||||||
|
EMIT_LFENCE();
|
||||||
|
EMIT2(0xFF, 0xE0 + reg);
|
||||||
|
} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
|
||||||
|
emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
|
||||||
|
} else
|
||||||
|
#endif
|
||||||
|
EMIT2(0xFF, 0xE0 + reg);
|
||||||
|
|
||||||
|
*pprog = prog;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Generate the following code:
|
* Generate the following code:
|
||||||
*
|
*
|
||||||
@ -468,7 +487,7 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
|
|||||||
* rdi == ctx (1st arg)
|
* rdi == ctx (1st arg)
|
||||||
* rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET
|
* rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET
|
||||||
*/
|
*/
|
||||||
RETPOLINE_RCX_BPF_JIT();
|
emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start));
|
||||||
|
|
||||||
/* out: */
|
/* out: */
|
||||||
ctx->tail_call_indirect_label = prog - start;
|
ctx->tail_call_indirect_label = prog - start;
|
||||||
@ -1179,8 +1198,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
|
|||||||
/* speculation barrier */
|
/* speculation barrier */
|
||||||
case BPF_ST | BPF_NOSPEC:
|
case BPF_ST | BPF_NOSPEC:
|
||||||
if (boot_cpu_has(X86_FEATURE_XMM2))
|
if (boot_cpu_has(X86_FEATURE_XMM2))
|
||||||
/* Emit 'lfence' */
|
EMIT_LFENCE();
|
||||||
EMIT3(0x0F, 0xAE, 0xE8);
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* ST: *(u8*)(dst_reg + off) = imm */
|
/* ST: *(u8*)(dst_reg + off) = imm */
|
||||||
@ -2084,24 +2102,6 @@ cleanup:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int emit_fallback_jump(u8 **pprog)
|
|
||||||
{
|
|
||||||
u8 *prog = *pprog;
|
|
||||||
int err = 0;
|
|
||||||
|
|
||||||
#ifdef CONFIG_RETPOLINE
|
|
||||||
/* Note that this assumes the compiler uses external
|
|
||||||
* thunks for indirect calls. Both clang and GCC use the same
|
|
||||||
* naming convention for external thunks.
|
|
||||||
*/
|
|
||||||
err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog);
|
|
||||||
#else
|
|
||||||
EMIT2(0xFF, 0xE2); /* jmp rdx */
|
|
||||||
#endif
|
|
||||||
*pprog = prog;
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
|
static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
|
||||||
{
|
{
|
||||||
u8 *jg_reloc, *prog = *pprog;
|
u8 *jg_reloc, *prog = *pprog;
|
||||||
@ -2123,9 +2123,7 @@ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
|
|||||||
if (err)
|
if (err)
|
||||||
return err;
|
return err;
|
||||||
|
|
||||||
err = emit_fallback_jump(&prog); /* jmp thunk/indirect */
|
emit_indirect_jump(&prog, 2 /* rdx */, prog);
|
||||||
if (err)
|
|
||||||
return err;
|
|
||||||
|
|
||||||
*pprog = prog;
|
*pprog = prog;
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
#include <asm/cacheflush.h>
|
#include <asm/cacheflush.h>
|
||||||
#include <asm/set_memory.h>
|
#include <asm/set_memory.h>
|
||||||
#include <asm/nospec-branch.h>
|
#include <asm/nospec-branch.h>
|
||||||
|
#include <asm/asm-prototypes.h>
|
||||||
#include <linux/bpf.h>
|
#include <linux/bpf.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1267,6 +1268,21 @@ static void emit_epilogue(u8 **pprog, u32 stack_depth)
|
|||||||
*pprog = prog;
|
*pprog = prog;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int emit_jmp_edx(u8 **pprog, u8 *ip)
|
||||||
|
{
|
||||||
|
u8 *prog = *pprog;
|
||||||
|
int cnt = 0;
|
||||||
|
|
||||||
|
#ifdef CONFIG_RETPOLINE
|
||||||
|
EMIT1_off32(0xE9, (u8 *)__x86_indirect_thunk_edx - (ip + 5));
|
||||||
|
#else
|
||||||
|
EMIT2(0xFF, 0xE2);
|
||||||
|
#endif
|
||||||
|
*pprog = prog;
|
||||||
|
|
||||||
|
return cnt;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Generate the following code:
|
* Generate the following code:
|
||||||
* ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
|
* ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
|
||||||
@ -1280,7 +1296,7 @@ static void emit_epilogue(u8 **pprog, u32 stack_depth)
|
|||||||
* goto *(prog->bpf_func + prologue_size);
|
* goto *(prog->bpf_func + prologue_size);
|
||||||
* out:
|
* out:
|
||||||
*/
|
*/
|
||||||
static void emit_bpf_tail_call(u8 **pprog)
|
static void emit_bpf_tail_call(u8 **pprog, u8 *ip)
|
||||||
{
|
{
|
||||||
u8 *prog = *pprog;
|
u8 *prog = *pprog;
|
||||||
int cnt = 0;
|
int cnt = 0;
|
||||||
@ -1362,7 +1378,7 @@ static void emit_bpf_tail_call(u8 **pprog)
|
|||||||
* eax == ctx (1st arg)
|
* eax == ctx (1st arg)
|
||||||
* edx == prog->bpf_func + prologue_size
|
* edx == prog->bpf_func + prologue_size
|
||||||
*/
|
*/
|
||||||
RETPOLINE_EDX_BPF_JIT();
|
cnt += emit_jmp_edx(&prog, ip + cnt);
|
||||||
|
|
||||||
if (jmp_label1 == -1)
|
if (jmp_label1 == -1)
|
||||||
jmp_label1 = cnt;
|
jmp_label1 = cnt;
|
||||||
@ -2122,7 +2138,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case BPF_JMP | BPF_TAIL_CALL:
|
case BPF_JMP | BPF_TAIL_CALL:
|
||||||
emit_bpf_tail_call(&prog);
|
emit_bpf_tail_call(&prog, image + addrs[i - 1]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* cond jump */
|
/* cond jump */
|
||||||
|
Loading…
Reference in New Issue
Block a user