Merge branch 'x86/urgent' into x86/cpu, to resolve conflict

There's a new conflict between this commit pending in x86/cpu:

  63edbaa48a x86/cpu/topology: Add support for the AMD 0x80000026 leaf

And these fixes in x86/urgent:

  c064b536a8 x86/cpu/amd: Make the NODEID_MSR union actually work
  1b3108f689 x86/cpu/amd: Make the CPUID 0x80000008 parser correct

Resolve them.

 Conflicts:
	arch/x86/kernel/cpu/topology_amd.c

Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2024-04-11 14:46:05 +02:00
commit 21f546a43a
39 changed files with 536 additions and 169 deletions

View File

@ -138,11 +138,10 @@ associated with the source address of the indirect branch. Specifically,
the BHB might be shared across privilege levels even in the presence of
Enhanced IBRS.
Currently the only known real-world BHB attack vector is via
unprivileged eBPF. Therefore, it's highly recommended to not enable
unprivileged eBPF, especially when eIBRS is used (without retpolines).
For a full mitigation against BHB attacks, it's recommended to use
retpolines (or eIBRS combined with retpolines).
Previously the only known real-world BHB attack vector was via unprivileged
eBPF. Further research has found attacks that don't require unprivileged eBPF.
For a full mitigation against BHB attacks it is recommended to set BHI_DIS_S or
use the BHB clearing sequence.
Attack scenarios
----------------
@ -430,6 +429,23 @@ The possible values in this file are:
'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB
=========================== =======================================================
- Branch History Injection (BHI) protection status:
.. list-table::
* - BHI: Not affected
- System is not affected
* - BHI: Retpoline
- System is protected by retpoline
* - BHI: BHI_DIS_S
- System is protected by BHI_DIS_S
* - BHI: SW loop, KVM SW loop
- System is protected by software clearing sequence
* - BHI: Vulnerable
- System is vulnerable to BHI
* - BHI: Vulnerable, KVM: SW loop
- System is vulnerable; KVM is protected by software clearing sequence
Full mitigation might require a microcode update from the CPU
vendor. When the necessary microcode is not available, the kernel will
report vulnerability.
@ -484,7 +500,11 @@ Spectre variant 2
Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at
boot, by setting the IBRS bit, and they're automatically protected against
Spectre v2 variant attacks.
some Spectre v2 variant attacks. The BHB can still influence the choice of
indirect branch predictor entry, and although branch predictor entries are
isolated between modes when eIBRS is enabled, the BHB itself is not isolated
between modes. Systems which support BHI_DIS_S will set it to protect against
BHI attacks.
On Intel's enhanced IBRS systems, this includes cross-thread branch target
injections on SMT systems (STIBP). In other words, Intel eIBRS enables
@ -638,6 +658,18 @@ kernel command line.
spectre_v2=off. Spectre variant 1 mitigations
cannot be disabled.
spectre_bhi=
[X86] Control mitigation of Branch History Injection
(BHI) vulnerability. This setting affects the deployment
of the HW BHI control and the SW BHB clearing sequence.
on
(default) Enable the HW or SW mitigation as
needed.
off
Disable the mitigation.
For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt
Mitigation selection guide

View File

@ -3444,6 +3444,7 @@
retbleed=off [X86]
spec_rstack_overflow=off [X86]
spec_store_bypass_disable=off [X86,PPC]
spectre_bhi=off [X86]
spectre_v2_user=off [X86]
srbds=off [X86,INTEL]
ssbd=force-off [ARM64]
@ -6063,6 +6064,15 @@
sonypi.*= [HW] Sony Programmable I/O Control Device driver
See Documentation/admin-guide/laptops/sonypi.rst
spectre_bhi= [X86] Control mitigation of Branch History Injection
(BHI) vulnerability. This setting affects the
deployment of the HW BHI control and the SW BHB
clearing sequence.
on - (default) Enable the HW or SW mitigation
as needed.
off - Disable the mitigation.
spectre_v2= [X86,EARLY] Control mitigation of Spectre variant 2
(indirect branch speculation) vulnerability.
The default operation protects the kernel from

View File

@ -2633,6 +2633,16 @@ config MITIGATION_RFDS
stored in floating point, vector and integer registers.
See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst>
config MITIGATION_SPECTRE_BHI
bool "Mitigate Spectre-BHB (Branch History Injection)"
depends on CPU_SUP_INTEL
default y
help
Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks
where the branch history buffer is poisoned to speculatively steer
indirect branches.
See <file:Documentation/admin-guide/hw-vuln/spectre.rst>
endif
config ARCH_HAS_ADD_PAGES

View File

@ -49,7 +49,7 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
if (likely(unr < NR_syscalls)) {
unr = array_index_nospec(unr, NR_syscalls);
regs->ax = sys_call_table[unr](regs);
regs->ax = x64_sys_call(regs, unr);
return true;
}
return false;
@ -66,7 +66,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
xnr = array_index_nospec(xnr, X32_NR_syscalls);
regs->ax = x32_sys_call_table[xnr](regs);
regs->ax = x32_sys_call(regs, xnr);
return true;
}
return false;
@ -162,7 +162,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
if (likely(unr < IA32_NR_syscalls)) {
unr = array_index_nospec(unr, IA32_NR_syscalls);
regs->ax = ia32_sys_call_table[unr](regs);
regs->ax = ia32_sys_call(regs, unr);
} else if (nr != -1) {
regs->ax = __ia32_sys_ni_syscall(regs);
}
@ -189,7 +189,7 @@ static __always_inline bool int80_is_external(void)
}
/**
* int80_emulation - 32-bit legacy syscall entry
* do_int80_emulation - 32-bit legacy syscall C entry from asm
*
* This entry point can be used by 32-bit and 64-bit programs to perform
* 32-bit system calls. Instances of INT $0x80 can be found inline in
@ -207,7 +207,7 @@ static __always_inline bool int80_is_external(void)
* eax: system call number
* ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6
*/
DEFINE_IDTENTRY_RAW(int80_emulation)
__visible noinstr void do_int80_emulation(struct pt_regs *regs)
{
int nr;

View File

@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
/* clobbers %rax, make sure it is after saving the syscall nr */
IBRS_ENTER
UNTRAIN_RET
CLEAR_BRANCH_HISTORY
call do_syscall_64 /* returns with IRQs disabled */
@ -1491,3 +1492,63 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead)
call make_task_dead
SYM_CODE_END(rewind_stack_and_make_dead)
.popsection
/*
* This sequence executes branches in order to remove user branch information
* from the branch history tracker in the Branch Predictor, therefore removing
* user influence on subsequent BTB lookups.
*
* It should be used on parts prior to Alder Lake. Newer parts should use the
* BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being
* virtualized on newer hardware the VMM should protect against BHI attacks by
* setting BHI_DIS_S for the guests.
*
* CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging
* and not clearing the branch history. The call tree looks like:
*
* call 1
* call 2
* call 2
* call 2
* call 2
* call 2
* ret
* ret
* ret
* ret
* ret
* ret
*
* This means that the stack is non-constant and ORC can't unwind it with %rsp
* alone. Therefore we unconditionally set up the frame pointer, which allows
* ORC to unwind properly.
*
* The alignment is for performance and not for safety, and may be safely
* refactored in the future if needed.
*/
SYM_FUNC_START(clear_bhb_loop)
push %rbp
mov %rsp, %rbp
movl $5, %ecx
ANNOTATE_INTRA_FUNCTION_CALL
call 1f
jmp 5f
.align 64, 0xcc
ANNOTATE_INTRA_FUNCTION_CALL
1: call 2f
RET
.align 64, 0xcc
2: movl $5, %eax
3: jmp 4f
nop
4: sub $1, %eax
jnz 3b
sub $1, %ecx
jnz 1b
RET
5: lfence
pop %rbp
RET
SYM_FUNC_END(clear_bhb_loop)
EXPORT_SYMBOL_GPL(clear_bhb_loop)
STACK_FRAME_NON_STANDARD(clear_bhb_loop)

View File

@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
IBRS_ENTER
UNTRAIN_RET
CLEAR_BRANCH_HISTORY
/*
* SYSENTER doesn't filter flags, so we need to clear NT and AC
@ -206,6 +207,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
IBRS_ENTER
UNTRAIN_RET
CLEAR_BRANCH_HISTORY
movq %rsp, %rdi
call do_fast_syscall_32
@ -276,3 +278,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
int3
SYM_CODE_END(entry_SYSCALL_compat)
/*
* int 0x80 is used by 32 bit mode as a system call entry. Normally idt entries
* point to C routines, however since this is a system call interface the branch
* history needs to be scrubbed to protect against BHI attacks, and that
* scrubbing needs to take place in assembly code prior to entering any C
* routines.
*/
SYM_CODE_START(int80_emulation)
ANNOTATE_NOENDBR
UNWIND_HINT_FUNC
CLEAR_BRANCH_HISTORY
jmp do_int80_emulation
SYM_CODE_END(int80_emulation)

View File

@ -18,8 +18,25 @@
#include <asm/syscalls_32.h>
#undef __SYSCALL
/*
* The sys_call_table[] is no longer used for system calls, but
* kernel/trace/trace_syscalls.c still wants to know the system
* call address.
*/
#ifdef CONFIG_X86_32
#define __SYSCALL(nr, sym) __ia32_##sym,
__visible const sys_call_ptr_t ia32_sys_call_table[] = {
const sys_call_ptr_t sys_call_table[] = {
#include <asm/syscalls_32.h>
};
#undef __SYSCALL
#endif
#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs);
long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
{
switch (nr) {
#include <asm/syscalls_32.h>
default: return __ia32_sys_ni_syscall(regs);
}
};

View File

@ -11,8 +11,23 @@
#include <asm/syscalls_64.h>
#undef __SYSCALL
/*
* The sys_call_table[] is no longer used for system calls, but
* kernel/trace/trace_syscalls.c still wants to know the system
* call address.
*/
#define __SYSCALL(nr, sym) __x64_##sym,
asmlinkage const sys_call_ptr_t sys_call_table[] = {
const sys_call_ptr_t sys_call_table[] = {
#include <asm/syscalls_64.h>
};
#undef __SYSCALL
#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
{
switch (nr) {
#include <asm/syscalls_64.h>
default: return __x64_sys_ni_syscall(regs);
}
};

View File

@ -11,8 +11,12 @@
#include <asm/syscalls_x32.h>
#undef __SYSCALL
#define __SYSCALL(nr, sym) __x64_##sym,
#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
asmlinkage const sys_call_ptr_t x32_sys_call_table[] = {
#include <asm/syscalls_x32.h>
long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
{
switch (nr) {
#include <asm/syscalls_x32.h>
default: return __x64_sys_ni_syscall(regs);
}
};

View File

@ -13,6 +13,7 @@
#include <asm/mpspec.h>
#include <asm/msr.h>
#include <asm/hardirq.h>
#include <asm/io.h>
#define ARCH_APICTIMER_STOPS_ON_C3 1
@ -98,7 +99,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v)
static inline u32 native_apic_mem_read(u32 reg)
{
return *((volatile u32 *)(APIC_BASE + reg));
return readl((void __iomem *)(APIC_BASE + reg));
}
static inline void native_apic_mem_eoi(void)

View File

@ -461,11 +461,15 @@
/*
* Extended auxiliary flags: Linux defined - for features scattered in various
* CPUID levels like 0x80000022, etc.
* CPUID levels like 0x80000022, etc and Linux defined features.
*
* Reuse free bits when adding new feature flags!
*/
#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */
#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */
#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */
#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */
/*
* BUG word(s)
@ -515,4 +519,5 @@
#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */
#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */
#endif /* _ASM_X86_CPUFEATURES_H */

View File

@ -61,10 +61,13 @@
#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
#define SPEC_CTRL_BHI_DIS_S_SHIFT 10 /* Disable Branch History Injection behavior */
#define SPEC_CTRL_BHI_DIS_S BIT(SPEC_CTRL_BHI_DIS_S_SHIFT)
/* A mask for bits which the kernel toggles when controlling mitigations */
#define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \
| SPEC_CTRL_RRSBA_DIS_S)
| SPEC_CTRL_RRSBA_DIS_S \
| SPEC_CTRL_BHI_DIS_S)
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
@ -163,6 +166,10 @@
* are restricted to targets in
* kernel.
*/
#define ARCH_CAP_BHI_NO BIT(20) /*
* CPU is not affected by Branch
* History Injection.
*/
#define ARCH_CAP_PBRSB_NO BIT(24) /*
* Not susceptible to Post-Barrier
* Return Stack Buffer Predictions.

View File

@ -326,6 +326,19 @@
ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
.endm
#ifdef CONFIG_X86_64
.macro CLEAR_BRANCH_HISTORY
ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
.endm
.macro CLEAR_BRANCH_HISTORY_VMEXIT
ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT
.endm
#else
#define CLEAR_BRANCH_HISTORY
#define CLEAR_BRANCH_HISTORY_VMEXIT
#endif
#else /* __ASSEMBLY__ */
#define ANNOTATE_RETPOLINE_SAFE \
@ -368,6 +381,10 @@ extern void srso_alias_return_thunk(void);
extern void entry_untrain_ret(void);
extern void entry_ibpb(void);
#ifdef CONFIG_X86_64
extern void clear_bhb_loop(void);
#endif
extern void (*x86_return_thunk)(void);
extern void __warn_thunk(void);

View File

@ -16,19 +16,17 @@
#include <asm/thread_info.h> /* for TS_COMPAT */
#include <asm/unistd.h>
/* This is used purely for kernel/trace/trace_syscalls.c */
typedef long (*sys_call_ptr_t)(const struct pt_regs *);
extern const sys_call_ptr_t sys_call_table[];
#if defined(CONFIG_X86_32)
#define ia32_sys_call_table sys_call_table
#else
/*
* These may not exist, but still put the prototypes in so we
* can use IS_ENABLED().
*/
extern const sys_call_ptr_t ia32_sys_call_table[];
extern const sys_call_ptr_t x32_sys_call_table[];
#endif
extern long ia32_sys_call(const struct pt_regs *, unsigned int nr);
extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
extern long x64_sys_call(const struct pt_regs *, unsigned int nr);
/*
* Only the low 32 bits of orig_ax are meaningful, so we return int.
@ -127,6 +125,7 @@ static inline int syscall_get_arch(struct task_struct *task)
}
bool do_syscall_64(struct pt_regs *regs, int nr);
void do_int80_emulation(struct pt_regs *regs);
#endif /* CONFIG_X86_32 */

View File

@ -1687,11 +1687,11 @@ static int x2apic_state;
static bool x2apic_hw_locked(void)
{
u64 ia32_cap;
u64 x86_arch_cap_msr;
u64 msr;
ia32_cap = x86_read_arch_cap_msr();
if (ia32_cap & ARCH_CAP_XAPIC_DISABLE) {
x86_arch_cap_msr = x86_read_arch_cap_msr();
if (x86_arch_cap_msr & ARCH_CAP_XAPIC_DISABLE) {
rdmsrl(MSR_IA32_XAPIC_DISABLE_STATUS, msr);
return (msr & LEGACY_XAPIC_DISABLED);
}

View File

@ -536,7 +536,6 @@ clear_sev:
static void early_init_amd(struct cpuinfo_x86 *c)
{
u64 value;
u32 dummy;
if (c->x86 >= 0xf)
@ -604,20 +603,6 @@ static void early_init_amd(struct cpuinfo_x86 *c)
early_detect_mem_encrypt(c);
/* Re-enable TopologyExtensions if switched off by BIOS */
if (c->x86 == 0x15 &&
(c->x86_model >= 0x10 && c->x86_model <= 0x6f) &&
!cpu_has(c, X86_FEATURE_TOPOEXT)) {
if (msr_set_bit(0xc0011005, 54) > 0) {
rdmsrl(0xc0011005, value);
if (value & BIT_64(54)) {
set_cpu_cap(c, X86_FEATURE_TOPOEXT);
pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
}
}
}
if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_IBPB_BRTYPE)) {
if (c->x86 == 0x17 && boot_cpu_has(X86_FEATURE_AMD_IBPB))
setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE);

View File

@ -61,6 +61,8 @@ EXPORT_PER_CPU_SYMBOL_GPL(x86_spec_ctrl_current);
u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB;
EXPORT_SYMBOL_GPL(x86_pred_cmd);
static u64 __ro_after_init x86_arch_cap_msr;
static DEFINE_MUTEX(spec_ctrl_mutex);
void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk;
@ -144,6 +146,8 @@ void __init cpu_select_mitigations(void)
x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK;
}
x86_arch_cap_msr = x86_read_arch_cap_msr();
/* Select the proper CPU mitigations before patching alternatives: */
spectre_v1_select_mitigation();
spectre_v2_select_mitigation();
@ -301,8 +305,6 @@ static const char * const taa_strings[] = {
static void __init taa_select_mitigation(void)
{
u64 ia32_cap;
if (!boot_cpu_has_bug(X86_BUG_TAA)) {
taa_mitigation = TAA_MITIGATION_OFF;
return;
@ -341,9 +343,8 @@ static void __init taa_select_mitigation(void)
* On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode
* update is required.
*/
ia32_cap = x86_read_arch_cap_msr();
if ( (ia32_cap & ARCH_CAP_MDS_NO) &&
!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR))
if ( (x86_arch_cap_msr & ARCH_CAP_MDS_NO) &&
!(x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR))
taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
/*
@ -401,8 +402,6 @@ static const char * const mmio_strings[] = {
static void __init mmio_select_mitigation(void)
{
u64 ia32_cap;
if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) ||
boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) ||
cpu_mitigations_off()) {
@ -413,8 +412,6 @@ static void __init mmio_select_mitigation(void)
if (mmio_mitigation == MMIO_MITIGATION_OFF)
return;
ia32_cap = x86_read_arch_cap_msr();
/*
* Enable CPU buffer clear mitigation for host and VMM, if also affected
* by MDS or TAA. Otherwise, enable mitigation for VMM only.
@ -437,7 +434,7 @@ static void __init mmio_select_mitigation(void)
* be propagated to uncore buffers, clearing the Fill buffers on idle
* is required irrespective of SMT state.
*/
if (!(ia32_cap & ARCH_CAP_FBSDP_NO))
if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO))
static_branch_enable(&mds_idle_clear);
/*
@ -447,10 +444,10 @@ static void __init mmio_select_mitigation(void)
* FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS
* affected systems.
*/
if ((ia32_cap & ARCH_CAP_FB_CLEAR) ||
if ((x86_arch_cap_msr & ARCH_CAP_FB_CLEAR) ||
(boot_cpu_has(X86_FEATURE_MD_CLEAR) &&
boot_cpu_has(X86_FEATURE_FLUSH_L1D) &&
!(ia32_cap & ARCH_CAP_MDS_NO)))
!(x86_arch_cap_msr & ARCH_CAP_MDS_NO)))
mmio_mitigation = MMIO_MITIGATION_VERW;
else
mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED;
@ -508,7 +505,7 @@ static void __init rfds_select_mitigation(void)
if (rfds_mitigation == RFDS_MITIGATION_OFF)
return;
if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR)
if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR)
setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
else
rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED;
@ -659,8 +656,6 @@ void update_srbds_msr(void)
static void __init srbds_select_mitigation(void)
{
u64 ia32_cap;
if (!boot_cpu_has_bug(X86_BUG_SRBDS))
return;
@ -669,8 +664,7 @@ static void __init srbds_select_mitigation(void)
* are only exposed to SRBDS when TSX is enabled or when CPU is affected
* by Processor MMIO Stale Data vulnerability.
*/
ia32_cap = x86_read_arch_cap_msr();
if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) &&
if ((x86_arch_cap_msr & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) &&
!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
srbds_mitigation = SRBDS_MITIGATION_TSX_OFF;
else if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
@ -813,7 +807,7 @@ static void __init gds_select_mitigation(void)
/* Will verify below that mitigation _can_ be disabled */
/* No microcode */
if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) {
if (!(x86_arch_cap_msr & ARCH_CAP_GDS_CTRL)) {
if (gds_mitigation == GDS_MITIGATION_FORCE) {
/*
* This only needs to be done on the boot CPU so do it
@ -1544,20 +1538,25 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void)
return SPECTRE_V2_RETPOLINE;
}
static bool __ro_after_init rrsba_disabled;
/* Disable in-kernel use of non-RSB RET predictors */
static void __init spec_ctrl_disable_kernel_rrsba(void)
{
u64 ia32_cap;
if (rrsba_disabled)
return;
if (!(x86_arch_cap_msr & ARCH_CAP_RRSBA)) {
rrsba_disabled = true;
return;
}
if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
return;
ia32_cap = x86_read_arch_cap_msr();
if (ia32_cap & ARCH_CAP_RRSBA) {
x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
update_spec_ctrl(x86_spec_ctrl_base);
}
x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
update_spec_ctrl(x86_spec_ctrl_base);
rrsba_disabled = true;
}
static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode)
@ -1607,6 +1606,73 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_
dump_stack();
}
/*
* Set BHI_DIS_S to prevent indirect branches in kernel to be influenced by
* branch history in userspace. Not needed if BHI_NO is set.
*/
static bool __init spec_ctrl_bhi_dis(void)
{
if (!boot_cpu_has(X86_FEATURE_BHI_CTRL))
return false;
x86_spec_ctrl_base |= SPEC_CTRL_BHI_DIS_S;
update_spec_ctrl(x86_spec_ctrl_base);
setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_HW);
return true;
}
enum bhi_mitigations {
BHI_MITIGATION_OFF,
BHI_MITIGATION_ON,
};
static enum bhi_mitigations bhi_mitigation __ro_after_init =
IS_ENABLED(CONFIG_MITIGATION_SPECTRE_BHI) ? BHI_MITIGATION_ON : BHI_MITIGATION_OFF;
static int __init spectre_bhi_parse_cmdline(char *str)
{
if (!str)
return -EINVAL;
if (!strcmp(str, "off"))
bhi_mitigation = BHI_MITIGATION_OFF;
else if (!strcmp(str, "on"))
bhi_mitigation = BHI_MITIGATION_ON;
else
pr_err("Ignoring unknown spectre_bhi option (%s)", str);
return 0;
}
early_param("spectre_bhi", spectre_bhi_parse_cmdline);
static void __init bhi_select_mitigation(void)
{
if (bhi_mitigation == BHI_MITIGATION_OFF)
return;
/* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */
if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
spec_ctrl_disable_kernel_rrsba();
if (rrsba_disabled)
return;
}
if (spec_ctrl_bhi_dis())
return;
if (!IS_ENABLED(CONFIG_X86_64))
return;
/* Mitigate KVM by default */
setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n");
/* Mitigate syscalls when the mitigation is forced =on */
setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP);
pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n");
}
static void __init spectre_v2_select_mitigation(void)
{
enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@ -1718,6 +1784,9 @@ static void __init spectre_v2_select_mitigation(void)
mode == SPECTRE_V2_RETPOLINE)
spec_ctrl_disable_kernel_rrsba();
if (boot_cpu_has(X86_BUG_BHI))
bhi_select_mitigation();
spectre_v2_enabled = mode;
pr_info("%s\n", spectre_v2_strings[mode]);
@ -1832,8 +1901,6 @@ static void update_indir_branch_cond(void)
/* Update the static key controlling the MDS CPU buffer clear in idle */
static void update_mds_branch_idle(void)
{
u64 ia32_cap = x86_read_arch_cap_msr();
/*
* Enable the idle clearing if SMT is active on CPUs which are
* affected only by MSBDS and not any other MDS variant.
@ -1848,7 +1915,7 @@ static void update_mds_branch_idle(void)
if (sched_smt_active()) {
static_branch_enable(&mds_idle_clear);
} else if (mmio_mitigation == MMIO_MITIGATION_OFF ||
(ia32_cap & ARCH_CAP_FBSDP_NO)) {
(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) {
static_branch_disable(&mds_idle_clear);
}
}
@ -2695,15 +2762,15 @@ static char *stibp_state(void)
switch (spectre_v2_user_stibp) {
case SPECTRE_V2_USER_NONE:
return ", STIBP: disabled";
return "; STIBP: disabled";
case SPECTRE_V2_USER_STRICT:
return ", STIBP: forced";
return "; STIBP: forced";
case SPECTRE_V2_USER_STRICT_PREFERRED:
return ", STIBP: always-on";
return "; STIBP: always-on";
case SPECTRE_V2_USER_PRCTL:
case SPECTRE_V2_USER_SECCOMP:
if (static_key_enabled(&switch_to_cond_stibp))
return ", STIBP: conditional";
return "; STIBP: conditional";
}
return "";
}
@ -2712,10 +2779,10 @@ static char *ibpb_state(void)
{
if (boot_cpu_has(X86_FEATURE_IBPB)) {
if (static_key_enabled(&switch_mm_always_ibpb))
return ", IBPB: always-on";
return "; IBPB: always-on";
if (static_key_enabled(&switch_mm_cond_ibpb))
return ", IBPB: conditional";
return ", IBPB: disabled";
return "; IBPB: conditional";
return "; IBPB: disabled";
}
return "";
}
@ -2725,14 +2792,30 @@ static char *pbrsb_eibrs_state(void)
if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) ||
boot_cpu_has(X86_FEATURE_RSB_VMEXIT))
return ", PBRSB-eIBRS: SW sequence";
return "; PBRSB-eIBRS: SW sequence";
else
return ", PBRSB-eIBRS: Vulnerable";
return "; PBRSB-eIBRS: Vulnerable";
} else {
return ", PBRSB-eIBRS: Not affected";
return "; PBRSB-eIBRS: Not affected";
}
}
static const char *spectre_bhi_state(void)
{
if (!boot_cpu_has_bug(X86_BUG_BHI))
return "; BHI: Not affected";
else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW))
return "; BHI: BHI_DIS_S";
else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP))
return "; BHI: SW loop, KVM: SW loop";
else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && rrsba_disabled)
return "; BHI: Retpoline";
else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT))
return "; BHI: Vulnerable, KVM: SW loop";
return "; BHI: Vulnerable";
}
static ssize_t spectre_v2_show_state(char *buf)
{
if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
@ -2745,13 +2828,15 @@ static ssize_t spectre_v2_show_state(char *buf)
spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
return sysfs_emit(buf, "%s%s%s%s%s%s%s\n",
return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n",
spectre_v2_strings[spectre_v2_enabled],
ibpb_state(),
boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "",
stibp_state(),
boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "",
pbrsb_eibrs_state(),
spectre_bhi_state(),
/* this should always be at the end */
spectre_v2_module_string());
}

View File

@ -1117,6 +1117,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
#define NO_SPECTRE_V2 BIT(8)
#define NO_MMIO BIT(9)
#define NO_EIBRS_PBRSB BIT(10)
#define NO_BHI BIT(11)
#define VULNWL(vendor, family, model, whitelist) \
X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
@ -1179,18 +1180,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
/* AMD Family 0xf - 0x12 */
VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
/* Zhaoxin Family 7 */
VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
{}
};
@ -1280,25 +1281,25 @@ static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long whi
u64 x86_read_arch_cap_msr(void)
{
u64 ia32_cap = 0;
u64 x86_arch_cap_msr = 0;
if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, x86_arch_cap_msr);
return ia32_cap;
return x86_arch_cap_msr;
}
static bool arch_cap_mmio_immune(u64 ia32_cap)
static bool arch_cap_mmio_immune(u64 x86_arch_cap_msr)
{
return (ia32_cap & ARCH_CAP_FBSDP_NO &&
ia32_cap & ARCH_CAP_PSDP_NO &&
ia32_cap & ARCH_CAP_SBDR_SSDP_NO);
return (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO &&
x86_arch_cap_msr & ARCH_CAP_PSDP_NO &&
x86_arch_cap_msr & ARCH_CAP_SBDR_SSDP_NO);
}
static bool __init vulnerable_to_rfds(u64 ia32_cap)
static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr)
{
/* The "immunity" bit trumps everything else: */
if (ia32_cap & ARCH_CAP_RFDS_NO)
if (x86_arch_cap_msr & ARCH_CAP_RFDS_NO)
return false;
/*
@ -1306,7 +1307,7 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap)
* indicate that mitigation is needed because guest is running on a
* vulnerable hardware or may migrate to such hardware:
*/
if (ia32_cap & ARCH_CAP_RFDS_CLEAR)
if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR)
return true;
/* Only consult the blacklist when there is no enumeration: */
@ -1315,11 +1316,11 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap)
static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
u64 ia32_cap = x86_read_arch_cap_msr();
u64 x86_arch_cap_msr = x86_read_arch_cap_msr();
/* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */
if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) &&
!(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
!(x86_arch_cap_msr & ARCH_CAP_PSCHANGE_MC_NO))
setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT);
if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION))
@ -1331,7 +1332,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) &&
!(ia32_cap & ARCH_CAP_SSB_NO) &&
!(x86_arch_cap_msr & ARCH_CAP_SSB_NO) &&
!cpu_has(c, X86_FEATURE_AMD_SSB_NO))
setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
@ -1342,17 +1343,17 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* Don't use AutoIBRS when SNP is enabled because it degrades host
* userspace indirect branch performance.
*/
if ((ia32_cap & ARCH_CAP_IBRS_ALL) ||
if ((x86_arch_cap_msr & ARCH_CAP_IBRS_ALL) ||
(cpu_has(c, X86_FEATURE_AUTOIBRS) &&
!cpu_feature_enabled(X86_FEATURE_SEV_SNP))) {
setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
!(ia32_cap & ARCH_CAP_PBRSB_NO))
!(x86_arch_cap_msr & ARCH_CAP_PBRSB_NO))
setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
}
if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) &&
!(ia32_cap & ARCH_CAP_MDS_NO)) {
!(x86_arch_cap_msr & ARCH_CAP_MDS_NO)) {
setup_force_cpu_bug(X86_BUG_MDS);
if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY))
setup_force_cpu_bug(X86_BUG_MSBDS_ONLY);
@ -1371,9 +1372,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* TSX_CTRL check alone is not sufficient for cases when the microcode
* update is not present or running as guest that don't get TSX_CTRL.
*/
if (!(ia32_cap & ARCH_CAP_TAA_NO) &&
if (!(x86_arch_cap_msr & ARCH_CAP_TAA_NO) &&
(cpu_has(c, X86_FEATURE_RTM) ||
(ia32_cap & ARCH_CAP_TSX_CTRL_MSR)))
(x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR)))
setup_force_cpu_bug(X86_BUG_TAA);
/*
@ -1399,7 +1400,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist,
* nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits.
*/
if (!arch_cap_mmio_immune(ia32_cap)) {
if (!arch_cap_mmio_immune(x86_arch_cap_msr)) {
if (cpu_matches(cpu_vuln_blacklist, MMIO))
setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
@ -1407,7 +1408,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
}
if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (x86_arch_cap_msr & ARCH_CAP_RSBA))
setup_force_cpu_bug(X86_BUG_RETBLEED);
}
@ -1425,18 +1426,25 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* disabling AVX2. The only way to do this in HW is to clear XCR0[2],
* which means that AVX will be disabled.
*/
if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) &&
if (cpu_matches(cpu_vuln_blacklist, GDS) && !(x86_arch_cap_msr & ARCH_CAP_GDS_NO) &&
boot_cpu_has(X86_FEATURE_AVX))
setup_force_cpu_bug(X86_BUG_GDS);
if (vulnerable_to_rfds(ia32_cap))
if (vulnerable_to_rfds(x86_arch_cap_msr))
setup_force_cpu_bug(X86_BUG_RFDS);
/* When virtualized, eIBRS could be hidden, assume vulnerable */
if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) &&
!cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
(boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) ||
boot_cpu_has(X86_FEATURE_HYPERVISOR)))
setup_force_cpu_bug(X86_BUG_BHI);
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
/* Rogue Data Cache Load? No! */
if (ia32_cap & ARCH_CAP_RDCL_NO)
if (x86_arch_cap_msr & ARCH_CAP_RDCL_NO)
return;
setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);

View File

@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
{ X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
{ X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 },
{ X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
{ X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
{ X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },

View File

@ -123,7 +123,6 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id)
early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id;
early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id;
#endif
set_cpu_possible(cpu, true);
set_cpu_present(cpu, true);
}
@ -210,7 +209,11 @@ static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present)
topo_info.nr_disabled_cpus++;
}
/* Register present and possible CPUs in the domain maps */
/*
* Register present and possible CPUs in the domain
* maps. cpu_possible_map will be updated in
* topology_init_possible_cpus() after enumeration is done.
*/
for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++)
set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map);
}

View File

@ -29,11 +29,21 @@ static bool parse_8000_0008(struct topo_scan *tscan)
if (!sft)
sft = get_count_order(ecx.cpu_nthreads + 1);
topology_set_dom(tscan, TOPO_SMT_DOMAIN, sft, ecx.cpu_nthreads + 1);
/*
* cpu_nthreads describes the number of threads in the package
* sft is the number of APIC ID bits per package
*
* As the number of actual threads per core is not described in
* this leaf, just set the CORE domain shift and let the later
* parsers set SMT shift. Assume one thread per core by default
* which is correct if there are no other CPUID leafs to parse.
*/
topology_update_dom(tscan, TOPO_SMT_DOMAIN, 0, 1);
topology_set_dom(tscan, TOPO_CORE_DOMAIN, sft, ecx.cpu_nthreads + 1);
return true;
}
static void store_node(struct topo_scan *tscan, unsigned int nr_nodes, u16 node_id)
static void store_node(struct topo_scan *tscan, u16 nr_nodes, u16 node_id)
{
/*
* Starting with Fam 17h the DIE domain could probably be used to
@ -73,12 +83,14 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_topoext)
tscan->c->topo.initial_apicid = leaf.ext_apic_id;
/*
* If leaf 0xb is available, then SMT shift is set already. If not
* take it from ecx.threads_per_core and use topo_update_dom() -
* topology_set_dom() would propagate and overwrite the already
* propagated CORE level.
* If leaf 0xb is available, then the domain shifts are set
* already and nothing to do here.
*/
if (!has_topoext) {
/*
* Leaf 0x80000008 set the CORE domain shift already.
* Update the SMT domain, but do not propagate it.
*/
unsigned int nthreads = leaf.core_nthreads + 1;
topology_update_dom(tscan, TOPO_SMT_DOMAIN, get_count_order(nthreads), nthreads);
@ -109,13 +121,13 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_topoext)
static bool parse_fam10h_node_id(struct topo_scan *tscan)
{
struct {
union {
union {
struct {
u64 node_id : 3,
nodes_per_pkg : 3,
unused : 58;
u64 msr;
};
u64 msr;
} nid;
if (!boot_cpu_has(X86_FEATURE_NODEID_MSR))
@ -135,6 +147,26 @@ static void legacy_set_llc(struct topo_scan *tscan)
tscan->c->topo.llc_id = apicid >> tscan->dom_shifts[TOPO_CORE_DOMAIN];
}
static void topoext_fixup(struct topo_scan *tscan)
{
struct cpuinfo_x86 *c = tscan->c;
u64 msrval;
/* Try to re-enable TopologyExtensions if switched off by BIOS */
if (cpu_has(c, X86_FEATURE_TOPOEXT) || c->x86_vendor != X86_VENDOR_AMD ||
c->x86 != 0x15 || c->x86_model < 0x10 || c->x86_model > 0x6f)
return;
if (msr_set_bit(0xc0011005, 54) <= 0)
return;
rdmsrl(0xc0011005, msrval);
if (msrval & BIT_64(54)) {
set_cpu_cap(c, X86_FEATURE_TOPOEXT);
pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
}
}
static void parse_topology_amd(struct topo_scan *tscan)
{
bool has_topoext = false;
@ -167,6 +199,7 @@ static void parse_topology_amd(struct topo_scan *tscan)
void cpu_parse_topology_amd(struct topo_scan *tscan)
{
tscan->amd_nodes_per_pkg = 1;
topoext_fixup(tscan);
parse_topology_amd(tscan);
if (tscan->amd_nodes_per_pkg > 1)

View File

@ -52,7 +52,7 @@ enum kvm_only_cpuid_leafs {
#define X86_FEATURE_IPRED_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 1)
#define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2)
#define X86_FEATURE_DDPD_U KVM_X86_FEATURE(CPUID_7_2_EDX, 3)
#define X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
#define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
#define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5)
/* CPUID level 0x80000007 (EDX). */
@ -128,6 +128,7 @@ static __always_inline u32 __feature_translate(int x86_feature)
KVM_X86_TRANSLATE_FEATURE(CONSTANT_TSC);
KVM_X86_TRANSLATE_FEATURE(PERFMON_V2);
KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL);
KVM_X86_TRANSLATE_FEATURE(BHI_CTRL);
default:
return x86_feature;
}

View File

@ -275,6 +275,8 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)
call vmx_spec_ctrl_restore_host
CLEAR_BRANCH_HISTORY_VMEXIT
/* Put return value in AX */
mov %_ASM_BX, %_ASM_AX

View File

@ -1621,7 +1621,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr)
ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \
ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR)
ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO)
static u64 kvm_get_arch_capabilities(void)
{

View File

@ -25,6 +25,7 @@
#include <subdev/bios.h>
#include <subdev/bios/init.h>
#include <subdev/gsp.h>
void
gm107_devinit_disable(struct nvkm_devinit *init)
@ -33,10 +34,13 @@ gm107_devinit_disable(struct nvkm_devinit *init)
u32 r021c00 = nvkm_rd32(device, 0x021c00);
u32 r021c04 = nvkm_rd32(device, 0x021c04);
if (r021c00 & 0x00000001)
nvkm_subdev_disable(device, NVKM_ENGINE_CE, 0);
if (r021c00 & 0x00000004)
nvkm_subdev_disable(device, NVKM_ENGINE_CE, 2);
/* gsp only wants to enable/disable display */
if (!nvkm_gsp_rm(device->gsp)) {
if (r021c00 & 0x00000001)
nvkm_subdev_disable(device, NVKM_ENGINE_CE, 0);
if (r021c00 & 0x00000004)
nvkm_subdev_disable(device, NVKM_ENGINE_CE, 2);
}
if (r021c04 & 0x00000001)
nvkm_subdev_disable(device, NVKM_ENGINE_DISP, 0);
}

View File

@ -41,6 +41,7 @@ r535_devinit_new(const struct nvkm_devinit_func *hw,
rm->dtor = r535_devinit_dtor;
rm->post = hw->post;
rm->disable = hw->disable;
ret = nv50_devinit_new_(rm, device, type, inst, pdevinit);
if (ret)

View File

@ -1133,6 +1133,9 @@ __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
if (ret)
return ret;
ret = btrfs_record_root_in_trans(trans, node->root);
if (ret)
return ret;
ret = btrfs_update_delayed_inode(trans, node->root, path, node);
return ret;
}

View File

@ -2533,7 +2533,7 @@ void btrfs_clear_delalloc_extent(struct btrfs_inode *inode,
*/
if (bits & EXTENT_CLEAR_META_RESV &&
root != fs_info->tree_root)
btrfs_delalloc_release_metadata(inode, len, false);
btrfs_delalloc_release_metadata(inode, len, true);
/* For sanity tests. */
if (btrfs_is_testing(fs_info))
@ -4503,6 +4503,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
struct btrfs_trans_handle *trans;
struct btrfs_block_rsv block_rsv;
u64 root_flags;
u64 qgroup_reserved = 0;
int ret;
down_write(&fs_info->subvol_sem);
@ -4547,12 +4548,20 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
if (ret)
goto out_undead;
qgroup_reserved = block_rsv.qgroup_rsv_reserved;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out_release;
}
ret = btrfs_record_root_in_trans(trans, root);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
qgroup_reserved = 0;
trans->block_rsv = &block_rsv;
trans->bytes_reserved = block_rsv.size;
@ -4611,7 +4620,9 @@ out_end_trans:
ret = btrfs_end_transaction(trans);
inode->i_flags |= S_DEAD;
out_release:
btrfs_subvolume_release_metadata(root, &block_rsv);
btrfs_block_rsv_release(fs_info, &block_rsv, (u64)-1, NULL);
if (qgroup_reserved)
btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
out_undead:
if (ret) {
spin_lock(&dest->root_item_lock);

View File

@ -613,6 +613,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
int ret;
dev_t anon_dev;
u64 objectid;
u64 qgroup_reserved = 0;
root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
if (!root_item)
@ -650,13 +651,18 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
trans_num_items, false);
if (ret)
goto out_new_inode_args;
qgroup_reserved = block_rsv.qgroup_rsv_reserved;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
btrfs_subvolume_release_metadata(root, &block_rsv);
goto out_new_inode_args;
goto out_release_rsv;
}
ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root);
if (ret)
goto out;
btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
qgroup_reserved = 0;
trans->block_rsv = &block_rsv;
trans->bytes_reserved = block_rsv.size;
/* Tree log can't currently deal with an inode which is a new root. */
@ -767,9 +773,11 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
out:
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
btrfs_subvolume_release_metadata(root, &block_rsv);
btrfs_end_transaction(trans);
out_release_rsv:
btrfs_block_rsv_release(fs_info, &block_rsv, (u64)-1, NULL);
if (qgroup_reserved)
btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
out_new_inode_args:
btrfs_new_inode_args_destroy(&new_inode_args);
out_inode:
@ -791,6 +799,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
struct btrfs_pending_snapshot *pending_snapshot;
unsigned int trans_num_items;
struct btrfs_trans_handle *trans;
struct btrfs_block_rsv *block_rsv;
u64 qgroup_reserved = 0;
int ret;
/* We do not support snapshotting right now. */
@ -827,19 +837,19 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
goto free_pending;
}
btrfs_init_block_rsv(&pending_snapshot->block_rsv,
BTRFS_BLOCK_RSV_TEMP);
block_rsv = &pending_snapshot->block_rsv;
btrfs_init_block_rsv(block_rsv, BTRFS_BLOCK_RSV_TEMP);
/*
* 1 to add dir item
* 1 to add dir index
* 1 to update parent inode item
*/
trans_num_items = create_subvol_num_items(inherit) + 3;
ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,
&pending_snapshot->block_rsv,
ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, block_rsv,
trans_num_items, false);
if (ret)
goto free_pending;
qgroup_reserved = block_rsv->qgroup_rsv_reserved;
pending_snapshot->dentry = dentry;
pending_snapshot->root = root;
@ -852,6 +862,13 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
ret = PTR_ERR(trans);
goto fail;
}
ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root);
if (ret) {
btrfs_end_transaction(trans);
goto fail;
}
btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
qgroup_reserved = 0;
trans->pending_snapshot = pending_snapshot;
@ -881,7 +898,9 @@ fail:
if (ret && pending_snapshot->snap)
pending_snapshot->snap->anon_dev = 0;
btrfs_put_root(pending_snapshot->snap);
btrfs_subvolume_release_metadata(root, &pending_snapshot->block_rsv);
btrfs_block_rsv_release(fs_info, block_rsv, (u64)-1, NULL);
if (qgroup_reserved)
btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
free_pending:
if (pending_snapshot->anon_dev)
free_anon_bdev(pending_snapshot->anon_dev);

View File

@ -4495,6 +4495,8 @@ void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes)
BTRFS_QGROUP_RSV_META_PREALLOC);
trace_qgroup_meta_convert(root, num_bytes);
qgroup_convert_meta(fs_info, root->root_key.objectid, num_bytes);
if (!sb_rdonly(fs_info->sb))
add_root_meta_rsv(root, num_bytes, BTRFS_QGROUP_RSV_META_PERTRANS);
}
/*

View File

@ -548,13 +548,3 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
}
return ret;
}
void btrfs_subvolume_release_metadata(struct btrfs_root *root,
struct btrfs_block_rsv *rsv)
{
struct btrfs_fs_info *fs_info = root->fs_info;
u64 qgroup_to_release;
btrfs_block_rsv_release(fs_info, rsv, (u64)-1, &qgroup_to_release);
btrfs_qgroup_convert_reserved_meta(root, qgroup_to_release);
}

View File

@ -18,8 +18,6 @@ struct btrfs_trans_handle;
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
struct btrfs_block_rsv *rsv,
int nitems, bool use_global_rsv);
void btrfs_subvolume_release_metadata(struct btrfs_root *root,
struct btrfs_block_rsv *rsv);
int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
u64 ref_id, u64 dirid, u64 sequence,
const struct fscrypt_str *name);

View File

@ -745,14 +745,6 @@ again:
h->reloc_reserved = reloc_reserved;
}
/*
* Now that we have found a transaction to be a part of, convert the
* qgroup reservation from prealloc to pertrans. A different transaction
* can't race in and free our pertrans out from under us.
*/
if (qgroup_reserved)
btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
got_it:
if (!current->journal_info)
current->journal_info = h;
@ -786,8 +778,15 @@ got_it:
* not just freed.
*/
btrfs_end_transaction(h);
return ERR_PTR(ret);
goto reserve_fail;
}
/*
* Now that we have found a transaction to be a part of, convert the
* qgroup reservation from prealloc to pertrans. A different transaction
* can't race in and free our pertrans out from under us.
*/
if (qgroup_reserved)
btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
return h;
@ -1495,6 +1494,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
radix_tree_tag_clear(&fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid,
BTRFS_ROOT_TRANS_TAG);
btrfs_qgroup_free_meta_all_pertrans(root);
spin_unlock(&fs_info->fs_roots_radix_lock);
btrfs_free_log(trans, root);
@ -1519,7 +1519,6 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
if (ret2)
return ret2;
spin_lock(&fs_info->fs_roots_radix_lock);
btrfs_qgroup_free_meta_all_pertrans(root);
}
}
spin_unlock(&fs_info->fs_roots_radix_lock);

View File

@ -268,7 +268,7 @@ static inline void *offset_to_ptr(const int *off)
* - When one operand is a null pointer constant (i.e. when x is an integer
* constant expression) and the other is an object pointer (i.e. our
* third operand), the conditional operator returns the type of the
* object pointer operand (i.e. "int *). Here, within the sizeof(), we
* object pointer operand (i.e. "int *"). Here, within the sizeof(), we
* would then get:
* sizeof(*((int *)(...)) == sizeof(int) == 4
* - When one operand is a void pointer (i.e. when x is not an integer

View File

@ -2,6 +2,8 @@
#ifndef __LINUX_GFP_TYPES_H
#define __LINUX_GFP_TYPES_H
#include <linux/bits.h>
/* The typedef is in types.h but we want the documentation here */
#if 0
/**

View File

@ -3207,7 +3207,8 @@ enum cpu_mitigations {
};
static enum cpu_mitigations cpu_mitigations __ro_after_init =
CPU_MITIGATIONS_AUTO;
IS_ENABLED(CONFIG_SPECULATION_MITIGATIONS) ? CPU_MITIGATIONS_AUTO :
CPU_MITIGATIONS_OFF;
static int __init mitigations_parse_cmdline(char *arg)
{

View File

@ -8,6 +8,7 @@
#include <linux/build_bug.h>
#include <linux/compiler.h>
#include <linux/math.h>
#include <linux/panic.h>
#include <endian.h>
#include <byteswap.h>

View File

@ -37,4 +37,9 @@ static inline void totalram_pages_add(long count)
{
}
static inline int early_pfn_to_nid(unsigned long pfn)
{
return 0;
}
#endif

View File

@ -0,0 +1,19 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _TOOLS_LINUX_PANIC_H
#define _TOOLS_LINUX_PANIC_H
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
static inline void panic(const char *fmt, ...)
{
va_list argp;
va_start(argp, fmt);
vfprintf(stderr, fmt, argp);
va_end(argp);
exit(-1);
}
#endif