6d991ba509
The seccomp speculation control operates on all tasks of a process, but only the current task of a process can update the MSR immediately. For the other threads the update is deferred to the next context switch.

This creates the following situation with Process A and B:

Process A task 2 and Process B task 1 are pinned on CPU1. Process A task 2 does not have the speculation control TIF bit set. Process B task 1 has the speculation control TIF bit set.

CPU0                                    CPU1
                                        MSR bit is set
                                        ProcB.T1 schedules out
                                        ProcA.T2 schedules in
                                        MSR bit is cleared
ProcA.T1
  seccomp_update()
  set TIF bit on ProcA.T2
                                        ProcB.T1 schedules in
                                        MSR is not updated  <-- FAIL

This happens because the context switch code tries to avoid the MSR update if the speculation control TIF bits of the incoming and the outgoing task are the same.

In the worst case ProcB.T1 and ProcA.T2 are the only tasks scheduling back and forth on CPU1, which keeps the MSR stale forever.

In theory this could be remedied by IPIs, but chasing the remote task, which could be migrated, is complex and full of races.

The straightforward solution is to avoid the asynchronous update of the TIF bit and defer it to the next context switch. The speculation control state is stored in task_struct::atomic_flags by the prctl and seccomp updates already.

Add a new TIF_SPEC_FORCE_UPDATE bit and set this after updating the atomic_flags. Check the bit on context switch and force a synchronous update of the speculation control if set. Use the same mechanism for updating the current task.
Reported-by: Tim Chen <tim.c.chen@linux.intel.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Andy Lutomirski <luto@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Jiri Kosina <jkosina@suse.cz> Cc: Tom Lendacky <thomas.lendacky@amd.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: David Woodhouse <dwmw@amazon.co.uk> Cc: Tim Chen <tim.c.chen@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Casey Schaufler <casey.schaufler@intel.com> Cc: Asit Mallick <asit.k.mallick@intel.com> Cc: Arjan van de Ven <arjan@linux.intel.com> Cc: Jon Masters <jcm@redhat.com> Cc: Waiman Long <longman9394@gmail.com> Cc: Greg KH <gregkh@linuxfoundation.org> Cc: Dave Stewart <david.c.stewart@intel.com> Cc: Kees Cook <keescook@chromium.org> Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1811272247140.1875@nanos.tec.linutronix.de
89 lines
2.8 KiB
C
89 lines
2.8 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _ASM_X86_SPECCTRL_H_
|
|
#define _ASM_X86_SPECCTRL_H_
|
|
|
|
#include <linux/thread_info.h>
|
|
#include <asm/nospec-branch.h>
|
|
|
|
/*
|
|
* On VMENTER we must preserve whatever view of the SPEC_CTRL MSR
|
|
* the guest has, while on VMEXIT we restore the host view. This
|
|
* would be easier if SPEC_CTRL were architecturally maskable or
|
|
* shadowable for guests but this is not (currently) the case.
|
|
* Takes the guest view of SPEC_CTRL MSR as a parameter and also
|
|
* the guest's version of VIRT_SPEC_CTRL, if emulated.
|
|
*/
|
|
/*
 * @guest selects the direction of the switch: x86_spec_ctrl_set_guest()
 * passes true and x86_spec_ctrl_restore_host() passes false.
 */
extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest);
|
|
|
|
/**
|
|
* x86_spec_ctrl_set_guest - Set speculation control registers for the guest
|
|
* @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL
|
|
* @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL
|
|
* (may get translated to MSR_AMD64_LS_CFG bits)
|
|
*
|
|
* Avoids writing to the MSR if the content/bits are the same
|
|
*/
|
|
static inline
|
|
void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
|
|
{
|
|
x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true);
|
|
}
|
|
|
|
/**
|
|
* x86_spec_ctrl_restore_host - Restore host speculation control registers
|
|
* @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL
|
|
* @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL
|
|
* (may get translated to MSR_AMD64_LS_CFG bits)
|
|
*
|
|
* Avoids writing to the MSR if the content/bits are the same
|
|
*/
|
|
static inline
|
|
void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
|
|
{
|
|
x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false);
|
|
}
|
|
|
|
/* AMD specific Speculative Store Bypass MSR data */
|
|
/*
 * NOTE(review): presumably the baseline LS_CFG MSR value that the SSBD mask
 * is combined with; the definition is not visible in this header - confirm.
 */
extern u64 x86_amd_ls_cfg_base;
|
|
/* Value returned by ssbd_tif_to_amd_ls_cfg() when _TIF_SSBD is set. */
extern u64 x86_amd_ls_cfg_ssbd_mask;
|
|
|
|
/*
 * Convert the SSBD bit of a thread-info flag word into its SPEC_CTRL
 * position.
 *
 * Every bit other than _TIF_SSBD is masked off, then the remaining bit is
 * moved down from bit TIF_SSBD to bit SPEC_CTRL_SSBD_SHIFT. The build-time
 * check guarantees the shift distance cannot be negative.
 */
static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn)
{
	u64 ssbd_flag;

	BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);

	ssbd_flag = tifn & _TIF_SSBD;
	return ssbd_flag >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
}
|
|
|
|
/*
 * Convert the SPEC_IB bit of a thread-info flag word into the STIBP position
 * of SPEC_CTRL.
 *
 * Every bit other than _TIF_SPEC_IB is masked off, then the remaining bit is
 * moved down from bit TIF_SPEC_IB to bit SPEC_CTRL_STIBP_SHIFT. The
 * build-time check guarantees the shift distance cannot be negative.
 */
static inline u64 stibp_tif_to_spec_ctrl(u64 tifn)
{
	u64 spec_ib_flag;

	BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT);

	spec_ib_flag = tifn & _TIF_SPEC_IB;
	return spec_ib_flag >> (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT);
}
|
|
|
|
/*
 * Convert the SSBD bit of a SPEC_CTRL value into its thread-info flag
 * position (the inverse of ssbd_tif_to_spec_ctrl()).
 *
 * Every bit other than SPEC_CTRL_SSBD is masked off, then the remaining bit
 * is moved up from bit SPEC_CTRL_SSBD_SHIFT to bit TIF_SSBD. The build-time
 * check guarantees the shift distance cannot be negative.
 */
static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl)
{
	u64 ssbd_bit;

	BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);

	ssbd_bit = spec_ctrl & SPEC_CTRL_SSBD;
	return ssbd_bit << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
}
|
|
|
|
/*
 * Convert the STIBP bit of a SPEC_CTRL value into the SPEC_IB thread-info
 * flag position (the inverse of stibp_tif_to_spec_ctrl()).
 *
 * Every bit other than SPEC_CTRL_STIBP is masked off, then the remaining bit
 * is moved up from bit SPEC_CTRL_STIBP_SHIFT to bit TIF_SPEC_IB. The
 * build-time check guarantees the shift distance cannot be negative.
 */
static inline unsigned long stibp_spec_ctrl_to_tif(u64 spec_ctrl)
{
	u64 stibp_bit;

	BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT);

	stibp_bit = spec_ctrl & SPEC_CTRL_STIBP;
	return stibp_bit << (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT);
}
|
|
|
|
/*
 * Map the SSBD bit of a thread-info flag word to the AMD LS_CFG SSBD mask.
 *
 * Returns x86_amd_ls_cfg_ssbd_mask when _TIF_SSBD is set in @tifn and zero
 * otherwise.
 */
static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
{
	if (tifn & _TIF_SSBD)
		return x86_amd_ls_cfg_ssbd_mask;

	return 0ULL;
}
|
|
|
|
#ifdef CONFIG_SMP
|
|
/*
 * Out-of-line version, declared only when CONFIG_SMP is defined; other
 * builds get an empty inline stub instead.
 */
extern void speculative_store_bypass_ht_init(void);
|
|
#else
|
|
/* Stub for builds without CONFIG_SMP: intentionally does nothing. */
static inline void speculative_store_bypass_ht_init(void)
{
}
|
|
#endif
|
|
|
|
/*
 * NOTE(review): takes a thread-info flag word; presumably applies the
 * speculation control state encoded in it. The definition is not visible in
 * this header - confirm.
 */
extern void speculation_ctrl_update(unsigned long tif);
|
|
/*
 * NOTE(review): presumably updates the speculation control state for the
 * current task. The definition is not visible in this header - confirm.
 */
extern void speculation_ctrl_update_current(void);
|
|
|
|
#endif
|