6b28baca9b
x86/speculation/l1tf: Protect PROT_NONE PTEs against speculation

When PTEs are set to PROT_NONE the kernel just clears the Present bit and
preserves the PFN, which creates attack surface for L1TF speculation
attacks. This is important inside guests, because L1TF speculation bypasses
physical page remapping. While the host has its own mitigations preventing
data from other VMs leaking into the guest, this would still risk leaking
the wrong page inside the current guest.

This uses the same technique as Linus' swap entry patch: while an entry is
in PROT_NONE state, invert the complete PFN part of it. This ensures that
the highest bit will point to nonexistent memory.

The inversion is done by pte/pmd_modify and pfn/pmd/pud_pte for PROT_NONE
entries, and pte/pmd/pud_pfn undo it. This assumes that no code path
touches the PFN part of a PTE directly without using these primitives.

This doesn't handle the case that MMIO is at the top of the CPU physical
memory. If such an MMIO region was exposed by an unprivileged driver for
mmap it would be possible to attack some real memory. However, this
situation is rather unlikely.

For 32-bit non-PAE the inversion is not done because there are really not
enough bits to protect anything.

Q: Why does the guest need to be protected when the hypervisor already has
   L1TF mitigations?

A: Here's an example:

   Physical pages 1 and 2 get mapped into a guest as

     GPA 1 -> PA 2
     GPA 2 -> PA 1

   through EPT.

   The L1TF speculation ignores the EPT remapping. Now the guest kernel
   maps GPA 1 to process A and GPA 2 to process B, and they belong to
   different users and should be isolated.

   A sets the GPA 1 (PA 2) PTE to PROT_NONE to bypass the EPT remapping
   and gets read access to the underlying physical page, which in this
   case points to PA 2, so it can read process B's data, if it happened
   to be in L1. Isolation inside the guest is broken.

   There's nothing the hypervisor can do about this. This mitigation has
   to be done in the guest itself.

[ tglx: Massaged changelog ]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Dave Hansen <dave.hansen@intel.com>
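For illustration, here is a minimal sketch of the inversion helpers this
changelog describes, modeled on the pgtable-invert.h header that the full
patch series introduces for the 3- and 4-level configurations (the 2-level
header below stubs them out). Treat it as a sketch of the technique rather
than the verbatim upstream code:

/* Sketch of the PROT_NONE PFN inversion (cf. asm/pgtable-invert.h) */

/*
 * An entry needs the inverted PFN while it is in PROT_NONE state:
 * _PAGE_PROTNONE set, _PAGE_PRESENT clear.
 */
static inline bool __pte_needs_invert(u64 val)
{
	return (val & (_PAGE_PRESENT | _PAGE_PROTNONE)) == _PAGE_PROTNONE;
}

/* Mask to XOR with the entry to flip (or recover) the PFN bits. */
static inline u64 protnone_mask(u64 val)
{
	return __pte_needs_invert(val) ? ~0ull : 0;
}

/*
 * When an entry transitions between PROT_NONE and !PROT_NONE, invert
 * the bits selected by mask (the PFN part) so the stored PFN points
 * at nonexistent memory; pte_pfn() and friends undo this when needed.
 */
static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask)
{
	if (__pte_needs_invert(oldval) != __pte_needs_invert(val))
		val = (val & ~mask) | (~val & mask);
	return val;
}

Since the inversion is an XOR over the PFN field, applying it twice
restores the original value, which is why pte/pmd/pud_pfn can undo it
transparently, and why an inverted PFN has its high bits set and points
into nonexistent memory (assuming no MMIO sits at the very top of physical
memory, as the changelog notes).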
116 lines
2.7 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PGTABLE_2LEVEL_H
#define _ASM_X86_PGTABLE_2LEVEL_H

#define pte_ERROR(e) \
	pr_err("%s:%d: bad pte %08lx\n", __FILE__, __LINE__, (e).pte_low)
#define pgd_ERROR(e) \
	pr_err("%s:%d: bad pgd %08lx\n", __FILE__, __LINE__, pgd_val(e))

/*
 * Certain architectures need to do special things when PTEs
 * within a page table are directly modified.  Thus, the following
 * hook is made available.
 */
static inline void native_set_pte(pte_t *ptep, pte_t pte)
{
	*ptep = pte;
}

static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
{
	*pmdp = pmd;
}

/* With 2-level paging the pud level is folded, so this is a no-op. */
static inline void native_set_pud(pud_t *pudp, pud_t pud)
{
}

static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
{
	native_set_pte(ptep, pte);
}

static inline void native_pmd_clear(pmd_t *pmdp)
{
	native_set_pmd(pmdp, __pmd(0));
}

/* Folded pud again: nothing to clear. */
static inline void native_pud_clear(pud_t *pudp)
{
}

static inline void native_pte_clear(struct mm_struct *mm,
				    unsigned long addr, pte_t *xp)
{
	*xp = native_make_pte(0);
}

#ifdef CONFIG_SMP
/*
 * xchg clears the entry and returns the old value in one atomic step,
 * so Accessed/Dirty bit updates made by another CPU's page walker are
 * not lost between the read and the clear.
 */
static inline pte_t native_ptep_get_and_clear(pte_t *xp)
{
	return __pte(xchg(&xp->pte_low, 0));
}
#else
#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp)
#endif

#ifdef CONFIG_SMP
static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
{
	return __pmd(xchg((pmdval_t *)xp, 0));
}
#else
#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
#endif

#ifdef CONFIG_SMP
static inline pud_t native_pudp_get_and_clear(pud_t *xp)
{
	return __pud(xchg((pudval_t *)xp, 0));
}
#else
#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp)
#endif

/* Bit manipulation helper on pte/pgoff entry */
static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshift,
				      unsigned long mask, unsigned int leftshift)
{
	return ((value >> rightshift) & mask) << leftshift;
}

/* Encode and de-code a swap entry */
#define SWP_TYPE_BITS 5
#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)

#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)

#define __swp_type(x)			(((x).val >> (_PAGE_BIT_PRESENT + 1)) \
					 & ((1U << SWP_TYPE_BITS) - 1))
#define __swp_offset(x)			((x).val >> SWP_OFFSET_SHIFT)
#define __swp_entry(type, offset)	((swp_entry_t) { \
					 ((type) << (_PAGE_BIT_PRESENT + 1)) \
					 | ((offset) << SWP_OFFSET_SHIFT) })
#define __pte_to_swp_entry(pte)		((swp_entry_t) { (pte).pte_low })
#define __swp_entry_to_pte(x)		((pte_t) { .pte = (x).val })

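/*
 * Illustrative layout of the swap encoding above (a sketch, not part
 * of the original header): bit 0 is _PAGE_BIT_PRESENT and stays clear
 * so the entry never looks present; the swap type sits in the next
 * SWP_TYPE_BITS bits; the offset starts above _PAGE_BIT_PROTNONE so
 * the PROT_NONE bit also stays clear and a swap entry can never be
 * confused with a PROT_NONE mapping:
 *
 *   31 .. SWP_OFFSET_SHIFT |  (unused)  | SWP_TYPE_BITS .. 1 |  0
 *   <------ offset ------->               <----- type ----->  P=0
 */
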
/*
 * No inverted PFNs on 2-level page tables: 32-bit non-PAE simply does
 * not have enough physical address bits for an inverted PFN to point
 * at nonexistent memory, so the L1TF inversion is not done here and
 * these helpers are no-op stubs.
 */

static inline u64 protnone_mask(u64 val)
{
	return 0;
}

static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask)
{
	return val;
}

static inline bool __pte_needs_invert(u64 val)
{
	return false;
}
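/*
 * For reference: on PAE and 64-bit kernels these stubs are replaced
 * by the real inversion helpers (see pgtable-invert.h in this
 * series), and consumers such as pte_pfn()/pfn_pte() apply or undo
 * the inversion along the lines of:
 *
 *	pfn ^= protnone_mask(pte_val(pte));
 *
 * Because the stubs above return 0/false, that shared code compiles
 * down to a plain PFN extraction on 2-level page tables.
 */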
#endif /* _ASM_X86_PGTABLE_2LEVEL_H */