KVM: x86/mmu: Add shadow_me_value and repurpose shadow_me_mask

Intel Multi-Key Total Memory Encryption (MKTME) repurposes a couple of
the high physical address bits as 'KeyID' bits.  Intel Trust Domain
Extensions (TDX) further steals part of the MKTME KeyID bits as TDX
private KeyID bits.  TDX private KeyID bits cannot be set in any mapping
in the host kernel since they can only be accessed by software running
inside a new isolated CPU mode.  And unlike AMD's SME, the host kernel
doesn't set any legacy MKTME KeyID bits in any mapping either.
Therefore, it's not legitimate for KVM to set any KeyID bits in an SPTE
that maps guest memory.

KVM maintains shadow_zero_check bits to represent which bits must be
zero in an SPTE that maps guest memory.  MKTME KeyID bits should be set
in shadow_zero_check.  Currently, shadow_me_mask is used by AMD to set
sme_me_mask in the SPTE, and shadow_me_mask is excluded from
shadow_zero_check.  So initializing shadow_me_mask to represent all
MKTME KeyID bits doesn't work for VMX (on the contrary, they must be set
in shadow_zero_check).

Introduce a new 'shadow_me_value' to replace the existing
shadow_me_mask, and repurpose shadow_me_mask as 'all possible memory
encryption bits'.  The new semantics will be:

 - shadow_me_value: the memory encryption bit(s) that will be set in the
   SPTE (the original shadow_me_mask).
 - shadow_me_mask: all possible memory encryption bits (which is a
   superset of shadow_me_value).
 - For now, shadow_me_value is supposed to be set by SVM and VMX
   respectively, and it is a constant during KVM's lifetime.  This
   perhaps doesn't fit MKTME but for now the host kernel doesn't support
   it (and perhaps never will).
 - Bits in shadow_me_mask are set in shadow_zero_check, except the bits
   in shadow_me_value.

Introduce a new helper kvm_mmu_set_me_spte_mask() to initialize them.
Replace shadow_me_mask with shadow_me_value in almost all code paths,
except the one in PT64_PERM_MASK, which is used by need_remote_flush()
to determine whether remote TLB flush is needed.  This should still use
shadow_me_mask as any encryption bit change should need a TLB flush.
And for AMD, move initializing shadow_me_value/shadow_me_mask from
kvm_mmu_reset_all_pte_masks() to svm_hardware_setup().

Signed-off-by: Kai Huang <kai.huang@intel.com>
Message-Id: <f90964b93a3398b1cf1c56f510f3281e0709e2ab.1650363789.git.kai.huang@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Kai Huang 2022-04-19 23:17:03 +12:00 committed by Paolo Bonzini
parent c919e881ba
commit e54f1ff244
5 changed files with 34 additions and 10 deletions

View File

@ -90,6 +90,7 @@ static inline gfn_t kvm_mmu_max_gfn(void)
} }
void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask); void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask);
void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only); void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only);
void kvm_init_mmu(struct kvm_vcpu *vcpu); void kvm_init_mmu(struct kvm_vcpu *vcpu);

View File

@ -3546,7 +3546,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
* or a PAE 3-level page table. In either case we need to be aware that * or a PAE 3-level page table. In either case we need to be aware that
* the shadow page table may be a PAE or a long mode page table. * the shadow page table may be a PAE or a long mode page table.
*/ */
pm_mask = PT_PRESENT_MASK | shadow_me_mask; pm_mask = PT_PRESENT_MASK | shadow_me_value;
if (mmu->root_role.level >= PT64_ROOT_4LEVEL) { if (mmu->root_role.level >= PT64_ROOT_4LEVEL) {
pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
@ -4531,8 +4531,16 @@ static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
return; return;
for (i = context->root_role.level; --i >= 0;) { for (i = context->root_role.level; --i >= 0;) {
shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_mask; /*
shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_mask; * So far shadow_me_value is a constant during KVM's life
* time. Bits in shadow_me_value are allowed to be set.
* Bits in shadow_me_mask but not in shadow_me_value are
* not allowed to be set.
*/
shadow_zero_check->rsvd_bits_mask[0][i] |= shadow_me_mask;
shadow_zero_check->rsvd_bits_mask[1][i] |= shadow_me_mask;
shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_value;
shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_value;
} }
} }
@ -5624,7 +5632,7 @@ static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
if (!tdp_enabled) if (!tdp_enabled)
set_memory_decrypted((unsigned long)mmu->pae_root, 1); set_memory_decrypted((unsigned long)mmu->pae_root, 1);
else else
WARN_ON_ONCE(shadow_me_mask); WARN_ON_ONCE(shadow_me_value);
for (i = 0; i < 4; ++i) for (i = 0; i < 4; ++i)
mmu->pae_root[i] = INVALID_PAE_ROOT; mmu->pae_root[i] = INVALID_PAE_ROOT;

View File

@ -33,6 +33,7 @@ u64 __read_mostly shadow_mmio_value;
u64 __read_mostly shadow_mmio_mask; u64 __read_mostly shadow_mmio_mask;
u64 __read_mostly shadow_mmio_access_mask; u64 __read_mostly shadow_mmio_access_mask;
u64 __read_mostly shadow_present_mask; u64 __read_mostly shadow_present_mask;
u64 __read_mostly shadow_me_value;
u64 __read_mostly shadow_me_mask; u64 __read_mostly shadow_me_mask;
u64 __read_mostly shadow_acc_track_mask; u64 __read_mostly shadow_acc_track_mask;
@ -167,8 +168,8 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
else else
pte_access &= ~ACC_WRITE_MASK; pte_access &= ~ACC_WRITE_MASK;
if (shadow_me_mask && !kvm_is_mmio_pfn(pfn)) if (shadow_me_value && !kvm_is_mmio_pfn(pfn))
spte |= shadow_me_mask; spte |= shadow_me_value;
spte |= (u64)pfn << PAGE_SHIFT; spte |= (u64)pfn << PAGE_SHIFT;
@ -284,7 +285,7 @@ u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled)
u64 spte = SPTE_MMU_PRESENT_MASK; u64 spte = SPTE_MMU_PRESENT_MASK;
spte |= __pa(child_pt) | shadow_present_mask | PT_WRITABLE_MASK | spte |= __pa(child_pt) | shadow_present_mask | PT_WRITABLE_MASK |
shadow_user_mask | shadow_x_mask | shadow_me_mask; shadow_user_mask | shadow_x_mask | shadow_me_value;
if (ad_disabled) if (ad_disabled)
spte |= SPTE_TDP_AD_DISABLED_MASK; spte |= SPTE_TDP_AD_DISABLED_MASK;
@ -388,6 +389,17 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask)
} }
EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
/*
 * Record the memory-encryption SPTE configuration.
 *
 * @me_value: stored into shadow_me_value.
 * @me_mask:  stored into shadow_me_mask.
 *
 * @me_value is required to be a subset of @me_mask.  If it is not, a
 * warning is raised and both globals are cleared to zero instead.
 */
void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask)
{
	/* Bits present in me_value but absent from me_mask break the subset rule. */
	u64 stray_bits = me_value & ~me_mask;

	if (WARN_ON(stray_bits)) {
		me_value = 0;
		me_mask = 0;
	}

	shadow_me_value = me_value;
	shadow_me_mask = me_mask;
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_me_spte_mask);
void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only) void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only)
{ {
shadow_user_mask = VMX_EPT_READABLE_MASK; shadow_user_mask = VMX_EPT_READABLE_MASK;
@ -397,8 +409,6 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only)
shadow_x_mask = VMX_EPT_EXECUTABLE_MASK; shadow_x_mask = VMX_EPT_EXECUTABLE_MASK;
shadow_present_mask = has_exec_only ? 0ull : VMX_EPT_READABLE_MASK; shadow_present_mask = has_exec_only ? 0ull : VMX_EPT_READABLE_MASK;
shadow_acc_track_mask = VMX_EPT_RWX_MASK; shadow_acc_track_mask = VMX_EPT_RWX_MASK;
shadow_me_mask = 0ull;
shadow_host_writable_mask = EPT_SPTE_HOST_WRITABLE; shadow_host_writable_mask = EPT_SPTE_HOST_WRITABLE;
shadow_mmu_writable_mask = EPT_SPTE_MMU_WRITABLE; shadow_mmu_writable_mask = EPT_SPTE_MMU_WRITABLE;
@ -449,7 +459,8 @@ void kvm_mmu_reset_all_pte_masks(void)
shadow_x_mask = 0; shadow_x_mask = 0;
shadow_present_mask = PT_PRESENT_MASK; shadow_present_mask = PT_PRESENT_MASK;
shadow_acc_track_mask = 0; shadow_acc_track_mask = 0;
shadow_me_mask = sme_me_mask; shadow_me_mask = 0;
shadow_me_value = 0;
shadow_host_writable_mask = DEFAULT_SPTE_HOST_WRITABLE; shadow_host_writable_mask = DEFAULT_SPTE_HOST_WRITABLE;
shadow_mmu_writable_mask = DEFAULT_SPTE_MMU_WRITABLE; shadow_mmu_writable_mask = DEFAULT_SPTE_MMU_WRITABLE;

View File

@ -151,6 +151,7 @@ extern u64 __read_mostly shadow_mmio_value;
extern u64 __read_mostly shadow_mmio_mask; extern u64 __read_mostly shadow_mmio_mask;
extern u64 __read_mostly shadow_mmio_access_mask; extern u64 __read_mostly shadow_mmio_access_mask;
extern u64 __read_mostly shadow_present_mask; extern u64 __read_mostly shadow_present_mask;
extern u64 __read_mostly shadow_me_value;
extern u64 __read_mostly shadow_me_mask; extern u64 __read_mostly shadow_me_mask;
/* /*

View File

@ -4892,6 +4892,9 @@ static __init int svm_hardware_setup(void)
get_npt_level(), PG_LEVEL_1G); get_npt_level(), PG_LEVEL_1G);
pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis"); pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
/* Setup shadow_me_value and shadow_me_mask */
kvm_mmu_set_me_spte_mask(sme_me_mask, sme_me_mask);
/* Note, SEV setup consumes npt_enabled. */ /* Note, SEV setup consumes npt_enabled. */
sev_hardware_setup(); sev_hardware_setup();