mirror of https://github.com/torvalds/linux.git
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "This is a bit on the large side, mostly due to two changes:

   - Changes to disable some broken PMU virtualization (see below for
     details under "x86 PMU")

   - Clean up SVM's enter/exit assembly code so that it can be compiled
     without OBJECT_FILES_NON_STANDARD. This fixes a warning "Unpatched
     return thunk in use. This should not happen!" when running KVM
     selftests.

  Everything else is small bugfixes and selftest changes:

   - Fix a mostly benign bug in the gfn_to_pfn_cache infrastructure
     where KVM would allow userspace to refresh the cache with a bogus
     GPA. The bug has existed for quite some time, but was exposed by a
     new sanity check added in 6.9 (to ensure a cache is either
     GPA-based or HVA-based).

   - Drop an unused param from gfn_to_pfn_cache_invalidate_start() that
     got left behind during a 6.9 cleanup.

   - Fix a math goof in x86's hugepage logic for
     KVM_SET_MEMORY_ATTRIBUTES that results in an array overflow
     (detected by KASAN).

   - Fix a bug where KVM incorrectly clears root_role.direct when
     userspace sets guest CPUID.

   - Fix a dirty logging bug in the TDP MMU where KVM fails to
     write-protect SPTEs used by a nested guest, if KVM is using
     Page-Modification Logging and the nested hypervisor is NOT using
     EPT.

  x86 PMU:

   - Drop support for virtualizing adaptive PEBS, as KVM's
     implementation is architecturally broken without an obvious/easy
     path forward, and because exposing adaptive PEBS can leak host
     LBRs to the guest, i.e. can leak host kernel addresses to the
     guest.

   - Set the enable bits for general purpose counters in
     PERF_GLOBAL_CTRL at RESET time, as done by both Intel and AMD
     processors.

   - Disable LBR virtualization on CPUs that don't support LBR
     callstacks, as KVM unconditionally uses
     PERF_SAMPLE_BRANCH_CALL_STACK when creating the perf event, and
     would fail on such CPUs.

  Tests:

   - Fix a flaw in the max_guest_memory selftest that results in it
     exhausting the supply of ucall structures when run with more than
     256 vCPUs.

   - Mark KVM_MEM_READONLY as supported for RISC-V in
     set_memory_region_test"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (30 commits)
  KVM: Drop unused @may_block param from gfn_to_pfn_cache_invalidate_start()
  KVM: selftests: Add coverage of EPT-disabled to vmx_dirty_log_test
  KVM: x86/mmu: Fix and clarify comments about clearing D-bit vs. write-protecting
  KVM: x86/mmu: Remove function comments above clear_dirty_{gfn_range,pt_masked}()
  KVM: x86/mmu: Write-protect L2 SPTEs in TDP MMU when clearing dirty status
  KVM: x86/mmu: Precisely invalidate MMU root_role during CPUID update
  KVM: VMX: Disable LBR virtualization if the CPU doesn't support LBR callstacks
  perf/x86/intel: Expose existence of callback support to KVM
  KVM: VMX: Snapshot LBR capabilities during module initialization
  KVM: x86/pmu: Do not mask LVTPC when handling a PMI on AMD platforms
  KVM: x86: Snapshot if a vCPU's vendor model is AMD vs. Intel compatible
  KVM: x86: Stop compiling vmenter.S with OBJECT_FILES_NON_STANDARD
  KVM: SVM: Create a stack frame in __svm_sev_es_vcpu_run()
  KVM: SVM: Save/restore args across SEV-ES VMRUN via host save area
  KVM: SVM: Save/restore non-volatile GPRs in SEV-ES VMRUN via host save area
  KVM: SVM: Clobber RAX instead of RBX when discarding spec_ctrl_intercepted
  KVM: SVM: Drop 32-bit "support" from __svm_sev_es_vcpu_run()
  KVM: SVM: Wrap __svm_sev_es_vcpu_run() with #ifdef CONFIG_KVM_AMD_SEV
  KVM: SVM: Create a stack frame in __svm_vcpu_run() for unwinding
  KVM: SVM: Remove a useless zeroing of allocated memory
  ...
commit 817772266d
@@ -1693,6 +1693,7 @@ void x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
 	lbr->from = x86_pmu.lbr_from;
 	lbr->to = x86_pmu.lbr_to;
 	lbr->info = x86_pmu.lbr_info;
+	lbr->has_callstack = x86_pmu_has_lbr_callstack();
 }
 EXPORT_SYMBOL_GPL(x86_perf_get_lbr);
 
@@ -855,6 +855,7 @@ struct kvm_vcpu_arch {
 	int cpuid_nent;
 	struct kvm_cpuid_entry2 *cpuid_entries;
 	struct kvm_hypervisor_cpuid kvm_cpuid;
+	bool is_amd_compatible;
 
 	/*
 	 * FIXME: Drop this macro and use KVM_NR_GOVERNED_FEATURES directly
@@ -555,6 +555,7 @@ struct x86_pmu_lbr {
 	unsigned int	from;
 	unsigned int	to;
 	unsigned int	info;
+	bool		has_callstack;
 };
 
 extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
@@ -3,11 +3,6 @@
 ccflags-y += -I $(srctree)/arch/x86/kvm
 ccflags-$(CONFIG_KVM_WERROR) += -Werror
 
-ifeq ($(CONFIG_FRAME_POINTER),y)
-OBJECT_FILES_NON_STANDARD_vmx/vmenter.o := y
-OBJECT_FILES_NON_STANDARD_svm/vmenter.o := y
-endif
-
 include $(srctree)/virt/kvm/Makefile.kvm
 
 kvm-y	+= x86.o emulate.o i8259.o irq.o lapic.o \
@@ -376,6 +376,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 
 	kvm_update_pv_runtime(vcpu);
 
+	vcpu->arch.is_amd_compatible = guest_cpuid_is_amd_or_hygon(vcpu);
 	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
 	vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu);
 
@@ -120,6 +120,16 @@ static inline bool guest_cpuid_is_intel(struct kvm_vcpu *vcpu)
 	return best && is_guest_vendor_intel(best->ebx, best->ecx, best->edx);
 }
 
+static inline bool guest_cpuid_is_amd_compatible(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.is_amd_compatible;
+}
+
+static inline bool guest_cpuid_is_intel_compatible(struct kvm_vcpu *vcpu)
+{
+	return !guest_cpuid_is_amd_compatible(vcpu);
+}
+
 static inline int guest_cpuid_family(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
@@ -2776,7 +2776,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
 	trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
 
 	r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL);
-	if (r && lvt_type == APIC_LVTPC)
+	if (r && lvt_type == APIC_LVTPC &&
+	    guest_cpuid_is_intel_compatible(apic->vcpu))
 		kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED);
 	return r;
 }
@@ -4935,7 +4935,7 @@ static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 				context->cpu_role.base.level, is_efer_nx(context),
 				guest_can_use(vcpu, X86_FEATURE_GBPAGES),
 				is_cr4_pse(context),
-				guest_cpuid_is_amd_or_hygon(vcpu));
+				guest_cpuid_is_amd_compatible(vcpu));
 }
 
 static void __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
@@ -5576,9 +5576,9 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
 	 * that problem is swept under the rug; KVM's CPUID API is horrific and
 	 * it's all but impossible to solve it without introducing a new API.
 	 */
-	vcpu->arch.root_mmu.root_role.word = 0;
-	vcpu->arch.guest_mmu.root_role.word = 0;
-	vcpu->arch.nested_mmu.root_role.word = 0;
+	vcpu->arch.root_mmu.root_role.invalid = 1;
+	vcpu->arch.guest_mmu.root_role.invalid = 1;
+	vcpu->arch.nested_mmu.root_role.invalid = 1;
 	vcpu->arch.root_mmu.cpu_role.ext.valid = 0;
 	vcpu->arch.guest_mmu.cpu_role.ext.valid = 0;
 	vcpu->arch.nested_mmu.cpu_role.ext.valid = 0;
@@ -7399,7 +7399,8 @@ bool kvm_arch_post_set_memory_attributes(struct kvm *kvm,
 			 * by the memslot, KVM can't use a hugepage due to the
 			 * misaligned address regardless of memory attributes.
 			 */
-			if (gfn >= slot->base_gfn) {
+			if (gfn >= slot->base_gfn &&
+			    gfn + nr_pages <= slot->base_gfn + slot->npages) {
 				if (hugepage_has_attrs(kvm, slot, gfn, level, attrs))
 					hugepage_clear_mixed(slot, gfn, level);
 				else
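To make the "math goof" fixed in the kvm_arch_post_set_memory_attributes() hunk above concrete: the old code only required the aligned hugepage range to start inside the memslot, so a range that overhangs the end of the slot could still be used to index per-slot metadata past its last element. The sketch below is not KVM code; struct memslot, hugepage_fully_in_slot() and the example numbers are made up for illustration, under the assumption that the slot only covers part of the last hugepage-sized region.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for KVM's memslot bookkeeping. */
struct memslot {
        uint64_t base_gfn;      /* first guest frame covered by the slot */
        uint64_t npages;        /* number of guest frames in the slot */
};

/*
 * True only if the aligned range [gfn, gfn + nr_pages) lies entirely inside
 * the slot, mirroring the corrected check above.  Testing only
 * "gfn >= slot->base_gfn" would accept a range that overhangs the slot.
 */
static bool hugepage_fully_in_slot(const struct memslot *slot,
                                   uint64_t gfn, uint64_t nr_pages)
{
        return gfn >= slot->base_gfn &&
               gfn + nr_pages <= slot->base_gfn + slot->npages;
}

int main(void)
{
        /* A slot whose last 2MiB-sized region is only half covered. */
        const struct memslot slot = { .base_gfn = 0x100, .npages = 0x180 };
        const uint64_t nr_pages = 512;  /* one 2MiB hugepage worth of frames */
        const uint64_t gfn = 0x200;     /* aligned start, inside the slot */

        printf("start in slot: %d\n", gfn >= slot.base_gfn);
        printf("fully in slot: %d\n", hugepage_fully_in_slot(&slot, gfn, nr_pages));
        return 0;
}

With these numbers the old check passes while the corrected one rejects the range, which is exactly the case that overflowed the attribute array under KASAN.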
@@ -1548,17 +1548,21 @@ void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm,
 	}
 }
 
-/*
- * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If
- * AD bits are enabled, this will involve clearing the dirty bit on each SPTE.
- * If AD bits are not enabled, this will require clearing the writable bit on
- * each SPTE. Returns true if an SPTE has been changed and the TLBs need to
- * be flushed.
- */
+static bool tdp_mmu_need_write_protect(struct kvm_mmu_page *sp)
+{
+	/*
+	 * All TDP MMU shadow pages share the same role as their root, aside
+	 * from level, so it is valid to key off any shadow page to determine if
+	 * write protection is needed for an entire tree.
+	 */
+	return kvm_mmu_page_ad_need_write_protect(sp) || !kvm_ad_enabled();
+}
+
 static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 				  gfn_t start, gfn_t end)
 {
-	u64 dbit = kvm_ad_enabled() ? shadow_dirty_mask : PT_WRITABLE_MASK;
+	const u64 dbit = tdp_mmu_need_write_protect(root) ? PT_WRITABLE_MASK :
+							    shadow_dirty_mask;
 	struct tdp_iter iter;
 	bool spte_set = false;
 
@@ -1573,7 +1577,7 @@ retry:
 		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
 			continue;
 
-		KVM_MMU_WARN_ON(kvm_ad_enabled() &&
+		KVM_MMU_WARN_ON(dbit == shadow_dirty_mask &&
 				spte_ad_need_write_protect(iter.old_spte));
 
 		if (!(iter.old_spte & dbit))
@@ -1590,11 +1594,9 @@
 }
 
 /*
- * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If
- * AD bits are enabled, this will involve clearing the dirty bit on each SPTE.
- * If AD bits are not enabled, this will require clearing the writable bit on
- * each SPTE. Returns true if an SPTE has been changed and the TLBs need to
- * be flushed.
+ * Clear the dirty status (D-bit or W-bit) of all the SPTEs mapping GFNs in the
+ * memslot. Returns true if an SPTE has been changed and the TLBs need to be
+ * flushed.
  */
 bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
 				  const struct kvm_memory_slot *slot)
@@ -1610,18 +1612,11 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
 	return spte_set;
 }
 
-/*
- * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is
- * set in mask, starting at gfn. The given memslot is expected to contain all
- * the GFNs represented by set bits in the mask. If AD bits are enabled,
- * clearing the dirty status will involve clearing the dirty bit on each SPTE
- * or, if AD bits are not enabled, clearing the writable bit on each SPTE.
- */
 static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
 				  gfn_t gfn, unsigned long mask, bool wrprot)
 {
-	u64 dbit = (wrprot || !kvm_ad_enabled()) ? PT_WRITABLE_MASK :
-						   shadow_dirty_mask;
+	const u64 dbit = (wrprot || tdp_mmu_need_write_protect(root)) ? PT_WRITABLE_MASK :
+									shadow_dirty_mask;
 	struct tdp_iter iter;
 
 	lockdep_assert_held_write(&kvm->mmu_lock);
@@ -1633,7 +1628,7 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
 		if (!mask)
 			break;
 
-		KVM_MMU_WARN_ON(kvm_ad_enabled() &&
+		KVM_MMU_WARN_ON(dbit == shadow_dirty_mask &&
 				spte_ad_need_write_protect(iter.old_spte));
 
 		if (iter.level > PG_LEVEL_4K ||
@@ -1659,11 +1654,9 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
 }
 
 /*
- * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is
- * set in mask, starting at gfn. The given memslot is expected to contain all
- * the GFNs represented by set bits in the mask. If AD bits are enabled,
- * clearing the dirty status will involve clearing the dirty bit on each SPTE
- * or, if AD bits are not enabled, clearing the writable bit on each SPTE.
+ * Clear the dirty status (D-bit or W-bit) of all the 4k SPTEs mapping GFNs for
+ * which a bit is set in mask, starting at gfn. The given memslot is expected to
+ * contain all the GFNs represented by set bits in the mask.
  */
 void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
 				       struct kvm_memory_slot *slot,
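The dirty-logging fix in the clear_dirty_gfn_range() and clear_dirty_pt_masked() hunks above boils down to choosing which bit to clear per SPTE: the hardware dirty bit when A/D tracking can be relied on, or the writable bit when the tree must be write-protected (for example an L2 tree while KVM uses Page-Modification Logging). The following standalone sketch is an illustration, not KVM code; the bit positions are only loosely modeled on the EPT layout and the helper names are invented.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical SPTE bits, loosely modeled on EPT. */
#define SPTE_WRITABLE   (1ull << 1)
#define SPTE_DIRTY      (1ull << 9)

/*
 * Pick the bit to clear when harvesting dirty state: clear the writable bit
 * when the tree needs write protection so the next guest write faults,
 * otherwise clearing the dirty bit is sufficient.
 */
static uint64_t dirty_clear_mask(bool need_write_protect)
{
        return need_write_protect ? SPTE_WRITABLE : SPTE_DIRTY;
}

int main(void)
{
        uint64_t spte = SPTE_WRITABLE | SPTE_DIRTY;

        printf("A/D tracking usable:      clear 0x%llx\n",
               (unsigned long long)dirty_clear_mask(false));
        printf("L2 tree under PML:        clear 0x%llx\n",
               (unsigned long long)dirty_clear_mask(true));

        spte &= ~dirty_clear_mask(true);
        printf("SPTE after write-protect: 0x%llx\n", (unsigned long long)spte);
        return 0;
}

The bug being fixed was that the old code keyed this decision off kvm_ad_enabled() alone, which picked the dirty bit even for trees that actually needed the write-protect path.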
@@ -775,8 +775,20 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
 	pmu->pebs_data_cfg_mask = ~0ull;
 	bitmap_zero(pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
 
-	if (vcpu->kvm->arch.enable_pmu)
-		static_call(kvm_x86_pmu_refresh)(vcpu);
+	if (!vcpu->kvm->arch.enable_pmu)
+		return;
+
+	static_call(kvm_x86_pmu_refresh)(vcpu);
+
+	/*
+	 * At RESET, both Intel and AMD CPUs set all enable bits for general
+	 * purpose counters in IA32_PERF_GLOBAL_CTRL (so that software that
+	 * was written for v1 PMUs don't unknowingly leave GP counters disabled
+	 * in the global controls). Emulate that behavior when refreshing the
+	 * PMU so that userspace doesn't need to manually set PERF_GLOBAL_CTRL.
+	 */
+	if (kvm_pmu_has_perf_global_ctrl(pmu) && pmu->nr_arch_gp_counters)
+		pmu->global_ctrl = GENMASK_ULL(pmu->nr_arch_gp_counters - 1, 0);
 }
 
 void kvm_pmu_init(struct kvm_vcpu *vcpu)
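As a quick illustration of the kvm_pmu_refresh() change above: at RESET a v2+ PMU is expected to have the enable bits for all general purpose counters set in PERF_GLOBAL_CTRL, i.e. bits [n-1:0] for n counters, and zero when there are none. This is a minimal standalone sketch of that arithmetic, not kernel code; genmask_ull() and reset_global_ctrl() are stand-ins written for this example.

#include <stdint.h>
#include <stdio.h>

/* Build a mask with bits [hi:lo] set, similar in spirit to GENMASK_ULL(). */
static uint64_t genmask_ull(unsigned int hi, unsigned int lo)
{
        return ((~0ull) >> (63 - hi)) & ~((1ull << lo) - 1);
}

/*
 * Post-RESET PERF_GLOBAL_CTRL: enable bits for all GP counters, or zero when
 * there are no GP counters (the edge case the selftest below also checks).
 */
static uint64_t reset_global_ctrl(unsigned int nr_gp_counters)
{
        return nr_gp_counters ? genmask_ull(nr_gp_counters - 1, 0) : 0;
}

int main(void)
{
        for (unsigned int n = 0; n <= 8; n += 4)
                printf("%u GP counters -> PERF_GLOBAL_CTRL = 0x%llx\n",
                       n, (unsigned long long)reset_global_ctrl(n));
        return 0;
}

For 8 counters this yields 0xff, matching the GENMASK_ULL(nr_arch_gp_counters - 1, 0) value assigned in the hunk above.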
@@ -434,7 +434,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
 	/* Avoid using vmalloc for smaller buffers. */
 	size = npages * sizeof(struct page *);
 	if (size > PAGE_SIZE)
-		pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+		pages = __vmalloc(size, GFP_KERNEL_ACCOUNT);
 	else
 		pages = kmalloc(size, GFP_KERNEL_ACCOUNT);
 
@@ -1503,6 +1503,11 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu)
 	__free_pages(virt_to_page(svm->msrpm), get_order(MSRPM_SIZE));
 }
 
+static struct sev_es_save_area *sev_es_host_save_area(struct svm_cpu_data *sd)
+{
+	return page_address(sd->save_area) + 0x400;
+}
+
 static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -1519,12 +1524,8 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 	 * or subsequent vmload of host save area.
 	 */
 	vmsave(sd->save_area_pa);
-	if (sev_es_guest(vcpu->kvm)) {
-		struct sev_es_save_area *hostsa;
-		hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400);
-
-		sev_es_prepare_switch_to_guest(hostsa);
-	}
+	if (sev_es_guest(vcpu->kvm))
+		sev_es_prepare_switch_to_guest(sev_es_host_save_area(sd));
 
 	if (tsc_scaling)
 		__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
@@ -4101,6 +4102,7 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
 
 static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted)
 {
+	struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	guest_state_enter_irqoff();
@@ -4108,7 +4110,8 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in
 	amd_clear_divider();
 
 	if (sev_es_guest(vcpu->kvm))
-		__svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted);
+		__svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted,
+				      sev_es_host_save_area(sd));
 	else
 		__svm_vcpu_run(svm, spec_ctrl_intercepted);
 
@@ -698,7 +698,8 @@ struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);
 
 /* vmenter.S */
 
-void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
+void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted,
+			   struct sev_es_save_area *hostsa);
 void __svm_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
 
 #define DEFINE_KVM_GHCB_ACCESSORS(field) \
@@ -3,6 +3,7 @@
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
 #include <asm/bitsperlong.h>
+#include <asm/frame.h>
 #include <asm/kvm_vcpu_regs.h>
 #include <asm/nospec-branch.h>
 #include "kvm-asm-offsets.h"
@@ -67,7 +68,7 @@
 		"", X86_FEATURE_V_SPEC_CTRL
 901:
 .endm
-.macro RESTORE_HOST_SPEC_CTRL_BODY
+.macro RESTORE_HOST_SPEC_CTRL_BODY spec_ctrl_intercepted:req
 900:
 	/* Same for after vmexit. */
 	mov $MSR_IA32_SPEC_CTRL, %ecx
@@ -76,7 +77,7 @@
 	 * Load the value that the guest had written into MSR_IA32_SPEC_CTRL,
 	 * if it was not intercepted during guest execution.
 	 */
-	cmpb $0, (%_ASM_SP)
+	cmpb $0, \spec_ctrl_intercepted
 	jnz 998f
 	rdmsr
 	movl %eax, SVM_spec_ctrl(%_ASM_DI)
@@ -99,6 +100,7 @@
  */
 SYM_FUNC_START(__svm_vcpu_run)
 	push %_ASM_BP
+	mov  %_ASM_SP, %_ASM_BP
 #ifdef CONFIG_X86_64
 	push %r15
 	push %r14
@@ -268,7 +270,7 @@ SYM_FUNC_START(__svm_vcpu_run)
 	RET
 
 	RESTORE_GUEST_SPEC_CTRL_BODY
-	RESTORE_HOST_SPEC_CTRL_BODY
+	RESTORE_HOST_SPEC_CTRL_BODY (%_ASM_SP)
 
 10:	cmpb $0, _ASM_RIP(kvm_rebooting)
 	jne 2b
@@ -290,66 +292,68 @@ SYM_FUNC_START(__svm_vcpu_run)
 
 SYM_FUNC_END(__svm_vcpu_run)
 
+#ifdef CONFIG_KVM_AMD_SEV
+
+
+#ifdef CONFIG_X86_64
+#define SEV_ES_GPRS_BASE 0x300
+#define SEV_ES_RBX	(SEV_ES_GPRS_BASE + __VCPU_REGS_RBX * WORD_SIZE)
+#define SEV_ES_RBP	(SEV_ES_GPRS_BASE + __VCPU_REGS_RBP * WORD_SIZE)
+#define SEV_ES_RSI	(SEV_ES_GPRS_BASE + __VCPU_REGS_RSI * WORD_SIZE)
+#define SEV_ES_RDI	(SEV_ES_GPRS_BASE + __VCPU_REGS_RDI * WORD_SIZE)
+#define SEV_ES_R12	(SEV_ES_GPRS_BASE + __VCPU_REGS_R12 * WORD_SIZE)
+#define SEV_ES_R13	(SEV_ES_GPRS_BASE + __VCPU_REGS_R13 * WORD_SIZE)
+#define SEV_ES_R14	(SEV_ES_GPRS_BASE + __VCPU_REGS_R14 * WORD_SIZE)
+#define SEV_ES_R15	(SEV_ES_GPRS_BASE + __VCPU_REGS_R15 * WORD_SIZE)
+#endif
+
 /**
  * __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode
  * @svm:	struct vcpu_svm *
  * @spec_ctrl_intercepted: bool
  */
 SYM_FUNC_START(__svm_sev_es_vcpu_run)
-	push %_ASM_BP
-#ifdef CONFIG_X86_64
-	push %r15
-	push %r14
-	push %r13
-	push %r12
-#else
-	push %edi
-	push %esi
-#endif
-	push %_ASM_BX
+	FRAME_BEGIN
 
 	/*
-	 * Save variables needed after vmexit on the stack, in inverse
-	 * order compared to when they are needed.
+	 * Save non-volatile (callee-saved) registers to the host save area.
+	 * Except for RAX and RSP, all GPRs are restored on #VMEXIT, but not
+	 * saved on VMRUN.
 	 */
+	mov %rbp, SEV_ES_RBP (%rdx)
+	mov %r15, SEV_ES_R15 (%rdx)
+	mov %r14, SEV_ES_R14 (%rdx)
+	mov %r13, SEV_ES_R13 (%rdx)
+	mov %r12, SEV_ES_R12 (%rdx)
+	mov %rbx, SEV_ES_RBX (%rdx)
 
-	/* Accessed directly from the stack in RESTORE_HOST_SPEC_CTRL. */
-	push %_ASM_ARG2
-
-	/* Save @svm. */
-	push %_ASM_ARG1
-
-.ifnc _ASM_ARG1, _ASM_DI
 	/*
-	 * Stash @svm in RDI early. On 32-bit, arguments are in RAX, RCX
-	 * and RDX which are clobbered by RESTORE_GUEST_SPEC_CTRL.
+	 * Save volatile registers that hold arguments that are needed after
+	 * #VMEXIT (RDI=@svm and RSI=@spec_ctrl_intercepted).
 	 */
-	mov %_ASM_ARG1, %_ASM_DI
-.endif
+	mov %rdi, SEV_ES_RDI (%rdx)
+	mov %rsi, SEV_ES_RSI (%rdx)
 
-	/* Clobbers RAX, RCX, RDX. */
+	/* Clobbers RAX, RCX, RDX (@hostsa). */
 	RESTORE_GUEST_SPEC_CTRL
 
 	/* Get svm->current_vmcb->pa into RAX. */
-	mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX
-	mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX
+	mov SVM_current_vmcb(%rdi), %rax
+	mov KVM_VMCB_pa(%rax), %rax
 
 	/* Enter guest mode */
 	sti
 
-1:	vmrun %_ASM_AX
+1:	vmrun %rax
 
 2:	cli
 
-	/* Pop @svm to RDI, guest registers have been saved already. */
-	pop %_ASM_DI
-
 #ifdef CONFIG_MITIGATION_RETPOLINE
 	/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
-	FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+	FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
 #endif
 
-	/* Clobbers RAX, RCX, RDX. */
+	/* Clobbers RAX, RCX, RDX, consumes RDI (@svm) and RSI (@spec_ctrl_intercepted). */
 	RESTORE_HOST_SPEC_CTRL
 
 	/*
@@ -361,30 +365,17 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
 	 */
 	UNTRAIN_RET_VM
 
-	/* "Pop" @spec_ctrl_intercepted. */
-	pop %_ASM_BX
-
-	pop %_ASM_BX
-
-#ifdef CONFIG_X86_64
-	pop %r12
-	pop %r13
-	pop %r14
-	pop %r15
-#else
-	pop %esi
-	pop %edi
-#endif
-	pop %_ASM_BP
+	FRAME_END
 	RET
 
 	RESTORE_GUEST_SPEC_CTRL_BODY
-	RESTORE_HOST_SPEC_CTRL_BODY
+	RESTORE_HOST_SPEC_CTRL_BODY %sil
 
-3:	cmpb $0, _ASM_RIP(kvm_rebooting)
+3:	cmpb $0, kvm_rebooting(%rip)
 	jne 2b
 	ud2
 
 	_ASM_EXTABLE(1b, 3b)
 
 SYM_FUNC_END(__svm_sev_es_vcpu_run)
+#endif /* CONFIG_KVM_AMD_SEV */
@@ -535,7 +535,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 	perf_capabilities = vcpu_get_perf_capabilities(vcpu);
 	if (cpuid_model_is_consistent(vcpu) &&
 	    (perf_capabilities & PMU_CAP_LBR_FMT))
-		x86_perf_get_lbr(&lbr_desc->records);
+		memcpy(&lbr_desc->records, &vmx_lbr_caps, sizeof(vmx_lbr_caps));
 	else
 		lbr_desc->records.nr = 0;
 
@@ -218,6 +218,8 @@ module_param(ple_window_max, uint, 0444);
 int __read_mostly pt_mode = PT_MODE_SYSTEM;
 module_param(pt_mode, int, S_IRUGO);
 
+struct x86_pmu_lbr __ro_after_init vmx_lbr_caps;
+
 static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
 static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond);
 static DEFINE_MUTEX(vmx_l1d_flush_mutex);
@@ -7862,10 +7864,9 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 	vmx_update_exception_bitmap(vcpu);
 }
 
-static u64 vmx_get_perf_capabilities(void)
+static __init u64 vmx_get_perf_capabilities(void)
 {
 	u64 perf_cap = PMU_CAP_FW_WRITES;
-	struct x86_pmu_lbr lbr;
 	u64 host_perf_cap = 0;
 
 	if (!enable_pmu)
@@ -7875,15 +7876,43 @@ static u64 vmx_get_perf_capabilities(void)
 		rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
 
 	if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) {
-		x86_perf_get_lbr(&lbr);
-		if (lbr.nr)
+		x86_perf_get_lbr(&vmx_lbr_caps);
+
+		/*
+		 * KVM requires LBR callstack support, as the overhead due to
+		 * context switching LBRs without said support is too high.
+		 * See intel_pmu_create_guest_lbr_event() for more info.
+		 */
+		if (!vmx_lbr_caps.has_callstack)
+			memset(&vmx_lbr_caps, 0, sizeof(vmx_lbr_caps));
+		else if (vmx_lbr_caps.nr)
 			perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
 	}
 
 	if (vmx_pebs_supported()) {
 		perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK;
-		if ((perf_cap & PERF_CAP_PEBS_FORMAT) < 4)
-			perf_cap &= ~PERF_CAP_PEBS_BASELINE;
+
+		/*
+		 * Disallow adaptive PEBS as it is functionally broken, can be
+		 * used by the guest to read *host* LBRs, and can be used to
+		 * bypass userspace event filters. To correctly and safely
+		 * support adaptive PEBS, KVM needs to:
+		 *
+		 * 1. Account for the ADAPTIVE flag when (re)programming fixed
+		 *    counters.
+		 *
+		 * 2. Gain support from perf (or take direct control of counter
+		 *    programming) to support events without adaptive PEBS
+		 *    enabled for the hardware counter.
+		 *
+		 * 3. Ensure LBR MSRs cannot hold host data on VM-Entry with
+		 *    adaptive PEBS enabled and MSR_PEBS_DATA_CFG.LBRS=1.
+		 *
+		 * 4. Document which PMU events are effectively exposed to the
+		 *    guest via adaptive PEBS, and make adaptive PEBS mutually
+		 *    exclusive with KVM_SET_PMU_EVENT_FILTER if necessary.
+		 */
+		perf_cap &= ~PERF_CAP_PEBS_BASELINE;
 	}
 
 	return perf_cap;
@@ -15,6 +15,7 @@
 #include "vmx_ops.h"
 #include "../cpuid.h"
 #include "run_flags.h"
+#include "../mmu.h"
 
 #define MSR_TYPE_R	1
 #define MSR_TYPE_W	2
@@ -109,6 +110,8 @@ struct lbr_desc {
 	bool msr_passthrough;
 };
 
+extern struct x86_pmu_lbr vmx_lbr_caps;
+
 /*
  * The nested_vmx structure is part of vcpu_vmx, and holds information we need
  * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
@@ -719,7 +722,8 @@ static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu)
 	if (!enable_ept)
 		return true;
 
-	return allow_smaller_maxphyaddr && cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits;
+	return allow_smaller_maxphyaddr &&
+	       cpuid_maxphyaddr(vcpu) < kvm_get_shadow_phys_bits();
 }
 
 static inline bool is_unrestricted_guest(struct kvm_vcpu *vcpu)
@@ -3470,7 +3470,7 @@ static bool is_mci_status_msr(u32 msr)
 static bool can_set_mci_status(struct kvm_vcpu *vcpu)
 {
 	/* McStatusWrEn enabled? */
-	if (guest_cpuid_is_amd_or_hygon(vcpu))
+	if (guest_cpuid_is_amd_compatible(vcpu))
 		return !!(vcpu->arch.msr_hwcr & BIT_ULL(18));
 
 	return false;
@@ -22,10 +22,11 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
 {
 	uint64_t gpa;
 
-	for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
-		*((volatile uint64_t *)gpa) = gpa;
-
-	GUEST_DONE();
+	for (;;) {
+		for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
+			*((volatile uint64_t *)gpa) = gpa;
+		GUEST_SYNC(0);
+	}
 }
 
 struct vcpu_info {
@@ -55,7 +56,7 @@ static void rendezvous_with_boss(void)
 static void run_vcpu(struct kvm_vcpu *vcpu)
 {
 	vcpu_run(vcpu);
-	TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+	TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
 }
 
 static void *vcpu_worker(void *data)
@@ -64,17 +65,13 @@ static void *vcpu_worker(void *data)
 	struct kvm_vcpu *vcpu = info->vcpu;
 	struct kvm_vm *vm = vcpu->vm;
 	struct kvm_sregs sregs;
-	struct kvm_regs regs;
 
 	vcpu_args_set(vcpu, 3, info->start_gpa, info->end_gpa, vm->page_size);
 
-	/* Snapshot regs before the first run. */
-	vcpu_regs_get(vcpu, &regs);
 	rendezvous_with_boss();
 
 	run_vcpu(vcpu);
 	rendezvous_with_boss();
-	vcpu_regs_set(vcpu, &regs);
 	vcpu_sregs_get(vcpu, &sregs);
 #ifdef __x86_64__
 	/* Toggle CR0.WP to trigger a MMU context reset. */
@@ -333,7 +333,7 @@ static void test_invalid_memory_region_flags(void)
 	struct kvm_vm *vm;
 	int r, i;
 
-#if defined __aarch64__ || defined __x86_64__
+#if defined __aarch64__ || defined __riscv || defined __x86_64__
 	supported_flags |= KVM_MEM_READONLY;
 #endif
 
@@ -416,12 +416,30 @@ static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters
 
 static void guest_test_gp_counters(void)
 {
+	uint8_t pmu_version = guest_get_pmu_version();
 	uint8_t nr_gp_counters = 0;
 	uint32_t base_msr;
 
-	if (guest_get_pmu_version())
+	if (pmu_version)
 		nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
 
+	/*
+	 * For v2+ PMUs, PERF_GLOBAL_CTRL's architectural post-RESET value is
+	 * "Sets bits n-1:0 and clears the upper bits", where 'n' is the number
+	 * of GP counters. If there are no GP counters, require KVM to leave
+	 * PERF_GLOBAL_CTRL '0'. This edge case isn't covered by the SDM, but
+	 * follow the spirit of the architecture and only globally enable GP
+	 * counters, of which there are none.
+	 */
+	if (pmu_version > 1) {
+		uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL);
+
+		if (nr_gp_counters)
+			GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0));
+		else
+			GUEST_ASSERT_EQ(global_ctrl, 0);
+	}
+
 	if (this_cpu_has(X86_FEATURE_PDCM) &&
 	    rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
 		base_msr = MSR_IA32_PMC0;
@@ -28,16 +28,16 @@
 #define NESTED_TEST_MEM1	0xc0001000
 #define NESTED_TEST_MEM2	0xc0002000
 
-static void l2_guest_code(void)
+static void l2_guest_code(u64 *a, u64 *b)
 {
-	*(volatile uint64_t *)NESTED_TEST_MEM1;
-	*(volatile uint64_t *)NESTED_TEST_MEM1 = 1;
+	READ_ONCE(*a);
+	WRITE_ONCE(*a, 1);
 	GUEST_SYNC(true);
 	GUEST_SYNC(false);
 
-	*(volatile uint64_t *)NESTED_TEST_MEM2 = 1;
+	WRITE_ONCE(*b, 1);
 	GUEST_SYNC(true);
-	*(volatile uint64_t *)NESTED_TEST_MEM2 = 1;
+	WRITE_ONCE(*b, 1);
 	GUEST_SYNC(true);
 	GUEST_SYNC(false);
 
@@ -45,17 +45,33 @@ static void l2_guest_code(void)
 	vmcall();
 }
 
+static void l2_guest_code_ept_enabled(void)
+{
+	l2_guest_code((u64 *)NESTED_TEST_MEM1, (u64 *)NESTED_TEST_MEM2);
+}
+
+static void l2_guest_code_ept_disabled(void)
+{
+	/* Access the same L1 GPAs as l2_guest_code_ept_enabled() */
+	l2_guest_code((u64 *)GUEST_TEST_MEM, (u64 *)GUEST_TEST_MEM);
+}
+
 void l1_guest_code(struct vmx_pages *vmx)
 {
 #define L2_GUEST_STACK_SIZE 64
 	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	void *l2_rip;
 
 	GUEST_ASSERT(vmx->vmcs_gpa);
 	GUEST_ASSERT(prepare_for_vmx_operation(vmx));
 	GUEST_ASSERT(load_vmcs(vmx));
 
-	prepare_vmcs(vmx, l2_guest_code,
-		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+	if (vmx->eptp_gpa)
+		l2_rip = l2_guest_code_ept_enabled;
+	else
+		l2_rip = l2_guest_code_ept_disabled;
+
+	prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
 
 	GUEST_SYNC(false);
 	GUEST_ASSERT(!vmlaunch());
@@ -64,7 +80,7 @@ void l1_guest_code(struct vmx_pages *vmx)
 	GUEST_DONE();
 }
 
-int main(int argc, char *argv[])
+static void test_vmx_dirty_log(bool enable_ept)
 {
 	vm_vaddr_t vmx_pages_gva = 0;
 	struct vmx_pages *vmx;
@@ -76,8 +92,7 @@ int main(int argc, char *argv[])
 	struct ucall uc;
 	bool done = false;
 
-	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-	TEST_REQUIRE(kvm_cpu_has_ept());
+	pr_info("Nested EPT: %s\n", enable_ept ? "enabled" : "disabled");
 
 	/* Create VM */
 	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
@@ -103,11 +118,16 @@ int main(int argc, char *argv[])
 	 *
 	 * Note that prepare_eptp should be called only L1's GPA map is done,
	 * meaning after the last call to virt_map.
+	 *
+	 * When EPT is disabled, the L2 guest code will still access the same L1
+	 * GPAs as the EPT enabled case.
 	 */
-	prepare_eptp(vmx, vm, 0);
-	nested_map_memslot(vmx, vm, 0);
-	nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
-	nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
+	if (enable_ept) {
+		prepare_eptp(vmx, vm, 0);
+		nested_map_memslot(vmx, vm, 0);
+		nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
+		nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
+	}
 
 	bmap = bitmap_zalloc(TEST_MEM_PAGES);
 	host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
@@ -148,3 +168,15 @@ int main(int argc, char *argv[])
 		}
 	}
 }
+
+int main(int argc, char *argv[])
+{
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+	test_vmx_dirty_log(/*enable_ept=*/false);
+
+	if (kvm_cpu_has_ept())
+		test_vmx_dirty_log(/*enable_ept=*/true);
+
+	return 0;
+}
@@ -832,8 +832,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 	 * mn_active_invalidate_count (see above) instead of
 	 * mmu_invalidate_in_progress.
 	 */
-	gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end,
-					  hva_range.may_block);
+	gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end);
 
 	/*
 	 * If one or more memslots were found and thus zapped, notify arch code
@@ -26,13 +26,11 @@ kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible,
 #ifdef CONFIG_HAVE_KVM_PFNCACHE
 void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
 				       unsigned long start,
-				       unsigned long end,
-				       bool may_block);
+				       unsigned long end);
 #else
 static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
 						     unsigned long start,
-						     unsigned long end,
-						     bool may_block)
+						     unsigned long end)
 {
 }
 #endif /* HAVE_KVM_PFNCACHE */
@@ -23,7 +23,7 @@
  * MMU notifier 'invalidate_range_start' hook.
  */
 void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
-				       unsigned long end, bool may_block)
+				       unsigned long end)
 {
 	struct gfn_to_pfn_cache *gpc;
 
@@ -57,6 +57,19 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
 	spin_unlock(&kvm->gpc_lock);
 }
 
+static bool kvm_gpc_is_valid_len(gpa_t gpa, unsigned long uhva,
+				 unsigned long len)
+{
+	unsigned long offset = kvm_is_error_gpa(gpa) ? offset_in_page(uhva) :
+						       offset_in_page(gpa);
+
+	/*
+	 * The cached access must fit within a single page. The 'len' argument
+	 * to activate() and refresh() exists only to enforce that.
+	 */
+	return offset + len <= PAGE_SIZE;
+}
+
 bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len)
 {
 	struct kvm_memslots *slots = kvm_memslots(gpc->kvm);
@@ -74,7 +87,7 @@ bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len)
 	if (kvm_is_error_hva(gpc->uhva))
 		return false;
 
-	if (offset_in_page(gpc->uhva) + len > PAGE_SIZE)
+	if (!kvm_gpc_is_valid_len(gpc->gpa, gpc->uhva, len))
 		return false;
 
 	if (!gpc->valid)
@@ -232,8 +245,7 @@ out_error:
 	return -EFAULT;
 }
 
-static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long uhva,
-			     unsigned long len)
+static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long uhva)
 {
 	unsigned long page_offset;
 	bool unmap_old = false;
@@ -247,15 +259,6 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned l
 	if (WARN_ON_ONCE(kvm_is_error_gpa(gpa) == kvm_is_error_hva(uhva)))
 		return -EINVAL;
 
-	/*
-	 * The cached acces must fit within a single page. The 'len' argument
-	 * exists only to enforce that.
-	 */
-	page_offset = kvm_is_error_gpa(gpa) ? offset_in_page(uhva) :
-					      offset_in_page(gpa);
-	if (page_offset + len > PAGE_SIZE)
-		return -EINVAL;
-
 	lockdep_assert_held(&gpc->refresh_lock);
 
 	write_lock_irq(&gpc->lock);
@@ -270,6 +273,8 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned l
 	old_uhva = PAGE_ALIGN_DOWN(gpc->uhva);
 
 	if (kvm_is_error_gpa(gpa)) {
+		page_offset = offset_in_page(uhva);
+
 		gpc->gpa = INVALID_GPA;
 		gpc->memslot = NULL;
 		gpc->uhva = PAGE_ALIGN_DOWN(uhva);
@@ -279,6 +284,8 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned l
 	} else {
 		struct kvm_memslots *slots = kvm_memslots(gpc->kvm);
 
+		page_offset = offset_in_page(gpa);
+
 		if (gpc->gpa != gpa || gpc->generation != slots->generation ||
 		    kvm_is_error_hva(gpc->uhva)) {
 			gfn_t gfn = gpa_to_gfn(gpa);
@@ -354,6 +361,9 @@ int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len)
 
 	guard(mutex)(&gpc->refresh_lock);
 
+	if (!kvm_gpc_is_valid_len(gpc->gpa, gpc->uhva, len))
+		return -EINVAL;
+
 	/*
 	 * If the GPA is valid then ignore the HVA, as a cache can be GPA-based
 	 * or HVA-based, not both. For GPA-based caches, the HVA will be
@@ -361,7 +371,7 @@ int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len)
 	 */
 	uhva = kvm_is_error_gpa(gpc->gpa) ? gpc->uhva : KVM_HVA_ERR_BAD;
 
-	return __kvm_gpc_refresh(gpc, gpc->gpa, uhva, len);
+	return __kvm_gpc_refresh(gpc, gpc->gpa, uhva);
 }
 
 void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm)
@@ -381,6 +391,9 @@ static int __kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned
 {
 	struct kvm *kvm = gpc->kvm;
 
+	if (!kvm_gpc_is_valid_len(gpa, uhva, len))
+		return -EINVAL;
+
 	guard(mutex)(&gpc->refresh_lock);
 
 	if (!gpc->active) {
@@ -400,11 +413,18 @@ static int __kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned
 		gpc->active = true;
 		write_unlock_irq(&gpc->lock);
 	}
-	return __kvm_gpc_refresh(gpc, gpa, uhva, len);
+	return __kvm_gpc_refresh(gpc, gpa, uhva);
 }
 
 int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
 {
+	/*
+	 * Explicitly disallow INVALID_GPA so that the magic value can be used
+	 * by KVM to differentiate between GPA-based and HVA-based caches.
+	 */
+	if (WARN_ON_ONCE(kvm_is_error_gpa(gpa)))
+		return -EINVAL;
+
 	return __kvm_gpc_activate(gpc, gpa, KVM_HVA_ERR_BAD, len);
 }
 
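The gfn_to_pfn_cache hunks above centralize the "must fit in one page" rule in kvm_gpc_is_valid_len(): the usable length of a cached mapping is bounded by the offset of the GPA (or HVA) within its page. The sketch below is a standalone illustration of that arithmetic only, assuming 4KiB pages; cache_len_is_valid() is an invented name and none of this is the kernel's actual API.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096ull

/*
 * A cached mapping may not cross a page boundary, so the offset of the
 * address within its page plus the requested length must stay within one
 * page, mirroring the kvm_gpc_is_valid_len() check above.
 */
static bool cache_len_is_valid(uint64_t addr, uint64_t len)
{
        uint64_t offset = addr & (PAGE_SIZE - 1);

        return offset + len <= PAGE_SIZE;
}

int main(void)
{
        printf("offset 0x000, len 4096: %d\n", cache_len_is_valid(0x1000, 4096));
        printf("offset 0xff8, len    8: %d\n", cache_len_is_valid(0x1ff8, 8));
        printf("offset 0xff8, len   16: %d\n", cache_len_is_valid(0x1ff8, 16));
        return 0;
}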