RISC-V:
* Remove 's' & 'u' as valid ISA extension * Do not allow disabling the base extensions 'i'/'m'/'a'/'c' x86: * Fix NMI watchdog in guests on AMD * Fix for SEV cache incoherency issues * Don't re-acquire SRCU lock in complete_emulated_io() * Avoid NULL pointer deref if VM creation fails * Fix race conditions between APICv disabling and vCPU creation * Bugfixes for disabling of APICv * Preserve BSP MSR_KVM_POLL_CONTROL across suspend/resume selftests: * Do not use bitfields larger than 32-bits, they differ between GCC and clang -----BEGIN PGP SIGNATURE----- iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmJi3KUUHHBib256aW5p QHJlZGhhdC5jb20ACgkQv/vSX3jHroMhvQf/Yncfg3MkOvKsVxnCe7diKDTI/E2n wBGNIcL8r7L9oIltHL4Mh7JQTacHFQOZ9PQ30NO1p+pznZ03e8LR59IF1JpP7VOU sWrLZ5a4bIAEjOpA7Jxcee6hUBwewBauDgFLbb+YAI2lAahiH7jVfywDRife/c3k N2LjeA75K8UvMiDCfjxxxerFJK91zaqjWlUNF2OhtFp/5pnMfS+nli9Q8QS837pZ oUf+0Beb2RpSHan+wbYVU7X3ZLwtpR0M3w3uXOG+X3as56wDf26znXS02aSwa45x lfX+pqJfmb4vCJJDXt6avH27EVgTq0Vew+BhQHG3VLRO6uxZ+smX6qmsuw== =kvbw -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull kvm fixes from Paolo Bonzini: "The main and larger change here is a workaround for AMD's lack of cache coherency for encrypted-memory guests. I have another patch pending, but it's waiting for review from the architecture maintainers. 
RISC-V: - Remove 's' & 'u' as valid ISA extension - Do not allow disabling the base extensions 'i'/'m'/'a'/'c' x86: - Fix NMI watchdog in guests on AMD - Fix for SEV cache incoherency issues - Don't re-acquire SRCU lock in complete_emulated_io() - Avoid NULL pointer deref if VM creation fails - Fix race conditions between APICv disabling and vCPU creation - Bugfixes for disabling of APICv - Preserve BSP MSR_KVM_POLL_CONTROL across suspend/resume selftests: - Do not use bitfields larger than 32-bits, they differ between GCC and clang" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: kvm: selftests: introduce and use more page size-related constants kvm: selftests: do not use bitfields larger than 32-bits for PTEs KVM: SEV: add cache flush to solve SEV cache incoherency issues KVM: SVM: Flush when freeing encrypted pages even on SME_COHERENT CPUs KVM: SVM: Simplify and harden helper to flush SEV guest page(s) KVM: selftests: Silence compiler warning in the kvm_page_table_test KVM: x86/pmu: Update AMD PMC sample period to fix guest NMI-watchdog x86/kvm: Preserve BSP MSR_KVM_POLL_CONTROL across suspend/resume KVM: SPDX style and spelling fixes KVM: x86: Skip KVM_GUESTDBG_BLOCKIRQ APICv update if APICv is disabled KVM: x86: Pend KVM_REQ_APICV_UPDATE during vCPU creation to fix a race KVM: nVMX: Defer APICv updates while L2 is active until L1 is active KVM: x86: Tag APICv DISABLE inhibit, not ABSENT, if APICv is disabled KVM: Initialize debugfs_dentry when a VM is created to avoid NULL deref KVM: Add helpers to wrap vcpu->srcu_idx and yell if it's abused KVM: RISC-V: Use kvm_vcpu.srcu_idx, drop RISC-V's unnecessary copy KVM: x86: Don't re-acquire SRCU lock in complete_emulated_io() RISC-V: KVM: Restrict the extensions that can be disabled RISC-V: KVM: Remove 's' & 'u' as valid ISA extension
This commit is contained in:
commit
bb4ce2c658
@ -168,9 +168,10 @@ int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
return -EINVAL;
|
||||
/* Read the entry from guest memory */
|
||||
addr = base + (index * sizeof(rpte));
|
||||
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
ret = kvm_read_guest(kvm, addr, &rpte, sizeof(rpte));
|
||||
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
if (ret) {
|
||||
if (pte_ret_p)
|
||||
*pte_ret_p = addr;
|
||||
@ -246,9 +247,9 @@ int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
|
||||
/* Read the table to find the root of the radix tree */
|
||||
ptbl = (table & PRTB_MASK) + (table_index * sizeof(entry));
|
||||
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
ret = kvm_read_guest(kvm, ptbl, &entry, sizeof(entry));
|
||||
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
@ -306,10 +306,10 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
|
||||
/* copy parameters in */
|
||||
hv_ptr = kvmppc_get_gpr(vcpu, 4);
|
||||
regs_ptr = kvmppc_get_gpr(vcpu, 5);
|
||||
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
err = kvmhv_read_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
|
||||
hv_ptr, regs_ptr);
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
if (err)
|
||||
return H_PARAMETER;
|
||||
|
||||
@ -410,10 +410,10 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
|
||||
byteswap_hv_regs(&l2_hv);
|
||||
byteswap_pt_regs(&l2_regs);
|
||||
}
|
||||
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
err = kvmhv_write_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
|
||||
hv_ptr, regs_ptr);
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
if (err)
|
||||
return H_AUTHORITY;
|
||||
|
||||
@ -600,16 +600,16 @@ long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu)
|
||||
goto not_found;
|
||||
|
||||
/* Write what was loaded into our buffer back to the L1 guest */
|
||||
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n);
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
if (rc)
|
||||
goto not_found;
|
||||
} else {
|
||||
/* Load the data to be stored from the L1 guest into our buf */
|
||||
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n);
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
if (rc)
|
||||
goto not_found;
|
||||
|
||||
|
@ -229,9 +229,9 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
|
||||
*/
|
||||
args_phys = kvmppc_get_gpr(vcpu, 4) & KVM_PAM;
|
||||
|
||||
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args));
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
if (rc)
|
||||
goto fail;
|
||||
|
||||
|
@ -425,9 +425,9 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
|
||||
return EMULATE_DONE;
|
||||
}
|
||||
|
||||
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
rc = kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size);
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
if (rc)
|
||||
return EMULATE_DO_MMIO;
|
||||
|
||||
|
@ -193,9 +193,6 @@ struct kvm_vcpu_arch {
|
||||
|
||||
/* Don't run the VCPU (blocked) */
|
||||
bool pause;
|
||||
|
||||
/* SRCU lock index for in-kernel run loop */
|
||||
int srcu_idx;
|
||||
};
|
||||
|
||||
static inline void kvm_arch_hardware_unsetup(void) {}
|
||||
|
@ -38,14 +38,16 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
|
||||
sizeof(kvm_vcpu_stats_desc),
|
||||
};
|
||||
|
||||
/*
 * Extensions that userspace is permitted to switch off for a vCPU when it
 * writes the "isa" config register.
 */
#define KVM_RISCV_ISA_DISABLE_ALLOWED	(riscv_isa_extension_mask(d) | \
					 riscv_isa_extension_mask(f))

/*
 * Base extensions. A request to disable any of these is ignored: the
 * config-register setter ORs this mask back into the requested value.
 */
#define KVM_RISCV_ISA_DISABLE_NOT_ALLOWED	(riscv_isa_extension_mask(a) | \
						 riscv_isa_extension_mask(c) | \
						 riscv_isa_extension_mask(i) | \
						 riscv_isa_extension_mask(m))

/*
 * Every extension a vCPU may expose. Defined exactly once, as the union of
 * the two masks above; 's' and 'u' are intentionally not part of it.
 */
#define KVM_RISCV_ISA_ALLOWED	(KVM_RISCV_ISA_DISABLE_ALLOWED | \
				 KVM_RISCV_ISA_DISABLE_NOT_ALLOWED)
|
||||
|
||||
static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
@ -219,7 +221,8 @@ static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
|
||||
switch (reg_num) {
|
||||
case KVM_REG_RISCV_CONFIG_REG(isa):
|
||||
if (!vcpu->arch.ran_atleast_once) {
|
||||
vcpu->arch.isa = reg_val;
|
||||
/* Ignore the disable request for these extensions */
|
||||
vcpu->arch.isa = reg_val | KVM_RISCV_ISA_DISABLE_NOT_ALLOWED;
|
||||
vcpu->arch.isa &= riscv_isa_extension_base(NULL);
|
||||
vcpu->arch.isa &= KVM_RISCV_ISA_ALLOWED;
|
||||
kvm_riscv_vcpu_fp_reset(vcpu);
|
||||
@ -724,13 +727,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
|
||||
/* Mark this VCPU ran at least once */
|
||||
vcpu->arch.ran_atleast_once = true;
|
||||
|
||||
vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
|
||||
/* Process MMIO value returned from user-space */
|
||||
if (run->exit_reason == KVM_EXIT_MMIO) {
|
||||
ret = kvm_riscv_vcpu_mmio_return(vcpu, vcpu->run);
|
||||
if (ret) {
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
@ -739,13 +742,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
|
||||
if (run->exit_reason == KVM_EXIT_RISCV_SBI) {
|
||||
ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run);
|
||||
if (ret) {
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if (run->immediate_exit) {
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
@ -784,7 +787,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
|
||||
*/
|
||||
vcpu->mode = IN_GUEST_MODE;
|
||||
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
smp_mb__after_srcu_read_unlock();
|
||||
|
||||
/*
|
||||
@ -802,7 +805,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
|
||||
vcpu->mode = OUTSIDE_GUEST_MODE;
|
||||
local_irq_enable();
|
||||
preempt_enable();
|
||||
vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -846,7 +849,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
|
||||
|
||||
preempt_enable();
|
||||
|
||||
vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
|
||||
ret = kvm_riscv_vcpu_exit(vcpu, run, &trap);
|
||||
}
|
||||
@ -855,7 +858,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
|
||||
|
||||
vcpu_put(vcpu);
|
||||
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -456,9 +456,9 @@ static int stage2_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
void kvm_riscv_vcpu_wfi(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!kvm_arch_vcpu_runnable(vcpu)) {
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
kvm_vcpu_halt(vcpu);
|
||||
vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
|
||||
}
|
||||
}
|
||||
|
@ -1334,11 +1334,11 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
|
||||
hrtimer_start(&vcpu->arch.ckc_timer, sltime, HRTIMER_MODE_REL);
|
||||
VCPU_EVENT(vcpu, 4, "enabled wait: %llu ns", sltime);
|
||||
no_timer:
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
kvm_vcpu_halt(vcpu);
|
||||
vcpu->valid_wakeup = false;
|
||||
__unset_cpu_idle(vcpu);
|
||||
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
|
||||
hrtimer_cancel(&vcpu->arch.ckc_timer);
|
||||
return 0;
|
||||
|
@ -4237,14 +4237,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
|
||||
* We try to hold kvm->srcu during most of vcpu_run (except when run-
|
||||
* ning the guest), so that memslots (and other stuff) are protected
|
||||
*/
|
||||
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
|
||||
do {
|
||||
rc = vcpu_pre_run(vcpu);
|
||||
if (rc)
|
||||
break;
|
||||
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
/*
|
||||
* As PF_VCPU will be used in fault handler, between
|
||||
* guest_enter and guest_exit should be no uaccess.
|
||||
@ -4281,12 +4281,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
|
||||
__enable_cpu_timer_accounting(vcpu);
|
||||
guest_exit_irqoff();
|
||||
local_irq_enable();
|
||||
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
|
||||
rc = vcpu_post_run(vcpu, exit_reason);
|
||||
} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
|
||||
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -1091,7 +1091,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
||||
|
||||
handle_last_fault(vcpu, vsie_page);
|
||||
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
|
||||
/* save current guest state of bp isolation override */
|
||||
guest_bp_isolation = test_thread_flag(TIF_ISOLATE_BP_GUEST);
|
||||
@ -1133,7 +1133,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
||||
if (!guest_bp_isolation)
|
||||
clear_thread_flag(TIF_ISOLATE_BP_GUEST);
|
||||
|
||||
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
|
||||
if (rc == -EINTR) {
|
||||
VCPU_EVENT(vcpu, 3, "%s", "machine check");
|
||||
|
@ -118,6 +118,7 @@ KVM_X86_OP_OPTIONAL(mem_enc_register_region)
|
||||
KVM_X86_OP_OPTIONAL(mem_enc_unregister_region)
|
||||
KVM_X86_OP_OPTIONAL(vm_copy_enc_context_from)
|
||||
KVM_X86_OP_OPTIONAL(vm_move_enc_context_from)
|
||||
KVM_X86_OP_OPTIONAL(guest_memory_reclaimed)
|
||||
KVM_X86_OP(get_msr_feature)
|
||||
KVM_X86_OP(can_emulate_instruction)
|
||||
KVM_X86_OP(apic_init_signal_blocked)
|
||||
|
@ -1484,6 +1484,7 @@ struct kvm_x86_ops {
|
||||
int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp);
|
||||
int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
|
||||
int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
|
||||
void (*guest_memory_reclaimed)(struct kvm *kvm);
|
||||
|
||||
int (*get_msr_feature)(struct kvm_msr_entry *entry);
|
||||
|
||||
|
@ -69,6 +69,7 @@ static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __align
|
||||
DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible;
|
||||
static int has_steal_clock = 0;
|
||||
|
||||
static int has_guest_poll = 0;
|
||||
/*
|
||||
* No need for any "IO delay" on KVM
|
||||
*/
|
||||
@ -706,14 +707,26 @@ static int kvm_cpu_down_prepare(unsigned int cpu)
|
||||
|
||||
static int kvm_suspend(void)
|
||||
{
|
||||
u64 val = 0;
|
||||
|
||||
kvm_guest_cpu_offline(false);
|
||||
|
||||
#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
|
||||
if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
|
||||
rdmsrl(MSR_KVM_POLL_CONTROL, val);
|
||||
has_guest_poll = !(val & 1);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Resume-time hook: brings the current CPU's KVM guest state back online
 * and, when CONFIG_ARCH_CPUIDLE_HALTPOLL is enabled, writes 0 back to
 * MSR_KVM_POLL_CONTROL if the feature is advertised and has_guest_poll was
 * set at suspend time.
 */
static void kvm_resume(void)
{
	kvm_cpu_online(raw_smp_processor_id());

#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
	if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) {
		if (has_guest_poll)
			wrmsrl(MSR_KVM_POLL_CONTROL, 0);
	}
#endif
}
|
||||
|
||||
static struct syscore_ops kvm_syscore_ops = {
|
||||
|
@ -138,6 +138,15 @@ static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
|
||||
return sample_period;
|
||||
}
|
||||
|
||||
static inline void pmc_update_sample_period(struct kvm_pmc *pmc)
|
||||
{
|
||||
if (!pmc->perf_event || pmc->is_paused)
|
||||
return;
|
||||
|
||||
perf_event_period(pmc->perf_event,
|
||||
get_sample_period(pmc, pmc->counter));
|
||||
}
|
||||
|
||||
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
|
||||
void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
|
||||
void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
|
||||
|
@ -257,6 +257,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
|
||||
if (pmc) {
|
||||
pmc->counter += data - pmc_read_counter(pmc);
|
||||
pmc_update_sample_period(pmc);
|
||||
return 0;
|
||||
}
|
||||
/* MSR_EVNTSELn */
|
||||
|
@ -2226,51 +2226,47 @@ int sev_cpu_init(struct svm_cpu_data *sd)
|
||||
* Pages used by hardware to hold guest encrypted state must be flushed before
|
||||
* returning them to the system.
|
||||
*/
|
||||
static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va,
|
||||
unsigned long len)
|
||||
static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
|
||||
{
|
||||
int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid;
|
||||
|
||||
/*
|
||||
* If hardware enforced cache coherency for encrypted mappings of the
|
||||
* same physical page is supported, nothing to do.
|
||||
* Note! The address must be a kernel address, as regular page walk
|
||||
* checks are performed by VM_PAGE_FLUSH, i.e. operating on a user
|
||||
* address is non-deterministic and unsafe. This function deliberately
|
||||
* takes a pointer to deter passing in a user address.
|
||||
*/
|
||||
if (boot_cpu_has(X86_FEATURE_SME_COHERENT))
|
||||
unsigned long addr = (unsigned long)va;
|
||||
|
||||
/*
|
||||
* If CPU enforced cache coherency for encrypted mappings of the
|
||||
* same physical page is supported, use CLFLUSHOPT instead. NOTE: cache
|
||||
* flush is still needed in order to work properly with DMA devices.
|
||||
*/
|
||||
if (boot_cpu_has(X86_FEATURE_SME_COHERENT)) {
|
||||
clflush_cache_range(va, PAGE_SIZE);
|
||||
return;
|
||||
|
||||
/*
|
||||
* If the VM Page Flush MSR is supported, use it to flush the page
|
||||
* (using the page virtual address and the guest ASID).
|
||||
*/
|
||||
if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH)) {
|
||||
struct kvm_sev_info *sev;
|
||||
unsigned long va_start;
|
||||
u64 start, stop;
|
||||
|
||||
/* Align start and stop to page boundaries. */
|
||||
va_start = (unsigned long)va;
|
||||
start = (u64)va_start & PAGE_MASK;
|
||||
stop = PAGE_ALIGN((u64)va_start + len);
|
||||
|
||||
if (start < stop) {
|
||||
sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
|
||||
|
||||
while (start < stop) {
|
||||
wrmsrl(MSR_AMD64_VM_PAGE_FLUSH,
|
||||
start | sev->asid);
|
||||
|
||||
start += PAGE_SIZE;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
WARN(1, "Address overflow, using WBINVD\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* Hardware should always have one of the above features,
|
||||
* but if not, use WBINVD and issue a warning.
|
||||
* VM Page Flush takes a host virtual address and a guest ASID. Fall
|
||||
* back to WBINVD if this faults so as not to make any problems worse
|
||||
* by leaving stale encrypted data in the cache.
|
||||
*/
|
||||
WARN_ONCE(1, "Using WBINVD to flush guest memory\n");
|
||||
if (WARN_ON_ONCE(wrmsrl_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid)))
|
||||
goto do_wbinvd;
|
||||
|
||||
return;
|
||||
|
||||
do_wbinvd:
|
||||
wbinvd_on_all_cpus();
|
||||
}
|
||||
|
||||
/*
 * Hook invoked when memory belonging to @kvm is being reclaimed.
 *
 * For an SEV guest, issue WBINVD on all CPUs; for any other VM this is a
 * no-op.
 */
void sev_guest_memory_reclaimed(struct kvm *kvm)
{
	if (sev_guest(kvm))
		wbinvd_on_all_cpus();
}
|
||||
|
||||
@ -2284,7 +2280,8 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
|
||||
svm = to_svm(vcpu);
|
||||
|
||||
if (vcpu->arch.guest_state_protected)
|
||||
sev_flush_guest_memory(svm, svm->sev_es.vmsa, PAGE_SIZE);
|
||||
sev_flush_encrypted_page(vcpu, svm->sev_es.vmsa);
|
||||
|
||||
__free_page(virt_to_page(svm->sev_es.vmsa));
|
||||
|
||||
if (svm->sev_es.ghcb_sa_free)
|
||||
|
@ -4620,6 +4620,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
|
||||
.mem_enc_ioctl = sev_mem_enc_ioctl,
|
||||
.mem_enc_register_region = sev_mem_enc_register_region,
|
||||
.mem_enc_unregister_region = sev_mem_enc_unregister_region,
|
||||
.guest_memory_reclaimed = sev_guest_memory_reclaimed,
|
||||
|
||||
.vm_copy_enc_context_from = sev_vm_copy_enc_context_from,
|
||||
.vm_move_enc_context_from = sev_vm_move_enc_context_from,
|
||||
|
@ -609,6 +609,8 @@ int sev_mem_enc_unregister_region(struct kvm *kvm,
|
||||
struct kvm_enc_region *range);
|
||||
int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd);
|
||||
int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd);
|
||||
void sev_guest_memory_reclaimed(struct kvm *kvm);
|
||||
|
||||
void pre_sev_run(struct vcpu_svm *svm, int cpu);
|
||||
void __init sev_set_cpu_caps(void);
|
||||
void __init sev_hardware_setup(void);
|
||||
|
@ -4618,6 +4618,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
|
||||
kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
|
||||
}
|
||||
|
||||
if (vmx->nested.update_vmcs01_apicv_status) {
|
||||
vmx->nested.update_vmcs01_apicv_status = false;
|
||||
kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
|
||||
}
|
||||
|
||||
if ((vm_exit_reason != -1) &&
|
||||
(enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)))
|
||||
vmx->nested.need_vmcs12_to_shadow_sync = true;
|
||||
|
@ -431,15 +431,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
!(msr & MSR_PMC_FULL_WIDTH_BIT))
|
||||
data = (s64)(s32)data;
|
||||
pmc->counter += data - pmc_read_counter(pmc);
|
||||
if (pmc->perf_event && !pmc->is_paused)
|
||||
perf_event_period(pmc->perf_event,
|
||||
get_sample_period(pmc, data));
|
||||
pmc_update_sample_period(pmc);
|
||||
return 0;
|
||||
} else if ((pmc = get_fixed_pmc(pmu, msr))) {
|
||||
pmc->counter += data - pmc_read_counter(pmc);
|
||||
if (pmc->perf_event && !pmc->is_paused)
|
||||
perf_event_period(pmc->perf_event,
|
||||
get_sample_period(pmc, data));
|
||||
pmc_update_sample_period(pmc);
|
||||
return 0;
|
||||
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
|
||||
if (data == pmc->eventsel)
|
||||
|
@ -4174,6 +4174,11 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
|
||||
if (is_guest_mode(vcpu)) {
|
||||
vmx->nested.update_vmcs01_apicv_status = true;
|
||||
return;
|
||||
}
|
||||
|
||||
pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
|
||||
if (cpu_has_secondary_exec_ctrls()) {
|
||||
if (kvm_vcpu_apicv_active(vcpu))
|
||||
|
@ -183,6 +183,7 @@ struct nested_vmx {
|
||||
bool change_vmcs01_virtual_apic_mode;
|
||||
bool reload_vmcs01_apic_access_page;
|
||||
bool update_vmcs01_cpu_dirty_logging;
|
||||
bool update_vmcs01_apicv_status;
|
||||
|
||||
/*
|
||||
* Enlightened VMCS has been enabled. It does not mean that L1 has to
|
||||
|
@ -9111,7 +9111,7 @@ static void kvm_apicv_init(struct kvm *kvm)
|
||||
|
||||
if (!enable_apicv)
|
||||
set_or_clear_apicv_inhibit(inhibits,
|
||||
APICV_INHIBIT_REASON_ABSENT, true);
|
||||
APICV_INHIBIT_REASON_DISABLE, true);
|
||||
}
|
||||
|
||||
static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id)
|
||||
@ -9889,6 +9889,11 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
|
||||
kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
|
||||
}
|
||||
|
||||
/*
 * Arch hook for reclaimed guest memory: forwards @kvm to the vendor
 * guest_memory_reclaimed op through static_call_cond(). The op is declared
 * optional, so presumably nothing is called when no implementation is
 * registered - confirm against the static_call_cond() definition.
 */
void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
|
||||
{
|
||||
static_call_cond(kvm_x86_guest_memory_reclaimed)(kvm);
|
||||
}
|
||||
|
||||
static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!lapic_in_kernel(vcpu))
|
||||
@ -10097,7 +10102,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
/* Store vcpu->apicv_active before vcpu->mode. */
|
||||
smp_store_release(&vcpu->mode, IN_GUEST_MODE);
|
||||
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
|
||||
/*
|
||||
* 1) We should set ->mode before checking ->requests. Please see
|
||||
@ -10128,7 +10133,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
smp_wmb();
|
||||
local_irq_enable();
|
||||
preempt_enable();
|
||||
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
r = 1;
|
||||
goto cancel_injection;
|
||||
}
|
||||
@ -10254,7 +10259,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
local_irq_enable();
|
||||
preempt_enable();
|
||||
|
||||
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
|
||||
/*
|
||||
* Profile KVM exit RIPs:
|
||||
@ -10284,7 +10289,7 @@ out:
|
||||
}
|
||||
|
||||
/* Called within kvm->srcu read side. */
|
||||
static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
|
||||
static inline int vcpu_block(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
bool hv_timer;
|
||||
|
||||
@ -10300,12 +10305,12 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
|
||||
if (hv_timer)
|
||||
kvm_lapic_switch_to_sw_timer(vcpu);
|
||||
|
||||
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
|
||||
kvm_vcpu_halt(vcpu);
|
||||
else
|
||||
kvm_vcpu_block(vcpu);
|
||||
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
|
||||
if (hv_timer)
|
||||
kvm_lapic_switch_to_hv_timer(vcpu);
|
||||
@ -10347,7 +10352,6 @@ static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
|
||||
static int vcpu_run(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int r;
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
|
||||
vcpu->arch.l1tf_flush_l1d = true;
|
||||
|
||||
@ -10355,7 +10359,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
|
||||
if (kvm_vcpu_running(vcpu)) {
|
||||
r = vcpu_enter_guest(vcpu);
|
||||
} else {
|
||||
r = vcpu_block(kvm, vcpu);
|
||||
r = vcpu_block(vcpu);
|
||||
}
|
||||
|
||||
if (r <= 0)
|
||||
@ -10374,9 +10378,9 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
if (__xfer_to_guest_mode_work_pending()) {
|
||||
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
r = xfer_to_guest_mode_handle_work(vcpu);
|
||||
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
@ -10387,12 +10391,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
|
||||
|
||||
static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int r;
|
||||
|
||||
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
|
||||
return r;
|
||||
return kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
|
||||
}
|
||||
|
||||
static int complete_emulated_pio(struct kvm_vcpu *vcpu)
|
||||
@ -10484,7 +10483,6 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
|
||||
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_run *kvm_run = vcpu->run;
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
int r;
|
||||
|
||||
vcpu_load(vcpu);
|
||||
@ -10492,7 +10490,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
|
||||
kvm_run->flags = 0;
|
||||
kvm_load_guest_fpu(vcpu);
|
||||
|
||||
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
|
||||
if (kvm_run->immediate_exit) {
|
||||
r = -EINTR;
|
||||
@ -10504,9 +10502,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
|
||||
*/
|
||||
WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu));
|
||||
|
||||
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
kvm_vcpu_block(vcpu);
|
||||
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
|
||||
if (kvm_apic_accept_events(vcpu) < 0) {
|
||||
r = 0;
|
||||
@ -10567,7 +10565,7 @@ out:
|
||||
if (kvm_run->kvm_valid_regs)
|
||||
store_regs(vcpu);
|
||||
post_kvm_run_save(vcpu);
|
||||
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
|
||||
kvm_sigset_deactivate(vcpu);
|
||||
vcpu_put(vcpu);
|
||||
@ -10985,6 +10983,9 @@ static void kvm_arch_vcpu_guestdbg_update_apicv_inhibit(struct kvm *kvm)
|
||||
struct kvm_vcpu *vcpu;
|
||||
unsigned long i;
|
||||
|
||||
if (!enable_apicv)
|
||||
return;
|
||||
|
||||
down_write(&kvm->arch.apicv_update_lock);
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
@ -11196,8 +11197,21 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
|
||||
r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
|
||||
if (r < 0)
|
||||
goto fail_mmu_destroy;
|
||||
if (kvm_apicv_activated(vcpu->kvm))
|
||||
|
||||
/*
|
||||
* Defer evaluating inhibits until the vCPU is first run, as
|
||||
* this vCPU will not get notified of any changes until this
|
||||
* vCPU is visible to other vCPUs (marked online and added to
|
||||
* the set of vCPUs). Opportunistically mark APICv active as
|
||||
* VMX in particular is highly unlikely to have inhibits.
|
||||
* Ignore the current per-VM APICv state so that vCPU creation
|
||||
* is guaranteed to run with a deterministic value, the request
|
||||
* will ensure the vCPU gets the correct state before VM-Entry.
|
||||
*/
|
||||
if (enable_apicv) {
|
||||
vcpu->arch.apicv_active = true;
|
||||
kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
|
||||
}
|
||||
} else
|
||||
static_branch_inc(&kvm_has_noapic_vcpu);
|
||||
|
||||
|
@ -315,7 +315,10 @@ struct kvm_vcpu {
|
||||
int cpu;
|
||||
int vcpu_id; /* id given by userspace at creation */
|
||||
int vcpu_idx; /* index in kvm->vcpus array */
|
||||
int srcu_idx;
|
||||
int ____srcu_idx; /* Don't use this directly. You've been warned. */
|
||||
#ifdef CONFIG_PROVE_RCU
|
||||
int srcu_depth;
|
||||
#endif
|
||||
int mode;
|
||||
u64 requests;
|
||||
unsigned long guest_debug;
|
||||
@ -840,6 +843,25 @@ static inline void kvm_vm_bugged(struct kvm *kvm)
|
||||
unlikely(__ret); \
|
||||
})
|
||||
|
||||
static inline void kvm_vcpu_srcu_read_lock(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
#ifdef CONFIG_PROVE_RCU
|
||||
WARN_ONCE(vcpu->srcu_depth++,
|
||||
"KVM: Illegal vCPU srcu_idx LOCK, depth=%d", vcpu->srcu_depth - 1);
|
||||
#endif
|
||||
vcpu->____srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
}
|
||||
|
||||
static inline void kvm_vcpu_srcu_read_unlock(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->____srcu_idx);
|
||||
|
||||
#ifdef CONFIG_PROVE_RCU
|
||||
WARN_ONCE(--vcpu->srcu_depth,
|
||||
"KVM: Illegal vCPU srcu_idx UNLOCK, depth=%d", vcpu->srcu_depth);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline bool kvm_dirty_log_manual_protect_and_init_set(struct kvm *kvm)
|
||||
{
|
||||
return !!(kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET);
|
||||
@ -2197,6 +2219,8 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
|
||||
void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
|
||||
unsigned long start, unsigned long end);
|
||||
|
||||
void kvm_arch_guest_memory_reclaimed(struct kvm *kvm);
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE
|
||||
int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu);
|
||||
#else
|
||||
|
@ -60,6 +60,23 @@
|
||||
/* CPUID.0x8000_0001.EDX */
|
||||
#define CPUID_GBPAGES (1ul << 26)
|
||||
|
||||
/*
 * Page table entry flag masks. Each PTE_*_MASK is built with BIT_ULL(n)
 * (defined elsewhere; presumably the 64-bit value with only bit n set), so
 * entries are manipulated as plain 64-bit integers rather than C bitfields.
 */
|
||||
#define PTE_PRESENT_MASK BIT_ULL(0)
|
||||
#define PTE_WRITABLE_MASK BIT_ULL(1)
|
||||
#define PTE_USER_MASK BIT_ULL(2)
|
||||
#define PTE_ACCESSED_MASK BIT_ULL(5)
|
||||
#define PTE_DIRTY_MASK BIT_ULL(6)
|
||||
#define PTE_LARGE_MASK BIT_ULL(7)
|
||||
#define PTE_GLOBAL_MASK BIT_ULL(8)
|
||||
#define PTE_NX_MASK BIT_ULL(63)
|
||||
|
||||
/* Page geometry: PAGE_SIZE is 1 << 12 bytes; PAGE_MASK clears the low PAGE_SHIFT bits. */
#define PAGE_SHIFT 12
|
||||
#define PAGE_SIZE (1ULL << PAGE_SHIFT)
|
||||
#define PAGE_MASK (~(PAGE_SIZE-1))
|
||||
|
||||
/*
 * PHYSICAL_PAGE_MASK is GENMASK_ULL(51, 12) (defined elsewhere; presumably
 * bits 51..12 set). PTE_GET_PFN() masks an entry with it and shifts the
 * result right by PAGE_SHIFT.
 */
#define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12)
|
||||
#define PTE_GET_PFN(pte) (((pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
|
||||
|
||||
/* General Registers in 64-Bit Mode */
|
||||
struct gpr64_regs {
|
||||
u64 rax;
|
||||
|
@ -278,7 +278,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
|
||||
else
|
||||
guest_test_phys_mem = p->phys_offset;
|
||||
#ifdef __s390x__
|
||||
alignment = max(0x100000, alignment);
|
||||
alignment = max(0x100000UL, alignment);
|
||||
#endif
|
||||
guest_test_phys_mem = align_down(guest_test_phys_mem, alignment);
|
||||
|
||||
|
@ -19,38 +19,6 @@
|
||||
|
||||
vm_vaddr_t exception_handlers;
|
||||
|
||||
/* Virtual translation table structure declarations */
|
||||
struct pageUpperEntry {
|
||||
uint64_t present:1;
|
||||
uint64_t writable:1;
|
||||
uint64_t user:1;
|
||||
uint64_t write_through:1;
|
||||
uint64_t cache_disable:1;
|
||||
uint64_t accessed:1;
|
||||
uint64_t ignored_06:1;
|
||||
uint64_t page_size:1;
|
||||
uint64_t ignored_11_08:4;
|
||||
uint64_t pfn:40;
|
||||
uint64_t ignored_62_52:11;
|
||||
uint64_t execute_disable:1;
|
||||
};
|
||||
|
||||
struct pageTableEntry {
|
||||
uint64_t present:1;
|
||||
uint64_t writable:1;
|
||||
uint64_t user:1;
|
||||
uint64_t write_through:1;
|
||||
uint64_t cache_disable:1;
|
||||
uint64_t accessed:1;
|
||||
uint64_t dirty:1;
|
||||
uint64_t reserved_07:1;
|
||||
uint64_t global:1;
|
||||
uint64_t ignored_11_09:3;
|
||||
uint64_t pfn:40;
|
||||
uint64_t ignored_62_52:11;
|
||||
uint64_t execute_disable:1;
|
||||
};
|
||||
|
||||
void regs_dump(FILE *stream, struct kvm_regs *regs,
|
||||
uint8_t indent)
|
||||
{
|
||||
@ -195,23 +163,21 @@ static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr,
|
||||
return &page_table[index];
|
||||
}
|
||||
|
||||
static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
|
||||
uint64_t pt_pfn,
|
||||
uint64_t vaddr,
|
||||
uint64_t paddr,
|
||||
int level,
|
||||
enum x86_page_size page_size)
|
||||
static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
|
||||
uint64_t pt_pfn,
|
||||
uint64_t vaddr,
|
||||
uint64_t paddr,
|
||||
int level,
|
||||
enum x86_page_size page_size)
|
||||
{
|
||||
struct pageUpperEntry *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
|
||||
uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
|
||||
|
||||
if (!pte->present) {
|
||||
pte->writable = true;
|
||||
pte->present = true;
|
||||
pte->page_size = (level == page_size);
|
||||
if (pte->page_size)
|
||||
pte->pfn = paddr >> vm->page_shift;
|
||||
if (!(*pte & PTE_PRESENT_MASK)) {
|
||||
*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
|
||||
if (level == page_size)
|
||||
*pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
|
||||
else
|
||||
pte->pfn = vm_alloc_page_table(vm) >> vm->page_shift;
|
||||
*pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
|
||||
} else {
|
||||
/*
|
||||
* Entry already present. Assert that the caller doesn't want
|
||||
@ -221,7 +187,7 @@ static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
|
||||
TEST_ASSERT(level != page_size,
|
||||
"Cannot create hugepage at level: %u, vaddr: 0x%lx\n",
|
||||
page_size, vaddr);
|
||||
TEST_ASSERT(!pte->page_size,
|
||||
TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
|
||||
"Cannot create page table at level: %u, vaddr: 0x%lx\n",
|
||||
level, vaddr);
|
||||
}
|
||||
@ -232,8 +198,8 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
|
||||
enum x86_page_size page_size)
|
||||
{
|
||||
const uint64_t pg_size = 1ull << ((page_size * 9) + 12);
|
||||
struct pageUpperEntry *pml4e, *pdpe, *pde;
|
||||
struct pageTableEntry *pte;
|
||||
uint64_t *pml4e, *pdpe, *pde;
|
||||
uint64_t *pte;
|
||||
|
||||
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
|
||||
"Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
|
||||
@ -257,24 +223,22 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
|
||||
*/
|
||||
pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift,
|
||||
vaddr, paddr, 3, page_size);
|
||||
if (pml4e->page_size)
|
||||
if (*pml4e & PTE_LARGE_MASK)
|
||||
return;
|
||||
|
||||
pdpe = virt_create_upper_pte(vm, pml4e->pfn, vaddr, paddr, 2, page_size);
|
||||
if (pdpe->page_size)
|
||||
pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, 2, page_size);
|
||||
if (*pdpe & PTE_LARGE_MASK)
|
||||
return;
|
||||
|
||||
pde = virt_create_upper_pte(vm, pdpe->pfn, vaddr, paddr, 1, page_size);
|
||||
if (pde->page_size)
|
||||
pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, 1, page_size);
|
||||
if (*pde & PTE_LARGE_MASK)
|
||||
return;
|
||||
|
||||
/* Fill in page table entry. */
|
||||
pte = virt_get_pte(vm, pde->pfn, vaddr, 0);
|
||||
TEST_ASSERT(!pte->present,
|
||||
pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, 0);
|
||||
TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
|
||||
"PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
|
||||
pte->pfn = paddr >> vm->page_shift;
|
||||
pte->writable = true;
|
||||
pte->present = 1;
|
||||
*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
|
||||
}
|
||||
|
||||
void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
|
||||
@ -282,22 +246,22 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
|
||||
__virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K);
|
||||
}
|
||||
|
||||
static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
|
||||
static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
|
||||
uint64_t vaddr)
|
||||
{
|
||||
uint16_t index[4];
|
||||
struct pageUpperEntry *pml4e, *pdpe, *pde;
|
||||
struct pageTableEntry *pte;
|
||||
uint64_t *pml4e, *pdpe, *pde;
|
||||
uint64_t *pte;
|
||||
struct kvm_cpuid_entry2 *entry;
|
||||
struct kvm_sregs sregs;
|
||||
int max_phy_addr;
|
||||
/* Set the bottom 52 bits. */
|
||||
uint64_t rsvd_mask = 0x000fffffffffffff;
|
||||
uint64_t rsvd_mask = 0;
|
||||
|
||||
entry = kvm_get_supported_cpuid_index(0x80000008, 0);
|
||||
max_phy_addr = entry->eax & 0x000000ff;
|
||||
/* Clear the bottom bits of the reserved mask. */
|
||||
rsvd_mask = (rsvd_mask >> max_phy_addr) << max_phy_addr;
|
||||
/* Set the high bits in the reserved mask. */
|
||||
if (max_phy_addr < 52)
|
||||
rsvd_mask = GENMASK_ULL(51, max_phy_addr);
|
||||
|
||||
/*
|
||||
* SDM vol 3, fig 4-11 "Formats of CR3 and Paging-Structure Entries
|
||||
@ -307,7 +271,7 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
|
||||
*/
|
||||
vcpu_sregs_get(vm, vcpuid, &sregs);
|
||||
if ((sregs.efer & EFER_NX) == 0) {
|
||||
rsvd_mask |= (1ull << 63);
|
||||
rsvd_mask |= PTE_NX_MASK;
|
||||
}
|
||||
|
||||
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
|
||||
@ -329,30 +293,29 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
|
||||
index[3] = (vaddr >> 39) & 0x1ffu;
|
||||
|
||||
pml4e = addr_gpa2hva(vm, vm->pgd);
|
||||
TEST_ASSERT(pml4e[index[3]].present,
|
||||
TEST_ASSERT(pml4e[index[3]] & PTE_PRESENT_MASK,
|
||||
"Expected pml4e to be present for gva: 0x%08lx", vaddr);
|
||||
TEST_ASSERT((*(uint64_t*)(&pml4e[index[3]]) &
|
||||
(rsvd_mask | (1ull << 7))) == 0,
|
||||
TEST_ASSERT((pml4e[index[3]] & (rsvd_mask | PTE_LARGE_MASK)) == 0,
|
||||
"Unexpected reserved bits set.");
|
||||
|
||||
pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
|
||||
TEST_ASSERT(pdpe[index[2]].present,
|
||||
pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
|
||||
TEST_ASSERT(pdpe[index[2]] & PTE_PRESENT_MASK,
|
||||
"Expected pdpe to be present for gva: 0x%08lx", vaddr);
|
||||
TEST_ASSERT(pdpe[index[2]].page_size == 0,
|
||||
TEST_ASSERT(!(pdpe[index[2]] & PTE_LARGE_MASK),
|
||||
"Expected pdpe to map a pde not a 1-GByte page.");
|
||||
TEST_ASSERT((*(uint64_t*)(&pdpe[index[2]]) & rsvd_mask) == 0,
|
||||
TEST_ASSERT((pdpe[index[2]] & rsvd_mask) == 0,
|
||||
"Unexpected reserved bits set.");
|
||||
|
||||
pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
|
||||
TEST_ASSERT(pde[index[1]].present,
|
||||
pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
|
||||
TEST_ASSERT(pde[index[1]] & PTE_PRESENT_MASK,
|
||||
"Expected pde to be present for gva: 0x%08lx", vaddr);
|
||||
TEST_ASSERT(pde[index[1]].page_size == 0,
|
||||
TEST_ASSERT(!(pde[index[1]] & PTE_LARGE_MASK),
|
||||
"Expected pde to map a pte not a 2-MByte page.");
|
||||
TEST_ASSERT((*(uint64_t*)(&pde[index[1]]) & rsvd_mask) == 0,
|
||||
TEST_ASSERT((pde[index[1]] & rsvd_mask) == 0,
|
||||
"Unexpected reserved bits set.");
|
||||
|
||||
pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
|
||||
TEST_ASSERT(pte[index[0]].present,
|
||||
pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
|
||||
TEST_ASSERT(pte[index[0]] & PTE_PRESENT_MASK,
|
||||
"Expected pte to be present for gva: 0x%08lx", vaddr);
|
||||
|
||||
return &pte[index[0]];
|
||||
@ -360,7 +323,7 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
|
||||
|
||||
uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr)
|
||||
{
|
||||
struct pageTableEntry *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
|
||||
uint64_t *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
|
||||
|
||||
return *(uint64_t *)pte;
|
||||
}
|
||||
@ -368,18 +331,17 @@ uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr)
|
||||
void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr,
|
||||
uint64_t pte)
|
||||
{
|
||||
struct pageTableEntry *new_pte = _vm_get_page_table_entry(vm, vcpuid,
|
||||
vaddr);
|
||||
uint64_t *new_pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
|
||||
|
||||
*(uint64_t *)new_pte = pte;
|
||||
}
|
||||
|
||||
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
|
||||
{
|
||||
struct pageUpperEntry *pml4e, *pml4e_start;
|
||||
struct pageUpperEntry *pdpe, *pdpe_start;
|
||||
struct pageUpperEntry *pde, *pde_start;
|
||||
struct pageTableEntry *pte, *pte_start;
|
||||
uint64_t *pml4e, *pml4e_start;
|
||||
uint64_t *pdpe, *pdpe_start;
|
||||
uint64_t *pde, *pde_start;
|
||||
uint64_t *pte, *pte_start;
|
||||
|
||||
if (!vm->pgd_created)
|
||||
return;
|
||||
@ -389,58 +351,58 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
|
||||
fprintf(stream, "%*s index hvaddr gpaddr "
|
||||
"addr w exec dirty\n",
|
||||
indent, "");
|
||||
pml4e_start = (struct pageUpperEntry *) addr_gpa2hva(vm, vm->pgd);
|
||||
pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd);
|
||||
for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
|
||||
pml4e = &pml4e_start[n1];
|
||||
if (!pml4e->present)
|
||||
if (!(*pml4e & PTE_PRESENT_MASK))
|
||||
continue;
|
||||
fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
|
||||
fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u "
|
||||
" %u\n",
|
||||
indent, "",
|
||||
pml4e - pml4e_start, pml4e,
|
||||
addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->pfn,
|
||||
pml4e->writable, pml4e->execute_disable);
|
||||
addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e),
|
||||
!!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK));
|
||||
|
||||
pdpe_start = addr_gpa2hva(vm, pml4e->pfn * vm->page_size);
|
||||
pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK);
|
||||
for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
|
||||
pdpe = &pdpe_start[n2];
|
||||
if (!pdpe->present)
|
||||
if (!(*pdpe & PTE_PRESENT_MASK))
|
||||
continue;
|
||||
fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10lx "
|
||||
fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10llx "
|
||||
"%u %u\n",
|
||||
indent, "",
|
||||
pdpe - pdpe_start, pdpe,
|
||||
addr_hva2gpa(vm, pdpe),
|
||||
(uint64_t) pdpe->pfn, pdpe->writable,
|
||||
pdpe->execute_disable);
|
||||
PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK),
|
||||
!!(*pdpe & PTE_NX_MASK));
|
||||
|
||||
pde_start = addr_gpa2hva(vm, pdpe->pfn * vm->page_size);
|
||||
pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK);
|
||||
for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
|
||||
pde = &pde_start[n3];
|
||||
if (!pde->present)
|
||||
if (!(*pde & PTE_PRESENT_MASK))
|
||||
continue;
|
||||
fprintf(stream, "%*spde 0x%-3zx %p "
|
||||
"0x%-12lx 0x%-10lx %u %u\n",
|
||||
"0x%-12lx 0x%-10llx %u %u\n",
|
||||
indent, "", pde - pde_start, pde,
|
||||
addr_hva2gpa(vm, pde),
|
||||
(uint64_t) pde->pfn, pde->writable,
|
||||
pde->execute_disable);
|
||||
PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK),
|
||||
!!(*pde & PTE_NX_MASK));
|
||||
|
||||
pte_start = addr_gpa2hva(vm, pde->pfn * vm->page_size);
|
||||
pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK);
|
||||
for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
|
||||
pte = &pte_start[n4];
|
||||
if (!pte->present)
|
||||
if (!(*pte & PTE_PRESENT_MASK))
|
||||
continue;
|
||||
fprintf(stream, "%*spte 0x%-3zx %p "
|
||||
"0x%-12lx 0x%-10lx %u %u "
|
||||
"0x%-12lx 0x%-10llx %u %u "
|
||||
" %u 0x%-10lx\n",
|
||||
indent, "",
|
||||
pte - pte_start, pte,
|
||||
addr_hva2gpa(vm, pte),
|
||||
(uint64_t) pte->pfn,
|
||||
pte->writable,
|
||||
pte->execute_disable,
|
||||
pte->dirty,
|
||||
PTE_GET_PFN(*pte),
|
||||
!!(*pte & PTE_WRITABLE_MASK),
|
||||
!!(*pte & PTE_NX_MASK),
|
||||
!!(*pte & PTE_DIRTY_MASK),
|
||||
((uint64_t) n1 << 27)
|
||||
| ((uint64_t) n2 << 18)
|
||||
| ((uint64_t) n3 << 9)
|
||||
@ -558,8 +520,8 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
|
||||
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
|
||||
{
|
||||
uint16_t index[4];
|
||||
struct pageUpperEntry *pml4e, *pdpe, *pde;
|
||||
struct pageTableEntry *pte;
|
||||
uint64_t *pml4e, *pdpe, *pde;
|
||||
uint64_t *pte;
|
||||
|
||||
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
|
||||
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
|
||||
@ -572,22 +534,22 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
|
||||
if (!vm->pgd_created)
|
||||
goto unmapped_gva;
|
||||
pml4e = addr_gpa2hva(vm, vm->pgd);
|
||||
if (!pml4e[index[3]].present)
|
||||
if (!(pml4e[index[3]] & PTE_PRESENT_MASK))
|
||||
goto unmapped_gva;
|
||||
|
||||
pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
|
||||
if (!pdpe[index[2]].present)
|
||||
pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
|
||||
if (!(pdpe[index[2]] & PTE_PRESENT_MASK))
|
||||
goto unmapped_gva;
|
||||
|
||||
pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
|
||||
if (!pde[index[1]].present)
|
||||
pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
|
||||
if (!(pde[index[1]] & PTE_PRESENT_MASK))
|
||||
goto unmapped_gva;
|
||||
|
||||
pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
|
||||
if (!pte[index[0]].present)
|
||||
pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
|
||||
if (!(pte[index[0]] & PTE_PRESENT_MASK))
|
||||
goto unmapped_gva;
|
||||
|
||||
return (pte[index[0]].pfn * vm->page_size) + (gva & 0xfffu);
|
||||
return (PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & ~PAGE_MASK);
|
||||
|
||||
unmapped_gva:
|
||||
TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
|
||||
|
@ -29,7 +29,6 @@
|
||||
#define X86_FEATURE_XSAVE (1 << 26)
|
||||
#define X86_FEATURE_OSXSAVE (1 << 27)
|
||||
|
||||
#define PAGE_SIZE (1 << 12)
|
||||
#define NUM_TILES 8
|
||||
#define TILE_SIZE 1024
|
||||
#define XSAVE_SIZE ((NUM_TILES * TILE_SIZE) + PAGE_SIZE)
|
||||
|
@ -12,7 +12,6 @@
|
||||
#include "vmx.h"
|
||||
|
||||
#define VCPU_ID 1
|
||||
#define PAGE_SIZE 4096
|
||||
#define MAXPHYADDR 36
|
||||
|
||||
#define MEM_REGION_GVA 0x0000123456789000
|
||||
|
@ -21,8 +21,6 @@
|
||||
|
||||
#define VCPU_ID 1
|
||||
|
||||
#define PAGE_SIZE 4096
|
||||
|
||||
#define SMRAM_SIZE 65536
|
||||
#define SMRAM_MEMSLOT ((1 << 16) | 1)
|
||||
#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
|
||||
|
@ -32,7 +32,6 @@
|
||||
#define MSR_IA32_TSC_ADJUST 0x3b
|
||||
#endif
|
||||
|
||||
#define PAGE_SIZE 4096
|
||||
#define VCPU_ID 5
|
||||
|
||||
#define TSC_ADJUST_VALUE (1ll << 32)
|
||||
|
@ -23,7 +23,6 @@
|
||||
#define SHINFO_REGION_GVA 0xc0000000ULL
|
||||
#define SHINFO_REGION_GPA 0xc0000000ULL
|
||||
#define SHINFO_REGION_SLOT 10
|
||||
#define PAGE_SIZE 4096
|
||||
|
||||
#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (2 * PAGE_SIZE))
|
||||
#define DUMMY_REGION_SLOT 11
|
||||
|
@ -15,7 +15,6 @@
|
||||
|
||||
#define HCALL_REGION_GPA 0xc0000000ULL
|
||||
#define HCALL_REGION_SLOT 10
|
||||
#define PAGE_SIZE 4096
|
||||
|
||||
static struct kvm_vm *vm;
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* KVM dirty ring implementation
|
||||
*
|
||||
|
@ -164,6 +164,10 @@ __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
|
||||
{
|
||||
}
|
||||
|
||||
__weak void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
|
||||
{
|
||||
}
|
||||
|
||||
bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
|
||||
{
|
||||
/*
|
||||
@ -357,6 +361,12 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
|
||||
EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
|
||||
#endif
|
||||
|
||||
static void kvm_flush_shadow_all(struct kvm *kvm)
|
||||
{
|
||||
kvm_arch_flush_shadow_all(kvm);
|
||||
kvm_arch_guest_memory_reclaimed(kvm);
|
||||
}
|
||||
|
||||
#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
|
||||
static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
|
||||
gfp_t gfp_flags)
|
||||
@ -485,12 +495,15 @@ typedef bool (*hva_handler_t)(struct kvm *kvm, struct kvm_gfn_range *range);
|
||||
typedef void (*on_lock_fn_t)(struct kvm *kvm, unsigned long start,
|
||||
unsigned long end);
|
||||
|
||||
typedef void (*on_unlock_fn_t)(struct kvm *kvm);
|
||||
|
||||
struct kvm_hva_range {
|
||||
unsigned long start;
|
||||
unsigned long end;
|
||||
pte_t pte;
|
||||
hva_handler_t handler;
|
||||
on_lock_fn_t on_lock;
|
||||
on_unlock_fn_t on_unlock;
|
||||
bool flush_on_ret;
|
||||
bool may_block;
|
||||
};
|
||||
@ -578,8 +591,11 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
|
||||
if (range->flush_on_ret && ret)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
|
||||
if (locked)
|
||||
if (locked) {
|
||||
KVM_MMU_UNLOCK(kvm);
|
||||
if (!IS_KVM_NULL_FN(range->on_unlock))
|
||||
range->on_unlock(kvm);
|
||||
}
|
||||
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
|
||||
@ -600,6 +616,7 @@ static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn,
|
||||
.pte = pte,
|
||||
.handler = handler,
|
||||
.on_lock = (void *)kvm_null_fn,
|
||||
.on_unlock = (void *)kvm_null_fn,
|
||||
.flush_on_ret = true,
|
||||
.may_block = false,
|
||||
};
|
||||
@ -619,6 +636,7 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn
|
||||
.pte = __pte(0),
|
||||
.handler = handler,
|
||||
.on_lock = (void *)kvm_null_fn,
|
||||
.on_unlock = (void *)kvm_null_fn,
|
||||
.flush_on_ret = false,
|
||||
.may_block = false,
|
||||
};
|
||||
@ -662,7 +680,7 @@ void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start,
|
||||
kvm->mmu_notifier_range_end = end;
|
||||
} else {
|
||||
/*
|
||||
* Fully tracking multiple concurrent ranges has dimishing
|
||||
* Fully tracking multiple concurrent ranges has diminishing
|
||||
* returns. Keep things simple and just find the minimal range
|
||||
* which includes the current and new ranges. As there won't be
|
||||
* enough information to subtract a range after its invalidate
|
||||
@ -687,6 +705,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
|
||||
.pte = __pte(0),
|
||||
.handler = kvm_unmap_gfn_range,
|
||||
.on_lock = kvm_inc_notifier_count,
|
||||
.on_unlock = kvm_arch_guest_memory_reclaimed,
|
||||
.flush_on_ret = true,
|
||||
.may_block = mmu_notifier_range_blockable(range),
|
||||
};
|
||||
@ -741,6 +760,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
|
||||
.pte = __pte(0),
|
||||
.handler = (void *)kvm_null_fn,
|
||||
.on_lock = kvm_dec_notifier_count,
|
||||
.on_unlock = (void *)kvm_null_fn,
|
||||
.flush_on_ret = false,
|
||||
.may_block = mmu_notifier_range_blockable(range),
|
||||
};
|
||||
@ -813,7 +833,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
|
||||
int idx;
|
||||
|
||||
idx = srcu_read_lock(&kvm->srcu);
|
||||
kvm_arch_flush_shadow_all(kvm);
|
||||
kvm_flush_shadow_all(kvm);
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
}
|
||||
|
||||
@ -955,12 +975,6 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
|
||||
int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc +
|
||||
kvm_vcpu_stats_header.num_desc;
|
||||
|
||||
/*
|
||||
* Force subsequent debugfs file creations to fail if the VM directory
|
||||
* is not created.
|
||||
*/
|
||||
kvm->debugfs_dentry = ERR_PTR(-ENOENT);
|
||||
|
||||
if (!debugfs_initialized())
|
||||
return 0;
|
||||
|
||||
@ -1081,6 +1095,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
|
||||
|
||||
BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
|
||||
|
||||
/*
|
||||
* Force subsequent debugfs file creations to fail if the VM directory
|
||||
* is not created (by kvm_create_vm_debugfs()).
|
||||
*/
|
||||
kvm->debugfs_dentry = ERR_PTR(-ENOENT);
|
||||
|
||||
if (init_srcu_struct(&kvm->srcu))
|
||||
goto out_err_no_srcu;
|
||||
if (init_srcu_struct(&kvm->irq_srcu))
|
||||
@ -1225,7 +1245,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
|
||||
WARN_ON(rcuwait_active(&kvm->mn_memslots_update_rcuwait));
|
||||
kvm->mn_active_invalidate_count = 0;
|
||||
#else
|
||||
kvm_arch_flush_shadow_all(kvm);
|
||||
kvm_flush_shadow_all(kvm);
|
||||
#endif
|
||||
kvm_arch_destroy_vm(kvm);
|
||||
kvm_destroy_devices(kvm);
|
||||
@ -1652,6 +1672,7 @@ static void kvm_invalidate_memslot(struct kvm *kvm,
|
||||
* - kvm_is_visible_gfn (mmu_check_root)
|
||||
*/
|
||||
kvm_arch_flush_shadow_memslot(kvm, old);
|
||||
kvm_arch_guest_memory_reclaimed(kvm);
|
||||
|
||||
/* Was released by kvm_swap_active_memslots, reacquire. */
|
||||
mutex_lock(&kvm->slots_arch_lock);
|
||||
@ -1799,7 +1820,7 @@ static int kvm_set_memslot(struct kvm *kvm,
|
||||
|
||||
/*
|
||||
* No need to refresh new->arch, changes after dropping slots_arch_lock
|
||||
* will directly hit the final, active memsot. Architectures are
|
||||
* will directly hit the final, active memslot. Architectures are
|
||||
* responsible for knowing that new->arch may be stale.
|
||||
*/
|
||||
kvm_commit_memory_region(kvm, old, new, change);
|
||||
|
@ -1,4 +1,4 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
|
||||
#ifndef __KVM_MM_H__
|
||||
#define __KVM_MM_H__ 1
|
||||
|
Loading…
Reference in New Issue
Block a user