Merge remote-tracking branch 'kvm/master' into HEAD
Pick commit fdba608f15 ("KVM: VMX: Wake vCPU when delivering posted
IRQ even if vCPU == this vCPU"). In addition to fixing a bug, it
also aligns the non-nested and nested usage of triggering posted
interrupts, allowing for additional cleanups.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
commit
855fb0384a
@ -2413,8 +2413,12 @@
|
||||
Default is 1 (enabled)
|
||||
|
||||
kvm-intel.emulate_invalid_guest_state=
|
||||
[KVM,Intel] Enable emulation of invalid guest states
|
||||
Default is 0 (disabled)
|
||||
[KVM,Intel] Disable emulation of invalid guest state.
|
||||
Ignored if kvm-intel.enable_unrestricted_guest=1, as
|
||||
guest state is never invalid for unrestricted guests.
|
||||
This param doesn't apply to nested guests (L2), as KVM
|
||||
never emulates invalid L2 guest state.
|
||||
Default is 1 (enabled)
|
||||
|
||||
kvm-intel.flexpriority=
|
||||
[KVM,Intel] Disable FlexPriority feature (TPR shadow).
|
||||
|
@ -47,6 +47,7 @@ KVM_X86_OP(set_dr7)
|
||||
KVM_X86_OP(cache_reg)
|
||||
KVM_X86_OP(get_rflags)
|
||||
KVM_X86_OP(set_rflags)
|
||||
KVM_X86_OP(get_if_flag)
|
||||
KVM_X86_OP(tlb_flush_all)
|
||||
KVM_X86_OP(tlb_flush_current)
|
||||
KVM_X86_OP_NULL(tlb_remote_flush)
|
||||
|
@ -97,7 +97,7 @@
|
||||
KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
|
||||
#define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26)
|
||||
#define KVM_REQ_TLB_FLUSH_GUEST \
|
||||
KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP)
|
||||
KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
|
||||
#define KVM_REQ_APF_READY KVM_ARCH_REQ(28)
|
||||
#define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29)
|
||||
#define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
|
||||
@ -1354,6 +1354,7 @@ struct kvm_x86_ops {
|
||||
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
|
||||
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
|
||||
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
|
||||
bool (*get_if_flag)(struct kvm_vcpu *vcpu);
|
||||
|
||||
void (*tlb_flush_all)(struct kvm_vcpu *vcpu);
|
||||
void (*tlb_flush_current)(struct kvm_vcpu *vcpu);
|
||||
|
@ -1923,11 +1923,13 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
|
||||
|
||||
all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL;
|
||||
|
||||
if (all_cpus)
|
||||
goto check_and_send_ipi;
|
||||
|
||||
if (!sparse_banks_len)
|
||||
goto ret_success;
|
||||
|
||||
if (!all_cpus &&
|
||||
kvm_read_guest(kvm,
|
||||
if (kvm_read_guest(kvm,
|
||||
hc->ingpa + offsetof(struct hv_send_ipi_ex,
|
||||
vp_set.bank_contents),
|
||||
sparse_banks,
|
||||
@ -1935,6 +1937,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
|
||||
return HV_STATUS_INVALID_HYPERCALL_INPUT;
|
||||
}
|
||||
|
||||
check_and_send_ipi:
|
||||
if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
|
||||
return HV_STATUS_INVALID_HYPERCALL_INPUT;
|
||||
|
||||
|
@ -3971,7 +3971,21 @@ out_retry:
|
||||
static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
|
||||
struct kvm_page_fault *fault, int mmu_seq)
|
||||
{
|
||||
if (is_obsolete_sp(vcpu->kvm, to_shadow_page(vcpu->arch.mmu->root_hpa)))
|
||||
struct kvm_mmu_page *sp = to_shadow_page(vcpu->arch.mmu->root_hpa);
|
||||
|
||||
/* Special roots, e.g. pae_root, are not backed by shadow pages. */
|
||||
if (sp && is_obsolete_sp(vcpu->kvm, sp))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Roots without an associated shadow page are considered invalid if
|
||||
* there is a pending request to free obsolete roots. The request is
|
||||
* only a hint that the current root _may_ be obsolete and needs to be
|
||||
* reloaded, e.g. if the guest frees a PGD that KVM is tracking as a
|
||||
* previous root, then __kvm_mmu_prepare_zap_page() signals all vCPUs
|
||||
* to reload even if no vCPU is actively using the root.
|
||||
*/
|
||||
if (!sp && kvm_test_request(KVM_REQ_MMU_RELOAD, vcpu))
|
||||
return true;
|
||||
|
||||
return fault->slot &&
|
||||
|
@ -26,6 +26,7 @@ static gfn_t round_gfn_for_level(gfn_t gfn, int level)
|
||||
*/
|
||||
void tdp_iter_restart(struct tdp_iter *iter)
|
||||
{
|
||||
iter->yielded = false;
|
||||
iter->yielded_gfn = iter->next_last_level_gfn;
|
||||
iter->level = iter->root_level;
|
||||
|
||||
@ -160,6 +161,11 @@ static bool try_step_up(struct tdp_iter *iter)
|
||||
*/
|
||||
void tdp_iter_next(struct tdp_iter *iter)
|
||||
{
|
||||
if (iter->yielded) {
|
||||
tdp_iter_restart(iter);
|
||||
return;
|
||||
}
|
||||
|
||||
if (try_step_down(iter))
|
||||
return;
|
||||
|
||||
|
@ -45,6 +45,12 @@ struct tdp_iter {
|
||||
* iterator walks off the end of the paging structure.
|
||||
*/
|
||||
bool valid;
|
||||
/*
|
||||
* True if KVM dropped mmu_lock and yielded in the middle of a walk, in
|
||||
* which case tdp_iter_next() needs to restart the walk at the root
|
||||
* level instead of advancing to the next entry.
|
||||
*/
|
||||
bool yielded;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -502,6 +502,8 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
|
||||
struct tdp_iter *iter,
|
||||
u64 new_spte)
|
||||
{
|
||||
WARN_ON_ONCE(iter->yielded);
|
||||
|
||||
lockdep_assert_held_read(&kvm->mmu_lock);
|
||||
|
||||
/*
|
||||
@ -575,6 +577,8 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
|
||||
u64 new_spte, bool record_acc_track,
|
||||
bool record_dirty_log)
|
||||
{
|
||||
WARN_ON_ONCE(iter->yielded);
|
||||
|
||||
lockdep_assert_held_write(&kvm->mmu_lock);
|
||||
|
||||
/*
|
||||
@ -640,18 +644,19 @@ static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm,
|
||||
* If this function should yield and flush is set, it will perform a remote
|
||||
* TLB flush before yielding.
|
||||
*
|
||||
* If this function yields, it will also reset the tdp_iter's walk over the
|
||||
* paging structure and the calling function should skip to the next
|
||||
* iteration to allow the iterator to continue its traversal from the
|
||||
* paging structure root.
|
||||
* If this function yields, iter->yielded is set and the caller must skip to
|
||||
* the next iteration, where tdp_iter_next() will reset the tdp_iter's walk
|
||||
* over the paging structures to allow the iterator to continue its traversal
|
||||
* from the paging structure root.
|
||||
*
|
||||
* Return true if this function yielded and the iterator's traversal was reset.
|
||||
* Return false if a yield was not needed.
|
||||
* Returns true if this function yielded.
|
||||
*/
|
||||
static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
|
||||
struct tdp_iter *iter, bool flush,
|
||||
bool shared)
|
||||
static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm,
|
||||
struct tdp_iter *iter,
|
||||
bool flush, bool shared)
|
||||
{
|
||||
WARN_ON(iter->yielded);
|
||||
|
||||
/* Ensure forward progress has been made before yielding. */
|
||||
if (iter->next_last_level_gfn == iter->yielded_gfn)
|
||||
return false;
|
||||
@ -671,12 +676,10 @@ static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
|
||||
|
||||
WARN_ON(iter->gfn > iter->next_last_level_gfn);
|
||||
|
||||
tdp_iter_restart(iter);
|
||||
|
||||
return true;
|
||||
iter->yielded = true;
|
||||
}
|
||||
|
||||
return false;
|
||||
return iter->yielded;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1594,6 +1594,15 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
|
||||
to_svm(vcpu)->vmcb->save.rflags = rflags;
|
||||
}
|
||||
|
||||
static bool svm_get_if_flag(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vmcb *vmcb = to_svm(vcpu)->vmcb;
|
||||
|
||||
return sev_es_guest(vcpu->kvm)
|
||||
? vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK
|
||||
: kvm_get_rflags(vcpu) & X86_EFLAGS_IF;
|
||||
}
|
||||
|
||||
static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
|
||||
{
|
||||
kvm_register_mark_available(vcpu, reg);
|
||||
@ -3583,14 +3592,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
|
||||
if (!gif_set(svm))
|
||||
return true;
|
||||
|
||||
if (sev_es_guest(vcpu->kvm)) {
|
||||
/*
|
||||
* SEV-ES guests do not expose RFLAGS. Use the VMCB interrupt mask
|
||||
* bit to determine the state of the IF flag.
|
||||
*/
|
||||
if (!(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK))
|
||||
return true;
|
||||
} else if (is_guest_mode(vcpu)) {
|
||||
if (is_guest_mode(vcpu)) {
|
||||
/* As long as interrupts are being delivered... */
|
||||
if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK)
|
||||
? !(svm->vmcb01.ptr->save.rflags & X86_EFLAGS_IF)
|
||||
@ -3601,7 +3603,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
|
||||
if (nested_exit_on_intr(svm))
|
||||
return false;
|
||||
} else {
|
||||
if (!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF))
|
||||
if (!svm_get_if_flag(vcpu))
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -4634,6 +4636,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
|
||||
.cache_reg = svm_cache_reg,
|
||||
.get_rflags = svm_get_rflags,
|
||||
.set_rflags = svm_set_rflags,
|
||||
.get_if_flag = svm_get_if_flag,
|
||||
|
||||
.tlb_flush_all = svm_flush_tlb,
|
||||
.tlb_flush_current = svm_flush_tlb,
|
||||
|
@ -1372,6 +1372,11 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
|
||||
vmx->emulation_required = vmx_emulation_required(vcpu);
|
||||
}
|
||||
|
||||
static bool vmx_get_if_flag(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vmx_get_rflags(vcpu) & X86_EFLAGS_IF;
|
||||
}
|
||||
|
||||
u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
|
||||
@ -3995,8 +4000,7 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
|
||||
* guaranteed to see PID.ON=1 and sync the PIR to IRR if triggering a
|
||||
* posted interrupt "fails" because vcpu->mode != IN_GUEST_MODE.
|
||||
*/
|
||||
if (vcpu != kvm_get_running_vcpu() &&
|
||||
!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
|
||||
if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
|
||||
kvm_vcpu_kick(vcpu);
|
||||
|
||||
return 0;
|
||||
@ -5921,18 +5925,14 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
|
||||
vmx_flush_pml_buffer(vcpu);
|
||||
|
||||
/*
|
||||
* We should never reach this point with a pending nested VM-Enter, and
|
||||
* more specifically emulation of L2 due to invalid guest state (see
|
||||
* below) should never happen as that means we incorrectly allowed a
|
||||
* nested VM-Enter with an invalid vmcs12.
|
||||
* KVM should never reach this point with a pending nested VM-Enter.
|
||||
* More specifically, short-circuiting VM-Entry to emulate L2 due to
|
||||
* invalid guest state should never happen as that means KVM knowingly
|
||||
* allowed a nested VM-Enter with an invalid vmcs12. More below.
|
||||
*/
|
||||
if (KVM_BUG_ON(vmx->nested.nested_run_pending, vcpu->kvm))
|
||||
return -EIO;
|
||||
|
||||
/* If guest state is invalid, start emulating */
|
||||
if (vmx->emulation_required)
|
||||
return handle_invalid_guest_state(vcpu);
|
||||
|
||||
if (is_guest_mode(vcpu)) {
|
||||
/*
|
||||
* PML is never enabled when running L2, bail immediately if a
|
||||
@ -5954,10 +5954,30 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
|
||||
*/
|
||||
nested_mark_vmcs12_pages_dirty(vcpu);
|
||||
|
||||
/*
|
||||
* Synthesize a triple fault if L2 state is invalid. In normal
|
||||
* operation, nested VM-Enter rejects any attempt to enter L2
|
||||
* with invalid state. However, those checks are skipped if
|
||||
* state is being stuffed via RSM or KVM_SET_NESTED_STATE. If
|
||||
* L2 state is invalid, it means either L1 modified SMRAM state
|
||||
* or userspace provided bad state. Synthesize TRIPLE_FAULT as
|
||||
* doing so is architecturally allowed in the RSM case, and is
|
||||
* the least awful solution for the userspace case without
|
||||
* risking false positives.
|
||||
*/
|
||||
if (vmx->emulation_required) {
|
||||
nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (nested_vmx_reflect_vmexit(vcpu))
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* If guest state is invalid, start emulating. L2 is handled above. */
|
||||
if (vmx->emulation_required)
|
||||
return handle_invalid_guest_state(vcpu);
|
||||
|
||||
if (exit_reason.failed_vmentry) {
|
||||
dump_vmcs(vcpu);
|
||||
vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
|
||||
@ -6652,9 +6672,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
||||
* consistency check VM-Exit due to invalid guest state and bail.
|
||||
*/
|
||||
if (unlikely(vmx->emulation_required)) {
|
||||
|
||||
/* We don't emulate invalid state of a nested guest */
|
||||
vmx->fail = is_guest_mode(vcpu);
|
||||
vmx->fail = 0;
|
||||
|
||||
vmx->exit_reason.full = EXIT_REASON_INVALID_STATE;
|
||||
vmx->exit_reason.failed_vmentry = 1;
|
||||
@ -7609,6 +7627,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
|
||||
.cache_reg = vmx_cache_reg,
|
||||
.get_rflags = vmx_get_rflags,
|
||||
.set_rflags = vmx_set_rflags,
|
||||
.get_if_flag = vmx_get_if_flag,
|
||||
|
||||
.tlb_flush_all = vmx_flush_tlb_all,
|
||||
.tlb_flush_current = vmx_flush_tlb_current,
|
||||
|
@ -906,7 +906,8 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
|
||||
!load_pdptrs(vcpu, kvm_read_cr3(vcpu)))
|
||||
return 1;
|
||||
|
||||
if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
|
||||
if (!(cr0 & X86_CR0_PG) &&
|
||||
(is_64_bit_mode(vcpu) || kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)))
|
||||
return 1;
|
||||
|
||||
static_call(kvm_x86_set_cr0)(vcpu, cr0);
|
||||
@ -1343,7 +1344,7 @@ static const u32 msrs_to_save_all[] = {
|
||||
MSR_IA32_UMWAIT_CONTROL,
|
||||
|
||||
MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
|
||||
MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3,
|
||||
MSR_ARCH_PERFMON_FIXED_CTR0 + 2,
|
||||
MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
|
||||
MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
|
||||
MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
|
||||
@ -3424,7 +3425,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
|
||||
if (!msr_info->host_initiated)
|
||||
return 1;
|
||||
if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM) && kvm_get_msr_feature(&msr_ent))
|
||||
if (kvm_get_msr_feature(&msr_ent))
|
||||
return 1;
|
||||
if (data & ~msr_ent.data)
|
||||
return 1;
|
||||
@ -7144,7 +7145,13 @@ static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
|
||||
unsigned short port, void *val, unsigned int count)
|
||||
{
|
||||
if (vcpu->arch.pio.count) {
|
||||
/* Complete previous iteration. */
|
||||
/*
|
||||
* Complete a previous iteration that required userspace I/O.
|
||||
* Note, @count isn't guaranteed to match pio.count as userspace
|
||||
* can modify ECX before rerunning the vCPU. Ignore any such
|
||||
* shenanigans as KVM doesn't support modifying the rep count,
|
||||
* and the emulator ensures @count doesn't overflow the buffer.
|
||||
*/
|
||||
} else {
|
||||
int r = __emulator_pio_in(vcpu, size, port, count);
|
||||
if (!r)
|
||||
@ -7153,7 +7160,6 @@ static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
|
||||
/* Results already available, fall through. */
|
||||
}
|
||||
|
||||
WARN_ON(count != vcpu->arch.pio.count);
|
||||
complete_emulator_pio_in(vcpu, val);
|
||||
return 1;
|
||||
}
|
||||
@ -9043,14 +9049,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_run *kvm_run = vcpu->run;
|
||||
|
||||
/*
|
||||
* if_flag is obsolete and useless, so do not bother
|
||||
* setting it for SEV-ES guests. Userspace can just
|
||||
* use kvm_run->ready_for_interrupt_injection.
|
||||
*/
|
||||
kvm_run->if_flag = !vcpu->arch.guest_state_protected
|
||||
&& (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
|
||||
|
||||
kvm_run->if_flag = static_call(kvm_x86_get_if_flag)(vcpu);
|
||||
kvm_run->cr8 = kvm_get_cr8(vcpu);
|
||||
kvm_run->apic_base = kvm_get_apic_base(vcpu);
|
||||
|
||||
|
2
tools/testing/selftests/kvm/.gitignore
vendored
2
tools/testing/selftests/kvm/.gitignore
vendored
@ -30,10 +30,12 @@
|
||||
/x86_64/svm_int_ctl_test
|
||||
/x86_64/sync_regs_test
|
||||
/x86_64/tsc_msrs_test
|
||||
/x86_64/userspace_io_test
|
||||
/x86_64/userspace_msr_exit_test
|
||||
/x86_64/vmx_apic_access_test
|
||||
/x86_64/vmx_close_while_nested_test
|
||||
/x86_64/vmx_dirty_log_test
|
||||
/x86_64/vmx_invalid_nested_guest_state
|
||||
/x86_64/vmx_preemption_timer_test
|
||||
/x86_64/vmx_set_nested_state_test
|
||||
/x86_64/vmx_tsc_adjust_test
|
||||
|
@ -59,10 +59,12 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test
|
||||
|
@ -321,6 +321,7 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm);
|
||||
|
||||
unsigned int vm_get_page_size(struct kvm_vm *vm);
|
||||
unsigned int vm_get_page_shift(struct kvm_vm *vm);
|
||||
unsigned long vm_compute_max_gfn(struct kvm_vm *vm);
|
||||
uint64_t vm_get_max_gfn(struct kvm_vm *vm);
|
||||
int vm_get_fd(struct kvm_vm *vm);
|
||||
|
||||
|
@ -302,7 +302,7 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
|
||||
(1ULL << (vm->va_bits - 1)) >> vm->page_shift);
|
||||
|
||||
/* Limit physical addresses to PA-bits. */
|
||||
vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
|
||||
vm->max_gfn = vm_compute_max_gfn(vm);
|
||||
|
||||
/* Allocate and setup memory for guest. */
|
||||
vm->vpages_mapped = sparsebit_alloc();
|
||||
@ -2328,6 +2328,11 @@ unsigned int vm_get_page_shift(struct kvm_vm *vm)
|
||||
return vm->page_shift;
|
||||
}
|
||||
|
||||
unsigned long __attribute__((weak)) vm_compute_max_gfn(struct kvm_vm *vm)
|
||||
{
|
||||
return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
|
||||
}
|
||||
|
||||
uint64_t vm_get_max_gfn(struct kvm_vm *vm)
|
||||
{
|
||||
return vm->max_gfn;
|
||||
|
@ -1431,3 +1431,71 @@ struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpui
|
||||
|
||||
return cpuid;
|
||||
}
|
||||
|
||||
#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541
|
||||
#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163
|
||||
#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65
|
||||
|
||||
static inline unsigned x86_family(unsigned int eax)
|
||||
{
|
||||
unsigned int x86;
|
||||
|
||||
x86 = (eax >> 8) & 0xf;
|
||||
|
||||
if (x86 == 0xf)
|
||||
x86 += (eax >> 20) & 0xff;
|
||||
|
||||
return x86;
|
||||
}
|
||||
|
||||
unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
|
||||
{
|
||||
const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
|
||||
unsigned long ht_gfn, max_gfn, max_pfn;
|
||||
uint32_t eax, ebx, ecx, edx, max_ext_leaf;
|
||||
|
||||
max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1;
|
||||
|
||||
/* Avoid reserved HyperTransport region on AMD processors. */
|
||||
eax = ecx = 0;
|
||||
cpuid(&eax, &ebx, &ecx, &edx);
|
||||
if (ebx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx ||
|
||||
ecx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx ||
|
||||
edx != X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
|
||||
return max_gfn;
|
||||
|
||||
/* On parts with <40 physical address bits, the area is fully hidden */
|
||||
if (vm->pa_bits < 40)
|
||||
return max_gfn;
|
||||
|
||||
/* Before family 17h, the HyperTransport area is just below 1T. */
|
||||
ht_gfn = (1 << 28) - num_ht_pages;
|
||||
eax = 1;
|
||||
cpuid(&eax, &ebx, &ecx, &edx);
|
||||
if (x86_family(eax) < 0x17)
|
||||
goto done;
|
||||
|
||||
/*
|
||||
* Otherwise it's at the top of the physical address space, possibly
|
||||
* reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use
|
||||
* the old conservative value if MAXPHYADDR is not enumerated.
|
||||
*/
|
||||
eax = 0x80000000;
|
||||
cpuid(&eax, &ebx, &ecx, &edx);
|
||||
max_ext_leaf = eax;
|
||||
if (max_ext_leaf < 0x80000008)
|
||||
goto done;
|
||||
|
||||
eax = 0x80000008;
|
||||
cpuid(&eax, &ebx, &ecx, &edx);
|
||||
max_pfn = (1ULL << ((eax & 0xff) - vm->page_shift)) - 1;
|
||||
if (max_ext_leaf >= 0x8000001f) {
|
||||
eax = 0x8000001f;
|
||||
cpuid(&eax, &ebx, &ecx, &edx);
|
||||
max_pfn >>= (ebx >> 6) & 0x3f;
|
||||
}
|
||||
|
||||
ht_gfn = max_pfn - num_ht_pages;
|
||||
done:
|
||||
return min(max_gfn, ht_gfn - 1);
|
||||
}
|
||||
|
@ -75,7 +75,7 @@ static void l1_guest_code(struct svm_test_data *svm)
|
||||
vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
|
||||
|
||||
/* No intercepts for real and virtual interrupts */
|
||||
vmcb->control.intercept &= ~(1ULL << INTERCEPT_INTR | INTERCEPT_VINTR);
|
||||
vmcb->control.intercept &= ~(BIT(INTERCEPT_INTR) | BIT(INTERCEPT_VINTR));
|
||||
|
||||
/* Make a virtual interrupt VINTR_IRQ_NUMBER pending */
|
||||
vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT);
|
||||
|
114
tools/testing/selftests/kvm/x86_64/userspace_io_test.c
Normal file
114
tools/testing/selftests/kvm/x86_64/userspace_io_test.c
Normal file
@ -0,0 +1,114 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include "test_util.h"
|
||||
|
||||
#include "kvm_util.h"
|
||||
#include "processor.h"
|
||||
|
||||
#define VCPU_ID 1
|
||||
|
||||
static void guest_ins_port80(uint8_t *buffer, unsigned int count)
|
||||
{
|
||||
unsigned long end;
|
||||
|
||||
if (count == 2)
|
||||
end = (unsigned long)buffer + 1;
|
||||
else
|
||||
end = (unsigned long)buffer + 8192;
|
||||
|
||||
asm volatile("cld; rep; insb" : "+D"(buffer), "+c"(count) : "d"(0x80) : "memory");
|
||||
GUEST_ASSERT_1(count == 0, count);
|
||||
GUEST_ASSERT_2((unsigned long)buffer == end, buffer, end);
|
||||
}
|
||||
|
||||
static void guest_code(void)
|
||||
{
|
||||
uint8_t buffer[8192];
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Special case tests. main() will adjust RCX 2 => 1 and 3 => 8192 to
|
||||
* test that KVM doesn't explode when userspace modifies the "count" on
|
||||
* a userspace I/O exit. KVM isn't required to play nice with the I/O
|
||||
* itself as KVM doesn't support manipulating the count, it just needs
|
||||
* to not explode or overflow a buffer.
|
||||
*/
|
||||
guest_ins_port80(buffer, 2);
|
||||
guest_ins_port80(buffer, 3);
|
||||
|
||||
/* Verify KVM fills the buffer correctly when not stuffing RCX. */
|
||||
memset(buffer, 0, sizeof(buffer));
|
||||
guest_ins_port80(buffer, 8192);
|
||||
for (i = 0; i < 8192; i++)
|
||||
GUEST_ASSERT_2(buffer[i] == 0xaa, i, buffer[i]);
|
||||
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
struct kvm_regs regs;
|
||||
struct kvm_run *run;
|
||||
struct kvm_vm *vm;
|
||||
struct ucall uc;
|
||||
int rc;
|
||||
|
||||
/* Tell stdout not to buffer its content */
|
||||
setbuf(stdout, NULL);
|
||||
|
||||
/* Create VM */
|
||||
vm = vm_create_default(VCPU_ID, 0, guest_code);
|
||||
run = vcpu_state(vm, VCPU_ID);
|
||||
|
||||
memset(&regs, 0, sizeof(regs));
|
||||
|
||||
while (1) {
|
||||
rc = _vcpu_run(vm, VCPU_ID);
|
||||
|
||||
TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
|
||||
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
|
||||
"Unexpected exit reason: %u (%s),\n",
|
||||
run->exit_reason,
|
||||
exit_reason_str(run->exit_reason));
|
||||
|
||||
if (get_ucall(vm, VCPU_ID, &uc))
|
||||
break;
|
||||
|
||||
TEST_ASSERT(run->io.port == 0x80,
|
||||
"Expected I/O at port 0x80, got port 0x%x\n", run->io.port);
|
||||
|
||||
/*
|
||||
* Modify the rep string count in RCX: 2 => 1 and 3 => 8192.
|
||||
* Note, this abuses KVM's batching of rep string I/O to avoid
|
||||
* getting stuck in an infinite loop. That behavior isn't in
|
||||
* scope from a testing perspective as it's not ABI in any way,
|
||||
* i.e. it really is abusing internal KVM knowledge.
|
||||
*/
|
||||
vcpu_regs_get(vm, VCPU_ID, &regs);
|
||||
if (regs.rcx == 2)
|
||||
regs.rcx = 1;
|
||||
if (regs.rcx == 3)
|
||||
regs.rcx = 8192;
|
||||
memset((void *)run + run->io.data_offset, 0xaa, 4096);
|
||||
vcpu_regs_set(vm, VCPU_ID, &regs);
|
||||
}
|
||||
|
||||
switch (uc.cmd) {
|
||||
case UCALL_DONE:
|
||||
break;
|
||||
case UCALL_ABORT:
|
||||
TEST_FAIL("%s at %s:%ld : argN+1 = 0x%lx, argN+2 = 0x%lx",
|
||||
(const char *)uc.args[0], __FILE__, uc.args[1],
|
||||
uc.args[2], uc.args[3]);
|
||||
default:
|
||||
TEST_FAIL("Unknown ucall %lu", uc.cmd);
|
||||
}
|
||||
|
||||
kvm_vm_free(vm);
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,105 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
#include "test_util.h"
|
||||
#include "kvm_util.h"
|
||||
#include "processor.h"
|
||||
#include "vmx.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include "kselftest.h"
|
||||
|
||||
#define VCPU_ID 0
|
||||
#define ARBITRARY_IO_PORT 0x2000
|
||||
|
||||
static struct kvm_vm *vm;
|
||||
|
||||
static void l2_guest_code(void)
|
||||
{
|
||||
/*
|
||||
* Generate an exit to L0 userspace, i.e. main(), via I/O to an
|
||||
* arbitrary port.
|
||||
*/
|
||||
asm volatile("inb %%dx, %%al"
|
||||
: : [port] "d" (ARBITRARY_IO_PORT) : "rax");
|
||||
}
|
||||
|
||||
static void l1_guest_code(struct vmx_pages *vmx_pages)
|
||||
{
|
||||
#define L2_GUEST_STACK_SIZE 64
|
||||
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
|
||||
|
||||
GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
|
||||
GUEST_ASSERT(load_vmcs(vmx_pages));
|
||||
|
||||
/* Prepare the VMCS for L2 execution. */
|
||||
prepare_vmcs(vmx_pages, l2_guest_code,
|
||||
&l2_guest_stack[L2_GUEST_STACK_SIZE]);
|
||||
|
||||
/*
|
||||
* L2 must be run without unrestricted guest, verify that the selftests
|
||||
* library hasn't enabled it. Because KVM selftests jump directly to
|
||||
* 64-bit mode, unrestricted guest support isn't required.
|
||||
*/
|
||||
GUEST_ASSERT(!(vmreadz(CPU_BASED_VM_EXEC_CONTROL) & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) ||
|
||||
!(vmreadz(SECONDARY_VM_EXEC_CONTROL) & SECONDARY_EXEC_UNRESTRICTED_GUEST));
|
||||
|
||||
GUEST_ASSERT(!vmlaunch());
|
||||
|
||||
/* L2 should triple fault after main() stuffs invalid guest state. */
|
||||
GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
vm_vaddr_t vmx_pages_gva;
|
||||
struct kvm_sregs sregs;
|
||||
struct kvm_run *run;
|
||||
struct ucall uc;
|
||||
|
||||
nested_vmx_check_supported();
|
||||
|
||||
vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
|
||||
|
||||
/* Allocate VMX pages and shared descriptors (vmx_pages). */
|
||||
vcpu_alloc_vmx(vm, &vmx_pages_gva);
|
||||
vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
|
||||
|
||||
vcpu_run(vm, VCPU_ID);
|
||||
|
||||
run = vcpu_state(vm, VCPU_ID);
|
||||
|
||||
/*
|
||||
* The first exit to L0 userspace should be an I/O access from L2.
|
||||
* Running L1 should launch L2 without triggering an exit to userspace.
|
||||
*/
|
||||
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
|
||||
"Expected KVM_EXIT_IO, got: %u (%s)\n",
|
||||
run->exit_reason, exit_reason_str(run->exit_reason));
|
||||
|
||||
TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
|
||||
"Expected IN from port %d from L2, got port %d",
|
||||
ARBITRARY_IO_PORT, run->io.port);
|
||||
|
||||
/*
|
||||
* Stuff invalid guest state for L2 by making TR unusable.  The next
|
||||
* KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support
|
||||
* emulating invalid guest state for L2.
|
||||
*/
|
||||
memset(&sregs, 0, sizeof(sregs));
|
||||
vcpu_sregs_get(vm, VCPU_ID, &sregs);
|
||||
sregs.tr.unusable = 1;
|
||||
vcpu_sregs_set(vm, VCPU_ID, &sregs);
|
||||
|
||||
vcpu_run(vm, VCPU_ID);
|
||||
|
||||
switch (get_ucall(vm, VCPU_ID, &uc)) {
|
||||
case UCALL_DONE:
|
||||
break;
|
||||
case UCALL_ABORT:
|
||||
TEST_FAIL("%s", (const char *)uc.args[0]);
|
||||
default:
|
||||
TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
|
||||
}
|
||||
}
|
@ -110,22 +110,5 @@ int main(int argc, char *argv[])
|
||||
ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_LBR_FMT);
|
||||
TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail.");
|
||||
|
||||
/* testcase 4, set capabilities when we don't have PDCM bit */
|
||||
entry_1_0->ecx &= ~X86_FEATURE_PDCM;
|
||||
vcpu_set_cpuid(vm, VCPU_ID, cpuid);
|
||||
ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
|
||||
TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail.");
|
||||
|
||||
/* testcase 5, set capabilities when we don't have PMU version bits */
|
||||
entry_1_0->ecx |= X86_FEATURE_PDCM;
|
||||
eax.split.version_id = 0;
|
||||
entry_1_0->ecx = eax.full;
|
||||
vcpu_set_cpuid(vm, VCPU_ID, cpuid);
|
||||
ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES);
|
||||
TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail.");
|
||||
|
||||
vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, 0);
|
||||
ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), 0);
|
||||
|
||||
kvm_vm_free(vm);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user