forked from Minki/linux
KVM fixes for v4.13-rc4
ARM: - Yet another race with VM destruction plugged - A set of small vgic fixes x86: - Preserve pending INIT - RCU fixes in paravirtual async pf, VM teardown, and VMXOFF emulation - nVMX interrupt injection and dirty tracking fixes - initialize to make UBSAN happy -----BEGIN PGP SIGNATURE----- iQEcBAABCAAGBQJZhNZ2AAoJEED/6hsPKofoKDEH/iIw1pcgdEW2NP/kFtKXSMCK josdFwGPQMjBGzx6No4tfMCNDOjW2FKYXapN6CASAqMJo5H2krj8VHMVwm0h3lUl 4RdbbkFTdfl/Znp8M39efFheWrjX+L37AltKV7xAgA7n8cO39KV4RReimzSc7aVq 5dDt4k0dbF9/zXHxkGiKEhwaSSbZEEznQQ/09annSoOVe6om5esUrUtnUF5P99uz IhAsmJbZxE5VmowjT5MjaR1mXSLLNL55HWKvkf3B3ZGnyxQU+3Vz7IGf2Ma2j+jV IrdXA11NHDY1anDYgDhFlr3rTCPu9CBmTv4O8zsDRlX9TGpr8bBX2dvjRKl7uOo= =KM80 -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull KVM fixes from Radim Krčmář: "ARM: - Yet another race with VM destruction plugged - A set of small vgic fixes x86: - Preserve pending INIT - RCU fixes in paravirtual async pf, VM teardown, and VMXOFF emulation - nVMX interrupt injection and dirty tracking fixes - initialize to make UBSAN happy" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: arm/arm64: vgic: Use READ_ONCE fo cmpxchg KVM: nVMX: Fix interrupt window request with "Acknowledge interrupt on exit" KVM: nVMX: mark vmcs12 pages dirty on L2 exit kvm: nVMX: don't flush VMCS12 during VMXOFF or VCPU teardown KVM: nVMX: do not pin the VMCS12 KVM: avoid using rcu_dereference_protected KVM: X86: init irq->level in kvm_pv_kick_cpu_op KVM: X86: Fix loss of pending INIT due to race KVM: async_pf: make rcu irq exit if not triggered from idle task KVM: nVMX: fixes to nested virt interrupt injection KVM: nVMX: do not fill vm_exit_intr_error_code in prepare_vmcs12 KVM: arm/arm64: Handle hva aging while destroying the vm KVM: arm/arm64: PMU: Fix overflow interrupt injection KVM: arm/arm64: Fix bug in advertising KVM_CAP_MSI_DEVID capability
This commit is contained in:
commit
6999507416
@ -764,7 +764,7 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
if (p->is_write) {
|
||||
if (r->CRm & 0x2)
|
||||
/* accessing PMOVSSET_EL0 */
|
||||
kvm_pmu_overflow_set(vcpu, p->regval & mask);
|
||||
vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask);
|
||||
else
|
||||
/* accessing PMOVSCLR_EL0 */
|
||||
vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask);
|
||||
|
@ -151,6 +151,8 @@ void kvm_async_pf_task_wait(u32 token)
|
||||
if (hlist_unhashed(&n.link))
|
||||
break;
|
||||
|
||||
rcu_irq_exit();
|
||||
|
||||
if (!n.halted) {
|
||||
local_irq_enable();
|
||||
schedule();
|
||||
@ -159,11 +161,11 @@ void kvm_async_pf_task_wait(u32 token)
|
||||
/*
|
||||
* We cannot reschedule. So halt.
|
||||
*/
|
||||
rcu_irq_exit();
|
||||
native_safe_halt();
|
||||
local_irq_disable();
|
||||
rcu_irq_enter();
|
||||
}
|
||||
|
||||
rcu_irq_enter();
|
||||
}
|
||||
if (!n.halted)
|
||||
finish_swait(&n.wq, &wait);
|
||||
|
@ -2430,6 +2430,16 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
|
||||
svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
|
||||
svm->vmcb->control.exit_code_hi = 0;
|
||||
svm->vmcb->control.exit_info_1 = error_code;
|
||||
|
||||
/*
|
||||
* FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception.
|
||||
* The fix is to add the ancillary datum (CR2 or DR6) to structs
|
||||
* kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be
|
||||
* written only when inject_pending_event runs (DR6 would be written here
|
||||
* too). This should be conditional on a new capability---if the
|
||||
* capability is disabled, kvm_multiple_exception would write the
|
||||
* ancillary information to CR2 or DR6, for backwards ABI-compatibility.
|
||||
*/
|
||||
if (svm->vcpu.arch.exception.nested_apf)
|
||||
svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
|
||||
else
|
||||
|
@ -416,13 +416,10 @@ struct nested_vmx {
|
||||
|
||||
/* The guest-physical address of the current VMCS L1 keeps for L2 */
|
||||
gpa_t current_vmptr;
|
||||
/* The host-usable pointer to the above */
|
||||
struct page *current_vmcs12_page;
|
||||
struct vmcs12 *current_vmcs12;
|
||||
/*
|
||||
* Cache of the guest's VMCS, existing outside of guest memory.
|
||||
* Loaded from guest memory during VMPTRLD. Flushed to guest
|
||||
* memory during VMXOFF, VMCLEAR, VMPTRLD.
|
||||
* memory during VMCLEAR and VMPTRLD.
|
||||
*/
|
||||
struct vmcs12 *cached_vmcs12;
|
||||
/*
|
||||
@ -927,6 +924,10 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var);
|
||||
static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
|
||||
static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
|
||||
static int alloc_identity_pagetable(struct kvm *kvm);
|
||||
static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
|
||||
static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
|
||||
static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
|
||||
u16 error_code);
|
||||
|
||||
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
|
||||
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
|
||||
@ -2428,6 +2429,30 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
|
||||
vmx_set_interrupt_shadow(vcpu, 0);
|
||||
}
|
||||
|
||||
/*
 * Build the interruption-information word for the exception currently
 * queued in vcpu->arch.exception and pass it, together with @exit_qual,
 * to nested_vmx_vmexit() as an EXIT_REASON_EXCEPTION_NMI exit.
 *
 * @vcpu:      vcpu whose queued exception (vcpu->arch.exception) is read
 * @exit_qual: forwarded unchanged as the last argument of
 *             nested_vmx_vmexit()
 */
static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
|
||||
unsigned long exit_qual)
|
||||
{
|
||||
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
||||
unsigned int nr = vcpu->arch.exception.nr;
|
||||
/* Start from the vector number with the "valid" bit set. */
u32 intr_info = nr | INTR_INFO_VALID_MASK;
|
||||

||||
/*
 * When the queued exception carries an error code, store it in vmcs12
 * and flag its presence in intr_info.
 */
if (vcpu->arch.exception.has_error_code) {
|
||||
vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code;
|
||||
intr_info |= INTR_INFO_DELIVER_CODE_MASK;
|
||||
}
|
||||

||||
/* Exactly one of the soft/hard exception type values is OR-ed in. */
if (kvm_exception_is_soft(nr))
|
||||
intr_info |= INTR_TYPE_SOFT_EXCEPTION;
|
||||
else
|
||||
intr_info |= INTR_TYPE_HARD_EXCEPTION;
|
||||

||||
/*
 * INTR_INFO_UNBLOCK_NMI is set only when vmcs12 holds no valid
 * IDT-vectoring information and vmx_get_nmi_mask() reports true.
 */
if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) &&
|
||||
vmx_get_nmi_mask(vcpu))
|
||||
intr_info |= INTR_INFO_UNBLOCK_NMI;
|
||||

||||
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual);
|
||||
}
|
||||
|
||||
/*
|
||||
* KVM wants to inject page-faults which it got to the guest. This function
|
||||
* checks whether in a nested guest, we need to inject them to L1 or L2.
|
||||
@ -2437,23 +2462,38 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu)
|
||||
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
||||
unsigned int nr = vcpu->arch.exception.nr;
|
||||
|
||||
if (!((vmcs12->exception_bitmap & (1u << nr)) ||
|
||||
(nr == PF_VECTOR && vcpu->arch.exception.nested_apf)))
|
||||
return 0;
|
||||
if (nr == PF_VECTOR) {
|
||||
if (vcpu->arch.exception.nested_apf) {
|
||||
nested_vmx_inject_exception_vmexit(vcpu,
|
||||
vcpu->arch.apf.nested_apf_token);
|
||||
return 1;
|
||||
}
|
||||
/*
|
||||
* FIXME: we must not write CR2 when L1 intercepts an L2 #PF exception.
|
||||
* The fix is to add the ancillary datum (CR2 or DR6) to structs
|
||||
* kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6
|
||||
* can be written only when inject_pending_event runs. This should be
|
||||
* conditional on a new capability---if the capability is disabled,
|
||||
* kvm_multiple_exception would write the ancillary information to
|
||||
* CR2 or DR6, for backwards ABI-compatibility.
|
||||
*/
|
||||
if (nested_vmx_is_page_fault_vmexit(vmcs12,
|
||||
vcpu->arch.exception.error_code)) {
|
||||
nested_vmx_inject_exception_vmexit(vcpu, vcpu->arch.cr2);
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
unsigned long exit_qual = 0;
|
||||
if (nr == DB_VECTOR)
|
||||
exit_qual = vcpu->arch.dr6;
|
||||
|
||||
if (vcpu->arch.exception.nested_apf) {
|
||||
vmcs_write32(VM_EXIT_INTR_ERROR_CODE, vcpu->arch.exception.error_code);
|
||||
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
|
||||
PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
|
||||
INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
|
||||
vcpu->arch.apf.nested_apf_token);
|
||||
return 1;
|
||||
if (vmcs12->exception_bitmap & (1u << nr)) {
|
||||
nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
|
||||
vmcs_read32(VM_EXIT_INTR_INFO),
|
||||
vmcs_readl(EXIT_QUALIFICATION));
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void vmx_queue_exception(struct kvm_vcpu *vcpu)
|
||||
@ -2667,7 +2707,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
|
||||
* reason is that if one of these bits is necessary, it will appear
|
||||
* in vmcs01 and prepare_vmcs02, when it bitwise-or's the control
|
||||
* fields of vmcs01 and vmcs02, will turn these bits off - and
|
||||
* nested_vmx_exit_handled() will not pass related exits to L1.
|
||||
* nested_vmx_exit_reflected() will not pass related exits to L1.
|
||||
* These rules have exceptions below.
|
||||
*/
|
||||
|
||||
@ -4955,6 +4995,28 @@ static bool vmx_get_enable_apicv(void)
|
||||
return enable_apicv;
|
||||
}
|
||||
|
||||
/*
 * Mark as dirty the guest pages that vmcs12 points at for the virtual-APIC
 * page and the posted-interrupt descriptor, each only when the matching
 * feature is enabled in vmcs12.
 *
 * @vcpu: vcpu whose vmcs12 (obtained through get_vmcs12()) is inspected
 */
static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
||||
gfn_t gfn;
|
||||

||||
/*
|
||||
* Don't need to mark the APIC access page dirty; it is never
|
||||
* written to by the CPU during APIC virtualization.
|
||||
*/
|
||||

||||
/* Virtual-APIC page: convert the address from vmcs12 to a frame number. */
if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
|
||||
gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT;
|
||||
kvm_vcpu_mark_page_dirty(vcpu, gfn);
|
||||
}
|
||||

||||
/* Posted-interrupt descriptor: same conversion, same marking. */
if (nested_cpu_has_posted_intr(vmcs12)) {
|
||||
gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT;
|
||||
kvm_vcpu_mark_page_dirty(vcpu, gfn);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
@ -4962,18 +5024,15 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
|
||||
void *vapic_page;
|
||||
u16 status;
|
||||
|
||||
if (vmx->nested.pi_desc &&
|
||||
vmx->nested.pi_pending) {
|
||||
vmx->nested.pi_pending = false;
|
||||
if (!pi_test_and_clear_on(vmx->nested.pi_desc))
|
||||
return;
|
||||
if (!vmx->nested.pi_desc || !vmx->nested.pi_pending)
|
||||
return;
|
||||
|
||||
max_irr = find_last_bit(
|
||||
(unsigned long *)vmx->nested.pi_desc->pir, 256);
|
||||
|
||||
if (max_irr == 256)
|
||||
return;
|
||||
vmx->nested.pi_pending = false;
|
||||
if (!pi_test_and_clear_on(vmx->nested.pi_desc))
|
||||
return;
|
||||
|
||||
max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
|
||||
if (max_irr != 256) {
|
||||
vapic_page = kmap(vmx->nested.virtual_apic_page);
|
||||
__kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page);
|
||||
kunmap(vmx->nested.virtual_apic_page);
|
||||
@ -4985,6 +5044,8 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
|
||||
vmcs_write16(GUEST_INTR_STATUS, status);
|
||||
}
|
||||
}
|
||||
|
||||
nested_mark_vmcs12_pages_dirty(vcpu);
|
||||
}
|
||||
|
||||
static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
|
||||
@ -7134,34 +7195,32 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Clear SECONDARY_EXEC_SHADOW_VMCS in SECONDARY_VM_EXEC_CONTROL and set
 * VMCS_LINK_POINTER to -1ull.
 *
 * @vmx: not referenced by the body; the vmcs_* accessors presumably act on
 *       the currently loaded VMCS -- NOTE(review): confirm against callers.
 */
static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
|
||||
{
|
||||
vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
|
||||
vmcs_write64(VMCS_LINK_POINTER, -1ull);
|
||||
}
|
||||
|
||||
static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
|
||||
{
|
||||
if (vmx->nested.current_vmptr == -1ull)
|
||||
return;
|
||||
|
||||
/* current_vmptr and current_vmcs12 are always set/reset together */
|
||||
if (WARN_ON(vmx->nested.current_vmcs12 == NULL))
|
||||
return;
|
||||
|
||||
if (enable_shadow_vmcs) {
|
||||
/* copy to memory all shadowed fields in case
|
||||
they were modified */
|
||||
copy_shadow_to_vmcs12(vmx);
|
||||
vmx->nested.sync_shadow_vmcs = false;
|
||||
vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
|
||||
SECONDARY_EXEC_SHADOW_VMCS);
|
||||
vmcs_write64(VMCS_LINK_POINTER, -1ull);
|
||||
vmx_disable_shadow_vmcs(vmx);
|
||||
}
|
||||
vmx->nested.posted_intr_nv = -1;
|
||||
|
||||
/* Flush VMCS12 to guest memory */
|
||||
memcpy(vmx->nested.current_vmcs12, vmx->nested.cached_vmcs12,
|
||||
VMCS12_SIZE);
|
||||
kvm_vcpu_write_guest_page(&vmx->vcpu,
|
||||
vmx->nested.current_vmptr >> PAGE_SHIFT,
|
||||
vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
|
||||
|
||||
kunmap(vmx->nested.current_vmcs12_page);
|
||||
nested_release_page(vmx->nested.current_vmcs12_page);
|
||||
vmx->nested.current_vmptr = -1ull;
|
||||
vmx->nested.current_vmcs12 = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -7175,12 +7234,14 @@ static void free_nested(struct vcpu_vmx *vmx)
|
||||
|
||||
vmx->nested.vmxon = false;
|
||||
free_vpid(vmx->nested.vpid02);
|
||||
nested_release_vmcs12(vmx);
|
||||
vmx->nested.posted_intr_nv = -1;
|
||||
vmx->nested.current_vmptr = -1ull;
|
||||
if (vmx->nested.msr_bitmap) {
|
||||
free_page((unsigned long)vmx->nested.msr_bitmap);
|
||||
vmx->nested.msr_bitmap = NULL;
|
||||
}
|
||||
if (enable_shadow_vmcs) {
|
||||
vmx_disable_shadow_vmcs(vmx);
|
||||
vmcs_clear(vmx->vmcs01.shadow_vmcs);
|
||||
free_vmcs(vmx->vmcs01.shadow_vmcs);
|
||||
vmx->vmcs01.shadow_vmcs = NULL;
|
||||
@ -7579,14 +7640,14 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
nested_release_vmcs12(vmx);
|
||||
vmx->nested.current_vmcs12 = new_vmcs12;
|
||||
vmx->nested.current_vmcs12_page = page;
|
||||
/*
|
||||
* Load VMCS12 from guest memory since it is not already
|
||||
* cached.
|
||||
*/
|
||||
memcpy(vmx->nested.cached_vmcs12,
|
||||
vmx->nested.current_vmcs12, VMCS12_SIZE);
|
||||
memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
|
||||
kunmap(page);
|
||||
nested_release_page_clean(page);
|
||||
|
||||
set_current_vmptr(vmx, vmptr);
|
||||
}
|
||||
|
||||
@ -8019,12 +8080,11 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
|
||||
* should handle it ourselves in L0 (and then continue L2). Only call this
|
||||
* when in is_guest_mode (L2).
|
||||
*/
|
||||
static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
|
||||
static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
|
||||
{
|
||||
u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
||||
u32 exit_reason = vmx->exit_reason;
|
||||
|
||||
trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
|
||||
vmcs_readl(EXIT_QUALIFICATION),
|
||||
@ -8033,6 +8093,18 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
|
||||
vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
|
||||
KVM_ISA_VMX);
|
||||
|
||||
/*
|
||||
* The host physical addresses of some pages of guest memory
|
||||
* are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU
|
||||
* may write to these pages via their host physical address while
|
||||
* L2 is running, bypassing any address-translation-based dirty
|
||||
* tracking (e.g. EPT write protection).
|
||||
*
|
||||
* Mark them dirty on every exit from L2 to prevent them from
|
||||
* getting out of sync with dirty tracking.
|
||||
*/
|
||||
nested_mark_vmcs12_pages_dirty(vcpu);
|
||||
|
||||
if (vmx->nested.nested_run_pending)
|
||||
return false;
|
||||
|
||||
@ -8169,6 +8241,29 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Forward an exit to nested_vmx_vmexit() using the interruption info and
 * exit qualification read from the VMCS, after copying the hardware error
 * code into vmcs12 when the interruption info says one was delivered.
 *
 * @vcpu:        vcpu the exit belongs to
 * @exit_reason: exit reason passed on to nested_vmx_vmexit(); a warning is
 *               raised if it is EXIT_REASON_EXTERNAL_INTERRUPT
 *
 * Always returns 1.
 */
static int nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason)
|
||||
{
|
||||
u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
|
||||

||||
/*
|
||||
* At this point, the exit interruption info in exit_intr_info
|
||||
* is only valid for EXCEPTION_NMI exits. For EXTERNAL_INTERRUPT
|
||||
* we need to query the in-kernel LAPIC.
|
||||
*/
|
||||
WARN_ON(exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT);
|
||||
/* Copy the error code only when both "valid" and "deliver code" are set. */
if ((exit_intr_info &
|
||||
(INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
|
||||
(INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) {
|
||||
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
||||
vmcs12->vm_exit_intr_error_code =
|
||||
vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
|
||||
}
|
||||

||||
nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info,
|
||||
vmcs_readl(EXIT_QUALIFICATION));
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
|
||||
{
|
||||
*info1 = vmcs_readl(EXIT_QUALIFICATION);
|
||||
@ -8415,12 +8510,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
|
||||
if (vmx->emulation_required)
|
||||
return handle_invalid_guest_state(vcpu);
|
||||
|
||||
if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
|
||||
nested_vmx_vmexit(vcpu, exit_reason,
|
||||
vmcs_read32(VM_EXIT_INTR_INFO),
|
||||
vmcs_readl(EXIT_QUALIFICATION));
|
||||
return 1;
|
||||
}
|
||||
if (is_guest_mode(vcpu) && nested_vmx_exit_reflected(vcpu, exit_reason))
|
||||
return nested_vmx_reflect_vmexit(vcpu, exit_reason);
|
||||
|
||||
if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
|
||||
dump_vmcs();
|
||||
@ -9223,7 +9314,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
|
||||
|
||||
vmx->nested.posted_intr_nv = -1;
|
||||
vmx->nested.current_vmptr = -1ull;
|
||||
vmx->nested.current_vmcs12 = NULL;
|
||||
|
||||
vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
|
||||
|
||||
@ -9509,12 +9599,15 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
|
||||
|
||||
WARN_ON(!is_guest_mode(vcpu));
|
||||
|
||||
if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code))
|
||||
nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
|
||||
vmcs_read32(VM_EXIT_INTR_INFO),
|
||||
vmcs_readl(EXIT_QUALIFICATION));
|
||||
else
|
||||
if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) {
|
||||
vmcs12->vm_exit_intr_error_code = fault->error_code;
|
||||
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
|
||||
PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
|
||||
INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
|
||||
fault->address);
|
||||
} else {
|
||||
kvm_inject_page_fault(vcpu, fault);
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
|
||||
@ -10094,12 +10187,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
|
||||
* "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
|
||||
* vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
|
||||
* !enable_ept, EB.PF is 1, so the "or" will always be 1.
|
||||
*
|
||||
* A problem with this approach (when !enable_ept) is that L1 may be
|
||||
* injected with more page faults than it asked for. This could have
|
||||
* caused problems, but in practice existing hypervisors don't care.
|
||||
* To fix this, we will need to emulate the PFEC checking (on the L1
|
||||
* page tables), using walk_addr(), when injecting PFs to L1.
|
||||
*/
|
||||
vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
|
||||
enable_ept ? vmcs12->page_fault_error_code_mask : 0);
|
||||
@ -10847,13 +10934,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
|
||||
|
||||
vmcs12->vm_exit_reason = exit_reason;
|
||||
vmcs12->exit_qualification = exit_qualification;
|
||||
|
||||
vmcs12->vm_exit_intr_info = exit_intr_info;
|
||||
if ((vmcs12->vm_exit_intr_info &
|
||||
(INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
|
||||
(INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK))
|
||||
vmcs12->vm_exit_intr_error_code =
|
||||
vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
|
||||
|
||||
vmcs12->idt_vectoring_info_field = 0;
|
||||
vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
|
||||
vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
|
||||
@ -11049,8 +11131,15 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
|
||||
|
||||
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
|
||||
|
||||
if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
|
||||
&& nested_exit_intr_ack_set(vcpu)) {
|
||||
/*
|
||||
* TODO: SDM says that with acknowledge interrupt on exit, bit 31 of
|
||||
* the VM-exit interrupt information (valid interrupt) is always set to
|
||||
* 1 on EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't need
|
||||
* kvm_cpu_has_interrupt(). See the commit message for details.
|
||||
*/
|
||||
if (nested_exit_intr_ack_set(vcpu) &&
|
||||
exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
|
||||
kvm_cpu_has_interrupt(vcpu)) {
|
||||
int irq = kvm_cpu_get_interrupt(vcpu);
|
||||
WARN_ON(irq < 0);
|
||||
vmcs12->vm_exit_intr_info = irq |
|
||||
|
@ -3159,15 +3159,18 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
|
||||
kvm_set_hflags(vcpu, hflags);
|
||||
|
||||
vcpu->arch.smi_pending = events->smi.pending;
|
||||
if (events->smi.smm_inside_nmi)
|
||||
vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
|
||||
else
|
||||
vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
|
||||
if (lapic_in_kernel(vcpu)) {
|
||||
if (events->smi.latched_init)
|
||||
set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
|
||||
|
||||
if (events->smi.smm) {
|
||||
if (events->smi.smm_inside_nmi)
|
||||
vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
|
||||
else
|
||||
clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
|
||||
vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
|
||||
if (lapic_in_kernel(vcpu)) {
|
||||
if (events->smi.latched_init)
|
||||
set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
|
||||
else
|
||||
clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -6215,6 +6218,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
|
||||
|
||||
lapic_irq.shorthand = 0;
|
||||
lapic_irq.dest_mode = 0;
|
||||
lapic_irq.level = 0;
|
||||
lapic_irq.dest_id = apicid;
|
||||
lapic_irq.msi_redir_hint = false;
|
||||
|
||||
|
@ -48,7 +48,6 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu);
|
||||
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu);
|
||||
void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val);
|
||||
void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val);
|
||||
void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val);
|
||||
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu);
|
||||
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu);
|
||||
bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu);
|
||||
@ -86,7 +85,6 @@ static inline void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) {}
|
||||
static inline void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) {}
|
||||
static inline void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val) {}
|
||||
static inline void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val) {}
|
||||
static inline void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val) {}
|
||||
static inline void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu) {}
|
||||
static inline void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) {}
|
||||
static inline bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
|
||||
|
@ -477,7 +477,8 @@ struct kvm {
|
||||
static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
|
||||
{
|
||||
return srcu_dereference_check(kvm->buses[idx], &kvm->srcu,
|
||||
lockdep_is_held(&kvm->slots_lock));
|
||||
lockdep_is_held(&kvm->slots_lock) ||
|
||||
!refcount_read(&kvm->users_count));
|
||||
}
|
||||
|
||||
static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
|
||||
@ -570,7 +571,8 @@ void kvm_put_kvm(struct kvm *kvm);
|
||||
static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
|
||||
{
|
||||
return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
|
||||
lockdep_is_held(&kvm->slots_lock));
|
||||
lockdep_is_held(&kvm->slots_lock) ||
|
||||
!refcount_read(&kvm->users_count));
|
||||
}
|
||||
|
||||
static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
|
||||
|
@ -1718,12 +1718,16 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *
|
||||
|
||||
/*
 * Run kvm_age_hva_handler over the range [@start, @end] through
 * handle_hva_to_gpa() and return that call's result.
 *
 * Returns 0 without tracing or walking anything when kvm->arch.pgd is NULL
 * (presumably the page table is already gone during VM teardown; the merge
 * description mentions hva aging while destroying the VM -- confirm).
 */
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
|
||||
{
|
||||
if (!kvm->arch.pgd)
|
||||
return 0;
|
||||
trace_kvm_age_hva(start, end);
|
||||
return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
|
||||
}
|
||||
|
||||
/*
 * Run kvm_test_age_hva_handler for the single address @hva (passed as both
 * ends of the range) through handle_hva_to_gpa() and return that call's
 * result.
 *
 * Returns 0 without tracing or walking anything when kvm->arch.pgd is NULL
 * (presumably the page table is already gone during VM teardown; the merge
 * description mentions hva aging while destroying the VM -- confirm).
 */
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
|
||||
{
|
||||
if (!kvm->arch.pgd)
|
||||
return 0;
|
||||
trace_kvm_test_age_hva(hva);
|
||||
return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
|
||||
}
|
||||
|
@ -203,11 +203,15 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
|
||||
return reg;
|
||||
}
|
||||
|
||||
static void kvm_pmu_check_overflow(struct kvm_vcpu *vcpu)
|
||||
static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_pmu *pmu = &vcpu->arch.pmu;
|
||||
bool overflow = !!kvm_pmu_overflow_status(vcpu);
|
||||
bool overflow;
|
||||
|
||||
if (!kvm_arm_pmu_v3_ready(vcpu))
|
||||
return;
|
||||
|
||||
overflow = !!kvm_pmu_overflow_status(vcpu);
|
||||
if (pmu->irq_level == overflow)
|
||||
return;
|
||||
|
||||
@ -215,33 +219,11 @@ static void kvm_pmu_check_overflow(struct kvm_vcpu *vcpu)
|
||||
|
||||
if (likely(irqchip_in_kernel(vcpu->kvm))) {
|
||||
int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
|
||||
pmu->irq_num, overflow,
|
||||
&vcpu->arch.pmu);
|
||||
pmu->irq_num, overflow, pmu);
|
||||
WARN_ON(ret);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_pmu_overflow_set - set PMU overflow interrupt
|
||||
* @vcpu: The vcpu pointer
|
||||
* @val: the value guest writes to PMOVSSET register
|
||||
*/
|
||||
void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val)
|
||||
{
|
||||
if (val == 0)
|
||||
return;
|
||||
|
||||
vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= val;
|
||||
kvm_pmu_check_overflow(vcpu);
|
||||
}
|
||||
|
||||
static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!kvm_arm_pmu_v3_ready(vcpu))
|
||||
return;
|
||||
kvm_pmu_check_overflow(vcpu);
|
||||
}
|
||||
|
||||
bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_pmu *pmu = &vcpu->arch.pmu;
|
||||
@ -303,7 +285,7 @@ static inline struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
|
||||
}
|
||||
|
||||
/**
|
||||
* When perf event overflows, call kvm_pmu_overflow_set to set overflow status.
|
||||
* When the perf event overflows, set the overflow status and inform the vcpu.
|
||||
*/
|
||||
static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
|
||||
struct perf_sample_data *data,
|
||||
@ -313,7 +295,12 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
|
||||
struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
|
||||
int idx = pmc->idx;
|
||||
|
||||
kvm_pmu_overflow_set(vcpu, BIT(idx));
|
||||
vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
|
||||
|
||||
if (kvm_pmu_overflow_status(vcpu)) {
|
||||
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
|
||||
kvm_vcpu_kick(vcpu);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -341,7 +328,7 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
|
||||
reg = lower_32_bits(reg);
|
||||
vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
|
||||
if (!reg)
|
||||
kvm_pmu_overflow_set(vcpu, BIT(i));
|
||||
vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -285,9 +285,6 @@ int vgic_init(struct kvm *kvm)
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (vgic_has_its(kvm))
|
||||
dist->msis_require_devid = true;
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm)
|
||||
kvm_vgic_vcpu_enable(vcpu);
|
||||
|
||||
|
@ -1598,6 +1598,7 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
|
||||
INIT_LIST_HEAD(&its->device_list);
|
||||
INIT_LIST_HEAD(&its->collection_list);
|
||||
|
||||
dev->kvm->arch.vgic.msis_require_devid = true;
|
||||
dev->kvm->arch.vgic.has_its = true;
|
||||
its->enabled = false;
|
||||
its->dev = dev;
|
||||
|
@ -369,7 +369,7 @@ static void vgic_mmio_write_propbase(struct kvm_vcpu *vcpu,
|
||||
return;
|
||||
|
||||
do {
|
||||
old_propbaser = dist->propbaser;
|
||||
old_propbaser = READ_ONCE(dist->propbaser);
|
||||
propbaser = old_propbaser;
|
||||
propbaser = update_64bit_reg(propbaser, addr & 4, len, val);
|
||||
propbaser = vgic_sanitise_propbaser(propbaser);
|
||||
@ -397,7 +397,7 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
|
||||
return;
|
||||
|
||||
do {
|
||||
old_pendbaser = vgic_cpu->pendbaser;
|
||||
old_pendbaser = READ_ONCE(vgic_cpu->pendbaser);
|
||||
pendbaser = old_pendbaser;
|
||||
pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val);
|
||||
pendbaser = vgic_sanitise_pendbaser(pendbaser);
|
||||
|
@ -717,10 +717,9 @@ out_err_no_srcu:
|
||||
hardware_disable_all();
|
||||
out_err_no_disable:
|
||||
for (i = 0; i < KVM_NR_BUSES; i++)
|
||||
kfree(rcu_access_pointer(kvm->buses[i]));
|
||||
kfree(kvm_get_bus(kvm, i));
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
|
||||
kvm_free_memslots(kvm,
|
||||
rcu_dereference_protected(kvm->memslots[i], 1));
|
||||
kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
|
||||
kvm_arch_free_vm(kvm);
|
||||
mmdrop(current->mm);
|
||||
return ERR_PTR(r);
|
||||
@ -754,9 +753,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
|
||||
spin_unlock(&kvm_lock);
|
||||
kvm_free_irq_routing(kvm);
|
||||
for (i = 0; i < KVM_NR_BUSES; i++) {
|
||||
struct kvm_io_bus *bus;
|
||||
struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
|
||||
|
||||
bus = rcu_dereference_protected(kvm->buses[i], 1);
|
||||
if (bus)
|
||||
kvm_io_bus_destroy(bus);
|
||||
kvm->buses[i] = NULL;
|
||||
@ -770,8 +768,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
|
||||
kvm_arch_destroy_vm(kvm);
|
||||
kvm_destroy_devices(kvm);
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
|
||||
kvm_free_memslots(kvm,
|
||||
rcu_dereference_protected(kvm->memslots[i], 1));
|
||||
kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
|
||||
cleanup_srcu_struct(&kvm->irq_srcu);
|
||||
cleanup_srcu_struct(&kvm->srcu);
|
||||
kvm_arch_free_vm(kvm);
|
||||
|
Loading…
Reference in New Issue
Block a user