diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 263be2debde8..3a727ca02f24 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -144,6 +144,15 @@ struct shared_msr_entry { u64 mask; }; +/* + * The nested_vmx structure is part of vcpu_vmx, and holds information we need + * for correct emulation of VMX (i.e., nested VMX) on this vcpu. + */ +struct nested_vmx { + /* Has the level1 guest done vmxon? */ + bool vmxon; +}; + struct vcpu_vmx { struct kvm_vcpu vcpu; unsigned long host_rsp; @@ -203,6 +212,9 @@ struct vcpu_vmx { u32 exit_reason; bool rdtscp_enabled; + + /* Support for a guest hypervisor (nested VMX) */ + struct nested_vmx nested; }; enum segment_cache_field { @@ -3933,6 +3945,99 @@ static int handle_invalid_op(struct kvm_vcpu *vcpu) return 1; } +/* + * Emulate the VMXON instruction. + * Currently, we just remember that VMX is active, and do not save or even + * inspect the argument to VMXON (the so-called "VMXON pointer") because we + * do not currently need to store anything in that guest-allocated memory + * region. Consequently, VMCLEAR and VMPTRLD also do not verify that the their + * argument is different from the VMXON pointer (which the spec says they do). + */ +static int handle_vmon(struct kvm_vcpu *vcpu) +{ + struct kvm_segment cs; + struct vcpu_vmx *vmx = to_vmx(vcpu); + + /* The Intel VMX Instruction Reference lists a bunch of bits that + * are prerequisite to running VMXON, most notably cr4.VMXE must be + * set to 1 (see vmx_set_cr4() for when we allow the guest to set this). + * Otherwise, we should fail with #UD. We test these now: + */ + if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE) || + !kvm_read_cr0_bits(vcpu, X86_CR0_PE) || + (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + + vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); + if (is_long_mode(vcpu) && !cs.l) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + + if (vmx_get_cpl(vcpu)) { + kvm_inject_gp(vcpu, 0); + return 1; + } + + vmx->nested.vmxon = true; + + skip_emulated_instruction(vcpu); + return 1; +} + +/* + * Intel's VMX Instruction Reference specifies a common set of prerequisites + * for running VMX instructions (except VMXON, whose prerequisites are + * slightly different). It also specifies what exception to inject otherwise. + */ +static int nested_vmx_check_permission(struct kvm_vcpu *vcpu) +{ + struct kvm_segment cs; + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!vmx->nested.vmxon) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 0; + } + + vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); + if ((vmx_get_rflags(vcpu) & X86_EFLAGS_VM) || + (is_long_mode(vcpu) && !cs.l)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 0; + } + + if (vmx_get_cpl(vcpu)) { + kvm_inject_gp(vcpu, 0); + return 0; + } + + return 1; +} + +/* + * Free whatever needs to be freed from vmx->nested when L1 goes down, or + * just stops using VMX. + */ +static void free_nested(struct vcpu_vmx *vmx) +{ + if (!vmx->nested.vmxon) + return; + vmx->nested.vmxon = false; +} + +/* Emulate the VMXOFF instruction */ +static int handle_vmoff(struct kvm_vcpu *vcpu) +{ + if (!nested_vmx_check_permission(vcpu)) + return 1; + free_nested(to_vmx(vcpu)); + skip_emulated_instruction(vcpu); + return 1; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -3961,8 +4066,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_VMREAD] = handle_vmx_insn, [EXIT_REASON_VMRESUME] = handle_vmx_insn, [EXIT_REASON_VMWRITE] = handle_vmx_insn, - [EXIT_REASON_VMOFF] = handle_vmx_insn, - [EXIT_REASON_VMON] = handle_vmx_insn, + [EXIT_REASON_VMOFF] = handle_vmoff, + [EXIT_REASON_VMON] = handle_vmon, [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, [EXIT_REASON_APIC_ACCESS] = handle_apic_access, [EXIT_REASON_WBINVD] = handle_wbinvd, @@ -4363,6 +4468,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); free_vpid(vmx); + free_nested(vmx); free_loaded_vmcs(vmx->loaded_vmcs); kfree(vmx->guest_msrs); kvm_vcpu_uninit(vcpu);