KVM: x86: SVM changes for 6.6:

- Add support for SEV-ES DebugSwap, i.e. allow SEV-ES guests to use debug
    registers and generate/handle #DBs
 
  - Clean up LBR virtualization code
 
  - Fix a bug where KVM fails to set the target pCPU during an IRTE update
 
  - Fix fatal bugs in SEV-ES intrahost migration
 
  - Fix a bug where the recent (architecturally correct) change to reinject
    #BP and skip INT3 broke SEV guests (can't decode INT3 to skip it)
 -----BEGIN PGP SIGNATURE-----
 
 iQJGBAABCgAwFiEEMHr+pfEFOIzK+KY1YJEiAU0MEvkFAmTue8YSHHNlYW5qY0Bn
 b29nbGUuY29tAAoJEGCRIgFNDBL5aqUP/jF7DyMXyQGYMKoQhFxWyGRhfqV8Ov8i
 7sUpEKSx5WTxOsFHBgdGeNU+m9eBJHWVmrJM9imI4OCUvJmxasRRsnyhvEUvBIUE
 amQT45aVm2xqjRNRUkOCUUHiDKtUdwpSRlOSyhnDTKmlMbNoH+fO3SLJ1oB/fsae
 wnmyiF98j2vT/5mD6F/F87hlNMq4CqG/OZWJ9Kk8GfvfJpUcC8r/H0NsMgSMF2/L
 Q+Hn+r/XDfMSrBiyEzevWyPbJi7nL+WF9EQDJASf+aAkmFMHK6AU4XNITwVw3XcZ
 FGtSP/NzvnePhd5gqtbiW9hRQkWcKjqnydtyI3ZDVVBpEbJ6OJn3+UFoLZ8NoSE+
 D3EDs1PA7Qjty6kYx9/NZpXz5BAMd9mikkTL7PTrlrAZAEimToqoHx7mBjmLp4E+
 diKrpG2w1OTtO/Pafi0z0zZN6Yc9MJOyZVK78DpIiLey3rNip9SawWGh+wV14WNC
 nbn7Wpf8EGE1E8n00mwrGMRCuRm7LQhLbcVXITiGKrbpxUzam6sqDIgt73Q7xma2
 NWcPizeFNy47uurNOA2V9xHkbEAYjWaM12uyzmGzILvvmvNnpU0NuZ78cgV5ZWMk
 4US53CAQbG4+qUCJWhIDoriluaLXjL9tLiZgJW0T6cus3nL5NWYqvlq6TWYyK00J
 zjiK7vky77Pq
 =WC5V
 -----END PGP SIGNATURE-----

Merge tag 'kvm-x86-svm-6.6' of https://github.com/kvm-x86/linux into HEAD

KVM: x86: SVM changes for 6.6:

 - Add support for SEV-ES DebugSwap, i.e. allow SEV-ES guests to use debug
   registers and generate/handle #DBs

 - Clean up LBR virtualization code

 - Fix a bug where KVM fails to set the target pCPU during an IRTE update

 - Fix fatal bugs in SEV-ES intrahost migration

 - Fix a bug where the recent (architecturally correct) change to reinject
   #BP and skip INT3 broke SEV guests (can't decode INT3 to skip it)
This commit is contained in:
Paolo Bonzini 2023-08-31 13:32:40 -04:00
commit bd7fe98b35
7 changed files with 252 additions and 136 deletions

View File

@ -438,6 +438,7 @@
#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* AMD SEV-ES full debug state swap support */
/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */

View File

@ -288,6 +288,7 @@ static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_
#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
#define SVM_SEV_FEAT_DEBUG_SWAP BIT(5)
struct vmcb_seg {
u16 selector;
@ -345,7 +346,7 @@ struct vmcb_save_area {
u64 last_excp_from;
u64 last_excp_to;
u8 reserved_0x298[72];
u32 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */
u64 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */
} __packed;
/* Save area definition for SEV-ES and SEV-SNP guests */
@ -512,7 +513,7 @@ struct ghcb {
} __packed;
#define EXPECTED_VMCB_SAVE_AREA_SIZE 740
#define EXPECTED_VMCB_SAVE_AREA_SIZE 744
#define EXPECTED_GHCB_SAVE_AREA_SIZE 1032
#define EXPECTED_SEV_ES_SAVE_AREA_SIZE 1648
#define EXPECTED_VMCB_CONTROL_AREA_SIZE 1024

View File

@ -791,6 +791,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
int ret = 0;
unsigned long flags;
struct amd_svm_iommu_ir *ir;
u64 entry;
/**
* In some cases, the existing irte is updated and re-set,
@ -824,6 +825,18 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
ir->data = pi->ir_data;
spin_lock_irqsave(&svm->ir_list_lock, flags);
/*
* Update the target pCPU for IOMMU doorbells if the vCPU is running.
* If the vCPU is NOT running, i.e. is blocking or scheduled out, KVM
* will update the pCPU info when the vCPU awkened and/or scheduled in.
* See also avic_vcpu_load().
*/
entry = READ_ONCE(*(svm->avic_physical_id_cache));
if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
amd_iommu_update_ga(entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK,
true, pi->ir_data);
list_add(&ir->node, &svm->ir_list);
spin_unlock_irqrestore(&svm->ir_list_lock, flags);
out:
@ -986,10 +999,11 @@ static inline int
avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
{
int ret = 0;
unsigned long flags;
struct amd_svm_iommu_ir *ir;
struct vcpu_svm *svm = to_svm(vcpu);
lockdep_assert_held(&svm->ir_list_lock);
if (!kvm_arch_has_assigned_device(vcpu->kvm))
return 0;
@ -997,19 +1011,15 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
* Here, we go through the per-vcpu ir_list to update all existing
* interrupt remapping table entry targeting this vcpu.
*/
spin_lock_irqsave(&svm->ir_list_lock, flags);
if (list_empty(&svm->ir_list))
goto out;
return 0;
list_for_each_entry(ir, &svm->ir_list, node) {
ret = amd_iommu_update_ga(cpu, r, ir->data);
if (ret)
break;
return ret;
}
out:
spin_unlock_irqrestore(&svm->ir_list_lock, flags);
return ret;
return 0;
}
void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@ -1017,6 +1027,7 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
u64 entry;
int h_physical_id = kvm_cpu_get_apicid(cpu);
struct vcpu_svm *svm = to_svm(vcpu);
unsigned long flags;
lockdep_assert_preemption_disabled();
@ -1033,6 +1044,15 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (kvm_vcpu_is_blocking(vcpu))
return;
/*
* Grab the per-vCPU interrupt remapping lock even if the VM doesn't
* _currently_ have assigned devices, as that can change. Holding
* ir_list_lock ensures that either svm_ir_list_add() will consume
* up-to-date entry information, or that this task will wait until
* svm_ir_list_add() completes to set the new target pCPU.
*/
spin_lock_irqsave(&svm->ir_list_lock, flags);
entry = READ_ONCE(*(svm->avic_physical_id_cache));
WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
@ -1042,25 +1062,48 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
spin_unlock_irqrestore(&svm->ir_list_lock, flags);
}
void avic_vcpu_put(struct kvm_vcpu *vcpu)
{
u64 entry;
struct vcpu_svm *svm = to_svm(vcpu);
unsigned long flags;
lockdep_assert_preemption_disabled();
/*
* Note, reading the Physical ID entry outside of ir_list_lock is safe
* as only the pCPU that has loaded (or is loading) the vCPU is allowed
* to modify the entry, and preemption is disabled. I.e. the vCPU
* can't be scheduled out and thus avic_vcpu_{put,load}() can't run
* recursively.
*/
entry = READ_ONCE(*(svm->avic_physical_id_cache));
/* Nothing to do if IsRunning == '0' due to vCPU blocking. */
if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
return;
/*
* Take and hold the per-vCPU interrupt remapping lock while updating
* the Physical ID entry even though the lock doesn't protect against
* multiple writers (see above). Holding ir_list_lock ensures that
* either svm_ir_list_add() will consume up-to-date entry information,
* or that this task will wait until svm_ir_list_add() completes to
* mark the vCPU as not running.
*/
spin_lock_irqsave(&svm->ir_list_lock, flags);
avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
spin_unlock_irqrestore(&svm->ir_list_lock, flags);
}
void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu)

View File

@ -23,6 +23,7 @@
#include <asm/pkru.h>
#include <asm/trapnr.h>
#include <asm/fpu/xcr.h>
#include <asm/debugreg.h>
#include "mmu.h"
#include "x86.h"
@ -54,9 +55,14 @@ module_param_named(sev, sev_enabled, bool, 0444);
/* enable/disable SEV-ES support */
static bool sev_es_enabled = true;
module_param_named(sev_es, sev_es_enabled, bool, 0444);
/* enable/disable SEV-ES DebugSwap support */
static bool sev_es_debug_swap_enabled = true;
module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444);
#else
#define sev_enabled false
#define sev_es_enabled false
#define sev_es_debug_swap_enabled false
#endif /* CONFIG_KVM_AMD_SEV */
static u8 sev_enc_bit;
@ -606,6 +612,9 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
save->xss = svm->vcpu.arch.ia32_xss;
save->dr6 = svm->vcpu.arch.dr6;
if (sev_es_debug_swap_enabled)
save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP;
pr_debug("Virtual Machine Save Area (VMSA):\n");
print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false);
@ -619,6 +628,11 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
struct vcpu_svm *svm = to_svm(vcpu);
int ret;
if (vcpu->guest_debug) {
pr_warn_once("KVM_SET_GUEST_DEBUG for SEV-ES guest is not supported");
return -EINVAL;
}
/* Perform some pre-encryption checks against the VMSA */
ret = sev_es_sync_vmsa(svm);
if (ret)
@ -1725,7 +1739,7 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm)
* Note, the source is not required to have the same number of
* vCPUs as the destination when migrating a vanilla SEV VM.
*/
src_vcpu = kvm_get_vcpu(dst_kvm, i);
src_vcpu = kvm_get_vcpu(src_kvm, i);
src_svm = to_svm(src_vcpu);
/*
@ -2171,7 +2185,7 @@ void __init sev_hardware_setup(void)
bool sev_es_supported = false;
bool sev_supported = false;
if (!sev_enabled || !npt_enabled)
if (!sev_enabled || !npt_enabled || !nrips)
goto out;
/*
@ -2256,6 +2270,9 @@ out:
sev_enabled = sev_supported;
sev_es_enabled = sev_es_supported;
if (!sev_es_enabled || !cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP) ||
!cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP))
sev_es_debug_swap_enabled = false;
#endif
}
@ -2881,7 +2898,10 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
svm->sev_es.ghcb_sa);
break;
case SVM_VMGEXIT_NMI_COMPLETE:
ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
++vcpu->stat.nmi_window_exits;
svm->nmi_masked = false;
kvm_make_request(KVM_REQ_EVENT, vcpu);
ret = 1;
break;
case SVM_VMGEXIT_AP_HLT_LOOP:
ret = kvm_emulate_ap_reset_hold(vcpu);
@ -2944,6 +2964,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
static void sev_es_init_vmcb(struct vcpu_svm *svm)
{
struct vmcb *vmcb = svm->vmcb01.ptr;
struct kvm_vcpu *vcpu = &svm->vcpu;
svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
@ -2952,9 +2973,12 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
/*
* An SEV-ES guest requires a VMSA area that is a separate from the
* VMCB page. Do not include the encryption mask on the VMSA physical
* address since hardware will access it using the guest key.
* address since hardware will access it using the guest key. Note,
* the VMSA will be NULL if this vCPU is the destination for intrahost
* migration, and will be copied later.
*/
svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
if (svm->sev_es.vmsa)
svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
/* Can't intercept CR register access, HV can't modify CR registers */
svm_clr_intercept(svm, INTERCEPT_CR0_READ);
@ -2972,8 +2996,23 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
svm_set_intercept(svm, TRAP_CR4_WRITE);
svm_set_intercept(svm, TRAP_CR8_WRITE);
/* No support for enable_vmware_backdoor */
clr_exception_intercept(svm, GP_VECTOR);
vmcb->control.intercepts[INTERCEPT_DR] = 0;
if (!sev_es_debug_swap_enabled) {
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
recalc_intercepts(svm);
} else {
/*
* Disable #DB intercept iff DebugSwap is enabled. KVM doesn't
* allow debugging SEV-ES guests, and enables DebugSwap iff
* NO_NESTED_DATA_BP is supported, so there's no reason to
* intercept #DB when DebugSwap is enabled. For simplicity
* with respect to guest debug, intercept #DB for other VMs
* even if NO_NESTED_DATA_BP is supported, i.e. even if the
* guest can't DoS the CPU with infinite #DB vectoring.
*/
clr_exception_intercept(svm, DB_VECTOR);
}
/* Can't intercept XSETBV, HV can't modify XCR0 directly */
svm_clr_intercept(svm, INTERCEPT_XSETBV);
@ -3000,6 +3039,12 @@ void sev_init_vmcb(struct vcpu_svm *svm)
svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
clr_exception_intercept(svm, UD_VECTOR);
/*
* Don't intercept #GP for SEV guests, e.g. for the VMware backdoor, as
* KVM can't decrypt guest memory to decode the faulting instruction.
*/
clr_exception_intercept(svm, GP_VECTOR);
if (sev_es_guest(svm->vcpu.kvm))
sev_es_init_vmcb(svm);
}
@ -3018,20 +3063,41 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm)
void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
{
/*
* As an SEV-ES guest, hardware will restore the host state on VMEXIT,
* of which one step is to perform a VMLOAD. KVM performs the
* corresponding VMSAVE in svm_prepare_guest_switch for both
* traditional and SEV-ES guests.
* All host state for SEV-ES guests is categorized into three swap types
* based on how it is handled by hardware during a world switch:
*
* A: VMRUN: Host state saved in host save area
* VMEXIT: Host state loaded from host save area
*
* B: VMRUN: Host state _NOT_ saved in host save area
* VMEXIT: Host state loaded from host save area
*
* C: VMRUN: Host state _NOT_ saved in host save area
* VMEXIT: Host state initialized to default(reset) values
*
* Manually save type-B state, i.e. state that is loaded by VMEXIT but
* isn't saved by VMRUN, that isn't already saved by VMSAVE (performed
* by common SVM code).
*/
/* XCR0 is restored on VMEXIT, save the current host value */
hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
/* PKRU is restored on VMEXIT, save the current host value */
hostsa->pkru = read_pkru();
/* MSR_IA32_XSS is restored on VMEXIT, save the currnet host value */
hostsa->xss = host_xss;
/*
* If DebugSwap is enabled, debug registers are loaded but NOT saved by
* the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU both
* saves and loads debug registers (Type-A).
*/
if (sev_es_debug_swap_enabled) {
hostsa->dr0 = native_get_debugreg(0);
hostsa->dr1 = native_get_debugreg(1);
hostsa->dr2 = native_get_debugreg(2);
hostsa->dr3 = native_get_debugreg(3);
hostsa->dr0_addr_mask = amd_get_dr_addr_mask(0);
hostsa->dr1_addr_mask = amd_get_dr_addr_mask(1);
hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2);
hostsa->dr3_addr_mask = amd_get_dr_addr_mask(3);
}
}
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)

View File

@ -203,7 +203,7 @@ static int nested = true;
module_param(nested, int, S_IRUGO);
/* enable/disable Next RIP Save */
static int nrips = true;
int nrips = true;
module_param(nrips, int, 0444);
/* enable/disable Virtual VMLOAD VMSAVE */
@ -365,6 +365,8 @@ static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
}
static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
void *insn, int insn_len);
static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
bool commit_side_effects)
@ -385,6 +387,14 @@ static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
}
if (!svm->next_rip) {
/*
* FIXME: Drop this when kvm_emulate_instruction() does the
* right thing and treats "can't emulate" as outright failure
* for EMULTYPE_SKIP.
*/
if (!svm_can_emulate_instruction(vcpu, EMULTYPE_SKIP, NULL, 0))
return 0;
if (unlikely(!commit_side_effects))
old_rflags = svm->vmcb->save.rflags;
@ -677,6 +687,39 @@ free_save_area:
}
static void set_dr_intercepts(struct vcpu_svm *svm)
{
struct vmcb *vmcb = svm->vmcb01.ptr;
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
recalc_intercepts(svm);
}
static void clr_dr_intercepts(struct vcpu_svm *svm)
{
struct vmcb *vmcb = svm->vmcb01.ptr;
vmcb->control.intercepts[INTERCEPT_DR] = 0;
recalc_intercepts(svm);
}
static int direct_access_msr_slot(u32 msr)
{
u32 i;
@ -947,50 +990,24 @@ static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb);
}
static int svm_get_lbr_msr(struct vcpu_svm *svm, u32 index)
static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm)
{
/*
* If the LBR virtualization is disabled, the LBR msrs are always
* kept in the vmcb01 to avoid copying them on nested guest entries.
*
* If nested, and the LBR virtualization is enabled/disabled, the msrs
* are moved between the vmcb01 and vmcb02 as needed.
* If LBR virtualization is disabled, the LBR MSRs are always kept in
* vmcb01. If LBR virtualization is enabled and L1 is running VMs of
* its own, the MSRs are moved between vmcb01 and vmcb02 as needed.
*/
struct vmcb *vmcb =
(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) ?
svm->vmcb : svm->vmcb01.ptr;
switch (index) {
case MSR_IA32_DEBUGCTLMSR:
return vmcb->save.dbgctl;
case MSR_IA32_LASTBRANCHFROMIP:
return vmcb->save.br_from;
case MSR_IA32_LASTBRANCHTOIP:
return vmcb->save.br_to;
case MSR_IA32_LASTINTFROMIP:
return vmcb->save.last_excp_from;
case MSR_IA32_LASTINTTOIP:
return vmcb->save.last_excp_to;
default:
KVM_BUG(false, svm->vcpu.kvm,
"%s: Unknown MSR 0x%x", __func__, index);
return 0;
}
return svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK ? svm->vmcb :
svm->vmcb01.ptr;
}
void svm_update_lbrv(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
bool enable_lbrv = svm_get_lbr_msr(svm, MSR_IA32_DEBUGCTLMSR) &
DEBUGCTLMSR_LBR;
bool current_enable_lbrv = !!(svm->vmcb->control.virt_ext &
LBR_CTL_ENABLE_MASK);
if (unlikely(is_guest_mode(vcpu) && svm->lbrv_enabled))
if (unlikely(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))
enable_lbrv = true;
bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) ||
(is_guest_mode(vcpu) && svm->lbrv_enabled &&
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
if (enable_lbrv == current_enable_lbrv)
return;
@ -1201,10 +1218,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
* Guest access to VMware backdoor ports could legitimately
* trigger #GP because of TSS I/O permission bitmap.
* We intercept those #GP and allow access to them anyway
* as VMware does. Don't intercept #GP for SEV guests as KVM can't
* decrypt guest memory to decode the faulting instruction.
* as VMware does.
*/
if (enable_vmware_backdoor && !sev_guest(vcpu->kvm))
if (enable_vmware_backdoor)
set_exception_intercept(svm, GP_VECTOR);
svm_set_intercept(svm, INTERCEPT_INTR);
@ -1949,7 +1965,7 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
if (vcpu->arch.guest_state_protected)
if (WARN_ON_ONCE(sev_es_guest(vcpu->kvm)))
return;
get_debugreg(vcpu->arch.db[0], 0);
@ -2510,12 +2526,13 @@ static int iret_interception(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
WARN_ON_ONCE(sev_es_guest(vcpu->kvm));
++vcpu->stat.nmi_window_exits;
svm->awaiting_iret_completion = true;
svm_clr_iret_intercept(svm);
if (!sev_es_guest(vcpu->kvm))
svm->nmi_iret_rip = kvm_rip_read(vcpu);
svm->nmi_iret_rip = kvm_rip_read(vcpu);
kvm_make_request(KVM_REQ_EVENT, vcpu);
return 1;
@ -2680,6 +2697,13 @@ static int dr_interception(struct kvm_vcpu *vcpu)
unsigned long val;
int err = 0;
/*
* SEV-ES intercepts DR7 only to disable guest debugging and the guest issues a VMGEXIT
* for DR7 write only. KVM cannot change DR7 (always swapped as type 'A') so return early.
*/
if (sev_es_guest(vcpu->kvm))
return 1;
if (vcpu->guest_debug == 0) {
/*
* No more DR vmexits; force a reload of the debug registers
@ -2802,11 +2826,19 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = svm->tsc_aux;
break;
case MSR_IA32_DEBUGCTLMSR:
msr_info->data = svm_get_lbr_vmcb(svm)->save.dbgctl;
break;
case MSR_IA32_LASTBRANCHFROMIP:
msr_info->data = svm_get_lbr_vmcb(svm)->save.br_from;
break;
case MSR_IA32_LASTBRANCHTOIP:
msr_info->data = svm_get_lbr_vmcb(svm)->save.br_to;
break;
case MSR_IA32_LASTINTFROMIP:
msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_from;
break;
case MSR_IA32_LASTINTTOIP:
msr_info->data = svm_get_lbr_msr(svm, msr_info->index);
msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_to;
break;
case MSR_VM_HSAVE_PA:
msr_info->data = svm->nested.hsave_msr;
@ -3037,13 +3069,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
if (data & DEBUGCTL_RESERVED_BITS)
return 1;
if (svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK)
svm->vmcb->save.dbgctl = data;
else
svm->vmcb01.ptr->save.dbgctl = data;
svm_get_lbr_vmcb(svm)->save.dbgctl = data;
svm_update_lbrv(vcpu);
break;
case MSR_VM_HSAVE_PA:
/*
@ -3769,6 +3796,19 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
if (svm_get_nmi_mask(vcpu) && !svm->awaiting_iret_completion)
return; /* IRET will cause a vm exit */
/*
* SEV-ES guests are responsible for signaling when a vCPU is ready to
* receive a new NMI, as SEV-ES guests can't be single-stepped, i.e.
* KVM can't intercept and single-step IRET to detect when NMIs are
* unblocked (architecturally speaking). See SVM_VMGEXIT_NMI_COMPLETE.
*
* Note, GIF is guaranteed to be '1' for SEV-ES guests as hardware
* ignores SEV-ES guest writes to EFER.SVME *and* CLGI/STGI are not
* supported NAEs in the GHCB protocol.
*/
if (sev_es_guest(vcpu->kvm))
return;
if (!gif_set(svm)) {
if (vgif)
svm_set_intercept(svm, INTERCEPT_STGI);
@ -3918,12 +3958,11 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
svm->soft_int_injected = false;
/*
* If we've made progress since setting HF_IRET_MASK, we've
* If we've made progress since setting awaiting_iret_completion, we've
* executed an IRET and can allow NMI injection.
*/
if (svm->awaiting_iret_completion &&
(sev_es_guest(vcpu->kvm) ||
kvm_rip_read(vcpu) != svm->nmi_iret_rip)) {
kvm_rip_read(vcpu) != svm->nmi_iret_rip) {
svm->awaiting_iret_completion = false;
svm->nmi_masked = false;
kvm_make_request(KVM_REQ_EVENT, vcpu);
@ -4651,16 +4690,25 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
* and cannot be decrypted by KVM, i.e. KVM would read cyphertext and
* decode garbage.
*
* Inject #UD if KVM reached this point without an instruction buffer.
* In practice, this path should never be hit by a well-behaved guest,
* e.g. KVM doesn't intercept #UD or #GP for SEV guests, but this path
* is still theoretically reachable, e.g. via unaccelerated fault-like
* AVIC access, and needs to be handled by KVM to avoid putting the
* guest into an infinite loop. Injecting #UD is somewhat arbitrary,
* but its the least awful option given lack of insight into the guest.
* If KVM is NOT trying to simply skip an instruction, inject #UD if
* KVM reached this point without an instruction buffer. In practice,
* this path should never be hit by a well-behaved guest, e.g. KVM
* doesn't intercept #UD or #GP for SEV guests, but this path is still
* theoretically reachable, e.g. via unaccelerated fault-like AVIC
* access, and needs to be handled by KVM to avoid putting the guest
* into an infinite loop. Injecting #UD is somewhat arbitrary, but
* its the least awful option given lack of insight into the guest.
*
* If KVM is trying to skip an instruction, simply resume the guest.
* If a #NPF occurs while the guest is vectoring an INT3/INTO, then KVM
* will attempt to re-inject the INT3/INTO and skip the instruction.
* In that scenario, retrying the INT3/INTO and hoping the guest will
* make forward progress is the only option that has a chance of
* success (and in practice it will work the vast majority of the time).
*/
if (unlikely(!insn)) {
kvm_queue_exception(vcpu, UD_VECTOR);
if (!(emul_type & EMULTYPE_SKIP))
kvm_queue_exception(vcpu, UD_VECTOR);
return false;
}
@ -5112,9 +5160,11 @@ static __init int svm_hardware_setup(void)
svm_adjust_mmio_mask();
nrips = nrips && boot_cpu_has(X86_FEATURE_NRIPS);
/*
* Note, SEV setup consumes npt_enabled and enable_mmio_caching (which
* may be modified by svm_adjust_mmio_mask()).
* may be modified by svm_adjust_mmio_mask()), as well as nrips.
*/
sev_hardware_setup();
@ -5126,11 +5176,6 @@ static __init int svm_hardware_setup(void)
goto err;
}
if (nrips) {
if (!boot_cpu_has(X86_FEATURE_NRIPS))
nrips = false;
}
enable_apicv = avic = avic && avic_hardware_setup();
if (!enable_apicv) {

View File

@ -33,6 +33,7 @@
#define MSRPM_OFFSETS 32
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
extern int nrips;
extern int vgif;
extern bool intercept_smi;
extern bool x2avic_enabled;
@ -406,48 +407,6 @@ static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u3
return test_bit(bit, (unsigned long *)&control->intercepts);
}
static inline void set_dr_intercepts(struct vcpu_svm *svm)
{
struct vmcb *vmcb = svm->vmcb01.ptr;
if (!sev_es_guest(svm->vcpu.kvm)) {
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
}
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
recalc_intercepts(svm);
}
static inline void clr_dr_intercepts(struct vcpu_svm *svm)
{
struct vmcb *vmcb = svm->vmcb01.ptr;
vmcb->control.intercepts[INTERCEPT_DR] = 0;
/* DR7 access must remain intercepted for an SEV-ES guest */
if (sev_es_guest(svm->vcpu.kvm)) {
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
}
recalc_intercepts(svm);
}
static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
{
struct vmcb *vmcb = svm->vmcb01.ptr;

View File

@ -434,6 +434,7 @@
#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* AMD SEV-ES full debug state swap support */
/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */