Merge tag 'kvm-5.6-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull more KVM updates from Paolo Bonzini:

 "s390:
   - fix register corruption
   - ENOTSUPP/EOPNOTSUPP mixed
   - reset cleanups/fixes
   - selftests

  x86:
   - Bug fixes and cleanups
   - AMD support for APIC virtualization even in combination with
     in-kernel PIT or IOAPIC.

  MIPS:
   - Compilation fix.

  Generic:
   - Fix refcount overflow for zero page"

* tag 'kvm-5.6-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (42 commits)
  KVM: vmx: delete meaningless vmx_decache_cr0_guest_bits() declaration
  KVM: x86: Mark CR4.UMIP as reserved based on associated CPUID bit
  x86: vmxfeatures: rename features for consistency with KVM and manual
  KVM: SVM: relax conditions for allowing MSR_IA32_SPEC_CTRL accesses
  KVM: x86: Fix perfctr WRMSR for running counters
  x86/kvm/hyper-v: don't allow to turn on unsupported VMX controls for nested guests
  x86/kvm/hyper-v: move VMX controls sanitization out of nested_enable_evmcs()
  kvm: mmu: Separate generating and setting mmio ptes
  kvm: mmu: Replace unsigned with unsigned int for PTE access
  KVM: nVMX: Remove stale comment from nested_vmx_load_cr3()
  KVM: MIPS: Fold comparecount_func() into comparecount_wakeup()
  KVM: MIPS: Fix a build error due to referencing not-yet-defined function
  x86/kvm: do not setup pv tlb flush when not paravirtualized
  KVM: fix overflow of zero page refcount with ksm running
  KVM: x86: Take a u64 when checking for a valid dr7 value
  KVM: x86: use raw clock values consistently
  KVM: x86: reorganize pvclock_gtod_data members
  KVM: nVMX: delete meaningless nested_vmx_run() declaration
  KVM: SVM: allow AVIC without split irqchip
  kvm: ioapic: Lazy update IOAPIC EOI
  ...
commit 90568ecf56
@ -4177,6 +4177,42 @@ This ioctl issues an ultravisor call to terminate the secure guest,
unpins the VPA pages and releases all the device pages that are used to
track the secure pages by hypervisor.

4.122 KVM_S390_NORMAL_RESET

Capability: KVM_CAP_S390_VCPU_RESETS
Architectures: s390
Type: vcpu ioctl
Parameters: none
Returns: 0

This ioctl resets VCPU registers and control structures according to
the cpu reset definition in the POP (Principles Of Operation).

4.123 KVM_S390_INITIAL_RESET

Capability: none
Architectures: s390
Type: vcpu ioctl
Parameters: none
Returns: 0

This ioctl resets VCPU registers and control structures according to
the initial cpu reset definition in the POP. However, the cpu is not
put into ESA mode. This reset is a superset of the normal reset.

4.124 KVM_S390_CLEAR_RESET

Capability: KVM_CAP_S390_VCPU_RESETS
Architectures: s390
Type: vcpu ioctl
Parameters: none
Returns: 0

This ioctl resets VCPU registers and control structures according to
the clear cpu reset definition in the POP. However, the cpu is not put
into ESA mode. This reset is a superset of the initial reset.


5. The kvm_run structure
------------------------

@ -5405,3 +5441,10 @@ handling by KVM (as some KVM hypercall may be mistakenly treated as TLB
flush hypercalls by Hyper-V) so userspace should disable KVM identification
in CPUID and only expose Hyper-V identification. In this case, the guest
thinks it is running on Hyper-V and only uses Hyper-V hypercalls.

8.22 KVM_CAP_S390_VCPU_RESETS

Architectures: s390

This capability indicates that the KVM_S390_NORMAL_RESET and
KVM_S390_CLEAR_RESET ioctls are available.
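As an illustration (not part of the patch itself), a minimal userspace sketch that probes the capability from section 8.22 and drives the new resets could look like the following; vm_fd, vcpu_fd and reset_vcpu() are hypothetical names, the file descriptors are assumed to come from KVM_CREATE_VM / KVM_CREATE_VCPU, and error handling is reduced to passing through the ioctl return value:

  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  /*
   * Illustrative sketch only: vm_fd and vcpu_fd are assumed to have been
   * obtained via KVM_CREATE_VM and KVM_CREATE_VCPU beforehand.
   */
  static int reset_vcpu(int vm_fd, int vcpu_fd)
  {
          /* The normal and clear resets exist only when this capability is set. */
          if (!ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_VCPU_RESETS))
                  return ioctl(vcpu_fd, KVM_S390_INITIAL_RESET, NULL);

          /* Normal reset: clears local interrupts and pending async pfault state. */
          if (ioctl(vcpu_fd, KVM_S390_NORMAL_RESET, NULL))
                  return -1;

          /* Clear reset: superset of the initial reset that also zeroes the
           * general purpose, vector, access and guarded-storage registers. */
          return ioctl(vcpu_fd, KVM_S390_CLEAR_RESET, NULL);
  }

All three reset ioctls take no argument and return 0 on success, as documented above.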
@ -280,6 +280,22 @@ static inline void dump_handler(const char *symbol, void *start, void *end)
|
||||
pr_debug("\tEND(%s)\n", symbol);
|
||||
}
|
||||
|
||||
/* low level hrtimer wake routine */
|
||||
static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
vcpu = container_of(timer, struct kvm_vcpu, arch.comparecount_timer);
|
||||
|
||||
kvm_mips_callbacks->queue_timer_int(vcpu);
|
||||
|
||||
vcpu->arch.wait = 0;
|
||||
if (swq_has_sleeper(&vcpu->wq))
|
||||
swake_up_one(&vcpu->wq);
|
||||
|
||||
return kvm_mips_count_timeout(vcpu);
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
|
||||
{
|
||||
return 0;
|
||||
@ -1209,27 +1225,6 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kvm_mips_comparecount_func(unsigned long data)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
|
||||
|
||||
kvm_mips_callbacks->queue_timer_int(vcpu);
|
||||
|
||||
vcpu->arch.wait = 0;
|
||||
if (swq_has_sleeper(&vcpu->wq))
|
||||
swake_up_one(&vcpu->wq);
|
||||
}
|
||||
|
||||
/* low level hrtimer wake routine */
|
||||
static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
vcpu = container_of(timer, struct kvm_vcpu, arch.comparecount_timer);
|
||||
kvm_mips_comparecount_func((unsigned long) vcpu);
|
||||
return kvm_mips_count_timeout(vcpu);
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
|
||||
struct kvm_translation *tr)
|
||||
{
|
||||
|
@ -122,6 +122,11 @@ struct mcck_volatile_info {
|
||||
__u32 reserved;
|
||||
};
|
||||
|
||||
#define CR0_INITIAL_MASK (CR0_UNUSED_56 | CR0_INTERRUPT_KEY_SUBMASK | \
|
||||
CR0_MEASUREMENT_ALERT_SUBMASK)
|
||||
#define CR14_INITIAL_MASK (CR14_UNUSED_32 | CR14_UNUSED_33 | \
|
||||
CR14_EXTERNAL_DAMAGE_SUBMASK)
|
||||
|
||||
#define CPUSTAT_STOPPED 0x80000000
|
||||
#define CPUSTAT_WAIT 0x10000000
|
||||
#define CPUSTAT_ECALL_PEND 0x08000000
|
||||
|
@ -2190,7 +2190,7 @@ static int flic_ais_mode_get_all(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
return -EINVAL;
|
||||
|
||||
if (!test_kvm_facility(kvm, 72))
|
||||
return -ENOTSUPP;
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
mutex_lock(&fi->ais_lock);
|
||||
ais.simm = fi->simm;
|
||||
@ -2499,7 +2499,7 @@ static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
int ret = 0;
|
||||
|
||||
if (!test_kvm_facility(kvm, 72))
|
||||
return -ENOTSUPP;
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req)))
|
||||
return -EFAULT;
|
||||
@ -2579,7 +2579,7 @@ static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
struct kvm_s390_ais_all ais;
|
||||
|
||||
if (!test_kvm_facility(kvm, 72))
|
||||
return -ENOTSUPP;
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (copy_from_user(&ais, (void __user *)attr->addr, sizeof(ais)))
|
||||
return -EFAULT;
|
||||
|
@ -529,6 +529,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
case KVM_CAP_S390_CMMA_MIGRATION:
|
||||
case KVM_CAP_S390_AIS:
|
||||
case KVM_CAP_S390_AIS_MIGRATION:
|
||||
case KVM_CAP_S390_VCPU_RESETS:
|
||||
r = 1;
|
||||
break;
|
||||
case KVM_CAP_S390_HPAGE_1M:
|
||||
@ -2808,35 +2809,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
|
||||
}
|
||||
|
||||
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/* this equals initial cpu reset in pop, but we don't switch to ESA */
|
||||
vcpu->arch.sie_block->gpsw.mask = 0UL;
|
||||
vcpu->arch.sie_block->gpsw.addr = 0UL;
|
||||
kvm_s390_set_prefix(vcpu, 0);
|
||||
kvm_s390_set_cpu_timer(vcpu, 0);
|
||||
vcpu->arch.sie_block->ckc = 0UL;
|
||||
vcpu->arch.sie_block->todpr = 0;
|
||||
memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
|
||||
vcpu->arch.sie_block->gcr[0] = CR0_UNUSED_56 |
|
||||
CR0_INTERRUPT_KEY_SUBMASK |
|
||||
CR0_MEASUREMENT_ALERT_SUBMASK;
|
||||
vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
|
||||
CR14_UNUSED_33 |
|
||||
CR14_EXTERNAL_DAMAGE_SUBMASK;
|
||||
/* make sure the new fpc will be lazily loaded */
|
||||
save_fpu_regs();
|
||||
current->thread.fpu.fpc = 0;
|
||||
vcpu->arch.sie_block->gbea = 1;
|
||||
vcpu->arch.sie_block->pp = 0;
|
||||
vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
|
||||
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
|
||||
kvm_clear_async_pf_completion_queue(vcpu);
|
||||
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
|
||||
kvm_s390_vcpu_stop(vcpu);
|
||||
kvm_s390_clear_local_irqs(vcpu);
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
mutex_lock(&vcpu->kvm->lock);
|
||||
@ -3279,10 +3251,53 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
|
||||
return r;
|
||||
}
|
||||
|
||||
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
|
||||
static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_s390_vcpu_initial_reset(vcpu);
|
||||
return 0;
|
||||
vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
|
||||
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
|
||||
memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
|
||||
|
||||
kvm_clear_async_pf_completion_queue(vcpu);
|
||||
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
|
||||
kvm_s390_vcpu_stop(vcpu);
|
||||
kvm_s390_clear_local_irqs(vcpu);
|
||||
}
|
||||
|
||||
static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/* Initial reset is a superset of the normal reset */
|
||||
kvm_arch_vcpu_ioctl_normal_reset(vcpu);
|
||||
|
||||
/* this equals initial cpu reset in pop, but we don't switch to ESA */
|
||||
vcpu->arch.sie_block->gpsw.mask = 0;
|
||||
vcpu->arch.sie_block->gpsw.addr = 0;
|
||||
kvm_s390_set_prefix(vcpu, 0);
|
||||
kvm_s390_set_cpu_timer(vcpu, 0);
|
||||
vcpu->arch.sie_block->ckc = 0;
|
||||
vcpu->arch.sie_block->todpr = 0;
|
||||
memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
|
||||
vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
|
||||
vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
|
||||
vcpu->run->s.regs.fpc = 0;
|
||||
vcpu->arch.sie_block->gbea = 1;
|
||||
vcpu->arch.sie_block->pp = 0;
|
||||
vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
|
||||
}
|
||||
|
||||
static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_sync_regs *regs = &vcpu->run->s.regs;
|
||||
|
||||
/* Clear reset is a superset of the initial reset */
|
||||
kvm_arch_vcpu_ioctl_initial_reset(vcpu);
|
||||
|
||||
memset(®s->gprs, 0, sizeof(regs->gprs));
|
||||
memset(®s->vrs, 0, sizeof(regs->vrs));
|
||||
memset(®s->acrs, 0, sizeof(regs->acrs));
|
||||
memset(®s->gscb, 0, sizeof(regs->gscb));
|
||||
|
||||
regs->etoken = 0;
|
||||
regs->etoken_extension = 0;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
|
||||
@ -4343,7 +4358,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
switch (ioctl) {
|
||||
case KVM_S390_STORE_STATUS:
|
||||
idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
r = kvm_s390_vcpu_store_status(vcpu, arg);
|
||||
r = kvm_s390_store_status_unloaded(vcpu, arg);
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, idx);
|
||||
break;
|
||||
case KVM_S390_SET_INITIAL_PSW: {
|
||||
@ -4355,8 +4370,17 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
|
||||
break;
|
||||
}
|
||||
case KVM_S390_CLEAR_RESET:
|
||||
r = 0;
|
||||
kvm_arch_vcpu_ioctl_clear_reset(vcpu);
|
||||
break;
|
||||
case KVM_S390_INITIAL_RESET:
|
||||
r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
|
||||
r = 0;
|
||||
kvm_arch_vcpu_ioctl_initial_reset(vcpu);
|
||||
break;
|
||||
case KVM_S390_NORMAL_RESET:
|
||||
r = 0;
|
||||
kvm_arch_vcpu_ioctl_normal_reset(vcpu);
|
||||
break;
|
||||
case KVM_SET_ONE_REG:
|
||||
case KVM_GET_ONE_REG: {
|
||||
|
@ -78,6 +78,8 @@
|
||||
#define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22)
|
||||
#define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23)
|
||||
#define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24)
|
||||
#define KVM_REQ_APICV_UPDATE \
|
||||
KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
|
||||
|
||||
#define CR0_RESERVED_BITS \
|
||||
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
|
||||
@ -873,6 +875,12 @@ enum kvm_irqchip_mode {
|
||||
KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */
|
||||
};
|
||||
|
||||
#define APICV_INHIBIT_REASON_DISABLE 0
|
||||
#define APICV_INHIBIT_REASON_HYPERV 1
|
||||
#define APICV_INHIBIT_REASON_NESTED 2
|
||||
#define APICV_INHIBIT_REASON_IRQWIN 3
|
||||
#define APICV_INHIBIT_REASON_PIT_REINJ 4
|
||||
|
||||
struct kvm_arch {
|
||||
unsigned long n_used_mmu_pages;
|
||||
unsigned long n_requested_mmu_pages;
|
||||
@ -904,6 +912,7 @@ struct kvm_arch {
|
||||
struct kvm_apic_map *apic_map;
|
||||
|
||||
bool apic_access_page_done;
|
||||
unsigned long apicv_inhibit_reasons;
|
||||
|
||||
gpa_t wall_clock;
|
||||
|
||||
@ -1118,7 +1127,8 @@ struct kvm_x86_ops {
|
||||
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
|
||||
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
|
||||
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
|
||||
bool (*get_enable_apicv)(struct kvm *kvm);
|
||||
bool (*check_apicv_inhibit_reasons)(ulong bit);
|
||||
void (*pre_update_apicv_exec_ctrl)(struct kvm *kvm, bool activate);
|
||||
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
|
||||
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
|
||||
void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
|
||||
@ -1477,7 +1487,11 @@ gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
|
||||
gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
|
||||
struct x86_exception *exception);
|
||||
|
||||
void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu);
|
||||
bool kvm_apicv_activated(struct kvm *kvm);
|
||||
void kvm_apicv_init(struct kvm *kvm, bool enable);
|
||||
void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
|
||||
void kvm_request_apicv_update(struct kvm *kvm, bool activate,
|
||||
unsigned long bit);
|
||||
|
||||
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
|
||||
|
||||
|
@ -22,8 +22,8 @@
|
||||
/*
|
||||
* Definitions of Primary Processor-Based VM-Execution Controls.
|
||||
*/
|
||||
#define CPU_BASED_INTR_WINDOW_EXITING VMCS_CONTROL_BIT(VIRTUAL_INTR_PENDING)
|
||||
#define CPU_BASED_USE_TSC_OFFSETTING VMCS_CONTROL_BIT(TSC_OFFSETTING)
|
||||
#define CPU_BASED_INTR_WINDOW_EXITING VMCS_CONTROL_BIT(INTR_WINDOW_EXITING)
|
||||
#define CPU_BASED_USE_TSC_OFFSETTING VMCS_CONTROL_BIT(USE_TSC_OFFSETTING)
|
||||
#define CPU_BASED_HLT_EXITING VMCS_CONTROL_BIT(HLT_EXITING)
|
||||
#define CPU_BASED_INVLPG_EXITING VMCS_CONTROL_BIT(INVLPG_EXITING)
|
||||
#define CPU_BASED_MWAIT_EXITING VMCS_CONTROL_BIT(MWAIT_EXITING)
|
||||
@ -34,7 +34,7 @@
|
||||
#define CPU_BASED_CR8_LOAD_EXITING VMCS_CONTROL_BIT(CR8_LOAD_EXITING)
|
||||
#define CPU_BASED_CR8_STORE_EXITING VMCS_CONTROL_BIT(CR8_STORE_EXITING)
|
||||
#define CPU_BASED_TPR_SHADOW VMCS_CONTROL_BIT(VIRTUAL_TPR)
|
||||
#define CPU_BASED_NMI_WINDOW_EXITING VMCS_CONTROL_BIT(VIRTUAL_NMI_PENDING)
|
||||
#define CPU_BASED_NMI_WINDOW_EXITING VMCS_CONTROL_BIT(NMI_WINDOW_EXITING)
|
||||
#define CPU_BASED_MOV_DR_EXITING VMCS_CONTROL_BIT(MOV_DR_EXITING)
|
||||
#define CPU_BASED_UNCOND_IO_EXITING VMCS_CONTROL_BIT(UNCOND_IO_EXITING)
|
||||
#define CPU_BASED_USE_IO_BITMAPS VMCS_CONTROL_BIT(USE_IO_BITMAPS)
|
||||
|
@ -34,8 +34,8 @@
|
||||
#define VMX_FEATURE_EPTP_SWITCHING ( 0*32+ 28) /* EPTP switching (in guest) */
|
||||
|
||||
/* Primary Processor-Based VM-Execution Controls, word 1 */
|
||||
#define VMX_FEATURE_VIRTUAL_INTR_PENDING ( 1*32+ 2) /* "" VM-Exit if INTRs are unblocked in guest */
|
||||
#define VMX_FEATURE_TSC_OFFSETTING ( 1*32+ 3) /* "tsc_offset" Offset hardware TSC when read in guest */
|
||||
#define VMX_FEATURE_INTR_WINDOW_EXITING ( 1*32+ 2) /* "" VM-Exit if INTRs are unblocked in guest */
|
||||
#define VMX_FEATURE_USE_TSC_OFFSETTING ( 1*32+ 3) /* "tsc_offset" Offset hardware TSC when read in guest */
|
||||
#define VMX_FEATURE_HLT_EXITING ( 1*32+ 7) /* "" VM-Exit on HLT */
|
||||
#define VMX_FEATURE_INVLPG_EXITING ( 1*32+ 9) /* "" VM-Exit on INVLPG */
|
||||
#define VMX_FEATURE_MWAIT_EXITING ( 1*32+ 10) /* "" VM-Exit on MWAIT */
|
||||
@ -46,7 +46,7 @@
|
||||
#define VMX_FEATURE_CR8_LOAD_EXITING ( 1*32+ 19) /* "" VM-Exit on writes to CR8 */
|
||||
#define VMX_FEATURE_CR8_STORE_EXITING ( 1*32+ 20) /* "" VM-Exit on reads from CR8 */
|
||||
#define VMX_FEATURE_VIRTUAL_TPR ( 1*32+ 21) /* "vtpr" TPR virtualization, a.k.a. TPR shadow */
|
||||
#define VMX_FEATURE_VIRTUAL_NMI_PENDING ( 1*32+ 22) /* "" VM-Exit if NMIs are unblocked in guest */
|
||||
#define VMX_FEATURE_NMI_WINDOW_EXITING ( 1*32+ 22) /* "" VM-Exit if NMIs are unblocked in guest */
|
||||
#define VMX_FEATURE_MOV_DR_EXITING ( 1*32+ 23) /* "" VM-Exit on accesses to debug registers */
|
||||
#define VMX_FEATURE_UNCOND_IO_EXITING ( 1*32+ 24) /* "" VM-Exit on *all* IN{S} and OUT{S}*/
|
||||
#define VMX_FEATURE_USE_IO_BITMAPS ( 1*32+ 25) /* "" VM-Exit based on I/O port */
|
||||
|
@ -736,6 +736,9 @@ static __init int kvm_setup_pv_tlb_flush(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
if (!kvm_para_available() || nopv)
|
||||
return 0;
|
||||
|
||||
if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
|
||||
!kvm_para_has_hint(KVM_HINTS_REALTIME) &&
|
||||
kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
|
||||
|
@ -776,9 +776,10 @@ int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
|
||||
|
||||
/*
|
||||
* Hyper-V SynIC auto EOI SINT's are
|
||||
* not compatible with APICV, so deactivate APICV
|
||||
* not compatible with APICV, so request
|
||||
* to deactivate APICV permanently.
|
||||
*/
|
||||
kvm_vcpu_deactivate_apicv(vcpu);
|
||||
kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_HYPERV);
|
||||
synic->active = true;
|
||||
synic->dont_zero_synic_pages = dont_zero_synic_pages;
|
||||
return 0;
|
||||
|
@ -295,12 +295,24 @@ void kvm_pit_set_reinject(struct kvm_pit *pit, bool reinject)
|
||||
if (atomic_read(&ps->reinject) == reinject)
|
||||
return;
|
||||
|
||||
/*
* AMD SVM AVIC accelerates EOI write and does not trap.
* This causes in-kernel PIT re-inject mode to fail
* since it checks ps->irq_ack before kvm_set_irq()
* and relies on the ack notifier to timely queue
* the pt->worker work item and reinject the missed tick.
* So, deactivate APICv when PIT is in reinject mode.
*/
|
||||
if (reinject) {
|
||||
kvm_request_apicv_update(kvm, false,
|
||||
APICV_INHIBIT_REASON_PIT_REINJ);
|
||||
/* The initial state is preserved while ps->reinject == 0. */
|
||||
kvm_pit_reset_reinject(pit);
|
||||
kvm_register_irq_ack_notifier(kvm, &ps->irq_ack_notifier);
|
||||
kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
|
||||
} else {
|
||||
kvm_request_apicv_update(kvm, true,
|
||||
APICV_INHIBIT_REASON_PIT_REINJ);
|
||||
kvm_unregister_irq_ack_notifier(kvm, &ps->irq_ack_notifier);
|
||||
kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
|
||||
}
|
||||
|
@ -49,6 +49,11 @@
|
||||
static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
|
||||
bool line_status);
|
||||
|
||||
static void kvm_ioapic_update_eoi_one(struct kvm_vcpu *vcpu,
|
||||
struct kvm_ioapic *ioapic,
|
||||
int trigger_mode,
|
||||
int pin);
|
||||
|
||||
static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
|
||||
unsigned long addr,
|
||||
unsigned long length)
|
||||
@ -154,10 +159,16 @@ static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic)
|
||||
__rtc_irq_eoi_tracking_restore_one(vcpu);
|
||||
}
|
||||
|
||||
static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu)
|
||||
static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu,
|
||||
int vector)
|
||||
{
|
||||
if (test_and_clear_bit(vcpu->vcpu_id,
|
||||
ioapic->rtc_status.dest_map.map)) {
|
||||
struct dest_map *dest_map = &ioapic->rtc_status.dest_map;
|
||||
|
||||
/* RTC special handling */
|
||||
if (test_bit(vcpu->vcpu_id, dest_map->map) &&
|
||||
(vector == dest_map->vectors[vcpu->vcpu_id]) &&
|
||||
(test_and_clear_bit(vcpu->vcpu_id,
|
||||
ioapic->rtc_status.dest_map.map))) {
|
||||
--ioapic->rtc_status.pending_eoi;
|
||||
rtc_status_pending_eoi_check_valid(ioapic);
|
||||
}
|
||||
@ -171,6 +182,31 @@ static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic)
|
||||
return false;
|
||||
}
|
||||
|
||||
static void ioapic_lazy_update_eoi(struct kvm_ioapic *ioapic, int irq)
|
||||
{
|
||||
int i;
|
||||
struct kvm_vcpu *vcpu;
|
||||
union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, ioapic->kvm) {
|
||||
if (!kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
|
||||
entry->fields.dest_id,
|
||||
entry->fields.dest_mode) ||
|
||||
kvm_apic_pending_eoi(vcpu, entry->fields.vector))
|
||||
continue;
|
||||
|
||||
/*
* If the LAPIC no longer has a pending EOI, update
* the EOI for this vector.
*/
|
||||
rtc_irq_eoi(ioapic, vcpu, entry->fields.vector);
|
||||
kvm_ioapic_update_eoi_one(vcpu, ioapic,
|
||||
entry->fields.trig_mode,
|
||||
irq);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
|
||||
int irq_level, bool line_status)
|
||||
{
|
||||
@ -188,6 +224,15 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
* AMD SVM AVIC accelerates EOI write and does not trap it, so the
* in-kernel IOAPIC will not be able to receive the EOI.
* In this case, we do a lazy update of the pending EOI when
* trying to set the IOAPIC irq.
*/
|
||||
if (kvm_apicv_activated(ioapic->kvm))
|
||||
ioapic_lazy_update_eoi(ioapic, irq);
|
||||
|
||||
/*
|
||||
* Return 0 for coalesced interrupts; for edge-triggered interrupts,
|
||||
* this only happens if a previous edge has not been delivered due
|
||||
@ -454,72 +499,68 @@ static void kvm_ioapic_eoi_inject_work(struct work_struct *work)
|
||||
}
|
||||
|
||||
#define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000
|
||||
|
||||
static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
|
||||
struct kvm_ioapic *ioapic, int vector, int trigger_mode)
|
||||
static void kvm_ioapic_update_eoi_one(struct kvm_vcpu *vcpu,
|
||||
struct kvm_ioapic *ioapic,
|
||||
int trigger_mode,
|
||||
int pin)
|
||||
{
|
||||
struct dest_map *dest_map = &ioapic->rtc_status.dest_map;
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
int i;
|
||||
union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[pin];
|
||||
|
||||
/* RTC special handling */
|
||||
if (test_bit(vcpu->vcpu_id, dest_map->map) &&
|
||||
vector == dest_map->vectors[vcpu->vcpu_id])
|
||||
rtc_irq_eoi(ioapic, vcpu);
|
||||
/*
|
||||
* We are dropping lock while calling ack notifiers because ack
|
||||
* notifier callbacks for assigned devices call into IOAPIC
|
||||
* recursively. Since remote_irr is cleared only after call
|
||||
* to notifiers if the same vector will be delivered while lock
|
||||
* is dropped it will be put into irr and will be delivered
|
||||
* after ack notifier returns.
|
||||
*/
|
||||
spin_unlock(&ioapic->lock);
|
||||
kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin);
|
||||
spin_lock(&ioapic->lock);
|
||||
|
||||
for (i = 0; i < IOAPIC_NUM_PINS; i++) {
|
||||
union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
|
||||
if (trigger_mode != IOAPIC_LEVEL_TRIG ||
|
||||
kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)
|
||||
return;
|
||||
|
||||
if (ent->fields.vector != vector)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* We are dropping lock while calling ack notifiers because ack
|
||||
* notifier callbacks for assigned devices call into IOAPIC
|
||||
* recursively. Since remote_irr is cleared only after call
|
||||
* to notifiers if the same vector will be delivered while lock
|
||||
* is dropped it will be put into irr and will be delivered
|
||||
* after ack notifier returns.
|
||||
*/
|
||||
spin_unlock(&ioapic->lock);
|
||||
kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
|
||||
spin_lock(&ioapic->lock);
|
||||
|
||||
if (trigger_mode != IOAPIC_LEVEL_TRIG ||
|
||||
kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)
|
||||
continue;
|
||||
|
||||
ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
|
||||
ent->fields.remote_irr = 0;
|
||||
if (!ent->fields.mask && (ioapic->irr & (1 << i))) {
|
||||
++ioapic->irq_eoi[i];
|
||||
if (ioapic->irq_eoi[i] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) {
|
||||
/*
|
||||
* Real hardware does not deliver the interrupt
|
||||
* immediately during eoi broadcast, and this
|
||||
* lets a buggy guest make slow progress
|
||||
* even if it does not correctly handle a
|
||||
* level-triggered interrupt. Emulate this
|
||||
* behavior if we detect an interrupt storm.
|
||||
*/
|
||||
schedule_delayed_work(&ioapic->eoi_inject, HZ / 100);
|
||||
ioapic->irq_eoi[i] = 0;
|
||||
trace_kvm_ioapic_delayed_eoi_inj(ent->bits);
|
||||
} else {
|
||||
ioapic_service(ioapic, i, false);
|
||||
}
|
||||
ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
|
||||
ent->fields.remote_irr = 0;
|
||||
if (!ent->fields.mask && (ioapic->irr & (1 << pin))) {
|
||||
++ioapic->irq_eoi[pin];
|
||||
if (ioapic->irq_eoi[pin] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) {
|
||||
/*
|
||||
* Real hardware does not deliver the interrupt
|
||||
* immediately during eoi broadcast, and this
|
||||
* lets a buggy guest make slow progress
|
||||
* even if it does not correctly handle a
|
||||
* level-triggered interrupt. Emulate this
|
||||
* behavior if we detect an interrupt storm.
|
||||
*/
|
||||
schedule_delayed_work(&ioapic->eoi_inject, HZ / 100);
|
||||
ioapic->irq_eoi[pin] = 0;
|
||||
trace_kvm_ioapic_delayed_eoi_inj(ent->bits);
|
||||
} else {
|
||||
ioapic->irq_eoi[i] = 0;
|
||||
ioapic_service(ioapic, pin, false);
|
||||
}
|
||||
} else {
|
||||
ioapic->irq_eoi[pin] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode)
|
||||
{
|
||||
int i;
|
||||
struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
|
||||
|
||||
spin_lock(&ioapic->lock);
|
||||
__kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode);
|
||||
rtc_irq_eoi(ioapic, vcpu, vector);
|
||||
for (i = 0; i < IOAPIC_NUM_PINS; i++) {
|
||||
union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
|
||||
|
||||
if (ent->fields.vector != vector)
|
||||
continue;
|
||||
kvm_ioapic_update_eoi_one(vcpu, ioapic, trigger_mode, i);
|
||||
}
|
||||
spin_unlock(&ioapic->lock);
|
||||
}
|
||||
|
||||
|
@ -2187,6 +2187,21 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
|
||||
pr_warn_once("APIC base relocation is unsupported by KVM");
|
||||
}
|
||||
|
||||
void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
|
||||
if (vcpu->arch.apicv_active) {
|
||||
/* irr_pending is always true when apicv is activated. */
|
||||
apic->irr_pending = true;
|
||||
apic->isr_count = 1;
|
||||
} else {
|
||||
apic->irr_pending = (apic_search_irr(apic) != -1);
|
||||
apic->isr_count = count_vectors(apic->regs + APIC_ISR);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_apic_update_apicv);
|
||||
|
||||
void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
|
||||
{
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
@ -2229,8 +2244,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
|
||||
kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
|
||||
kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
|
||||
}
|
||||
apic->irr_pending = vcpu->arch.apicv_active;
|
||||
apic->isr_count = vcpu->arch.apicv_active ? 1 : 0;
|
||||
kvm_apic_update_apicv(vcpu);
|
||||
apic->highest_isr_cache = -1;
|
||||
update_divide_count(apic);
|
||||
atomic_set(&apic->lapic_timer.pending, 0);
|
||||
@ -2487,9 +2501,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
|
||||
apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
|
||||
update_divide_count(apic);
|
||||
start_apic_timer(apic);
|
||||
apic->irr_pending = true;
|
||||
apic->isr_count = vcpu->arch.apicv_active ?
|
||||
1 : count_vectors(apic->regs + APIC_ISR);
|
||||
kvm_apic_update_apicv(vcpu);
|
||||
apic->highest_isr_cache = -1;
|
||||
if (vcpu->arch.apicv_active) {
|
||||
kvm_x86_ops->apicv_post_state_restore(vcpu);
|
||||
|
@ -91,6 +91,7 @@ void kvm_apic_update_ppr(struct kvm_vcpu *vcpu);
|
||||
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
|
||||
struct dest_map *dest_map);
|
||||
int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
|
||||
void kvm_apic_update_apicv(struct kvm_vcpu *vcpu);
|
||||
|
||||
bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
|
||||
struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map);
|
||||
|
@ -451,9 +451,9 @@ static u64 get_mmio_spte_generation(u64 spte)
|
||||
return gen;
|
||||
}
|
||||
|
||||
static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
|
||||
unsigned access)
|
||||
static u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access)
|
||||
{
|
||||
|
||||
u64 gen = kvm_vcpu_memslots(vcpu)->generation & MMIO_SPTE_GEN_MASK;
|
||||
u64 mask = generation_mmio_spte_mask(gen);
|
||||
u64 gpa = gfn << PAGE_SHIFT;
|
||||
@ -464,6 +464,17 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
|
||||
mask |= (gpa & shadow_nonpresent_or_rsvd_mask)
|
||||
<< shadow_nonpresent_or_rsvd_mask_len;
|
||||
|
||||
return mask;
|
||||
}
|
||||
|
||||
static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
|
||||
unsigned int access)
|
||||
{
|
||||
u64 mask = make_mmio_spte(vcpu, gfn, access);
|
||||
unsigned int gen = get_mmio_spte_generation(mask);
|
||||
|
||||
access = mask & ACC_ALL;
|
||||
|
||||
trace_mark_mmio_spte(sptep, gfn, access, gen);
|
||||
mmu_spte_set(sptep, mask);
|
||||
}
|
||||
@ -484,7 +495,7 @@ static unsigned get_mmio_spte_access(u64 spte)
|
||||
}
|
||||
|
||||
static bool set_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
|
||||
kvm_pfn_t pfn, unsigned access)
|
||||
kvm_pfn_t pfn, unsigned int access)
|
||||
{
|
||||
if (unlikely(is_noslot_pfn(pfn))) {
|
||||
mark_mmio_spte(vcpu, sptep, gfn, access);
|
||||
@ -2475,7 +2486,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
|
||||
gva_t gaddr,
|
||||
unsigned level,
|
||||
int direct,
|
||||
unsigned access)
|
||||
unsigned int access)
|
||||
{
|
||||
union kvm_mmu_page_role role;
|
||||
unsigned quadrant;
|
||||
@ -2990,7 +3001,7 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
|
||||
#define SET_SPTE_NEED_REMOTE_TLB_FLUSH BIT(1)
|
||||
|
||||
static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
|
||||
unsigned pte_access, int level,
|
||||
unsigned int pte_access, int level,
|
||||
gfn_t gfn, kvm_pfn_t pfn, bool speculative,
|
||||
bool can_unsync, bool host_writable)
|
||||
{
|
||||
@ -3081,9 +3092,10 @@ set_pte:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
|
||||
int write_fault, int level, gfn_t gfn, kvm_pfn_t pfn,
|
||||
bool speculative, bool host_writable)
|
||||
static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
|
||||
unsigned int pte_access, int write_fault, int level,
|
||||
gfn_t gfn, kvm_pfn_t pfn, bool speculative,
|
||||
bool host_writable)
|
||||
{
|
||||
int was_rmapped = 0;
|
||||
int rmap_count;
|
||||
@ -3165,7 +3177,7 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
|
||||
{
|
||||
struct page *pages[PTE_PREFETCH_NUM];
|
||||
struct kvm_memory_slot *slot;
|
||||
unsigned access = sp->role.access;
|
||||
unsigned int access = sp->role.access;
|
||||
int i, ret;
|
||||
gfn_t gfn;
|
||||
|
||||
@ -3400,7 +3412,8 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
|
||||
}
|
||||
|
||||
static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
|
||||
kvm_pfn_t pfn, unsigned access, int *ret_val)
|
||||
kvm_pfn_t pfn, unsigned int access,
|
||||
int *ret_val)
|
||||
{
|
||||
/* The pfn is invalid, report the error! */
|
||||
if (unlikely(is_error_pfn(pfn))) {
|
||||
@ -4005,7 +4018,7 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
|
||||
|
||||
if (is_mmio_spte(spte)) {
|
||||
gfn_t gfn = get_mmio_spte_gfn(spte);
|
||||
unsigned access = get_mmio_spte_access(spte);
|
||||
unsigned int access = get_mmio_spte_access(spte);
|
||||
|
||||
if (!check_mmio_spte(vcpu, spte))
|
||||
return RET_PF_INVALID;
|
||||
@ -4349,7 +4362,7 @@ static void inject_page_fault(struct kvm_vcpu *vcpu,
|
||||
}
|
||||
|
||||
static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
|
||||
unsigned access, int *nr_present)
|
||||
unsigned int access, int *nr_present)
|
||||
{
|
||||
if (unlikely(is_mmio_spte(*sptep))) {
|
||||
if (gfn != get_mmio_spte_gfn(*sptep)) {
|
||||
|
@ -387,6 +387,8 @@ static u8 rsm_ins_bytes[] = "\x0f\xaa";
|
||||
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
|
||||
static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
|
||||
static void svm_complete_interrupts(struct vcpu_svm *svm);
|
||||
static void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate);
|
||||
static inline void avic_post_state_restore(struct kvm_vcpu *vcpu);
|
||||
|
||||
static int nested_svm_exit_handled(struct vcpu_svm *svm);
|
||||
static int nested_svm_intercept(struct vcpu_svm *svm);
|
||||
@ -1545,7 +1547,10 @@ static void avic_init_vmcb(struct vcpu_svm *svm)
|
||||
vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
|
||||
vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
|
||||
vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;
|
||||
vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
|
||||
if (kvm_apicv_activated(svm->vcpu.kvm))
|
||||
vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
|
||||
else
|
||||
vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
|
||||
}
|
||||
|
||||
static void init_vmcb(struct vcpu_svm *svm)
|
||||
@ -1729,23 +1734,28 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
|
||||
* field of the VMCB. Therefore, we set up the
|
||||
* APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
|
||||
*/
|
||||
static int avic_init_access_page(struct kvm_vcpu *vcpu)
|
||||
static int avic_update_access_page(struct kvm *kvm, bool activate)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
int ret = 0;
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
if (kvm->arch.apic_access_page_done)
|
||||
/*
|
||||
* During kvm_destroy_vm(), kvm_pit_set_reinject() could trigger
|
||||
* APICv mode change, which update APIC_ACCESS_PAGE_PRIVATE_MEMSLOT
|
||||
* memory region. So, we need to ensure that kvm->mm == current->mm.
|
||||
*/
|
||||
if ((kvm->arch.apic_access_page_done == activate) ||
|
||||
(kvm->mm != current->mm))
|
||||
goto out;
|
||||
|
||||
ret = __x86_set_memory_region(kvm,
|
||||
APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
|
||||
APIC_DEFAULT_PHYS_BASE,
|
||||
PAGE_SIZE);
|
||||
activate ? PAGE_SIZE : 0);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
kvm->arch.apic_access_page_done = true;
|
||||
kvm->arch.apic_access_page_done = activate;
|
||||
out:
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
return ret;
|
||||
@ -1753,21 +1763,24 @@ out:
|
||||
|
||||
static int avic_init_backing_page(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int ret;
|
||||
u64 *entry, new_entry;
|
||||
int id = vcpu->vcpu_id;
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
ret = avic_init_access_page(vcpu);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (id >= AVIC_MAX_PHYSICAL_ID_COUNT)
|
||||
return -EINVAL;
|
||||
|
||||
if (!svm->vcpu.arch.apic->regs)
|
||||
return -EINVAL;
|
||||
|
||||
if (kvm_apicv_activated(vcpu->kvm)) {
|
||||
int ret;
|
||||
|
||||
ret = avic_update_access_page(vcpu->kvm, true);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
svm->avic_backing_page = virt_to_page(svm->vcpu.arch.apic->regs);
|
||||
|
||||
/* Setting AVIC backing page address in the phy APIC ID table */
|
||||
@ -2052,6 +2065,18 @@ free_avic:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int svm_vm_init(struct kvm *kvm)
|
||||
{
|
||||
if (avic) {
|
||||
int ret = avic_vm_init(kvm);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
kvm_apicv_init(kvm, avic);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int
|
||||
avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
|
||||
{
|
||||
@ -2223,7 +2248,8 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
|
||||
/* We initialize this flag to true to make sure that the is_running
|
||||
* bit would be set the first time the vcpu is loaded.
|
||||
*/
|
||||
svm->avic_is_running = true;
|
||||
if (irqchip_in_kernel(vcpu->kvm) && kvm_apicv_activated(vcpu->kvm))
|
||||
svm->avic_is_running = true;
|
||||
|
||||
svm->nested.hsave = page_address(hsave_page);
|
||||
|
||||
@ -2348,6 +2374,8 @@ static void svm_vcpu_blocking(struct kvm_vcpu *vcpu)
|
||||
|
||||
static void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
|
||||
kvm_vcpu_update_apicv(vcpu);
|
||||
avic_set_running(vcpu, true);
|
||||
}
|
||||
|
||||
@ -4197,6 +4225,8 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
break;
|
||||
case MSR_IA32_SPEC_CTRL:
|
||||
if (!msr_info->host_initiated &&
|
||||
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) &&
|
||||
!guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) &&
|
||||
!guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
|
||||
!guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
|
||||
return 1;
|
||||
@ -4282,6 +4312,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
|
||||
break;
|
||||
case MSR_IA32_SPEC_CTRL:
|
||||
if (!msr->host_initiated &&
|
||||
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) &&
|
||||
!guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) &&
|
||||
!guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
|
||||
!guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
|
||||
return 1;
|
||||
@ -4440,6 +4472,14 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
|
||||
svm_clear_vintr(svm);
|
||||
|
||||
/*
|
||||
* For AVIC, the only reason to end up here is ExtINTs.
|
||||
* In this case AVIC was temporarily disabled for
|
||||
* requesting the IRQ window and we have to re-enable it.
|
||||
*/
|
||||
svm_toggle_avic_for_irq_window(&svm->vcpu, true);
|
||||
|
||||
svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
|
||||
mark_dirty(svm->vmcb, VMCB_INTR);
|
||||
++svm->vcpu.stat.irq_window_exits;
|
||||
@ -5135,11 +5175,6 @@ static void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
|
||||
return;
|
||||
}
|
||||
|
||||
static bool svm_get_enable_apicv(struct kvm *kvm)
|
||||
{
|
||||
return avic && irqchip_split(kvm);
|
||||
}
|
||||
|
||||
static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
|
||||
{
|
||||
}
|
||||
@ -5148,17 +5183,71 @@ static void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
|
||||
{
|
||||
}
|
||||
|
||||
/* Note: Currently only used by Hyper-V. */
|
||||
static void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate)
|
||||
{
|
||||
if (!avic || !lapic_in_kernel(vcpu))
|
||||
return;
|
||||
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
|
||||
kvm_request_apicv_update(vcpu->kvm, activate,
|
||||
APICV_INHIBIT_REASON_IRQWIN);
|
||||
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
}
|
||||
|
||||
static int svm_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
|
||||
{
|
||||
int ret = 0;
|
||||
unsigned long flags;
|
||||
struct amd_svm_iommu_ir *ir;
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
if (!kvm_arch_has_assigned_device(vcpu->kvm))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Here, we go through the per-vcpu ir_list to update all existing
|
||||
* interrupt remapping table entry targeting this vcpu.
|
||||
*/
|
||||
spin_lock_irqsave(&svm->ir_list_lock, flags);
|
||||
|
||||
if (list_empty(&svm->ir_list))
|
||||
goto out;
|
||||
|
||||
list_for_each_entry(ir, &svm->ir_list, node) {
|
||||
if (activate)
|
||||
ret = amd_iommu_activate_guest_mode(ir->data);
|
||||
else
|
||||
ret = amd_iommu_deactivate_guest_mode(ir->data);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
out:
|
||||
spin_unlock_irqrestore(&svm->ir_list_lock, flags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
struct vmcb *vmcb = svm->vmcb;
|
||||
bool activated = kvm_vcpu_apicv_active(vcpu);
|
||||
|
||||
if (kvm_vcpu_apicv_active(vcpu))
|
||||
if (activated) {
|
||||
/**
|
||||
* During AVIC temporary deactivation, guest could update
|
||||
* APIC ID, DFR and LDR registers, which would not be trapped
|
||||
* by avic_unaccelerated_access_interception(). In this case,
|
||||
* we need to check and update the AVIC logical APIC ID table
|
||||
* accordingly before re-activating.
|
||||
*/
|
||||
avic_post_state_restore(vcpu);
|
||||
vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
|
||||
else
|
||||
} else {
|
||||
vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
|
||||
}
|
||||
mark_dirty(vmcb, VMCB_AVIC);
|
||||
|
||||
svm_set_pi_irte_mode(vcpu, activated);
|
||||
}
|
||||
|
||||
static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
|
||||
@ -5445,9 +5534,6 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
if (kvm_vcpu_apicv_active(vcpu))
|
||||
return;
|
||||
|
||||
/*
|
||||
* In case GIF=0 we can't rely on the CPU to tell us when GIF becomes
|
||||
* 1, because that's a separate STGI/VMRUN intercept. The next time we
|
||||
@ -5457,6 +5543,13 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
|
||||
* window under the assumption that the hardware will set the GIF.
|
||||
*/
|
||||
if ((vgif_enabled(svm) || gif_set(svm)) && nested_svm_intr(svm)) {
|
||||
/*
|
||||
* IRQ window is not needed when AVIC is enabled,
|
||||
* unless we have pending ExtINT since it cannot be injected
|
||||
* via AVIC. In such case, we need to temporarily disable AVIC,
|
||||
* and fallback to injecting IRQ via V_IRQ.
|
||||
*/
|
||||
svm_toggle_avic_for_irq_window(vcpu, false);
|
||||
svm_set_vintr(svm);
|
||||
svm_inject_irq(svm, 0x0);
|
||||
}
|
||||
@ -5929,6 +6022,14 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu)
|
||||
return;
|
||||
|
||||
guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC);
|
||||
|
||||
/*
|
||||
* Currently, AVIC does not work with nested virtualization.
|
||||
* So, we disable AVIC when cpuid for SVM is set in the L1 guest.
|
||||
*/
|
||||
if (nested && guest_cpuid_has(vcpu, X86_FEATURE_SVM))
|
||||
kvm_request_apicv_update(vcpu->kvm, false,
|
||||
APICV_INHIBIT_REASON_NESTED);
|
||||
}
|
||||
|
||||
#define F feature_bit
|
||||
@ -7257,6 +7358,22 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
|
||||
(svm->vmcb->control.intercept & (1ULL << INTERCEPT_INIT));
|
||||
}
|
||||
|
||||
static bool svm_check_apicv_inhibit_reasons(ulong bit)
|
||||
{
|
||||
ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
|
||||
BIT(APICV_INHIBIT_REASON_HYPERV) |
|
||||
BIT(APICV_INHIBIT_REASON_NESTED) |
|
||||
BIT(APICV_INHIBIT_REASON_IRQWIN) |
|
||||
BIT(APICV_INHIBIT_REASON_PIT_REINJ);
|
||||
|
||||
return supported & BIT(bit);
|
||||
}
|
||||
|
||||
static void svm_pre_update_apicv_exec_ctrl(struct kvm *kvm, bool activate)
|
||||
{
|
||||
avic_update_access_page(kvm, activate);
|
||||
}
|
||||
|
||||
static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
|
||||
.cpu_has_kvm_support = has_svm,
|
||||
.disabled_by_bios = is_disabled,
|
||||
@ -7274,7 +7391,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
|
||||
|
||||
.vm_alloc = svm_vm_alloc,
|
||||
.vm_free = svm_vm_free,
|
||||
.vm_init = avic_vm_init,
|
||||
.vm_init = svm_vm_init,
|
||||
.vm_destroy = svm_vm_destroy,
|
||||
|
||||
.prepare_guest_switch = svm_prepare_guest_switch,
|
||||
@ -7331,8 +7448,9 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
|
||||
.enable_irq_window = enable_irq_window,
|
||||
.update_cr8_intercept = update_cr8_intercept,
|
||||
.set_virtual_apic_mode = svm_set_virtual_apic_mode,
|
||||
.get_enable_apicv = svm_get_enable_apicv,
|
||||
.refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
|
||||
.check_apicv_inhibit_reasons = svm_check_apicv_inhibit_reasons,
|
||||
.pre_update_apicv_exec_ctrl = svm_pre_update_apicv_exec_ctrl,
|
||||
.load_eoi_exitmap = svm_load_eoi_exitmap,
|
||||
.hwapic_irr_update = svm_hwapic_irr_update,
|
||||
.hwapic_isr_update = svm_hwapic_isr_update,
|
||||
|
@ -1291,6 +1291,25 @@ TRACE_EVENT(kvm_hv_stimer_cleanup,
|
||||
__entry->vcpu_id, __entry->timer_index)
|
||||
);
|
||||
|
||||
TRACE_EVENT(kvm_apicv_update_request,
|
||||
TP_PROTO(bool activate, unsigned long bit),
|
||||
TP_ARGS(activate, bit),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(bool, activate)
|
||||
__field(unsigned long, bit)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->activate = activate;
|
||||
__entry->bit = bit;
|
||||
),
|
||||
|
||||
TP_printk("%s bit=%lu",
|
||||
__entry->activate ? "activate" : "deactivate",
|
||||
__entry->bit)
|
||||
);
|
||||
|
||||
/*
|
||||
* Tracepoint for AMD AVIC
|
||||
*/
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include "evmcs.h"
|
||||
#include "vmcs.h"
|
||||
#include "vmx.h"
|
||||
#include "trace.h"
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(enable_evmcs);
|
||||
|
||||
@ -346,6 +347,84 @@ uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata)
|
||||
{
|
||||
u32 ctl_low = (u32)*pdata;
|
||||
u32 ctl_high = (u32)(*pdata >> 32);
|
||||
|
||||
/*
|
||||
* Hyper-V 2016 and 2019 try using these features even when eVMCS
|
||||
* is enabled but there are no corresponding fields.
|
||||
*/
|
||||
switch (msr_index) {
|
||||
case MSR_IA32_VMX_EXIT_CTLS:
|
||||
case MSR_IA32_VMX_TRUE_EXIT_CTLS:
|
||||
ctl_high &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
|
||||
break;
|
||||
case MSR_IA32_VMX_ENTRY_CTLS:
|
||||
case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
|
||||
ctl_high &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
|
||||
break;
|
||||
case MSR_IA32_VMX_PROCBASED_CTLS2:
|
||||
ctl_high &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
|
||||
break;
|
||||
}
|
||||
|
||||
*pdata = ctl_low | ((u64)ctl_high << 32);
|
||||
}
|
||||
|
||||
int nested_evmcs_check_controls(struct vmcs12 *vmcs12)
|
||||
{
|
||||
int ret = 0;
|
||||
u32 unsupp_ctl;
|
||||
|
||||
unsupp_ctl = vmcs12->pin_based_vm_exec_control &
|
||||
EVMCS1_UNSUPPORTED_PINCTRL;
|
||||
if (unsupp_ctl) {
|
||||
trace_kvm_nested_vmenter_failed(
|
||||
"eVMCS: unsupported pin-based VM-execution controls",
|
||||
unsupp_ctl);
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
unsupp_ctl = vmcs12->secondary_vm_exec_control &
|
||||
EVMCS1_UNSUPPORTED_2NDEXEC;
|
||||
if (unsupp_ctl) {
|
||||
trace_kvm_nested_vmenter_failed(
|
||||
"eVMCS: unsupported secondary VM-execution controls",
|
||||
unsupp_ctl);
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
unsupp_ctl = vmcs12->vm_exit_controls &
|
||||
EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
|
||||
if (unsupp_ctl) {
|
||||
trace_kvm_nested_vmenter_failed(
|
||||
"eVMCS: unsupported VM-exit controls",
|
||||
unsupp_ctl);
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
unsupp_ctl = vmcs12->vm_entry_controls &
|
||||
EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
|
||||
if (unsupp_ctl) {
|
||||
trace_kvm_nested_vmenter_failed(
|
||||
"eVMCS: unsupported VM-entry controls",
|
||||
unsupp_ctl);
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
unsupp_ctl = vmcs12->vm_function_control & EVMCS1_UNSUPPORTED_VMFUNC;
|
||||
if (unsupp_ctl) {
|
||||
trace_kvm_nested_vmenter_failed(
|
||||
"eVMCS: unsupported VM-function controls",
|
||||
unsupp_ctl);
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int nested_enable_evmcs(struct kvm_vcpu *vcpu,
|
||||
uint16_t *vmcs_version)
|
||||
{
|
||||
@ -356,11 +435,5 @@ int nested_enable_evmcs(struct kvm_vcpu *vcpu,
|
||||
if (vmcs_version)
|
||||
*vmcs_version = nested_get_evmcs_version(vcpu);
|
||||
|
||||
vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
|
||||
vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
|
||||
vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
|
||||
vmx->nested.msrs.secondary_ctls_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
|
||||
vmx->nested.msrs.vmfunc_controls &= ~EVMCS1_UNSUPPORTED_VMFUNC;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -10,6 +10,7 @@
|
||||
|
||||
#include "capabilities.h"
|
||||
#include "vmcs.h"
|
||||
#include "vmcs12.h"
|
||||
|
||||
struct vmcs_config;
|
||||
|
||||
@ -201,5 +202,7 @@ bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa);
|
||||
uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu);
|
||||
int nested_enable_evmcs(struct kvm_vcpu *vcpu,
|
||||
uint16_t *vmcs_version);
|
||||
void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata);
|
||||
int nested_evmcs_check_controls(struct vmcs12 *vmcs12);
|
||||
|
||||
#endif /* __KVM_X86_VMX_EVMCS_H */
|
||||
|
@ -1074,10 +1074,10 @@ static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val)
|
||||
}
|
||||
|
||||
/*
|
||||
* Load guest's/host's cr3 at nested entry/exit. nested_ept is true if we are
|
||||
* emulating VM entry into a guest with EPT enabled.
|
||||
* Returns 0 on success, 1 on failure. Invalid state exit qualification code
|
||||
* is assigned to entry_failure_code on failure.
|
||||
* Load guest's/host's cr3 at nested entry/exit. @nested_ept is true if we are
|
||||
* emulating VM-Entry into a guest with EPT enabled. On failure, the expected
|
||||
* Exit Qualification (for a VM-Entry consistency check VM-Exit) is assigned to
|
||||
* @entry_failure_code.
|
||||
*/
|
||||
static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept,
|
||||
u32 *entry_failure_code)
|
||||
@ -2757,6 +2757,9 @@ static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
|
||||
nested_check_vm_entry_controls(vcpu, vmcs12))
|
||||
return -EINVAL;
|
||||
|
||||
if (to_vmx(vcpu)->nested.enlightened_vmcs_enabled)
|
||||
return nested_evmcs_check_controls(vmcs12);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -4723,8 +4726,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
|
||||
return nested_vmx_succeed(vcpu);
|
||||
}
|
||||
|
||||
static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch);
|
||||
|
||||
/* Emulate the VMLAUNCH instruction */
|
||||
static int handle_vmlaunch(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
|
@ -260,13 +260,12 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
break;
|
||||
default:
|
||||
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) {
|
||||
if (msr_info->host_initiated)
|
||||
pmc->counter = data;
|
||||
else
|
||||
pmc->counter = (s32)data;
|
||||
if (!msr_info->host_initiated)
|
||||
data = (s64)(s32)data;
|
||||
pmc->counter += data - pmc_read_counter(pmc);
|
||||
return 0;
|
||||
} else if ((pmc = get_fixed_pmc(pmu, msr))) {
|
||||
pmc->counter = data;
|
||||
pmc->counter += data - pmc_read_counter(pmc);
|
||||
return 0;
|
||||
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
|
||||
if (data == pmc->eventsel)
|
||||
|
@ -1428,8 +1428,6 @@ static bool emulation_required(struct kvm_vcpu *vcpu)
|
||||
return emulate_invalid_guest_state && !guest_state_valid(vcpu);
|
||||
}
|
||||
|
||||
static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
|
||||
|
||||
unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
@ -1853,8 +1851,20 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
|
||||
if (!nested_vmx_allowed(vcpu))
|
||||
return 1;
|
||||
return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
|
||||
&msr_info->data);
|
||||
if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
|
||||
&msr_info->data))
|
||||
return 1;
|
||||
/*
|
||||
* Enlightened VMCS v1 doesn't have certain fields, but buggy
|
||||
* Hyper-V versions are still trying to use corresponding
|
||||
* features when they are exposed. Filter out the essential
|
||||
* minimum.
|
||||
*/
|
||||
if (!msr_info->host_initiated &&
|
||||
vmx->nested.enlightened_vmcs_enabled)
|
||||
nested_evmcs_filter_control_msr(msr_info->index,
|
||||
&msr_info->data);
|
||||
break;
|
||||
case MSR_IA32_RTIT_CTL:
|
||||
if (pt_mode != PT_MODE_HOST_GUEST)
|
||||
return 1;
|
||||
@ -3719,11 +3729,6 @@ void pt_update_intercept_for_msr(struct vcpu_vmx *vmx)
|
||||
}
|
||||
}
|
||||
|
||||
static bool vmx_get_enable_apicv(struct kvm *kvm)
|
||||
{
|
||||
return enable_apicv;
|
||||
}
|
||||
|
||||
static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
@ -6813,6 +6818,7 @@ static int vmx_vm_init(struct kvm *kvm)
|
||||
break;
|
||||
}
|
||||
}
|
||||
kvm_apicv_init(kvm, enable_apicv);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -7714,6 +7720,14 @@ static __exit void hardware_unsetup(void)
|
||||
free_kvm_area();
|
||||
}
|
||||
|
||||
static bool vmx_check_apicv_inhibit_reasons(ulong bit)
|
||||
{
|
||||
ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
|
||||
BIT(APICV_INHIBIT_REASON_HYPERV);
|
||||
|
||||
return supported & BIT(bit);
|
||||
}
|
||||
|
||||
static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
|
||||
.cpu_has_kvm_support = cpu_has_kvm_support,
|
||||
.disabled_by_bios = vmx_disabled_by_bios,
|
||||
@ -7786,10 +7800,10 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
|
||||
.update_cr8_intercept = update_cr8_intercept,
|
||||
.set_virtual_apic_mode = vmx_set_virtual_apic_mode,
|
||||
.set_apic_access_page_addr = vmx_set_apic_access_page_addr,
|
||||
.get_enable_apicv = vmx_get_enable_apicv,
|
||||
.refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
|
||||
.load_eoi_exitmap = vmx_load_eoi_exitmap,
|
||||
.apicv_post_state_restore = vmx_apicv_post_state_restore,
|
||||
.check_apicv_inhibit_reasons = vmx_check_apicv_inhibit_reasons,
|
||||
.hwapic_irr_update = vmx_hwapic_irr_update,
|
||||
.hwapic_isr_update = vmx_hwapic_isr_update,
|
||||
.guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
|
||||
|
@@ -26,6 +26,7 @@
#include "cpuid.h"
#include "pmu.h"
#include "hyperv.h"
#include "lapic.h"

#include <linux/clocksource.h>
#include <linux/interrupt.h>
@@ -897,6 +898,8 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr);
		__reserved_bits |= X86_CR4_PKE;		\
	if (!__cpu_has(__c, X86_FEATURE_LA57))		\
		__reserved_bits |= X86_CR4_LA57;	\
	if (!__cpu_has(__c, X86_FEATURE_UMIP))		\
		__reserved_bits |= X86_CR4_UMIP;	\
	__reserved_bits;				\
})

@@ -1609,6 +1612,8 @@ struct pvclock_clock {
	u64 mask;
	u32 mult;
	u32 shift;
	u64 base_cycles;
	u64 offset;
};

struct pvclock_gtod_data {
@@ -1617,11 +1622,8 @@ struct pvclock_gtod_data {
	struct pvclock_clock clock; /* extract of a clocksource struct */
	struct pvclock_clock raw_clock; /* extract of a clocksource struct */

	u64 boot_ns_raw;
	u64 boot_ns;
	u64 nsec_base;
	ktime_t offs_boot;
	u64 wall_time_sec;
	u64 monotonic_raw_nsec;
};

static struct pvclock_gtod_data pvclock_gtod_data;
@@ -1629,10 +1631,6 @@ static struct pvclock_gtod_data pvclock_gtod_data;
static void update_pvclock_gtod(struct timekeeper *tk)
{
	struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
	u64 boot_ns, boot_ns_raw;

	boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
	boot_ns_raw = ktime_to_ns(ktime_add(tk->tkr_raw.base, tk->offs_boot));

	write_seqcount_begin(&vdata->seq);

@@ -1642,23 +1640,35 @@ static void update_pvclock_gtod(struct timekeeper *tk)
	vdata->clock.mask = tk->tkr_mono.mask;
	vdata->clock.mult = tk->tkr_mono.mult;
	vdata->clock.shift = tk->tkr_mono.shift;
	vdata->clock.base_cycles = tk->tkr_mono.xtime_nsec;
	vdata->clock.offset = tk->tkr_mono.base;

	vdata->raw_clock.vclock_mode = tk->tkr_raw.clock->archdata.vclock_mode;
	vdata->raw_clock.cycle_last = tk->tkr_raw.cycle_last;
	vdata->raw_clock.mask = tk->tkr_raw.mask;
	vdata->raw_clock.mult = tk->tkr_raw.mult;
	vdata->raw_clock.shift = tk->tkr_raw.shift;

	vdata->boot_ns = boot_ns;
	vdata->nsec_base = tk->tkr_mono.xtime_nsec;
	vdata->raw_clock.base_cycles = tk->tkr_raw.xtime_nsec;
	vdata->raw_clock.offset = tk->tkr_raw.base;

	vdata->wall_time_sec = tk->xtime_sec;

	vdata->boot_ns_raw = boot_ns_raw;
	vdata->monotonic_raw_nsec = tk->tkr_raw.xtime_nsec;
	vdata->offs_boot = tk->offs_boot;

	write_seqcount_end(&vdata->seq);
}

static s64 get_kvmclock_base_ns(void)
{
	/* Count up from boot time, but with the frequency of the raw clock. */
	return ktime_to_ns(ktime_add(ktime_get_raw(), pvclock_gtod_data.offs_boot));
}
#else
static s64 get_kvmclock_base_ns(void)
{
	/* Master clock not used, so we can just use CLOCK_BOOTTIME. */
	return ktime_get_boottime_ns();
}
#endif

void kvm_set_pending_timer(struct kvm_vcpu *vcpu)
@@ -1672,7 +1682,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
	int version;
	int r;
	struct pvclock_wall_clock wc;
	struct timespec64 boot;
	u64 wall_nsec;

	if (!wall_clock)
		return;
@@ -1692,17 +1702,12 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
	/*
	 * The guest calculates current wall clock time by adding
	 * system time (updated by kvm_guest_time_update below) to the
	 * wall clock specified here. guest system time equals host
	 * system time for us, thus we must fill in host boot time here.
	 * wall clock specified here. We do the reverse here.
	 */
	getboottime64(&boot);
	wall_nsec = ktime_get_real_ns() - get_kvmclock_ns(kvm);

	if (kvm->arch.kvmclock_offset) {
		struct timespec64 ts = ns_to_timespec64(kvm->arch.kvmclock_offset);
		boot = timespec64_sub(boot, ts);
	}
	wc.sec = (u32)boot.tv_sec; /* overflow in 2106 guest time */
	wc.nsec = boot.tv_nsec;
	wc.nsec = do_div(wall_nsec, 1000000000);
	wc.sec = (u32)wall_nsec; /* overflow in 2106 guest time */
	wc.version = version;

	kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
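
Because the guest reconstructs wall-clock time by adding its kvmclock reading to the value stored here, the host now writes real time minus the current kvmclock, split into seconds and nanoseconds the way do_div() splits quotient and remainder. A minimal stand-alone sketch of that split, using plain C types rather than kernel helpers (the function name and sample value are illustrative only):

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

/* Split a nanosecond count the way the wall-clock update does. */
static void split_wall_clock(uint64_t wall_nsec, uint32_t *sec, uint32_t *nsec)
{
	*nsec = (uint32_t)(wall_nsec % NSEC_PER_SEC);  /* do_div() remainder */
	*sec  = (uint32_t)(wall_nsec / NSEC_PER_SEC);  /* truncates in 2106  */
}

int main(void)
{
	uint32_t sec, nsec;

	/* e.g. host real time minus current kvmclock came out to ~3.25 s */
	split_wall_clock(3250000000ULL, &sec, &nsec);
	printf("sec=%u nsec=%u\n", sec, nsec);  /* sec=3 nsec=250000000 */
	return 0;
}
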
@@ -1950,7 +1955,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)

	raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
	offset = kvm_compute_tsc_offset(vcpu, data);
	ns = ktime_get_boottime_ns();
	ns = get_kvmclock_base_ns();
	elapsed = ns - kvm->arch.last_tsc_nsec;

	if (vcpu->arch.virtual_tsc_khz) {
@@ -2125,10 +2130,10 @@ static int do_monotonic_raw(s64 *t, u64 *tsc_timestamp)

	do {
		seq = read_seqcount_begin(&gtod->seq);
		ns = gtod->monotonic_raw_nsec;
		ns = gtod->raw_clock.base_cycles;
		ns += vgettsc(&gtod->raw_clock, tsc_timestamp, &mode);
		ns >>= gtod->clock.shift;
		ns += gtod->boot_ns_raw;
		ns >>= gtod->raw_clock.shift;
		ns += ktime_to_ns(ktime_add(gtod->raw_clock.offset, gtod->offs_boot));
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
	*t = ns;

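
The raw-clock read above is the usual clocksource conversion: accumulated fractional nanoseconds plus the scaled cycle delta, shifted down, plus a boot-adjusted base. A stand-alone sketch of that arithmetic with plain integer types (the numbers below are illustrative only, not kernel values):

#include <stdint.h>
#include <stdio.h>

/* Clocksource-style conversion: ((base_cycles + delta * mult) >> shift) + offset_ns. */
static uint64_t cycles_to_boot_ns(uint64_t tsc, uint64_t cycle_last,
				  uint64_t base_cycles, uint32_t mult,
				  uint32_t shift, uint64_t offset_ns)
{
	uint64_t ns = base_cycles;        /* fractional ns accumulated so far */

	ns += (tsc - cycle_last) * mult;  /* scale the cycle delta            */
	ns >>= shift;                     /* convert to whole nanoseconds     */
	return ns + offset_ns;            /* add the boot-adjusted clock base */
}

int main(void)
{
	/* mult >> shift ~= 1 ns per cycle here, purely for illustration */
	printf("%llu\n", (unsigned long long)
	       cycles_to_boot_ns(2000, 1000, 0, 1 << 10, 10, 5000)); /* 6000 */
	return 0;
}
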
@@ -2145,7 +2150,7 @@ static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp)
	do {
		seq = read_seqcount_begin(&gtod->seq);
		ts->tv_sec = gtod->wall_time_sec;
		ns = gtod->nsec_base;
		ns = gtod->clock.base_cycles;
		ns += vgettsc(&gtod->clock, tsc_timestamp, &mode);
		ns >>= gtod->clock.shift;
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
@@ -2288,7 +2293,7 @@ u64 get_kvmclock_ns(struct kvm *kvm)
	spin_lock(&ka->pvclock_gtod_sync_lock);
	if (!ka->use_master_clock) {
		spin_unlock(&ka->pvclock_gtod_sync_lock);
		return ktime_get_boottime_ns() + ka->kvmclock_offset;
		return get_kvmclock_base_ns() + ka->kvmclock_offset;
	}

	hv_clock.tsc_timestamp = ka->master_cycle_now;
@@ -2304,7 +2309,7 @@ u64 get_kvmclock_ns(struct kvm *kvm)
				   &hv_clock.tsc_to_system_mul);
		ret = __pvclock_read_cycles(&hv_clock, rdtsc());
	} else
		ret = ktime_get_boottime_ns() + ka->kvmclock_offset;
		ret = get_kvmclock_base_ns() + ka->kvmclock_offset;

	put_cpu();

@@ -2403,7 +2408,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
	}
	if (!use_master_clock) {
		host_tsc = rdtsc();
		kernel_ns = ktime_get_boottime_ns();
		kernel_ns = get_kvmclock_base_ns();
	}

	tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
@@ -2443,6 +2448,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
	vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
	vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
	vcpu->last_guest_tsc = tsc_timestamp;
	WARN_ON(vcpu->hv_clock.system_time < 0);

	/* If the host uses TSC clocksource, then it is stable */
	pvclock_flags = 0;
@@ -7456,18 +7462,22 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
	kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
}

void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
bool kvm_apicv_activated(struct kvm *kvm)
{
	if (!lapic_in_kernel(vcpu)) {
		WARN_ON_ONCE(vcpu->arch.apicv_active);
		return;
	}
	if (!vcpu->arch.apicv_active)
		return;

	vcpu->arch.apicv_active = false;
	kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
	return (READ_ONCE(kvm->arch.apicv_inhibit_reasons) == 0);
}
EXPORT_SYMBOL_GPL(kvm_apicv_activated);

void kvm_apicv_init(struct kvm *kvm, bool enable)
{
	if (enable)
		clear_bit(APICV_INHIBIT_REASON_DISABLE,
			  &kvm->arch.apicv_inhibit_reasons);
	else
		set_bit(APICV_INHIBIT_REASON_DISABLE,
			&kvm->arch.apicv_inhibit_reasons);
}
EXPORT_SYMBOL_GPL(kvm_apicv_init);

static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id)
{
@@ -7996,6 +8006,47 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm)
	kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
}

void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
{
	if (!lapic_in_kernel(vcpu))
		return;

	vcpu->arch.apicv_active = kvm_apicv_activated(vcpu->kvm);
	kvm_apic_update_apicv(vcpu);
	kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);

/*
 * NOTE: Do not hold any lock prior to calling this.
 *
 * In particular, kvm_request_apicv_update() expects kvm->srcu not to be
 * locked, because it calls __x86_set_memory_region() which does
 * synchronize_srcu(&kvm->srcu).
 */
void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
{
	if (!kvm_x86_ops->check_apicv_inhibit_reasons ||
	    !kvm_x86_ops->check_apicv_inhibit_reasons(bit))
		return;

	if (activate) {
		if (!test_and_clear_bit(bit, &kvm->arch.apicv_inhibit_reasons) ||
		    !kvm_apicv_activated(kvm))
			return;
	} else {
		if (test_and_set_bit(bit, &kvm->arch.apicv_inhibit_reasons) ||
		    kvm_apicv_activated(kvm))
			return;
	}

	trace_kvm_apicv_update_request(activate, bit);
	if (kvm_x86_ops->pre_update_apicv_exec_ctrl)
		kvm_x86_ops->pre_update_apicv_exec_ctrl(kvm, activate);
	kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE);
}
EXPORT_SYMBOL_GPL(kvm_request_apicv_update);

static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
{
	if (!kvm_apic_present(vcpu))
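
The inhibit-reasons word introduced above behaves as a plain bitmask: APICv counts as activated only while the word is zero, and each requester flips exactly one bit and reacts only when the overall state changes. A small self-contained sketch of that pattern (the names, and the lack of atomics, are simplifications for illustration, not the kernel's code):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative reason bits, mirroring the idea of APICV_INHIBIT_REASON_*. */
enum { REASON_DISABLE = 0, REASON_HYPERV = 1, REASON_IRQWIN = 2 };

static unsigned long inhibit_reasons;

/* APICv is considered active only while no inhibit reason is set. */
static bool apicv_activated(void)
{
	return inhibit_reasons == 0;
}

/* Set or clear one reason bit; report whether the overall state flipped. */
static bool request_update(bool activate, int bit)
{
	unsigned long old = inhibit_reasons;

	if (activate)
		inhibit_reasons &= ~(1UL << bit);
	else
		inhibit_reasons |= 1UL << bit;

	return (old == 0) != (inhibit_reasons == 0);
}

int main(void)
{
	printf("flip=%d active=%d\n", request_update(false, REASON_HYPERV),
	       apicv_activated());   /* flip=1 active=0 */
	printf("flip=%d active=%d\n", request_update(true, REASON_HYPERV),
	       apicv_activated());   /* flip=1 active=1 */
	return 0;
}
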
@@ -8186,6 +8237,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
		 */
		if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
			kvm_hv_process_stimers(vcpu);
		if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
			kvm_vcpu_update_apicv(vcpu);
	}

	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -9219,10 +9272,11 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
		return r;

	if (irqchip_in_kernel(vcpu->kvm)) {
		vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu->kvm);
		r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
		if (r < 0)
			goto fail_mmu_destroy;
		if (kvm_apicv_activated(vcpu->kvm))
			vcpu->arch.apicv_active = true;
	} else
		static_key_slow_inc(&kvm_no_apic_vcpu);

@@ -9633,7 +9687,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
	mutex_init(&kvm->arch.apic_map_lock);
	spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);

	kvm->arch.kvmclock_offset = -ktime_get_boottime_ns();
	kvm->arch.kvmclock_offset = -get_kvmclock_base_ns();
	pvclock_update_vm_gtod_copy(kvm);

	kvm->arch.guest_can_read_msr_platform_info = true;
@@ -10448,3 +10502,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request);

@@ -357,7 +357,7 @@ static inline bool kvm_pat_valid(u64 data)
	return (data | ((data & 0x0202020202020202ull) << 1)) == data;
}

static inline bool kvm_dr7_valid(unsigned long data)
static inline bool kvm_dr7_valid(u64 data)
{
	/* Bits [63:32] are reserved */
	return !(data >> 32);

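
Taking a u64 matters because on a 32-bit build an unsigned long argument would drop bits [63:32] before the reserved-bit check could ever see them; with a fixed-width type the test always runs on the full value. A tiny illustration of the check itself:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Reserved-bit check in the style of kvm_dr7_valid(): bits [63:32] must be zero. */
static bool dr7_valid(uint64_t data)
{
	return !(data >> 32);
}

int main(void)
{
	printf("%d %d\n", dr7_valid(0x400),      /* 1: only low bits set */
	       dr7_valid(0x1ULL << 32));         /* 0: reserved bit set  */
	return 0;
}
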
@@ -1009,6 +1009,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_PPC_GUEST_DEBUG_SSTEP 176
#define KVM_CAP_ARM_NISV_TO_USER 177
#define KVM_CAP_ARM_INJECT_EXT_DABT 178
#define KVM_CAP_S390_VCPU_RESETS 179

#ifdef KVM_CAP_IRQ_ROUTING

@@ -1473,6 +1474,10 @@ struct kvm_enc_region {
/* Available with KVM_CAP_ARM_SVE */
#define KVM_ARM_VCPU_FINALIZE	_IOW(KVMIO, 0xc2, int)

/* Available with KVM_CAP_S390_VCPU_RESETS */
#define KVM_S390_NORMAL_RESET	_IO(KVMIO, 0xc3)
#define KVM_S390_CLEAR_RESET	_IO(KVMIO, 0xc4)

/* Secure Encrypted Virtualization command */
enum sev_cmd_id {
	/* Guest initialization commands */

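
For completeness, user space drives the new resets by issuing these ioctls on a vCPU file descriptor after probing the capability. A minimal sketch, assuming kernel headers that already carry the new definitions and a vm_fd/vcpu_fd obtained elsewhere via KVM_CREATE_VM and KVM_CREATE_VCPU:

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch only: vm_fd and vcpu_fd are assumed to be valid KVM descriptors. */
static int s390_normal_reset(int vm_fd, int vcpu_fd)
{
	if (!ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_VCPU_RESETS)) {
		fprintf(stderr, "KVM_CAP_S390_VCPU_RESETS not supported\n");
		return -1;
	}
	/* KVM_S390_INITIAL_RESET and KVM_S390_CLEAR_RESET are issued the same way. */
	return ioctl(vcpu_fd, KVM_S390_NORMAL_RESET, 0);
}
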
@@ -36,6 +36,7 @@ TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus

TEST_GEN_PROGS_s390x = s390x/memop
TEST_GEN_PROGS_s390x += s390x/sync_regs_test
TEST_GEN_PROGS_s390x += s390x/resets
TEST_GEN_PROGS_s390x += dirty_log_test
TEST_GEN_PROGS_s390x += kvm_create_max_vcpus

@@ -125,6 +125,12 @@ void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
		    struct kvm_sregs *sregs);
int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
		    struct kvm_sregs *sregs);
void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid,
		  struct kvm_fpu *fpu);
void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid,
		  struct kvm_fpu *fpu);
void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg);
void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg);
#ifdef __KVM_HAVE_VCPU_EVENTS
void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
		     struct kvm_vcpu_events *events);

@@ -1373,6 +1373,42 @@ int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
	return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
}

void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
{
	int ret;

	ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_FPU, fpu);
	TEST_ASSERT(ret == 0, "KVM_GET_FPU failed, rc: %i errno: %i (%s)",
		    ret, errno, strerror(errno));
}

void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
{
	int ret;

	ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_FPU, fpu);
	TEST_ASSERT(ret == 0, "KVM_SET_FPU failed, rc: %i errno: %i (%s)",
		    ret, errno, strerror(errno));
}

void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
{
	int ret;

	ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, reg);
	TEST_ASSERT(ret == 0, "KVM_GET_ONE_REG failed, rc: %i errno: %i (%s)",
		    ret, errno, strerror(errno));
}

void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
{
	int ret;

	ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, reg);
	TEST_ASSERT(ret == 0, "KVM_SET_ONE_REG failed, rc: %i errno: %i (%s)",
		    ret, errno, strerror(errno));
}

/*
 * VCPU Ioctl
 *

tools/testing/selftests/kvm/s390x/resets.c (new file, 197 lines)
@@ -0,0 +1,197 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Test for s390x CPU resets
 *
 * Copyright (C) 2020, IBM
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

#include "test_util.h"
#include "kvm_util.h"

#define VCPU_ID 3
#define LOCAL_IRQS 32

struct kvm_s390_irq buf[VCPU_ID + LOCAL_IRQS];

struct kvm_vm *vm;
struct kvm_run *run;
struct kvm_sync_regs *regs;
static uint64_t regs_null[16];

static uint64_t crs[16] = { 0x40000ULL,
			    0x42000ULL,
			    0, 0, 0, 0, 0,
			    0x43000ULL,
			    0, 0, 0, 0, 0,
			    0x44000ULL,
			    0, 0
};

static void guest_code_initial(void)
{
	/* Round toward 0 */
	uint32_t fpc = 0x11;

	/* Dirty registers */
	asm volatile (
		"	lctlg	0,15,%0\n"
		"	sfpc	%1\n"
		: : "Q" (crs), "d" (fpc));
	GUEST_SYNC(0);
}

static void test_one_reg(uint64_t id, uint64_t value)
{
	struct kvm_one_reg reg;
	uint64_t eval_reg;

	reg.addr = (uintptr_t)&eval_reg;
	reg.id = id;
	vcpu_get_reg(vm, VCPU_ID, &reg);
	TEST_ASSERT(eval_reg == value, "value == 0x%lx", value);
}

static void assert_noirq(void)
{
	struct kvm_s390_irq_state irq_state;
	int irqs;

	irq_state.len = sizeof(buf);
	irq_state.buf = (unsigned long)buf;
	irqs = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_GET_IRQ_STATE, &irq_state);
	/*
	 * irqs contains the number of retrieved interrupts. Any interrupt
	 * (notably, the emergency call interrupt we have injected) should
	 * be cleared by the resets, so this should be 0.
	 */
	TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d\n", errno);
	TEST_ASSERT(!irqs, "IRQ pending");
}

static void assert_clear(void)
{
	struct kvm_sregs sregs;
	struct kvm_regs regs;
	struct kvm_fpu fpu;

	vcpu_regs_get(vm, VCPU_ID, &regs);
	TEST_ASSERT(!memcmp(&regs.gprs, regs_null, sizeof(regs.gprs)), "grs == 0");

	vcpu_sregs_get(vm, VCPU_ID, &sregs);
	TEST_ASSERT(!memcmp(&sregs.acrs, regs_null, sizeof(sregs.acrs)), "acrs == 0");

	vcpu_fpu_get(vm, VCPU_ID, &fpu);
	TEST_ASSERT(!memcmp(&fpu.fprs, regs_null, sizeof(fpu.fprs)), "fprs == 0");
}

static void assert_initial(void)
{
	struct kvm_sregs sregs;
	struct kvm_fpu fpu;

	vcpu_sregs_get(vm, VCPU_ID, &sregs);
	TEST_ASSERT(sregs.crs[0] == 0xE0UL, "cr0 == 0xE0");
	TEST_ASSERT(sregs.crs[14] == 0xC2000000UL, "cr14 == 0xC2000000");
	TEST_ASSERT(!memcmp(&sregs.crs[1], regs_null, sizeof(sregs.crs[1]) * 12),
		    "cr1-13 == 0");
	TEST_ASSERT(sregs.crs[15] == 0, "cr15 == 0");

	vcpu_fpu_get(vm, VCPU_ID, &fpu);
	TEST_ASSERT(!fpu.fpc, "fpc == 0");

	test_one_reg(KVM_REG_S390_GBEA, 1);
	test_one_reg(KVM_REG_S390_PP, 0);
	test_one_reg(KVM_REG_S390_TODPR, 0);
	test_one_reg(KVM_REG_S390_CPU_TIMER, 0);
	test_one_reg(KVM_REG_S390_CLOCK_COMP, 0);
}

static void assert_normal(void)
{
	test_one_reg(KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID);
	assert_noirq();
}

static void inject_irq(int cpu_id)
{
	struct kvm_s390_irq_state irq_state;
	struct kvm_s390_irq *irq = &buf[0];
	int irqs;

	/* Inject IRQ */
	irq_state.len = sizeof(struct kvm_s390_irq);
	irq_state.buf = (unsigned long)buf;
	irq->type = KVM_S390_INT_EMERGENCY;
	irq->u.emerg.code = cpu_id;
	irqs = _vcpu_ioctl(vm, cpu_id, KVM_S390_SET_IRQ_STATE, &irq_state);
	TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d\n", errno);
}

static void test_normal(void)
{
	printf("Testing normal reset\n");
	/* Create VM */
	vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
	run = vcpu_state(vm, VCPU_ID);
	regs = &run->s.regs;

	vcpu_run(vm, VCPU_ID);

	inject_irq(VCPU_ID);

	vcpu_ioctl(vm, VCPU_ID, KVM_S390_NORMAL_RESET, 0);
	assert_normal();
	kvm_vm_free(vm);
}

static void test_initial(void)
{
	printf("Testing initial reset\n");
	vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
	run = vcpu_state(vm, VCPU_ID);
	regs = &run->s.regs;

	vcpu_run(vm, VCPU_ID);

	inject_irq(VCPU_ID);

	vcpu_ioctl(vm, VCPU_ID, KVM_S390_INITIAL_RESET, 0);
	assert_normal();
	assert_initial();
	kvm_vm_free(vm);
}

static void test_clear(void)
{
	printf("Testing clear reset\n");
	vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
	run = vcpu_state(vm, VCPU_ID);
	regs = &run->s.regs;

	vcpu_run(vm, VCPU_ID);

	inject_irq(VCPU_ID);

	vcpu_ioctl(vm, VCPU_ID, KVM_S390_CLEAR_RESET, 0);
	assert_normal();
	assert_initial();
	assert_clear();
	kvm_vm_free(vm);
}

int main(int argc, char *argv[])
{
	setbuf(stdout, NULL);	/* Tell stdout not to buffer its content */

	test_initial();
	if (kvm_check_cap(KVM_CAP_S390_VCPU_RESETS)) {
		test_normal();
		test_clear();
	}
	return 0;
}

@@ -186,6 +186,7 @@ bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
	 */
	if (pfn_valid(pfn))
		return PageReserved(pfn_to_page(pfn)) &&
		       !is_zero_pfn(pfn) &&
		       !kvm_is_zone_device_pfn(pfn);

	return true;