- Fix the handling of the phycal timer offset when FEAT_ECV
   and CNTPOFF_EL2 are implemented.
 
 - Restore the functionnality of Permission Indirection that
   was broken by the Fine Grained Trapping rework
 
 - Cleanup some PMU event sharing code
 
 MIPS:
 
 - Fix W=1 build.
 
 s390:
 
 - One small fix for gisa to avoid stalls.
 
 x86:
 
 - Truncate writes to PMU counters to the counter's width to avoid spurious
   overflows when emulating counter events in software.
 
 - Set the LVTPC entry mask bit when handling a PMI (to match Intel-defined
   architectural behavior).
 
 - Treat KVM_REQ_PMI as a wake event instead of queueing host IRQ work to
   kick the guest out of emulated halt.
 
 - Fix for loading XSAVE state from an old kernel into a new one.
 
 - Fixes for AMD AVIC
 
 selftests:
 
 - Play nice with %llx when formatting guest printf and assert statements.
 
 - Clean up stale test metadata.
 
 - Zero-initialize structures in memslot perf test to workaround a suspected
   "may be used uninitialized" false positives from GCC.
 -----BEGIN PGP SIGNATURE-----
 
 iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmUtvXgUHHBib256aW5p
 QHJlZGhhdC5jb20ACgkQv/vSX3jHroOE3gf/Q0Xvi/oU/+iDMuvfCbMZg/nhbrsa
 WmE/zXLrCF0DknppAsWulkhLGL2ceL6X+f37f2vWpBdG9SVDG/vSAg+QQDwsXiKN
 hTJoaybtMMPZM64emPZKeLMrq3A/U32qIMmWMJkoQRyz6dftUhGqZEuy1jw8oomJ
 n9idRDCMkbo+bick4URt0FEuI3Tf6dPIlG7P5hObFTw+nenzzxTjoxWZ432Mgyod
 yqveEke4hcQ+6K1zdAcDNZqT9ZhxeTxAO4yrBAYfnFoPLhUXKDUumkqAQPNOhKTo
 YN+b29kHBm+HvYkHN785FQla/13wjE1aq5TUj5J7NEDv4uRXDefDq2OAeg==
 =b9AY
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "ARM:

   - Fix the handling of the phycal timer offset when FEAT_ECV and
     CNTPOFF_EL2 are implemented

   - Restore the functionnality of Permission Indirection that was
     broken by the Fine Grained Trapping rework

   - Cleanup some PMU event sharing code

  MIPS:

   - Fix W=1 build

  s390:

   - One small fix for gisa to avoid stalls

  x86:

   - Truncate writes to PMU counters to the counter's width to avoid
     spurious overflows when emulating counter events in software

   - Set the LVTPC entry mask bit when handling a PMI (to match
     Intel-defined architectural behavior)

   - Treat KVM_REQ_PMI as a wake event instead of queueing host IRQ work
     to kick the guest out of emulated halt

   - Fix for loading XSAVE state from an old kernel into a new one

   - Fixes for AMD AVIC

  selftests:

   - Play nice with %llx when formatting guest printf and assert
     statements

   - Clean up stale test metadata

   - Zero-initialize structures in memslot perf test to workaround a
     suspected 'may be used uninitialized' false positives from GCC"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (21 commits)
  KVM: arm64: timers: Correctly handle TGE flip with CNTPOFF_EL2
  KVM: arm64: POR{E0}_EL1 do not need trap handlers
  KVM: arm64: Add nPIR{E0}_EL1 to HFG traps
  KVM: MIPS: fix -Wunused-but-set-variable warning
  KVM: arm64: pmu: Drop redundant check for non-NULL kvm_pmu_events
  KVM: SVM: Fix build error when using -Werror=unused-but-set-variable
  x86: KVM: SVM: refresh AVIC inhibition in svm_leave_nested()
  x86: KVM: SVM: add support for Invalid IPI Vector interception
  x86: KVM: SVM: always update the x2avic msr interception
  KVM: selftests: Force load all supported XSAVE state in state test
  KVM: selftests: Load XSAVE state into untouched vCPU during state test
  KVM: selftests: Touch relevant XSAVE state in guest for state test
  KVM: x86: Constrain guest-supported xfeatures only at KVM_GET_XSAVE{2}
  x86/fpu: Allow caller to constrain xfeatures when copying to uabi buffer
  KVM: selftests: Zero-initialize entire test_result in memslot perf test
  KVM: selftests: Remove obsolete and incorrect test case metadata
  KVM: selftests: Treat %llx like %lx when formatting guest printf
  KVM: x86/pmu: Synthesize at most one PMI per VM-exit
  KVM: x86: Mask LVTPC when handling a PMI
  KVM: x86/pmu: Truncate counter value to allowed width on write
  ...
This commit is contained in:
Linus Torvalds 2023-10-16 18:34:17 -07:00
commit 86d6a628a2
36 changed files with 276 additions and 120 deletions

View File

@ -344,14 +344,14 @@
*/
#define __HFGRTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51))
#define __HFGRTR_EL2_MASK GENMASK(49, 0)
#define __HFGRTR_EL2_nMASK (GENMASK(55, 54) | BIT(50))
#define __HFGRTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50))
#define __HFGWTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51) | \
BIT(46) | BIT(42) | BIT(40) | BIT(28) | \
GENMASK(26, 25) | BIT(21) | BIT(18) | \
GENMASK(15, 14) | GENMASK(10, 9) | BIT(2))
#define __HFGWTR_EL2_MASK GENMASK(49, 0)
#define __HFGWTR_EL2_nMASK (GENMASK(55, 54) | BIT(50))
#define __HFGWTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50))
#define __HFGITR_EL2_RES0 GENMASK(63, 57)
#define __HFGITR_EL2_MASK GENMASK(54, 0)

View File

@ -55,11 +55,6 @@ static struct irq_ops arch_timer_irq_ops = {
.get_input_level = kvm_arch_timer_get_input_level,
};
static bool has_cntpoff(void)
{
return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
}
static int nr_timers(struct kvm_vcpu *vcpu)
{
if (!vcpu_has_nv(vcpu))
@ -180,7 +175,7 @@ u64 kvm_phys_timer_read(void)
return timecounter->cc->read(timecounter->cc);
}
static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
{
if (vcpu_has_nv(vcpu)) {
if (is_hyp_ctxt(vcpu)) {
@ -548,8 +543,7 @@ static void timer_save_state(struct arch_timer_context *ctx)
timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
cval = read_sysreg_el0(SYS_CNTP_CVAL);
if (!has_cntpoff())
cval -= timer_get_offset(ctx);
cval -= timer_get_offset(ctx);
timer_set_cval(ctx, cval);
@ -636,8 +630,7 @@ static void timer_restore_state(struct arch_timer_context *ctx)
cval = timer_get_cval(ctx);
offset = timer_get_offset(ctx);
set_cntpoff(offset);
if (!has_cntpoff())
cval += offset;
cval += offset;
write_sysreg_el0(cval, SYS_CNTP_CVAL);
isb();
write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);

View File

@ -977,6 +977,8 @@ enum fg_filter_id {
static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
/* HFGRTR_EL2, HFGWTR_EL2 */
SR_FGT(SYS_PIR_EL1, HFGxTR, nPIR_EL1, 0),
SR_FGT(SYS_PIRE0_EL1, HFGxTR, nPIRE0_EL1, 0),
SR_FGT(SYS_TPIDR2_EL0, HFGxTR, nTPIDR2_EL0, 0),
SR_FGT(SYS_SMPRI_EL1, HFGxTR, nSMPRI_EL1, 0),
SR_FGT(SYS_ACCDATA_EL1, HFGxTR, nACCDATA_EL1, 0),

View File

@ -39,6 +39,26 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
___activate_traps(vcpu);
if (has_cntpoff()) {
struct timer_map map;
get_timer_map(vcpu, &map);
/*
* We're entrering the guest. Reload the correct
* values from memory now that TGE is clear.
*/
if (map.direct_ptimer == vcpu_ptimer(vcpu))
val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
if (map.direct_ptimer == vcpu_hptimer(vcpu))
val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
if (map.direct_ptimer) {
write_sysreg_el0(val, SYS_CNTP_CVAL);
isb();
}
}
val = read_sysreg(cpacr_el1);
val |= CPACR_ELx_TTA;
val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN |
@ -77,6 +97,30 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
if (has_cntpoff()) {
struct timer_map map;
u64 val, offset;
get_timer_map(vcpu, &map);
/*
* We're exiting the guest. Save the latest CVAL value
* to memory and apply the offset now that TGE is set.
*/
val = read_sysreg_el0(SYS_CNTP_CVAL);
if (map.direct_ptimer == vcpu_ptimer(vcpu))
__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val;
if (map.direct_ptimer == vcpu_hptimer(vcpu))
__vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = val;
offset = read_sysreg_s(SYS_CNTPOFF_EL2);
if (map.direct_ptimer && offset) {
write_sysreg_el0(val + offset, SYS_CNTP_CVAL);
isb();
}
}
/*
* ARM errata 1165522 and 1530923 require the actual execution of the
* above before we can switch to the EL2/EL0 translation regime used by

View File

@ -39,7 +39,7 @@ void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr)
{
struct kvm_pmu_events *pmu = kvm_get_pmu_events();
if (!kvm_arm_support_pmu_v3() || !pmu || !kvm_pmu_switch_needed(attr))
if (!kvm_arm_support_pmu_v3() || !kvm_pmu_switch_needed(attr))
return;
if (!attr->exclude_host)
@ -55,7 +55,7 @@ void kvm_clr_pmu_events(u32 clr)
{
struct kvm_pmu_events *pmu = kvm_get_pmu_events();
if (!kvm_arm_support_pmu_v3() || !pmu)
if (!kvm_arm_support_pmu_v3())
return;
pmu->events_host &= ~clr;

View File

@ -2122,8 +2122,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_PMMIR_EL1), trap_raz_wi },
{ SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 },
{ SYS_DESC(SYS_PIRE0_EL1), access_vm_reg, reset_unknown, PIRE0_EL1 },
{ SYS_DESC(SYS_PIR_EL1), access_vm_reg, reset_unknown, PIR_EL1 },
{ SYS_DESC(SYS_PIRE0_EL1), NULL, reset_unknown, PIRE0_EL1 },
{ SYS_DESC(SYS_PIR_EL1), NULL, reset_unknown, PIR_EL1 },
{ SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
{ SYS_DESC(SYS_LORSA_EL1), trap_loregion },

View File

@ -592,7 +592,7 @@ static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa,
gfn_t gfn = gpa >> PAGE_SHIFT;
int srcu_idx, err;
kvm_pfn_t pfn;
pte_t *ptep, entry, old_pte;
pte_t *ptep, entry;
bool writeable;
unsigned long prot_bits;
unsigned long mmu_seq;
@ -664,7 +664,6 @@ retry:
entry = pfn_pte(pfn, __pgprot(prot_bits));
/* Write the PTE */
old_pte = *ptep;
set_pte(ptep, entry);
err = 0;

View File

@ -303,11 +303,6 @@ static inline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi)
return 0;
}
static inline int gisa_in_alert_list(struct kvm_s390_gisa *gisa)
{
return READ_ONCE(gisa->next_alert) != (u32)virt_to_phys(gisa);
}
static inline void gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
{
set_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
@ -3216,11 +3211,12 @@ void kvm_s390_gisa_destroy(struct kvm *kvm)
if (!gi->origin)
return;
if (gi->alert.mask)
KVM_EVENT(3, "vm 0x%pK has unexpected iam 0x%02x",
kvm, gi->alert.mask);
while (gisa_in_alert_list(gi->origin))
cpu_relax();
WARN(gi->alert.mask != 0x00,
"unexpected non zero alert.mask 0x%02x",
gi->alert.mask);
gi->alert.mask = 0x00;
if (gisa_set_iam(gi->origin, gi->alert.mask))
process_gib_alert_list();
hrtimer_cancel(&gi->timer);
gi->origin = NULL;
VM_EVENT(kvm, 3, "gisa 0x%pK destroyed", gisa);

View File

@ -157,7 +157,8 @@ static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) {
static inline void fpu_sync_guest_vmexit_xfd_state(void) { }
#endif
extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru);
extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
unsigned int size, u64 xfeatures, u32 pkru);
extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru);
static inline void fpstate_set_confidential(struct fpu_guest *gfpu)

View File

@ -528,7 +528,6 @@ struct kvm_pmu {
u64 raw_event_mask;
struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC];
struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
struct irq_work irq_work;
/*
* Overlay the bitmap with a 64-bit atomic so that all bits can be

View File

@ -268,6 +268,7 @@ enum avic_ipi_failure_cause {
AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
AVIC_IPI_FAILURE_INVALID_TARGET,
AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
AVIC_IPI_FAILURE_INVALID_IPI_VECTOR,
};
#define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(8, 0)

View File

@ -369,14 +369,15 @@ int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest)
EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate);
void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
unsigned int size, u32 pkru)
unsigned int size, u64 xfeatures, u32 pkru)
{
struct fpstate *kstate = gfpu->fpstate;
union fpregs_state *ustate = buf;
struct membuf mb = { .p = buf, .left = size };
if (cpu_feature_enabled(X86_FEATURE_XSAVE)) {
__copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE);
__copy_xstate_to_uabi_buf(mb, kstate, xfeatures, pkru,
XSTATE_COPY_XSAVE);
} else {
memcpy(&ustate->fxsave, &kstate->regs.fxsave,
sizeof(ustate->fxsave));

View File

@ -1049,6 +1049,7 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
* __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
* @to: membuf descriptor
* @fpstate: The fpstate buffer from which to copy
* @xfeatures: The mask of xfeatures to save (XSAVE mode only)
* @pkru_val: The PKRU value to store in the PKRU component
* @copy_mode: The requested copy mode
*
@ -1059,7 +1060,8 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
* It supports partial copy but @to.pos always starts from zero.
*/
void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
u32 pkru_val, enum xstate_copy_mode copy_mode)
u64 xfeatures, u32 pkru_val,
enum xstate_copy_mode copy_mode)
{
const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
struct xregs_state *xinit = &init_fpstate.regs.xsave;
@ -1083,7 +1085,7 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
break;
case XSTATE_COPY_XSAVE:
header.xfeatures &= fpstate->user_xfeatures;
header.xfeatures &= fpstate->user_xfeatures & xfeatures;
break;
}
@ -1185,6 +1187,7 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
enum xstate_copy_mode copy_mode)
{
__copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
tsk->thread.fpu.fpstate->user_xfeatures,
tsk->thread.pkru, copy_mode);
}
@ -1536,10 +1539,7 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
fpregs_restore_userregs();
newfps->xfeatures = curfps->xfeatures | xfeatures;
if (!guest_fpu)
newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
newfps->xfd = curfps->xfd & ~xfeatures;
/* Do the final updates within the locked region */

View File

@ -43,7 +43,8 @@ enum xstate_copy_mode {
struct membuf;
extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
u32 pkru_val, enum xstate_copy_mode copy_mode);
u64 xfeatures, u32 pkru_val,
enum xstate_copy_mode copy_mode);
extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
enum xstate_copy_mode mode);
extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru);

View File

@ -360,14 +360,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vcpu->arch.guest_supported_xcr0 =
cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
/*
* FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
* XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't
* supported by the host.
*/
vcpu->arch.guest_fpu.fpstate->user_xfeatures = vcpu->arch.guest_supported_xcr0 |
XFEATURE_MASK_FPSSE;
kvm_update_pv_runtime(vcpu);
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);

View File

@ -2759,13 +2759,17 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
{
u32 reg = kvm_lapic_get_reg(apic, lvt_type);
int vector, mode, trig_mode;
int r;
if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
vector = reg & APIC_VECTOR_MASK;
mode = reg & APIC_MODE_MASK;
trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
NULL);
r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL);
if (r && lvt_type == APIC_LVTPC)
kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED);
return r;
}
return 0;
}

View File

@ -93,14 +93,6 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops)
#undef __KVM_X86_PMU_OP
}
static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
{
struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work);
struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
kvm_pmu_deliver_pmi(vcpu);
}
static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@ -124,20 +116,7 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
}
if (!pmc->intr || skip_pmi)
return;
/*
* Inject PMI. If vcpu was in a guest mode during NMI PMI
* can be ejected on a guest mode re-entry. Otherwise we can't
* be sure that vcpu wasn't executing hlt instruction at the
* time of vmexit and is not going to re-enter guest mode until
* woken up. So we should wake it, but this is impossible from
* NMI context. Do it from irq work instead.
*/
if (in_pmi && !kvm_handling_nmi_from_guest(pmc->vcpu))
irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
else
if (pmc->intr && !skip_pmi)
kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
}
@ -675,9 +654,6 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
void kvm_pmu_reset(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
irq_work_sync(&pmu->irq_work);
static_call(kvm_x86_pmu_reset)(vcpu);
}
@ -687,7 +663,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
memset(pmu, 0, sizeof(*pmu));
static_call(kvm_x86_pmu_init)(vcpu);
init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn);
pmu->event_count = 0;
pmu->need_cleanup = false;
kvm_pmu_refresh(vcpu);

View File

@ -74,6 +74,12 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc)
return counter & pmc_bitmask(pmc);
}
static inline void pmc_write_counter(struct kvm_pmc *pmc, u64 val)
{
pmc->counter += val - pmc_read_counter(pmc);
pmc->counter &= pmc_bitmask(pmc);
}
static inline void pmc_release_perf_event(struct kvm_pmc *pmc)
{
if (pmc->perf_event) {

View File

@ -529,8 +529,11 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
WARN_ONCE(1, "Invalid backing page\n");
break;
case AVIC_IPI_FAILURE_INVALID_IPI_VECTOR:
/* Invalid IPI with vector < 16 */
break;
default:
pr_err("Unknown IPI interception\n");
vcpu_unimpl(vcpu, "Unknown avic incomplete IPI interception\n");
}
return 1;

View File

@ -1253,6 +1253,9 @@ void svm_leave_nested(struct kvm_vcpu *vcpu)
nested_svm_uninit_mmu_context(vcpu);
vmcb_mark_all_dirty(svm->vmcb);
if (kvm_apicv_activated(vcpu->kvm))
kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
}
kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);

View File

@ -160,7 +160,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
/* MSR_PERFCTRn */
pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
if (pmc) {
pmc->counter += data - pmc_read_counter(pmc);
pmc_write_counter(pmc, data);
pmc_update_sample_period(pmc);
return 0;
}

View File

@ -691,7 +691,7 @@ static int svm_hardware_enable(void)
*/
if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) {
struct sev_es_save_area *hostsa;
u32 msr_hi;
u32 __maybe_unused msr_hi;
hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400);
@ -913,8 +913,7 @@ void svm_set_x2apic_msr_interception(struct vcpu_svm *svm, bool intercept)
if (intercept == svm->x2avic_msrs_intercepted)
return;
if (!x2avic_enabled ||
!apic_x2apic_mode(svm->vcpu.arch.apic))
if (!x2avic_enabled)
return;
for (i = 0; i < MAX_DIRECT_ACCESS_MSRS; i++) {

View File

@ -436,11 +436,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!msr_info->host_initiated &&
!(msr & MSR_PMC_FULL_WIDTH_BIT))
data = (s64)(s32)data;
pmc->counter += data - pmc_read_counter(pmc);
pmc_write_counter(pmc, data);
pmc_update_sample_period(pmc);
break;
} else if ((pmc = get_fixed_pmc(pmu, msr))) {
pmc->counter += data - pmc_read_counter(pmc);
pmc_write_counter(pmc, data);
pmc_update_sample_period(pmc);
break;
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {

View File

@ -5382,26 +5382,37 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
return 0;
}
static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
struct kvm_xsave *guest_xsave)
{
if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return;
fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
guest_xsave->region,
sizeof(guest_xsave->region),
vcpu->arch.pkru);
}
static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
u8 *state, unsigned int size)
{
/*
* Only copy state for features that are enabled for the guest. The
* state itself isn't problematic, but setting bits in the header for
* features that are supported in *this* host but not exposed to the
* guest can result in KVM_SET_XSAVE failing when live migrating to a
* compatible host without the features that are NOT exposed to the
* guest.
*
* FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
* XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't
* supported by the host.
*/
u64 supported_xcr0 = vcpu->arch.guest_supported_xcr0 |
XFEATURE_MASK_FPSSE;
if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return;
fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
state, size, vcpu->arch.pkru);
fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size,
supported_xcr0, vcpu->arch.pkru);
}
static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
struct kvm_xsave *guest_xsave)
{
return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region,
sizeof(guest_xsave->region));
}
static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
@ -12843,6 +12854,9 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
return true;
#endif
if (kvm_test_request(KVM_REQ_PMI, vcpu))
return true;
if (kvm_arch_interrupt_allowed(vcpu) &&
(kvm_cpu_has_interrupt(vcpu) ||
kvm_guest_apic_has_interrupt(vcpu)))

View File

@ -82,6 +82,8 @@ struct timer_map {
struct arch_timer_context *emul_ptimer;
};
void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map);
struct arch_timer_cpu {
struct arch_timer_context timers[NR_KVM_TIMERS];
@ -145,4 +147,9 @@ u64 timer_get_cval(struct arch_timer_context *ctxt);
void kvm_timer_cpu_up(void);
void kvm_timer_cpu_down(void);
static inline bool has_cntpoff(void)
{
return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
}
#endif

View File

@ -1,7 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* tools/testing/selftests/kvm/include/kvm_util.h
*
* Copyright (C) 2018, Google LLC.
*/
#ifndef SELFTEST_KVM_UCALL_COMMON_H

View File

@ -68,6 +68,12 @@ struct xstate {
#define XFEATURE_MASK_OPMASK BIT_ULL(5)
#define XFEATURE_MASK_ZMM_Hi256 BIT_ULL(6)
#define XFEATURE_MASK_Hi16_ZMM BIT_ULL(7)
#define XFEATURE_MASK_PT BIT_ULL(8)
#define XFEATURE_MASK_PKRU BIT_ULL(9)
#define XFEATURE_MASK_PASID BIT_ULL(10)
#define XFEATURE_MASK_CET_USER BIT_ULL(11)
#define XFEATURE_MASK_CET_KERNEL BIT_ULL(12)
#define XFEATURE_MASK_LBR BIT_ULL(15)
#define XFEATURE_MASK_XTILE_CFG BIT_ULL(17)
#define XFEATURE_MASK_XTILE_DATA BIT_ULL(18)
@ -147,6 +153,7 @@ struct kvm_x86_cpu_feature {
#define X86_FEATURE_CLWB KVM_X86_CPU_FEATURE(0x7, 0, EBX, 24)
#define X86_FEATURE_UMIP KVM_X86_CPU_FEATURE(0x7, 0, ECX, 2)
#define X86_FEATURE_PKU KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3)
#define X86_FEATURE_OSPKE KVM_X86_CPU_FEATURE(0x7, 0, ECX, 4)
#define X86_FEATURE_LA57 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16)
#define X86_FEATURE_RDPID KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22)
#define X86_FEATURE_SGX_LC KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30)
@ -553,6 +560,13 @@ static inline void xsetbv(u32 index, u64 value)
__asm__ __volatile__("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
}
static inline void wrpkru(u32 pkru)
{
/* Note, ECX and EDX are architecturally required to be '0'. */
asm volatile(".byte 0x0f,0x01,0xef\n\t"
: : "a" (pkru), "c"(0), "d"(0));
}
static inline struct desc_ptr get_gdt(void)
{
struct desc_ptr gdt;
@ -908,6 +922,15 @@ static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature)
!kvm_cpu_has(feature.anti_feature);
}
static __always_inline uint64_t kvm_cpu_supported_xcr0(void)
{
if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
return 0;
return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
}
static inline size_t kvm_cpuid2_size(int nr_entries)
{
return sizeof(struct kvm_cpuid2) +

View File

@ -200,6 +200,13 @@ repeat:
++fmt;
}
/*
* Play nice with %llu, %llx, etc. KVM selftests only support
* 64-bit builds, so just treat %ll* the same as %l*.
*/
if (qualifier == 'l' && *fmt == 'l')
++fmt;
/* default base */
base = 10;

View File

@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* tools/testing/selftests/kvm/lib/x86_64/processor.c
*
* Copyright (C) 2021, Google LLC.
*/

View File

@ -1033,9 +1033,8 @@ static bool test_loop(const struct test_data *data,
struct test_result *rbestruntime)
{
uint64_t maxslots;
struct test_result result;
struct test_result result = {};
result.nloops = 0;
if (!test_execute(targs->nslots, &maxslots, targs->seconds, data,
&result.nloops,
&result.slot_runtime, &result.guest_runtime)) {
@ -1089,7 +1088,7 @@ int main(int argc, char *argv[])
.seconds = 5,
.runs = 1,
};
struct test_result rbestslottime;
struct test_result rbestslottime = {};
int tctr;
if (!check_memory_sizes())
@ -1098,11 +1097,10 @@ int main(int argc, char *argv[])
if (!parse_args(argc, argv, &targs))
return -1;
rbestslottime.slottimens = 0;
for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) {
const struct test_data *data = &tests[tctr];
unsigned int runctr;
struct test_result rbestruntime;
struct test_result rbestruntime = {};
if (tctr > targs.tfirst)
pr_info("\n");
@ -1110,7 +1108,6 @@ int main(int argc, char *argv[])
pr_info("Testing %s performance with %i runs, %d seconds each\n",
data->name, targs.runs, targs.seconds);
rbestruntime.runtimens = 0;
for (runctr = 0; runctr < targs.runs; runctr++)
if (!test_loop(data, &targs,
&rbestslottime, &rbestruntime))

View File

@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* KVM_GET/SET_* tests
*
* Copyright (C) 2022, Red Hat, Inc.
*
* Tests for Hyper-V extensions to SVM.

View File

@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* tools/testing/selftests/kvm/nx_huge_page_test.c
*
* Usage: to be run via nx_huge_page_test.sh, which does the necessary
* environment setup and teardown
*

View File

@ -4,7 +4,6 @@
# Wrapper script which performs setup and cleanup for nx_huge_pages_test.
# Makes use of root privileges to set up huge pages and KVM module parameters.
#
# tools/testing/selftests/kvm/nx_huge_page_test.sh
# Copyright (C) 2022, Google LLC.
set -e

View File

@ -139,6 +139,83 @@ static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
static void __attribute__((__flatten__)) guest_code(void *arg)
{
GUEST_SYNC(1);
if (this_cpu_has(X86_FEATURE_XSAVE)) {
uint64_t supported_xcr0 = this_cpu_supported_xcr0();
uint8_t buffer[4096];
memset(buffer, 0xcc, sizeof(buffer));
set_cr4(get_cr4() | X86_CR4_OSXSAVE);
GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
xsetbv(0, xgetbv(0) | supported_xcr0);
/*
* Modify state for all supported xfeatures to take them out of
* their "init" state, i.e. to make them show up in XSTATE_BV.
*
* Note off-by-default features, e.g. AMX, are out of scope for
* this particular testcase as they have a different ABI.
*/
GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP);
asm volatile ("fincstp");
GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE);
asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer));
if (supported_xcr0 & XFEATURE_MASK_YMM)
asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer));
if (supported_xcr0 & XFEATURE_MASK_AVX512) {
asm volatile ("kmovq %0, %%k1" :: "r" (-1ull));
asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer));
asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer));
}
if (this_cpu_has(X86_FEATURE_MPX)) {
uint64_t bounds[2] = { 10, 0xffffffffull };
uint64_t output[2] = { };
GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS);
GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR);
/*
* Don't bother trying to get BNDCSR into the INUSE
* state. MSR_IA32_BNDCFGS doesn't count as it isn't
* managed via XSAVE/XRSTOR, and BNDCFGU can only be
* modified by XRSTOR. Stuffing XSTATE_BV in the host
* is simpler than doing XRSTOR here in the guest.
*
* However, temporarily enable MPX in BNDCFGS so that
* BNDMOV actually loads BND1. If MPX isn't *fully*
* enabled, all MPX instructions are treated as NOPs.
*
* Hand encode "bndmov (%rax),%bnd1" as support for MPX
* mnemonics/registers has been removed from gcc and
* clang (and was never fully supported by clang).
*/
wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0));
asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds));
/*
* Hand encode "bndmov %bnd1, (%rax)" to sanity check
* that BND1 actually got loaded.
*/
asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output));
wrmsr(MSR_IA32_BNDCFGS, 0);
GUEST_ASSERT_EQ(bounds[0], output[0]);
GUEST_ASSERT_EQ(bounds[1], output[1]);
}
if (this_cpu_has(X86_FEATURE_PKU)) {
GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU);
set_cr4(get_cr4() | X86_CR4_PKE);
GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE));
wrpkru(-1u);
}
}
GUEST_SYNC(2);
if (arg) {
@ -153,10 +230,11 @@ static void __attribute__((__flatten__)) guest_code(void *arg)
int main(int argc, char *argv[])
{
uint64_t *xstate_bv, saved_xstate_bv;
vm_vaddr_t nested_gva = 0;
struct kvm_cpuid2 empty_cpuid = {};
struct kvm_regs regs1, regs2;
struct kvm_vcpu *vcpu;
struct kvm_vcpu *vcpu, *vcpuN;
struct kvm_vm *vm;
struct kvm_x86_state *state;
struct ucall uc;
@ -209,6 +287,34 @@ int main(int argc, char *argv[])
/* Restore state in a new VM. */
vcpu = vm_recreate_with_one_vcpu(vm);
vcpu_load_state(vcpu, state);
/*
* Restore XSAVE state in a dummy vCPU, first without doing
* KVM_SET_CPUID2, and then with an empty guest CPUID. Except
* for off-by-default xfeatures, e.g. AMX, KVM is supposed to
* allow KVM_SET_XSAVE regardless of guest CPUID. Manually
* load only XSAVE state, MSRs in particular have a much more
* convoluted ABI.
*
* Load two versions of XSAVE state: one with the actual guest
* XSAVE state, and one with all supported features forced "on"
* in xstate_bv, e.g. to ensure that KVM allows loading all
* supported features, even if something goes awry in saving
* the original snapshot.
*/
xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512];
saved_xstate_bv = *xstate_bv;
vcpuN = __vm_vcpu_add(vm, vcpu->id + 1);
vcpu_xsave_set(vcpuN, state->xsave);
*xstate_bv = kvm_cpu_supported_xcr0();
vcpu_xsave_set(vcpuN, state->xsave);
vcpu_init_cpuid(vcpuN, &empty_cpuid);
vcpu_xsave_set(vcpuN, state->xsave);
*xstate_bv = saved_xstate_bv;
vcpu_xsave_set(vcpuN, state->xsave);
kvm_x86_state_cleanup(state);
memset(&regs2, 0, sizeof(regs2));

View File

@ -1,10 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* svm_vmcall_test
*
* Copyright © 2021 Amazon.com, Inc. or its affiliates.
*
* Xen shared_info / pvclock testing
*/
#include "test_util.h"

View File

@ -1,10 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* svm_vmcall_test
*
* Copyright © 2021 Amazon.com, Inc. or its affiliates.
*
* Xen shared_info / pvclock testing
*/
#include "test_util.h"