commit 49d5759268
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm updates from Paolo Bonzini:

 "ARM:

   - Provide a virtual cache topology to the guest to avoid
     inconsistencies with migration on heterogeneous systems. Non-secure
     software has no practical need to traverse the caches by set/way in
     the first place

   - Add support for taking stage-2 access faults in parallel. This was
     an accidental omission in the original parallel faults
     implementation, but should provide a marginal improvement to
     machines w/o FEAT_HAFDBS (such as hardware from the fruit company)

   - A preamble to adding support for nested virtualization to KVM,
     including vEL2 register state, rudimentary nested exception
     handling and masking unsupported features for nested guests

   - Fixes to the PSCI relay that avoid an unexpected host SVE trap when
     resuming a CPU when running pKVM

   - VGIC maintenance interrupt support for the AIC

   - Improvements to the arch timer emulation, primarily aimed at
     reducing the trap overhead of running nested

   - Add CONFIG_USERFAULTFD to the KVM selftests config fragment in the
     interest of CI systems

   - Avoid VM-wide stop-the-world operations when a vCPU accesses its
     own redistributor

   - Serialize when toggling CPACR_EL1.SMEN to avoid unexpected
     exceptions in the host

   - Aesthetic and comment/kerneldoc fixes

   - Drop the vestiges of the old Columbia mailing list and add [Oliver]
     as co-maintainer

   This also drags in arm64's 'for-next/sme2' branch, because both it
   and the PSCI relay changes touch the EL2 initialization code.

  RISC-V:

   - Fix wrong usage of PGDIR_SIZE instead of PUD_SIZE

   - Correctly place the guest in S-mode after redirecting a trap to
     the guest

   - Redirect illegal instruction traps to guest

   - SBI PMU support for guest

  s390:

   - Sort out confusion between virtual and physical addresses, which
     currently are the same on s390

   - A new ioctl that performs cmpxchg on guest memory

   - A few fixes

  x86:

   - Change tdp_mmu to a read-only parameter

   - Separate TDP and shadow MMU page fault paths

   - Enable Hyper-V invariant TSC control

   - Fix a variety of APICv and AVIC bugs, some of them real-world,
     some of them affecting architecturally legal but unlikely to
     happen in practice

   - Mark the APIC timer as expired if it's in one-shot mode and the
     count underflows while the vCPU task was being migrated

   - Advertise support for Intel's new fast REP string features

   - Fix a double-shootdown issue in the emergency reboot code

   - Ensure GIF=1 and disable SVM during an emergency reboot, i.e. give
     SVM similar treatment to VMX

   - Update Xen's TSC info CPUID sub-leaves as appropriate

   - Add support for Hyper-V's extended hypercalls, where "support" at
     this point is just forwarding the hypercalls to userspace

   - Clean up the kvm->lock vs. kvm->srcu sequences when updating the
     PMU and MSR filters

   - One-off fixes and cleanups

   - Fix and cleanup the range-based TLB flushing code, used when KVM
     is running on Hyper-V

   - Add support for filtering PMU events using a mask. If userspace
     wants to heavily restrict what events the guest can use, it can
     now do so without needing an absurd number of filter entries

   - Clean up KVM's handling of "PMU MSRs to save", especially when
     vPMU support is disabled

   - Add PEBS support for Intel Sapphire Rapids

   - Fix a mostly benign overflow bug in SEV's
     send|receive_update_data()

   - Move several SVM-specific flags into vcpu_svm

  x86 Intel:

   - Handle NMI VM-Exits before leaving the noinstr region

   - A few trivial cleanups in the VM-Enter flows

   - Stop enabling VMFUNC for L1 purely to document that KVM doesn't
     support EPTP switching (or any other VM function) for L1

   - Fix a crash when using eVMCS's enlightened MSR bitmaps

  Generic:

   - Clean up the hardware enable and initialization flow, which was
     scattered around multiple arch-specific hooks. Instead, just let
     the arch code call into generic code. Both x86 and ARM should
     benefit from not having to fight common KVM code's notion of how
     to do initialization

   - Account allocations in generic kvm_arch_alloc_vm()

   - Fix a memory leak if coalesced MMIO unregistration fails

  selftests:

   - On x86, cache the CPU vendor (AMD vs. Intel) and use the info to
     emit the correct hypercall instruction instead of relying on KVM
     to patch in VMMCALL

   - Use TAP interface for kvm_binary_stats_test and tsc_msrs_test"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (325 commits)
  KVM: SVM: hyper-v: placate modpost section mismatch error
  KVM: x86/mmu: Make tdp_mmu_allowed static
  KVM: arm64: nv: Use reg_to_encoding() to get sysreg ID
  KVM: arm64: nv: Only toggle cache for virtual EL2 when SCTLR_EL2 changes
  KVM: arm64: nv: Filter out unsupported features from ID regs
  KVM: arm64: nv: Emulate EL12 register accesses from the virtual EL2
  KVM: arm64: nv: Allow a sysreg to be hidden from userspace only
  KVM: arm64: nv: Emulate PSTATE.M for a guest hypervisor
  KVM: arm64: nv: Add accessors for SPSR_EL1, ELR_EL1 and VBAR_EL1 from virtual EL2
  KVM: arm64: nv: Handle SMCs taken from virtual EL2
  KVM: arm64: nv: Handle trapped ERET from virtual EL2
  KVM: arm64: nv: Inject HVC exceptions to the virtual EL2
  KVM: arm64: nv: Support virtual EL2 exceptions
  KVM: arm64: nv: Handle HCR_EL2.NV system register traps
  KVM: arm64: nv: Add nested virt VCPU primitives for vEL2 VCPU state
  KVM: arm64: nv: Add EL2 system registers to vcpu context
  KVM: arm64: nv: Allow userspace to set PSR_MODE_EL2x
  KVM: arm64: nv: Reset VCPU to EL2 registers if VCPU nested virt is set
  KVM: arm64: nv: Introduce nested virtualization VCPU feature
  KVM: arm64: Use the S2 MMU context to iterate over S2 table
  ...
arch/x86/kvm/reverse_cpuid.h (217 lines, 6.9 KiB, C)
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef ARCH_X86_KVM_REVERSE_CPUID_H
#define ARCH_X86_KVM_REVERSE_CPUID_H

#include <uapi/asm/kvm.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>

/*
 * Hardware-defined CPUID leafs that are either scattered by the kernel or are
 * unknown to the kernel, but need to be directly used by KVM.  Note, these
 * word values conflict with the kernel's "bug" caps, but KVM doesn't use those.
 */
enum kvm_only_cpuid_leafs {
	CPUID_12_EAX	 = NCAPINTS,
	CPUID_7_1_EDX,
	CPUID_8000_0007_EDX,
	NR_KVM_CPU_CAPS,

	NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
};

/*
 * Define a KVM-only feature flag.
 *
 * For features that are scattered by cpufeatures.h, __feature_translate() also
 * needs to be updated to translate the kernel-defined feature into the
 * KVM-defined feature.
 *
 * For features that are 100% KVM-only, i.e. not defined by cpufeatures.h,
 * forego the intermediate KVM_X86_FEATURE and directly define X86_FEATURE_* so
 * that X86_FEATURE_* can be used in KVM.  No __feature_translate() handling is
 * needed in this case.
 */
#define KVM_X86_FEATURE(w, f)		((w) * 32 + (f))
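
/*
 * Worked example (illustrative): KVM_X86_FEATURE_SGX1 below expands to
 * KVM_X86_FEATURE(CPUID_12_EAX, 0).  Assuming NCAPINTS == 21 (the exact
 * value depends on the kernel version), CPUID_12_EAX == 21 and the flag
 * packs to 21 * 32 + 0 == 672, i.e. word 21, bit 0 -- the same word/bit
 * encoding that cpufeatures.h uses for its X86_FEATURE_* flags.
 */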
/* Intel-defined SGX sub-features, CPUID level 0x12 (EAX). */
#define KVM_X86_FEATURE_SGX1		KVM_X86_FEATURE(CPUID_12_EAX, 0)
#define KVM_X86_FEATURE_SGX2		KVM_X86_FEATURE(CPUID_12_EAX, 1)
#define KVM_X86_FEATURE_SGX_EDECCSSA	KVM_X86_FEATURE(CPUID_12_EAX, 11)

/* Intel-defined sub-features, CPUID level 0x00000007:1 (EDX) */
#define X86_FEATURE_AVX_VNNI_INT8	KVM_X86_FEATURE(CPUID_7_1_EDX, 4)
#define X86_FEATURE_AVX_NE_CONVERT	KVM_X86_FEATURE(CPUID_7_1_EDX, 5)
#define X86_FEATURE_PREFETCHITI		KVM_X86_FEATURE(CPUID_7_1_EDX, 14)

/* CPUID level 0x80000007 (EDX). */
#define KVM_X86_FEATURE_CONSTANT_TSC	KVM_X86_FEATURE(CPUID_8000_0007_EDX, 8)

struct cpuid_reg {
	u32 function;
	u32 index;
	int reg;
};

static const struct cpuid_reg reverse_cpuid[] = {
	[CPUID_1_EDX]         = {         1, 0, CPUID_EDX},
	[CPUID_8000_0001_EDX] = {0x80000001, 0, CPUID_EDX},
	[CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX},
	[CPUID_1_ECX]         = {         1, 0, CPUID_ECX},
	[CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX},
	[CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX},
	[CPUID_7_0_EBX]       = {         7, 0, CPUID_EBX},
	[CPUID_D_1_EAX]       = {       0xd, 1, CPUID_EAX},
	[CPUID_8000_0008_EBX] = {0x80000008, 0, CPUID_EBX},
	[CPUID_6_EAX]         = {         6, 0, CPUID_EAX},
	[CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX},
	[CPUID_7_ECX]         = {         7, 0, CPUID_ECX},
	[CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX},
	[CPUID_7_EDX]         = {         7, 0, CPUID_EDX},
	[CPUID_7_1_EAX]       = {         7, 1, CPUID_EAX},
	[CPUID_12_EAX]        = {0x00000012, 0, CPUID_EAX},
	[CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX},
	[CPUID_7_1_EDX]       = {         7, 1, CPUID_EDX},
	[CPUID_8000_0007_EDX] = {0x80000007, 0, CPUID_EDX},
	[CPUID_8000_0021_EAX] = {0x80000021, 0, CPUID_EAX},
};

/*
 * Reverse CPUID and its derivatives can only be used for hardware-defined
 * feature words, i.e. words whose bits directly correspond to a CPUID leaf.
 * Retrieving a feature bit or masking guest CPUID from a Linux-defined word
 * is nonsensical as the bit number/mask is an arbitrary software-defined value
 * and can't be used by KVM to query/control guest capabilities.  And obviously
 * the leaf being queried must have an entry in the lookup table.
 */
static __always_inline void reverse_cpuid_check(unsigned int x86_leaf)
{
	BUILD_BUG_ON(x86_leaf == CPUID_LNX_1);
	BUILD_BUG_ON(x86_leaf == CPUID_LNX_2);
	BUILD_BUG_ON(x86_leaf == CPUID_LNX_3);
	BUILD_BUG_ON(x86_leaf == CPUID_LNX_4);
	BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid));
	BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0);
}

/*
 * Translate feature bits that are scattered in the kernel's cpufeatures word
 * into KVM feature words that align with hardware's definitions.
 */
static __always_inline u32 __feature_translate(int x86_feature)
{
	if (x86_feature == X86_FEATURE_SGX1)
		return KVM_X86_FEATURE_SGX1;
	else if (x86_feature == X86_FEATURE_SGX2)
		return KVM_X86_FEATURE_SGX2;
	else if (x86_feature == X86_FEATURE_SGX_EDECCSSA)
		return KVM_X86_FEATURE_SGX_EDECCSSA;
	else if (x86_feature == X86_FEATURE_CONSTANT_TSC)
		return KVM_X86_FEATURE_CONSTANT_TSC;

	return x86_feature;
}

static __always_inline u32 __feature_leaf(int x86_feature)
{
	return __feature_translate(x86_feature) / 32;
}

/*
 * Retrieve the bit mask from an X86_FEATURE_* definition.  Features contain
 * the hardware defined bit number (stored in bits 4:0) and a software defined
 * "word" (stored in bits 31:5).  The word is used to index into arrays of
 * bit masks that hold the per-cpu feature capabilities, e.g. this_cpu_has().
 */
static __always_inline u32 __feature_bit(int x86_feature)
{
	x86_feature = __feature_translate(x86_feature);

	reverse_cpuid_check(x86_feature / 32);
	return 1 << (x86_feature & 31);
}

#define feature_bit(name)  __feature_bit(X86_FEATURE_##name)

static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned int x86_feature)
{
	unsigned int x86_leaf = __feature_leaf(x86_feature);

	reverse_cpuid_check(x86_leaf);
	return reverse_cpuid[x86_leaf];
}
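
/*
 * Worked example (illustrative): x86_feature_cpuid(X86_FEATURE_SGX1)
 * first translates the scattered kernel flag to KVM_X86_FEATURE_SGX1,
 * whose word is CPUID_12_EAX, then returns reverse_cpuid[CPUID_12_EAX],
 * i.e. {0x00000012, 0, CPUID_EAX}: CPUID leaf 0x12, sub-leaf 0, EAX.
 */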
static __always_inline u32 *__cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry,
						  u32 reg)
{
	switch (reg) {
	case CPUID_EAX:
		return &entry->eax;
	case CPUID_EBX:
		return &entry->ebx;
	case CPUID_ECX:
		return &entry->ecx;
	case CPUID_EDX:
		return &entry->edx;
	default:
		BUILD_BUG();
		return NULL;
	}
}

static __always_inline u32 *cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry,
						unsigned int x86_feature)
{
	const struct cpuid_reg cpuid = x86_feature_cpuid(x86_feature);

	return __cpuid_entry_get_reg(entry, cpuid.reg);
}

static __always_inline u32 cpuid_entry_get(struct kvm_cpuid_entry2 *entry,
					   unsigned int x86_feature)
{
	u32 *reg = cpuid_entry_get_reg(entry, x86_feature);

	return *reg & __feature_bit(x86_feature);
}

static __always_inline bool cpuid_entry_has(struct kvm_cpuid_entry2 *entry,
					    unsigned int x86_feature)
{
	return cpuid_entry_get(entry, x86_feature);
}
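
/*
 * Typical usage (illustrative): given the guest's CPUID.0x12.0 entry,
 * cpuid_entry_has(entry, X86_FEATURE_SGX1) reports whether the guest
 * sees SGX1, and cpuid_entry_clear() below would mask it back out.
 */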
static __always_inline void cpuid_entry_clear(struct kvm_cpuid_entry2 *entry,
					      unsigned int x86_feature)
{
	u32 *reg = cpuid_entry_get_reg(entry, x86_feature);

	*reg &= ~__feature_bit(x86_feature);
}

static __always_inline void cpuid_entry_set(struct kvm_cpuid_entry2 *entry,
					    unsigned int x86_feature)
{
	u32 *reg = cpuid_entry_get_reg(entry, x86_feature);

	*reg |= __feature_bit(x86_feature);
}

static __always_inline void cpuid_entry_change(struct kvm_cpuid_entry2 *entry,
					       unsigned int x86_feature,
					       bool set)
{
	u32 *reg = cpuid_entry_get_reg(entry, x86_feature);

	/*
	 * Open coded instead of using cpuid_entry_{clear,set}() to coerce the
	 * compiler into using CMOV instead of Jcc when possible.
	 */
	if (set)
		*reg |= __feature_bit(x86_feature);
	else
		*reg &= ~__feature_bit(x86_feature);
}

#endif /* ARCH_X86_KVM_REVERSE_CPUID_H */
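
The word/bit packing that reverse_cpuid.h builds on can be exercised
outside the kernel. Below is a minimal, self-contained user-space sketch
(not kernel code; NCAPINTS_DEMO and the other DEMO names are illustrative
stand-ins for the cpufeatures.h values, whose exact numbers depend on the
kernel version) showing how a packed feature value decomposes into a word
index and a bit mask, mirroring __feature_leaf() and __feature_bit() above:

/* demo_reverse_cpuid.c - user-space sketch of the reverse-CPUID packing. */
#include <stdio.h>
#include <stdint.h>

#define NCAPINTS_DEMO		21		/* assumed number of kernel feature words */
#define CPUID_12_EAX_DEMO	NCAPINTS_DEMO	/* first KVM-only word, as in the header */

/* Same packing as KVM_X86_FEATURE(w, f): word in bits 31:5, bit in bits 4:0. */
#define DEMO_FEATURE(w, f)	((w) * 32 + (f))

static uint32_t demo_feature_leaf(int x86_feature)
{
	return x86_feature / 32;	/* word index, cf. __feature_leaf() */
}

static uint32_t demo_feature_bit(int x86_feature)
{
	return 1u << (x86_feature & 31);	/* bit mask, cf. __feature_bit() */
}

int main(void)
{
	int sgx1 = DEMO_FEATURE(CPUID_12_EAX_DEMO, 0);	/* mirrors KVM_X86_FEATURE_SGX1 */

	printf("packed value:      %d\n", sgx1);		/* 672 when the word is 21 */
	printf("word (leaf index): %u\n", demo_feature_leaf(sgx1));
	printf("bit mask:          0x%x\n", demo_feature_bit(sgx1));
	return 0;
}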